# Default chunk options: do not show warnings or messages in the output.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# List the files in the current working directory.
list.files(path = ".")
[1] "~$部数据分析师笔试题目_180412.docx"  
[2] "eda.nb.html"                         
[3] "eda.Rmd"                             
[4] "spam.csv"                            
[5] "数据部数据分析师笔试题目_180412.docx"
# Read the spam data set with an explicit column specification.
# Left to guess, readr types `cs` as integer, and the decimal values that
# appear further down the file (e.g. ".1", ".32") then fail to parse and are
# turned into NA. Reading `cs` as double (through `.default`) keeps those
# values; the remaining columns keep the types readr was already assigning.
spam <- read_csv(
  "spam.csv",
  col_types = cols(
    .default = col_double(),
    capitalLong = col_integer(),
    capitalTotal = col_integer(),
    type = col_character()
  )
)
Parsed with column specification:
cols(
  .default = col_double(),
  cs = col_integer(),
  capitalLong = col_integer(),
  capitalTotal = col_integer(),
  type = col_character()
)
See spec(...) for full column specifications.
number of columns of result is not a multiple of vector length (arg 1)
145 parsing failures.
# A tibble: 5 x 5
    row col   expected               actual file
  <int> <chr> <chr>                  <chr>  <chr>
1  1449 cs    no trailing characters .1     'spam.csv'
2  1833 cs    no trailing characters .32    'spam.csv'
3  1845 cs    no trailing characters .54    'spam.csv'
4  1847 cs    no trailing characters .39    'spam.csv'
5  1860 cs    no trailing characters .14    'spam.csv'
...
See problems(...) for more details.
# Drop every row that contains a missing value
# (original note: handle the missing values yourself).
spam_edited <- na.omit(spam)

# PCA on every column except `type`, with centring and scaling switched on.
pca_model <- prcomp(
  select(spam_edited, -type),
  center = TRUE,
  scale. = TRUE
)

# Keep the first two columns of the PCA scores and attach the `type` column.
eda_data <- predict(pca_model) %>%
  as_tibble() %>%
  select(1:2) %>%
  bind_cols(select(spam_edited, type))
# Scatter plot of PC1 against PC2, coloured by `type` (converted to a factor).
ggplot(
  mutate(eda_data, type = as.factor(type)),
  aes(x = PC1, y = PC2, col = type)
) +
  geom_point()

LS0tCnRpdGxlOiAiZWRhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cH0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFKQpsaXN0LmZpbGVzKCkKc3BhbSA8LSByZWFkX2Nzdignc3BhbS5jc3YnKQpgYGAKCmBgYHtyfQpzcGFtX2VkaXRlZCA8LSAKc3BhbSAlPiUgCiAgICBuYS5vbWl0KCkgIyDoh6rlt7HlpITnkIbkuIvnvLrlpLHlgLwKcGNhX21vZGVsIDwtIAogICAgcHJjb21wKHNwYW1fZWRpdGVkICU+JSBzZWxlY3QoLXR5cGUpLAogICAgICAgICAgIGNlbnRlciA9IFRSVUUsc2NhbGUuID0gVFJVRSkKZWRhX2RhdGEgPC0gCnByZWRpY3QocGNhX21vZGVsKSAlPiUgCiAgICBhc190aWJibGUoKSAlPiUgCiAgICBzZWxlY3QoMToyKSAlPiUgCiAgICBiaW5kX2NvbHMoc3BhbV9lZGl0ZWQgJT4lIHNlbGVjdCh0eXBlKSkKYGBgCgoqIFtyIC0gRXJyb3IgaW4gc3ZkKHgsIG51ID0gMCkgOiAwIGV4dGVudCBkaW1lbnNpb25zIC0gU3RhY2sgT3ZlcmZsb3ddKGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzEzMzUyODE1L2Vycm9yLWluLXN2ZHgtbnUtMC0wLWV4dGVudC1kaW1lbnNpb25zKQoKYGBge3J9CmVkYV9kYXRhICU+JSAKICAgIG11dGF0ZSh0eXBlID0gYXMuZmFjdG9yKHR5cGUpKSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBQQzEsIHkgPSBQQzIsIGNvbCA9IHR5cGUpKSArIAogICAgICAgIGdlb21fcG9pbnQoKQpgYGAKCg==