# Chunk defaults: suppress warnings and messages in knitted chunk output.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

# Attach the packages whose functions the chunks below call unqualified, so
# the notebook also runs in a fresh R session.
library(readr)    # read_csv()
library(dplyr)    # %>%, select(), mutate(), bind_cols(), as_tibble()
library(ggplot2)  # ggplot(), aes(), geom_point()

# List the working directory contents (spam.csv is read from it below).
list.files()
[1] "~$部数据分析师笔试题目_180412.docx"
[2] "eda.nb.html"
[3] "eda.Rmd"
[4] "spam.csv"
[5] "数据部数据分析师笔试题目_180412.docx"
# Read the spam data with explicit column types instead of relying on type
# guessing. Guessing (which only inspects the first rows of the file) typed
# `cs` as integer, so later fractional values such as ".1" were reported as
# parsing failures and turned into NA. Every column other than `type` is
# therefore read as double; note that `capitalLong` and `capitalTotal`, which
# were previously guessed as integer, are now double as well.
spam <- readr::read_csv(
  "spam.csv",
  col_types = readr::cols(
    .default = readr::col_double(),
    type = readr::col_character()
  )
)
Parsed with column specification:
cols(
.default = col_double(),
cs = col_integer(),
capitalLong = col_integer(),
capitalTotal = col_integer(),
type = col_character()
)
See spec(...) for full column specifications.
number of columns of result is not a multiple of vector length (arg 1)
145 parsing failures.
# A tibble: 5 x 5
    row col   expected               actual file
  <int> <chr> <chr>                  <chr>  <chr>
1  1449 cs    no trailing characters .1     'spam.csv'
2  1833 cs    no trailing characters .32    'spam.csv'
3  1845 cs    no trailing characters .54    'spam.csv'
4  1847 cs    no trailing characters .39    'spam.csv'
5  1860 cs    no trailing characters .14    'spam.csv'
... ... ...   ...                    ...    ...
See problems(...) for more details.
# Handle missing values ourselves: drop every row that contains an NA.
spam_edited <- na.omit(spam)

# Principal component analysis on every column except `type`, with each
# variable centred and scaled to unit variance.
pca_model <- prcomp(
  select(spam_edited, -type),
  center = TRUE,
  scale. = TRUE
)

# Keep the scores on the first two principal components and put the `type`
# column of the same rows back next to them.
eda_data <- bind_cols(
  select(as_tibble(predict(pca_model)), 1:2),
  select(spam_edited, type)
)
# Scatter plot of the first two principal components, one point per row,
# coloured by `type` (converted to a factor so it maps to a discrete scale).
ggplot(mutate(eda_data, type = as.factor(type))) +
  geom_point(aes(x = PC1, y = PC2, col = type))
LS0tCnRpdGxlOiAiZWRhIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cH0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFKQpsaXN0LmZpbGVzKCkKc3BhbSA8LSByZWFkX2Nzdignc3BhbS5jc3YnKQpgYGAKCmBgYHtyfQpzcGFtX2VkaXRlZCA8LSAKc3BhbSAlPiUgCiAgICBuYS5vbWl0KCkgIyDoh6rlt7HlpITnkIbkuIvnvLrlpLHlgLwKcGNhX21vZGVsIDwtIAogICAgcHJjb21wKHNwYW1fZWRpdGVkICU+JSBzZWxlY3QoLXR5cGUpLAogICAgICAgICAgIGNlbnRlciA9IFRSVUUsc2NhbGUuID0gVFJVRSkKZWRhX2RhdGEgPC0gCnByZWRpY3QocGNhX21vZGVsKSAlPiUgCiAgICBhc190aWJibGUoKSAlPiUgCiAgICBzZWxlY3QoMToyKSAlPiUgCiAgICBiaW5kX2NvbHMoc3BhbV9lZGl0ZWQgJT4lIHNlbGVjdCh0eXBlKSkKYGBgCgoqIFtyIC0gRXJyb3IgaW4gc3ZkKHgsIG51ID0gMCkgOiAwIGV4dGVudCBkaW1lbnNpb25zIC0gU3RhY2sgT3ZlcmZsb3ddKGh0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzEzMzUyODE1L2Vycm9yLWluLXN2ZHgtbnUtMC0wLWV4dGVudC1kaW1lbnNpb25zKQoKYGBge3J9CmVkYV9kYXRhICU+JSAKICAgIG11dGF0ZSh0eXBlID0gYXMuZmFjdG9yKHR5cGUpKSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBQQzEsIHkgPSBQQzIsIGNvbCA9IHR5cGUpKSArIAogICAgICAgIGdlb21fcG9pbnQoKQpgYGAKCg==