# Default knitr chunk options: turn off warnings and messages in chunk output.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
数据加载
# Attach the tidyverse and restore the objects saved in the .RData file
# (the calls below expect it to provide `fashion_mnist`), then inspect it.
library(tidyverse)
load("datasets/fashion_mnist_500.RData")

# Number of rows and columns.
dim(fashion_mnist)
## [1] 500 785

# First few column names.
head(names(fashion_mnist))
## [1] "label" "pixel1" "pixel2" "pixel3" "pixel4" "pixel5"
# Baseline: fit a random forest of `label` on every other column and report
# how long the fit took.
library(randomForest)
library(data.table)

start_time <- Sys.time()
naive_model <- randomForest(
  label ~ .,
  data = fashion_mnist
)
end_time <- Sys.time()

# Elapsed wall-clock time of the fit.
difftime(end_time, start_time)
## Time difference of 10.496 secs
# Replace the freshly fitted model with the copy stored on disk.
naive_model <- read_rds("datasets/fashion_mnist_500_naive_rf_model.rds")

# Smallest value in the "OOB" column of the model's error-rate matrix.
min(naive_model$err.rate[, "OOB"])
## [1] 0.226
下面开始tsne
library(Rtsne)

# Generate a two-dimensional t-SNE embedding of the pixel columns (everything
# except the first column, `label`) without the initial PCA step.
# The Rtsne argument is lowercase `pca`; a misspelt `PCA` is silently absorbed
# by `...`, leaving PCA enabled.
tsne_output <- Rtsne(fashion_mnist[, -1], pca = FALSE, dims = 2)

# Persist the embedding coordinates so later chunks can reuse them.
# `as.data.frame()` names the unnamed matrix columns V1, V2, ... explicitly,
# so `as_tibble()` does not have to repair missing column names.
tsne_output$Y %>%
  as.data.frame() %>%
  as_tibble() %>%
  write_rds("datasets/fashion_mnist_500_tsne_features.rds")
开始新的预测
# Reload the saved t-SNE coordinates (this overwrites the Rtsne result object
# of the same name with the stored feature table).
tsne_output <- read_rds("datasets/fashion_mnist_500_tsne_features.rds")
library(randomForest)

# Fit a forest on the t-SNE features only.
# The response column is built with an explicit `label` name:
# `fashion_mnist[, 1]` drops to an unnamed vector when `fashion_mnist` is a
# plain data.frame, and after `cbind()` the column would no longer be called
# `label`, so the formula could not find its response in `data`.
model_tsne_only <- randomForest(
  label ~ .,
  data = cbind(label = fashion_mnist$label, tsne_output)
)

# Replace the freshly fitted model with the copy stored on disk.
model_tsne_only <-
  read_rds("datasets/fashion_mnist_500_model_only_tsne.rds")

# Smallest value in the "OOB" column of the model's error-rate matrix.
model_tsne_only$err.rate[, "OOB"] %>% min()
## [1] 0.3
只用 t-SNE 特征时,误差(0.3)比 glrm-feature-perf 中的结果好很多,但相比使用全部像素的基准模型(0.226)还是变差了。
tsne 特征工程变量加入
# Fit a forest on the original columns plus the saved t-SNE columns and
# report how long this chunk took (reading the features is inside the timing).
library(randomForest)
library(data.table)

start_time <- Sys.time()
tsne_output <- read_rds("datasets/fashion_mnist_500_tsne_features.rds")
model_add_tsne <- randomForest(
  label ~ .,
  data = cbind(fashion_mnist, tsne_output)
)
end_time <- Sys.time()

# Elapsed wall-clock time.
difftime(end_time, start_time)
## Time difference of 10.328 secs
# Replace the freshly fitted model with the copy stored on disk.
model_add_tsne <- read_rds("datasets/fashion_mnist_500_model_add_tsne.rds")

# Smallest value in the "OOB" column of the model's error-rate matrix.
min(model_add_tsne$err.rate[, "OOB"])
## [1] 0.218
加入 t-SNE 特征后,OOB 误差从 0.226 降到 0.218,有小幅增益,效果相对还可以。