# Global knitr chunk options: suppress messages and warnings in every chunk.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

数据下载

  1. datasets/fashion_mnist_500_naive_rf_model.rds
  2. datasets/fashion_mnist_500_tsne_features.rds
  3. datasets/fashion_mnist_500_model_only_tsne.rds
  4. datasets/fashion_mnist_500.RData
  5. datasets/fashion_mnist_500_model_add_tsne.rds
# Attach the tidyverse; the rest of this document relies on its pipe and
# read_rds()/write_rds() helpers.
library(tidyverse)
# Restore the saved workspace objects; the code below expects this file to
# provide `fashion_mnist`.
load("datasets/fashion_mnist_500.RData")
# Number of rows and columns.
dim(fashion_mnist)
## [1] 500 785
# First few column names.
head(names(fashion_mnist))
## [1] "label"  "pixel1" "pixel2" "pixel3" "pixel4" "pixel5"
library(randomForest)
library(data.table)
# Baseline: fit a random forest predicting `label` from every other column of
# `fashion_mnist`, and time the fit.
start_time <- Sys.time()
naive_model <- randomForest(label ~ ., data = fashion_mnist)
end_time <- Sys.time()
end_time - start_time
## Time difference of 10.496 secs
# NOTE: the model fitted above is replaced here by a previously saved model,
# so the error reported below comes from the cached fit, not from this run.
naive_model <-
  read_rds("datasets/fashion_mnist_500_naive_rf_model.rds")
# Smallest value in the OOB column of the model's error-rate matrix.
naive_model$err.rate[, "OOB"] %>% min()
## [1] 0.226

下面开始 t-SNE 降维。

library(Rtsne)
# Generate a two-dimensional t-SNE embedding of every column except the first
# (the label), without the initial PCA step.
# Rtsne's argument is the lowercase `pca`; the previous `PCA = F` did not match
# it and was swallowed by `...`, so PCA was still being applied.
tsne_output <- Rtsne(fashion_mnist[, -1], pca = FALSE, dims = 2)
# Name the embedding columns explicitly (V1, V2, ...) instead of relying on
# as_tibble()'s deprecated implicit naming of unnamed matrix columns.
tsne_features <- tsne_output$Y
colnames(tsne_features) <- paste0("V", seq_len(ncol(tsne_features)))
# Persist the embedding so later chunks can reuse it without re-running t-SNE.
tsne_features %>%
  as_tibble() %>%
  write_rds("datasets/fashion_mnist_500_tsne_features.rds")

开始新的预测

# Reload the saved t-SNE embedding (one row per observation).
tsne_output <- read_rds("datasets/fashion_mnist_500_tsne_features.rds")
library(randomForest)
# Build the training frame with an explicitly named `label` column.
# `fashion_mnist[, 1]` drops to an unnamed vector on a base data.frame, which
# would lose the `label` name and break the `label ~ .` formula.
tsne_only_data <- data.frame(label = fashion_mnist[["label"]], tsne_output)
# Random forest using only the t-SNE coordinates as predictors.
model_tsne_only <- randomForest(label ~ ., data = tsne_only_data)
# NOTE: the model fitted above is replaced here by a previously saved model,
# so the error reported below comes from the cached fit, not from this run.
model_tsne_only <-
  read_rds("datasets/fashion_mnist_500_model_only_tsne.rds")
# Smallest value in the OOB column of the model's error-rate matrix.
model_tsne_only$err.rate[, "OOB"] %>% min()
## [1] 0.3

误差比 glrm-feature-perf 的结果好很多,但与使用全部像素的朴素模型(OOB 误差 0.226)相比还是变差了(OOB 误差 0.3)。

加入 t-SNE 特征工程变量

library(randomForest)
library(data.table)
# Read the saved t-SNE embedding before starting the timer so that the timing
# covers only the model fit, like the baseline model's timing.
tsne_output <- read_rds("datasets/fashion_mnist_500_tsne_features.rds")
# Fit a random forest on the original columns plus the t-SNE coordinates.
start_time <- Sys.time()
model_add_tsne <- randomForest(
  label ~ .,
  data = cbind(fashion_mnist, tsne_output)
)
end_time <- Sys.time()
end_time - start_time
## Time difference of 10.328 secs
# NOTE: the model fitted above is replaced here by a previously saved model,
# so the error reported below comes from the cached fit, not from this run.
model_add_tsne <-
  read_rds("datasets/fashion_mnist_500_model_add_tsne.rds")
# Smallest value in the OOB column of the model's error-rate matrix.
model_add_tsne$err.rate[, "OOB"] %>% min()
## [1] 0.218

与朴素模型(OOB 误差 0.226)相比,加入 t-SNE 特征后 OOB 误差降到 0.218,有小幅增益,效果相对还可以。