Reference: Chollet and Allaire (2018, 81–87).
This section studies the Boston Housing Price dataset, which has two notable characteristics: the dataset is small, and its features are measured on very different scales.
library(readr)
library(here)
## here() starts at D:/work/learn_nn
library(keras)

# Run once to download the dataset and cache it locally
dataset <- dataset_boston_housing()
dataset %>% write_rds(here("datasets/dataset_boston_housing.rds"))
dataset <- read_rds(here("datasets/dataset_boston_housing.rds"))
# %<-% is the multi-assignment operator from zeallot (re-exported by keras)
c(c(train_data, train_targets), c(test_data, test_targets)) %<-% dataset
str(train_data)
dim(train_data)
str(test_data)
dim(test_data)
There are 13 features (columns) in total.
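For reference, the 13 columns correspond to the feature set of the original Boston Housing data; the matrices returned by keras carry no column names, so attaching names as below assumes the conventional column order (CRIM through LSTAT) and is only a convenience sketch.

# Assumed: conventional column order of the original Boston Housing data
boston_features <- c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                     "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT")
colnames(train_data) <- boston_features
colnames(test_data) <- boston_features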
str(train_targets)
range(train_targets)
str(test_targets)
range(test_targets)
The targets are house prices in units of \(10^3\) dollars; for example, a target of 21.2 means $21,200.
Note that standardization must use the mean and sd computed from the training set only, to avoid leaking information from the test set.
mean <- apply(train_data, 2, mean)
std <- apply(train_data, 2, sd)
# apply() computes column-wise statistics on the matrix
mean %>% str
train_data <- scale(train_data, center = mean, scale = std)
test_data <- scale(test_data, center = mean, scale = std)
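A quick sanity check (a sketch, not in the original) confirms that the standardized training columns now have mean ≈ 0 and sd ≈ 1; the test columns will deviate slightly because they were scaled with the training-set statistics.

# Training columns should now have mean ~0 and sd ~1
round(colMeans(train_data), 10)
round(apply(train_data, 2, sd), 10)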
build_model <- function() {
  model <- keras_model_sequential() %>%
    layer_dense(units = 64, activation = "relu",
                input_shape = dim(train_data)[[2]]) %>%
    layer_dense(units = 64, activation = "relu") %>%
    layer_dense(units = 1)
  # compile() modifies the model in place and also returns it,
  # so build_model() returns a freshly compiled model
  model %>%
    compile(
      optimizer = "rmsprop",
      loss = "mse",
      metrics = c("mae")
    )
}
Wrapping the model construction in a function lets us instantiate the same model repeatedly, which we will need for the cross-validation (CV) below.
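As a quick check (a sketch, not in the original), each call to build_model() returns a freshly initialized, compiled model:

model <- build_model()
summary(model)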
Next, K-fold validation is implemented from scratch.
When you’re working with little data, K-fold validation can help reliably evaluate your model.
When the dataset is small, K-fold validation makes more efficient use of the data.
k <- 4
indices <- sample(1:nrow(train_data))
# Randomly assign each row to one of k roughly equal folds
folds <- cut(indices, breaks = k, labels = FALSE)
num_epochs <- 100
all_scores <- c()
for (i in 1:k) {
  # cat() is a simple way to log progress inside the loop
  cat("processing fold #", i, "\n")
  # rows serving as the validation fold in this iteration
  val_indices <- which(folds == i, arr.ind = TRUE)
  val_data <- train_data[val_indices, ]
  val_targets <- train_targets[val_indices]
  # all remaining rows form the partial training set
  partial_train_data <- train_data[-val_indices, ]
  partial_train_targets <- train_targets[-val_indices]
  model <- build_model()
  model %>%
    fit(
      partial_train_data,
      partial_train_targets,
      epochs = num_epochs,
      batch_size = 1,
      verbose = 0
    )
  results <- model %>% evaluate(val_data, val_targets, verbose = 0)
  # append this fold's validation MAE to the running vector
  all_scores <- c(all_scores, results$mean_absolute_error)
}
all_scores %>%
write_rds(here("datasets/bhp_all_scores.rds"))
Running this produced the following message; note that it is only an informational notice from TensorFlow (the CPU supports AVX2 instructions that this binary was not compiled to use), not a real error.
2019-01-22 17:13:09.289000: I T:\src\github\tensorflow\tensorflow\core\platform\cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
all_scores <- read_rds(here("datasets/bhp_all_scores.rds"))
all_scores
## [1] 2.557272 3.150928 2.372173 2.811112
all_scores %>% mean
## [1] 2.722871
The average MAE is about 2.7, i.e. roughly $2,700. Since the house prices here mostly fall between $10,000 and $50,000, this error is still fairly large, so we next train for 500 epochs and examine the validation curve.
# Takes a long time to run (4 folds x 500 epochs); results are cached to rds below
num_epochs <- 500
all_mae_histories <- NULL
for (i in 1:k) {
  cat("processing fold #", i, "\n")
  val_indices <- which(folds == i, arr.ind = TRUE)
  val_data <- train_data[val_indices, ]
  val_targets <- train_targets[val_indices]
  partial_train_data <- train_data[-val_indices, ]
  partial_train_targets <- train_targets[-val_indices]
  model <- build_model()
  history <- model %>% fit(
    partial_train_data, partial_train_targets,
    validation_data = list(val_data, val_targets),
    epochs = num_epochs, batch_size = 1, verbose = 0
  )
  # keep the per-epoch validation MAE curve for this fold
  mae_history <- history$metrics$val_mean_absolute_error
  all_mae_histories <- rbind(all_mae_histories, mae_history)
}
average_mae_history <- data.frame(
  epoch = seq_len(ncol(all_mae_histories)),
  validation_mae = apply(all_mae_histories, 2, mean)
)
average_mae_history %>%
write_rds(here("datasets/bhp_average_mae_history.rds"))
average_mae_history <- read_rds(here("datasets/bhp_average_mae_history.rds"))
library(ggplot2)
ggplot(average_mae_history, aes(x = epoch, y = validation_mae)) +
  geom_line() +
  geom_smooth()
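Because the validation MAE of the first few epochs is much larger than the rest, it compresses the interesting part of the curve. A sketch (dropping the first 10 epochs is an arbitrary choice, not from the original) that re-plots the remainder and locates the best epoch:

# Re-plot without the first 10 (very noisy) epochs
ggplot(average_mae_history[-(1:10), ], aes(x = epoch, y = validation_mae)) +
  geom_line() +
  geom_smooth()
# Epoch with the lowest average validation MAE
which.min(average_mae_history$validation_mae)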
From this we can tell that the validation MAE stops improving and overfitting sets in before roughly epoch 100, so the number of epochs should be kept below 100.
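With the epoch budget fixed, a natural final step (a sketch; 80 epochs and batch_size = 16 are assumed choices within the < 100 range, not taken from the output above) is to retrain a fresh model on the full training set and evaluate it on the test set:

# Retrain on all training data with the chosen epoch budget, then evaluate on the test set
model <- build_model()
model %>% fit(train_data, train_targets,
              epochs = 80, batch_size = 16, verbose = 0)
model %>% evaluate(test_data, test_targets, verbose = 0)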
When little training data is available, it’s preferable to use a small network with few hidden layers (typically only one or two), in order to avoid severe overfitting.
That is, use only one or two hidden layers.
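For illustration, a one-hidden-layer variant of build_model() might look like the following (a sketch, not from the book; the unit count and compile settings are simply carried over from the model above):

# A smaller variant with a single hidden layer, for very small datasets
build_small_model <- function() {
  model <- keras_model_sequential() %>%
    layer_dense(units = 64, activation = "relu",
                input_shape = dim(train_data)[[2]]) %>%
    layer_dense(units = 1)
  model %>%
    compile(
      optimizer = "rmsprop",
      loss = "mse",
      metrics = c("mae")
    )
}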
Chollet, François, and J.J. Allaire. 2018. Deep Learning with R. Manning Publications.