library(glmnet)
## Warning: package 'glmnet' was built under R version 3.5.3
## Loading required package: Matrix
## Loading required package: foreach
## Warning: package 'foreach' was built under R version 3.5.3
## Loaded glmnet 2.0-16
x1 <- c(1, 2, 3, 4, 5, 6)
x2 <- c(3, 4, 5, 6, 7, 8)
y <- c(1, 0, 0, 1, 0, 1)
x <- cbind(x1, x2)
fit <- glmnet(x, y, alpha = 1)
if alpha = 1, fit a lasso model (James et al. 2013, 256)
str(fit)
## List of 12
## $ a0 : Named num [1:55] 0.5 0.491 0.483 0.476 0.469 ...
## ..- attr(*, "names")= chr [1:55] "s0" "s1" "s2" "s3" ...
## $ beta :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
## .. ..@ i : int [1:54] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ p : int [1:56] 0 0 1 2 3 4 5 6 7 8 ...
## .. ..@ Dim : int [1:2] 2 55
## .. ..@ Dimnames:List of 2
## .. .. ..$ : chr [1:2] "x1" "x2"
## .. .. ..$ : chr [1:55] "s0" "s1" "s2" "s3" ...
## .. ..@ x : num [1:54] 0.00254 0.00485 0.00696 0.00888 0.01063 ...
## .. ..@ factors : list()
## $ df : int [1:55] 0 1 1 1 1 1 1 1 1 1 ...
## $ dim : int [1:2] 2 55
## $ lambda : num [1:55] 0.0488 0.0445 0.0405 0.0369 0.0336 ...
## $ dev.ratio: num [1:55] 0 0.00162 0.00296 0.00407 0.005 ...
## $ nulldev : num 1.5
## $ npasses : int 145
## $ jerr : int 0
## $ offset : logi FALSE
## $ call : language glmnet(x = x, y = y, alpha = 1)
## $ nobs : int 6
## - attr(*, "class")= chr [1:2] "elnet" "glmnet"
这里没有提供 p 值的对象。
参考 (https://stats.stackexchange.com/users/148386/majeed-simaan) (n.d.) 的做法是
lm
,这个自己可以尝试下我们知道 OLS 回归中,\(\hat beta\) 是估计的均值,se 是 \(\hat beta\) 的标准差,比值是 t 值,对应地,计算出 p 值,因此我们只要模拟出 \(\hat beta\) 的分布即可。
library(boot)
library(glmnet)
library(tidyverse)
x1 <- rnorm(100)
x2 <- rnorm(100)
y <- runif(100)
x <- cbind(x1, x2)
get_beta <- function() {
train_index <- sample(1:length(y), 80 , replace = FALSE)
x_train <- x[train_index,]
x_test <- x[-train_index,]
y_train <- y[train_index]
lasso_mod <-
glmnet(x_train, y_train, alpha = 1, lambda = 0) # Fit lasso model on training data
beta_result <-
coef(lasso_mod) %>% t() %>% as.matrix() %>% as.data.frame()
return(beta_result)
}
beta_list <- list()
for (i in 1:30) {
# beta_matrix <- append(beta_list,possibly(get_beta, NA)())
beta_list[[i]] <- get_beta()
}
beta_df <-
beta_list %>%
bind_rows()
beta_df %>% head
## (Intercept) x1 x2
## 1 0.5199715 0.04355759 -0.011852844
## 2 0.5541208 0.07449146 -0.031424320
## 3 0.5331912 0.11408834 -0.025574453
## 4 0.5128562 0.07480453 0.018870909
## 5 0.5424976 0.06168805 0.002116037
## 6 0.5498111 0.10840449 -0.024797055
beta_df %>%
gather() %>%
group_by(key) %>%
summarise(
beta = mean(value),
sd = sd(value)
) %>%
mutate(
t = beta/(sd/sqrt(100)),
pvalue = 2*pt(-abs(t),df=100-1)
# https://www.cyclismo.org/tutorial/R/pValues.html
)
## # A tibble: 3 x 5
## key beta sd t pvalue
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.538 0.0145 371. 1.84e-157
## 2 x1 0.0670 0.0194 34.6 4.18e- 57
## 3 x2 -0.0126 0.0147 -8.57 1.40e- 13
(https://stats.stackexchange.com/users/148386/majeed-simaan). n.d. “LASSO Regression - P-Values and Coefficients.” Cross Validated. https://stats.stackexchange.com/q/410196.
James, Gareth, Daniela Witten, Trevor Hastie, and Robert Tibshirani. 2013. An Introduction to Statistical Learning with Applications in R. 8th ed. Springer New York.