1. 使用 RMarkdown 的 child 参数,进行文档拼接。
  2. 这样拼接以后的笔记方便复习。
  3. 相关问题提交到 Issue

Li, Gao, and Huang (2020) 创建这个项目,用于 Lu, Li, and Sheng (2020) 的实证的扩展、复用。

1 固定效应模型

自变量以CPI:居民消费价格指数为例

1.1 模型实例

  • 因变量:CPI:居民消费价格指数
  • 自变量:
  • Treasury Bonds Notes:
  • US Corp Bonds:公司债
  • TOT:贸易

1.2 加载包

library(PooledMeanGroup)
library(plm)
library(zoo)
library(lmtest)
library(magrittr)
library(pder)
library(texreg)
library(tidyverse)
library(stargazer)

1.3 导入数据

path<-"../output/190726_beta_data.csv"
pdata <- read_csv(path) %>% mutate(
    TIME = TIME %>% as.character() %>% as.factor() %>% as.integer(),
    COUNTRY = COUNTRY %>% as.character() %>% as.factor() %>% as.integer()
) %>%
    group_by(COUNTRY) %>%
    arrange(TIME) 
pdata$GDP <- as.numeric(pdata$GDP)

1.4 固定效应模型

fm <-
    diff(CPI) ~ diff(log(PTreasuryBondsNotes))  + diff(log(SUSCorpBonds)) +
    diff(TOT)
fixed <- function(fm, data) {
    re <- data_frame(mod = c("pooling", "between", "within")) %>%
        mutate(re = map(
            mod,
            ~ plm(
                fm,
                data = data.frame(data),
                model = . ,
                effect = "individual"
            )
        )) %>%
        .$re
    
}

mod1<-fixed(fm, pdata)

1.5 动态固定效应模型

fd <-
    diff(CPI) ~ diff(log(PTreasuryBondsNotes))  + diff(log(SUSCorpBonds)) +
    diff(TOT) + lag(diff(CPI))
dy_fixed <- function(fd, data) {
    fit_re <- plm(fd,
                  data = data.frame(data),
                  model = "within" ,
                  effect = "individual")
}
mod2 <- dy_fixed(fd, pdata)

1.6 组间均值模型

这里不能data不能group

dmg <- function(fm, data) {
    mg <-
        pmg(
            fm,
            data,
            trend = TRUE,
            index = c("TIME", "COUNTRY"),
            model = "mg"
        )
    dmg <- update(mg, model = "dmg")
    cmg <- update(mg, model = "cmg")
    mods <- list(mg, dmg,cmg)
}
mod3 <- dmg(fm, ungroup(pdata))

1.7 结果展示

show_stargazer <-
    function(mod1, mod2, mod3, output_path = "../output/fixed.doc") {
        stargazer(
            mod1,
            mod2,
            mod3,
            type = "text",
            title = "Regression Results",
            header = F,
            column.labels = c("pooling", "between", "within","d_within","mg","dmg","cmg"),
            covariate.labels = c("Treasury Bonds Notes", "US Corp Bonds", "Term of Trade"),
            align = TRUE,
            no.space = TRUE,
            keep.stat = "n",
            out = output_path
        )
    }
show_stargazer(mod1, mod2, mod3)
## 
## Regression Results
## ==================================================================================================
##                                                          Dependent variable:                      
##                                    ---------------------------------------------------------------
##                                                               diff(CPI)                           
##                                                    panel                            mean          
##                                                    linear                          groups         
##                                     pooling  between   within       d       mg      dmg     cmg   
##                                       (1)      (2)       (3)       (4)      (5)     (6)     (7)   
## --------------------------------------------------------------------------------------------------
## Treasury Bonds Notes               0.208***  0.612*** 0.202***  0.317***   0.241  -0.433   -1.713 
##                                     (0.039)  (0.165)   (0.042)   (0.043)  (0.180) (0.311) (2.436) 
## US Corp Bonds                      -0.277***  0.099   -0.268*** -0.374*** -0.815* -0.130   -0.172 
##                                     (0.042)  (0.224)   (0.046)   (0.046)  (0.428) (0.248) (0.621) 
## Term of Trade                      -0.004**  0.017*** -0.006*** -0.010*** 0.129**  0.004   -0.079 
##                                     (0.002)  (0.003)   (0.002)   (0.002)  (0.060) (0.012) (0.084) 
## lag(diff(CPI))                                                  -0.431***                         
##                                                                  (0.034)                          
## y.bar                                                                                     1.363***
##                                                                                           (0.371) 
## diff(log(PTreasuryBondsNotes)).bar                                                         1.369  
##                                                                                           (2.450) 
## diff(log(SUSCorpBonds)).bar                                                                       
##                                                                                                   
## diff(TOT).bar                                                                                     
##                                                                                                   
## trend                                                                     -0.082   0.065          
##                                                                           (0.169) (0.051)         
## Constant                             0.058   0.147**                       0.731  -0.278   -0.299 
##                                     (0.059)  (0.069)                      (0.808) (0.190) (0.389) 
## --------------------------------------------------------------------------------------------------
## Observations                         1,152     192      1,152      960     1,152   1,152   1,152  
## ==================================================================================================
## Note:                                                                  *p<0.1; **p<0.05; ***p<0.01

2 GARCH模型

利用GARCH模型计算风险 得到的风险进行固定效应模型

library(tidyverse)
library(rugarch)
library(magrittr)
library(caret)
library(timetk)
library(lubridate)
library(xts)
library(plm)
library(stargazer)

2.1 导入数据

df<-read_csv("../output/190726_beta_data.csv")
df<-df %>%mutate(country=COUNTRY %>% as.character() %>% as.factor() )

单变量的garch选择10年期债券收益率

因为不能使用长面板数据,因此要对国家分组 目前用到的数据里面一共7个国家

2.2 建模,可调参数很多

可调节的主要分为3个部分

  • 方差方程:variance.model
  • 均值方程:mean.model
  • 分布:distribution
vol_func <- function(data) {
  vol_total <- list()
  cou_name <- cou_name <- data$country %>% unique() %>% make.names()
  for (i in cou_name) {
    df_t <-
      df %>% select(TIME, country, tenyearbong) %>% filter(country == i) %>% mutate(index =
                                                                                      ymd(TIME)) %>% tk_xts() %>% na.omit()
    x <- df_t[, 'tenyearbong']
    spec_d = ugarchspec(
      variance.model = list(
        model = "sGARCH",
        garchOrder = c(1, 1),
        submodel = NULL,
        external.regressors = NULL,
        variance.targeting = FALSE
      ),
      
      mean.model = list(
        armaOrder = c(1, 1),
        include.mean = TRUE,
        archm = FALSE,
        archpow = 1,
        arfima = FALSE,
        external.regressors = NULL,
        archex = FALSE
      ),
      
      distribution.model = "norm"
      
    )
    vol <- ugarchfit(x, spec = spec_d) %>% sigma()
    vol_total[[i]] <- vol
  }
  voltility_total <-
    rbind(vol_total[[1]],
          vol_total[[2]],
          vol_total[[3]],
          vol_total[[4]],
          vol_total[[5]],
          vol_total[[6]],
          vol_total[[7]])
  
  voltility_total
}

2.3 平稳性检验

library(tseries)
df_fix<-df_fixed%>% data.frame()%>% select(voltility,CPI,GDP,PTreasuryBondsNotes,SUSCorpBonds,TOT)
df_fix%>% map(adf.test)
## $voltility
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .x[[i]]
## Dickey-Fuller = -3.1351, Lag order = 11, p-value = 0.0991
## alternative hypothesis: stationary
## 
## 
## $CPI
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .x[[i]]
## Dickey-Fuller = -2.131, Lag order = 11, p-value = 0.5228
## alternative hypothesis: stationary
## 
## 
## $GDP
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .x[[i]]
## Dickey-Fuller = -2.8682, Lag order = 11, p-value = 0.2108
## alternative hypothesis: stationary
## 
## 
## $PTreasuryBondsNotes
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .x[[i]]
## Dickey-Fuller = -5.3209, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
## $SUSCorpBonds
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .x[[i]]
## Dickey-Fuller = -5.0799, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
## 
## 
## $TOT
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .x[[i]]
## Dickey-Fuller = -6.3536, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary

2.4 滚动风险的固定效应模型

fm <-
    voltility~ diff(log(PTreasuryBondsNotes))  + diff(log(SUSCorpBonds)) +
    diff(TOT)
fixed <- function(fm, data) {
    re <- data_frame(mod = c("pooling", "between", "within")) %>%
        mutate(re = map(
            mod,
            ~ plm(
                fm,
                data = data.frame(data),
                model = . ,
                effect = "individual"
            )
        )) %>%
        .$re
    
}

mod1<-fixed(fm, df_fixed)

2.5 动态固定效应模型

fd <-
    voltility ~ diff(log(PTreasuryBondsNotes))  + diff(log(SUSCorpBonds)) +
    diff(TOT) + lag(voltility)
dy_fixed <- function(fd, data) {
    fit_re <- plm(fd,
                  data = data.frame(data),
                  model = "within" ,
                  effect = "individual")
}
mod2 <- dy_fixed(fd, df_fixed)

2.6 组间均值模型

这里不能data不能group

dmg <- function(fm, data) {
    mg <-
        pmg(
            fm,
            data,
            trend = TRUE,
            index = c("TIME", "COUNTRY"),
            model = "mg"
        )
    dmg <- update(mg, model = "dmg")
    cmg <- update(mg, model = "cmg")
    mods <- list(mg, dmg,cmg)
}
mod3 <- dmg(fm, ungroup(df_fixed))

2.7 结果展示

show_stargazer <-
    function(mod1, mod2, mod3, output_path = "../output/garch.doc") {
        stargazer(
            mod1,
            mod2,
            mod3,
            type = "text",
            title = "Regression Results",
            header = F,
            column.labels = c("pooling", "between", "within","d_within","mg","dmg","cmg"),
            covariate.labels = c("Treasury Bonds Notes", "US Corp Bonds", "Term of Trade"),
            align = TRUE,
            no.space = TRUE,
            keep.stat = "n",
            out = output_path
        )
    }
show_stargazer(mod1, mod2, mod3)
## 
## Regression Results
## ======================================================================================================
##                                                            Dependent variable:                        
##                                    -------------------------------------------------------------------
##                                                                 voltility                             
##                                                     panel                             mean            
##                                                    linear                            groups           
##                                     pooling   between   within       d        mg        dmg      cmg  
##                                       (1)       (2)       (3)       (4)       (5)       (6)      (7)  
## ------------------------------------------------------------------------------------------------------
## Treasury Bonds Notes               -0.014***   0.020   -0.015*** -0.007*** -0.015***   0.026    0.190 
##                                     (0.002)   (0.017)   (0.002)   (0.002)   (0.004)   (0.019)  (0.176)
## US Corp Bonds                        0.001    0.068***   0.001    -0.003    -0.002     0.016    0.087 
##                                     (0.002)   (0.023)   (0.002)   (0.002)   (0.009)   (0.010)  (0.070)
## Term of Trade                      -0.0004***  0.001   -0.001*** -0.001***  0.0001   -0.002*** -0.005 
##                                     (0.0001)  (0.0004) (0.0001)  (0.0001)   (0.001)   (0.001)  (0.008)
## lag(voltility)                                                   -0.292***                            
##                                                                   (0.032)                             
## y.bar                                                                                          -1.080 
##                                                                                                (1.027)
## diff(log(PTreasuryBondsNotes)).bar                                                             -0.224 
##                                                                                                (0.198)
## diff(log(SUSCorpBonds)).bar                                                                           
##                                                                                                       
## diff(TOT).bar                                                                                         
##                                                                                                       
## trend                                                                      -0.009**    0.001          
##                                                                             (0.004)   (0.003)         
## Constant                            0.191***  0.209***                     0.236***   -0.005   0.384**
##                                     (0.003)   (0.007)                       (0.018)   (0.011)  (0.181)
## ------------------------------------------------------------------------------------------------------
## Observations                         1,152      192      1,152     1,152     1,152     1,152    1,152 
## ======================================================================================================
## Note:                                                                      *p<0.1; **p<0.05; ***p<0.01

结果不符合假设

3 滚动风险的固定效应模型

library(PooledMeanGroup)
library(tibbletime)
library(lubridate)
library(plm)
library(tseries)
library(stargazer)
library(lmtest)
library(magrittr)
library(pder)
library(texreg)
library(tidyverse)
# 处理时间窗口数据
pdata <- read_csv("../output/190726_beta_data.csv")
tenyearbong <- as.numeric(pdata$tenyearbong)
pdata<-pdata%>% group_by(TIME) %>%
    mutate(mean_r = mean(tenyearbong, na.rm = TRUE))%>% #添加一个均值
    group_by(COUNTRY) %>%
    mutate(n = row_number()) %>%
    select(
        TIME,
        COUNTRY,
        mean_r,
        tenyearbong,
        PTreasuryBondsNotes,
        SUSCorpBonds,
        TOT
    )


window <- 30
rolling_lm <-
    rollify(
        .f = function(mean_r,tenyearbong) {
            lm(mean_r ~tenyearbong )
        },
        window = window,
        unlist = FALSE
    )
beta_df <-
    pdata %>% group_by(COUNTRY) %>%
    mutate(ols = rolling_lm( mean_r,tenyearbong)) %>%
    slice(-1:-29) %>%
    mutate(beta = map(ols, broom::tidy)) %>%
    select(TIME, beta) %>%
    unnest() %>%
    filter(term == 'tenyearbong') %>%
    select(TIME, estimate, COUNTRY)

begin <- "2004-06-01"
end <- "2018-12-01"
interva <- interval(begin, end)
df_new <- pdata %>%
    group_by(COUNTRY) %>%
    select(TIME,
           COUNTRY,
           PTreasuryBondsNotes,
           SUSCorpBonds,
           TOT
    ) %>%
    filter(TIME %within% interva)

df_data <- merge.data.frame(beta_df, df_new) %>% ungroup()
df_data <-
    df_data %>%
    mutate(
        TIME = TIME %>% as.character() %>% as.factor() %>% as.integer(),
        COUNTRY = COUNTRY %>% as.character() %>% as.factor() %>% as.integer()
    ) %>%
    group_by(COUNTRY) %>%
    arrange(TIME)

3.1 固定效应模型

fm <-
    diff(estimate) ~ diff(log(PTreasuryBondsNotes))  + diff(log(SUSCorpBonds)) +
    diff(TOT)
fixed <- function(fm, data) {
    re <- data_frame(mod = c("pooling", "between", "within")) %>%
        mutate(re = map(
            mod,
            ~ plm(
                fm,
                data = data.frame(data),
                model = . ,
                effect = "individual"
            )
        )) %>%
        .$re
    
}

mod1<-fixed(fm, df_data)

3.2 动态固定效应模型

fd <-
    diff(estimate) ~ diff(log(PTreasuryBondsNotes))  + diff(log(SUSCorpBonds)) +
    diff(TOT) + lag(diff(estimate))
dy_fixed <- function(fd, data) {
    fit_re <- plm(fd,
                  data = data.frame(data),
                  model = "within" ,
                  effect = "individual")
}
mod2 <- dy_fixed(fd, df_data)

3.3 组间均值模型

这里data不能group

dmg <- function(fm, data) {
    mg <-
        pmg(
            fm,
            data,
            trend = TRUE,
            index = c("TIME", "COUNTRY"),
            model = "mg"
        )
    dmg <- update(mg, model = "dmg")
    mods <- list(mg, dmg)
}
mod3 <- dmg(fm, ungroup(df_data))

3.4 结果展示

show_stargazer <-
    function(mod1, mod2, mod3, output_path = "../output/beta_fixed.doc") {
        stargazer(
            mod1,
            mod2,
            mod3,
            type = "text",
            title = "Regression Results",
            header = F,
            column.labels = c("pooling", "between", "within"),
            covariate.labels = c("Treasury Bonds Notes", "US Corp Bonds", "Term of Trade"),
            align = TRUE,
            no.space = TRUE,
            keep.stat = "n",
            out = output_path
        )
    }
show_stargazer(mod1, mod2, mod3)
## 
## Regression Results
## ===============================================================================
##                                         Dependent variable:                    
##                      ----------------------------------------------------------
##                                            diff(estimate)                      
##                                      panel                         mean        
##                                      linear                       groups       
##                       pooling  between   within                                
##                         (1)      (2)       (3)       (4)       (5)       (6)   
## -------------------------------------------------------------------------------
## Treasury Bonds Notes 0.137***  0.197*** 0.137***   0.044**  0.157***   0.449*  
##                       (0.015)  (0.044)   (0.016)   (0.021)   (0.041)   (0.257) 
## US Corp Bonds        -0.101***  0.058   -0.101***  -0.018    -0.106    -0.033  
##                       (0.017)  (0.070)   (0.019)   (0.023)   (0.070)   (0.127) 
## Term of Trade         -0.001   0.002**   -0.001    -0.001    -0.013   -0.018***
##                       (0.001)  (0.001)   (0.001)   (0.001)   (0.008)   (0.005) 
## lag(diff(estimate))                               -0.324***                    
##                                                    (0.045)                     
## trend                                                       -0.112***  -0.035  
##                                                              (0.027)   (0.022) 
## Constant               0.010   0.048**                       0.281**    0.123  
##                       (0.022)  (0.020)                       (0.119)   (0.083) 
## -------------------------------------------------------------------------------
## Observations            978      163       978       815       978       978   
## ===============================================================================
## Note:                                               *p<0.1; **p<0.05; ***p<0.01

4 PMG模型

library(PooledMeanGroup)
library(plm)
library(tseries)
library(stargazer)
library(lmtest)
library(magrittr)
library(pder)
library(texreg)
library(tidyverse)

pdata <- read_csv("190726_beta_data.csv")
pdata$GDP <- as.numeric(pdata$GDP)  
names(pdata)

pdata <-
  pdata %>%
  mutate(
    TIME = TIME %>% as.character() %>% as.factor() %>% as.integer(),
    COUNTRY = COUNTRY %>% as.character() %>% as.factor() %>% as.integer()
  ) %>%
  group_by(COUNTRY)%>%
  arrange(TIME)%>%
  na.omit()

anyNA(pdata)

cpi <- data.frame(cpi = pdata[, 4], row.names = row.names(pdata))
gdp <- data.frame(cpi = pdata[, 5], row.names = row.names(pdata))
PTreasuryBondsNotes <-
  data.frame(PTreasuryBondsNotes = pdata[, 6],
             row.names = row.names(pdata))
SUSCorpBonds <-
  data.frame(SUSCorpBonds = pdata[, 7], row.names = row.names(pdata))
TOT <-
  data.frame(TOT = pdata[, 9], row.names = row.names(pdata))

pdata %>%
  summarise(n_ctr = n_distinct(COUNTRY),
            n_yr = n_distinct(TIME),
            n = n()) %>%
  mutate(n_mul = n_ctr * n_yr)


dcpi <- DiffPanel(variable = pdata[, 4], quantity = rep(192, 7))
lcpi=LagPanel(variable=cpi, quantity=rep(192,7))
dgdp <- DiffPanel(variable = pdata[, 5], quantity = rep(192, 7))
dPTreasuryBondsNotes <-
  DiffPanel(variable = pdata[, 6], quantity =  rep(192, 7))
dSUSCorpBonds <-
  DiffPanel(variable = pdata[, 7], quantity =   rep(192, 7))
dTOT <-
  DiffPanel(variable = pdata[, 9], quantity =   rep(192, 7))

dataPanel <- cbind(
  cpi,
  PTreasuryBondsNotes,
  SUSCorpBonds,
  TOT,
  dcpi,
  dPTreasuryBondsNotes,
  dSUSCorpBonds,
  dTOT,
  lcpi
)
dataPanel <- data.frame(dataPanel)
names(dataPanel) = c(
  "cpi",
  "PTreasuryBondsNotes",
  "SUSCorpBonds",
  "TOT",
  "dcpi",
  "dPTreasuryBondsNotes",
  "dSUSCorpBonds",
  "dTOT",
  "lcpi"
)


dataPanel <- PanelNaOmit(dataset = dataPanel, quantity = rep(192, 7))
dataPanel$quantity
dataPanel

#debug(optimPMG)
OptimPmgExp = optimPMG(
  dLL = 10 ^ -10,
  maxIter = 200,
  TetaStart = rep(x = 1, times = 3),
  vecSR = list(
    SR1 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    
    SR2 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR3 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR4 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR5 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR6 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR7 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    )
  ),
  vecLR = c(
    "lcpi",
    "PTreasuryBondsNotes",
    "SUSCorpBonds",
    "TOT"
  ),
  dataset = dataPanel$dataset,
  quantity = dataPanel$quantity,
  const = TRUE
)



OptimPmgExp$LR




PmgExp = PMG(
  paramTeta = c(2.689219, -32.845854,-237.696453),
  vecSR = list(
    SR1 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    
    SR2 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR3 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR4 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR5 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR6 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    ),
    SR7 = c(
      "dcpi",
      "dPTreasuryBondsNotes",
      "dSUSCorpBonds",
      "dTOT"
    )
  ),
  vecLR = c(
    "lcpi",
    "PTreasuryBondsNotes",
    "SUSCorpBonds",
    "TOT"
  ), 
  dataset = dataPanel$dataset,
  quantity = dataPanel$quantity,
  const = TRUE
)


str(PmgExp$SR)
PmgExp$LR


PmgExp$SR <- 
  map(1:7, ~ PmgExp$SR %>% .[[.x]] %>% `names<-`(c("Coef", "StdErr", "z", "P>|z|", "Low95%", "High95%")) %>% rownames_to_column) %>% 
  bind_rows() %>% 
  group_by(rowname) %>% 
  summarise(
    Coef = mean(Coef, na.rm = TRUE),
    StdErr = sqrt(sum(StdErr^2)),
    na_Coef = sum(is.na(Coef)),
    na_StdErr = sum(is.na(StdErr))
  ) %>% 
  filter(!rowname %>% str_detect("^ec\\d"))

x<-data.frame(PmgExp$SR)
x
t_value<-x[,2]/x[,3]
p_value<-2*pt(-abs(t_value),df=191)
SR_SUM<-mutate(x,t_value,p_value)
SR_SUM
stargazer(SR_SUM,
          type = "html",
          title = "CPI PMG Results",
          header = F,
          align=TRUE,
          no.space=TRUE,
          summary=FALSE,
          out = "190806_cpi_pmg.doc"
)

5 XGBoost寻找交叉项

knitr::opts_chunk$set(message = FALSE, warning = FALSE)
path<-"../output/190726_beta_data.csv"
pdata <- read_csv(path)
df<-pdata %>% select(CPI,PTreasuryBondsNotes:LOW)
df %>% head()
dtrain <- dtrain <- 
    xgb.DMatrix(
        data = as.matrix(
            df %>% select(-CPI)
        )
        ,label = df$CPI
    )
xgb_model <- xgb.train(
    data=dtrain,
    eta = 0.1,
    max_depth = 15,
    nround=16,
    subsample = 0.5,
    colsample_bytree = 0.5,
    seed = 1,
    eval.metric = "auc",
    # eval.metric = "error",
    # eval.metric = "logloss",
    # num_class = 12, # set the number of classes. To use only with multiclass objectives.
    nfold = 100,
    nthread = 3
)
xgb_tree <- 
    xgboost::xgb.model.dt.tree(model = xgb_model) %>% 
    rename_all(str_to_lower)
head(xgb_tree)
  1. xgb_tree反应了模型每个数据的具体路径。我们将要通过这个表进行交叉项的输出。

  2. Quality: either the split gain (change in loss) or the leaf value

说明某个节点,Quality 衡量了变量的价值。 Quality作为衡量交叉项的评价标准。

那么如何找出有效的交叉项目?

mod_interactions <- interactions(xgb_model, df %>% select(xgb_model$feature_names) %>% as.matrix())
mod_interactions %>% write_rds("../output/mod_interactions.rds")
mod_interactions <- read_rds("../output/mod_interactions.rds")
# 执行时间慢,因此建立中间变量。
mod_interactions %>% head
##                    Parent                     Child  sumGain frequency
## 1:                 trade2    Foreignexchangereserve 191.4146         2
## 2:             pureexport Tradesurplusconsolidation 177.7712         4
## 3: Foreignexchangereserve            servicesexport 166.6120         4
## 4:             pureexport             Exportproduct 137.0093         2
## 5:         servicesexport             Importproduct 108.7695         4
## 6:          serviceimport             Importproduct 105.4075         1
# 不存在这个函数?
#interactions2pairs(mod_interactions) %>% head
stargazer::stargazer(fit1, fit2,type = "text", p.auto = TRUE)
## 
## ================================================================================
##                                            Dependent variable:                  
##                           ------------------------------------------------------
##                                                    CPI                          
##                                      (1)                        (2)             
## --------------------------------------------------------------------------------
## lag(CPI)                                                      0.945***          
##                                                               (0.008)           
##                                                                                 
## PTreasuryBondsNotes              0.00000***                   0.00000           
##                                   (0.00000)                  (0.00000)          
##                                                                                 
## SUSCorpBonds                      -0.00001                    -0.00000          
##                                   (0.00001)                  (0.00000)          
##                                                                                 
## Importproduct                     0.0004**                    -0.0001           
##                                   (0.0002)                    (0.0001)          
##                                                                                 
## TOT                                -0.002                     0.001***          
##                                    (0.002)                    (0.0005)          
##                                                                                 
## Exportproduct                     -0.0004**                    0.0001           
##                                   (0.0002)                    (0.0001)          
##                                                                                 
## tenyearbong                       0.361***                     0.018*           
##                                    (0.030)                    (0.010)           
##                                                                                 
## pureexport                        0.0004**                    -0.0001           
##                                   (0.0002)                    (0.0001)          
##                                                                                 
## servicesexport                    10.777***                    0.200            
##                                    (2.676)                    (0.821)           
##                                                                                 
## serviceimport                    -10.777***                    -0.200           
##                                    (2.676)                    (0.821)           
##                                                                                 
## pureserviceexport                -10.777***                    -0.200           
##                                    (2.676)                    (0.821)           
##                                                                                 
## exchangerate                      0.002***                   0.0002***          
##                                   (0.0001)                   (0.00005)          
##                                                                                 
## Tradesurplusconsolidation         0.00001**                   0.00000           
##                                   (0.00000)                  (0.00000)          
##                                                                                 
## trade1                            -0.057***                   -0.005**          
##                                    (0.008)                    (0.002)           
##                                                                                 
## trade2                            0.019***                    0.002**           
##                                    (0.002)                    (0.001)           
##                                                                                 
## Foreignexchangereserve            0.000***                    0.000**           
##                                    (0.000)                    (0.000)           
##                                                                                 
## Totaltrade                       -0.00002***                -0.00000***         
##                                   (0.00000)                  (0.00000)          
##                                                                                 
## Closingquotation                  2.194***                    0.253**           
##                                    (0.399)                    (0.123)           
##                                                                                 
## `Opening quotation`               2.351***                     -0.092           
##                                    (0.341)                    (0.106)           
##                                                                                 
## HIGH                              -3.311***                   -0.307**          
##                                    (0.396)                    (0.124)           
##                                                                                 
## LOW                               -1.052***                    0.185*           
##                                    (0.366)                    (0.112)           
##                                                                                 
## Constant                          -1.239***                   -0.218**          
##                                    (0.291)                    (0.089)           
##                                                                                 
## --------------------------------------------------------------------------------
## Observations                        1,344                      1,343            
## R2                                  0.448                      0.949            
## Adjusted R2                         0.440                      0.948            
## Residual Std. Error           1.293 (df = 1323)          0.394 (df = 1321)      
## F Statistic               53.660*** (df = 20; 1323) 1,163.646*** (df = 21; 1321)
## ================================================================================
## Note:                                                *p<0.1; **p<0.05; ***p<0.01
broom::tidy(fit1) %>% 
    arrange(p.value)

附录

参考文献

Li, Jiaxiang, Wenxin Gao, and Xuliang Huang. 2020. “USD Data Analysis.” GitHub. 2020. https://github.com/JiaxiangBU/usd-data-analysisEX.

Lu, Changrong, Jiaxiang Li, and Yi Sheng. 2020. “A Study on the Negative Externality of Usd Liquidity - Based on the Asset Allocation Efficiency of Us Treasury Securities.” The Singapore Economic Review (SSCI).