USD Data Analysis 实证笔记
2020-04-19
- 使用 RMarkdown 的
child
参数,进行文档拼接。 - 这样拼接以后的笔记方便复习。
- 相关问题提交到 Issue
Li, Gao, and Huang (2020) 创建这个项目,用于 Lu, Li, and Sheng (2020) 的实证的扩展、复用。
1 固定效应模型
自变量以CPI:居民消费价格指数为例
1.1 模型实例
- 因变量:CPI:居民消费价格指数
- 自变量:
- Treasury Bonds Notes:
- US Corp Bonds:公司债
- TOT:贸易
1.2 加载包
1.3 导入数据
1.4 固定效应模型
fm <-
diff(CPI) ~ diff(log(PTreasuryBondsNotes)) + diff(log(SUSCorpBonds)) +
diff(TOT)
fixed <- function(fm, data) {
re <- data_frame(mod = c("pooling", "between", "within")) %>%
mutate(re = map(
mod,
~ plm(
fm,
data = data.frame(data),
model = . ,
effect = "individual"
)
)) %>%
.$re
}
mod1<-fixed(fm, pdata)
1.5 动态固定效应模型
1.6 组间均值模型
这里不能data不能group
1.7 结果展示
show_stargazer <-
function(mod1, mod2, mod3, output_path = "../output/fixed.doc") {
stargazer(
mod1,
mod2,
mod3,
type = "text",
title = "Regression Results",
header = F,
column.labels = c("pooling", "between", "within","d_within","mg","dmg","cmg"),
covariate.labels = c("Treasury Bonds Notes", "US Corp Bonds", "Term of Trade"),
align = TRUE,
no.space = TRUE,
keep.stat = "n",
out = output_path
)
}
show_stargazer(mod1, mod2, mod3)
##
## Regression Results
## ==================================================================================================
## Dependent variable:
## ---------------------------------------------------------------
## diff(CPI)
## panel mean
## linear groups
## pooling between within d mg dmg cmg
## (1) (2) (3) (4) (5) (6) (7)
## --------------------------------------------------------------------------------------------------
## Treasury Bonds Notes 0.208*** 0.612*** 0.202*** 0.317*** 0.241 -0.433 -1.713
## (0.039) (0.165) (0.042) (0.043) (0.180) (0.311) (2.436)
## US Corp Bonds -0.277*** 0.099 -0.268*** -0.374*** -0.815* -0.130 -0.172
## (0.042) (0.224) (0.046) (0.046) (0.428) (0.248) (0.621)
## Term of Trade -0.004** 0.017*** -0.006*** -0.010*** 0.129** 0.004 -0.079
## (0.002) (0.003) (0.002) (0.002) (0.060) (0.012) (0.084)
## lag(diff(CPI)) -0.431***
## (0.034)
## y.bar 1.363***
## (0.371)
## diff(log(PTreasuryBondsNotes)).bar 1.369
## (2.450)
## diff(log(SUSCorpBonds)).bar
##
## diff(TOT).bar
##
## trend -0.082 0.065
## (0.169) (0.051)
## Constant 0.058 0.147** 0.731 -0.278 -0.299
## (0.059) (0.069) (0.808) (0.190) (0.389)
## --------------------------------------------------------------------------------------------------
## Observations 1,152 192 1,152 960 1,152 1,152 1,152
## ==================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
2 GARCH模型
利用GARCH模型计算风险 得到的风险进行固定效应模型
library(tidyverse)
library(rugarch)
library(magrittr)
library(caret)
library(timetk)
library(lubridate)
library(xts)
library(plm)
library(stargazer)
2.1 导入数据
df<-read_csv("../output/190726_beta_data.csv")
df<-df %>%mutate(country=COUNTRY %>% as.character() %>% as.factor() )
单变量的garch选择10年期债券收益率
因为不能使用长面板数据,因此要对国家分组 目前用到的数据里面一共7个国家
2.2 建模,可调参数很多
可调节的主要分为3个部分
- 方差方程:variance.model
- 均值方程:mean.model
- 分布:distribution
vol_func <- function(data) {
vol_total <- list()
cou_name <- cou_name <- data$country %>% unique() %>% make.names()
for (i in cou_name) {
df_t <-
df %>% select(TIME, country, tenyearbong) %>% filter(country == i) %>% mutate(index =
ymd(TIME)) %>% tk_xts() %>% na.omit()
x <- df_t[, 'tenyearbong']
spec_d = ugarchspec(
variance.model = list(
model = "sGARCH",
garchOrder = c(1, 1),
submodel = NULL,
external.regressors = NULL,
variance.targeting = FALSE
),
mean.model = list(
armaOrder = c(1, 1),
include.mean = TRUE,
archm = FALSE,
archpow = 1,
arfima = FALSE,
external.regressors = NULL,
archex = FALSE
),
distribution.model = "norm"
)
vol <- ugarchfit(x, spec = spec_d) %>% sigma()
vol_total[[i]] <- vol
}
voltility_total <-
rbind(vol_total[[1]],
vol_total[[2]],
vol_total[[3]],
vol_total[[4]],
vol_total[[5]],
vol_total[[6]],
vol_total[[7]])
voltility_total
}
2.3 平稳性检验
library(tseries)
df_fix<-df_fixed%>% data.frame()%>% select(voltility,CPI,GDP,PTreasuryBondsNotes,SUSCorpBonds,TOT)
df_fix%>% map(adf.test)
## $voltility
##
## Augmented Dickey-Fuller Test
##
## data: .x[[i]]
## Dickey-Fuller = -3.1351, Lag order = 11, p-value = 0.0991
## alternative hypothesis: stationary
##
##
## $CPI
##
## Augmented Dickey-Fuller Test
##
## data: .x[[i]]
## Dickey-Fuller = -2.131, Lag order = 11, p-value = 0.5228
## alternative hypothesis: stationary
##
##
## $GDP
##
## Augmented Dickey-Fuller Test
##
## data: .x[[i]]
## Dickey-Fuller = -2.8682, Lag order = 11, p-value = 0.2108
## alternative hypothesis: stationary
##
##
## $PTreasuryBondsNotes
##
## Augmented Dickey-Fuller Test
##
## data: .x[[i]]
## Dickey-Fuller = -5.3209, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $SUSCorpBonds
##
## Augmented Dickey-Fuller Test
##
## data: .x[[i]]
## Dickey-Fuller = -5.0799, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $TOT
##
## Augmented Dickey-Fuller Test
##
## data: .x[[i]]
## Dickey-Fuller = -6.3536, Lag order = 11, p-value = 0.01
## alternative hypothesis: stationary
2.4 滚动风险的固定效应模型
fm <-
voltility~ diff(log(PTreasuryBondsNotes)) + diff(log(SUSCorpBonds)) +
diff(TOT)
fixed <- function(fm, data) {
re <- data_frame(mod = c("pooling", "between", "within")) %>%
mutate(re = map(
mod,
~ plm(
fm,
data = data.frame(data),
model = . ,
effect = "individual"
)
)) %>%
.$re
}
mod1<-fixed(fm, df_fixed)
2.5 动态固定效应模型
2.6 组间均值模型
这里不能data不能group
2.7 结果展示
show_stargazer <-
function(mod1, mod2, mod3, output_path = "../output/garch.doc") {
stargazer(
mod1,
mod2,
mod3,
type = "text",
title = "Regression Results",
header = F,
column.labels = c("pooling", "between", "within","d_within","mg","dmg","cmg"),
covariate.labels = c("Treasury Bonds Notes", "US Corp Bonds", "Term of Trade"),
align = TRUE,
no.space = TRUE,
keep.stat = "n",
out = output_path
)
}
show_stargazer(mod1, mod2, mod3)
##
## Regression Results
## ======================================================================================================
## Dependent variable:
## -------------------------------------------------------------------
## voltility
## panel mean
## linear groups
## pooling between within d mg dmg cmg
## (1) (2) (3) (4) (5) (6) (7)
## ------------------------------------------------------------------------------------------------------
## Treasury Bonds Notes -0.014*** 0.020 -0.015*** -0.007*** -0.015*** 0.026 0.190
## (0.002) (0.017) (0.002) (0.002) (0.004) (0.019) (0.176)
## US Corp Bonds 0.001 0.068*** 0.001 -0.003 -0.002 0.016 0.087
## (0.002) (0.023) (0.002) (0.002) (0.009) (0.010) (0.070)
## Term of Trade -0.0004*** 0.001 -0.001*** -0.001*** 0.0001 -0.002*** -0.005
## (0.0001) (0.0004) (0.0001) (0.0001) (0.001) (0.001) (0.008)
## lag(voltility) -0.292***
## (0.032)
## y.bar -1.080
## (1.027)
## diff(log(PTreasuryBondsNotes)).bar -0.224
## (0.198)
## diff(log(SUSCorpBonds)).bar
##
## diff(TOT).bar
##
## trend -0.009** 0.001
## (0.004) (0.003)
## Constant 0.191*** 0.209*** 0.236*** -0.005 0.384**
## (0.003) (0.007) (0.018) (0.011) (0.181)
## ------------------------------------------------------------------------------------------------------
## Observations 1,152 192 1,152 1,152 1,152 1,152 1,152
## ======================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
结果不符合假设
3 滚动风险的固定效应模型
library(PooledMeanGroup)
library(tibbletime)
library(lubridate)
library(plm)
library(tseries)
library(stargazer)
library(lmtest)
library(magrittr)
library(pder)
library(texreg)
library(tidyverse)
# 处理时间窗口数据
pdata <- read_csv("../output/190726_beta_data.csv")
tenyearbong <- as.numeric(pdata$tenyearbong)
pdata<-pdata%>% group_by(TIME) %>%
mutate(mean_r = mean(tenyearbong, na.rm = TRUE))%>% #添加一个均值
group_by(COUNTRY) %>%
mutate(n = row_number()) %>%
select(
TIME,
COUNTRY,
mean_r,
tenyearbong,
PTreasuryBondsNotes,
SUSCorpBonds,
TOT
)
window <- 30
rolling_lm <-
rollify(
.f = function(mean_r,tenyearbong) {
lm(mean_r ~tenyearbong )
},
window = window,
unlist = FALSE
)
beta_df <-
pdata %>% group_by(COUNTRY) %>%
mutate(ols = rolling_lm( mean_r,tenyearbong)) %>%
slice(-1:-29) %>%
mutate(beta = map(ols, broom::tidy)) %>%
select(TIME, beta) %>%
unnest() %>%
filter(term == 'tenyearbong') %>%
select(TIME, estimate, COUNTRY)
begin <- "2004-06-01"
end <- "2018-12-01"
interva <- interval(begin, end)
df_new <- pdata %>%
group_by(COUNTRY) %>%
select(TIME,
COUNTRY,
PTreasuryBondsNotes,
SUSCorpBonds,
TOT
) %>%
filter(TIME %within% interva)
df_data <- merge.data.frame(beta_df, df_new) %>% ungroup()
df_data <-
df_data %>%
mutate(
TIME = TIME %>% as.character() %>% as.factor() %>% as.integer(),
COUNTRY = COUNTRY %>% as.character() %>% as.factor() %>% as.integer()
) %>%
group_by(COUNTRY) %>%
arrange(TIME)
3.1 固定效应模型
fm <-
diff(estimate) ~ diff(log(PTreasuryBondsNotes)) + diff(log(SUSCorpBonds)) +
diff(TOT)
fixed <- function(fm, data) {
re <- data_frame(mod = c("pooling", "between", "within")) %>%
mutate(re = map(
mod,
~ plm(
fm,
data = data.frame(data),
model = . ,
effect = "individual"
)
)) %>%
.$re
}
mod1<-fixed(fm, df_data)
3.2 动态固定效应模型
3.3 组间均值模型
这里data不能group
3.4 结果展示
show_stargazer <-
function(mod1, mod2, mod3, output_path = "../output/beta_fixed.doc") {
stargazer(
mod1,
mod2,
mod3,
type = "text",
title = "Regression Results",
header = F,
column.labels = c("pooling", "between", "within"),
covariate.labels = c("Treasury Bonds Notes", "US Corp Bonds", "Term of Trade"),
align = TRUE,
no.space = TRUE,
keep.stat = "n",
out = output_path
)
}
show_stargazer(mod1, mod2, mod3)
##
## Regression Results
## ===============================================================================
## Dependent variable:
## ----------------------------------------------------------
## diff(estimate)
## panel mean
## linear groups
## pooling between within
## (1) (2) (3) (4) (5) (6)
## -------------------------------------------------------------------------------
## Treasury Bonds Notes 0.137*** 0.197*** 0.137*** 0.044** 0.157*** 0.449*
## (0.015) (0.044) (0.016) (0.021) (0.041) (0.257)
## US Corp Bonds -0.101*** 0.058 -0.101*** -0.018 -0.106 -0.033
## (0.017) (0.070) (0.019) (0.023) (0.070) (0.127)
## Term of Trade -0.001 0.002** -0.001 -0.001 -0.013 -0.018***
## (0.001) (0.001) (0.001) (0.001) (0.008) (0.005)
## lag(diff(estimate)) -0.324***
## (0.045)
## trend -0.112*** -0.035
## (0.027) (0.022)
## Constant 0.010 0.048** 0.281** 0.123
## (0.022) (0.020) (0.119) (0.083)
## -------------------------------------------------------------------------------
## Observations 978 163 978 815 978 978
## ===============================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
4 PMG模型
library(PooledMeanGroup)
library(plm)
library(tseries)
library(stargazer)
library(lmtest)
library(magrittr)
library(pder)
library(texreg)
library(tidyverse)
pdata <- read_csv("190726_beta_data.csv")
pdata$GDP <- as.numeric(pdata$GDP)
names(pdata)
pdata <-
pdata %>%
mutate(
TIME = TIME %>% as.character() %>% as.factor() %>% as.integer(),
COUNTRY = COUNTRY %>% as.character() %>% as.factor() %>% as.integer()
) %>%
group_by(COUNTRY)%>%
arrange(TIME)%>%
na.omit()
anyNA(pdata)
cpi <- data.frame(cpi = pdata[, 4], row.names = row.names(pdata))
gdp <- data.frame(cpi = pdata[, 5], row.names = row.names(pdata))
PTreasuryBondsNotes <-
data.frame(PTreasuryBondsNotes = pdata[, 6],
row.names = row.names(pdata))
SUSCorpBonds <-
data.frame(SUSCorpBonds = pdata[, 7], row.names = row.names(pdata))
TOT <-
data.frame(TOT = pdata[, 9], row.names = row.names(pdata))
pdata %>%
summarise(n_ctr = n_distinct(COUNTRY),
n_yr = n_distinct(TIME),
n = n()) %>%
mutate(n_mul = n_ctr * n_yr)
dcpi <- DiffPanel(variable = pdata[, 4], quantity = rep(192, 7))
lcpi=LagPanel(variable=cpi, quantity=rep(192,7))
dgdp <- DiffPanel(variable = pdata[, 5], quantity = rep(192, 7))
dPTreasuryBondsNotes <-
DiffPanel(variable = pdata[, 6], quantity = rep(192, 7))
dSUSCorpBonds <-
DiffPanel(variable = pdata[, 7], quantity = rep(192, 7))
dTOT <-
DiffPanel(variable = pdata[, 9], quantity = rep(192, 7))
dataPanel <- cbind(
cpi,
PTreasuryBondsNotes,
SUSCorpBonds,
TOT,
dcpi,
dPTreasuryBondsNotes,
dSUSCorpBonds,
dTOT,
lcpi
)
dataPanel <- data.frame(dataPanel)
names(dataPanel) = c(
"cpi",
"PTreasuryBondsNotes",
"SUSCorpBonds",
"TOT",
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT",
"lcpi"
)
dataPanel <- PanelNaOmit(dataset = dataPanel, quantity = rep(192, 7))
dataPanel$quantity
dataPanel
#debug(optimPMG)
OptimPmgExp = optimPMG(
dLL = 10 ^ -10,
maxIter = 200,
TetaStart = rep(x = 1, times = 3),
vecSR = list(
SR1 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR2 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR3 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR4 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR5 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR6 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR7 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
)
),
vecLR = c(
"lcpi",
"PTreasuryBondsNotes",
"SUSCorpBonds",
"TOT"
),
dataset = dataPanel$dataset,
quantity = dataPanel$quantity,
const = TRUE
)
OptimPmgExp$LR
PmgExp = PMG(
paramTeta = c(2.689219, -32.845854,-237.696453),
vecSR = list(
SR1 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR2 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR3 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR4 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR5 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR6 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
),
SR7 = c(
"dcpi",
"dPTreasuryBondsNotes",
"dSUSCorpBonds",
"dTOT"
)
),
vecLR = c(
"lcpi",
"PTreasuryBondsNotes",
"SUSCorpBonds",
"TOT"
),
dataset = dataPanel$dataset,
quantity = dataPanel$quantity,
const = TRUE
)
str(PmgExp$SR)
PmgExp$LR
PmgExp$SR <-
map(1:7, ~ PmgExp$SR %>% .[[.x]] %>% `names<-`(c("Coef", "StdErr", "z", "P>|z|", "Low95%", "High95%")) %>% rownames_to_column) %>%
bind_rows() %>%
group_by(rowname) %>%
summarise(
Coef = mean(Coef, na.rm = TRUE),
StdErr = sqrt(sum(StdErr^2)),
na_Coef = sum(is.na(Coef)),
na_StdErr = sum(is.na(StdErr))
) %>%
filter(!rowname %>% str_detect("^ec\\d"))
x<-data.frame(PmgExp$SR)
x
t_value<-x[,2]/x[,3]
p_value<-2*pt(-abs(t_value),df=191)
SR_SUM<-mutate(x,t_value,p_value)
SR_SUM
stargazer(SR_SUM,
type = "html",
title = "CPI PMG Results",
header = F,
align=TRUE,
no.space=TRUE,
summary=FALSE,
out = "190806_cpi_pmg.doc"
)
5 XGBoost寻找交叉项
path<-"../output/190726_beta_data.csv"
pdata <- read_csv(path)
df<-pdata %>% select(CPI,PTreasuryBondsNotes:LOW)
df %>% head()
xgb_model <- xgb.train(
data=dtrain,
eta = 0.1,
max_depth = 15,
nround=16,
subsample = 0.5,
colsample_bytree = 0.5,
seed = 1,
eval.metric = "auc",
# eval.metric = "error",
# eval.metric = "logloss",
# num_class = 12, # set the number of classes. To use only with multiclass objectives.
nfold = 100,
nthread = 3
)
xgb_tree反应了模型每个数据的具体路径。我们将要通过这个表进行交叉项的输出。
Quality: either the split gain (change in loss) or the leaf value
说明某个节点,Quality 衡量了变量的价值。 Quality作为衡量交叉项的评价标准。
那么如何找出有效的交叉项目?
mod_interactions <- interactions(xgb_model, df %>% select(xgb_model$feature_names) %>% as.matrix())
mod_interactions %>% write_rds("../output/mod_interactions.rds")
mod_interactions <- read_rds("../output/mod_interactions.rds")
# 执行时间慢,因此建立中间变量。
mod_interactions %>% head
## Parent Child sumGain frequency
## 1: trade2 Foreignexchangereserve 191.4146 2
## 2: pureexport Tradesurplusconsolidation 177.7712 4
## 3: Foreignexchangereserve servicesexport 166.6120 4
## 4: pureexport Exportproduct 137.0093 2
## 5: servicesexport Importproduct 108.7695 4
## 6: serviceimport Importproduct 105.4075 1
##
## ================================================================================
## Dependent variable:
## ------------------------------------------------------
## CPI
## (1) (2)
## --------------------------------------------------------------------------------
## lag(CPI) 0.945***
## (0.008)
##
## PTreasuryBondsNotes 0.00000*** 0.00000
## (0.00000) (0.00000)
##
## SUSCorpBonds -0.00001 -0.00000
## (0.00001) (0.00000)
##
## Importproduct 0.0004** -0.0001
## (0.0002) (0.0001)
##
## TOT -0.002 0.001***
## (0.002) (0.0005)
##
## Exportproduct -0.0004** 0.0001
## (0.0002) (0.0001)
##
## tenyearbong 0.361*** 0.018*
## (0.030) (0.010)
##
## pureexport 0.0004** -0.0001
## (0.0002) (0.0001)
##
## servicesexport 10.777*** 0.200
## (2.676) (0.821)
##
## serviceimport -10.777*** -0.200
## (2.676) (0.821)
##
## pureserviceexport -10.777*** -0.200
## (2.676) (0.821)
##
## exchangerate 0.002*** 0.0002***
## (0.0001) (0.00005)
##
## Tradesurplusconsolidation 0.00001** 0.00000
## (0.00000) (0.00000)
##
## trade1 -0.057*** -0.005**
## (0.008) (0.002)
##
## trade2 0.019*** 0.002**
## (0.002) (0.001)
##
## Foreignexchangereserve 0.000*** 0.000**
## (0.000) (0.000)
##
## Totaltrade -0.00002*** -0.00000***
## (0.00000) (0.00000)
##
## Closingquotation 2.194*** 0.253**
## (0.399) (0.123)
##
## `Opening quotation` 2.351*** -0.092
## (0.341) (0.106)
##
## HIGH -3.311*** -0.307**
## (0.396) (0.124)
##
## LOW -1.052*** 0.185*
## (0.366) (0.112)
##
## Constant -1.239*** -0.218**
## (0.291) (0.089)
##
## --------------------------------------------------------------------------------
## Observations 1,344 1,343
## R2 0.448 0.949
## Adjusted R2 0.440 0.948
## Residual Std. Error 1.293 (df = 1323) 0.394 (df = 1321)
## F Statistic 53.660*** (df = 20; 1323) 1,163.646*** (df = 21; 1321)
## ================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
附录
参考文献
Li, Jiaxiang, Wenxin Gao, and Xuliang Huang. 2020. “USD Data Analysis.” GitHub. 2020. https://github.com/JiaxiangBU/usd-data-analysisEX.
Lu, Changrong, Jiaxiang Li, and Yi Sheng. 2020. “A Study on the Negative Externality of Usd Liquidity - Based on the Asset Allocation Efficiency of Us Treasury Securities.” The Singapore Economic Review (SSCI).