本文是课程《数据科学与金融计算》第6章的学习笔记,主要介绍GARCH类、SV类模型和高频波动模型,用于知识点总结和代码练习,Q&A为问题及解决方案。
目录
- 第六章 金融数据整理与预处理
- 6.1 GARCH类模型
- 案例:恒生指数 GARCH模型
- 6.2 SV类模型
- 案例:SV模型
- 案例:多元SV模型
- 6.3 高频波动模型
- 案例:ACD模型
- 案例:高频“已实现”方差
第六章 金融数据整理与预处理
6.1 GARCH类模型
1、ARCH模型
(1)模型
(2)ARCH效应检验
LM检验(原假设:不存在ARCH效应αi=0):
(3)估计方法:极大似然估计
2、GARCH模型
(1)模型
(2)模型定阶
q:残差序列(平方),PACF
p:AIC 、BIC准则(3)模型估计:MLE
(4)模型检验
①标准化残差
②Ljung-Box统计量:均值方程与方差方程设定的正确性。
Q-Q图:分布假设的正确性。
(5)模型预测
两边取期望:
用递推方式对条件方差进行求解
案例:恒生指数 GARCH模型
函数:chartSeries(): 画出价格与交易的时序图
library(fGarch)
source("ArchTest.R")
# (1) 读数据
library(quantmod) # 加载包
getSymbols('^HSI', from='2000-01-01', to='2020-05-08') # 从Yahoo网站下载恒生指数日价格数据
dim(HSI) # 数据规模
names(HSI) # 数据变量名称
chartSeries(HSI, theme='white') # 画出价格与交易的时序图
# HSI <- read.table('HSI.txt') # 或者从硬盘中读取恒生指数日价格数据
# HSI <- as.xts(HSI) # 将数据格式转化为xts格式
# (2) 计算收益率数据
ptd.HSI <- HSI$HSI.Adjusted # 提取日收盘价信息
rtd.HSI <- diff(log(ptd.HSI))*100 # 计算日对数收益
rtd.HSI <- rtd.HSI[-1,] # 删除一期缺失值
plot(rtd.HSI) # 画出日收益序列的时序图
ptm.HSI <- to.monthly(HSI)$HSI.Adjusted # 提取月收盘价信息
rtm.HSI <- diff(log(ptm.HSI))*100 # 计算月对数收益
rtm.HSI <- rtm.HSI[-1,] # 删除一期缺失值
plot(rtm.HSI) # 画出月收益序列的时序图
detach(package:quantmod)
# (3) ARCH效应检验
# rtm.HSI <- as.numeric(rtm.HSI)
ind.outsample <- sub(' ','',substr(index(rtm.HSI), 4, 8)) %in% '2013' # 设置样本外下标:2013年为样本外
ind.insample <- !ind.outsample # 设置样本内下标:其余为样本内
rtm.insample <- rtm.HSI[ind.insample]
rtm.outsample <- rtm.HSI[ind.outsample]
Box.test(rtm.insample, lag=12, type='Ljung-Box')
# 月收益序列不存在自相关
Box.test(rtm.insample^2, lag=12, type='Ljung-Box') # 平方月收益序列存在自相关
ArchTest(x=rtm.insample, lags=12) # 存在显著的ARCH效应
# (4) 模型定阶
epst <- rtm.insample - mean(rtm.insample) # 均值调整对数收益
par(mfrow=c(1,2))
acf(as.numeric(epst)^2, lag.max=20, main='平方序列')
pacf(as.numeric(epst)^2, lag.max=20, main='平方序列')
# (5) 建立GARCH类模型
GARCH.model_1 <- garchFit(~garch(1,1), data=rtm.insample, trace=FALSE) # GARCH(1,1)-N模型
GARCH.model_2 <- garchFit(~garch(2,1), data=rtm.insample, trace=FALSE) # GARCH(1,2)-N模型
GARCH.model_3 <- garchFit(~garch(1,1), data=rtm.insample, cond.dist='std', trace=FALSE) # GARCH(1,1)-t模型
GARCH.model_4 <- garchFit(~garch(1,1), data=rtm.insample, cond.dist='sstd', trace=FALSE) # GARCH(1,1)-st模型
GARCH.model_5 <- garchFit(~garch(1,1), data=rtm.insample, cond.dist='ged', trace=FALSE) # GARCH(1,1)-GED模型
GARCH.model_6 <- garchFit(~garch(1,1), data=rtm.insample, cond.dist='sged', trace=FALSE) # GARCH(1,1)-SGED模型
summary(GARCH.model_1)
summary(GARCH.model_3)
plot(GARCH.model_1) # 请键入相应数字获取信息
# (6) 提取GARCH类模型信息
vol_1 <- fBasics::volatility(GARCH.model_1) # 提取GARCH(1,1)-N模型得到的波动率估计
sres_1 <- residuals(GARCH.model_1, standardize=TRUE) # 提取GARCH(1,1)-N模型得到的标准化残差
vol_1.ts <- ts(vol_1, frequency=12, start=c(1990, 1))
sres_1.ts <- ts(sres_1, frequency=12, start=c(1990, 1))
par(mfcol=c(2,1))
plot(vol_1.ts, xlab='年', ylab='波动率')
plot(sres_1.ts, xlab='年', ylab='标准化残差')
# (7) 模型检验
par(mfrow=c(2,2))
acf(sres_1, lag=24)
pacf(sres_1, lag=24)
acf(sres_1^2, lag=24)
pacf(sres_1^2, lag=24)
par(mfrow=c(1,1))
qqnorm(sres_1)
qqline(sres_1)
# (8) 模型预测
pred.model_1 <- predict(GARCH.model_1, n.ahead = 11, trace = FALSE, mse = 'cond', plot=FALSE)
pred.model_2 <- predict(GARCH.model_2, n.ahead = 11, trace = FALSE, mse = 'cond', plot=FALSE)
pred.model_3 <- predict(GARCH.model_3, n.ahead = 11, trace = FALSE, mse = 'cond', plot=FALSE)
pred.model_4 <- predict(GARCH.model_4, n.ahead = 11, trace = FALSE, mse = 'cond', plot=FALSE)
pred.model_5 <- predict(GARCH.model_5, n.ahead = 11, trace = FALSE, mse = 'cond', plot=FALSE)
pred.model_6 <- predict(GARCH.model_6, n.ahead = 11, trace = FALSE, mse = 'cond', plot=FALSE)
predVol_1 <- pred.model_1$standardDeviation
predVol_2 <- pred.model_2$standardDeviation
predVol_3 <- pred.model_3$standardDeviation
predVol_4 <- pred.model_4$standardDeviation
predVol_5 <- pred.model_5$standardDeviation
predVol_6 <- pred.model_6$standardDeviation
et <- abs(rtm.outsample - mean(rtm.outsample))
rtd.HSI.2013 <- rtd.HSI['2013']
rv <- sqrt(aggregate(rtd.HSI.2013^2, by=substr(index(rtd.HSI.2013), 1, 7), sum))
predVol <- round(rbind(predVol_1,predVol_2,predVol_3,predVol_4,predVol_5,predVol_6,
as.numeric(et), as.numeric(rv)), digits=3)
colnames(predVol) <- 1:11
rownames(predVol) <- c('GARCH(1,1)-N模型','GARCH(1,2)-N模型','GARCH(1,1)-t模型','GARCH(1,1)-st模型',
'GARCH(1,1)-GED模型','GARCH(1,1)-SGED模型','残差绝对值', '已实现波动')
print(predVol)
# (9) 模型选择
cor(t(predVol))
3、GARCH模型拓展
(1)GARCH-M(p,q)模型
(2)IGARCH模型(单整)
(3)EGARCH模型(指数)
(4)TGARCH模型
(5)APARCH模型
library(rugarch)
# (1) GARCH-M模型
GARCHM.spec <- ugarchspec(variance.model=list(model='fGARCH', garchOrder=c(1,1), submodel='GARCH'),
mean.model=list(armaOrder=c(0,0), include.mean=TRUE, archm=TRUE),
distribution.model='norm')
GARCHM.fit <- ugarchfit(GARCHM.spec, data=rtm.insample)
#(2)EGARCH模型
EGARCH.spec <- ugarchspec(variance.model=list(model='eGARCH', garchOrder=c(1,1)),
mean.model=list(armaOrder=c(0,0), include.mean=TRUE),
distribution.model='norm')
EGARCH.fit <- ugarchfit(EGARCH.spec, data=rtm.insample,solver="gosolnp") #solver="hybrid"
EGARCH.fit
#(3)IGARCH模型
IGARCH.spec <- ugarchspec(variance.model=list(model='iGARCH', garchOrder=c(1,1)),
mean.model=list(armaOrder=c(0,0), include.mean=TRUE),
distribution.model='norm')
IGARCH.fit <- ugarchfit(IGARCH.spec, data=rtm.insample)
IGARCH.fit
# (4) TGARCH模型
TGARCH.spec <- ugarchspec(variance.model=list(model='fGARCH', garchOrder=c(1,1), submodel='TGARCH'),
mean.model=list(armaOrder=c(0,0), include.mean=TRUE, archm=FALSE),
distribution.model='norm')
TGARCH.fit <- ugarchfit(TGARCH.spec, data=rtm.insample)
# (5) APARCH模型
APARCH.model_1 <- garchFit(~1+aparch(1,1), data=rtm.insample, trace=FALSE) # GARCH(1,1)-N模型
summary(APARCH.model_1)
APARCH.model_2 <- garchFit(~1+aparch(1,1), data=rtm.insample, delta=2, trace=FALSE) # GARCH(1,1)-N模型
summary(APARCH.model_2)
# 6. 例6-6:案例分析, MGARCH模型
library(rmgarch)
library(xlsx)
# (1) DCC-GARCH模型
StockExchange <- read.xlsx(file='Data.xlsx', sheetIndex='StockExchange')
rstock <- diff(log(StockExchange$stock)) # 计算对数收益率
rexchange <- diff(log(StockExchange$exchange)) # 计算对数收益率
r.StockExchange <- data.frame(rstock=rstock, rexchange=rexchange, row.names=StockExchange$date[-1])
r.StockExchange <- as.xts(r.StockExchange) # 转化为时间序列结构
par(mfrow=c(2,1)) # 时间序列图形
plot(r.StockExchange$rstock, ylab='收益率',main='沪深300',lty=1,cex.lab=0.8,cex.main=0.8)
plot(r.StockExchange$rexchange, ylab='收益率',main='汇率',lty=1,cex.lab=0.8,cex.main=0.8)
garch11.spec <- ugarchspec(mean.model = list(armaOrder = c(1,1),include.mean =TRUE),
variance.model = list(garchOrder = c(1,1),model = "eGARCH"),
distribution.model = "sstd") # 设定GARCH和DCC过程
dcc.garch11.spec <- dccspec(uspec = multispec(replicate(2,garch11.spec)),VAR=FALSE,dccOrder = c(1,1),distribution="mvt")
dcc.fit = dccfit(dcc.garch11.spec, data=r.StockExchange) # DCC估计
dcc.fit # 显示DCC估计结果
plot(dcc.fit) # DCC结果输出,按照要求键入一个数字(1-5)
dcc.fcst = dccforecast(dcc.fit, n.ahead=20) # 外推预测
plot(dcc.fcst) # 显示外推预测,按照要求键入一个数字(1-5)
多元GARCH模型
6.2 SV类模型
一、SV模型(随机效应)
随机波动模型(SV)认为波动率由潜在的不可观测的随机过程所决定,即在波动率方程中引入一个新的随机变量,该变量可能服从马尔科夫过程、随机游走或其他。
- “看不见的积分积掉,平均化”
- “似然函数是观测数据的概率分布”
1、基本模型
补充,给定随机变量的分布,求期望和方差。
n=10000
x=rnorm(n)
mean(log(x^2))
var(log(x^2))
2、参数估计(QML方法,转化为标准的线性状态空间形式)
3、参数估计(证明过程见笔记)
在这个线性状态空间模型中,可以使用Kalman滤波实现模型参数估计,包含三个步骤。
第一步:初始化
第二步:序列迭代
第三步:样本内拟合
拟极大似然(不断变化更新的):“条件密度可以求和加起来”
4、波动预测(样本外预测与平滑预测)
(1)向前l步预测
(2)向后的平滑预测
- 注:卡尔曼滤波(Kalman filtering)[1]是一种利用线性系统状态方程,通过系统输入输出观测数据,对系统状态进行最优估计的算法。由于观测数据中包括系统中的噪声和干扰的影响,所以最优估计也可看作是滤波过程。
案例:SV模型
建立上证综指收益序列的SV模型,随机误差项服从混合正态分布
# 0. initializing
rm(list=ls())
# 1. read data
# (1) get price
library(quantmod)
getSymbols('^SSEC', from='2008-01-01', to='2013-12-31') # downloads daily price data of APPLE,please wait
dim(SSEC) # dimensions of the data
names(SSEC) # names of the data
detach(package:quantmod)
# (2) comput return of stocks
price.SSEC <- SSEC$SSEC.Adjusted
r.SSEC <- 100*diff(log(price.SSEC)) # 计算日收益率
r <- as.numeric(r.SSEC[-1]) # 移除第一个NA值
# (3) logrithm square return
y <- log(r^2 + 1e-4) # 加上一个极小的数使得log里面的数保持正值
sum(is.na(y))
y=na.omit(y)
导入函数文件:source('sub-06.R')
# 2. load filter function
source('sub-06.R')
#滤波方法,写出似然
# 1. SVfilter through Kalman filter
SVfilter <- function (y, phi0, phi1, sQ, alpha, mu1, sR1, mu2, sR2)
{
y = as.numeric(y)
num = length(y)
Q = sQ^2
R1 = sR1^2
R2 = sR2^2
xp = matrix(0, num, 1)
Pp = matrix(0, num, 1)
xp[1] = 0 #初始值
Pp[1] = phi1^2 + Q #初始方差
pi1 = 1/2
pi2 = 1/2
pit1 = 1/2
pit2 = 1/2
like = 0
# 返回似然值
for (i in 2:num) {
sig1 = Pp[i - 1] + R1 #两组数据,给定t-1,方差
sig2 = Pp[i - 1] + R2
k1 = phi1 * Pp[i - 1]/sig1
k2 = phi1 * Pp[i - 1]/sig2
e1 = y[i] - xp[i - 1] - mu1 - alpha #观测数据减均值
e2 = y[i] - xp[i - 1] - mu2 - alpha
den1 = (1/sqrt(sig1)) * exp(-0.5 * e1^2/sig1) #密度函数
den2 = (1/sqrt(sig2)) * exp(-0.5 * e2^2/sig2)
denom = pi1 * den1 + pi2 * den2 #联合密度
pit1[i] = pi1 * den1/denom
pit2[i] = pi2 * den2/denom
xp[i] = phi0 + phi1 * xp[i-1] + pit1[i]*k1*e1 + pit2[i]*k2*e2 # 更新
Pp[i] = (phi1^2)*Pp[i-1] + Q - pit1[i]*(k1^2)*sig1 - pit2[i]*(k2^2)*sig2
like = like - 0.5 * log(pit1[i]*den1 + pit2[i]*den2) # 写出密度函数
# like = like - 0.5 * log(pi1 * den1 + pi0 * den0)
}
list(xp = xp, Pp = Pp, like = like)
}
# 2. innovations Likelihood
Linn <- function(para, y){
phi0<-para[1]; phi1<-para[2]; sQ<-para[3]; alpha<-para[4];
mu1<-para[5]; sR1<-para[6]; mu2<-para[7]; sR2<-para[8]
sv <- SVfilter(y, phi0, phi1, sQ, alpha, mu1, sR1, mu2, sR2)
return(sv$like)
}
# 3. SV modeling
# (1) Initial Parameters
phi0<-0; phi1<-.95; sQ<-.2; alpha <- mean(y); mu1<- 0; sR1<-2; mu2<- 0; sR2<-2
init.par <- c(phi0, phi1, sQ, alpha, mu1, sR1, mu2, sR2)
# (2) Innovations Likelihood
# see sub-06.R
# (3) Estimation
est <- optim(init.par, Linn, y=y, method="BFGS", hessian=TRUE, control=list(trace=1,REPORT=1)) #参数极大化,极小化负的极大似然
SE <- sqrt(diag(solve(est$hessian))) #取方差对角元开根号
t_test <- est$par/SE
prob <- 2*pt(abs(t_test), df=length(y)-1, lower.tail=FALSE)
u <- round(cbind(estimates=est$par, SE, t_test, prob), digits=4)
rownames(u)<-c("phi0","phi1","sQ","alpha","mu1","sigv1","mu2","sigv2")
print(u)
# 4. compare distribution
# (1) filters at the estimated parameters
phi0<-est$par[1]; phi1<-est$par[2]; sQ<-est$par[3]; alpha<-est$par[4];
mu1<-est$par[5]; sR1<-est$par[6]; mu2<-est$par[7]; sR2<-est$par[8]
sv <- SVfilter(y, phi0, phi1, sQ, alpha, mu1, sR1, mu2, sR2) #随机波动模型
# (2) densities plot (f is chi-sq, fm is fitted mixture)
x <- seq(-15, 6, by=.01)
f <- exp(-.5*(exp(x)-x))/(sqrt(2*pi)) #双指数分布
f1 <- exp(-.5*(x-mu1)^2/sR1^2)/(sR1*sqrt(2*pi))
f2 <- exp(-.5*(x-mu2)^2/sR2^2)/(sR2*sqrt(2*pi))
fm <- (f1+f2)/2 #混合正态
plot(density(y), ylim=c(0, 0.25), main='', xlab='x', ylab='密度', lty=1, lwd=2, col='black')
lines(x, f, lty=2, lwd=2, col='blue')
lines(x, fm, lty=3,lwd=1, col='red')
legend('topleft', legend=c('y的核密度', '卡方自然对数', '拟合的混合正态'),
lty=c(1,2,3), lwd=c(2,1,1), col=c('black', 'blue','red'))
# 5. volatility plot
Time <- 1:100
par(mfrow=c(3,1))
plot(Time, y[Time], type='l', main='SSEC平方收益自然对数)', xlab='时间', ylab='y')
plot(Time, sv$xp[Time], type='l', main='预测波动的自然对数',
ylim=c(-3.0, 2.5), xlab='时间', ylab='对数波动')
lines(Time, sv$xp[Time]+2*sqrt(sv$Pp[Time]), lty='dashed')
lines(Time, sv$xp[Time]-2*sqrt(sv$Pp[Time]), lty='dashed')
plot(Time, exp(sv$xp[Time]), type='l', main='预测波动', xlab='时间', ylab='波动率')
par(mfrow=c(1,1))
案例:多元SV模型
# 0. initializing
rm(list=ls())
# 1. load package
library(stochvol)
# 2. numerical simualtion for univariate SV process
# (1) simulate a highly persistent SV process
set.seed(12345)
sim <- svsim(500, mu = -5, phi = 0.95, sigma = 0.25)
# (2) 有放回地抽取5000个,有先验信息
draws <- svsample(sim$y, draws=5000, burnin=500, priormu=c(-8, 1),priorphi=c(10, 1.2), priorsigma=0.2)
# (3) 估计量,抽完之后看后验
print(sim)
summary(draws)
# (4) predict 20 days ahead
fore <- predict(draws, 20)
plot(draws, forecast = fore)
# plot(draws, pages=1, all.terms=TRUE, forecast = fore)
volplot(draws, forecast = 10)
# (5) re-plot with different quantiles
newquants <- c(0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99)
draws <- updatesummary(draws, quantiles = newquants)
plot(draws, forecast = 20, showobs = FALSE, col = seq(along = newquants),
forecastlty = 3, showprior = FALSE)
volplot(draws, forecast = 10)
6.3 高频波动模型
一、概念
1、金融高频数据及其特征:非等间隔、离散价格、日历效应、多重交易
2、主要变量:
收盘价、高频收益率
3、描述统计:样本均值、标准差(具有相依性)
4、“已实现”方差:“对数收益是增量,相依性为0”
如果日内收益不相关,则:
积分方差
二、模型(详见笔记)
1、HAR-RV
2、HAR-RV-J(跳)
3、HEAVY(对方差、已实现测度建模)
4、ACD模型
用最大似然估计法
假定指数分布(具有无记忆性)
5、ACD模型的扩展
(1)分布角度的扩展
WACD模型、GGACD模型
(2)函数关系角度的扩展
LACD模型、TACD模型
案例:ACD模型
# 0. initializing
rm(list=ls())
# 1. read data
caterp <- read.table('taq-cat-t-jan04t082010.txt', header=T)
head(caterp)
(TT <- nrow(caterp)) # sample size
# 2. comput price change and durations
source('sub-06.R')
hf.series <- compRtnDura(da=caterp, plot.it=TRUE)
print(hf.series$len.total)
print(hf.series$len.norm)
print(hf.series$len.duration)
print(hf.series$ratio)
layout(matrix(c(1,2,3,3), 2, 2, byrow=FALSE), c(3,2), c(2,2), respect=TRUE)
layout.show(3)
par(mar=c(4, 4, 2, 2))
plot(hf.series$prs, xlab='时间', ylab='价格', type='l')
plot(hf.series$prc, xlab='时间', ylab='价格变化', type='l')
hist(hf.series$prc, nclass = 100,xlab='价格变化', ylab='频数', main='价格变化直方图')
# 3. extract trading information
extrTrad(da=caterp, date='2010-01-04', from='09:30', to='09:45', plot.ACF=TRUE)
# 4. comput returns
# (1) comput return series for different frequency
ints <- c(1:5, 10, 15, 20, 25, 30) # include 10 intervals
rtns <- list()
ntrads <- list()
for (i in seq_along(ints)){
cat('the current interval is', ints[i], '\n')
tmp <- hfanal(da=caterp, int=ints[i], basic=1)
rtns[[i]] <- tmp$returns
ntrads[[i]] <- tmp$ntrad
}
# (2) comput descriptive statistics for 1-min return series
par(mfrow=c(3,1))
par(mar=c(5.5, 5.5, 1.5, 2.5))
plot(rtns[[1]], xlab='时间', ylab='对数收益', type='l')
hist(rtns[[1]], xlab='对数收益', ylab='频数', main='')
acf(rtns[[1]], xlab='滞后期', lag.max=300, main='')
par(mfrow=c(1,1))
# # 5. comput realized volatility
rv <- NULL
for (i in seq_along(ints)){
cat('the current interval is', ints[i], '\n')
tmp <- hfanal(da=caterp, int=ints[i], basic=1)
rv <- cbind(rv, tmp$realized)
}
colnames(rv) <- paste('int=', ints, sep='')
matplot(1:nrow(rv), rv, xlab='time', ylab='realized volatility', type='l')
legend('topleft', legend=paste('int=', ints, sep=''), lty=1:10,col=1:10)
# 6. implement ACD modeling
# (1) prepare data
sec <- 3600*caterp$hour+60*caterp$minute+caterp$second # 转换为秒
ist <- 3600*9+30*60; end <- 3600*16
lunch <- 3600*12
idx <- c(1:length(sec))[sec < ist] # before market opens
jdx <- c(1:length(sec))[sec > end] # after market closes
sec.norm <- sec[-c(idx,jdx)] # normal trading hours only.
dt <- diff(sec.norm)
kdx <- c(1:length(dt))[dt > 0] # Positive durations only
ti <- sec.norm[2:length(sec.norm)]
dt <- dt[kdx]
ti <- ti[kdx]
st <- 3600*6.5
f1 <- (ti-lunch)/st # trading time
# (2) fit a simple time series model
m1 <- lm(log(dt)~f1+I(f1^2))
summary(m1)
names(m1)
fit <- m1$fitted.values
adjdt <- dt/exp(fit)
plot(adjdt[1:1200], type='l', xlab='时间', ylab='调整持续期')
# (3) ACD modeling
m2 <- acd(adjdt,order=c(1,1), cond.dist="exp")
m3 <- acd(adjdt,order=c(1,1), cond.dist="weibull")
m4 <- acd(adjdt,order=c(1,2), cond.dist="weibull")
m5 <- acd(adjdt,order=c(1,2), cond.dist="gamma")
names(m5)
ep2 <- m2$epsilon
ep3 <- m3$epsilon
ep4 <- m4$epsilon
ep5 <- m5$epsilon
par(mfrow=c(5,1))
par(mar=c(4,4,3,2))
acf(adjdt, xlab='滞后期', main='调整序列', ylim=c(-0.05,0.25))
acf(ep2, xlab='滞后期', main=expression(epsilon[2]), ylim=c(-0.05,0.25))
acf(ep3, xlab='滞后期', main=expression(epsilon[3]), ylim=c(-0.05,0.25))
acf(ep4, xlab='滞后期', main=expression(epsilon[4]), ylim=c(-0.05,0.25))
acf(ep5, xlab='滞后期', main=expression(epsilon[5]), ylim=c(-0.05,0.25))
par(mfrow=c(1,1))
Box.test(ep5, lag=5, type='Ljung')
Box.test(ep5, lag=10, type='Ljung')
Box.test(ep5, lag=15, type='Ljung')
Box.test(ep5, lag=20, type='Ljung')
Box.test(ep5^2, lag=5, type='Ljung')
Box.test(ep5^2, lag=10, type='Ljung')
Box.test(ep5^2, lag=15, type='Ljung')
Box.test(ep5^2, lag=20, type='Ljung')
案例:高频“已实现”方差
# 0. initializing
#setwd('F:/programe/book/R with application to financial quantitive analysis/CH-06')
#rm(list=ls())
# 1. clean HF data
library(highfrequency)
library(xts)
data(sample_tdataraw)
dim(sample_tdataraw)
head(sample_tdataraw)
args(tradesCleanup)
tdata_afterfirstcleaning <- tradesCleanup(tdataraw=sample_tdataraw, exchanges="N")
names(tdata_afterfirstcleaning)
print(tdata_afterfirstcleaning$report)
barplot(tdata_afterfirstcleaning$report)
dim(tdata_afterfirstcleaning$tdata)
# 2. aggregate HF data
# (1) load sample price data
data("sample_tdata")
ts <- sample_tdata$PRICE
head(ts)
# (2) aggregate previous tick to the 30-second sampling frequency
args(aggregatets)
tsagg30sec = aggregatets(ts, on="seconds", k=30) # 按秒进行整合,每30s做平均
head(tsagg30sec, 20)
# (3) aggregate previous tick to the 5-minute sampling frequency
tsagg5min <- aggregatets(ts, on="minutes", k=5)
head(tsagg5min, 20)
# 3. comput descriptive statistics for realized returns
source('sub-06.R')
data(sample_real5minprices)
# 12-19是时间,取出字符串,!=不等于的意思
pdata <- sample_real5minprices[substr(time(sample_real5minprices), 12, 19)!='00:00:00']
length(pdata)
mins <- c(5, 10, 15, 20, 25, 30)
rtn <- list()
stats <- matrix(NA, nrow=length(mins), ncol=7)
for (i in seq_along(mins)){
rtn[[i]] <- HFrtn(pdata=pdata, on="minutes", k=mins[i])
stats[i,] <- stat(as.vector(rtn[[i]]))
}
#对数差分,加起来,相当于第一个数减最后一个
head(rtn[[1]])
head(rtn[[2]])
rownames(stats) <- paste('min=', mins, sep='')
colnames(stats) <- c('mean', 'sd', 'skew', 'kurt', 'median', 'min', 'max')
print(stats)
par(mfrow=c(2,2))
par(mar=c(4,4,2,2))
plot(mins, stats[,'mean'], xlab='分钟', ylab='均值', type='o', pch='*')
plot(mins, stats[,'sd'], xlab='分钟', ylab='标准差', type='o', pch='*')
plot(mins, stats[,'skew'], xlab='分钟', ylab='偏度', type='o', pch='*')
plot(mins, stats[,'kurt'], xlab='分钟', ylab='峰度', type='o', pch='*')
par(mfrow=c(1,1))
# 4. comput (weighted) realized volatility
# 权重可以根据实际情况修改
days <- levels(factor(substr(index(pdata), 1, 10)))
rv <- matrix(NA, nrow=length(days), ncol=length(mins))
wrv <- matrix(NA, nrow=length(days), ncol=length(mins))
for (i in seq_along(mins)){
ind.day <- substr(index(rtn[[i]]), 1, 10)
rv[,i] <- aggregate(rtn[[i]], ind.day, sumsq)
ind.time <- substr(index(rtn[[i]]), 12, 19)
lambda <- aggregate(rtn[[i]], ind.time, sumsq)/sum(rtn[[i]]^2)
ind.time <- index(lambda)[lambda!=0]
ind <- substr(index(rtn[[i]]), 12, 19) %in% ind.time
rtn.f <- rtn[[i]][ind]
ind.day <- substr(index(rtn.f), 1, 10)
N <- length(ind.time)
lambda <- as.numeric(lambda)[lambda!=0]
weight <- 1/(N*lambda)
weight <- weight/sum(weight)*N
wrv[,i] <- aggregate(rtn.f, ind.day, wsumsq, w=weight)
}
rownames(rv) <- days
colnames(rv) <- paste('min=', mins, sep='')
matplot(1:nrow(rv), sqrt(rv), xlab='时间', ylab='已实现波动', type='l')
legend('topright', legend=paste('min=', mins, sep=''), lty=1:6,col=1:6)
rownames(wrv) <- days
colnames(wrv) <- paste('min=', mins, sep='')
matplot(1:nrow(wrv), sqrt(wrv), xlab='时间', ylab='加权已实现波动', type='l')
legend('topright', legend=paste('min=', mins, sep=''), lty=1:6,col=1:6)
# 5. modeling realized volatility
# (1) HARRV model
data(realized_library) # Get sample daily Realized Volatility data
DJI_RV <- read.csv("DJIRV.txt") #realized_library$Dow.Jones.Industrials.Realized.Variance; # Select DJI
DJI_RV <- xts(DJI_RV[,2], order.by = as.Date(DJI_RV[,1]))
DJI_RV <- DJI_RV[!is.na(DJI_RV)] # Remove NA's
DJI_RV <- DJI_RV['2008']
args(harModel)
HARRV <- harModel(data=DJI_RV, periods=c(1,5,22), RVest=c("rCov"), type="HARRV",h=1)
class(HARRV)
summary(HARRV)
plot(HARRV)
matplot(predict(HARRV, interval='confidence'), type='l', xlab='index', ylab='realized volatility')
# (2) HEAVY model
#returns <- realized_library$Dow.Jones.Industrials.Returns # realized return
#rk <- realized_library$Dow.Jones.Industrials.Realized.Kernel # realized measure
returns <- read.csv("DJIReturn.txt")
rk <- read.csv("DJIRK.txt")
returns <- xts(returns[,2], order.by = as.Date(returns[,1]))
rk <- xts(rk[,2], order.by = as.Date(rk[,1]))
returns <- returns[!is.na(rk)]
rk <- rk[!is.na(rk)] # remove NA's
data <- cbind(returns^2, rk ) # make data matrix with squared returns and realized measures
backcast <- matrix(c(var(returns),mean(rk)), ncol=1)
args(heavyModel)
startvalues <- c(0.004,0.02,0.44,0.41,0.74,0.56) # initial values
HEAVY <- heavyModel(data=as.matrix(data,ncol=2), compconst=FALSE, startingvalues=startvalues, backcast=backcast)
names(HEAVY)
print(HEAVY$estparams)
plot(sqrt(HEAVY$condvar)[,1], xlab='时间', ylab='波动率', ylim=c(-0.01,0.1),main='', lty=1)
lines(sqrt(HEAVY$condvar)[,2],col=2)
lines(sqrt(rk)[,1], lty=2,col=3)
legend('topleft', legend=c('可观测已实现核', 'HEAVY条件波动'), lty=c(1,2))