Before getting the stock data we specify some stock indices to be used.
## symbols symbols.names
## [1,] "^FTSE" "FTSE 100"
## [2,] "^GDAXI" "DAX"
## [3,] "^FCHI" "CAC 40"
## [4,] "^GSPC" "S&P 500"
## [5,] "^N225" "Nikkei 225"
## [6,] "^HSI" "Hang Seng"
## [7,] "^BVSP" "Bovespa"
## [8,] "^MXX" "IPC"
The returns data for the indices are loaded from Yahoo. To pull multiple indices at one time over a fairly long historical period of about 20 years, we define a funciton to loop through the indices and pull the data from Yahoo’s historal data query site ichart.yahoo.com
. The date limits in the URL are in the format \[ &a=<startday>&b=<startmonth>&c=<startyear>&c=<endday>&e=<endmonth>&f=<endyear>&g= \].
# returns list: [[1]] data frame of given series from all symbols
# [[2]] full list of all series from all symbols
multiStockHistory <- function(symbols, #char vector
symbols.names, #char vector
startday=1, #int
startmonth=1,
startyear=1970,
endday=19,
endmonth=10,
endyear=2014,
series="Adj.Close",
...)
{
#if(!require(xts)) {install.packages('xts')}
if(is.null(symbols.names)) {symbols.names <- symbols}
# loop through symbols downloading series to list
x <- list()
for (i in 1:length(symbols)) {
base <- "http://ichart.yahoo.com/table.csv?s="
tail <- paste("&a=",startday,"&b=",startmonth,"&c=",startyear,"&d=",endday,"&e=",endmonth,"&f=",endyear,"&g=d&ignore=.csv",sep="")
url <- paste(base,symbols[i],tail,sep="")
x[[i]] <- read.table(url,sep=",",header=T, stringsAsFactors=F,flush = T,...)
x[[i]][,1] <- as.Date(x[[i]][,1])
x[[i]] <- xts(x[[i]][,-1], order.by=x[[i]][,1])
xtsAttributes(x[[i]]) <- list(IndexSymbol=symbols[i], IndexName=symbols.names[i])
if(i%%2 == 0) {cat("pulled symbol ",i,"\n") }
}
#build xts df with only 1 series, e.g., Adj.Close
ind.df <- data.frame()
for (i in 1:length(x)) {
if (dim(ind.df)[2]==0) {
ind.df <- x[[i]][,series]
colnames(ind.df)[i] <- unlist(xtsAttributes(x[[i]]))[2]
} else {
ind.df <- merge.xts(ind.df,x[[i]][,series])
colnames(ind.df)[i] <- unlist(xtsAttributes(x[[i]]))[2]
}
}
return(list(df=ind.df,full.list=x))
} #end function
y <- multiStockHistory(symbols = symbols,symbols.names = symbols.names,
startday=1,startmonth=1,startyear=1999,
endday=19,endmonth=10,endyear=2014,series="Adj.Close")
dput(x = y, file = "historicalIndicesList.Rdata")
Unfortunately, knitr is have some trouble opening the connection to read the data from the URL in the knitr markdown file, but this function works fine in a regular R script (or even within knitr markdown…up until the document is compiled, oddly enough). No matter, we can just save the data to an Rdata
file and
Now we can take a look at just the Adjusted Close data frame we will be using.
y <- dget("historicalIndicesList.Rdata")
ydf <- y$df
ydfn <- na.omit(ydf)
head(ydfn)
## FTSE.100 DAX CAC.40 S.P.500 Nikkei.225 Hang.Seng Bovespa IPC
## 1999-02-01 6012 5191 4304 1273 14465 9600 8891 4030
## 1999-02-02 6013 5167 4244 1262 14350 9503 8732 4015
## 1999-02-03 5940 5086 4189 1272 14161 9420 8676 4061
## 1999-02-04 5940 5078 4167 1248 14087 9439 8653 4014
## 1999-02-08 5835 5027 4154 1244 13992 9140 8824 4017
## 1999-02-09 5780 4904 4038 1216 13903 9244 8674 3947
summary(ydfn)
## Index FTSE.100 DAX CAC.40
## Min. :1999-02-01 Min. :3287 Min. : 2203 Min. :2403
## 1st Qu.:2003-02-04 1st Qu.:4987 1st Qu.: 4821 1st Qu.:3564
## Median :2007-01-04 Median :5701 Median : 5955 Median :4068
## Mean :2006-12-28 Mean :5539 Mean : 5978 Mean :4258
## 3rd Qu.:2010-11-30 3rd Qu.:6239 3rd Qu.: 7085 3rd Qu.:4852
## Max. :2014-10-17 Max. :6930 Max. :10029 Max. :6857
## S.P.500 Nikkei.225 Hang.Seng Bovespa
## Min. : 676 Min. : 7055 Min. : 8409 Min. : 8371
## 1st Qu.:1115 1st Qu.: 9801 1st Qu.:13180 1st Qu.:16396
## Median :1267 Median :11582 Median :17177 Median :39128
## Mean :1277 Mean :12521 Mean :17346 Mean :38363
## 3rd Qu.:1408 3rd Qu.:15301 3rd Qu.:21740 3rd Qu.:57252
## Max. :2011 Max. :20833 Max. :31638 Max. :73517
## IPC
## Min. : 3941
## 1st Qu.: 7178
## Median :20917
## Mean :21999
## 3rd Qu.:34389
## Max. :46357
dim(ydfn)
## [1] 3398 8
Next we need exchange rates to standardize indices to a single currency, in this case US$. Exchange rates can be downloaded from the US Federal Reserve at www.federalreserve.gov/datadownload/ to choose which currencies. The file used in this analysis was pulled from: http://www.federalreserve.gov/datadownload/Download.aspx?rel=H10&series=a6a0113179fdeb9c6fbcdc18575ec09c&filetype=csv&label=include&layout=seriescolumn&from=01/01/1999&to=04/09/2014
# exchange rates from 01-01-1999 to 04-09-2014
url <- "http://www.federalreserve.gov/datadownload/Output.aspx?rel=H10&series=a6a0113179fdeb9c6fbcdc18575ec09c&lastObs=&from=01/01/1999&to=04/09/2014&filetype=csv&label=include&layout=seriescolumn"
ex <- read.table(url,skip=5,sep=",",na.strings="ND",header=T,stringsAsFactors = F)
ex[,1] <- ymd(ex[,1])
ex <- as.xts(ex[,-1], order.by=ex[,1])
# assign colnames from two-letter currency code (last 2 digits)
for (i in 1:length(names(ex))) {
colnames(ex)[i] <- substr(colnames(ex)[i],nchar(colnames(ex)[i])-1,nchar(colnames(ex)[i]))
}
ex <- na.omit(ex)
# drop the returns dates outside of exchange dates range
exsub <- ex["1999/2014-04-04"]
ydfsub <- ydf["1999/2014-04-04"]
Merge the returns series with the exchange rate data and and convert all series to US$.
merged <- na.omit(merge.xts(ydfsub,exsub))
# data frame for converted returns
r <- merged[,1:8]
# [1] "FTSE.100" "DAX" "CAC.40"
# [4] "S.P.500" "Nikkei.225" "Hang.Seng"
# [7] "Bovespa" "IPC"
r[,1] <- merged[,1] * merged$UK
r[,2] <- merged[,2] * merged$EU
r[,3] <- merged[,3] * merged$EU
r[,4] <- merged[,4] * 1
r[,5] <- merged[,5] / merged$JA
r[,6] <- merged[,6] / merged$HK
r[,7] <- merged[,7] / merged$BZ
r[,8] <- merged[,8] / merged$MX
head(r)
## FTSE.100 DAX CAC.40 S.P.500 Nikkei.225 Hang.Seng Bovespa IPC
## 1999-02-01 9866 5867 4865 1273 125.7 1239 4513 398.2
## 1999-02-02 9879 5853 4807 1262 128.0 1226 4824 397.7
## 1999-02-03 9727 5767 4750 1272 125.9 1216 4820 401.0
## 1999-02-04 9738 5741 4712 1248 125.7 1218 4741 396.8
## 1999-02-08 9558 5679 4692 1244 122.3 1179 4669 398.0
## 1999-02-09 9453 5542 4563 1216 121.5 1193 4541 391.6
summary(r)
## Index FTSE.100 DAX CAC.40
## Min. :1999-02-01 Min. : 4873 Min. : 2428 Min. :2621
## 1st Qu.:2002-12-04 1st Qu.: 7973 1st Qu.: 5376 1st Qu.:4341
## Median :2006-09-17 Median : 9202 Median : 6997 Median :5014
## Mean :2006-09-17 Mean : 9158 Mean : 7241 Mean :5155
## 3rd Qu.:2010-07-06 3rd Qu.:10170 3rd Qu.: 9186 3rd Qu.:5776
## Max. :2014-04-04 Max. :13965 Max. :13381 Max. :8462
## S.P.500 Nikkei.225 Hang.Seng Bovespa
## Min. : 676 Min. : 63.2 Min. :1078 Min. : 2157
## 1st Qu.:1110 1st Qu.:102.7 1st Qu.:1682 1st Qu.: 7158
## Median :1260 Median :115.8 Median :2160 Median :17092
## Mean :1253 Mean :118.6 Mean :2200 Mean :19117
## 3rd Qu.:1394 3rd Qu.:138.0 3rd Qu.:2752 3rd Qu.:30033
## Max. :1891 Max. :198.0 Max. :4083 Max. :44667
## IPC
## Min. : 392
## 1st Qu.: 721
## Median :1743
## Mean :1794
## 3rd Qu.:2771
## Max. :3680
# log differenced data for returns
rna <- na.omit(r)
ret <- diff(rna,lag = 1,log = T)
ret <- na.omit(ret)
Lets take a look at these data.
xtsExtra::plot.xts(log(ydfsub), screens=factor(1,1), auto.legend=T, legend.loc = "topleft" , main="Selected World Composite Indices \n (log daily prices in USD)", minor.ticks=F,cex=.6)
## Warning: the condition has length > 1 and only the first element will be
## used
And check their correlations.
pairs(as.data.frame(ret),
lower.panel=panel.smooth,upper.panel=panel.cor,diag.panel=panel.hist)
The ghyp package offers the ability to fit symmetric generalized hyperbolic (SGH) distributions in R. We fit the pairs assuming an SGH and plot.
library(ghyp)
ghypmv.fit <- ghyp::fit.ghypmv(data=ret,silent=TRUE, symmetric=T)
pairs(ghypmv.fit, cex=0.5, nbins=20)
And we can isolate a few specific series like the S&P500 and DAX.
ghypuv.fit.sp <- fit.hypuv(data=ret[,"S.P.500"],silent=T,symmetric=T)
ghypuv.fit.dx <- fit.hypuv(data=ret[,"DAX"],silent=T,symmetric=T)
par(mfrow=c(2,2))
hist(ghypuv.fit.sp,ghyp.col="blue",legend.cex=0.9,
main="S&P 500\nHistogram of data")
qqghyp(ghypuv.fit.sp,plot.legend=T,legend.cex=0.9,
main="S&P 500\nGeneralized Hyperbolic QQ Plot")
hist(ghypuv.fit.dx,ghyp.col="blue",legend.cex=0.9,
main="DAX\nHistogram of data")
qqghyp(ghypuv.fit.dx,plot.legend=T,legend.cex=0.9,
main="DAX\nGeneralized Hyperbolic QQ Plot")
The depmix()
function in the depmixS4 package can get us a HMM fit assuming a Gaussian distribution or several other common distributions, but not generalized hyperbolic. That’s a start. These Markov state classifications combined with the fit.ghypmv()
function in the ghyp package for fitting multivariate SGH should provide an improvement over the HMM assuming Gaussian returns.
The piecemeal algorithm for combining these two packages in R is as follows:
depmixS4::depmix()
functionghyp::fit.ghypmv(...,symmetric=TRUE)
functionThis is performed with the ghypMarkov()
function defined below:
# returns a list of posterior probabilities, HMM state classifications,
# parameters, transition matrix and steady state probabilities
ghypMarkov <- function(data,
response, #list of series in dataframe
family, #list of initial HMM distr used in depmix()
...
) {
library(depmixS4)
library(ghyp)
## Markov multistep calculation assumes only 2 Markov states #########
# 1. determine states from Gaussian
msp <- depmix(data=data, response = response,family = family, nstates = 2, ...)
#optimize parameters
set.seed(1)
fmsp <- fit(msp, ...)
# 2. fit ghypmv to only obs identified as state 1 (or 2)
s1 <- which(posterior(fmsp)[,1]==1)
s2 <- which(posterior(fmsp)[,1]==2)
fitghs1 <- fit.ghypmv(data = data[s1,],symmetric = T,save.data = T,trace = T,silent = T, ...)
lambda1 <- attributes(fitghs1)$lambda
alpha.bar1 <- attributes(fitghs1)$alpha.bar
mu1 <- attributes(fitghs1)$mu
sigma1 <- attributes(fitghs1)$sigma
gamma1 <- rep(0,dim(data)[2])
fitghs2 <- fit.ghypmv(data = data[s2,],symmetric = T,save.data = T,trace = T,silent = T, ...)
lambda2 <- attributes(fitghs2)$lambda
alpha.bar2 <- attributes(fitghs2)$alpha.bar
mu2 <- attributes(fitghs2)$mu
sigma2 <- attributes(fitghs2)$sigma
gamma2 <- rep(0,dim(data)[2])
# 3. recalculate posterior probabilities with new ghypmv params
p <- matrix(rep(NA,4),2)
p[1,1:2] <- attributes(attributes(fmsp)$transition[[1]])$parameters$coefficients
p[2,1:2]<- attributes(attributes(fmsp)$transition[[2]])$parameters$coefficients
pi <- vector()
pi[1] <- (1-p[2,2]) / (2 - p[1,1] - p[2,2])
pi[2] <- (1-p[1,1]) / (2 - p[1,1] - p[2,2])
ghyp1 <- ghyp(lambda = lambda1,
alpha.bar = alpha.bar1,
mu = mu1,
sigma = sigma1,
gamma = gamma1 )
ghyp2 <- ghyp(lambda = lambda2,
alpha.bar = alpha.bar2,
mu = mu2,
sigma = sigma2,
gamma = gamma2)
dgh1 <- dghyp(x = data[s1,], object = ghyp1)
dgh2 <- dghyp(x = data[s2,], object = ghyp2)
pstar1 <- c()
pstar2 <- c()
for (i in 1:dim(data)[1]) {
if (i==1) {
post1 <- (pi[1]*p[1,1]+pi[2]*p[2,1])*dghyp(x=data[1,],
object=ghyp1)
post2 <- (pi[1]*p[1,2]+pi[2]*p[2,2])*dghyp(x=data[1,],
object=ghyp2)
pstar1[i] <- post1 / (post1 + post2)
pstar2[i] <- post2 / (post1 + post2)
} else {
post1 <- (pstar1[i-1]*p[1,1]
+pstar2[i-1]*p[2,1])*dghyp(x=data[i-1,],
object=ghyp1)
post2 <- (pstar1[i-1]*p[1,2]
+pstar2[i-1]*p[2,2])*dghyp(x=data[i-1,],
object=ghyp2)
pstar1[i] <- post1 / (post1 + post2)
pstar2[i] <- post2 / (post1 + post2)
}
}
psdf <- data.frame(pstar1=pstar1,pstar2=pstar2)
psdf$state <- NA
psdf$state[psdf$pstar2 >= .5] <- 2
psdf$state[psdf$pstar1 >= .5] <- 1
psdf <- psdf[,c(3,1,2)] #reorder state var first
return(list(ghyp.posterior=psdf,
ghyp.state1=fitghs1,
ghyp.state2=fitghs2,
gaussian.posterior=posterior(fmsp),
pi=pi,
p=p) )
} # end function
To run the ghypMarkov()
function we first need to assign the response series as a list according to the depmix()
convention, and then define the family of distributions for each series to be used by depmix()
as well.
library(depmixS4)
library(ghyp)
# run functions
response8 <- list(FTSE.100~1,DAX~1,CAC.40~1,S.P.500~1,Nikkei.225~1,
Hang.Seng~1,Bovespa~1,IPC~1)
family8 <- list(gaussian(),gaussian(),gaussian(),gaussian(),
gaussian(),gaussian(),gaussian(),gaussian())
# 8 indices
fit.ghypMarkov <- ghypMarkov(data = ret,
response = response8,
family = family8)
## iteration 0 logLik: 68382
## iteration 5 logLik: 72595
## iteration 10 logLik: 72605
## iteration 15 logLik: 72605
## iteration 20 logLik: 72605
## iteration 25 logLik: 72605
## iteration 30 logLik: 72605
## iteration 35 logLik: 72605
## converged at iteration 36 with logLik: 72605
The parameters for the different states are now easily extracted from the resulting list:
fit.ghypMarkov$ghyp.state1
## Symmetric Generalized Hyperbolic Distribution:
##
## Parameters:
## lambda alpha.bar
## -3.158e+00 2.898e-07
##
## mu:
## [1] -0.005207 -0.006357 -0.006535 -0.003614 -0.003290 -0.004208 -0.007474
## [8] -0.005371
##
## sigma:
## FTSE.100 DAX CAC.40 S.P.500 Nikkei.225 Hang.Seng
## FTSE.100 0.0007345 0.0008027 0.0008261 4.456e-04 2.160e-04 0.0003905
## DAX 0.0008027 0.0011225 0.0010499 5.765e-04 2.443e-04 0.0004203
## CAC.40 0.0008261 0.0010499 0.0010877 5.497e-04 2.571e-04 0.0004368
## S.P.500 0.0004456 0.0005765 0.0005497 5.997e-04 6.588e-05 0.0001930
## Nikkei.225 0.0002160 0.0002443 0.0002571 6.588e-05 5.780e-04 0.0004018
## Hang.Seng 0.0003905 0.0004203 0.0004368 1.930e-04 4.018e-04 0.0007299
## Bovespa 0.0008176 0.0009738 0.0009601 7.603e-04 2.618e-04 0.0005297
## IPC 0.0006176 0.0007533 0.0007502 6.081e-04 1.841e-04 0.0003946
## Bovespa IPC
## FTSE.100 0.0008176 0.0006176
## DAX 0.0009738 0.0007533
## CAC.40 0.0009601 0.0007502
## S.P.500 0.0007603 0.0006081
## Nikkei.225 0.0002618 0.0001841
## Hang.Seng 0.0005297 0.0003946
## Bovespa 0.0018915 0.0010959
## IPC 0.0010959 0.0010377
##
## gamma:
## [1] 0 0 0 0 0 0 0 0
##
## log-likelihood:
## 18160
##
##
## Call:
## fit.ghypmv(data = data[s1, ], symmetric = T, silent = T, save.data = T, trace = T)
fit.ghypMarkov$ghyp.state2
## Symmetric Generalized Hyperbolic Distribution:
##
## Parameters:
## lambda alpha.bar
## 4.259670 0.004611
##
## mu:
## [1] 0.001419 0.002020 0.001749 0.001007 0.001143 0.001494 0.002462 0.002105
##
## sigma:
## FTSE.100 DAX CAC.40 S.P.500 Nikkei.225 Hang.Seng
## FTSE.100 7.347e-05 6.307e-05 6.496e-05 2.266e-05 1.883e-05 2.374e-05
## DAX 6.307e-05 1.122e-04 9.361e-05 3.304e-05 2.583e-05 2.674e-05
## CAC.40 6.496e-05 9.361e-05 1.053e-04 2.976e-05 2.341e-05 2.626e-05
## S.P.500 2.266e-05 3.304e-05 2.976e-05 6.198e-05 1.288e-06 5.358e-06
## Nikkei.225 1.883e-05 2.583e-05 2.341e-05 1.288e-06 1.812e-04 6.614e-05
## Hang.Seng 2.374e-05 2.674e-05 2.626e-05 5.358e-06 6.614e-05 1.305e-04
## Bovespa 4.834e-05 6.008e-05 6.006e-05 5.802e-05 1.577e-05 3.096e-05
## IPC 3.653e-05 4.734e-05 4.614e-05 4.647e-05 1.278e-05 2.358e-05
## Bovespa IPC
## FTSE.100 4.834e-05 3.653e-05
## DAX 6.008e-05 4.734e-05
## CAC.40 6.006e-05 4.614e-05
## S.P.500 5.802e-05 4.647e-05
## Nikkei.225 1.577e-05 1.278e-05
## Hang.Seng 3.096e-05 2.358e-05
## Bovespa 3.142e-04 1.035e-04
## IPC 1.035e-04 1.385e-04
##
## gamma:
## [1] 0 0 0 0 0 0 0 0
##
## log-likelihood:
## 63467
##
##
## Call:
## fit.ghypmv(data = data[s2, ], symmetric = T, silent = T, save.data = T, trace = T)
filSGH10 <- filter(fit.ghypMarkov[[1]][,2],filter = rep(1/10,10),
method = "convolution",sides = 1)
filGaus10 <- filter(fit.ghypMarkov[[4]][,2],filter = rep(1/10,10),
method = "convolution",sides = 1)
filSGH100 <- filter(fit.ghypMarkov[[1]][,2],filter = rep(1/100,100),
method = "convolution",sides = 1)
filGaus100 <- filter(fit.ghypMarkov[[4]][,2],filter = rep(1/100,100),
method = "convolution",sides = 1)
par(mfrow=c(2,1))
par(mar=c(2,4.3,2.1,1.5))
matplot(cbind(fit.ghypMarkov[[1]][,2],
fit.ghypMarkov[[4]][,2],
filSGH100,
filGaus100),
alpha=0.5,
type="l",ylab=expression(P(xi[1])),
main="Gaussian vs SGH Markov Posterior Probabilities",
lty=c(1,1,1,1),lwd=c(1,1,3,3)
)
## Warning: "alpha" is not a graphical parameter
## Warning: "alpha" is not a graphical parameter
## Warning: "alpha" is not a graphical parameter
## Warning: "alpha" is not a graphical parameter
## Warning: "alpha" is not a graphical parameter
## Warning: "alpha" is not a graphical parameter
matplot(cbind(fit.ghypMarkov[[1]][,2],
fit.ghypMarkov[[4]][,2],
filSGH10,
filGaus10),
type="l",xlim=c(2300,2400),
main="Zoom into 100pd Window",
ylab=expression(P(xi[i1])),lty=c(1,1,1,1), lwd=c(1,1,2,2)
)
legend(x=2370,y=.8,legend=c("SGH","Gaussian","SGH-10pdMA","Gaus-10pdMA"),
col=c('black','red','green','blue'),
lty=c(1,1,1,1),lwd=c(1,1,2,2))
retMarkov <- function(data,
posterior)
{
#returns by hidden state
mret <- data.frame(state1=rep(NA,dim(data)[2]),state2=rep(NA,dim(data)[2]))
for (i in 1:dim(data)[2]) {
mret[i,1] <- mean(data[posterior[,1]==1,i])
mret[i,2] <- mean(data[posterior[,1]==2,i])
}
row.names(mret) <- names(data)
mrettot <- data.frame(state1=rep(NA,1),state2=rep(NA,1))
mrettot[1,1] <- mean(data[posterior[,1]==1,])
mrettot[1,2] <- mean(data[posterior[,1]==2,])
if(mrettot[1,1]>mrettot[1,2]) {
colnames(mret) <- c("Optimistic", "Pessimistic")
colnames(mrettot) <- c("Optimistic", "Pessimistic")
} else {
colnames(mret) <- c("Pessimistic", "Optimistic")
colnames(mrettot) <- c("Pessimistic", "Optimistic")
}
return(list(returns.by.index=mret, returns.total=mrettot))
} #end function
ret8 <- retMarkov(data = ret, posterior = fit.ghypMarkov[[1]])
plot(ret8$returns.by.index$Optimistic,type='o',col='dark green',ylim=c(-.002,.001),main="Profile of Log Returns by Hidden Markov State \n8 Indices",ylab="Log Returns");lines(ret8$returns.by.index$Pessimistic,type='o',col='red');abline(h = 0);abline(h=ret8$returns.total$Optimistic,lty=3,col='dark green');abline(h=ret8$returns.total$Pessimistic,lty=3,col='red');legend(x = 1,y = -.0012,legend = c("Optimistic","Pessimistic"),lty = 1,col = c('dark green','red'))