#Multivariate time series ## Data is 5 technology companies msft.daily=read.csv("MSFT2yearAug2009.csv",header=T) intel.daily=read.csv("INTC2yearAug2009.csv",header=T) ibm.daily=read.csv("IBM2yearAug2009.csv",header=T) amd.daily=read.csv("AMD2yearAug2009.csv",header=T) nvidia.daily=read.csv("NVIDIA2yearAug2009.csv",header=T) #Now let us make a big matrix for my data library(timeSeries) ## to calculate the returns MSFT= returns(msft.daily$Adj.Close[length(msft.daily$Adj.Close):1])[-1] INTC= returns(intel.daily$Adj.Close[length(intel.daily$Adj.Close):1])[-1] IBM= returns(ibm.daily$Adj.Close[length(ibm.daily$Adj.Close):1])[-1] AMD= returns(amd.daily$Adj.Close[length(amd.daily$Adj.Close):1])[-1] NVDA= returns(nvidia.daily$Adj.Close[length(nvidia.daily$Adj.Close):1])[-1] z=cbind(MSFT,INTC,IBM,AMD,NVDA) ## Now to calculate multivariate Ljung-Box test ## This is from Tsay's website ## Select and run the function below: "mq" <- function(x,lag){ # Compute multivariate Ljung-Box test statistics # if(!is.matrix(x))x=as.matrix(x) nr=dim(x)[1] nc=dim(x)[2] g0=var(x) ginv=solve(g0) qm=0.0 print("m, Q(m) and p-value:") df = 0 for (i in 1:lag){ x1=x[(i+1):nr,] x2=x[1:(nr-i),] g = cov(x1,x2) g = g*(nr-i-1)/(nr-1) h=t(g)%*%ginv%*%g%*%ginv qm=qm+nr*nr*sum(diag(h))/(nr-i) df=df+nc*nc pv=1-pchisq(qm,df) print(c(i,qm,pv),digits=3) } } ## now to check: mq(z,10) ## 10= max lag ## there are significant correlations at all lags ## Let us fit a VAR model: model1=ar(z,order.max=20) model1$order # it fitted a VAR(1) model1 Call: ar(x = z, order.max = 20) $ar , , 1 ## the phi matrix MSFT INTC IBM AMD NVDA MSFT -0.00705 -0.000265 -0.06227 -0.09838 -0.007989 INTC -0.10789 -0.054524 0.02563 -0.04906 0.014611 IBM -0.02922 0.010867 0.02948 -0.07143 0.024338 AMD -0.01015 -0.163284 0.30266 -0.07877 0.042011 NVDA -0.13027 0.109202 0.08214 -0.08715 0.022907 $var.pred ## estimated variance matrix (capital sigma) MSFT INTC IBM AMD NVDA MSFT 0.0007432 0.0005510 0.0003522 0.0006056 0.0006506 INTC 0.0005510 0.0008404 0.0004157 0.0008173 0.0009502 IBM 0.0003522 0.0004157 0.0004340 0.0005301 0.0005227 AMD 0.0006056 0.0008173 0.0005301 0.0023947 0.0011327 NVDA 0.0006506 0.0009502 0.0005227 0.0011327 0.0022836 predict(model1,n.ahead=10) ## More functions from Tsay's website: "VARorder" <- function(x,maxp=0){ # Compute the AIC, BIC, HQ, amd M-statistics for AR order selection. # Created by Ruey S. Tsay. Latest version done on April 7, 2009. # x1=as.matrix(x) T=nrow(x1) k=ncol(x1) ksq=k*k s=cov(x1) olsdet=rep(0,(maxp+1)) olsdet[1]=log(det(s)) chidet=olsdet chidet[1]=log(det(s*((T-1)/T))) if(maxp < 1) maxp=max(1,2*floor(log(T))) aic=rep(0,(maxp+1)) aic[1]=olsdet[1] bic=aic hq=aic for (p in 1:maxp){ idm=k*p+1 ne=T-p ist=p+1 y=x1[ist:T,] xmtx=cbind(rep(1,ne),x1[p:(T-1),]) if(p > 1){ for (i in 2:p){ xmtx=cbind(xmtx,x1[(ist-i):(T-i),]) } } y=as.matrix(y) xmtx=as.matrix(xmtx) xpx = t(xmtx)%*%xmtx xpxinv=solve(xpx) xpy=t(xmtx)%*%y beta=xpxinv%*%xpy yhat=xmtx%*%beta resi=y-yhat sse=(t(resi)%*%resi)/ne #print(paste("For p = ",p,"residual variance is", sse)) d1=log(det(sse)) aic[p+1]=d1+(2*p*ksq)/T bic[p+1]=d1+(log(T)*p*ksq)/T hq[p+1]=d1+(2*log(log(T))*p*ksq)/T olsdet[p+1]=d1 chidet[p+1]=log(det(sse*(ne/T))) } maic=min(aic) aicor=c(1:(maxp+1))[aic==maic]-1 mbic=min(bic) bicor=c(1:(maxp+1))[bic==mbic]-1 mhq=min(hq) hqor=c(1:(maxp+1))[hq==mhq]-1 ch=rep(0,maxp) pv=rep(0,maxp) for (j in 1:maxp){ ch[j]=(T-k-j-1-1.5)*(chidet[j]-chidet[j+1]) pv[j]=1-pchisq(ch[j],ksq) } Mor1=0 Mor5=0 for (i in 1:maxp){ if(pv[i] < 0.05)Mor5=i if(pv[i] < 0.01)Mor1=i } cat("Order selected by aic = ",aicor,"\n") cat("Order selected by bic = ",bicor,"\n") cat("Order selected by hq = ",hqor,"\n") cat("Order selected by Chi-sq(5%) = ",Mor5,"\n") cat("Order selected by Chi-sq(1%) = ",Mor1,"\n") VARorder<-list(aic=aic,aicor=aicor,bic=bic,bicor=bicor,hq=hq,hqor=hqor,Mstat=ch,Mpv=pv) } ### Calling it: VARorder(z,30) ############################### "VAR" <- function(x,p=1){ # Fits a vector AR(p) model, computes its AIC, BIC, and residuals if(!is.matrix(x))x=as.matrix(x) T=nrow(x) k=ncol(x) idm=k*p+1 ne=T-p ist=p+1 y=x[ist:T,] xmtx=cbind(rep(1,ne),x[p:(T-1),]) if(p > 1){ for (i in 2:p){ xmtx=cbind(xmtx,x[(ist-i):(T-i),]) } } y=as.matrix(y) xmtx=as.matrix(xmtx) xpx = t(xmtx)%*%xmtx xpxinv=solve(xpx) xpy=t(xmtx)%*%y beta=xpxinv%*%xpy yhat=xmtx%*%beta resi=y-yhat sse=(t(resi)%*%resi)/ne c=beta[1,] dd=diag(xpxinv) sdbeta=matrix(0,idm,k) for (i in 1:k){ sdbeta[,i]=sqrt(sse[i,i]*dd) } se=sdbeta[1,] cat("Constant term:","\n") print(c,digits=3) cat("Std error","\n") print(se,digits=3) cat("AR coefficient matrix","\n") jst=1 for (i in 1:p){ cat("AR(",i,")-matrix:","\n") phi=t(beta[(jst+1):(jst+k),]) se=t(sdbeta[(jst+1):(jst+k),]) print(phi,digits=3) cat("Standard errors:","\n") print(se,digits=3) jst=jst+k cat(" ","\n") } cat("Residuals cov-mtx:","\n") print(sse) dd=det(sse) cat(" ","\n") cat("det(sse) =",dd,"\n") d1=log(dd) aic=d1+(2*p*k*k)/T bic=d1+log(T)*p*k*k/T cat("AIC = ",aic,"\n") cat("BIC = ",bic,"\n") VAR<-list(coef=beta,sigma=sse,aic=aic,bic=bic,residuals=resi,secoef=sdbeta) } #################################### model2=VAR(z,1) ## Notice that it also provides the phi zero term ## also note that the estimates are very similar with the ones provided before. ## If we removed the mean first from the matrix of returns should be identical ###################################### "VARpred" <- function(m1,x,orig,h=1){ # Compute the 1, 2, ..., h-step ahead forecats of a VAR(p) model at the forecast origin orig. # x: vector time series # m1: VAR(p) model fitted by VAR.R # orig: origin of forecast # H: forecast horizon. # Note: constant term is always in the model. (This can be modified if VAR.R is also changed.) # Created by Ruey S. Tsay, April 2009. # if(!is.matrix(x))x=as.matrix(x) T=nrow(x) k=ncol(x) if(h < 1)h=1 if(orig > T)orig=T beta=m1$coef Sig=m1$sigma p=floor(nrow(beta)-1)/k fore=matrix(0,h,k) sefore=matrix(0,h,k) # comput psi-weight matrices to calculate forecast errors mm=VARpsi(m1,h) Psi=mm$psi cat("Forecats at origin: ",orig,"\n") SS=matrix(0,k,k) for (j in 1:h){ t=orig+j xmtx=c(1) for(i in 1:p){ if((t-i) <= T){ xmtx=c(xmtx,x[(t-i),]) } else{ ii=(t-i)-T xmtx=c(xmtx,fore[ii,]) } } yhat=xmtx%*%beta fore[j,]=yhat idx=(j-1)*k w1=Psi[,(idx+1):(idx+k)] SS=SS+w1%*%Sig%*%t(w1) fse=sqrt(diag(SS)) sefore[j,]=fse } cat("Predictions: ","\n") print(fore,digits=3) cat(" ","\n") cat("S.E. of predictions:","\n") print(sefore,digits=3) VARpred <- list(prediction=fore,se=sefore) } ## Note that you need this function as well "VARpsi" <- function(m1,lag){ # Computes the psi-weight matrices of a VAR(p) model. # m1: VAR.R output object. # Created by Ruey S. Tsay, April 2009. # beta=m1$coef Phi=t(beta) # Compute MA representions k=nrow(Phi) m=ncol(Phi) p=floor(m/k) Si=diag(rep(1,k)) if(p < 1) p =1 if(lag < 1) lag=1 # for (i in 1:lag){ if (i <= p){ idx=(i-1)*k+1 tmp=Phi[,(idx+1):(idx+k)] } else{ tmp=matrix(0,k,k) } # jj=i-1 jp=min(jj,p) if(jp > 0){ for(j in 1:jp){ jdx=(j-1)*k+1 idx=(i-j)*k w1=Phi[,(jdx+1):(jdx+k)] w2=Si[,(idx+1):(idx+k)] tmp=tmp+w1%*%w2 ##print(tmp,digits=4) } } Si=cbind(Si,tmp) } VARpsi <- list(psi=Si) } ############################ VARpred(model2,z, orig=length(z[,1]), h=10) ## this tsay function did not work for me. ############################ ## Cointegration ############################ "coint" <- function(x,lags=0,trend=1){ # Perform Johansen's co-integration test. # trend = 0 means no constant # = 1 means with a constant # = 2 means with time trend. # T=nrow(x) k=ncol(x) y=as.matrix(x) # setup the differenced data dy=cbind(rep(0,k),t(diff(y))) dy=t(dy) #for (i in 1:k){ #dy[,i]=c(0,diff(y[,i])) #} ist = lags+1 ut=dy[(ist+1):T,] vt=y[ist:(T-1),] # Run regression to remove the impact of stationary part or trend. nobe=T-ist idx=0 if( (trend > 0) || (lags > 0)){ idim=trend + lags*k ##print(c(idim,nobe)) bx=matrix(0,nobe,idim) if(trend >0){ bx[,1]=rep(1,nobe) idx=1 if(trend > 1){ idx=2 bx[,2]=c(1:nobe) } } if(lags > 0){ for (j in 1:lags){ bx[,(idx+1):(idx+k)]=dy[(ist-j+1):(T-j),] idx=idx+k } } xpx=t(bx)%*%bx ##print(xpx) xpxinv=solve(xpx) y1=dy[(ist+1):T,] xpy=t(bx)%*%y1 b1=xpxinv%*%xpy ut=y1-bx%*%b1 zm1=y[ist:(T-1),] xpy=t(bx)%*%zm1 b2=xpxinv%*%xpy vt=zm1-bx%*%b2 } # Perform eigenvalues &eigenvector analysis S00=t(ut)%*%ut/(T-lags) S01=t(ut)%*%vt/(T-lags) S11=t(vt)%*%vt/(T-lags) print("S00-mtx") print(S00) print("S01-mtx") print(S01) print("S11-mtx") print(S11) S00inv=solve(S00) S11inv=solve(S11) A=S11inv%*%t(S01) A1=S00inv%*%S01 AA=A%*%A1 print("The U-mtx") print(AA) mm=eigen(AA) ev=mm$values #print(ev) evto=mm$vectors lktr=rep(0,k) lkmx=rep(0,k) for (j in 1:k){ lkmx[j]=-nobe*log(1-ev[j]) } #print(lkmx) lktr[k]=lkmx[k] for (j in 1:(k-1)){ lktr[(k-j)]=lktr[(k-j+1)]+lkmx[(k-j)] } #print(lktr) rnk=c(1:k)-1 resu=cbind(rnk,ev,lktr,lkmx) print("Rank, Eigenvalue, trace, & max-eig") print(resu,digits=3) #print(paste("selected order: aic =",aicor)) coint<-list(trace=lktr,maxeig=lkmx,eigvalue=ev,eigvector=evto) }