## Package needed Rmetrics # R metrics is a collection of libraries. It is not available as a package anymore instead we need this: source("http://www.rmetrics.org/Rmetrics.R") ## this connects to the file and reads the R code there install.Rmetrics() ## THis is a wrapper for the packages needed ## The idea is that doing this way we always have the most up to date packages ## It only installs the packages to access the functions available the packages still have to be loaded with library("package") ## the package FinTS contains functions specifically designed for the book by Dr. Spencer Graves. You can install it using the menu then load using: library(FinTS) sp500daily=read.csv("GSPCdaily14May2009.csv",header=T) ## these is a file I downloaded from yahoo finance MSFT.day.data=read.csv("TestMSFT20051111.csv",header=T) ## First file is daily data, secon file is minute data from some random day sp500daily$Date ## note that the first file contains data in inverse chronological order sp500.price= sp500daily$Adj.Close[length(sp500daily$Adj.Close):1] ## this puts the data in proper order (first line is the oldest) sp500.date= sp500daily$Date[length(sp500daily$Adj.Close):1] sp500.dayreturn=diff(sp500.price)/ sp500.price[-length(sp500.price)] ## daily simple return sp500.logreturn=diff(log(sp500.price)) ## Cont compounded return hist(sp500.dayreturn) hist(sp500.logreturn) par(mfrow=c(1,2)) hist(sp500.dayreturn, freq=F,xlim=c(-0.25,0.1)) hist(sp500.logreturn,freq=F,xlim=c(-0.25,0.1)) par(mfrow=c(1,1)) plot(1:length(sp500.price),sp500.price,type="l") plot(1:length(sp500.dayreturn),sp500.dayreturn,type="l") lines(1:length(sp500.logreturn),sp500.logreturn,col="red") lines(1:length(sp500.dayreturn),sp500.dayreturn,col="green") ### Fancier stuff: date.sp500=strptime(sp500.date, "%Y-%m-%d") ## this tells to strip time in a format that R understands plot(date.sp500[1:length(sp500.price)],sp500.price,type="l",xaxt="n",xlab="Time", ylab="Rsquared") r <- as.POSIXct(round(range(date.sp500), "months")) axis.POSIXct(1, at=seq(r[1], r[2], by="month"), format="%b") axis.POSIXct(3, at=seq(r[1], r[2], by="year"), format="%b%y") ############### hist(sp500.dayreturn, freq=F) points(density( sp500.dayreturn),type="l",col="blue") hist(sp500.dayreturn, freq=F,ylim=c(0,50)) points(density( sp500.dayreturn),type="l",col="blue") points(density( sp500.dayreturn,width=0.03),type="l",col="lightblue") points(density( sp500.dayreturn,width=0.05),type="l",col="lightgreen") points(density( sp500.dayreturn,kernel="gaussian"),type="l",col="red") points(density( sp500.dayreturn,kernel="epanechnikov"),type="l",col="orange") points(density( sp500.dayreturn,kernel="cosine"),type="l",col="yellow") qqnorm(sp500.dayreturn) #calcultes basic stats mean(sp500.logreturn) sd(sp500.logreturn) library(fBasics) ## This loads the package fBasic basicStats(sp500.logreturn) ## all the stats ## some stats can be accessed directly skewness(sp500.logreturn) kurtosis(sp500.logreturn) ## Excess kurtosis mean(sp500.dayreturn) sd(sp500.dayreturn) skewness(sp500.dayreturn) kurtosis(sp500.dayreturn) basicStats( sp500.dayreturn) t.test(sp500.dayreturn) basicStats( sp500.logreturn) t.test(sp500.logreturn) ## Normality tests ## Check documentation on normalTest ?normalTest normalTest(sp500.dayreturn,method="jb") #jbTest(sp500.dayreturn) ## Linear time series models: acf(sp500.dayreturn,lag=15) # Obtain the ACF plot s1=acf(sp500.dayreturn,lag=15) # Obtain the ACF plot and more. names(s1) s1$acf s2=pacf(sp500.dayreturn,lag=15) s2$acf Box.test(sp500.dayreturn,lag=10) Box.test(sp500.dayreturn,lag=10,type="Ljung") length(sp500.dayreturn) par(mfcol=c(2,2)) # put 4 plots on one page plot(sp500.dayreturn,type='l') # first plot plot(sp500.dayreturn[1:(length(sp500.dayreturn)-1)],sp500.dayreturn[2:length(sp500.dayreturn)]) # lag 1 plot plot(sp500.dayreturn[1:(length(sp500.dayreturn)-2)],sp500.dayreturn[3:length(sp500.dayreturn)]) # lag 2 plot acf(sp500.dayreturn,lag=15) par(mfcol=c(1,1)) model1=ar( sp500.dayreturn ,method="mle") # Automatic AR fitting using AIC criterion. model1 ## AR(12) is specified names(model1) ## tsdiag(model1) ## diagnostic plots - does not work for some reason plot(model1$resid,type='l') ## checks residuals Box.test(model1$resid,lag=10,type='Ljung') model1$x.mean # Predicted overal mean value (daily) [1] 0.0003425036 #i.e. annual return ## Another approach with order specified model2= arima(sp500.dayreturn, order=c(12,0,0)) model2 tsdiag(model2) Box.test(model2$residuals,lag=10,type='Ljung') plot(model2$residuals,type='l') ## Further analysis: poly1=c(1,-model2$coef[1:12]) roots=polyroot(poly1) roots Mod(roots) ## All roots lie outside the unit circle therefore stationary time series k=2*pi/acos(0.697312/1.368456) k 6.064139 ## days ## Prediction predict(model2,10) ## predict 10 days ahead ################ Same analysis for minute data