## Unit root testing:
library(Rmetrics)
help(UnitrootTests)
gdp=read.csv("GDPannualreal.csv",header=T)
gdp.return=returns(gdp$VALUE)[-1]

adfTest(gdp.return,lags=5)  ## We assume AR(5)

Title:
 Augmented Dickey-Fuller Test

Test Results:
  PARAMETER:
    Lag Order: 5
  STATISTIC:
    Dickey-Fuller: -1.5463
  P VALUE:
    0.1203            ## Seems that the null hypothesis of existence of a unit root cannot be rejected

Description:
 Mon Jun 16 11:51:14 2008 by user: Ion

### LOOK AT: adfTest(log(gdp$VALUE),lags=4) ## What does this do and what does the result mean?


## A better analysis (more caferful).

ord=ar(gdp.return)

> ord

Call:
ar(x = gdp.return)

Coefficients:
      1        2        3        4        5
 0.5495   0.0237  -0.1332  -0.2981   0.1827

Order selected 5  sigma^2 estimated as  0.001619           ## Seems that an order 5 is estimated

adfTest(gdp.return,lags=5)            ## same as we did before

## What about the difference series

ar(diff(gdp.return))    ## also 5

adfTest(diff(gdp.return),lags=5)    ## This gives a p-value much smaller that means that the unit root is rejected for the difference series. Means that the I in ARIMA is only 1

#############
#SEASONAL time series:


jnj.data=read.csv("JNJmonthlySeasonal.csv",header=T)   ## jnj.data=read.csv("BritishAIRwaysmonthlySeasonal.csv",header=T)
jnj.prices= jnj.data$Adj.Close[length(jnj.data$Adj.Close):1] ## recall that data from yahoo is presented with the most recent date on top

plot(jnj.prices,type="l") ## Plot of data. note the huge increase in share value - cannot see anything

jnj.logprices=log(jnj.prices)

plot(jnj.logprices,type="l")        ## this is to remove the exponential increase and see better
points(jnj.logprices,pch=16)## can't really see much

plot(jnj.logprices[1:100],type="l")
points(jnj.logprices[1:100],pch=16,col="red")

acf(jnj.logprices,lag.max=16)

jnj.return=diff(jnj.logprices)
acf(jnj.return,lag.max=16)

m1=arima(jnj.return,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12))
m1

tsdiag(m1) # Model checking
Box.test(m1$resid,lag=10,type='Ljung')  ##  recall H0 means uncorrelated resid (good)

f1=predict(m1,8) # prediction
names(f1)
f1

##Book looks at Quaterly data so let us do that too:
jnj.prices= jnj.data$Adj.Close[length(jnj.data$Adj.Close):1]
quaterly.sequence=seq(1,length(jnj.prices),by=3)

jnj.prices=jnj.prices[quaterly.sequence]

## Now repeat the code above for this new data.

jnj.logprices=log(jnj.prices)
plot(jnj.logprices,type="l")
points(jnj.logprices,pch=16)

acf(jnj.logprices,lag.max=16)   ## figure 2.14.a) page 74

jnj.return=diff(jnj.logprices)
acf(jnj.return,lag.max=16)     ## similar with 2.14.b)


## seasonal difference:
sjnj.logprices= diff(jnj.logprices,4)
acf(sjnj.logprices,lag.max=16)    ## Plot of 2.14 c)
sjnj.return=diff(jnj.return,4)    ## yearly = 4 quarters
acf(sjnj.return,lag.max=16)      ##  Plot of 2.14 d)
## last shows  negative at lag 4 and positive at lag 12

##define the airline model:
m1=arima(jnj.return,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=4))
m1

tsdiag(m1) # Model checking
f1=predict(m1,8) # prediction for 2 years
names(f1)
f1

## Let us see how good the model is: construct the model without the last 8 observations then predict them:
m2=arima(jnj.return[1:(length(jnj.return)-8)],order=c(0,1,1),seasonal=list(order=c(0,1,1),period=4))
f2=predict(m2,8)


plot((length(jnj.return)-20):length(jnj.return),jnj.return[(length(jnj.return)-20):length(jnj.return)],type="l")
points((length(jnj.return)-7):length(jnj.return),f2$pred,col="red")
lines((length(jnj.return)-7):length(jnj.return),f2$pred+1.96*f2$se,col="green")
lines((length(jnj.return)-7):length(jnj.return),f2$pred-1.96*f2$se,col="green")

## TRY it yourself with your own seasonal time series

##************************************
## For this part we will set apart 12 months from the data, build models based on the inital kept data and see how well they forecast.

#Other data:
## From federal reserves monthly value of Total Consumer Loans Owned by Commercial Banks in Billions from 1942 to present
comercialloan=read.csv("FRB_G19.csv", skip=5,header=T)
names(comercialloan)
plot(comercialloan[,1],comercialloan[,2],type="l")

plot(comercialloan[,1],log(comercialloan[,2]),type="l")
plot(comercialloan[,1],returns(comercialloan[,2]),col="red")
lines(comercialloan[,1],returns(comercialloan[,2]))

#Unit root testing:
ar(comercialloan[,2])
adfTest(comercialloan[,2],lags=1)   ## clearly has a unit root as indicated by the coeff in the AR model as well

ar(log(comercialloan[,2]))
adfTest(log(comercialloan[,2]),lags=1)    ## same story

#Now work with the returns which should eliminate the unit root if there is only one.
comercialloanreturns=returns(comercialloan[,2])[-1]
ar(comercialloanreturns)
adfTest(comercialloanreturns,lags=25)       ## pvalue is small indicating that there is no unit root anymore.

#usual plots
acf(comercialloanreturns,lag.max=16)
pacf(comercialloanreturns,lag.max=16)

###### NOW model building. Create data for model building and forecasting:
cloan2008=comercialloanreturns[(length(comercialloanreturns)-11):length(comercialloanreturns)]
comercialloanreturns.full=comercialloanreturns
comercialloanreturns = comercialloanreturns[1:(length(comercialloanreturns)-12)]
ar(comercialloanreturns)
adfTest(comercialloanreturns,lags=25)

## So let us try a ARMA model for this (previous one was an AR.
## determine order:
library(tseries)
eacf.tables(comercialloanreturns)  ## Not really clear 

#ANOTHER version of eacf is already codded in TSA package:
library(TSA)
eacf(comercialloanreturns)

## using AIC:

max.p=15
max.q=15
model.aic=matrix(,nrow=max.p+1,ncol=max.q+1)
for (p in 0:max.p){
  for (q in 0:max.q){
  model.aic[p+1,q+1]= arima(comercialloanreturns,order=c(p,0,q))$aic
  }
}                   ## takes forever
 print(model.aic)
           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]      [,9]     [,10]     [,11]     [,12]     [,13]     [,14]     [,15]     [,16]
 [1,] -4579.610 -4810.313 -4896.645 -4894.656 -4941.883 -4949.437 -4958.285 -4957.564 -4965.640 -4970.253 -5012.913 -5017.241 -5077.807 -5116.024 -5123.726 -5122.169
 [2,] -4919.428 -4927.051 -4962.512 -4966.046 -4990.833 -4994.029 -4992.348 -5005.798 -5012.557 -5010.634 -5027.899 -5025.900 -5114.947 -5120.824 -5115.721 -5120.130
 [3,] -4921.476        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
 [4,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
 [5,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
 [6,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
 [7,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
 [8,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
 [9,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
[10,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
[11,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
[12,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
[13,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
[14,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA
[15,]        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA        NA

#looking at this table let us try several more models in the range  p+1=3:15 q+1=6:15

for (p in 2:max.p){
  for (q in 7:max.q){
  model.aic[p+1,q+1]= arima(comercialloanreturns,order=c(p,0,q))$aic
  }
}                   ## takes forever

for (p in 3:max.p){
  for (q in 11:max.q){
  model.aic[p+1,q+1]= arima(comercialloanreturns,order=c(p,0,q))$aic
  }
}   

model.aic[is.na(model.aic)]=10000 # to eliminate the NA values and replace them with a large value

which.min(model.aic)  ## gives the index of the element in the matrix as if it were a long vector
which(model.aic==min(model.aic),arr.ind=T)

     row col
[1,]   3  10           ## Seems that an ARMA(2,9) would be appropriate  ## An MA 14 seems also to have low AIC

## A better 

model1= arima(comercialloanreturns,order=c(25,0,0))
model2= arima(comercialloanreturns,order=c(0,0,14))
model3= arima(comercialloanreturns,order=c(2,0,9))

## What about seasonality? Since this is monthly data perhaps we can try an airplane type model with 12 seasonal lag:

model4= arima(comercialloanreturns,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12))

#NOW let us compare the three:

plot((length(comercialloanreturns.full)-20):length(comercialloanreturns.full),comercialloanreturns.full[(length(comercialloanreturns.full)-20):length(comercialloanreturns.full)],type="l")
lines((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model1,12)$pred,col="green")
lines((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model2,12)$pred,col="blue")
lines((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model3,12)$pred,col="red")
lines((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model4,12)$pred,col="magenta")

points((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model1,12)$pred,col="green")
points((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model2,12)$pred,col="blue")
points((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model3,12)$pred,col="red")
points((length(comercialloanreturns.full)-11):length(comercialloanreturns.full),predict(model4,12)$pred,col="magenta")

## What about errors?

#model1:
sum((cloan2008-predict(model1,12)$pred)^2)
[1] 0.0006412673
sum((cloan2008-predict(model2,12)$pred)^2)
[1] 0.00053344
sum((cloan2008-predict(model3,12)$pred)^2)
[1] 0.000849362
sum((cloan2008-predict(model4,12)$pred)^2)
[1] 0.000775068

### All small. Usually in such a case the more parcimonius model is chosen.