GPD

EDA

Show the code
gdp <- read.csv('data/Employment59.csv')
gdp$DATE <- as.Date(gdp$DATE)
gdp_ts <- ts(gdp$PAYEMS, star= c(1959,1),frequency = 4)
autoplot(gdp_ts)+ggtitle("US GDP")

Show the code
acf(gdp_ts)

Show the code
dec2=decompose(gdp_ts,type = "multiplicative")
plot(dec2)

Show the code
gdp_ts %>% diff() %>% ggtsdisplay() #first ordinary differencing

Show the code
gdp_ts %>% diff(lag = 4) %>% ggtsdisplay() #first ordinary differencing

Determine Parameters

Show the code
gdp_ts %>% diff(lag=4) %>% diff() %>% ggtsdisplay() #do both

p:0,1,3 d:2 q:1,2,3 P:1,2,3 D:2 Q:1

Show the code
######################## Check for different combinations ########


#write a funtion
SARIMA.c=function(p1,p2,q1,q2,P1,P2,Q1,Q2,data){
  
  #K=(p2+1)*(q2+1)*(P2+1)*(Q2+1)
  
  temp=c()
  d=1
  D=1
  s=12
  
  i=1
  temp= data.frame()
  ls=matrix(rep(NA,9*35),nrow=35)
  
  
  for (p in p1:p2)
  {
    for(q in q1:q2)
    {
      for(P in P1:P2)
      {
        for(Q in Q1:Q2)
        {
          if(p+d+q+P+D+Q<=9)
          {
            
            model<- Arima(data,order=c(p-1,d,q-1),seasonal=c(P-1,D,Q-1))
            ls[i,]= c(p-1,d,q-1,P-1,D,Q-1,model$aic,model$bic,model$aicc)
            i=i+1
            #print(i)
            
          }
          
        }
      }
    }
    
  }
  
  
  temp= as.data.frame(ls)
  names(temp)= c("p","d","q","P","D","Q","AIC","BIC","AICc")
  
  temp
  
}

output = SARIMA.c(p1=1,p2=4,q1=1,q2=4,P1=1,P2=3,Q1=1,Q2=2, data = gdp_ts)
#output

knitr::kable(output)
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 4498.257 4501.782 4498.273
0 1 0 0 1 1 4348.055 4355.106 4348.104
0 1 0 1 1 0 4439.336 4446.387 4439.384
0 1 0 1 1 1 4350.031 4360.607 4350.128
0 1 0 2 1 0 4379.073 4389.649 4379.170
0 1 0 2 1 1 4350.737 4364.839 4350.900
0 1 1 0 1 0 4496.142 4503.193 4496.191
0 1 1 0 1 1 4348.392 4358.968 4348.489
0 1 1 1 1 0 4439.612 4450.188 4439.709
0 1 1 1 1 1 4350.294 4364.396 4350.456
0 1 1 2 1 0 4381.060 4395.162 4381.223
0 1 2 0 1 0 4488.150 4498.727 4488.248
0 1 2 0 1 1 4349.256 4363.357 4349.418
0 1 2 1 1 0 4440.078 4454.180 4440.241
0 1 3 0 1 0 4478.378 4492.480 4478.541
1 1 0 0 1 0 4495.859 4502.910 4495.907
1 1 0 0 1 1 4348.206 4358.782 4348.303
1 1 0 1 1 0 4439.428 4450.005 4439.525
1 1 0 1 1 1 4350.101 4364.203 4350.264
1 1 0 2 1 0 4381.057 4395.159 4381.220
1 1 1 0 1 0 4497.843 4508.419 4497.940
1 1 1 0 1 1 4350.038 4364.139 4350.200
1 1 1 1 1 0 4441.326 4455.428 4441.489
1 1 2 0 1 0 4489.217 4503.319 4489.380
2 1 0 0 1 0 4497.803 4508.379 4497.900
2 1 0 0 1 1 4349.683 4363.785 4349.845
2 1 0 1 1 0 4441.003 4455.105 4441.165
2 1 1 0 1 0 4499.678 4513.780 4499.841
3 1 0 0 1 0 4494.238 4508.340 4494.400
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA
Show the code
output[which.min(output$AIC),] 
  p d q P D Q      AIC      BIC     AICc
2 0 1 0 0 1 1 4348.055 4355.106 4348.104
Show the code
output[which.min(output$BIC),] 
  p d q P D Q      AIC      BIC     AICc
2 0 1 0 0 1 1 4348.055 4355.106 4348.104
Show the code
output[which.min(output$AICc),] 
  p d q P D Q      AIC      BIC     AICc
2 0 1 0 0 1 1 4348.055 4355.106 4348.104
Show the code
set.seed(123)
model_output <- capture.output(sarima(gdp_ts, 0,1,0,0,1,1,4))

Show the code
cat(model_output[21:50], model_output[length(model_output)], sep = "\n")
$fit

Call:
arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
    include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
        REPORT = 1, reltol = tol))

Coefficients:
         sma1
      -0.9492
s.e.   0.0345

sigma^2 estimated as 1852941:  log likelihood = -2172.03,  aic = 4348.06

$degrees_of_freedom
[1] 250

$ttable
     Estimate     SE  t.value p.value
sma1  -0.9492 0.0345 -27.4916       0

$AIC
[1] 17.32293

$AICc
[1] 17.32299

$BIC
[1] 17.35102

Model Fitting

Show the code
fit <- Arima(gdp_ts, order=c(0,1,0), seasonal=c(0,1,1))
summary(fit)
Series: gdp_ts 
ARIMA(0,1,0)(0,1,1)[4] 

Coefficients:
         sma1
      -0.9492
s.e.   0.0345

sigma^2 = 1860405:  log likelihood = -2172.03
AIC=4348.06   AICc=4348.1   BIC=4355.11

Training set error measures:
                   ME     RMSE      MAE       MPE      MAPE      MASE
Training set 3.482494 1347.888 492.0962 0.0137906 0.4616182 0.2031885
                    ACF1
Training set -0.08526289

forecasting

Show the code
fit %>% forecast(h=12) %>% autoplot() #next 3 years

Show the code
sarima.for(gdp_ts, 12, 0,1,0,0,1,1,4)

$pred
         Qtr1     Qtr2     Qtr3     Qtr4
2023 154768.4 154437.3 155221.5 155790.7
2024 156273.1 155942.0 156726.2 157295.5
2025 157777.8 157446.7 158231.0 158800.2

$se
         Qtr1     Qtr2     Qtr3     Qtr4
2023 1361.322 1925.194 2357.869 2722.631
2024 3075.622 3392.066 3681.408 3949.611
2025 4224.838 4483.186 4727.437 4959.674

Compare with Benchmark methods

Show the code
fit <- Arima(gdp_ts, order=c(0,1,0), seasonal=c(0,1,1))

autoplot(gdp_ts) +
  autolayer(meanf(gdp_ts, h=12),
            series="Mean", PI=FALSE) +
  autolayer(naive(gdp_ts, h=12),
            series="Naïve", PI=FALSE) +
  autolayer(snaive(gdp_ts, h=12),
            series="SNaïve", PI=FALSE)+
  autolayer(rwf(gdp_ts, h=12, drift=TRUE),
            series="Drift", PI=FALSE)+
  autolayer(forecast(fit,12), 
            series="fit",PI=FALSE) +
  guides(colour=guide_legend(title="Forecast"))

Show the code
f2 <- snaive(gdp_ts, h=12) 

accuracy(f2)
                   ME     RMSE     MAE      MPE     MAPE MASE      ACF1
Training set 1575.474 3048.225 2421.87 1.628746 2.330202    1 0.7384914
Show the code
summary(fit)
Series: gdp_ts 
ARIMA(0,1,0)(0,1,1)[4] 

Coefficients:
         sma1
      -0.9492
s.e.   0.0345

sigma^2 = 1860405:  log likelihood = -2172.03
AIC=4348.06   AICc=4348.1   BIC=4355.11

Training set error measures:
                   ME     RMSE      MAE       MPE      MAPE      MASE
Training set 3.482494 1347.888 492.0962 0.0137906 0.4616182 0.2031885
                    ACF1
Training set -0.08526289

Cross Validation

Show the code
k <- 75 # minimum data length for fitting a model  n*0.3
n <- length(gdp_ts)

i=1
err1 = c()
#err2 = c()

for(i in 1:(n-k))
{
  xtrain <- gdp_ts[1:(k-1)+i] #observations from 1 to 75
  xtest <- gdp_ts[k+i] #76th observation as the test set
  
  # Arima(gdp_ts, order=c(0,1,0), seasonal=c(0,1,1))
  fit <- Arima(xtrain, order=c(0,1,0), seasonal=c(0,1,1),include.drift=FALSE, method="ML")
  fcast1 <- forecast(fit, h=1)
  
  #capture error for each iteration
  # This is mean absolute error
  err1 = c(err1, abs(fcast1$mean-xtest)) 
  #err2 = c(err2, abs(fcast2$mean-xtest))
  
  # This is mean squared error
  err3 = c(err1, (fcast1$mean-xtest)^2)
  #err4 = c(err2, (fcast2$mean-xtest)^2)
  
}

(MAE1=mean(err1)) # This is mean absolute error
[1] 771.8619
Show the code
MSE1=mean(err1)
MSE1
[1] 771.8619