GPD
EDA
Show the code
<- read.csv('data/Employment59.csv')
gdp $DATE <- as.Date(gdp$DATE)
gdp<- ts(gdp$PAYEMS, star= c(1959,1),frequency = 4)
gdp_ts autoplot(gdp_ts)+ggtitle("US GDP")
Show the code
acf(gdp_ts)
Show the code
=decompose(gdp_ts,type = "multiplicative")
dec2plot(dec2)
Show the code
%>% diff() %>% ggtsdisplay() #first ordinary differencing gdp_ts
Show the code
%>% diff(lag = 4) %>% ggtsdisplay() #first ordinary differencing gdp_ts
Determine Parameters
Show the code
%>% diff(lag=4) %>% diff() %>% ggtsdisplay() #do both gdp_ts
p:0,1,3 d:2 q:1,2,3 P:1,2,3 D:2 Q:1
Show the code
######################## Check for different combinations ########
#write a funtion
=function(p1,p2,q1,q2,P1,P2,Q1,Q2,data){
SARIMA.c
#K=(p2+1)*(q2+1)*(P2+1)*(Q2+1)
=c()
temp=1
d=1
D=12
s
=1
i= data.frame()
temp=matrix(rep(NA,9*35),nrow=35)
ls
for (p in p1:p2)
{for(q in q1:q2)
{for(P in P1:P2)
{for(Q in Q1:Q2)
{if(p+d+q+P+D+Q<=9)
{
<- Arima(data,order=c(p-1,d,q-1),seasonal=c(P-1,D,Q-1))
model= c(p-1,d,q-1,P-1,D,Q-1,model$aic,model$bic,model$aicc)
ls[i,]=i+1
i#print(i)
}
}
}
}
}
= as.data.frame(ls)
tempnames(temp)= c("p","d","q","P","D","Q","AIC","BIC","AICc")
temp
}
= SARIMA.c(p1=1,p2=4,q1=1,q2=4,P1=1,P2=3,Q1=1,Q2=2, data = gdp_ts)
output #output
::kable(output) knitr
p | d | q | P | D | Q | AIC | BIC | AICc |
---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 1 | 0 | 4498.257 | 4501.782 | 4498.273 |
0 | 1 | 0 | 0 | 1 | 1 | 4348.055 | 4355.106 | 4348.104 |
0 | 1 | 0 | 1 | 1 | 0 | 4439.336 | 4446.387 | 4439.384 |
0 | 1 | 0 | 1 | 1 | 1 | 4350.031 | 4360.607 | 4350.128 |
0 | 1 | 0 | 2 | 1 | 0 | 4379.073 | 4389.649 | 4379.170 |
0 | 1 | 0 | 2 | 1 | 1 | 4350.737 | 4364.839 | 4350.900 |
0 | 1 | 1 | 0 | 1 | 0 | 4496.142 | 4503.193 | 4496.191 |
0 | 1 | 1 | 0 | 1 | 1 | 4348.392 | 4358.968 | 4348.489 |
0 | 1 | 1 | 1 | 1 | 0 | 4439.612 | 4450.188 | 4439.709 |
0 | 1 | 1 | 1 | 1 | 1 | 4350.294 | 4364.396 | 4350.456 |
0 | 1 | 1 | 2 | 1 | 0 | 4381.060 | 4395.162 | 4381.223 |
0 | 1 | 2 | 0 | 1 | 0 | 4488.150 | 4498.727 | 4488.248 |
0 | 1 | 2 | 0 | 1 | 1 | 4349.256 | 4363.357 | 4349.418 |
0 | 1 | 2 | 1 | 1 | 0 | 4440.078 | 4454.180 | 4440.241 |
0 | 1 | 3 | 0 | 1 | 0 | 4478.378 | 4492.480 | 4478.541 |
1 | 1 | 0 | 0 | 1 | 0 | 4495.859 | 4502.910 | 4495.907 |
1 | 1 | 0 | 0 | 1 | 1 | 4348.206 | 4358.782 | 4348.303 |
1 | 1 | 0 | 1 | 1 | 0 | 4439.428 | 4450.005 | 4439.525 |
1 | 1 | 0 | 1 | 1 | 1 | 4350.101 | 4364.203 | 4350.264 |
1 | 1 | 0 | 2 | 1 | 0 | 4381.057 | 4395.159 | 4381.220 |
1 | 1 | 1 | 0 | 1 | 0 | 4497.843 | 4508.419 | 4497.940 |
1 | 1 | 1 | 0 | 1 | 1 | 4350.038 | 4364.139 | 4350.200 |
1 | 1 | 1 | 1 | 1 | 0 | 4441.326 | 4455.428 | 4441.489 |
1 | 1 | 2 | 0 | 1 | 0 | 4489.217 | 4503.319 | 4489.380 |
2 | 1 | 0 | 0 | 1 | 0 | 4497.803 | 4508.379 | 4497.900 |
2 | 1 | 0 | 0 | 1 | 1 | 4349.683 | 4363.785 | 4349.845 |
2 | 1 | 0 | 1 | 1 | 0 | 4441.003 | 4455.105 | 4441.165 |
2 | 1 | 1 | 0 | 1 | 0 | 4499.678 | 4513.780 | 4499.841 |
3 | 1 | 0 | 0 | 1 | 0 | 4494.238 | 4508.340 | 4494.400 |
NA | NA | NA | NA | NA | NA | NA | NA | NA |
NA | NA | NA | NA | NA | NA | NA | NA | NA |
NA | NA | NA | NA | NA | NA | NA | NA | NA |
NA | NA | NA | NA | NA | NA | NA | NA | NA |
NA | NA | NA | NA | NA | NA | NA | NA | NA |
NA | NA | NA | NA | NA | NA | NA | NA | NA |
Show the code
which.min(output$AIC),] output[
p d q P D Q AIC BIC AICc
2 0 1 0 0 1 1 4348.055 4355.106 4348.104
Show the code
which.min(output$BIC),] output[
p d q P D Q AIC BIC AICc
2 0 1 0 0 1 1 4348.055 4355.106 4348.104
Show the code
which.min(output$AICc),] output[
p d q P D Q AIC BIC AICc
2 0 1 0 0 1 1 4348.055 4355.106 4348.104
Show the code
set.seed(123)
<- capture.output(sarima(gdp_ts, 0,1,0,0,1,1,4)) model_output
Show the code
cat(model_output[21:50], model_output[length(model_output)], sep = "\n")
$fit
Call:
arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
REPORT = 1, reltol = tol))
Coefficients:
sma1
-0.9492
s.e. 0.0345
sigma^2 estimated as 1852941: log likelihood = -2172.03, aic = 4348.06
$degrees_of_freedom
[1] 250
$ttable
Estimate SE t.value p.value
sma1 -0.9492 0.0345 -27.4916 0
$AIC
[1] 17.32293
$AICc
[1] 17.32299
$BIC
[1] 17.35102
Model Fitting
Show the code
<- Arima(gdp_ts, order=c(0,1,0), seasonal=c(0,1,1))
fit summary(fit)
Series: gdp_ts
ARIMA(0,1,0)(0,1,1)[4]
Coefficients:
sma1
-0.9492
s.e. 0.0345
sigma^2 = 1860405: log likelihood = -2172.03
AIC=4348.06 AICc=4348.1 BIC=4355.11
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 3.482494 1347.888 492.0962 0.0137906 0.4616182 0.2031885
ACF1
Training set -0.08526289
forecasting
Show the code
%>% forecast(h=12) %>% autoplot() #next 3 years fit
Show the code
sarima.for(gdp_ts, 12, 0,1,0,0,1,1,4)
$pred
Qtr1 Qtr2 Qtr3 Qtr4
2023 154768.4 154437.3 155221.5 155790.7
2024 156273.1 155942.0 156726.2 157295.5
2025 157777.8 157446.7 158231.0 158800.2
$se
Qtr1 Qtr2 Qtr3 Qtr4
2023 1361.322 1925.194 2357.869 2722.631
2024 3075.622 3392.066 3681.408 3949.611
2025 4224.838 4483.186 4727.437 4959.674
Compare with Benchmark methods
Show the code
<- Arima(gdp_ts, order=c(0,1,0), seasonal=c(0,1,1))
fit
autoplot(gdp_ts) +
autolayer(meanf(gdp_ts, h=12),
series="Mean", PI=FALSE) +
autolayer(naive(gdp_ts, h=12),
series="Naïve", PI=FALSE) +
autolayer(snaive(gdp_ts, h=12),
series="SNaïve", PI=FALSE)+
autolayer(rwf(gdp_ts, h=12, drift=TRUE),
series="Drift", PI=FALSE)+
autolayer(forecast(fit,12),
series="fit",PI=FALSE) +
guides(colour=guide_legend(title="Forecast"))
Show the code
<- snaive(gdp_ts, h=12)
f2
accuracy(f2)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1575.474 3048.225 2421.87 1.628746 2.330202 1 0.7384914
Show the code
summary(fit)
Series: gdp_ts
ARIMA(0,1,0)(0,1,1)[4]
Coefficients:
sma1
-0.9492
s.e. 0.0345
sigma^2 = 1860405: log likelihood = -2172.03
AIC=4348.06 AICc=4348.1 BIC=4355.11
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 3.482494 1347.888 492.0962 0.0137906 0.4616182 0.2031885
ACF1
Training set -0.08526289
Cross Validation
Show the code
<- 75 # minimum data length for fitting a model n*0.3
k <- length(gdp_ts)
n
=1
i= c()
err1 #err2 = c()
for(i in 1:(n-k))
{<- gdp_ts[1:(k-1)+i] #observations from 1 to 75
xtrain <- gdp_ts[k+i] #76th observation as the test set
xtest
# Arima(gdp_ts, order=c(0,1,0), seasonal=c(0,1,1))
<- Arima(xtrain, order=c(0,1,0), seasonal=c(0,1,1),include.drift=FALSE, method="ML")
fit <- forecast(fit, h=1)
fcast1
#capture error for each iteration
# This is mean absolute error
= c(err1, abs(fcast1$mean-xtest))
err1 #err2 = c(err2, abs(fcast2$mean-xtest))
# This is mean squared error
= c(err1, (fcast1$mean-xtest)^2)
err3 #err4 = c(err2, (fcast2$mean-xtest)^2)
}
MAE1=mean(err1)) # This is mean absolute error (
[1] 771.8619
Show the code
=mean(err1)
MSE1 MSE1
[1] 771.8619