Senin, 20 Maret 2017

Predicting Using ARIMA With Exogenous Variables (ARIMAX) in R

Melakukan peramalan biasanya menggunakan data historis, namun sering sekali data historis saja tidak representatif untuk menghasilkan peramalan yang tepat. Selain pengaruh data historis, sebetulnya masih banyak variabel lain yang mempengaruhi hasil peramalan. Kali ini kita akan membahas bagaimana melakukan analisis menggunakan Auto Regressive Integrated Moving Average (ARIMA) dengan tambahan variabel eksogen (ARIMAX).

# Load the tab-delimited price table (columns Periode and Close, per the
# summary below) that was copied to the system clipboard.
# NOTE(review): the "clipboard" source may not be available on every platform
# and makes the script non-reproducible; consider reading from a file instead.
# `header = TRUE` is spelled out in full: `head = T` relied on partial
# argument matching and on the reassignable shorthand `T`.
data <- read.delim("clipboard", header = TRUE)
summary(data)
#      Periode       Close     
# 01-Feb-15: 1   Min.   :7100  
# 01-Jan-15: 1   1st Qu.:7838  
# 01-Mar-15: 1   Median :8000  
# 02-Feb-15: 1   Mean   :7989  
# 02-Jan-15: 1   3rd Qu.:8200  
# 03-Feb-15: 1   Max.   :8750  
# (Other)  :54                 
# Line chart of the closing price against the parsed dates
# (Periode is stored as text such as "01-Jan-15").
plot(as.Date(data$Periode, "%d-%b-%y"), 
    data$Close, 
    xlab = "Dates", 
    ylab = "Adjusted closing price", 
    type = "l", 
    col = "red", 
    main = "Adjusted Closing Price per Periode")

library(tseries, quietly = TRUE)
# Augmented Dickey-Fuller test on the raw closing prices.
adf.test(data$Close)
#
#        Augmented Dickey-Fuller Test
#
# data:  data$Close
# Dickey-Fuller = -2.4464, Lag order = 3, p-value = 0.3943
# alternative hypothesis: stationary
# The recorded p-value (0.3943) does not support the "stationary" alternative,
# so the analysis continues on percentage log returns instead of price levels.
# diff() drops the first observation, so infy_ret is one element shorter than
# data$Close.
infy_ret <- 100 * diff(log(data$Close))
infy_ret
#  [1]  0.0000000  3.1548358  0.0000000 -1.2500163  0.0000000  0.6269613
#  [7]  3.6813973 -1.8237588 -1.8576386  0.0000000  1.2422520  1.8349139
# [13] -1.2195273  1.8237588  0.6006024 -1.8127385  1.2121361  2.9675768
# [19]  2.3122417 -0.5730675 -1.1560822 -3.5506688 -3.0583423 -2.5158560
# [25]  1.8928010 -1.8928010  1.2658397  2.4845999  0.0000000  0.6116227
# [31] -2.4692613 -3.1748698  2.5479085 -0.6309169  1.2578782  0.0000000
# [37]  1.8576386  0.6116227  1.8127385  0.5970167 -0.5970167 -3.6589447
# [43]  0.6191970 -4.4171218  1.9169916  0.6309169 -1.2658397  1.2658397
# [49] -2.5479085 -8.7598059  5.4808236 -1.9522666  1.9522666  0.6644543
# [55]  1.3158085  0.6514681 -4.6520016 -2.0619287  1.3793322
# The return series starts at the second date because differencing consumed
# the first observation.
return_dates <- as.Date(data$Periode[-1], format = "%d-%b-%y")
plot(
  x = return_dates,
  y = infy_ret,
  type = "l",
  col = "blue",
  main = "Plot Daily returns",
  xlab = "Dates",
  ylab = "Returns percentage(%)"
)

# Autocorrelation of the returns.
acf(x = infy_ret, main = "Plot ACF")

# Partial autocorrelation of the returns.
pacf(x = infy_ret, main = "Plot Partial ACF")
# Fit an ARMA(1, 2) to the returns and print its coefficient table.
summary(
  arma(infy_ret, order = c(1, 2))
)
# 
# Call:
# arma(x = infy_ret, order = c(1, 2))
#
# Model:
# ARMA(1,2)
#
# Residuals:
#      Min       1Q   Median       3Q      Max 
# -6.25422 -1.72059 -0.02185  1.67380  4.34409 
#
# Coefficient(s):
#            Estimate  Std. Error  t value Pr(>|t|)    
# ar1        0.418415    0.044569    9.388  < 2e-16 ***
# ma1       -0.777719    0.004578 -169.874  < 2e-16 ***
# ma2       -0.466684    0.008243  -56.613  < 2e-16 ***
# intercept -0.140463    0.022158   -6.339 2.31e-10 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Fit:
# sigma^2 estimated as 4.178,  Conditional Sum-of-Squares = 234.53,  AIC = 259.8

library(forecast, quietly = TRUE)
# Hold out the last ~10% of the return series for evaluation.
# The split point is floored to a whole number. Indexing directly with the
# fractional value 0.9 * length(infy_ret) (e.g. 53.1) made the test range
# 54.1, 55.1, ..., 58.1, which truncates to 54..58 and silently dropped the
# final observation from the test set.
n_obs <- length(infy_ret)
n_train <- floor(0.9 * n_obs)
stopifnot(n_train >= 1, n_train < n_obs)
# Train dataset
infy_ret_train <- infy_ret[seq_len(n_train)]
# Test dataset
infy_ret_test <- infy_ret[(n_train + 1):n_obs]
# ARMA(1, 2) on the returns, written as ARIMA(1, 0, 2) (no differencing).
fit <- arima(infy_ret_train, order = c(1, 0, 2))
fit
#
# Call:
# arima(x = infy_ret_train, order = c(1, 0, 2))
#
# Coefficients:
#           ar1     ma1      ma2  intercept
#       -0.2875  0.1616  -0.0116    -0.0784
# s.e.   1.6228  1.6103   0.2905     0.2904
#
# sigma^2 estimated as 5.569:  log likelihood = -120.72,  aic = 251.44
# Point forecasts for exactly the hold-out horizon, so predictions and test
# observations have the same length.
arma.preds <- predict(fit, n.ahead = length(infy_ret_test))$pred
# Longer 25-step forecast, used only for the plot.
arma.forecast <- forecast(fit, h = 25)
plot(arma.forecast, main = "Plot ARMA forecasts")
# RMSE values (second element of the accuracy() result)
accuracy(arma.preds, infy_ret_test)[2]
# [1] 2.378915
# NOTE(review): the value above was recorded in the original run, before the
# hold-out split was corrected to include the last observation; re-run to
# refresh it.
# Day-of-week dummies used as exogenous regressors for the ARIMAX model.
data$day <- as.factor(weekdays(as.Date(data$Periode, "%d-%b-%y")))
# Drop the first day so the regressors line up with the differenced returns.
days <- data$day[-1]
# Column 1 of the model matrix is the intercept; columns 2:4 are the dummies.
# NOTE(review): weekdays() is locale-dependent and the dummy columns follow
# the factor level order, so confirm that columns 2:4 really correspond to
# the labels assigned below.
xreg1 <- model.matrix(~as.factor(days))[, 2:4]
# Name the columns BEFORE splitting so that both the training and hold-out
# regressors (and therefore the fitted coefficients) carry the labels.
colnames(xreg1) <- c("Jumat", "Sabtu", "Minggu")
# Split the regressors to match the existing train/test return vectors
# instead of hard-coding row numbers.
n_fit <- length(infy_ret_train)
n_holdout <- length(infy_ret_test)
xregres <- xreg1[seq_len(n_fit), , drop = FALSE]
xreg_test <- xreg1[n_fit + seq_len(n_holdout), , drop = FALSE]
fit2 <- arima(infy_ret_train, order = c(1, 0, 2), xreg = xregres)
fit2
# 
# Call:
# arima(x = infy_ret_train, order = c(1, 0, 2), xreg = xregres)
#
# Coefficients:
#          ar1      ma1     ma2  intercept   Jumat    Sabtu  Minggu
#       0.8255  -1.0150  0.0150    -0.0891  0.2374  -0.3093  0.1033
# s.e.  0.1202   0.1807  0.1688     0.3052  1.0624   0.9969  1.0416
#
# sigma^2 estimated as 5.143:  log likelihood = -119.51,  aic = 255.01
# Forecast over the hold-out period using the hold-out regressors; the
# training regressors describe past days and must not be reused as future
# values.
plot(forecast(fit2, h = n_holdout, xreg = xreg_test),
     main = "Plot ARIMAX forecasts")
# Accuracy using ARIMAX: predict the hold-out returns and report the RMSE.
# `fit1.preds` was referenced but never defined in the original script.
fit1.preds <- predict(fit2, n.ahead = n_holdout, newxreg = xreg_test)
accuracy(fit1.preds$pred, infy_ret_test)[2]
# [1] 2.478665
# NOTE(review): the value above is the figure recorded in the original post;
# re-run to refresh it.

Sekian dulu pembahasan tentang cara melakukan prediksi menggunakan ARIMAX. Semoga bermanfaat bagi kita semua!


Tidak ada komentar:

Posting Komentar