29,322
社区成员
原题请前往chegg查看
输入数据
data <- read.table('AUSRetail2021.csv',header = T,sep=",",stringsAsFactors = F,fileEncoding = "UTF-8")
str(data)
## 'data.frame': 349 obs. of 3 variables:
## $ DATE : chr "1/01/1992" "1/02/1992" "1/03/1992" "1/04/1992" ...
## $ Retail.Trade: num 7.46e+09 7.48e+09 7.60e+09 7.68e+09 7.67e+09 ...
## $ GDP : num 2.07e+11 2.07e+11 2.08e+11 2.08e+11 2.09e+11 ...
data$Month <- as.numeric(substr(data$DATE,3,4)) #create D
data$Quarter <- 4
data$Quarter[data$Month<4] <- 1
data$Quarter[data$Month>3 & data$Month<7] <- 2
data$Quarter[data$Month>6 & data$Month<10] <- 3
#create M
data$Month <- as.factor(data$Month) data$Quarter <- as.factor(data$Quarter) ##dummy vriables
dat <- model.matrix(~Month+Quarter, data) dd <- as.data.frame(dat)
分析参数,MSE,AIC和BIC
T <- c(1:range(nrow(dat)))
X1 <- as.matrix(cbind(dd$`(Intercept)`,T,dd$Quarter4))
X2 <- as.matrix(cbind(T,dd$Quarter2,dd$Quarter3,dd$Quarter4)) X3 <- as.matrix(cbind(dd$`(Intercept)`,T,dd$Month12))
X4 <- cbind(T,subset(dd, select=c(setdiff(colnames(dd),c(" (Intercept)","Quarter2","Quarter3","Quarter4"))) ))
X4 <- as.matrix(X4)
#cal paramater
cal_param_1b <- function(X){
Y <- data$Retail.Trade;N <- nrow(data)
param <- solve.default(t(X)%*%X)%*%t(X)%*%Y
return(param) }
print(list(S1=t(cal_param_1b(X1)),S2=t(cal_param_1b(X2)),S3=t(cal_param_1b(X3))
## $S1
## T
## [1,] 5704216929 65320291 13999323
##
## $S2
## T
## [1,] 79456946 3230306897 3211172840 3187754874
##
## $S3
## T
## [1,] 5707114772 65321477 4626345
##
## $S3
## T Month2 Month3 Month4 Month5 Month6 Month7
## [1,] 72110425 4531965655 4595955230 4396758598 4536544724 4570461885 4532120426
## Month8 Month9 Month10 Month11 Month12
## [1,] 4492234138 4488127161 4468933978 4549682173 4489730369
fitting_meassure <- function(X){
Y <- data$Retail.Trade;N <- nrow(data)
param <- solve.default(t(X)%*%X)%*%t(X)%*%Y yhat <- X%*%param
MSE <- mean((Y-yhat)**2) AIC = N*MSE + 5*2;
BIC = N*MSE + 5*log(N);
res <- c(MSE,AIC,BIC,yhat,param) return(res)
}
res1_b <- as.data.frame(rbind(fitting_meassure(X1)[1:3],fitting_meassure(X2) [1:3],fitting_meassure(X3)[1:3],fitting_meassure(X4)[1:3]))
colnames(res1_b) <- c("MSE","AIC","BIC") rownames(res1_b) <- c("S1","S2","S3","S4") res1_b
## MSE AIC BIC
## S1 4.457485e+17 1.555662e+20 1.555662e+20
## S2 5.226295e+18 1.823977e+21 1.823977e+21
## S3 4.457835e+17 1.555784e+20 1.555784e+20
## S4 2.712821e+18 9.467745e+20 9.467745e+20
一步和三步预测的MSFE
X1_c <- X1[1:63,];X2_c <- X2[1:63,];X3_c <- X3[1:63,];X4_c <- X4[1:63,]
forecast1_c <- function(X_,N){
Y <- data$Retail.Trade[1:N] X <- X_[1:N,]
param <- solve.default(t(X)%*%X)%*%t(X)%*%Y
yhat_1 <- X_[N+1,]%*%param
yhat_2 <- X_[N+3,]%*%param
MSFE_1 <- (data$Retail.Trade[N+1]-yhat_1)**2
MSFE_2 <- (data$Retail.Trade[N+3]-yhat_2)**2
return(c(MSFE_1,MSFE_2))
}
res1_c <-
as.data.frame(rbind(forecast1_c(X1,60),forecast1_c(X2,60),forecast1_c(X3,60),forecast1_c(X4,60)))
colnames(res1_c) <- c("MSFE1","MSFE2")
rownames(res1_c) <- c("S1","S2","S3","S4")
res1_c
## MSFE1 MSFE2
## S1 1.447209e+16 5.049427e+15
## S2 7.003849e+17 4.616751e+17
## S3 1.257163e+16 3.935529e+15
## S4 1.955711e+19 2.875718e+18
预测点图
library(ggplot2)
dd_11d <- as.data.frame(cbind(data$Retail.Trade,c(fitting_meassure(X1)[4:352])))
colnames(dd_11d) <- c("Retail.Trade","Retail.Trade_pre")
plot1_11 <- ggplot(data = dd_11d, aes(x = c(1:range(349)))) + geom_line(aes(y =
Retail.Trade, linetype = "y", colour = "y"), size = 0.8)
plot1_12 <- plot1_11 + geom_line(aes(y = Retail.Trade_pre ,linetype = "yhat",
colour = "yhat"), size = 0.8) +scale_linetype_manual(name = "", values = c("y" =
"solid", "yhat" = "twodash")) +
scale_colour_manual(name = "", values = c("y" = "red", "yhat" = "blue"))
plot1_12图一
dd_12d <- as.data.frame(cbind(data$Retail.Trade,c(fitting_meassure(X2)[4:352])))
colnames(dd_12d) <- c("Retail.Trade","Retail.Trade_pre")
plot1_21 <- ggplot(data = dd_12d, aes(x = c(1:range(349)))) + geom_line(aes(y =
Retail.Trade, linetype = "y", colour = "y"), size = 0.8)
plot1_22 <- plot1_21 + geom_line(aes(y = Retail.Trade_pre ,linetype = "yhat",
colour = "yhat"), size = 0.8) +scale_linetype_manual(name = "", values = c("y" =
"solid", "yhat" = "twodash")) +
scale_colour_manual(name = "", values = c("y" = "red", "yhat" = "blue"))
plot1_22图二
dd_13d <- as.data.frame(cbind(data$Retail.Trade,c(fitting_meassure(X3)[4:352])))
colnames(dd_13d) <- c("Retail.Trade","Retail.Trade_pre")
plot1_31 <- ggplot(data = dd_13d, aes(x = c(1:range(349)))) + geom_line(aes(y =
Retail.Trade, linetype = "y", colour = "y"), size = 0.8)
plot1_32 <- plot1_31 + geom_line(aes(y = Retail.Trade_pre ,linetype = "yhat",
colour = "yhat"), size = 0.8) +scale_linetype_manual(name = "", values = c("y" =
"solid", "yhat" = "twodash")) +
scale_colour_manual(name = "", values = c("y" = "red", "yhat" = "blue"))
plot1_32图三
dd_14d <- as.data.frame(cbind(data$Retail.Trade,c(fitting_meassure(X4)[4:352])))
colnames(dd_14d) <- c("Retail.Trade","Retail.Trade_pre")
plot1_41 <- ggplot(data = dd_14d, aes(x = c(1:range(349)))) + geom_line(aes(y =
Retail.Trade, linetype = "y", colour = "y"), size = 0.8)
plot1_42 <- plot1_41 + geom_line(aes(y = Retail.Trade_pre ,linetype = "yhat",
colour = "yhat"), size = 0.8) +scale_linetype_manual(name = "", values = c("y" =
"solid", "yhat" = "twodash")) +
scale_colour_manual(name = "", values = c("y" = "red", "yhat" = "blue"))
plot1_42图四
加入方差
cal_sigma <- function(X){
Y <- data$Retail.Trade;T <- dim(X)[1];K <- dim(X)[2]
param <- solve.default(t(X)%*%X)%*%t(X)%*%Y
yhat <- X%*%param
sigma_hat <- sum((Y-yhat)**2)/(T-K)
return(sigma_hat)
}
res2_a <-
as.data.frame(rbind(cal_sigma(X1),cal_sigma(X2),cal_sigma(X3),cal_sigma(X4)))
colnames(res2_a) <- c("sigma_hat")
rownames(res2_a) <- c("S1","S2","S3","S4")
res2_a
## sigma_hat
## S1 4.496133e+17
## S2 5.286890e+18
## S3 4.496487e+17
## S4 2.809420e+18
95概率预测
x1 <- rbind(c(1,350,0),c(1,351,0),c(1,352,0))
x2 <- rbind(c(350,0,0,0),c(351,0,0,0),c(352,1,0,0)) x3 <- rbind(c(1,350,0),c(1,351,0),c(1,352,0))
x4 <- rbind(c(350,1,rep(0,10)),c(351,0,1,rep(0,9)),c(352,0,0,1,rep(0,8)))
forecast_95 <- function(X,x){
Y <- data$Retail.Trade;N <- nrow(data)
param <- solve.default(t(X)%*%X)%*%t(X)%*%Y
yhat <- x[3, ]%*%param
sigma_ <- sqrt(cal_sigma(X))
yhat_left <- yhat-1.96*sigma_
yhat_right <- yhat+1.96*sigma_
return(c(yhat_left,yhat_right))
}
res2_b <-
as.data.frame(rbind(forecast_95(X1,x1),forecast_95(X2,x2),forecast_95(X3,x3),forecast_95(X4,x4)))
colnames(res2_b) <- c("yhat_left","yhat_left")
rownames(res2_b) <- c("S1","S2","S3","S4")
res2_b
## yhat_left yhat_left
## S1 27382716279 30011202188
## S2 26692477452 35705826593
## S3 27385979930 30014569109
## S4 26494408639 33064847865
八月零售超过31bn的概率
prob2_c <- function(X,x){
Y <- data$Retail.Trade;N <- nrow(data)
param <- solve.default(t(X)%*%X)%*%t(X)%*%Y
yhat <- x[3, ]%*%param
sigma_ <- sqrt(cal_sigma(X))
prob <- dnorm(3100000000,yhat,sigma_)
return(prob)
}
res2_c <-
as.data.frame(rbind(prob2_c(X1,x1),prob2_c(X2,x2),prob2_c(X3,x3),prob2_c(X4,x4)))
colnames(res2_c) <- c("prob")
rownames(res2_c) <- c("S1","S2","S3","S4")
res2_c
## prob
## S1 0.000000e+00
## S2 6.453455e-43
## S3 0.000000e+00
## S4 2.288576e-65
GDP作为参所得可行性
library(MASS)
h=12
N=nrow(data)
X1_ <- cbind(X1[h:N-1,],data$GDP[-c(1:range(h))])
X2_ <- cbind(X2[h:N-1,],data$GDP[-c(1:range(h))])
X3_ <- cbind(X3[h:N-1,],data$GDP[-c(1:range(h))])
X4_ <- cbind(X4[h:N-1,],data$GDP[-c(1:range(h))])
forecast3_c <- function(X_,N){
Y <- data$Retail.Trade[1:N]
X <- X_[1:N,]
param <- ginv(t(X)%*%X)%*%t(X)%*%Y #矩阵非奇异,这里用广义矩阵逆代替
yhat_1 <- X_[N+1,]%*%param
yhat_2 <- X_[N+3,]%*%param
MSFE_1 <- (data$Retail.Trade[N+1]-yhat_1)**2
MSFE_3 <- (data$Retail.Trade[N+3]-yhat_2)**2
return(c(MSFE_1,MSFE_3,param))
}
res3_c <- as.data.frame(rbind(forecast3_c(X1_,60)[1:2],forecast3_c(X2_,60)
[1:2],forecast3_c(X3_,60)[1:2],forecast3_c(X4_,60)[1:2]))
colnames(res3_c) <- c("MSFE1","MSFE3")
rownames(res3_c) <- c("S1","S2","S3","S4")
res3_c
## MSFE1 MSFE3
## S1 4.449682e+16 7.305452e+16
## S2 4.449682e+16 7.305452e+16
## S3 4.449682e+16 7.305452e+16
## S4 4.449682e+16 7.305452e+16
#S1 S2 S3 S4 T0_3=60
yhat3_1 <- X1_[-c(1:range(T0_3)),]%*%(as.matrix(forecast3_c(X1_,60)[-c(1,2)]))
dd_31 <- as.data.frame(cbind(data$Retail.Trade[-c(1:range(T0_3))],yhat3_1))
colnames(dd_31) <- c("Retail.Trade","Retail.Trade_pre")
plot3_11 <- ggplot(data = dd_31, aes(x = c(1:range(278)))) + geom_line(aes(y =
Retail.Trade, linetype = "y", colour = "y"), size = 0.8)
plot3_12 <- plot3_11 + geom_line(aes(y = Retail.Trade_pre ,linetype = "yhat",
colour = "yhat"), size = 0.8) +scale_linetype_manual(name = "", values = c("y" =
"solid", "yhat" = "twodash")) +
scale_colour_manual(name = "", values = c("y" = "red", "yhat" = "blue"))
plot3_12
霍尔特-温特模型预测
Y <- data$Retail.Trade
T0 = 50; h_4 = 4; s=4
syhat = rep(0,N-h_4);
ytph_4 <- Y[T0+h_4:N]
alpha = 0.4; beta = 0.4;gamma = 0.4
St = rep(0,N-h_4);
Lt = mean(Y[1:s]); bt = 0; St[1:s] = Y[1:s] - Lt;
for (t in s+1:range(N-h_4)){
newLt = alpha*(Y[t] - St[t-s]) + (1-alpha)*(Lt+bt)
newbt = beta*(newLt-Lt) + (1-beta)*bt;
St[t] = gamma*(Y[t]-newLt) + (1-gamma)*St[t-s]
yhat = newLt + h*newbt + St[t+h-s];
Lt = newLt; bt = newbt;
if(t>T0){
syhat[t-T0+1]=yhat
}
}
print(list(step_forecast_1=syhat[2],step_forecast_3=syhat[4]))
## $step_forecast_1
## [1] 9677218905 ##
## $step_forecast_3
## [1] 9650686572
MSFE_41 = (ytph_4[2]-syhat[2])**2
MSFE_43 = (ytph_4[4]-syhat[4])**2
print(list(MSFE_41 = MSFE_41,MSFE_43 = MSFE_43))
## $MSFE_41
## [1] 9.607706e+15 ##
## $MSFE_43
## [1] 4.075819e+16
dd_41 <- as.data.frame(cbind(ytph_4[2:260],syhat[2:260]))
colnames(dd_41) <- c("Retail.Trade","Retail.Trade_pre")
plot4_1 <- ggplot(data = dd_41, aes(x = c(1:range(259)))) + geom_line(aes(y =
Retail.Trade, linetype = "y", colour = "y"), size = 0.8)
plot4_2 <- plot4_1 + geom_line(aes(y = Retail.Trade_pre ,linetype = "yhat", colour
= "yhat"), size = 0.8) +scale_linetype_manual(name = "", values = c("y" =
"solid", "yhat" = "twodash")) +
scale_colour_manual(name = "", values = c("y" = "red", "yhat" = "blue"))
plot4_2
Y_4 <- ts(Y, frequency=12)
Y_holt <- HoltWinters(Y_4);Y_holt
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = Y_4)
##
## Smoothing parameters:
## alpha: 0.2448791
## beta : 0.1425503
## gamma: 0.04990648
##
## Coefficients:
## [,1]
## a 30648654256
## b 235382240
## s1 1616510
## s2 87341440
## s3 -141080770
## s4 70079426
## s5 92060074
## s6 72079264
## s7 -30607380
## s8 -13097427
## s9 2858214
## s10 98400544
## s11 -14271954
## s12 6488123
print(list(MSFE_42=Y_holt$SSE))
## $MSFE_42
## [1] 3.912362e+19
library("forecast")
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
syhat_2 <- forecast(Y_holt, h=80)
plot(syhat_2)
使用其他模型
library(forecast)
fit <- auto.arima(Y);fit #to estimate the model
## Series: Y
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 66305747
## s.e. 19328662
##
## sigma^2 estimated as 1.856e+17: log likelihood=-7411.95
## AIC=14827.91 AICc=14827.94 BIC=14835.61
fit <- Arima(Y,c(0,1,0),include.drift = T);fit
## Series: Y
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 66305747
## s.e. 19328662
##
## sigma^2 estimated as 1.856e+17: log likelihood=-7411.95
## AIC=14827.91 AICc=14827.94 BIC=14835.61
tsdiag(fit)
fore <- forecast::forecast(fit,h=10);
MSE_6 <- mean(fit$residuals**2)
AIC_6 <- fit$aic
BIC_6 <- fit$bic
res6_1 <- as.data.frame(cbind(MSE_6,AIC_6,BIC_6))
colnames(res6_1) <- c("MSE","AIC","BIC")
rownames(res6_1) <- "ARIMA(0,1,0)"
res6_1
## MSE AIC BIC
## ARIMA(0,1,0) 1.845482e+17 14827.91 14835.61
plot(fore,lty=2)