SOCR ≫ TCIU Website ≫ TCIU GitHub ≫

1 TCIU Kime ARIMAX on ED Econ Data

1.1 Data Source Type

# TSplotly
# https://rdrr.io/cran/TSplotly/f/vignettes/TSplotly.Rmd
# https://github.com/QJoshua/TSplotly
# install.packages("C:/Users/Dinov/Desktop/TSplotly_1.1.0.tar.gz", repos = NULL, type = "source")

library(dplyr)
library(ggplot2)
library(plotly)
library(forecast)
library(ggplot2)
library(reshape2)
library(tidyr)
# library(TSplotly)
library(zoo)
load("MCSI.RData")
# dim(MCSI_Data)    
# [1] 279242    107

# extract features
# ICS                INDEX OF CONSUMER SENTIMENT
# ICC                INDEX OF CURRENT ECONOMIC CONDITIONS                
# GOVT               GOVERNMENT ECONOMIC POLICY
# DUR                DURABLES BUYING ATTITUDES 
# HOM                HOME BUYING ATTITUDES
# CAR                VEHICLE BUYING ATTITUDES
# INCOME             TOTAL HOUSEHOLD INCOME - CURRENT DOLLARS
# AGE                AGE OF RESPONDENT
# EDUC               EDUCATION OF RESPONDENT


DT::datatable(head(MCSI_Data,1000), fillContainer = TRUE)
MCSI_Data_short <- 
  MCSI_Data[ , which(names(MCSI_Data) %in% 
                       c("YYYYMM", "ICS", "ICC", "GOVT", "DUR",
                         "HOM", "CAR", "INCOME", "AGE", "EDUC"))]
# dim(MCSI_Data_short)
# [1] 279242      10
# Compute the monthly averages across all participants
MCSI_Data_monthAvg <- aggregate(.~YYYYMM, data = MCSI_Data_short, mean)
# dim(MCSI_Data_monthAvg)
# [1] 492  10
# colnames(MCSI_Data_monthAvg)

# convert YYYYMM to Date
MCSI_Data_monthAvg$YYYYMM <- 
  as.Date(as.character(
    as.POSIXct(paste0(as.character(MCSI_Data_monthAvg$YYYYMM),"01"), format = "%Y%m%d")))
# head(MCSI_Data_monthAvg$YYYYMM)
# View(MCSI_Data_monthAvg)

# plot all features on the same plot (across time/months)

MCSI_Data_monthAvg_melt <- 
  melt(MCSI_Data_monthAvg , id.vars = 'YYYYMM', variable.name = 'series')

1.2 Figure 1.9

First, plot on same grid, each series colored differently

# good if the series have same scale
# ggplot(MCSI_Data_monthAvg_melt, aes(YYYYMM, value), log10="y") + 
# geom_line(aes(colour = series)) 
# Exclude INCOME as it's too large!
ggplot(MCSI_Data_monthAvg_melt[MCSI_Data_monthAvg_melt$series!="INCOME", ], 
       aes(YYYYMM, value)) + 
    geom_line(aes(linetype=series, colour = series), size=2) +
    geom_point(aes(shape=series, colour = series), size=0.3) +
      # geom_line(aes(colour = series)) +
    geom_smooth(aes(colour = series), se = TRUE) +
    coord_trans(y="log10") +
    xlab("Time (monthly)") + ylab("Index Values (log-scale)") +
    # scale_x_date(labels = date_label("%m-%Y")) +
    scale_x_date(date_breaks = "12 month", date_labels =  "%m-%Y")  +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          text = element_text(size=20))+
    theme(legend.position="top")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Interactive plot here

updatemenus <- list(
  list(
    xanchor="left",
    yanchor="top",
    active = -1,
    type= 'buttons',
    buttons = list(
      list(
        label = "ALL",
        method = "update",
        args = list(list(visible = c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)),
                    list(title = "All Indexes"))),
      list(
        label = "ICS",
        method = "update",
        args = list(list(visible = c(FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "ICS"))),
      list(
        label = "ICC",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "ICC"))),
      list(
        label = "GOVT",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "GOVT"))),
      list(
        label = "DUR",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE)),
                    list(title = "DUR"))),
      list(
        label = "HOM",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE)),
                    list(title = "HOM"))),
      list(
        label = "CAR",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE)),
                    list(title = "CAR"))),
      list(
        label = "AGE",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE)),
                    list(title = "AGE"))),
      list(
        label = "EDUC",
        method = "update",
        args = list(list(visible = c(TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "EDUC")))
      )
  )
)

# https://github.com/SOCR/TCIU/blob/master/package/TSplotly/TSplotly/R/GGtoPY.R
GGtoPY <- function(dataframe,NAME=NULL,X=NULL,Y=NULL) {
  levelnames<-levels(dataframe[,which(colnames(dataframe)==NAME)])
  NAmatrix<-matrix(NA,nrow =nrow(dataframe[dataframe[,which(colnames(dataframe)==NAME)]==levelnames[1],]) ,ncol = length(levelnames))
  NAmatrix<-as.data.frame(NAmatrix)
  colnames(NAmatrix)<-levelnames
  rownames(NAmatrix)<-dataframe[dataframe[,which(colnames(dataframe)==NAME)]==levelnames[1],][,which(colnames(dataframe)==X)]
  for (i in 1:length(levelnames)) {
    NAmatrix[,i]<-dataframe[dataframe[,which(colnames(dataframe)==NAME)]==levelnames[i],][,which(colnames(dataframe)==Y)]
  }
  return(NAmatrix)
}

PYdf<-GGtoPY(MCSI_Data_monthAvg_melt[MCSI_Data_monthAvg_melt$series!="INCOME", ],NAME="series",X="YYYYMM",Y="value")
PYdf$INCOME<-NULL
PYdf<-log10(PYdf)
plot_ly(type="scatter",mode="lines")%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$ICS,name="ICS",line=list(color="powderblue"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$ICC,name="ICC",line=list(color="red"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$GOVT,name="GOVT",line=list(color="green"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$DUR,name="DUR",line=list(color="orange"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$HOM,name="HOM",line=list(color="purple"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$CAR,name="CAR",line=list(color="pink"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$AGE,name="AGE",line=list(color="brown"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$EDUC,name="EDUC",line=list(color="black"))%>%
  layout(title= list(text="Time series for 8 indexes",font=list(family = "Times New Roman",size = 16,color = "black" )),
           paper_bgcolor='rgb(255,255,255)', plot_bgcolor='rgb(229,229,229)',
           xaxis = list(title ="Time (monthly)",
                        gridcolor = 'rgb(255,255,255)',
                        showgrid = TRUE,
                        showline = FALSE,
                        showticklabels = TRUE,
                        tickcolor = 'rgb(127,127,127)',
                        ticks = 'outside',
                        zeroline = FALSE),
           yaxis = list(title = "Index Values (log10-scale)",
                        gridcolor = 'rgb(255,255,255)',
                        showgrid = TRUE,
                        showline = FALSE,
                        showticklabels = TRUE,
                        tickcolor = 'rgb(127,127,127)',
                        ticks = 'outside',
                        zeroline = FALSE),
         updatemenus=updatemenus)

Next, we want to take this example one step further and demonstrate the foundation of spacekime analytics using one oversimplified example that illustrates the tradeoffs between traditional spacetime inference and their spacekime analytic counterparts.

# colnames(MCSI_Data_monthAvg)

MCSI_Data_monthAvg_ts <- 
  ts(MCSI_Data_monthAvg, start=c(1978,1), end=c(2018, 12), frequency = 12)
# class(MCSI_Data_monthAvg_ts)
# dim(MCSI_Data_monthAvg_ts)
# [1] 492  10
# Time-series plots: 41 years * 12 months = 492 time points
plot.ts(MCSI_Data_monthAvg_ts)

plot(MCSI_Data_monthAvg_ts, plot.type="m")

# See MCSI docs: https://data.sca.isr.umich.edu/fetchdoc.php?docid=45121
# Outcome to Predict: ICS = INDEX OF CONSUMER SENTIMENT
Y <- MCSI_Data_monthAvg$ICS

# Covariates explaining the longitudinal outcome: 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
X <- cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR, 
           MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR, 
           MCSI_Data_monthAvg$INCOME, MCSI_Data_monthAvg$EDUC)

# Outcome Variable to be modeled, as a time series
MCSI_Data_monthAvg_ts_Y <- ts(Y, start=c(1978,1), end=c(2018, 12), frequency = 12)

pred_length <- 2 * 12 # 2-years of monthly forward forecasting (2017-2018)
start_period <- 468  # 39 years (2017-1978)*12

Fit model only on 39 years (1978-2016), and Evaluate on testing data (2017-2018)

Y_train <- MCSI_Data_monthAvg$ICS[1:(39*12)]
# length(Y_train)/12
Y_test <- MCSI_Data_monthAvg$ICS[((39*12)+1):length(MCSI_Data_monthAvg$ICS)] 
# length(Y_test)/12

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
X_train <- 
  as.data.frame(cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR, 
                      MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR, 
                      MCSI_Data_monthAvg$INCOME, MCSI_Data_monthAvg$EDUC))[1:(39*12), ]
# dim(X_train)
X_test <- 
  as.data.frame(cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR, 
                      MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR, 
                      MCSI_Data_monthAvg$INCOME,
                      MCSI_Data_monthAvg$EDUC))[((39*12)+1):length(MCSI_Data_monthAvg$ICS), ]
# dim(X_test)

# Outcome Variable to be modeled, as a time series
MCSI_Data_monthAvg_ts_Y_train <- ts(Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
MCSI_Data_monthAvg_ts_Y_test <- ts(Y_test, start=c(2017,1), end=c(2018, 12), frequency = 12)

# Find ARIMAX model
modArima_train <- auto.arima(MCSI_Data_monthAvg_ts_Y_train, xreg=as.matrix(X_train))

pred2years_ICS <- predict(modArima_train, n.ahead = pred_length, frequency=12, newxreg = X_test)

modArima_train
## Series: MCSI_Data_monthAvg_ts_Y_train 
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sma1  intercept        V1        V2
##       1.4447  -0.4737  -0.6944  0.5781  -0.5307   152.4668  -11.4417  -10.9797
## s.e.  0.1915   0.1719   0.1667  0.4193   0.4340     8.6771    1.1624    1.0447
##            V3       V4     V5      V6
##       -2.8305  -0.7844  0e+00  1.6079
## s.e.   0.9301   1.0023  1e-04  1.8702
## 
## sigma^2 estimated as 8.564:  log likelihood=-1161.29
## AIC=2348.59   AICc=2349.39   BIC=2402.52

1.3 Figure 1.10

Plot ICS training Data, Testing ICS and Predicted ICS

plot(forecast(modArima_train, xreg = as.matrix(X_test) ), lwd=2, xlab="Time", 
     ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
     main = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data
      \n using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)  
legend(1990, 60, bty="n", legend=c("Training ICS Data (1978-2016)", 
                                   "ARIMAX-model ICS Forecasting (2017-2018)",
                                   "MCSI Official ICS (2017-2018)"),
       col=c("black", "blue", "red"), 
       lty=c(1,1,1), lwd=c(2,2,4), cex=1.2, x.intersp=0.5, y.intersp=0.7)

Interactive Plot here.

# https://github.com/SOCR/TCIU/blob/master/package/TSplotly/TSplotly/R/ADDline.R

ADDline <- function(ARIMAmodel=NULL,XREG=NULL,TS=NULL,linetype="TS",Name=NULL) {
  if(linetype=="ARIMA"){
    tsmodel<-forecast(ARIMAmodel, xreg = XREG)
    newline<-list(X=as.yearmon(time(tsmodel$x)),TEXT=as.character(as.yearmon(time(tsmodel$x))),Y=tsmodel$mean,NAME=Name)
  }
  else if(linetype=="TS"){
    newline<-list(X=as.yearmon(time(TS)),TEXT=as.character(as.yearmon(time(TS))),Y=as.numeric(TS),NAME=Name)
  }
  return(newline)
}

newline<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI Official ICS (2017-2018)")

# https://github.com/SOCR/TCIU/blob/master/package/TSplotly/TSplotly/R/TSplot.R
TSplot<- function(origin_t,ARIMAmodel,XREG=NULL,NEWtitle="Result",Ylab="Value",Xlab="Time(Month/Year)"
                  ,ts_original="original time series",ts_forecast="forecasted time series",title_size=10) {
  tsmodel<-forecast(ARIMAmodel, xreg = XREG)
  if(origin_t=="all"){
    TIME=1
  }
  else{
    TIME=(length(tsmodel$x)-origin_t+1)
  }
  includetime<-c(tsmodel$x[TIME:length(tsmodel$x)],rep(NA,length(tsmodel$mean)))
  includetime2<-c(rep(NA,length(as.yearmon(time(tsmodel$x)[TIME:length(tsmodel$x)]))),tsmodel$mean)
  includetime3<-c(rep(NA,length(as.yearmon(time(tsmodel$x)[TIME:length(tsmodel$x)]))),tsmodel$lower[,1])
  includetime4<-c(rep(NA,length(as.yearmon(time(tsmodel$x)[TIME:length(tsmodel$x)]))),tsmodel$upper[,1])
  includetime5<-c(rep(NA,length(as.yearmon(time(tsmodel$x)[TIME:length(tsmodel$x)]))),tsmodel$lower[,2])
  includetime6<-c(rep(NA,length(as.yearmon(time(tsmodel$x)[TIME:length(tsmodel$x)]))),tsmodel$upper[,2])
  alltime<-c(as.yearmon(time(tsmodel$x)[TIME:length(tsmodel$x)]),as.yearmon(time(tsmodel$mean)))
  TSP<-plot_ly(type="scatter",mode="lines")%>%
    layout(title= list(text=NEWtitle,font=list(family = "Times New Roman",size = title_size,color = "black" )),
           paper_bgcolor='rgb(255,255,255)', plot_bgcolor='rgb(229,229,229)',
           xaxis = list(title = Xlab,
                        gridcolor = 'rgb(255,255,255)',
                        showgrid = TRUE,
                        showline = FALSE,
                        showticklabels = TRUE,
                        tickcolor = 'rgb(127,127,127)',
                        ticks = 'outside',
                        zeroline = FALSE),
           yaxis = list(title = Ylab,
                        gridcolor = 'rgb(255,255,255)',
                        showgrid = TRUE,
                        showline = FALSE,
                        showticklabels = TRUE,
                        tickcolor = 'rgb(127,127,127)',
                        ticks = 'outside',
                        zeroline = FALSE))%>%
    add_lines(x=alltime,text=as.character(alltime),y=includetime,name=ts_original,line=list(color="green"))%>%
    add_lines(x=alltime,text=as.character(alltime),y=includetime5,name="95% lower bound",line=list(color="powderblue"))%>%
    add_trace(x=alltime,text=as.character(alltime),y=includetime6,type="scatter",mode="lines",line=list(color="powderblue"),fill = 'tonexty',fillcolor="powderblue",name="95% upper bound")%>%
    add_lines(x=alltime,text=as.character(alltime),y=includetime3,name="80% lower bound",line=list(color="lightpink"))%>%
    add_trace(x=alltime,text=as.character(alltime),y=includetime4,type="scatter",mode="lines",line=list(color="lightpink"),fill = 'tonexty',fillcolor="lightpink",name="80% upper bound")%>%
    add_lines(x=alltime,text=as.character(alltime),y=includetime2,name=ts_forecast,line=list(color="red"))
  return(TSP)
}

TSplot("all",modArima_train,as.matrix(X_test), NEWtitle = "ZOOM: (2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (1978-2016)",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)")%>%
  add_lines(x=newline$X,text=newline$TEXT,y=newline$Y,name=newline$NAME,line=list(color="gray"))

Zoom in on a smaller interval (4-years prior and 2-years forecasting)

plot(forecast(modArima_train, xreg = as.matrix(X_test) ), include=48, lwd=2, xlab="Time", 
     ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
     main = "ZOOM: (2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data
      \n using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)  
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)", 
                                   "ARIMAX-model ICS Forecasting (2017-2018)",
                                   "MCSI Official ICS (2017-2018)"),
       col=c("black", "blue", "red"), 
       lty=c(1,1,1), lwd=c(2,2,4), cex=1.2, x.intersp=1.5, y.intersp=0.7)
text(2014, 91, "Training Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6", cex=1.5)
text(2018, 79, "Validation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6", cex=1.5)

Interactive plot here

# https://github.com/SOCR/TCIU/blob/master/package/TSplotly/TSplotly/R/ADDline.R
ADDline <- function(ARIMAmodel=NULL,XREG=NULL,TS=NULL,linetype="TS",Name=NULL) {
  if(linetype=="ARIMA"){
    tsmodel<-forecast(ARIMAmodel, xreg = XREG)
    newline<-list(X=as.yearmon(time(tsmodel$x)),TEXT=as.character(as.yearmon(time(tsmodel$x))),Y=tsmodel$mean,NAME=Name)
  }
  else if(linetype=="TS"){
    newline<-list(X=as.yearmon(time(TS)),TEXT=as.character(as.yearmon(time(TS))),Y=as.numeric(TS),NAME=Name)
  }
  return(newline)
}
newline<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI Official ICS (2017-2018)")

TSplot(48,modArima_train,as.matrix(X_test),NEWtitle = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
  add_lines(x=newline$X,text=newline$TEXT,y=newline$Y,name=newline$NAME,line=list(color="gray"))

Transform all 6 predictors (X) and outcome (Y) series to k-space (Fourier domain) Transform entire Xreg matrix (1239 = 468 PLUS 122 = 24, a total of 492 time points: Training + Testing) But transform only the Training Y outcome (12*39 = 468 time points: Training)

# FT/Spacekime Analytics
# 1D timeseries FFT SHIFT
fftshift1D <- function(img_ff) {
  rows <- length(img_ff)   
  rows_half <- ceiling(rows/2)
  return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}

MCSI_data <- cbind(Y_train, X_train)
# dim(MCSI_data)
colnames(MCSI_data) <- c("Y_train", "GOVT", "DUR", "HOM", "CAR", "INCOME", "EDUC")
#head(MCSI_data)
colnames(X_test) <- c("GOVT", "DUR", "HOM", "CAR", "INCOME", "EDUC")
head(X_test)
##         GOVT      DUR      HOM      CAR    INCOME     EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389  99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300
#   MCSI_data_numeric <- as.matrix(MCSI_data)  # remove column names for FT processing

FT_MCSI_data <- array(complex(), c(468, 7))
mag_FT_MCSI_data <- array(complex(), c(468, 7))
phase_FT_MCSI_data <- array(complex(), c(468, 7))
IFT_NilPhase_FT_MCSI_data <- cbind(Y_train, X_train)        # array( , c(468, 7))
IFT_ScramblePhase_FT_MCSI_data <- cbind(Y_train, X_train)   # array( , c(468, 7))
IFT_LoadedPhase_FT_MCSI_data <- cbind(Y_train, X_train)   # array( , c(468, 7))

for (i in 1:7) {
  FT_MCSI_data[ , i] <- fft(MCSI_data[ , i])
  X2 <- FT_MCSI_data[ , i]
  # plot(fftshift1D(log(Re(X2)+2)), main = "log(fftshift1D(Re(FFT(timeseries))))") 
  mag_FT_MCSI_data[ , i] <- sqrt(Re(X2)^2+Im(X2)^2); 
  # plot(log(fftshift1D(Re(mag_FT_MCSI_data))), main = "log(Magnitude(FFT(timeseries)))") 
  phase_FT_MCSI_data[ , i] <- atan2(Im(X2), Re(X2)); 
  # plot(Re(fftshift1D(phase_FT_MCSI_data[ , 1])), main = "Shift(Phase(FFT(timeseries)))")
}

######### Test the FT-IFT analysis-synthesis back-and-forth transform process to confirm calculations
#           X2 <- FT_MCSI_data[ , 1]; mag_FT_MCSI_data[ , 1] <- sqrt(Re(X2)^2+Im(X2)^2); 
#           phase_FT_MCSI_data[ , 1] <- atan2(Im(X2), Re(X2)); 
# Real2 = mag_FT_MCSI_data[ , 1] * cos(phase_FT_MCSI_data[ , 1])
# Imaginary2 = mag_FT_MCSI_data[ , 1] * sin(phase_FT_MCSI_data[ , 1])
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5] - MCSI_data[5, 1]) < 0.001, "Perfect Syntesis", "Problems!!!")
#########

Examine the Kime-direction Distributions of the Phases for all 7 features (predictors + outcome)

df.phase_FT_MCSI_data <- as.data.frame(Re(phase_FT_MCSI_data))
df.phase_FT_MCSI_data %>% 
  gather() %>% 
  head()
##   key     value
## 1  V1  0.000000
## 2  V1 -2.845538
## 3  V1 -1.074318
## 4  V1  1.744811
## 5  V1  0.832915
## 6  V1  1.627984
colnames(df.phase_FT_MCSI_data) <- colnames(MCSI_data)
colnames(df.phase_FT_MCSI_data)[1] <- "ICS"
phaseDistributions <- gather(df.phase_FT_MCSI_data)
colnames(phaseDistributions) <- c("Feature", "Phase")

1.4 Figure 1.12

Map the value as our x variable, and use facet_wrap to separate by the key column

ggplot(phaseDistributions, aes(Phase)) + 
  # geom_histogram(bins = 10) + 
  geom_histogram(aes(y=..density..), bins = 10) + 
  facet_wrap( ~Feature, scales = 'free_x') +
  xlim(-pi, pi) + 
  theme(strip.text.x = element_text(size = 16, colour = "black", angle = 0))

  1. Perform the Space-kime transform separately for the X_test data
# dim(X_test) # [1] 24  6
# colnames(X_test)

FT_X_test <- array(complex(), c(24, 6))
mag_FT_X_test <- array(complex(), c(24, 6))
phase_FT_X_test <- array(complex(), c(24, 6))
IFT_NilPhase_FT_X_test <- X_test        # array( , c(24, 6))
IFT_ScramblePhase_FT_X_test <- X_test   # array( , c(24, 6)
IFT_LoadedPhase_FT_X_test <- X_test   # array( , c(24, 6)

for (i in 1:6) {
  FT_X_test[ , i] <- fft(X_test[ , i])
  X2 <- FT_X_test[ , i]
  mag_FT_X_test[ , i] <- sqrt(Re(X2)^2+Im(X2)^2); 
  phase_FT_X_test[ , i] <- atan2(Im(X2), Re(X2)); 
}

head(X_test)
##         GOVT      DUR      HOM      CAR    INCOME     EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389  99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300
  1. Nil-Phase reconstruction, invert back to spacetime the mag_FT_MCSI_data[ , i] & mag_FT_X_test [, i] signals with nil-phase
for (i in 1:7) {
  Real <- mag_FT_MCSI_data[ , i] * cos(0)  
  Imaginary <- mag_FT_MCSI_data[ , i] * sin(0) 
  IFT_NilPhase_FT_MCSI_data[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
# display(ift_NilPhase_X2mag, method = "raster")
# dim(IFT_NilPhase_FT_MCSI_data); View(IFT_NilPhase_FT_MCSI_data); # compare to View(MCSI_data[ , ])
}
colnames(IFT_NilPhase_FT_MCSI_data) <- colnames(MCSI_data)
# dim(IFT_NilPhase_FT_MCSI_data)
# dim(FT_MCSI_data)

# colnames(IFT_NilPhase_FT_MCSI_data) 
head(IFT_NilPhase_FT_MCSI_data)
##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 207.9947 7.096082 6.361494 7.448879 6.702678 225647.6 6.456717
## 2 158.7957 5.514798 4.119853 5.316651 4.422884 160111.0 5.162829
## 3 141.7572 5.090193 3.742753 4.849988 4.050338 143263.9 5.044055
## 4 133.9254 4.841114 3.482007 4.525654 3.756915 130886.9 4.913845
## 5 127.0829 4.662767 3.358568 4.331847 3.639068 125015.0 4.861882
## 6 122.8747 4.585099 3.284439 4.192362 3.508368 121664.0 4.790130
head(MCSI_data)
##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 87.68719 3.287462 2.255352 2.577982 3.094801 15542.81 3.409786
## 2 90.29462 3.368831 2.645022 2.677922 3.205195 16283.55 3.466667
## 3 82.79319 3.586345 2.424364 2.534137 2.962517 16686.75 3.484605
## 4 84.97138 3.625538 2.400287 2.552367 2.988522 16291.25 3.487805
## 5 85.62600 3.525480 2.392648 2.615706 3.007519 16643.69 3.491228
## 6 79.01301 3.694864 2.348943 2.566465 2.956193 16019.64 3.469789
for (i in 1:6) {
  Real <- mag_FT_X_test[ , i] * cos(0)  
  Imaginary <- mag_FT_X_test[ , i] * sin(0) 
  IFT_NilPhase_FT_X_test[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_X_test[ , i]))
}
head(IFT_NilPhase_FT_X_test)
##         GOVT      DUR      HOM      CAR   INCOME     EDUC
## 469 3.751431 2.303827 2.806630 3.152860 119636.8 4.666088
## 470 3.368501 1.872374 2.306036 2.500152 108714.2 4.502943
## 471 3.310008 1.858239 2.217383 2.506473 104066.2 4.498433
## 472 3.286447 1.911739 2.221120 2.385652 105367.7 4.532361
## 473 3.171287 1.801815 2.191204 2.324749 102780.5 4.510567
## 474 3.178573 1.829411 2.063818 2.243006 106010.0 4.490027
head(X_test)
##         GOVT      DUR      HOM      CAR    INCOME     EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389  99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300
  1. Perform ARIMAX modeling on IFT_NilPhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_NilPhase_FT_MCSI_data_Y_train <- IFT_NilPhase_FT_MCSI_data$Y_train[1:(39*12)]
# length(Y_train)/12
# Y_test <- IFT_NilPhase_FT_MCSI_data$Y_train[(39*12+1):length(IFT_NilPhase_FT_MCSI_data$Y_train)]; length(Y_test)/12

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_NilPhase_FT_MCSI_data_X_train <- 
  as.data.frame(cbind(IFT_NilPhase_FT_MCSI_data$GOVT, IFT_NilPhase_FT_MCSI_data$DUR, 
                      IFT_NilPhase_FT_MCSI_data$HOM, IFT_NilPhase_FT_MCSI_data$CAR, 
                      IFT_NilPhase_FT_MCSI_data$INCOME, 
                      IFT_NilPhase_FT_MCSI_data$EDUC))[1:(39*12), ]
# dim(X_train)

colnames(IFT_NilPhase_FT_MCSI_data_X_train) <- colnames(IFT_NilPhase_FT_X_test)

# Outcome Variable to be modeled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase <- 
         ts(IFT_NilPhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)

# Find ARIMAX model
modArima_train_IFT_NilPhase <- 
  auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase,
             xreg=as.matrix(IFT_NilPhase_FT_MCSI_data_X_train))
modArima_train_IFT_NilPhase
## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase 
## Regression with ARIMA(1,0,0)(2,0,0)[12] errors 
## 
## Coefficients:
##          ar1    sar1    sar2  intercept     GOVT     DUR     HOM     CAR
##       0.8212  0.1398  0.1069    26.4905  10.4987  8.9880  1.8927  2.1449
## s.e.  0.0314  0.0480  0.0484     5.9479   1.3461  1.1386  1.1916  1.0819
##       INCOME     EDUC
##        2e-04  -3.4997
## s.e.   0e+00   1.8767
## 
## sigma^2 estimated as 3.371:  log likelihood=-944.25
## AIC=1910.49   AICc=1911.07   BIC=1956.13
# Regression with ARIMA(1,0,0)(1,0,0)[12] errors 
# Coefficients:
#  ar1    sar1  intercept       V1      V2      V3      V4     V5       V6
# 0.8194  0.1554    26.3831  10.6846  8.6527  2.3138  2.2177  2e-04  -3.5998
# s.e.  0.0316  0.0480     5.9317   1.3501  1.1259  1.1826  1.0817    NaN   1.8950
# sigma^2 estimated as 3.401:  log likelihood=-946.65, AIC=1913.31   AICc=1913.79   BIC=1954.79

pred_arimax_1_0_1 <- forecast(modArima_train_IFT_NilPhase, xreg = as.matrix(IFT_NilPhase_FT_X_test))
pred_arimax_1_0_1_2017_2018 <- 
  ts(pred_arimax_1_0_1$mean, frequency=12, start=c(2017,1), end=c(2018,12))

pred_arimax_1_0_1_2017_2018
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2017 109.90548  97.41496  94.62942  94.51787  91.35742  90.75062  90.29304
## 2018  87.43147  87.56057  88.13400  88.33882  86.86751  87.93807  88.93841
##            Aug       Sep       Oct       Nov       Dec
## 2017  88.47012  86.78642  88.22678  88.08146  87.78943
## 2018  88.53741  88.14400  91.15186  91.02073  93.30892
# alternatively:
# pred_arimax_1_0_1_2017_2018 <- predict(modArima_train_IFT_NilPhase, 
#                                              n.ahead = pred_length, newxreg = X_test)$pred
  1. Scrambled-Phase reconstruction Invert back to spacetime the mag_FT_MCSI_data[ , i] signal using scrambled-phases randomly shuffle the rows of the Phases-matrix (Training & Testing XReg Data)
shuffle_phase_FT_MCSI_data <- phase_FT_MCSI_data

for (i in 1:7) {
  set.seed(12345)   # sample randomly Phases for each of the 7 covariates (X & Y)
  shuffle_phase_FT_MCSI_data[ , i] <- phase_FT_MCSI_data[sample(nrow(phase_FT_MCSI_data)), i]
  Real <- mag_FT_MCSI_data[ , i] * cos(Re(shuffle_phase_FT_MCSI_data[ , i]))  
  Imaginary <- mag_FT_MCSI_data[ , i] * sin(Re(shuffle_phase_FT_MCSI_data[ , i])) 
  IFT_ScramblePhase_FT_MCSI_data[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
}
colnames(IFT_ScramblePhase_FT_MCSI_data) <- colnames(MCSI_data)
# dim(IFT_ScramblePhase_FT_MCSI_data)
# dim(FT_MCSI_data)
# colnames(IFT_ScramblePhase_FT_MCSI_data)
head(IFT_ScramblePhase_FT_MCSI_data)
##     Y_train      GOVT       DUR      HOM        CAR     INCOME      EDUC
## 1  6.482728 -1.125671 -2.159094 1.875901 -0.1693016 -108864.79 -1.702054
## 2 10.748641 -1.199417 -2.166547 1.851391 -0.3366837  -78883.03 -1.347813
## 3  9.101059 -1.046110 -2.111091 1.734381 -0.4663080  -72834.60 -1.407347
## 4  9.563311 -1.101155 -2.174029 1.669424 -0.3714120  -66248.91 -1.256556
## 5  7.531369 -1.145802 -2.239729 1.435862 -0.6186482  -64776.01 -1.270882
## 6  5.776107 -1.162289 -2.372581 1.495593 -0.5358616  -55892.88 -1.267362
# Perform similar scrambling of the phases separately for X_test (24 * 6) data
shuffle_phase_FT_X_test <- phase_FT_X_test
for (i in 1:6) {
  set.seed(12345)   # sample randomly Phases for each of the 6 predictors covariates (X)
  shuffle_phase_FT_X_test[ , i] <- phase_FT_MCSI_data[sample(nrow(phase_FT_X_test)), i]
  Real <- mag_FT_X_test[ , i] * cos(Re(shuffle_phase_FT_X_test[ , i]))  
  Imaginary <- mag_FT_X_test[ , i] * sin(Re(shuffle_phase_FT_X_test[ , i])) 
  IFT_ScramblePhase_FT_X_test[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_X_test[ , i]))
}
# dim(IFT_ScramblePhase_FT_X_test)
head(IFT_ScramblePhase_FT_X_test)
##          GOVT      DUR         HOM       CAR   INCOME         EDUC
## 469 0.7025311 1.428040 -0.19540518 -1.182654 21305.67 -0.007572879
## 470 0.6618414 1.517814 -0.17378186 -1.509495 16567.94 -0.004889550
## 471 0.6737464 1.546981 -0.05771217 -1.273308 22255.07 -0.006445659
## 472 0.8437895 1.342235 -0.28304009 -1.491103 18828.74 -0.001261558
## 473 0.9608393 1.417579 -0.33714487 -1.543223 17529.16 -0.011443648
## 474 0.8444402 1.599271 -0.18953768 -1.160880 23382.07  0.006601303
  1. Perform ARIMAX modeling on IFT_ScramblePhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_ScramblePhase_FT_MCSI_data_Y_train <- 
  IFT_ScramblePhase_FT_MCSI_data$Y_train[1:(39*12)]

# length(Y_train)/12

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_ScramblePhase_FT_MCSI_data_X_train <- 
  as.data.frame(cbind(IFT_ScramblePhase_FT_MCSI_data$GOVT, IFT_ScramblePhase_FT_MCSI_data$DUR, 
                      IFT_ScramblePhase_FT_MCSI_data$HOM, IFT_ScramblePhase_FT_MCSI_data$CAR, 
                      IFT_ScramblePhase_FT_MCSI_data$INCOME,
                      IFT_ScramblePhase_FT_MCSI_data$EDUC))[1:(39*12), ]
# dim(IFT_ScramblePhase_FT_MCSI_data_X_train)

# Outcome Variable to be modeled, as a time series
MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase <- 
  ts(IFT_ScramblePhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# unchanged: MCSI_Data_monthAvg_ts_Y_test <- ts(IFT_ScramblePhase_FT_MCSI_data_Y_train, start=c(2017,1), end=c(2018, 12), frequency = 12)

# Find ARIMAX model
modArima_train_IFT_ScramblePhase <- 
  auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase,
             xreg=as.matrix(IFT_ScramblePhase_FT_MCSI_data_X_train))

modArima_train_IFT_ScramblePhase
## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase 
## Regression with ARIMA(0,0,4)(2,0,1)[12] errors 
## 
## Coefficients:
##          ma1     ma2     ma3     ma4    sar1     sar2     sma1  intercept
##       1.0977  0.9818  0.7564  0.2771  1.2610  -0.4362  -0.7847     7.2614
## s.e.  0.0512  0.0650  0.0556  0.0460  0.1039   0.0571   0.1048     3.4569
##            V1       V2       V3       V4      V5      V6
##       -10.109  -5.2836  -3.2822  -0.9586  -1e-04  8.1702
## s.e.    1.274   1.1168   0.9937   1.0454   1e-04  1.8237
## 
## sigma^2 estimated as 8.633:  log likelihood=-1164.01
## AIC=2358.02   AICc=2359.08   BIC=2420.25
# Regression with ARIMA(1,0,2)(2,0,0)[12] errors 
# Coefficients:
#  ar1      ma1      ma2     sar1    sar2  intercept       V1       V2       V3       V4   V5      V6
#0.9570  -0.0508  -0.1066  -0.0249  0.0320     62.736  -7.4009  -6.2064  -2.1374  -3.2478    0  4.5433
#s.e.  0.0151   0.0511   0.0516   0.0483  0.0497        NaN   1.3351   1.0909   1.1224   1.1051  NaN  1.9499
# sigma^2 estimated as 6.45:  log likelihood=-1095.3 AIC=2216.6   AICc=2217.4   BIC=2270.53

pred_arimax_1_1_3 <- 
  forecast(modArima_train_IFT_ScramblePhase, 
           xreg =as.matrix(IFT_ScramblePhase_FT_X_test))
pred_arimax_1_1_3_2017_2018 <- 
  ts(pred_arimax_1_1_3$mean, frequency=12, start=c(2017,1), end=c(2018,12))
pred_arimax_1_1_3_2017_2018
##             Jan        Feb        Mar        Apr        May        Jun
## 2017 -16.207593 -14.040479 -13.627646  -9.911815 -10.131338 -12.720078
## 2018 -11.669356  -9.575028 -10.158516  -7.422994  -6.529859  -7.375194
##             Jul        Aug        Sep        Oct        Nov        Dec
## 2017 -13.694857 -13.923550 -14.044626 -13.325872 -11.560602 -12.577584
## 2018  -7.188819  -6.916454  -8.000034  -7.653727  -8.082183  -9.429002
#pred_arimax_1_1_3_2017_2018_pred <- predict(modArima_train_IFT_ScramblePhase, n.ahead = pred_length, newxreg = IFT_ScramblePhase_FT_X_test)$pred
  1. Loaded-Phase reconstruction: Phase = \(\pi\)/8 + Phase Invert back to spacetime the mag_FT_MCSI_data[ , i] signal using Loaded-phases define a pi-wrapper calculator to ensure phase-arithmetic stays within [-pi : pi)
pi_wrapper <- function (x) {  
  if(abs(x%%(2*pi)) <= pi) return(x%%(2*pi))
  else if (-2*pi <= x%%(2*pi) & x%%(2*pi) < -pi) return (2*pi + x%%(2*pi))
  else if (pi <= x%%(2*pi) & x%%(2*pi) < 2*pi) return (x%%(2*pi) - 2*pi)
  else return (0)
}

loaded_phase_FT_MCSI_data <- Re(phase_FT_MCSI_data)
for (i in 1:dim(phase_FT_MCSI_data)[1]) {
  for (j in 1:dim(phase_FT_MCSI_data)[2])
  loaded_phase_FT_MCSI_data[i,j] <- pi_wrapper(pi/8 + Re(phase_FT_MCSI_data[i,j]))
}
# head(loaded_phase_FT_MCSI_data)
# Compare Loaded Phases to Native Phases
    df.loadedPhase_FT_MCSI_data <- as.data.frame(loaded_phase_FT_MCSI_data)
    df.loadedPhase_FT_MCSI_data %>% gather() %>% head()
##   key      value
## 1  V1  0.3926991
## 2  V1 -2.4528385
## 3  V1 -0.6816193
## 4  V1  2.1375102
## 5  V1  1.2256141
## 6  V1  2.0206829
    colnames(df.loadedPhase_FT_MCSI_data) <- colnames(MCSI_data)
    colnames(df.loadedPhase_FT_MCSI_data)[1] <- "ICS"
    loadedPhaseDistributions <- gather(df.loadedPhase_FT_MCSI_data)
    colnames(loadedPhaseDistributions) <- c("Feature", "Phase")
    
    # map the value as our x variable, and use facet_wrap to separate by the key column:
    ggplot(loadedPhaseDistributions, aes(Phase)) + 
      geom_histogram(aes(y=..density..), bins = 10) + 
      facet_wrap( ~Feature, scales = 'free_x') +
      xlim(-pi, pi) + 
      theme(strip.text.x = element_text(size = 16, colour = "black", angle = 0))

for (i in 1:7) {
  Real <- mag_FT_MCSI_data[ , i] * cos(loaded_phase_FT_MCSI_data[ , i])  
  Imaginary <- mag_FT_MCSI_data[ , i] * sin(loaded_phase_FT_MCSI_data[ , i]) 
  IFT_LoadedPhase_FT_MCSI_data[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
}
colnames(IFT_LoadedPhase_FT_MCSI_data) <- colnames(MCSI_data)
# dim(IFT_LoadedPhase_FT_MCSI_data)
# dim(FT_MCSI_data)
head(IFT_LoadedPhase_FT_MCSI_data)
##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 81.01240 3.037219 2.083673 2.381744 2.859224 14359.69 3.150231
## 2 83.42135 3.112394 2.443681 2.474077 2.961214 15044.04 3.202782
## 3 76.49093 3.313351 2.239820 2.341237 2.737009 15416.54 3.219355
## 4 78.50332 3.349560 2.217576 2.358080 2.761035 15051.15 3.222312
## 5 79.10811 3.257119 2.210519 2.416597 2.778585 15376.77 3.225474
## 6 72.99850 3.413609 2.170140 2.371105 2.731167 14800.22 3.205667
head(MCSI_data)
##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 87.68719 3.287462 2.255352 2.577982 3.094801 15542.81 3.409786
## 2 90.29462 3.368831 2.645022 2.677922 3.205195 16283.55 3.466667
## 3 82.79319 3.586345 2.424364 2.534137 2.962517 16686.75 3.484605
## 4 84.97138 3.625538 2.400287 2.552367 2.988522 16291.25 3.487805
## 5 85.62600 3.525480 2.392648 2.615706 3.007519 16643.69 3.491228
## 6 79.01301 3.694864 2.348943 2.566465 2.956193 16019.64 3.469789
  1. Perform ARIMAX modeling on IFT_LoadedPhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_LoadedPhase_FT_MCSI_data_Y_train <- IFT_LoadedPhase_FT_MCSI_data$Y_train[1:(39*12)]

# length(IFT_LoadedPhase_FT_MCSI_data_Y_train)/12
# Y_test <- IFT_NilPhase_FT_MCSI_data$Y_train[(39*12+1):length(IFT_NilPhase_FT_MCSI_data$Y_train)]; length(Y_test)/12

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_LoadedPhase_FT_MCSI_data_X_train <- 
  as.data.frame(cbind(IFT_LoadedPhase_FT_MCSI_data$GOVT, IFT_LoadedPhase_FT_MCSI_data$DUR, 
                      IFT_LoadedPhase_FT_MCSI_data$HOM, IFT_LoadedPhase_FT_MCSI_data$CAR, 
                      IFT_LoadedPhase_FT_MCSI_data$INCOME,
                      IFT_LoadedPhase_FT_MCSI_data$EDUC))[1:(39*12),]
# dim(IFT_LoadedPhase_FT_MCSI_data_X_train)

# Outcome Variable to be modeled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase <- 
  ts(IFT_LoadedPhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# unchanged: MCSI_Data_monthAvg_ts_Y_test <- ts(IFT_LoadedPhase_FT_MCSI_data_Y_train, start=c(2017,1), end=c(2018, 12), frequency = 12)

# Find ARIMAX model
modArima_train_IFT_LoadedPhase <- 
  auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase,
             xreg=as.matrix(IFT_LoadedPhase_FT_MCSI_data_X_train))
pred_arimax_2_0_1_Loaded_2017_2018 <- 
  predict(modArima_train_IFT_LoadedPhase, n.ahead = pred_length, newxreg = X_test)$pred
modArima_train_IFT_LoadedPhase
## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase 
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sma1  intercept        V1        V2
##       1.4447  -0.4737  -0.6944  0.5781  -0.5307   140.8610  -11.4417  -10.9797
## s.e.  0.1915   0.1719   0.1667  0.4193   0.4340     8.0166    1.1624    1.0447
##            V3       V4     V5      V6
##       -2.8305  -0.7844  0e+00  1.6079
## s.e.   0.9301   1.0023  1e-04  1.8702
## 
## sigma^2 estimated as 7.31:  log likelihood=-1124.24
## AIC=2274.48   AICc=2275.28   BIC=2328.41

1.5 Figure 1.13

  1. Final VIZ: Zoom in on a smaller interval
plot(forecast(modArima_train, xreg = as.matrix(X_test)),                             # Spacetime ARIMA forecast
     include=48, lwd=4, xlab="Time", ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)", ylim=c(65,110),
     main = "Spacetime vs. Spacekime Analytics: (2017-2018) US ICS Forecasting\n
      based on fitting ARIMAX Models on raw & kime-transformed 1978-2016 data")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)          # Observed Y_Test timeseries
lines(pred_arimax_1_0_1_2017_2018, col = "green", lwd = 4, lty=1)         # Nil Phase reconstr
lines(pred_arimax_1_1_3_2017_2018, col = "purple", lwd = 4, lty=1)        # Scramble Phase recon
lines(pred_arimax_2_0_1_Loaded_2017_2018, col = "brown", lwd = 4, lty=1)  # Loaded Phase recon
lines(26+pred_arimax_1_1_3_2017_2018, col = "orange", lwd = 4, lty=1)        # Offset Scramble Phase recon
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)", 
                        "ARIMAX(2,0,1)-model ICS Forecasting (2017-2018)",
                        "MCSI-reported Official Index (2017-2018)",
                        "ARIMAX(1,0,0) Nil-Phase recon model ICS Forecasting (2017-2018)",
                        "ARIMAX(1,0,2) Scrambled-Phase recon model ICS Forecasting (2017-2018)",
                        "ARIMAX(2,0,1) Loaded-Phase recon model ICS Forecasting (2017-2018)",
                        "ARIMAX(1,0,2) Offset Scrambled-Phase recon model ICS Forecasting (2017-2018)"),
       col=c("black", "blue", "red", "green", "purple", "brown", "orange"), 
       lty=c(1,1,1,1,1,1,1), lwd=c(4,4,4,4,4,4,4), cex=1.5, x.intersp=1.5, y.intersp=0.4)
text(2015.4, 66, expression(atop(paste("Training Region (1978-2016)"), 
                paste(Model(ICS) %->% "ARIMAX(p, q, r) ;  ", 
                      XReg %==% X[i], " ", i %in% {1 : 6}))), cex=1.5)
text(2018, 66, expression(atop(paste("Validation Region (2017-2018)"), 
        paste(hat(ICS) %<-% "ARIMAX( . , . , . ) ;  ", 
              XReg %==% X[i], " ", i %in% {1 : 6}))), cex=1.5)

Interactive plot here

nl1<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI-reported Official Index (2017-2018)")
nl2<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,0) Nil-Phase\nrecon model ICS Forecasting (2017-2018)")
nl3<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
nl4<-ADDline(TS = pred_arimax_2_0_1_Loaded_2017_2018,linetype = "TS",Name = "ARIMAX(2,0,1) Loaded-Phase\nrecon model ICS Forecasting (2017-2018)")
nl5<-ADDline(TS = 26+pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Offset Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")

TSplot(48,modArima_train,as.matrix(X_test),NEWtitle = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
  add_lines(x=nl1$X,text=nl1$TEXT,y=nl1$Y,name=nl1$NAME,line=list(color="gray"))%>%
  add_lines(x=nl2$X,text=nl2$TEXT,y=nl2$Y,name=nl2$NAME,line=list(color="blue"))%>%
  add_lines(x=nl3$X,text=nl3$TEXT,y=nl3$Y,name=nl3$NAME,line=list(color="purple"))%>%
  add_lines(x=nl4$X,text=nl4$TEXT,y=nl4$Y,name=nl4$NAME,line=list(color="brown"))%>%
  add_lines(x=nl5$X,text=nl5$TEXT,y=nl5$Y,name=nl5$NAME,line=list(color="orange"))

Full 41-year plot

plot(forecast(modArima_train, xreg = as.matrix(X_test)),  # include=48,              # Spacetime ARIMA forecast
     lwd=4, xlab="Time", ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)", ylim=c(55,125),
     main = "Spacetime vs. Spacekime Analytics: (2017-2018) US ICS Forecasting\n
     based on fitting ARIMAX Models on raw & kime-transformed 1978-2016 data")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)          # Observed Y_Test timeseries
lines(pred_arimax_1_0_1_2017_2018, col = "green", lwd = 4, lty=1)         # Nil Phase reconstr
lines(pred_arimax_1_1_3_2017_2018, col = "purple", lwd = 4, lty=1)        # Scramble Phase recon
lines(pred_arimax_2_0_1_Loaded_2017_2018, col = "brown", lwd = 4, lty=1)  # Loaded Phase recon
lines(26+pred_arimax_1_1_3_2017_2018, col = "orange", lwd = 4, lty=1)        # Offset Scramble Phase recon
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)", 
                                    "ARIMAX(2,0,1)-model ICS Forecasting (2017-2018)",
                                    "MCSI-reported Official Index (2017-2018)",
                                    "ARIMAX(1,0,0) Nil-Phase recon model ICS Forecasting (2017-2018)",
                                    "ARIMAX(1,0,2) Scrambled-Phase recon model ICS Forecasting (2017-2018)",
                                    "ARIMAX(2,0,1) Loaded-Phase recon model ICS Forecasting (2017-2018)",
                                    "ARIMAX(1,0,2) Offset Scrambled-Phase recon model ICS Forecasting (2017-2018)"),
       col=c("black", "blue", "red", "green", "purple", "brown", "orange"), 
       lty=c(1,1,1,1,1,1,1), lwd=c(4,4,4,4,4,4,4), cex=1.2, x.intersp=1.5, y.intersp=0.4)

Interactive plot here

nl1<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI-reported Official Index (2017-2018)")
nl2<-ADDline(TS = pred_arimax_1_0_1_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,0) Nil-Phase\nrecon model ICS Forecasting (2017-2018)")
nl3<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
nl4<-ADDline(TS = pred_arimax_2_0_1_Loaded_2017_2018,linetype = "TS",Name = "ARIMAX(2,0,1) Loaded-Phase\nrecon model ICS Forecasting (2017-2018)")
nl5<-ADDline(TS = 26+pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Offset Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")

TSplot("all",modArima_train,as.matrix(X_test), NEWtitle = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
  add_lines(x=nl1$X,text=nl1$TEXT,y=nl1$Y,name=nl1$NAME,line=list(color="gray"))%>%
  add_lines(x=nl2$X,text=nl2$TEXT,y=nl2$Y,name=nl2$NAME,line=list(color="blue"))%>%
  add_lines(x=nl3$X,text=nl3$TEXT,y=nl3$Y,name=nl3$NAME,line=list(color="purple"))%>%
  add_lines(x=nl4$X,text=nl4$TEXT,y=nl4$Y,name=nl4$NAME,line=list(color="brown"))%>%
  add_lines(x=nl5$X,text=nl5$TEXT,y=nl5$Y,name=nl5$NAME,line=list(color="orange"))
SOCR Resource Visitor number Web Analytics SOCR Email