SOCR ≫ | TCIU Website ≫ | TCIU GitHub ≫ |
# Get this figure: fig <- get_figure("MattSundquist", 4064)
# Get this figure's data: data <- get_figure("MattSundquist", 4064)$data
# Add data to this figure: p <- add_trace(p, x=c(4, 5), y=c(4, 5), kwargs=list(filename="Klein bottle", fileopt="extend"))
# Get y data of first trace: y1 <- get_figure("MattSundquist", 4064)$data[[1]]$y
library(plotly)
The function fftshift()
is useful for visualizing the Fourier transform with the zero-frequency component in the middle of the spectrum. Its inverse counterpart, ifftshift()
, is needed to rearrange again the indices appropriately after the IFT is employed, so that the image is correctly reconstructed in spacetime. The FT only computes half of the frequency spectrum corresponding to the non-negative (positive and zero if the length(f)
is odd) frequencies in order to save computation time. To preserve the dimensions of the output \(\hat{f}=FT(f)\), the second half of the frequency spectrum (the complex conjugate of the first half) is just added at the end of this vector. In a 1D setting, the result of fft()
is:
\(0\ 1\ 2\ 3\ ...\ (freq\ bins > 0)\ ... {\frac{Fs}{2}}\) and \(-{\frac{Fs}{2}}\ ... \ (freq\ bins < 0)\ ...\ -3\ -2\ -1.\)
where \(F_s\) is the frequency sample. The fftshift
method sets the zero-frequency component in the center of the array, i.e., it just shifts (offsets) the second part with the negative frequency bins to the beginning and the first part to the end of the resulting FT vector, or matrix. Thus, the shifted discrete FT can be nicely plotted in the center covering the frequency spectrum from \(-{\frac{Fs}{2}}\) on the left to \({\frac{Fs}{2}}\) on the right. This is not necessary, but is used for better visualization aesthetics. To synthesize back the correct image, after using fftshift
on the FT signal, we always have to undo that re-indexing by using ifftshift()
on the inverse-FT.
# FFT SHIFT
<- function(img_ff, dim = -1) {
fftshift
<- dim(img_ff)[1]
rows <- dim(img_ff)[2]
cols # planes <- dim(img_ff)[3]
<- function(img_ff) {
swap_up_down <- ceiling(rows/2)
rows_half return(rbind(img_ff[((rows_half+1):rows), (1:cols)], img_ff[(1:rows_half), (1:cols)]))
}
<- function(img_ff) {
swap_left_right <- ceiling(cols/2)
cols_half return(cbind(img_ff[1:rows, ((cols_half+1):cols)], img_ff[1:rows, 1:cols_half]))
}
#swap_side2side <- function(img_ff) {
# planes_half <- ceiling(planes/2)
# return(cbind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], img_ff[1:rows, 1:cols, 1:planes_half]))
#}
if (dim == -1) {
<- swap_up_down(img_ff)
img_ff return(swap_left_right(img_ff))
}else if (dim == 1) {
return(swap_up_down(img_ff))
}else if (dim == 2) {
return(swap_left_right(img_ff))
}else if (dim == 3) {
# Use the `abind` package to bind along any dimension a pair of multi-dimensional arrays
# install.packages("abind")
library(abind)
<- dim(img_ff)[3]
planes <- ceiling(rows/2)
rows_half <- ceiling(cols/2)
cols_half <- ceiling(planes/2)
planes_half
<- abind(img_ff[((rows_half+1):rows), (1:cols), (1:planes)],
img_ff 1:rows_half), (1:cols), (1:planes)], along=1)
img_ff[(<- abind(img_ff[1:rows, ((cols_half+1):cols), (1:planes)],
img_ff 1:rows, 1:cols_half, (1:planes)], along=2)
img_ff[<- abind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)],
img_ff 1:rows, 1:cols, 1:planes_half], along=3)
img_ff[return(img_ff)
}else {
stop("Invalid dimension parameter")
}
}
<- function(img_ff, dim = -1) {
ifftshift
<- dim(img_ff)[1]
rows <- dim(img_ff)[2]
cols
<- function(img_ff) {
swap_up_down <- floor(rows/2)
rows_half return(rbind(img_ff[((rows_half+1):rows), (1:cols)], img_ff[(1:rows_half), (1:cols)]))
}
<- function(img_ff) {
swap_left_right <- floor(cols/2)
cols_half return(cbind(img_ff[1:rows, ((cols_half+1):cols)], img_ff[1:rows, 1:cols_half]))
}
if (dim == -1) {
<- swap_left_right(img_ff)
img_ff return(swap_up_down(img_ff))
}else if (dim == 1) {
return(swap_up_down(img_ff))
}else if (dim == 2) {
return(swap_left_right(img_ff))
}else if (dim == 3) {
# Use the `abind` package to bind along any dimension a pair of multi-dimensional arrays
# install.packages("abind")
library(abind)
<- dim(img_ff)[3]
planes <- floor(rows/2)
rows_half <- floor(cols/2)
cols_half <- floor(planes/2)
planes_half
<- abind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)],
img_ff 1:rows, 1:cols, 1:planes_half], along=3)
img_ff[<- abind(img_ff[1:rows, ((cols_half+1):cols), (1:planes)],
img_ff 1:rows, 1:cols_half, (1:planes)], along=2)
img_ff[<- abind(img_ff[((rows_half+1):rows), (1:cols), (1:planes)],
img_ff 1:rows_half), (1:cols), (1:planes)], along=1)
img_ff[(return(img_ff)
}else {
stop("Invalid dimension parameter")
} }
We can use the UCI ML Air Quality Dataset to demonstrate the effect of kime-direction on the analysis of the longitudinal data. The Air Quality data consists of \(9358\) hourly-averaged responses from an array of 5 sensors embedded in an Air Quality Chemical Multisensor Device. These measurements were obtained in a significantly polluted area during a one year period (March 2004 to February 2005). The features include Concentrations for CO, Non Metanic Hydrocarbons, Benzene, Total Nitrogen Oxides (NOx), and Nitrogen Dioxide (NO2).
The attributes in the CSV file include:
<- read.csv("https://umich.instructure.com/files/8208336/download?download_frd=1")
aqi_data summary(aqi_data)
## Date Time CO.GT. PT08.S1.CO.
## Length:9471 Length:9471 Min. :-200.00 Min. :-200
## Class :character Class :character 1st Qu.: 0.60 1st Qu.: 921
## Mode :character Mode :character Median : 1.50 Median :1053
## Mean : -34.21 Mean :1049
## 3rd Qu.: 2.60 3rd Qu.:1221
## Max. : 11.90 Max. :2040
## NA's :114 NA's :114
## NMHC.GT. C6H6.GT. PT08.S2.NMHC. NOx.GT.
## Min. :-200.0 Min. :-200.000 Min. :-200.0 Min. :-200.0
## 1st Qu.:-200.0 1st Qu.: 4.000 1st Qu.: 711.0 1st Qu.: 50.0
## Median :-200.0 Median : 7.900 Median : 895.0 Median : 141.0
## Mean :-159.1 Mean : 1.866 Mean : 894.6 Mean : 168.6
## 3rd Qu.:-200.0 3rd Qu.: 13.600 3rd Qu.:1105.0 3rd Qu.: 284.0
## Max. :1189.0 Max. : 63.700 Max. :2214.0 Max. :1479.0
## NA's :114 NA's :114 NA's :114 NA's :114
## PT08.S3.NOx. NO2.GT. PT08.S4.NO2. PT08.S5.O3.
## Min. :-200 Min. :-200.00 Min. :-200 Min. :-200.0
## 1st Qu.: 637 1st Qu.: 53.00 1st Qu.:1185 1st Qu.: 700.0
## Median : 794 Median : 96.00 Median :1446 Median : 942.0
## Mean : 795 Mean : 58.15 Mean :1391 Mean : 975.1
## 3rd Qu.: 960 3rd Qu.: 133.00 3rd Qu.:1662 3rd Qu.:1255.0
## Max. :2683 Max. : 340.00 Max. :2775 Max. :2523.0
## NA's :114 NA's :114 NA's :114 NA's :114
## T RH AH X
## Min. :-200.000 Min. :-200.00 Min. :-200.0000 Mode:logical
## 1st Qu.: 10.900 1st Qu.: 34.10 1st Qu.: 0.6923 NA's:9471
## Median : 17.200 Median : 48.60 Median : 0.9768
## Mean : 9.778 Mean : 39.49 Mean : -6.8376
## 3rd Qu.: 24.100 3rd Qu.: 61.90 3rd Qu.: 1.2962
## Max. : 44.600 Max. : 88.70 Max. : 2.2310
## NA's :114 NA's :114 NA's :114
## X.1
## Mode:logical
## NA's:9471
##
##
##
##
##
<- ts(aqi_data, start=c(2004,3), freq=24) # hourly sampling rate
aqi_data.ts
<- as.POSIXct(paste(aqi_data$Date, aqi_data$Time), format="%m/%d/%Y %H:%M:%S")
dateTime
# set up training and testing time-periods
<- window(aqi_data.ts, end=c(2004,3))
alltrain.ts <- window(aqi_data.ts, start=c(2005,1))
allvalid.ts
# Estimate the ARIMAX model
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
<- auto.arima(aqi_data$CO.GT., xreg= as.matrix(aqi_data[ ,
fitArimaX c("PT08.S1.CO.", "NMHC.GT.", "C6H6.GT.", "PT08.S2.NMHC.",
"NOx.GT.", "PT08.S3.NOx.", "NO2.GT.", "PT08.S4.NO2.",
"PT08.S5.O3.", "T", "RH", "AH")]))
fitArimaX
## Series: aqi_data$CO.GT.
## Regression with ARIMA(1,0,1) errors
##
## Coefficients:
## ar1 ma1 intercept PT08.S1.CO. NMHC.GT. C6H6.GT.
## 0.9758 -0.5825 -40.4356 0.0056 0.0452 -2.9162
## s.e. 0.0025 0.0099 22.5593 0.0120 0.0071 0.5165
## PT08.S2.NMHC. NOx.GT. PT08.S3.NOx. NO2.GT. PT08.S4.NO2. PT08.S5.O3.
## 0.1017 0.0363 -0.0157 0.0091 -0.0196 -0.0091
## s.e. 0.0188 0.0057 0.0073 0.0098 0.0095 0.0044
## T RH AH
## -0.3853 -0.2147 3.4773
## s.e. 0.3318 0.1219 0.6934
##
## sigma^2 estimated as 1202: log likelihood=-46447.71
## AIC=92927.43 AICc=92927.49 BIC=93041.73
# Predict prospective CO concentration
<- 24*30 # 1 month forward forecasting
pred_length <- predict(fitArimaX, n.ahead = pred_length,
predArrivals newxreg = aqi_data[c((9471-pred_length+1):9471), c(4:15)])
#plot(predArrivals$pred, main="Forward time-series predictions (fitArimaX)")
plot(forecast(fitArimaX, xreg = as.matrix(aqi_data[c((9471-pred_length+1):9471), c(4:15)])))
## Warning in forecast.forecast_ARIMA(fitArimaX, xreg = as.matrix(aqi_data[c((9471
## - : Upper prediction intervals are not finite.
library(plotly)
library(magrittr)
# generate date sequence
<- forecast(fitArimaX, xreg = as.matrix(aqi_data[c((9471-pred_length+1):9471), c(4:15)])) arimaForecast
## Warning in forecast.forecast_ARIMA(fitArimaX, xreg = as.matrix(aqi_data[c((9471
## - : Upper prediction intervals are not finite.
str(arimaForecast )
## List of 10
## $ method : chr "Regression with ARIMA(1,0,1) errors"
## $ model :List of 19
## ..$ coef : Named num [1:15] 0.97577 -0.58251 -40.43563 0.00562 0.04517 ...
## .. ..- attr(*, "names")= chr [1:15] "ar1" "ma1" "intercept" "PT08.S1.CO." ...
## ..$ sigma2 : num 1202
## ..$ var.coef : num [1:15, 1:15] 6.22e-06 -9.86e-06 -3.02e-03 -1.36e-06 -1.69e-07 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:15] "ar1" "ma1" "intercept" "PT08.S1.CO." ...
## .. .. ..$ : chr [1:15] "ar1" "ma1" "intercept" "PT08.S1.CO." ...
## ..$ mask : logi [1:15] TRUE TRUE TRUE TRUE TRUE TRUE ...
## ..$ loglik : num -46448
## ..$ aic : num 92927
## ..$ arma : int [1:7] 1 1 0 0 1 0 0
## ..$ residuals: Time-Series [1:9471] from 1 to 9471: 11.79 5.48 4.3 3.47 8.06 ...
## ..$ call : language auto.arima(y = aqi_data$CO.GT., xreg = c(1360, 1292, 1402, 1376, 1272, 1197, 1185, 1136, 1094, 1010, 1011, 1066,| __truncated__ ...
## ..$ series : chr "aqi_data$CO.GT."
## ..$ code : int 1
## ..$ n.cond : num 0
## ..$ nobs : int 9357
## ..$ model :List of 10
## .. ..$ phi : num 0.976
## .. ..$ theta: num -0.583
## .. ..$ Delta: num(0)
## .. ..$ Z : num [1:2] 1 0
## .. ..$ a : num [1:2] 0.932 0
## .. ..$ P : num [1:2, 1:2] 4.218 -0.583 -0.583 0.339
## .. ..$ T : num [1:2, 1:2] 0.976 0 1 0
## .. ..$ V : num [1:2, 1:2] 1 -0.583 -0.583 0.339
## .. ..$ h : num 0
## .. ..$ Pn : num [1:2, 1:2] 4.218 -0.583 -0.583 0.339
## ..$ bic : num 93042
## ..$ aicc : num 92927
## ..$ xreg : num [1:9471, 1:12] 1360 1292 1402 1376 1272 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:12] "PT08.S1.CO." "NMHC.GT." "C6H6.GT." "PT08.S2.NMHC." ...
## ..$ x : Time-Series [1:9471] from 1 to 9471: 2.6 2 2.2 2.2 1.6 1.2 1.2 1 0.9 0.6 ...
## ..$ fitted : Time-Series [1:9471] from 1 to 9471: -9.19 -3.48 -2.1 -1.27 -6.46 ...
## ..- attr(*, "class")= chr [1:3] "forecast_ARIMA" "ARIMA" "Arima"
## $ level : num [1:2] 80 95
## $ mean : Time-Series [1:720] from 9358 to 10077: -11.29 -6.94 -12.26 -13.45 -36.93 ...
## ..- attr(*, "names")= chr [1:720] "8752" "8753" "8754" "8755" ...
## $ lower : Time-Series [1:720, 1:2] from 9358 to 10077: -102.5 -98.2 -103.5 -104.7 -128.2 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:2] "80%" "95%"
## $ upper : Time-Series [1:720, 1:2] from 9358 to 10077: 80 84.3 79 77.8 54.3 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:2] "80%" "95%"
## $ x : Time-Series [1:9471] from 1 to 9471: 2.6 2 2.2 2.2 1.6 1.2 1.2 1 0.9 0.6 ...
## $ series : chr "aqi_data$CO.GT."
## $ fitted : Time-Series [1:9471] from 1 to 9471: -9.19 -3.48 -2.1 -1.27 -6.46 ...
## $ residuals: Time-Series [1:9471] from 1 to 9471: 11.79 5.48 4.3 3.47 8.06 ...
## - attr(*, "class")= chr "forecast"
plot_ly(x=~dateTime, y=~arimaForecast$fitted,
name="ARIMA Model", type = "scatter", mode='lines+markers')
# Figure 6.9 ================================================
# aqi_data[is.na(aqi_data)] <- 0
# y <- aqi_data$PT08.S1.CO.
# t <- 1:dim(aqi_data)[1]
# # range <- diff(range(y))
#
# # Compute the spectral decomposition (harmonics)
# ff_harmonicsPlotLy = function(x=NULL, n=NULL, up=10L, plot=TRUE, add=F, main=NULL, ...) {
# # The discrete Fourier transformation
# dff = fft(x)
# # time
# t = seq(from = 1, to = length(x))
#
# # Upsampled time
# nt = seq(from = 1, to = length(x)+1-1/up, by = 1/up)
#
# #New spectrum
# ndff = array(data = 0, dim = c(length(nt), 1L))
# ndff[1] = dff[1] # mean, DC component
# if(n != 0){
# ndff[2:(n+1)] = dff[2:(n+1)] # positive frequencies come first
# ndff[length(ndff):(length(ndff) - n + 1)] = dff[length(x):(length(x) - n + 1)] # negative frequencies
# }
#
# # Invert the FT
# indff = fft(ndff/length(y), inverse = TRUE)
# idff = fft(dff/length(y), inverse = TRUE)
# if(plot){
# plot(x = dateTime, y = x, pch = 16L, xlab = "Time", ylab = "Measurement",
# col = rgb(red = 0.5, green = 0.5, blue = 0.5, alpha = 0.5),
# main = ifelse(is.null(main), paste(n, "harmonics"), main))
# lines(y = Mod(idff), x = dateTime, col = adjustcolor(1L, alpha = 0.5))
#
# plot_ly(x=~dateTime, y=~x, name="ARIMA Model", type = "scatter", mode='lines+markers') %>%
# add_trace(x=~dateTime, y =~ Mod(idff),
# name="First 3 Harmonics Model",
# type = "scatter", mode='lines+markers') %>%
# layout(title = "Air Quality with First 3 Harmonics Model")
# }
# ret = data.frame(time = dateTime, y = Mod(indff))
# return(ret)
# }
#
# # Apply ff_harmonics to the timeseries as x, specifying the number of harmonics (n) and
# # the upsampling (so we plot points in time beside the original ones)
# result = ff_harmonicsPlotLy(x = y, n = 12L, up = 100L, col = 2L, lwd=3, cex=2)
We can first explore the time-course harmonics of the data.
is.na(aqi_data)] <- 0
aqi_data[<- aqi_data$PT08.S1.CO.
y <- 1:dim(aqi_data)[1]
t # range <- diff(range(y))
# Compute the spectral decomposition (harmonics)
= function(x=NULL, n=NULL, up=10L, plot=TRUE, add=F, main=NULL, ...) {
ff_harmonics # The discrete Fourier transformation
= fft(x)
dff # time
= seq(from = 1, to = length(x))
t
# Upsampled time
= seq(from = 1, to = length(x)+1-1/up, by = 1/up)
nt
#New spectrum
= array(data = 0, dim = c(length(nt), 1L))
ndff 1] = dff[1] # mean, DC component
ndff[if(n != 0){
2:(n+1)] = dff[2:(n+1)] # positive frequencies come first
ndff[length(ndff):(length(ndff) - n + 1)] = dff[length(x):(length(x) - n + 1)] # negative frequencies
ndff[
}
# Invert the FT
= fft(ndff/length(y), inverse = TRUE)
indff = fft(dff/length(y), inverse = TRUE)
idff if(plot){
if(!add){
plot(x = t, y = x, pch = 16L, xlab = "Time", ylab = "Measurement",
col = rgb(red = 0.5, green = 0.5, blue = 0.5, alpha = 0.5),
main = ifelse(is.null(main), paste(n, "harmonics"), main))
lines(y = Mod(idff), x = t, col = adjustcolor(1L, alpha = 0.5))
}lines(y = Mod(indff), x = nt, ...)
}= data.frame(time = nt, y = Mod(indff))
ret return(ret)
}
# Apply ff_harmonics to the timeseries as x, specifying the number of harmonics (n) and
# the upsampling (so we plot points in time beside the original ones)
= ff_harmonics(x = y, n = 12L, up = 100L, col = 2L, lwd=3, cex=2) result
# We can add the fourth-to-twelveth harmonics and look at their sum (as a series difference, 12-3)
= ff_harmonics(x = y, n = 12L, up = 10L, col = 2L, plot = FALSE)
add4to12_harmonics $y <- add4to12_harmonics$y - ff_harmonics(x = y, n = 3L, up = 10L, plot = T, col = 2L, lwd=3)$y add4to12_harmonics
plot(add4to12_harmonics, pch = 16L, xlab = "Time", ylab = "Measurement",
main = "Sum of all harmonics up to order 12", type = "l", col = 2)
# Harmonics plot of multiple frequencies (waves) in different colors
= rainbow(14, alpha = 0.6)
colors # ff_harmonics(x = y, n = 28L, up = 100L, col = colors[1], add=T)
for(i in 1:14){
= ifelse(i == 1, FALSE, TRUE)
ad ff_harmonics(x = y, n = i, up = 100L, col = colors[i], add = ad,
lwd= 3, main = "All waves up to 14th harmonic")
}
# install.packages("circular")
library("circular")
##
## Attaching package: 'circular'
## The following object is masked from 'package:plotly':
##
## wind
## The following objects are masked from 'package:stats':
##
## sd, var
set.seed(1234)
<- rvonmises(n=1000, mu=circular(pi/5), kappa=3)
x <- rvonmises(n=1000, mu=circular(-pi/3), kappa=5)
y <- rvonmises(n=1000, mu=circular(0), kappa=10)
z <- density(x, bw=25)
resx <- plot(resx, points.plot=TRUE, xlim=c(-1.5,1), ylim=c(-1.1, 1.5), offset=1.1, shrink=1.2, lwd=3)
res <- density(y, bw=25)
resy lines(resy, points.plot=TRUE, col=2, points.col=2, plot.info=res, offset=1.1, shrink=1.45, lwd=3)
<- density(z, bw=25)
resz lines(resz, points.plot=TRUE, col=3, points.col=3, plot.info=res, offset=1.1, shrink=1.2, lwd=3)
There is a key difference between spacekime data analytics and spacetime data modeling and inference. This contrast is based on the fact that in spacetime, statistical results are obtained by aggregating repeated (IID) dataset samples or measuring identical replicate cohorts under identical conditions. In spacekime, reliable inference can be made on a single sample, if the kime direction angles are known. Indeed the latter are generally no observable, however, they can be estimated, inferred, or approximated. As the FT and IFT are linear functionals, addition, averaging and multiplication by constants are preserved by the forward and inverse Fourier transforms. Therefore, if we have a number of phase estimates in k-space, we can aggregate these (e.g., by averaging them) and use the resulting assemblage phase to synthesize the data in spacekime. If the composite phases are indeed representative of the process kime orientation, then the reconstructed spacekime inference is expected to be valid even if we used a single sample. In a way, spacekime inference allows a dual representation of the central limit theorem, which guarantees the convergence of sample averages to their corresponding population mean counterparts.
In light of this analytics duality, we can now perform a traditional ARIMA modeling of CO
concentration (outcome: PT08.S1.CO.
) based on several covariates, e.g., predictors: NMHC.GT.
, C6H6.GT.
, PT08.S2.NMHC.
, NOx.GT.
, PT08.S3.NOx.
, NO2.GT.
, PT08.S4.NO2.
, PT08.S5.O3.
, T
, RH
, and AH
.
dim(aqi_data)
## [1] 9471 17
<- as.matrix(aqi_data[c(1:9000) , -c(1:3, 16:17)])
epochs_aqi_data is.matrix(epochs_aqi_data); dim(epochs_aqi_data)
## [1] TRUE
## [1] 9000 12
dim(epochs_aqi_data) <- c(9, 1000, 12)
dim(epochs_aqi_data); identical(epochs_aqi_data[9, 1000, 12], aqi_data[9000, 12+3])
## [1] 9 1000 12
## [1] TRUE
<- epochs_aqi_data[1, , ]; dim(epochs_aqi_data_1) epochs_aqi_data_1
## [1] 1000 12
# 1D timeseries FFT SHIFT
<- function(img_ff) {
fftshift1D <- length(img_ff)
rows <- ceiling(rows/2)
rows_half return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}
# 1. Transform all 9 signals to k-space (Fourier domain)
<- c(1:1000)
x1 <- array(complex(), c(9, 1000, 12))
FT_epochs_aqi_data <- array(complex(), c(9, 1000, 12))
mag_FT_epochs_aqi_data <- array(complex(), c(9, 1000, 12))
phase_FT_epochs_aqi_data for (i in 1:9) {
<- fft(epochs_aqi_data[i, , ])
FT_epochs_aqi_data[i, , ] <- FT_epochs_aqi_data[i, , ]
X2 # plot(fftshift1D(log(Re(X2)+2)), main = "log(fftshift1D(Re(FFT(timeseries))))")
<- sqrt(Re(X2)^2+Im(X2)^2);
mag_FT_epochs_aqi_data[i, , ] # plot(log(fftshift1D(Re(X2_mag))), main = "log(Magnitude(FFT(timeseries)))")
<- atan2(Im(X2), Re(X2));
phase_FT_epochs_aqi_data[i, , ] # plot(fftshift1D(X2_phase), main = "Shift(Phase(FFT(timeseries)))")
}
### Test the process to confirm calculations
# X2<-FT_epochs_aqi_data[1,,];X2_mag<-mag_FT_epochs_aqi_data[1,,];X2_phase<-phase_FT_epochs_aqi_data[1,,]
# Real2 = X2_mag * cos(X2_phase)
# Imaginary2 = X2_mag * sin(X2_phase)
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5,10] - epochs_aqi_data[1, 5, 10]) < 0.001, "Perfect Syntesis", "Problems!!!")
#######
# 2. Invert back to spacetime the epochs_aqi_data_1 signal with nil phase
= mag_FT_epochs_aqi_data[1, , ] * cos(0) # cos(phase_FT_epochs_aqi_data[1, , ])
Real = mag_FT_epochs_aqi_data[1, , ] * sin(0) # sin(phase_FT_epochs_aqi_data[1, , ])
Imaginary = Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_epochs_aqi_data[1,,]))
ift_NilPhase_X2mag # display(ift_NilPhase_X2mag, method = "raster")
# dim(ift_NilPhase_X2mag); View(ift_NilPhase_X2mag); # compare to View(epochs_aqi_data[1, , ])
# 3. Perform ARIMAX modeling of ift_NilPhase_X2mag; report (p,d,q) params and quality metrics AIC/BIC
library(forecast)
<- auto.arima(ift_NilPhase_X2mag[ , 1], xreg= ift_NilPhase_X2mag[ , 2:12])
fitArimaX_nil fitArimaX_nil
## Series: ift_NilPhase_X2mag[, 1]
## Regression with ARIMA(2,0,1) errors
##
## Coefficients:
## ar1 ar2 ma1 intercept xreg1 xreg2 xreg3 xreg4
## 1.1141 -0.1457 -0.7892 503.3455 -0.4028 0.1366 -0.5146 1.0961
## s.e. 0.2064 0.1571 0.1821 73.4212 0.1087 0.1123 0.1072 0.1059
## xreg5 xreg6 xreg7 xreg8 xreg9 xreg10 xreg11
## 1.2195 1.3063 1.2087 1.1491 -0.4823 0.0315 -0.4640
## s.e. 0.1029 0.1564 0.1023 0.1101 0.1049 0.1125 0.1076
##
## sigma^2 estimated as 30448: log likelihood=-6573.68
## AIC=13179.36 AICc=13179.91 BIC=13257.88
# Regression with ARIMA(2,0,1) errors
#Coefficients:
# ar1 ar2 ma1 intercept xreg1 xreg2 xreg3 xreg4 xreg5 xreg6 xreg7 xreg8
# 1.1141 -0.1457 -0.7892 503.3455 -0.4028 0.1366 -0.5146 1.0961 1.2195 1.3063 1.2087 1.1491
#s.e. 0.2064 0.1571 0.1821 73.4212 0.1087 0.1123 0.1072 0.1059 0.1029 0.1564 0.1023 0.1101
# xreg9 xreg10 xreg11
# -0.4823 0.0315 -0.4640
#s.e. 0.1049 0.1125 0.1076
# sigma^2 estimated as 30448: log likelihood=-6573.68 AIC=13179.36 AICc=13179.91 BIC=13257.88
# Predict prospective CO concentration
<- 24*7 # 1 week forward forecasting
pred_length <- predict(fitArimaX_nil, n.ahead = pred_length, newxreg = ift_NilPhase_X2mag[800:1000, 2:12]) predArrivals
## Warning in cbind(intercept = rep(1, n.ahead), newxreg): number of rows of result
## is not a multiple of vector length (arg 1)
## Warning in z[[1L]] + xm: longer object length is not a multiple of shorter
## object length
#plot(predArrivals$pred, main="Forward time-series predictions (fitArimaX)")
plot(forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[800:1000, 2:12]), main = "ARIMAX(2,0,1) Model Forecasting (1,001:1,200)", ylim = c(500, 6000))
lines(c(1001:1200), epochs_aqi_data[2, c(1:200), 1], col = "red", lwd = 2, lty=2)
# overlay original data to show Nil-Phase effects on reconstruction
legend("top", bty="n", legend=c("Prediction via Nil-Phase Reconstruction", "Real Timeseries (CO)"),
col=c("blue", "red"), lty=c(1,2), lwd=c(2,2), cex=0.9, x.intersp=0.5)
# 4. Compute the *average phase* across the eight series 2:9
<- apply(phase_FT_epochs_aqi_data, c(2,3), mean); dim(phase_Avg); phase_Avg[1:5 , 1:5] phase_Avg
## [1] 1000 12
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.0000000+0i 3.0047664+0i -1.038689+0i 0.6640319+0i 0.6095669+0i
## [2,] -1.4212860+0i -0.1016919+0i -2.648523+0i -0.5298284+0i 2.3392869+0i
## [3,] -1.4819520+0i 0.4516082+0i -2.289320+0i -0.9302969+0i 2.5178117+0i
## [4,] 0.1747996+0i -2.5427688+0i -1.433284+0i 1.6936688+0i 2.6054166+0i
## [5,] -2.0177463+0i -0.8160858+0i 3.018639+0i -1.0881887+0i 2.7825180+0i
# 5. Invert epochs_aqi_data_1 signal to spacetime using average-phase
= mag_FT_epochs_aqi_data[1, , ] * cos(phase_Avg)
Real = mag_FT_epochs_aqi_data[1, , ] * sin(phase_Avg)
Imaginary = Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_epochs_aqi_data[1,,]))
ift_AvgPhase_X2mag # display(ift_AvgPhase_X2mag, method = "raster")
# dim(ift_AvgPhase_X2mag); View(ift_AvgPhase_X2mag); # compare to View(epochs_aqi_data[1, , ])
# 6. Perform ARIMAX modeling on ift_AvgPhase_X2mag; report (p, d, q) parameters and quality metrics
<- auto.arima(ift_AvgPhase_X2mag[ , 1], xreg= ift_NilPhase_X2mag[ , 2:12])
fitArimaX_avg fitArimaX_avg
## Series: ift_AvgPhase_X2mag[, 1]
## Regression with ARIMA(2,0,3) errors
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 intercept xreg1 xreg2
## 0.3295 0.2384 0.2673 -0.0061 0.1573 742.8001 0.5838 0.2809
## s.e. 0.1354 0.1150 0.1363 0.0650 0.0451 97.1678 0.1700 0.1776
## xreg3 xreg4 xreg5 xreg6 xreg7 xreg8 xreg9 xreg10
## -0.6497 1.2399 -0.0261 1.0818 0.2540 0.3065 -0.4052 0.3511
## s.e. 0.1468 0.1695 0.1710 0.2394 0.1706 0.1665 0.1450 0.1791
## xreg11
## -0.4577
## s.e. 0.1709
##
## sigma^2 estimated as 82982: log likelihood=-7073.9
## AIC=14183.8 AICc=14184.5 BIC=14272.14
# ARIMA(2,0,3)
# Coefficients:
# ar1 ar2 ma1 ma2 ma3 intercept xreg1 xreg2 xreg3 xreg4 xreg5 xreg6
# 0.3295 0.2384 0.2673 -0.0061 0.1573 742.8001 0.5838 0.2809 -0.6497 1.2399 -0.0261 1.0818
# s.e. 0.1354 0.1150 0.1363 0.0650 0.0451 97.1676 0.1700 0.1776 0.1468 0.1695 0.1710 0.2394
# xreg7 xreg8 xreg9 xreg10 xreg11
# 0.2540 0.3065 -0.4052 0.3511 -0.4577
# s.e. 0.1706 0.1665 0.1450 0.1791 0.1709
# sigma^2 estimated as 82982: log likelihood=-7073.9 AIC=14183.8 AICc=14184.5 BIC=14272.14
# 7. Perform ARIMAX modeling on epochs_aqi_data[1,,]; report (p,d,q) parameters and quality metrics
<- auto.arima(epochs_aqi_data[1, , 1], xreg= epochs_aqi_data[1, , 2:12])
fitArimaX_orig fitArimaX_orig
## Series: epochs_aqi_data[1, , 1]
## Regression with ARIMA(0,1,4) errors
##
## Coefficients:
## ma1 ma2 ma3 ma4 xreg1 xreg2 xreg3 xreg4
## -0.6227 -0.0504 0.0067 -0.2011 0.0788 6.2927 0.0893 0.0180
## s.e. 0.0327 0.0366 0.0396 0.0307 0.0212 1.6588 0.0553 0.0152
## xreg5 xreg6 xreg7 xreg8 xreg9 xreg10 xreg11
## -0.0522 -0.0127 0.1809 0.1788 6.4129 1.7568 -11.9521
## s.e. 0.0206 0.0256 0.0273 0.0118 0.7105 0.2722 1.8927
##
## sigma^2 estimated as 2291: log likelihood=-5275.11
## AIC=10582.23 AICc=10582.78 BIC=10660.73
# Regression with ARIMA(1,1,4) errors
# Coefficients:
# ar1 ma1 ma2 ma3 ma4 xreg1 xreg2 xreg3 xreg4 xreg5 xreg6 xreg7
# 0.2765 -0.8891 0.1268 0.0304 -0.1766 0.0804 6.1495 0.0986 0.0163 -0.0482 -0.0110 0.1833
#s.e. 0.1294 0.1272 0.0933 0.0450 0.0384 0.0213 1.6611 0.0554 0.0152 0.0207 0.0257 0.0274
# xreg8 xreg9 xreg10 xreg11
# 0.1765 6.5374 1.7939 -12.0697
#s.e. 0.0118 0.7141 0.2724 1.8905
# sigma^2 estimated as 2287: log likelihood=-5273.65 AIC=10581.29 AICc=10581.92 BIC=10664.71
# 8. Compare the analytics results from #3, #6, and #7
# Generate a table with results
### correlations
<- format(cor(forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean,
cor_orig_obs 2, c(1:200), 1]), digits=3); cor_orig_obs epochs_aqi_data[
## [1] "-0.0114"
<- format(cor(forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean,
cor_orig_nil forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[801:1000, 2:12])$mean), digits=3); cor_orig_nil
## [1] "0.077"
<- format(cor(forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean,
cor_orig_avg forecast(fitArimaX_avg, xreg = ift_AvgPhase_X2mag[801:1000, 2:12])$mean), digits=3); cor_orig_avg
## [1] "0.309"
### plots
plot(forecast(fitArimaX_orig, xreg = epochs_aqi_data[1, 800:1000, 2:12]),
main = sprintf("ARIMAX Model Forecasting (1,001:1,200): Corr(TrueObs,Orig)=%s", cor_orig_obs),
xlim=c(800, 1200), ylim = c(500, 2000), col="black", lwd = 1, lty=1)
#lines(c(1001:1200), forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean,
# col = "black", lwd = 1, lty=1) # Original=True Phase reconstruction
lines(c(1001:1200), forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[801:1000, 2:12])$mean,
col = "purple", lwd = 1, lty=1)
lines(c(1001:1200), forecast(fitArimaX_avg, xreg = ift_AvgPhase_X2mag[801:1000, 2:12])$mean,
col = "red", lwd = 1, lty=1)
lines(c(1001:1200), epochs_aqi_data[2, c(1:200), 1], col = "green", lwd = 1, lty=1)
legend("topleft", bty="n", legend=c(
sprintf("Original (Correct Phases): Corr(Orig, TrueObs)=%s", cor_orig_obs),
sprintf("Prediction via Nil-Phase Reconstruction: Corr(Orig, Nil)=%s", cor_orig_nil),
sprintf("Prediction via Average-Phase Reconstruction: Corr(Orig, Avg)=%s", cor_orig_avg),
sprintf("Real CO Timeseries (Epoch 2)"),
"Orig (True Phase) ARIMA(1,1,4) Model Forecast"),
col=c("black", "purple", "red", "green", "blue"), lty=c(1,1,1,1), lwd=c(2,2,2,2), cex=0.9,
x.intersp=1, text.width=c(0.085,0.235,0.35, 0.3), xjust=0, yjust=0)
## Warning in w0 * rep.int(0:(ncol - 1), rep.int(n.legpercol, ncol)): longer object
## length is not a multiple of shorter object length
#### Zoom in
plot(forecast(fitArimaX_orig, xreg = epochs_aqi_data[1, 800:1000, 2:12]),
main = sprintf("ARIMAX Model Forecasting (1,001:1,200): Corr(TrueObs,Orig)=%s", cor_orig_obs),
xlim=c(950, 1050), ylim = c(500, 2000), col="black", lwd = 1, lty=1)
#lines(c(1001:1200), forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean,
# col = "black", lwd = 1, lty=1) # Original=True Phase reconstruction
lines(c(1001:1200), forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[801:1000, 2:12])$mean,
col = "purple", lwd = 1, lty=1)
lines(c(1001:1200), forecast(fitArimaX_avg, xreg = ift_AvgPhase_X2mag[801:1000, 2:12])$mean,
col = "red", lwd = 1, lty=1)
lines(c(1001:1200), epochs_aqi_data[2, c(1:200), 1], col = "green", lwd = 1, lty=1)
legend("topleft", bty="n", legend=c(
sprintf("Original (Correct Phases): Corr(Orig, TrueObs)=%s", cor_orig_obs),
sprintf("Prediction via Nil-Phase Reconstruction: Corr(Orig, Nil)=%s", cor_orig_nil),
sprintf("Prediction via Average-Phase Reconstruction: Corr(Orig, Avg)=%s", cor_orig_avg),
sprintf("Real CO Timeseries (Epoch 2)"),
"Orig (True Phase) ARIMA(1,1,4) Model Forecast"),
col=c("black", "purple", "red", "green", "blue"), lty=c(1,1,1,1), lwd=c(2,2,2,2), cex=0.9,
x.intersp=1, text.width=c(0.085,0.235,0.35, 0.3), xjust=0, yjust=0)
## Warning in w0 * rep.int(0:(ncol - 1), rep.int(n.legpercol, ncol)): longer object
## length is not a multiple of shorter object length
An alternative data analytic approach involves using the Fourier transform applied to the complete 2D data-matrix (rows=time, columns=features), inverting it back in spacetime, and investigating the effect of the timeseries analysis with and without using the correct kime-directions (phases). Knowing the kime-directions is expected to produce better analytical results (e.g., lower bias and lower dispersion).
dim(aqi_data)
## [1] 9471 17
# 1. Transform the 2D matrix to k-space (Fourier domain)
<- aqi_data[ , 3:15] # remove string columns
aqi_data1 <- as.matrix(aqi_data1[complete.cases(aqi_data1), ])
aqi_data_complete dim(aqi_data_complete) # ; display(aqi_data_complete, method = "raster")
## [1] 9471 13
<- fft(aqi_data_complete)
FT_aqi_data <- FT_aqi_data # display(FT_aqi_data, method = "raster")
X2 <- sqrt(Re(X2)^2+Im(X2)^2)
mag_FT_aqi_data # plot(log(fftshift1D(Re(X2_mag))), main = "log(Magnitude(FFT(timeseries)))")
<- atan2(Im(X2), Re(X2))
phase_FT_aqi_data
### Test the process to confirm calculations
# X2<-FT_aqi_data; X2_mag <- mag_FT_aqi_data; X2_phase<-phase_FT_aqi_data
# Real2 = X2_mag * cos(X2_phase)
# Imaginary2 = X2_mag * sin(X2_phase)
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5,10] - aqi_data1[5, 10]) < 0.001, "Perfect Syntesis", "Problems!!!")
#######
# 2. Invert back to spacetime the epochs_aqi_data_1 signal with nil phase
= mag_FT_aqi_data * cos(0) # cos(phase_FT_aqi_data)
Real = mag_FT_aqi_data * sin(0) # sin(phase_FT_aqi_data)
Imaginary = Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_aqi_data))
ift_NilPhase_X2mag # display(ift_NilPhase_X2mag, method = "raster")
# dim(ift_NilPhase_X2mag); View(ift_NilPhase_X2mag); # compare to View(aqi_data1)
summary(aqi_data_complete); summary(ift_NilPhase_X2mag, method = "raster")
## CO.GT. PT08.S1.CO. NMHC.GT. C6H6.GT.
## Min. :-200.0 Min. :-200 Min. :-200.0 Min. :-200.000
## 1st Qu.: 0.6 1st Qu.: 915 1st Qu.:-200.0 1st Qu.: 3.900
## Median : 1.5 Median :1050 Median :-200.0 Median : 7.800
## Mean : -33.8 Mean :1036 Mean :-157.2 Mean : 1.843
## 3rd Qu.: 2.6 3rd Qu.:1218 3rd Qu.:-200.0 3rd Qu.: 13.500
## Max. : 11.9 Max. :2040 Max. :1189.0 Max. : 63.700
## PT08.S2.NMHC. NOx.GT. PT08.S3.NOx. NO2.GT.
## Min. :-200.0 Min. :-200.0 Min. :-200.0 Min. :-200.00
## 1st Qu.: 704.0 1st Qu.: 47.0 1st Qu.: 631.0 1st Qu.: 51.00
## Median : 890.0 Median : 139.0 Median : 791.0 Median : 95.00
## Mean : 883.8 Mean : 166.6 Mean : 785.4 Mean : 57.45
## 3rd Qu.:1102.0 3rd Qu.: 281.5 3rd Qu.: 957.0 3rd Qu.: 132.00
## Max. :2214.0 Max. :1479.0 Max. :2683.0 Max. : 340.00
## PT08.S4.NO2. PT08.S5.O3. T RH
## Min. :-200 Min. :-200.0 Min. :-200.000 Min. :-200.00
## 1st Qu.:1171 1st Qu.: 689.0 1st Qu.: 10.700 1st Qu.: 33.50
## Median :1440 Median : 936.0 Median : 17.000 Median : 48.10
## Mean :1375 Mean : 963.3 Mean : 9.661 Mean : 39.01
## 3rd Qu.:1658 3rd Qu.:1250.0 3rd Qu.: 24.000 3rd Qu.: 61.70
## Max. :2775 Max. :2523.0 Max. : 44.600 Max. : 88.70
## AH
## Min. :-200.0000
## 1st Qu.: 0.6768
## Median : 0.9711
## Mean : -6.7553
## 3rd Qu.: 1.2915
## Max. : 2.2310
## CO.GT. PT08.S1.CO. NMHC.GT. C6H6.GT.
## Min. : 1143 Min. :-185.1 Min. :-392.3 Min. : -65.27
## 1st Qu.: 1789 1st Qu.: 138.1 1st Qu.: 188.4 1st Qu.: 228.36
## Median : 1982 Median : 213.8 Median : 254.5 Median : 298.23
## Mean : 2100 Mean : 218.9 Mean : 257.7 Mean : 304.04
## 3rd Qu.: 2223 3rd Qu.: 291.7 3rd Qu.: 324.4 3rd Qu.: 373.90
## Max. :41122 Max. :3787.7 Max. :1323.0 Max. :2876.22
## PT08.S2.NMHC. NOx.GT. PT08.S3.NOx. NO2.GT.
## Min. : -93.41 Min. : 32.15 Min. :-183.04 Min. :-183.04
## 1st Qu.: 97.44 1st Qu.: 392.22 1st Qu.: 22.36 1st Qu.: 22.36
## Median : 152.62 Median : 458.39 Median : 78.28 Median : 78.28
## Mean : 167.31 Mean : 476.19 Mean : 85.98 Mean : 85.98
## 3rd Qu.: 217.35 3rd Qu.: 533.20 3rd Qu.: 139.73 3rd Qu.: 139.73
## Max. :4611.64 Max. :7979.90 Max. :3896.77 Max. :3896.77
## PT08.S4.NO2. PT08.S5.O3. T RH
## Min. : 32.15 Min. : -93.41 Min. : -65.27 Min. :-392.3
## 1st Qu.: 392.22 1st Qu.: 97.44 1st Qu.: 228.36 1st Qu.: 188.4
## Median : 458.39 Median : 152.62 Median : 298.23 Median : 254.5
## Mean : 476.19 Mean : 167.31 Mean : 304.04 Mean : 257.7
## 3rd Qu.: 533.20 3rd Qu.: 217.35 3rd Qu.: 373.90 3rd Qu.: 324.4
## Max. :7979.90 Max. :4611.64 Max. :2876.22 Max. :1323.0
## AH
## Min. :-185.1
## 1st Qu.: 138.1
## Median : 213.8
## Mean : 218.9
## 3rd Qu.: 291.7
## Max. :3787.7
# 3. Perform 2D modeling of ift_NilPhase_X2mag, e.g.,
### LASSO "CO.GT." ~ "PT08.S1.CO."+"NMHC.GT."+"C6H6.GT."+"PT08.S2.NMHC."+"NOx.GT."+"PT08.S3.NOx."+ "NO2.GT."+"PT08.S4.NO2."+"PT08.S5.O3."+"T"+"RH"+"AH"
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.1.2
## Loading required package: Matrix
## Loaded glmnet 4.1-3
library(arm)
## Warning: package 'arm' was built under R version 4.1.2
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## Loading required package: lme4
##
## arm (Version 1.12-2, built: 2021-10-15)
## Working directory is E:/Ivo.dir/Eclipse_Projects/HTML5_WebSite/TCIU/Chapter6
library(knitr)
## Warning: package 'knitr' was built under R version 4.1.2
<- ift_NilPhase_X2mag[ , 1] # CO as outcome variable
y <- ift_NilPhase_X2mag[ , 2:13] # remaining features are predictors
X
set.seed(1234)
= sample(1 : nrow(X), round((4/5) * nrow(X)))
train = -train
test
# subset training data
= y[train]
yTrain = X[train, ]
XTrain # subset test data
= y[test]
yTest = X[test, ]
XTest
#### Model Estimation & Selection ####
# Estimate models: glmnet automatically standardizes the predictors
= glmnet(XTrain, yTrain, alpha = 0) # Ridge Regression
fitRidge = glmnet(XTrain, yTrain, alpha = 1) # The LASSO
fitLASSO
### Plot Solution Path #### LASSO
plot(fitLASSO, xvar="lambda", label="TRUE") # add label to upper x-axis
mtext("(Nil-Phase) LASSO regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)
### Plot Solution Path #### Ridge
plot(fitRidge, xvar="lambda", label="TRUE") # add label to upper x-axis
mtext("(Nil-Phase) Ridge regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)
#### 10-fold cross validation ##### LASSO
library("glmnet")
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
registerDoParallel(6)
set.seed(1234) # set seed
= cv.glmnet(XTrain, yTrain, alpha = 1, parallel=TRUE) # (10-fold) cross validation for the LASSO
cvLASSO plot(cvLASSO); mtext("(Nil-Phase) CV LASSO: Number of Nonzero (Active) Coefficients", side=3, line=2.5)
### Predict and Report LASSO MSE
<- predict(cvLASSO, s = cvLASSO$lambda.1se, newx = XTest)
predLASSO <- mean((predLASSO - yTest)^2); testMSE_LASSO testMSE_LASSO
## [1] 38880.76
# LASSO coefficient estimates
= as.double(coef(fitLASSO, s = cvLASSO$lambda.1se)) # s is lambda
betaHatLASSO coefplot(betaHatLASSO[2:12], sd = rep(0, 11), pch=1, col.pts = "red", cex.pts = 2)
legend("bottomright", "(Nil-Phase) LASSO", col = "blue", bty = "o", cex = 1)
# 4. Perform LASSO modeling on ift_TruePhase_X2mag
<-FT_aqi_data; X2_mag <- mag_FT_aqi_data; X2_phase<-phase_FT_aqi_data
X2= X2_mag * cos(X2_phase)
Real2 = X2_mag * sin(X2_phase)
Imaginary2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
man_hat_X2
<- man_hat_X2[ , 1] # CO as outcome variable
y3 <- man_hat_X2[ , 2:13] # remaining features are predictors
X3
# subset training data
= y3[train]
y3Train = X3[train, ]
X3Train # subset test data
= y3[test]
y3Test = X3[test, ]
X3Test
#### Model Estimation & Selection ####
# Estimate models: glmnet automatically standardizes the predictors
= glmnet(X3Train, y3Train, alpha = 0) # Ridge Regression
fitRidge3 = glmnet(X3Train, y3Train, alpha = 1) # The LASSO
fitLASSO3
### Plot Solution Path #### LASSO
plot(fitLASSO3, xvar="lambda", label="TRUE") # add label to upper x-axis
mtext("(True-Phase) LASSO regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)
### Plot Solution Path #### Ridge
plot(fitRidge3, xvar="lambda", label="TRUE") # add label to upper x-axis
mtext("(True-Phase) Ridge regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)
#### 10-fold cross validation ##### LASSO
library("glmnet")
library(doParallel)
registerDoParallel(6)
set.seed(1234) # set seed
= cv.glmnet(X3Train, y3Train, alpha = 1, parallel=TRUE) # (10-fold) cross validation for the LASSO
cvLASSO3 plot(cvLASSO3); mtext("(True-Phase) CV LASSO: Number of Nonzero (Active) Coefficients", side=3, line=2.5)
### Predict and Report LASSO MSE
<- predict(cvLASSO3, s = cvLASSO3$lambda.1se, newx = X3Test)
predLASSO3 <- mean((predLASSO3 - y3Test)^2); testMSE_LASSO3 testMSE_LASSO3
## [1] 2876.822
# LASSO coefficient estimates
= as.double(coef(fitLASSO3, s = cvLASSO3$lambda.1se)) # s is lambda
betaHatLASSO3 coefplot(betaHatLASSO3[2:12], sd = rep(0, 11), pch=1, col.pts = "red", cex.pts = 2)
legend("bottomright", "(True-Phase)LASSO", col = "blue", bty = "o", cex = 1.5)
# 5. Compare the analytics results from #3, #6, and #7
# Stop local cluster
stopImplicitCluster()