set.seed(1)
<- rnorm(100)
w plot(w, type = "l")
Chapter 4: Basic Stochastic Models
Throughout this book, we will often fit models to data that we have simulated and attempt to recover the underlying model parameters. At first sight, this might seem odd, given that the parameters are used to simulate the data so that we already know at the outset the values the parameters should take. However, the procedure is useful for a number of reasons. In particular, to be able to simulate data using a model requires that the model formulation be correctly understood. If the model is understood but incorrectly implemented, then the parameter estimates from the fitted model may deviate significantly from the underlying model values used in the simulation. Simulation can therefore help ensure that the model is both correctly understood and correctly implemented.
4.2.3 “Simulation in R
” Modern Look
Book code
Tidyverts code
::p_load("tsibble", "fable", "feasts",
pacman"tsibbledata", "fable.prophet", "tidyverse", "patchwork")
set.seed(1)
<- tibble(
wd x = 1:100,
y = rnorm(100))
|> ggplot(aes(x = x, y = y)) + geom_line() wd
<- seq(-3, 3, length = 1000)
x hist(rnorm(100), prob = T)
points(x, dnorm(x), type = "l")
<- tibble(
xd x = seq(-3, 3, length = 1000),
norm = rnorm(1000),
density = dnorm(x)
)|>
xd ggplot(aes(x = norm)) +
geom_histogram(aes(y = after_stat(density)),
color = "white", fill = "darkgrey") +
geom_line(aes(x = x, y = density)) +
theme_bw()
Show the Tidyverts code in full
set.seed(1)
wd <- tibble(
x = 1:100,
y = rnorm(100))
wd |> ggplot(aes(x = x, y = y)) + geom_line()
xd <- tibble(
x = seq(-3, 3, length = 1000),
norm = rnorm(1000),
density = dnorm(x)
)
xd |>
ggplot(aes(x = norm)) +
geom_histogram(aes(y = after_stat(density)),
color = "white", fill = "darkgrey") +
geom_line(aes(x = x, y = density)) +
theme_bw()
4.2.4 “Second-order properties and the correlogram” Modern Look
Book code
set.seed(2)
acf(rnorm(100))
Tidyverts code
set.seed(2)
tsibble(seq = 1:100,
values = rnorm(100),
index = seq) |>
ACF(values) |>
autoplot()
4.3.7 “Simulation” Modern Look
Book code
set.seed(2)
<- w <- rnorm(1000)
x for (t in 2:1000) x[t] <- x[t - 1] + w[t]
plot(x, type = "l")
Tidyverts code
set.seed(2)
<- tibble(w = rnorm(1000)) |>
dat mutate(
index = 1:n(),
x = purrr::accumulate2(
lag(w), w,
+ w,
\(acc, nxt, w) acc .init = 0)[-1]) |>
::as_tsibble(index = index)
tsibbleautoplot(dat, .var = x)
acf(x)
ACF(dat, x) |>
autoplot()
4.4.1 “Simulated random walk series” Modern Look
Book code
acf(diff(x))
Tidyverts code
|>
dat mutate(diff = x - lag(x)) |>
ACF(diff) |>
autoplot()
4.4.2 “Exchange rate series” Modern Look
Book code
<- read.table("data/pounds_nz.dat", header = T)
Z <- ts(Z, st = 1991, fr = 4)
Z.ts acf(diff(Z.ts))
Tidyverts code
<- read_table("data/pounds_nz.dat")
z <- seq(
date_seq ::ymd("1991-01-01"),
lubridateby = "3 months",
length.out = nrow(z))
<- z |>
z_ts mutate(
date = date_seq,
quarter = tsibble::yearquarter(date)) |>
as_tsibble(index = quarter)
|>
z_ts mutate(diff = xrate - lag(xrate)) |>
ACF(diff) |>
autoplot()
<- HoltWinters(Z.ts, alpha = 1, gamma = FALSE) # changed from book
Z.hw acf(resid(Z.hw)) # not exact match.
<- z_ts |>
z_hw model(Additive = ETS(xrate ~
trend("A", alpha = 1) +
error("A") + season("N", gamma = 0),
opt_crit = "amse", nmse = 1)) |>
residuals()
ACF(z_hw, .resid) |>
autoplot()
Show the Tidyverts code in full
z <- read_table("data/pounds_nz.dat")
date_seq <- seq(
lubridate::ymd("1991-01-01"),
by = "3 months",
length.out = nrow(z))
z_ts <- z |>
mutate(
date = date_seq,
quarter = tsibble::yearquarter(date)) |>
as_tsibble(index = quarter)
z_ts |>
mutate(diff = xrate - lag(xrate)) |>
ACF(diff) |>
autoplot()
z_hw <- z_ts |>
model(Additive = ETS(xrate ~
trend("A", alpha = 1) +
error("A") + season("N", gamma = 0),
opt_crit = "amse", nmse = 1)) |>
residuals()
ACF(z_hw, .resid) |>
autoplot()
4.4.3 “Random walk with drift” data Modern Look
Book code
<- read.table("data/HP.txt", header = T)
HP.dat # attach(HP.dat) # bad practice
plot(as.ts(HP.dat$Price))
Tidyverts code
<- read_table("data/HP.txt") |>
hp_dat mutate(Day = 1:n())
ggplot(hp_dat, aes(x = Day, y = Price)) + geom_line()
<- diff(HP.dat$Price)
DP plot(as.ts(DP))
|>
hp_dat mutate(diff = Price - lag(Price)) |>
ggplot(aes(x = Day, y = diff)) +
geom_line()
acf(DP)
|>
hp_dat mutate(diff = Price - lag(Price)) |>
as_tsibble(index = Day) |>
ACF(diff) |>
autoplot()
mean(DP) + c(-2, 2) * sd(DP) / sqrt(length(DP))
[1] 0.004378275 0.075353468
|>
hp_dat mutate(diff = Price - lag(Price)) |>
slice(-1) |>
summarize(
mean = mean(diff),
sd = sd(diff),
n = n(),
std_error = sd / sqrt(n),
lower = mean - 2 * std_error,
upper = mean + 2 * std_error)
# A tibble: 1 × 6
mean sd n std_error lower upper
<dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 0.0399 0.460 671 0.0177 0.00438 0.0754
Show the Tidyverts code in full
hp_dat <- read_table("data/HP.txt") |>
mutate(Day = 1:n())
ggplot(hp_dat, aes(x = Day, y = Price))
geom_line()
hp_dat |>
mutate(diff = Price - lag(Price)) |>
ggplot(aes(x = Day, y = diff)) +
geom_line()
hp_dat |>
mutate(diff = Price - lag(Price)) |>
as_tsibble(index = Day) |>
ACF(diff) |>
autoplot()
hp_dat |>
mutate(diff = Price - lag(Price)) |>
slice(-1) |>
summarize(
mean = mean(diff),
sd = sd(diff),
n = n(),
std_error = sd / sqrt(n),
lower = mean - 2 * std_error,
upper = mean + 2 * std_error)
4.5.7 Autoregressive “Simulation” Modern Look
Book code
<- function(k, alpha) alpha^k
rho layout(1:2)
plot(0:10, rho(0:10, 0.7), type = "b")
plot(0:10, rho(0:10, -0.7), type = "b")
Tidyverts code
<- function(k, alpha) alpha^k
rho <- tibble(
dat x = 0:10,
a7 = rho(x, .7),
am7 = rho(x, -.7)) |>
pivot_longer(a7:am7,
names_to = "param",
values_to = "value")
ggplot(dat, aes(x = x, y = value)) +
geom_line() +
geom_point() +
facet_wrap(~param, ncol = 1)
set.seed(1)
<- w <- rnorm(100)
x for (t in 2:100) x[t] <- 0.7 * x[t - 1] + w[t]
plot(x, type = "l")
set.seed(1)
<- tibble(w = rnorm(100)) |>
dat mutate(
index = 1:n(),
x = purrr::accumulate2(
lag(w), w,
0.7 * acc + w,
\(acc, nxt, w) .init = 0)[-1]) |>
::as_tsibble(index = index)
tsibbleautoplot(dat, .var = x)
acf(x)
ACF(dat, x) |>
autoplot()
pacf(x)
PACF(dat, x) |>
autoplot()
4.6.1 “Model fitted to simulated series” Modern look
The stats::ar()
and fable::AR()
appear to have different optimization routines. For example, the fable::AR()
model only allows optimization using Ordinary Least Squares (OLS), and stats::ar()
provides multiple options. The textbook uses method = "mle"
in these examples. Other differences are also apparent. As such, the results in the remaining sections do not match exactly.
Book code
set.seed(1)
<- w <- rnorm(100)
x for (t in 2:100) x[t] <- 0.7 * x[t - 1] + w[t]
<- ar(x, method = "mle")
x.ar $order x.ar
[1] 1
Tidyverts code
set.seed(1)
<- tibble(w = rnorm(100)) |>
dat mutate(
index = 1:n(),
x = purrr::accumulate2(
lag(w), w,
0.7 * acc + w,
\(acc, nxt, w) .init = 0)[-1]) |>
::as_tsibble(index = index)
tsibble# Note that mle is not available using ols in ARMA
# answers slightly different
<- dat |>
fit_ar model(AR(x ~ order(1)))
print("Order defined as 1 using order")
[1] "Order defined as 1 using order"
$ar x.ar
[1] 0.6009459
tidy(fit_ar) |>
filter(str_detect(term, "ar")) |>
pull("estimate")
[1] 0.6015796
$ar + c(-2, 2) * sqrt(x.ar$asy.var)[1] # slight change x.ar
[1] 0.4404031 0.7614886
tidy(fit_ar) |>
mutate(
lb = estimate - 2 * std.error,
ub = estimate + 2 * std.error
)
# A tibble: 2 × 8
.model term estimate std.error statistic p.value lb ub
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AR(x ~ order(1)) constant 0.157 0.0955 1.65 1.03e- 1 -0.0339 0.348
2 AR(x ~ order(1)) ar1 0.602 0.0814 7.39 5.03e-11 0.439 0.764
Show the Tidyverts code in full
set.seed(1)
dat <- tibble(w = rnorm(100)) |>
mutate(
index = 1:n(),
x = purrr::accumulate2(
lag(w), w,
\(acc, nxt, w) 0.7 * acc + w,
.init = 0)[-1]) |>
tsibble::as_tsibble(index = index)
# Note that mle is not available using ols in ARMA
# answers slightly different
fit_ar <- dat |>
model(AR(x ~ order(1)))
tidy(fit_ar) |>
filter(str_detect(term, "ar")) |>
pull("estimate")
tidy(fit_ar) |>
mutate(
lb = estimate - 2 * std.error,
ub = estimate + 2 * std.error
)
4.6.2 “Exchange rate series: Fitted AR model”
See note about stats::ar()
and fable::AR()
in previous section.
Book code
<- read.table("data/pounds_nz.dat", header = T)
Z <- ts(Z, st = 1991, fr = 4)
Z.ts <- ar(Z.ts)
Z.ar mean(Z.ts)
[1] 2.823251
Tidyverts code
<- read_table("data/pounds_nz.dat")
z <- seq(
date_seq ::ymd("1991-01-01"),
lubridateby = "3 months",
length.out = nrow(z))
<- z |>
z_ts mutate(
date = date_seq,
quarter = tsibble::yearquarter(date)) |>
as_tsibble(index = quarter)
<- z_ts |>
z_ar model(AR(xrate ~ order(1)))
mean(z_ts$xrate)
[1] 2.823251
c(Z.ar$order, Z.ar$ar)
[1] 1.000000 0.890261
tidy(z_ar) |>
filter(str_detect(term, "ar")) |>
pull("estimate")
[1] 1.005271
$ar + c(-2, 2) * sqrt(Z.ar$asy.var)[1] Z.ar
[1] 0.7405097 1.0400123
tidy(z_ar) |>
mutate(
lb = estimate - 2 * std.error,
ub = estimate + 2 * std.error
)
# A tibble: 1 × 8
.model term estimate std.error statistic p.value lb ub
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AR(xrate ~ order(1)) ar1 1.01 0.00781 129. 8.55e-52 0.990 1.02
acf(Z.ar$res[-1])
residuals(z_ar) |> ACF(y = .resid) |> autoplot()
Show the Tidyverts code in full
z <- read_table("data/pounds_nz.dat")
date_seq <- seq(
lubridate::ymd("1991-01-01"),
by = "3 months",
length.out = nrow(z))
z_ts <- z |>
mutate(
date = date_seq,
quarter = tsibble::yearquarter(date)) |>
as_tsibble(index = quarter)
z_ar <- z_ts |>
model(AR(xrate ~ order(1)))
mean(z_ts$xrate)
tidy(z_ar) |>
filter(str_detect(term, "ar")) |>
pull("estimate")
tidy(z_ar) |>
mutate(
lb = estimate - 2 * std.error,
ub = estimate + 2 * std.error
)
residuals(z_ar) |> ACF(y = .resid) |> autoplot()
4.6.3 “Global temperature series: Fitted AR model” Modern look
See note about stats::ar()
and fable::AR()
in previous section.
Book code
= scan("data/global.dat")
Global = ts(Global, st = c(1856, 1), end = c(2005, 12),fr = 12)
Global.ts <- ar(aggregate(Global.ts, FUN = mean), method = "mle")
Global.ar mean(aggregate(Global.ts, FUN = mean))
[1] -0.1382628
Tidyverts code
<- tibble(x = scan("data/global.dat")) |>
global mutate(
date = seq(
ymd("1856-01-01"),
by = "1 months",
length.out = n()),
year = year(date),
year_month = tsibble::yearmonth(date))
<- global |>
global_ts as_tsibble(index = year_month)
<- global |>
global_ts_year summarise(x = mean(x), .by = year) |>
as_tsibble(index = year)
<- global_ts_year |>
global_ar model(AR(x ~ order(1:9)))
mean(global_ts$x)
[1] -0.1382628
$order Global.ar
[1] 4
tidy(global_ar) |> filter(str_detect(term, "ar")) |> nrow()
[1] 4
$ar Global.ar
[1] 0.58762026 0.01260252 0.11116731 0.26763656
tidy(global_ar) |> filter(str_detect(term, "ar")) |> pull(estimate)
[1] 0.58176066 0.02164876 0.10747306 0.26707158
acf(Global.ar$res[-(1:Global.ar$order)], lag = 50)
residuals(global_ar) |> ACF(lag_max = 50) |> autoplot()
Show the Book code in full
Show the Tidyverts code in full
global <- tibble(x = scan("data/global.dat")) |>
mutate(
date = seq(
ymd("1856-01-01"),
by = "1 months",
length.out = n()),
year = year(date),
year_month = tsibble::yearmonth(date))
global_ts <- global |>
as_tsibble(index = year_month)
global_ts_year <- global |>
summarise(x = mean(x), .by = year) |>
as_tsibble(index = year)
global_ar <- global_ts_year |>
model(AR(x ~ order(1:9)))
mean(global_ts$x)
tidy(global_ar) |> filter(str_detect(term, "ar")) |> nrow()
tidy(global_ar) |> filter(str_detect(term, "ar")) |> pull(estimate)
residuals(global_ar) |> ACF(lag_max = 50) |> autoplot()
4.7 Summary of Modern R
commands used in examples
ggplot2::geom_histogram()
: Histograms and frequency polygonsggplot2::after_stat()
: Control aesthetic evaluationdplyr::slice()
: Subset rows using their positionsggplot2::facet_wrap()
: Wrap a 1d ribbon of panels into 2dpurr::accumulate2()
: Accumulate intermediate results of a vector reduction\()
:fable::AR()
: Estimate a AR modelstats::residuals()
: Extract Model Residuals