Introduction
This is the accompanying R code for my PhD thesis, entitled “Regression modelling using priors depending on Fisher information covariance kernels”. Download the required data sets from this link.
Preliminary
Install and load all required R packages.
if (!require("pacman")) install.packages("pacman") # not strictly necessary,
# but an elegant way of loading all packages
pacman::p_load(knitr, devtools, R2MLwiN, lme4, lmerTest, jmcm, caret, tidyverse,
               directlabels, reshape2, kableExtra, cowplot, ElemStatLearn,
               spatstat, stringr, kernlab, ggmcmc, coda, mvtnorm,
               MCMCglmm, runjags, rstan, plotly, parallel, doSNOW, RPEnsemble,
               foreach, glmnet, BAS, ggmap, ggrepel, gridExtra)
# I-prior packages, install from GitHub and load
devtools::install_github("haziqj/iprior")
devtools::install_github("haziqj/iprobit")
devtools::install_github("haziqj/ipriorBVS")
library(iprior); library(iprobit); library(ipriorBVS)
# ggplot2 general setting
ggplot2::theme_set(theme_bw())
# rstan settings
stan2coda <- function(fit) {
mcmc.list(lapply(1:ncol(fit), function(x) mcmc(as.array(fit)[,x,])))
}
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
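As a usage sketch of stan2coda() (here fit.stan stands for any stanfit object, such as the ones fitted in Chapters 4 and 5, and fit.coda is just an illustrative name):
# Convert a stanfit object into a coda mcmc.list for MCMC diagnostics, e.g.
# fit.coda <- stan2coda(fit.stan)
# summary(fit.coda); plot(fit.coda)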
Chapter 2
Plots of the sample I-prior paths in Chapter 2.
# ggplot2 theme for the plots
theme_kernel_path <- theme_classic() +
theme(legend.position = "none")
# RKHS of constant functions
kernel_path_constant <- function(n = 1000, seed = 789) {
x <- seq(-1, 1, length = n)
m <- 5 # no. of paths
f.prior <- matrix(NA, nrow = n, ncol = m)
if (!is.null(seed)) set.seed(seed)
for (i in seq_len(m)) f.prior[, i] <- matrix(1, nrow = n, ncol = n) %*% rnorm(n)
plot.df <- data.frame(x, f = f.prior)
plot.df <- melt(plot.df, id = "x")
ggplot(plot.df, aes(x, value)) +
geom_line(aes(col = variable)) +
coord_cartesian(ylim = c(min(f.prior) - 10, max(f.prior) + 10)) +
labs(x = expression(italic(x)), y = expression(italic(f(x))),
title = "Sample constant I-prior paths") +
theme_kernel_path +
scale_y_continuous(breaks = 0) +
scale_x_continuous(breaks = 0)
}
# Canonical RKHS
kernel_path_canonical <- function(n = 1000, seed = 789, cen = TRUE,
intercept = FALSE) {
x <- seq(-1, 1, length = n)
m <- 5 # no. of paths
f.prior <- matrix(NA, nrow = n, ncol = m)
if (!is.null(seed)) set.seed(seed)
for (i in seq_len(m))
f.prior[, i] <- (as.numeric(intercept) +
iprior::kern_canonical(x, centre = cen)) %*% rnorm(n)
plot.df <- data.frame(x, f = f.prior)
plot.df <- melt(plot.df, id = "x")
ggplot(plot.df, aes(x, value)) +
geom_line(aes(col = variable)) +
labs(x = expression(italic(x)), y = expression(italic(f(x))),
title = "Sample linear I-prior paths") +
theme_kernel_path +
scale_y_continuous(breaks = 0) +
scale_x_continuous(breaks = 0)
}
# Fractional Brownian motion RKHS
kernel_path_fbm <- function(n = 1000, seed = 789, hurst = 0.5, cen = TRUE,
intercept = FALSE) {
the.title <- paste0("Sample fBm I-prior paths (Hurst = ", hurst, ")")
x <- seq(-1, 1, length = n)
m <- 5 # no. of paths
f.prior <- matrix(NA, nrow = n, ncol = m)
if (!is.null(seed)) set.seed(seed)
for (i in seq_len(m))
f.prior[, i] <- (as.numeric(intercept) +
iprior::kern_fbm(x, gamma = hurst, centre = cen)) %*% rnorm(n)
plot.df <- data.frame(x, f = f.prior)
plot.df <- melt(plot.df, id = "x")
ggplot(plot.df, aes(x, value)) +
geom_line(aes(col = variable)) +
labs(x = expression(italic(x)), y = expression(italic(f(x))),
title = the.title) +
theme_kernel_path +
scale_y_continuous(breaks = 0) +
scale_x_continuous(breaks = 0)
}
# Squared exponential RKHS
kernel_path_se <- function(n = 1000, seed = 789, l = 0.5, cen = TRUE,
intercept = FALSE) {
x <- seq(-1, 1, length = n)
m <- 5 # no. of paths
f.prior <- matrix(NA, nrow = n, ncol = m)
if (!is.null(seed)) set.seed(seed)
for (i in seq_len(m))
f.prior[, i] <- (as.numeric(intercept) +
iprior::kern_se(x, l = l, centre = cen)) %*% rnorm(n)
plot.df <- data.frame(x, f = f.prior)
plot.df <- melt(plot.df, id = "x")
ggplot(plot.df, aes(x, value)) +
geom_line(aes(col = variable)) +
labs(x = expression(italic(x)), y = expression(italic(f(x))),
title = bquote(Sample~SE~"I-prior"~paths~(italic(l)~"="~.(l)))) +
theme_kernel_path +
scale_y_continuous(breaks = 0) +
scale_x_continuous(breaks = 0)
}
# Pearson RKHS
kernel_path_pearson <- function(n = 1000, seed = 789) {
  if (!is.null(seed)) set.seed(seed) # set the seed before sampling x too
  x <- factor(sample(LETTERS[1:5], size = n, replace = TRUE))
  m <- 5 # no. of paths
  f.prior <- matrix(NA, nrow = n, ncol = m)
  for (i in seq_len(m)) f.prior[, i] <- iprior::kern_pearson(x) %*% rnorm(n)
plot.df <- data.frame(x, f = f.prior)
plot.df <- plot.df[match(unique(plot.df$x), plot.df$x), ]
plot.df <- melt(plot.df, id = "x")
ggplot(plot.df, aes(x, value)) +
geom_point(aes(col = variable), size = 3, shape = 21, fill = NA, stroke = 1.2) +
geom_point(aes(fill = variable, col = variable), size = 3, shape = 21, alpha = 0.5) +
labs(x = expression(italic(x)), y = expression(italic(f(x))),
title = "Sample Pearson I-prior points") +
theme_kernel_path +
scale_y_continuous(breaks = 0)
}
# Polynomial kernel RKHS
kernel_path_poly <- function(n = 1000, seed = 789, c = 0, d = 2, cen = TRUE,
intercept = 0.5) {
the.title <- paste0("Sample polynomial I-prior paths (degree = ", d, ")")
x <- seq(-1, 1, length = n)
m <- 5 # no. of paths
f.prior <- matrix(NA, nrow = n, ncol = m)
if (!is.null(seed)) set.seed(seed)
for (i in seq_len(m))
f.prior[, i] <- (as.numeric(intercept) +
iprior::kern_poly(x, c = c, d = d, centre = cen)) %*% rnorm(n)
plot.df <- data.frame(x, f = f.prior)
plot.df <- melt(plot.df, id = "x")
ggplot(plot.df, aes(x, value)) +
geom_line(aes(col = variable)) +
labs(x = expression(italic(x)), y = expression(italic(f(x))),
title = the.title) +
theme_kernel_path +
scale_y_continuous(breaks = 0) +
scale_x_continuous(breaks = 0)
}
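Each of these functions returns a ggplot object, so the figures can be drawn simply by calling them, for instance (a smaller n than in the thesis keeps this quick):
# Draw the sample-path figures
kernel_path_constant(n = 500)
kernel_path_canonical(n = 500)
kernel_path_fbm(n = 500, hurst = 0.7)
kernel_path_se(n = 500, l = 0.5)
kernel_path_pearson(n = 500)
kernel_path_poly(n = 500, d = 2)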
Chapter 4
Chapter 4 uses the iprior package to fit I-prior models using direct optimisation, the EM algorithm, or a combination of both. We also use rstan to fit a fully Bayesian I-prior model using HMC.
Comparison of methods
Comparing direct optimisation (quasi-Newton methods), the EM algorithm and MCMC for the estimation of I-prior models.
# Generate data set. Note that this data set can also be generated using
# iprior::gen_smooth().
set.seed(123)
N <- 150
f <- function(x, truth = FALSE) {
35 * dnorm(x, mean = 1, sd = 0.8) +
65 * dnorm(x, mean = 4, sd = 1.5) +
(x > 4.5) * (exp((1.25 * (x - 4.5))) - 1) +
3 * dnorm(x, mean = 2.5, sd = 0.3)
}
x <- c(seq(0.2, 1.9, length = N * 5 / 8), seq(3.7, 4.6, length = N * 3 / 8))
x <- sample(x, size = N)
x <- x + rnorm(N, sd = 0.65) # adding random fluctuation to the x
x <- sort(x)
y.err <- rt(N, df = 1)
y <- f(x) + sign(y.err) * pmin(abs(y.err), rnorm(N, mean = 4.1)) # adding random terms to the y
dat.fit <- data.frame(y, x) # collect into a data frame for model fitting
# Direct optimisation
mod1 <- iprior(y ~ x, dat.fit, kernel = "fbm", train.samp = 1:N)
# EM algorithm
mod2 <- iprior(y ~ x, dat.fit, kernel = "fbm", method = "em", train.samp = 1:N,
               control = list(maxit = 1000))
# MCMC
stan.iprior.dat <- list(y = y, H = iprior::kern_fbm(x), n = length(y),
ytrue = f(x))
stan.iprior.mod <- "
data {
int<lower=0> n; // number of data
vector[n] y; // data y
matrix[n, n] H; // the kernel matrix
vector[n] ytrue; // true values
}
parameters {
real alpha;
real<lower=0> psi;
real<lower=0> lambda;
}
transformed parameters {
cov_matrix[n] Vy;
Vy = psi * (lambda * H) * (lambda * H);
for (i in 1:n)
Vy[i, i] = Vy[i, i] + 1 / psi;
}
model {
matrix[n, n] L_cov;
L_cov = cholesky_decompose(Vy);
y ~ multi_normal_cholesky(rep_vector(alpha, n), L_cov);
lambda ~ normal(0, 10);
psi ~ normal(0, 10);
}
generated quantities {
// generate from posterior of y
vector[n] w;
matrix[n,n] Vw;
vector[n] ynew;
vector[n] muynew;
matrix[n,n] Vynew;
real rmse;
real logLik;
Vw = inverse(Vy);
w = psi * lambda * H * inverse(Vy) * (y - alpha);
muynew = alpha + lambda * H * w;
Vynew = square(lambda) * H * Vw * H;
for (i in 1:n)
Vynew[i, i] = Vynew[i, i] + 1 / psi;
ynew = multi_normal_rng(muynew, Vynew);
rmse = sqrt(mean(square(ynew - ytrue)));
logLik = multi_normal_lpdf(y|rep_vector(alpha, n), Vy);
}
"
# Compile the Stan programme
m <- stan_model(model_code = stan.iprior.mod)
m@model_name <- "iprior.fbm"
# Fit stan model
fit.stan <- sampling(m, data = stan.iprior.dat,
pars = c("alpha", "lambda", "psi", "ynew", "rmse", "logLik"),
iter = 2000, chains = 8, thin = 1)
print(fit.stan, pars = c("alpha", "lambda", "psi", "rmse", "logLik"))
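A quick comparison of the three fits (a sketch using the objects above; logLik() and get_time() are iprior extractors, while the Stan log-likelihood was computed in the generated quantities block):
# Compare log-likelihoods and timings across the three methods
c(direct = logLik(mod1), em = logLik(mod2))
get_time(mod1); get_time(mod2)
summary(fit.stan, pars = "logLik")$summary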
Post-estimation
Some of the post-estimation procedures run on an I-prior model.
# Set ggplot theme
theme_reg <- theme_bw() +
theme(axis.ticks = element_blank(), axis.text = element_blank(),
panel.grid = element_blank())
# Generate data set. Note that this data set can also be generated using
# iprior::gen_smooth().
set.seed(123)
N <- 150
f <- function(x, truth = FALSE) {
35 * dnorm(x, mean = 1, sd = 0.8) +
65 * dnorm(x, mean = 4, sd = 1.5) +
(x > 4.5) * (exp((1.25 * (x - 4.5))) - 1) +
3 * dnorm(x, mean = 2.5, sd = 0.3)
}
x <- c(seq(0.2, 1.9, length = N * 5 / 8), seq(3.7, 4.6, length = N * 3 / 8))
x <- sample(x, size = N)
x <- x + rnorm(N, sd = 0.65) # adding random fluctuation to the x
x <- sort(x)
y.err <- rt(N, df = 1)
y <- f(x) + sign(y.err) * pmin(abs(y.err), rnorm(N, mean = 4.1)) # adding random terms to the y
# True values
x.true <- seq(-2.1, 7, length = 1000)
y.true <- f(x.true, TRUE)
# Data for plot
dat <- data.frame(x, y, points = "Observed")
dat.truth <- data.frame(x = x.true, y = y.true)
p1 <- ggplot() +
geom_point(data = dat, aes(x = x, y = y)) +
scale_x_continuous(
limits = c(min(x.true), max(x.true)),
breaks = NULL, name = expression(italic(x))
) +
scale_y_continuous(
breaks = NULL, name = expression(italic(y))
) +
coord_cartesian(ylim = c(min(y) - 5, max(y) + 5)) +
theme_bw()
no.of.draws <- 90
# Based on an fBm RKHS
mod <- iprior(y ~ x, dat, kernel = "fbm", est.hurst = FALSE)
n <- length(y)
H.eta <- iprior::get_kern_matrix(mod)
alpha <- iprior::get_intercept(mod)
psi <- iprior::get_psi(mod)
Vy <- psi * H.eta %*% H.eta + diag(1 / psi, n)
w.hat <- mod$w
H.star <- iprior::kern_fbm(x, x.true, gamma = get_hurst(mod)) * iprior::get_lambda(mod) / 2
# (halving lambda here stops the prior paths from looking too wild)
y.fitted <- predict(mod, newdata = dat.truth)$y # predicted values
y.fitted2 <- fitted(mod)$y # fitted values
# Prior variance for f
Vf.pri <- psi * tcrossprod(H.star)
# Posterior variance for f
Vf.pos <- H.star %*% solve(Vy, t(H.star))
# Prepare random draws from prior and posterior
draw.pri <- t(mvtnorm::rmvnorm(no.of.draws, mean = rep(alpha, 1000),
sigma = Vf.pri))
draw.pos <- t(mvtnorm::rmvnorm(40, mean = y.fitted, sigma = Vf.pos))
melted.pos <- melt(data.frame(f = draw.pos, x = x.true), id.vars = "x")
melted.pri <- melt(data.frame(f = draw.pri, x = x.true), id.vars = "x")
melted <- rbind(cbind(melted.pri, type = "Prior"),
cbind(melted.pos, type = "Posterior"))
# Posterior predictive covariance matrix
varyprior <- abs(diag(Vf.pri)) + 1 / psi
varystar <- abs(diag(Vf.pos)) + 1 / psi
dat.fit <- data.frame(x.true, y.fitted, sdev = sqrt(varystar),
type = "95% credible interval")
dat.f <- rbind(data.frame(x = x.true, y = mean(y), sdev = NA, type = "Prior"),
data.frame(x = x.true, y = y.fitted, sdev = sqrt(varystar),
type = "Posterior"))
# Prepare random draws for posterior predictive checks
VarY.hat <- H.eta %*% solve(Vy, H.eta) + diag(1 / psi, nrow(Vy))
ppc <- t(mvtnorm::rmvnorm(no.of.draws, mean = y.fitted2, sigma = VarY.hat))
melted.ppc <- melt(data.frame(x = x, ppc = ppc), id.vars = "x")
melted.ppc <- cbind(melted.ppc, type = "Posterior predictive check")
linlab <- paste0("alpha + ", "italic(f(x))")
# Prior and posterior sample path plots and histogram
p <- ggplot() +
geom_point(data = dat, aes(x = x, y = y), col = "grey60") +
scale_x_continuous(breaks = NULL, name = expression(italic(x))) +
scale_y_continuous(breaks = NULL, name = expression(italic(y))) +
coord_cartesian(
xlim = c(min(x.true) + 0.45, max(x.true) - 0.45),
ylim = c(min(y) - 5, max(y) + 5)
)
p.prior.post <- p +
geom_line(data = melted,
aes(x = x, y = value, group = variable, linetype = "1",
col = "1", size = "1"), alpha = 0.5) +
geom_line(data = dat.f,
aes(x = x, y = y, linetype = "2", col = "2", size = "2")) +
scale_linetype_manual(name = "", values = c("solid", "dashed"),
labels = c("Sample paths", "Mean path")) +
scale_colour_manual(name = "", values = c("steelblue3", "grey20"),
labels = c("Sample paths", "Mean path")) +
scale_size_manual(name = "", values = c(0.19, 0.8),
labels = c("Sample paths", "Mean path")) +
facet_grid(type ~ .) +
theme_bw() +
theme(
axis.ticks = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank(),
legend.key.width = unit(3, "line"),
legend.justification = c(1, 0),
legend.position = c(1 - 0.001, 0 + 0.001),
legend.text.align = 0,
legend.background = element_rect(fill = scales::alpha('white', 0))
)
p.hist <- ggplot(dat, aes(x = x)) +
geom_density(fill = "grey80", col = "grey80") +
scale_x_continuous(limits = c(min(x.true) + 0.8, max(x.true))) +
theme_void()
gg.marg.size <- 8
grid.arrange(p.hist, p.prior.post, ncol = 1, heights = c(1, gg.marg.size))
# Credible interval plot
ggplot() +
scale_x_continuous(breaks = NULL, name = expression(italic(x))) +
scale_y_continuous(breaks = NULL, name = expression(italic(y))) +
coord_cartesian(
xlim = c(min(x.true) + 0.45, max(x.true) - 0.45),
ylim = c(min(y) - 5, max(y) + 5)
) +
geom_ribbon(data = dat.fit, fill = "grey", alpha = 0.5,
aes(x = x.true, ymin = y.fitted + qnorm(0.05 / 2) * sdev,
ymax = y.fitted + qnorm(1 - 0.05 / 2) * sdev)) +
geom_point(data = dat, aes(x = x, y = y), col = "grey10", shape = 21) +
geom_line(data = dat.fit, aes(x = x.true, y = y.fitted, col = "1", linetype = "1"), size = 0.8) +
geom_line(data = dat.truth, aes(x = x, y = y, col = "2", linetype = "2"), size = 0.8) +
scale_colour_manual(name = "", values = c("black", "red3"),
labels = c("Estimated", "Truth")) +
scale_linetype_manual(name = "", values = c("solid", "dashed"),
labels = c("Estimated", "Truth")) +
theme_bw() +
theme(
axis.ticks = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank(),
legend.key.width = unit(3, "line"),
legend.justification = c(1, 0),
legend.position = c(1 - 0.001, 0 + 0.001),
legend.text.align = 0,
legend.background = element_rect(fill = scales::alpha('white', 0))
) -> p.cred; p.cred
# Posterior predictive check plot
p.ppc <- ggplot() +
scale_x_continuous(breaks = NULL, name = expression(italic(y))) +
scale_y_continuous(breaks = NULL) +
geom_line(data = melted.ppc,
aes(x = value, group = variable, col = "yrep", size = "yrep"),
stat = "density", alpha = 0.5) +
geom_line(data = dat, aes(x = y, col = "y", size = "y"), stat = "density") +
scale_colour_manual(
name = NULL, labels = c("Observed", "Replications"),
values = c("grey10", "steelblue3")
) +
scale_size_manual(
name = NULL, labels = c("Observed", "Replications"),
values = c(1.1, 0.19)
) +
theme_bw() +
theme(legend.position = c(0.9, 0.5))
Ridge in log-likelihood
Plot showing the ridge in the I-prior log-likelihood. Uses the plotly package.
# Fit an fBm I-prior model using the toy data set
mod <- iprior(y ~ X, gen_smooth(), kernel = "fbm")
# Create grid for plot
no.points <- 50
x <- log(get_lambda(mod))
x <- seq(x - 20, x + 15, length = no.points) # lambda
x <- sort(c(x, log(get_lambda(mod))))
y <- log(get_psi(mod))
y <- seq(y - 10, y + 15, length = no.points) # psi
y <- sort(c(y, log(get_psi(mod))))
tab <- expand.grid(x = x, y = y)
# Calculate the log-likelihood for each point in the grid
z <- rep(NA, nrow(tab))
for (i in seq_along(z)) {
z[i] <- logLik(mod, theta = as.numeric(tab[i, ]))
}
tab.loglik <- matrix(z, nrow = no.points + 1, ncol = no.points + 1)
zmin <- -5000
# 3D plot
plot_ly(x = x, y = y, z = tab.loglik, zauto = FALSE, zmin = zmin) %>%
add_surface() %>%
layout(
title = "I-prior log-likelihood",
scene = list(
xaxis = list(title = "log(lambda)"),
yaxis = list(title = "log(psi)"),
zaxis = list(title = "Log-likelihood", range = c(zmin, max(z)))
))
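As a sanity check, the grid point with the highest log-likelihood should lie close to the fitted values (assuming the grid covers the optimum):
# Grid maximum vs. the fitted (log) parameters
tab[which.max(z), ]
c(log.lambda = log(get_lambda(mod)), log.psi = log(get_psi(mod)))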
Random effects model
Random effects model (IGF data set). Data obtained from the nlme package, which is distributed with base R.
# Load data set
data(IGF, package = "nlme")
head(IGF)
# Fit I-prior model
mod.iprior <- iprior(conc ~ age * Lot, IGF, method = "em")
summary(mod.iprior)
plot_fitted_multilevel(mod.iprior, facet = 1, cred.bands = FALSE,
extrapolate = TRUE, show.legend = FALSE)
# Fit standard random effects model using lme4
(mod.lmer <- lmer(conc ~ age + (age | Lot), IGF))
round(coef(summary(mod.lmer)), 4)
# Inspect the eigenvalues of the estimated covariance matrix (to check
# whether it is positive-definite)
eigen(VarCorr(mod.lmer)$Lot)
# This is how to recover the betas from the I-prior model. Essentially it is
# just fitting a straight line through the fitted points and manually getting
# the intercept and slope.
grp <- unique(as.numeric(IGF$Lot))
beta.iprior <- matrix(NA, nrow = length(grp), ncol = 2)
tmp.df <- data.frame(x = IGF$age, y = fitted(mod.iprior)$y,
grp = as.numeric(IGF$Lot))
for (i in seq_along(grp)) {
beta.iprior[i, ] <- coef(lm(y ~ x, tmp.df[tmp.df$grp == grp[i], ]))
}
beta.lmer <- coef(mod.lmer)$Lot
beta.fixed.iprior <- coef(lm(y ~ x, tmp.df))
beta.fixed.lmer <- mod.lmer@beta
beta.fixed.df <- data.frame(
beta = c(beta.fixed.iprior, beta.fixed.lmer),
type = rep(c("Intercept", "Slope")),
model = rep(c("iprior", "lmer"), each = 2)
)
Sigma.iprior <- cov(beta.iprior)
sigma0.iprior <- sqrt(Sigma.iprior[1, 1])
sigma1.iprior <- sqrt(Sigma.iprior[2, 2])
corr.iprior <- cor(beta.iprior)[1, 2] # Sigma.iprior[1, 2] / (sigma0.iprior * sigma1.iprior)
Sigma.lmer <- VarCorr(mod.lmer)[[1]]
sigma0.lmer <- attr(Sigma.lmer, "stddev")[1]
sigma1.lmer <- attr(Sigma.lmer, "stddev")[2]
corr.lmer <- attr(Sigma.lmer, "correlation")[1, 2]
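For a side-by-side comparison of the recovered random-effects parameters (a quick summary; the values depend on the fits above):
# Random-effects SDs and intercept-slope correlation: I-prior vs lmer
rbind(iprior = c(sigma0 = sigma0.iprior, sigma1 = sigma1.iprior, corr = corr.iprior),
      lmer = c(sigma0.lmer, sigma1.lmer, corr.lmer))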
plot.df.beta <- data.frame(
beta = as.numeric(beta.iprior),
Lot = unique(IGF$Lot),
type = rep(c("Intercept", "Slope"), each = 10),
model = "iprior"
)
plot.df.beta <- rbind(plot.df.beta, data.frame(
beta = unlist(beta.lmer),
Lot = unique(IGF$Lot),
type = rep(c("Intercept", "Slope"), each = 10),
model = "lmer"
))
plot.df.param <- data.frame(
param = c(sigma0.iprior, sigma1.iprior, corr.iprior,
sigma0.lmer, sigma1.lmer, corr.lmer),
name = rep(c("sigma[0]", "sigma[1]", "sigma[01]"), 2),
model = rep(c("iprior", "lmer"), each = 3)
)
plot.df.param$name <- factor(plot.df.param$name,
levels = c("sigma[01]", "sigma[1]", "sigma[0]"))
# These fake points help centre the plot
plot.df.fake <- plot.df.beta[c(19, 39, 9, 29), ]
plot.df.fake[1, 1] <- 0.02 # slopes
plot.df.fake[2, 1] <- -0.02
plot.df.fake[3, 1] <- 5.45 # intercepts
plot.df.fake[4, 1] <- 5.25
# Plot
ggplot(plot.df.beta) +
geom_point(aes(beta, Lot, col = model)) +
geom_point(data = plot.df.fake, aes(beta, Lot, col = model), alpha = 0) +
geom_vline(data = beta.fixed.df, aes(xintercept = beta, col = model),
linetype = "dashed") +
facet_grid(. ~ type, scales = "free") +
theme_bw() +
theme(legend.position = "top") +
labs(colour = "Model", x = expression(beta))
Longitudinal analysis
Longitudinal analysis of cow growth data. Data obtained from the jmcm package.
# Load and inspect data
data(cattle, package = "jmcm")
names(cattle) <- c("id", "time", "group", "weight")
cattle$id <- as.factor(cattle$id) # convert to factors
levels(cattle$group) <- c("Treatment A", "Treatment B")
str(cattle)
# Model 1: weight ~ f(time)
set.seed(456)
(mod1 <- iprior(weight ~ time, cattle, kernel = "fbm", method = "mixed"))
# Model 2: weight ~ f(time) + f(treatment) + f(time dependent treatment)
set.seed(456)
mod2 <- iprior(weight ~ group * time, cattle, kernel = "fbm",
method = "em", control = list(restarts = TRUE))
# Model 3: weight ~ f(time) + f(cow index) + f(time dependent cow index)
set.seed(456)
mod3 <- iprior(weight ~ id * time, cattle, kernel = "fbm",
method = "mixed")
# Model 4: weight ~ f(time) + f(cow index) + f(treatment)
# + f(time dependent cow index)
# + f(time dependent treatment)
set.seed(456)
mod4 <- iprior(weight ~ group * time + id * time, cattle,
kernel = "fbm", method = "mixed")
# Model 5: weight ~ f(time:cow:treatment)
set.seed(456)
mod5 <- iprior(weight ~ id * group * time, cattle, kernel = "fbm",
method = "mixed")
# Results table
cow_table <- function(mod) {
form <- capture.output(mod$ipriorKernel$formula)
form <- substring(form, 10)
tibble(formula = form, loglik = logLik(mod), error = get_prederror(mod),
no_lambda = length(coef(mod)) - 1)
}
tab <- rbind(
cow_table(mod1), cow_table(mod2), cow_table(mod3),
cow_table(mod4), cow_table(mod5)
)
cbind(model = 1:5, tab)
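The best-fitting model by log-likelihood can then be picked out directly:
# Model with the highest log-likelihood
tab[which.max(tab$loglik), ]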
# Plot of fitted regression lines
plot_fitted_multilevel(mod5, show.legend = FALSE, cred.bands = FALSE) +
labs(x = "Time", y = "Weight")
Functional regression
Regression with functional covariates (Tecator data set). Data obtained from the caret package. Gaussian process regression models are fitted using kernlab.
# Load data set
data(tecator, package = "caret")
endpoints <- as.data.frame(endpoints)
colnames(endpoints) <- c("water", "fat", "protein")
# Plot of 10 random spectra predicting fat content
# (Inspired by package caret, v6.0-68. See ?caret::tecator for details)
set.seed(123)
inSubset <- sample(1:dim(endpoints)[1], 10)
absorpSubset <- absorp[inSubset,]
endpointSubset <- endpoints$fat[inSubset]
newOrder <- order(absorpSubset[,1])
absorpSubset <- absorpSubset[newOrder,]
endpointSubset <- endpointSubset[newOrder]
as.tibble(data.frame(id = factor(1:10), fat = endpointSubset,
wave = absorpSubset)) %>%
melt(id.vars = c("id", "fat")) %>%
as.tibble() %>%
mutate(x = rep(1:100, each = 10)) %>%
ggplot(aes(x, value, col = id)) +
geom_line() +
geom_dl(aes(label = fat), method = list("last.polygons", cex = 0.9)) +
theme_bw() +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5)) +
scale_x_continuous(breaks = c(1, 25, 50, 75, 100)) +
coord_cartesian(xlim = c(0, 105)) +
labs(x = "Wavelength index", y = "Absorbance Units")
# Prepare data for iprior package
# n = 215, use first 172 for training, and remaining 43 for testing
absorp.train <- -t(diff(t(absorp))) # this takes first differences using diff()
absorp.test <- absorp.train[173:215,]
absorp.train <- absorp.train[1:172,]
# Other variables
fat.train <- endpoints$fat[1:172]
fat.test <- endpoints$fat[173:215]
water.train <- endpoints$water[1:172]
water.test <- endpoints$water[173:215]
# NOTE: Some of the following model fits have set.seed() to fix the initial
# starting values for the estimation procedure. With direct optimisation,
# multiple local modes are a problem, but with EM it is less of a problem. All
# of the models were analysed with EM and multiple restarts, but for the
# vignette/paper, Models 1-5 showcase the iprior() functionality.
# Model 1: Canonical RKHS (linear)
set.seed(7)
(mod1 <- iprior(y = fat.train, absorp.train))
# Model 2: Polynomial RKHS (quadratic)
set.seed(6)
mod2 <- iprior(y = fat.train, absorp.train, kernel = "poly2",
est.offset = TRUE)
# Model 3: Polynomial RKHS (cubic)
mod3 <- iprior(y = fat.train, absorp.train, kernel = "poly3",
est.offset = TRUE)
# Model 4: fBm RKHS (default Hurst = 0.5)
(mod4 <- iprior(y = fat.train, absorp.train, kernel = "fbm",
method = "em", control = list(stop.crit = 1e-3)))
# Model 5: fBm RKHS (estimate Hurst)
set.seed(17)
(mod5 <- iprior(fat.train, absorp.train, kernel = "fbm", method = "mixed",
est.hurst = TRUE, control = list(stop.crit = 1e-3)))
# Model 6: SE kernel
(mod6 <- iprior(fat.train, absorp.train, est.lengthscale = TRUE,
kernel = "se", control = list(restarts = TRUE,
par.maxit = 100)))
# GPR models
mod7 <- gausspr(x = absorp.train, y = fat.train, kernel = "vanilladot")
y.hat <- predict(mod7, absorp.test)
RMSE.train7 <- sqrt(mod7@error)
RMSE.test7 <- sqrt(mean((y.hat - fat.test) ^ 2))
mod8 <- gausspr(x = absorp.train, y = fat.train)
y.hat <- predict(mod8, absorp.test)
RMSE.train8 <- sqrt(mod8@error)
RMSE.test8 <- sqrt(mean((y.hat - fat.test) ^ 2))
len.scale8 <- sqrt(1 / (2 * mod8@kernelf@kpar$sigma))
# Function to make it easier to compare models
tecator_res <- function(mod) {
RMSE.train <- as.numeric(get_prederror(mod))
RMSE.test <- sqrt(predict(mod, list(absorp.test), fat.test)$test.error)
c("Log-lik" = logLik(mod), "Training RMSE" = RMSE.train,
"Test RMSE" = RMSE.test)
}
tecator_tab <- function() {
tab <- t(sapply(list(mod1, mod2, mod3, mod4, mod5, mod6),
tecator_res))
rownames(tab) <- c("Linear", "Quadratic", "Cubic", "fBm-0.5",
paste0("fBm-", round(get_hurst(mod5), 3)),
paste0("SE-", round(get_lengthscale(mod6), 3)))
tab
}
tecator_tab()
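The kernlab GPR results can be appended for comparison (a sketch; gpr.tab is a convenience object introduced here, and the GPR fits do not provide a comparable marginal log-likelihood, hence the NAs):
# Append GPR training/test RMSEs to the I-prior results table
gpr.tab <- rbind("GPR (linear)" = c(NA, RMSE.train7, RMSE.test7),
                 "GPR (SE)" = c(NA, RMSE.train8, RMSE.test8))
rbind(tecator_tab(), gpr.tab)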
Nyström example
Note that fitting the full model with n = 2000 takes roughly 15 minutes.
# Generate data set
dat <- gen_smooth(n = 2000, xlim = c(-1, 5.5), seed = 1)
head(dat)
ggplot2::qplot(X, y, data = dat)
# Full model
(mod.full <- iprior(y ~ X, dat, kernel = "fbm"))
# Nystrom method
(mod.nys <- iprior(y ~ X, dat, kernel = "fbm", nystrom = 50))
# Comparison and plots
get_time(mod.full); get_size(mod.full, "MB"); get_prederror(mod.full)
get_time(mod.nys); get_size(mod.nys); get_prederror(mod.nys)
plot(mod.full) + ggtitle("Full model")
plot(mod.nys) + ggtitle("Nyström model")
Chapter 5
Chapter 5 mainly uses the iprobit package to fit categorical I-prior models using a variational EM algorithm.
Comparison of methods
Comparing Laplace approximation, variational EM and Hamiltonian MC for the binary I-probit model. The MCMC and Laplace approximation runs take a very long time to complete.
# Generate data set
dat <- iprobit::gen_spiral(n = 300, seed = 123) # generate binary toy example data set
# Variational EM
mod.vi <- iprobit(y ~ X1 + X2, dat, one.lam = TRUE, kernel = "fbm")
# Laplace's method
mod.lap <- iprobit(y ~ X1 + X2, dat, one.lam = TRUE, kernel = "fbm",
method = "laplace")
# MCMC
stan.iprobit.dat <- list(y = as.numeric(dat$y) - 1, H = iprior::kern_fbm(dat$X),
n = length(dat$y))
stan.iprobit.mod <- "
data {
int<lower=0> n; // number of data
int<lower=0,upper=1> y[n]; // data y
matrix[n, n] H; // the kernel matrix
}
parameters {
real alpha;
real<lower=0> lambda;
vector[n] w;
}
transformed parameters {
vector[n] mu;
vector<lower=0,upper=1>[n] pi;
mu = alpha + lambda * H * w;
pi = Phi(mu);
}
model {
y ~ bernoulli(pi);
w ~ normal(0, 1);
lambda ~ normal(0, 10);
}
generated quantities {
// generate from posterior of y
vector[n] ypred;
vector[n] yvec;
real brierscore;
real errorrate;
real logLik;
for (i in 1:n)
yvec[i] = y[i];
brierscore = mean(square(pi - yvec));
for (i in 1:n)
if (mu[i] > 0) {
ypred[i] = 1;
} else {
ypred[i] = 0;
}
errorrate = mean(square(ypred - yvec)) * 100;
logLik = bernoulli_lpmf(y|pi);
}
"
# Compile the Stan programme
m <- stan_model(model_code = stan.iprobit.mod)
m@model_name <- "iprobit.fbm"
# Fit stan model
fit.stan <- sampling(m, data = stan.iprobit.dat,
pars = c("alpha", "lambda", "brierscore", "errorrate", "w",
"logLik"),
iter = 200, chains = 8, thin = 1)
print(fit.stan, pars = c("alpha", "lambda", "brierscore", "errorrate", "logLik"))
postmean <- summary(stan2coda(fit.stan))$stat[, 1]
postsd <- summary(stan2coda(fit.stan))$stat[, 2]
b.alpha <- postmean[grep("alpha", names(postmean))]
b.lambda <- postmean[grep("lambda", names(postmean))]
b.alpha.se <- postsd[grep("alpha", names(postsd))]
b.lambda.se <- postsd[grep("lambda", names(postsd))]
b.w <- postmean[grep("w", names(postmean))]
b.w.se <- postsd[grep("w", names(postsd))]
# Slight hack to get the picture
mod.hmc <- iprobit(y ~ X1 + X2, dat, one.lam = TRUE, kernel = "fbm",
control = list(theta0 = log(b.lambda), int.only = TRUE))
mod.hmc$w <- b.w
mod.hmc$param.full[1, ] <- b.alpha
iplot_predict(mod.hmc)
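The approximations can then be compared on training fit (a quick summary using the iprobit extractors; the decision boundaries are inspected with iplot_predict() as above):
# Training error rates and Brier scores
c(VI = get_error_rate(mod.vi), Laplace = get_error_rate(mod.lap))
c(VI = get_brier_score(mod.vi), Laplace = get_brier_score(mod.lap))
iplot_predict(mod.vi)
iplot_predict(mod.lap)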
Binary classification
Binary classification of cardiac arrhythmia. Requires the arrhythmia data set Arrh194.RData. First, load the required functions to run the simulation programme.
# Set number of cores
no.cores <- parallel::detectCores()
# Function to combine mean and se
mean_and_se <- function(x, y) paste0(dec_plac(x, 2), " (", dec_plac(y, 2), ")")
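For example:
mean_and_se(35.76, 0.36) # returns "35.76 (0.36)"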
# Function to calculate ranks
tab_rank <- function(x, y) {
# This is based on a weighted average. More weight is given if there is low
# classification error in a small sample size
tmp <- apply(x + y, 1, function(x) sum(n * x) / sum(n))
rank(tmp)
}
# Function to tabulate mean and se
tab_res <- function(...) {
this <- list(...)
K <- length(this)
tab.mean <- tab.se <- tab <- NULL
for (k in 1:K) {
if (any(is.na(this[[k]]))) {
tab.mean <- rbind(tab.mean, NA)
tab.se <- rbind(tab.se, NA)
tab <- rbind(tab, NA)
} else {
tab.mean.tmp <- apply(this[[k]], 2, mean)
tab.se.tmp <- apply(this[[k]], 2, sd) / sqrt(nrow(this[[k]]))
tab.mean.and.se <- mean_and_se(tab.mean.tmp, tab.se.tmp)
tab.mean <- rbind(tab.mean, tab.mean.tmp)
tab.se <- rbind(tab.se, tab.se.tmp)
tab <- rbind(tab, tab.mean.and.se)
}
}
rownames(tab.mean) <- rownames(tab.se) <- rownames(tab) <- names(this)
colnames(tab.mean) <- colnames(tab.se) <- colnames(tab) <- colnames(this[[1]])
list(
tab = as.data.frame(tab),
tab.mean = as.data.frame(tab.mean),
tab.se = as.data.frame(tab.se)
)
}
# I-probit simulation
my_iprobit_sim <- function(nsim = 100, kernel = c("canonical", "fbm", "se")) {
# kernel <- match.arg(kernel, c("canonical", "fbm", "se"))
pb <- txtProgressBar(min = 0, max = nsim, style = 3)
progress <- function(i) setTxtProgressBar(pb, i)
cl <- makeCluster(no.cores)
registerDoSNOW(cl)
res <- foreach(i = 1:nsim, .combine = rbind,
.packages = c("iprior", "iprobit"),
.export = c("n", "y", "X"),
.options.snow = list(progress = progress)) %dopar% {
res.tmp <- rep(NA, length(n))
for (j in 1:length(n)) {
tmp <- iprobit(y, X, kernel = kernel, control = list(maxit = 5),
train.samp = sample(seq_along(y), size = n[j]))
res.tmp[j] <- predict(tmp)$error
}
res.tmp
}
close(pb)
stopCluster(cl)
push.message <- paste0(
experiment.name, ": ", kernel, " I-prior probit COMPLETED."
)
pushoverr::pushover(message = push.message, user = userID, app = appToken)
colnames(res) <- paste0(c("n = "), n)
res
}
# 451 observations with 194 continuous covariates
load("data/Arrh194.RData")
experiment.name <- "Cardiac data"
X.orig <- ArrhDataNew$x
y <- ArrhDataNew$y
y <- y - 1 # convert to 0 and 1
y <- as.factor(y)
levels(y) <- c("Normal", "Arrhythmia")
N <- length(y)
n <- c(50, 100, 200) # subsamples
X <- scale(X.orig) # standardise data
X[, 36] <- rep(0, N); X[, 181] <- rep(0, N) # these columns are all zeroes (scale() turns them into NaN)
summary(y)
# Full I-probit model
set.seed(123)
(mod <- iprobit(y, X, kernel = "fbm", control = list(maxit = 100)))
iplot_lb(mod, lab.pos = "down") +
scale_y_continuous(sec.axis = dup_axis(name = " ")) +
coord_cartesian(xlim = c(1, 14))
iplot_error(mod) + coord_cartesian(xlim = c(1, 14))
# Simulation study
set.seed(456)
res.canonical <- my_iprobit_sim(nsim = 100, kernel = "canonical")
res.fbm <- my_iprobit_sim(nsim = 100, kernel = "fbm")
res.se <- my_iprobit_sim(nsim = 100, kernel = "se")
(tab <- tab_res("I-probit (linear)" = res.canonical,
"I-probit (fBm-0.5)" = res.fbm,
"I-probit (SE-1.0)" = res.se))
# Other results
knn.mean <- c(40.64, 38.94, 35.76)
knn.se <- c(0.33, 0.33, 0.36)
knn <- mean_and_se(knn.mean, knn.se)
svm.mean <- c(36.16, 35.64, 35.20)
svm.se <- c(0.47, 0.39, 0.35)
svm <- mean_and_se(svm.mean, svm.se)
svm.radial.mean <- c(48.39, 47.24, 46.85)
svm.radial.se <- c(0.49, 0.46, 0.43)
svm.radial <- mean_and_se(svm.radial.mean, svm.radial.se)
gpc.radial.mean <- c(37.28, 33.80, 29.31)
gpc.radial.se <- c(0.42, 0.40, 0.35)
gpc.radial <- mean_and_se(gpc.radial.mean, gpc.radial.se)
rf.mean <- c(31.65, 26.72, 22.40)
rf.se <- c(0.39, 0.29, 0.31)
rf <- mean_and_se(rf.mean, rf.se)
nsc.mean <- c(34.98, 33.00, 31.08) # nearest shrunken centroids
nsc.se <- c(0.46, 0.44, 0.41)
nsc <- mean_and_se(nsc.mean, nsc.se)
penlog.mean <- c(34.92, 30.48, 26.12) # L1-penalised logistic regression
penlog.se <- c(0.42, 0.34, 0.27)
penlog <- mean_and_se(penlog.mean, penlog.se)
other.tab <- rbind(
"k-nn" = knn,
"SVM" = svm,
"GP (radial)" = gpc.radial,
"Random forests" = rf,
"NSC" = nsc,
"L-1 logistic" = penlog
)
colnames(other.tab) <- colnames(tab$tab)
# Calculate ranks
tab.mean <- rbind(tab$tab.mean,
"k-nn" = knn.mean,
"SVM" = svm.mean,
"GP (radial)" = gpc.radial.mean,
"Random forests" = rf.mean,
"NSC" = nsc.mean,
"L-1 logistic" = penlog.mean
)
tab.se <- rbind(tab$tab.se,
"k-nn" = knn.se,
"SVM" = svm.se,
"GP (radial)" = gpc.radial.se,
"Random forests" = rf.se,
"NSC" = nsc.se,
"L-1 logistic" = penlog.se
)
tab.ranks <- tab_rank(tab.mean, tab.se)
# Tabulate results
cbind(rbind(tab$tab, other.tab), Rank = tab.ranks)
Meta-analysis
Meta-analysis of smoking cessation data. All models take an extremely long time to converge due to the large data set. The data sets required are gum.txt and smoking_re_results.csv.
# Load data set and convert to long format
smoking <- read.table(
"data/gum.txt",
sep = "\t", header = TRUE
)
sum(smoking[, c(3, 5)]) # sample size
smoking.small <- smoking
levels(smoking[, 1]) <- c(levels(smoking[, 1]), "Summary measure")
as.tibble(
rbind(smoking,
c("Summary measure", apply(smoking[, -1], 2, sum)))
) %>%
mutate(d1 = as.numeric(d1), # number of patients quit in treatment group
n1 = as.numeric(n1), # number of patients in treatment group
d0 = as.numeric(d0), # number of patients quit in control group
n0 = as.numeric(n0), # number of patients in control group
logodds = log((d1 / (n1 - d1)) / (d0 / (n0 - d0))),
lower = NA,
upper = NA,
n = n0 + n1,
method = "Raw odds") %>%
select(Study, lower, logodds, upper, n, method) -> smoke.raw.odds
study.size <- smoke.raw.odds$n
makeSmoke <- function(x) {
rbind(
data.frame(y = rep(1, x[2]), Study = x[1], Group = "Treated"), # Quit
data.frame(y = rep(0, x[3] - x[2]), Study = x[1], Group = "Treated"), # Remain
data.frame(y = rep(1, x[4]), Study = x[1], Group = "Control"), # Quit
data.frame(y = rep(0, x[5] - x[4]), Study = x[1], Group = "Control") # Remain
)
}
dat.smoke <- NULL
for (i in 1:nrow(smoking.small)) {
dat.smoke <- rbind(dat.smoke, makeSmoke(smoking.small[i, ]))
} # ignore warning about row names discarded
dat.smoke$y <- as.factor(dat.smoke$y)
levels(dat.smoke$y) <- c("Remain", "Quit")
str(dat.smoke)
# Distribution among control and treatment group
tmp <- apply(smoking[, -1], 2, mean)[c(2, 4)] # treatment vs control
(dist.mean <- tmp / sum(tmp) * 100)
# Raw odds ratio
(raw.odds.ratio <- exp(smoke.raw.odds$logodds[28]))
# Load Logistic RE model results
as.tibble(read.csv("data/smoking_re_results.csv")) %>%
mutate(n = study.size, method = "Logistic RE",
studynam = factor(studynam, levels = levels(smoke.raw.odds[[1]]))) %>%
rename(Study = studynam, logodds = eff) -> smoke.re.res
# Initial data exploration plot
as.tibble(dat.smoke) %>%
group_by(y, Group, Study) %>%
summarise(count = n()) %>%
ungroup() %>%
mutate(Group = factor(Group, levels = c("Control", "Treated")),
Group = recode(Group, Treated = "Treatment")) %>%
ggplot(aes(y, count, fill = y)) +
facet_grid(. ~ Group) +
geom_boxplot(notch = TRUE, outlier.alpha = 0) +
geom_jitter(width = 0.12, height = 0, pch = 21) +
scale_y_log10(breaks = c(1, 2, 5, 10, 25, 50, 100, 500)) +
labs(x = NULL, y = "Count") +
theme_bw() +
theme(legend.position = "none")
# Fit I-prior models
mod1 <- iprobit(y ~ Group, dat.smoke)
mod2 <- iprobit(y ~ Group + Study, dat.smoke,
control = list(theta0 = c(3.86e+03, 1.06e-03), alpha0 = -0.769))
mod3 <- iprobit(y ~ Group * Study, dat.smoke,
control = list(theta0 = c(0.014894, 0.000878), alpha0 = -0.767))
l1 <- logLik(mod1); e1 <- get_error_rate(mod1)
l2 <- logLik(mod2); e2 <- get_error_rate(mod2)
l3 <- logLik(mod3); e3 <- get_error_rate(mod3)
# Function to calculate log odds ratios from the fitted models
calc_odds <- function(n.samp = 100) {
studies <- levels(dat.smoke$Study)
as.tibble(dat.smoke) %>%
mutate(tmp = paste0(Study, Group)) -> tmp
unq.i <- match(unique(tmp$tmp), tmp$tmp)
quants <- predict(mod3, dat.smoke[unq.i, ], quantiles = TRUE, raw = TRUE,
n.samp = n.samp)[[1]][[2]] # prob quit
res <- quants[seq_len(length(studies) + 1), ]; res[] <- NA
for (i in seq_along(studies)) {
prob.treated <- quants[2 * i - 1, ]
prob.control <- quants[2 * i, ]
a <- prob.treated; b <- 1 - a # probs in treatment groups
c <- prob.control; d <- 1 - c # probs in control groups
res[i, ] <- log(a * d) - log(b * c)
}
unq.i <- match(unique(dat.smoke$Group), dat.smoke$Group)
quants <- predict(mod1, dat.smoke[unq.i, ], quantiles = TRUE, raw = TRUE,
n.samp = n.samp)[[1]][[2]]
prob.treated <- quants[1, ]
prob.control <- quants[2, ]
a <- prob.treated; b <- 1 - a
c <- prob.control; d <- 1 - c
res[nrow(res), ] <- log(a * d) - log(b * c)
tab <- data.frame(
Study = c(studies, "Summary measure"),
t(apply(res, 1, function(x) c(logodds = mean(x),
quantile(x, probs = c(0.05, 0.95))))),
n = study.size,
method = "I-probit"
)
names(tab)[3:4] <- c("lower", "upper")
tab
}
l <- c(l1, l2, l3)
err <- c(e1, e2, e3)
b <- c(get_brier_score(mod1), get_brier_score(mod2), get_brier_score(mod3))
smoke.ip.res <- calc_odds(100)
# Table to compare results
tab.compare <- as.data.frame(cbind(
c("$f_1$", "$f_1 + f_2$",
"$f_1 + f_2 + f_{12}$"),
paste0("$-", dec_plac(-l, 2), "$"),
dec_plac(b, 3),
c(1, 2, 2)
))
colnames(tab.compare) <- c("Model", "Lower bound", "Brier score",
"No. of RKHS\\newline scale param.")
tab.compare
# Forest plot
ggplot(rbind(smoke.raw.odds, smoke.re.res, smoke.ip.res), aes(Study, logodds)) +
geom_hline(yintercept = 0, linetype = "dotted", col = "grey50") +
geom_errorbar(aes(ymin = lower, ymax = upper, linetype = method, col = Study),
width = 0.4, size = 0.5, position = position_dodge(width = 0.55)) +
geom_point(aes(fill = Study, size = n, shape = method),
position = position_dodge(width = 0.55), col = "white", alpha = 0.75) +
scale_x_discrete(name = NULL, limits = rev(levels(smoke.re.res$Study))) +
scale_y_continuous(name = NULL,
breaks = c(-0.5, 0, 0.5, 1, 1.5)
) +
scale_size_continuous(range = c(1.8, 8)) +
coord_flip() +
theme_bw() +
scale_linetype_manual(name = NULL, values = c(1, 2, 0)) +
scale_shape_manual(name = NULL, values = c(21, 22, 23)) +
guides(col = FALSE, size = FALSE, fill = FALSE,
linetype = guide_legend(override.aes = list(col = "black",
fill = "black",
linetype = c(1, 2, 0)))) +
theme(legend.position = "top", legend.key.width = unit(2.5, "line"))
Multiclass classification
Multiclass classification of Deterding’s vowel data set. In each of the model runs, we checked whether the variational EM converged to different lower bounds under repeated restarts; it did not, so the final models do not require multiple restarts.
# Load data set
data("vowel.train", package = "ElemStatLearn")
data("vowel.test", package = "ElemStatLearn")
vowel.train$y <- as.factor(vowel.train$y); n.train <- nrow(vowel.train)
vowel.test$y <- as.factor(vowel.test$y); n.test <- nrow(vowel.test)
vowel.dat <- rbind(vowel.train, vowel.test)
# Canonical I-probit
mod.can <- iprobit(y ~ ., vowel.dat, train.samp = 1:n.train, one.lam = TRUE,
control = list(maxit = 10, restarts = TRUE))
iplot_par_lb(mod.can)
iplot_par_error(mod.can)
iplot_par_error(mod.can, type = "test")
mod.can <- iprobit(y ~ ., vowel.dat, train.samp = 1:n.train, one.lam = TRUE,
control = list(maxit = 1000))
# fBm-0.5 I-probit
mod.fbm <- iprobit(y ~ ., vowel.dat, train.samp = 1:n.train, one.lam = TRUE,
kernel = "fbm", control = list(maxit = 10, restarts = TRUE))
iplot_par_lb(mod.fbm)
iplot_par_error(mod.fbm)
iplot_par_error(mod.fbm, type = "test")
mod.fbm <- iprobit(y ~ ., vowel.dat, train.samp = 1:n.train, one.lam = TRUE,
kernel = "fbm", control = list(maxit = 1000))
# SE I-probit
mod.se <- iprobit(y ~ ., vowel.dat, train.samp = 1:n.train, one.lam = TRUE,
kernel = "se", control = list(maxit = 10, restarts = TRUE))
iplot_par_lb(mod.se)
iplot_par_error(mod.se)
iplot_par_error(mod.se, type = "test")
mod.se <- iprobit(y ~ ., vowel.dat, train.samp = 1:n.train, one.lam = TRUE,
kernel = "se", control = list(maxit = 1000))
# Confusion matrix
conf_mat <- function(mod) {
tibble(test = vowel.test$y, predicted = mod$test$y) %>%
group_by(test, predicted) %>%
tally() %>%
mutate(prop = n / 42) %>%
complete(predicted, fill = list(n = 0, prop = 0)) -> plot.df
levels(plot.df$test) <- levels(plot.df$predicted) <- c(
"heed", "hid", "head", "had", "hud", "hard", "hod", "hoard", "hood", "who'd", "heard"
)
ggplot(plot.df, aes(test, predicted)) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = ifelse(n > 0, n, NA)), na.rm = TRUE) +
scale_y_discrete(limits = rev(levels(plot.df$test))) +
scale_x_discrete(position = "top") +
scale_fill_continuous(low = "white", high = "slategray", limits = c(0, 42)) +
labs(x = "Test data", y = "Predicted classes") +
theme_bw() +
theme(legend.position = "none", plot.title = element_text(hjust = 0))
}
conf_mat(mod.can)
conf_mat(mod.fbm)
conf_mat(mod.se)
# Gaussian process classification using kernlab
# mod.gpc1 <- gausspr(y ~ ., vowel.train, kernel = "vanilladot") # DOES NOT WORK
mod.gpc2 <- gausspr(y ~ ., vowel.train, kernel = "rbfdot")
y.hat <- predict(mod.gpc2, vowel.test)
# Misclassification rates (%), training and test
err.train <- sum(fitted(mod.gpc2) != vowel.train$y) / length(vowel.train$y) * 100
err.test <- sum(y.hat != vowel.test$y) / length(vowel.test$y) * 100
Spatio-temporal modelling
Spatio-temporal modelling of bovine tuberculosis (BTB) in Cornwall. Requires BTBppp.RData and BTB_spoligotype_data.txt. The models take a long time to fit, and so do some of the plotting functions.
# Load data set
load("data/BTBppp.RData")
W <- pppdata$window
simpW <- simplify.owin(W, 100) # this reduces the resolution of the plot
plot.df <- as.data.frame(W)
# Get data points (location, spoligotypes and year)
btb <- read.table("data/BTB_spoligotype_data.txt", header = FALSE, sep = " ",
skip = 1)
colnames(btb) <- c("x", "y", "year", "sp")
btb$sp <- factor(btb$sp)
# Keep the four largest classes
btb %>%
group_by(sp) %>%
tally() %>%
arrange(desc(n)) -> btb.n
(classes.to.keep <- as.character(btb.n$sp[1:4]))
levels(btb$sp) <- c("Sp9", "Others", "Others", "Sp12" , "Sp15",
"Others", "Sp20", "Others", "Others", "Others")
btb$sp <- factor(btb$sp, levels = c("Sp9", "Sp12", "Sp15", "Sp20", "Others"))
# A similar table to Diggle et al. (2005)
btb.summary <- btb %>%
group_by(year, sp) %>%
arrange(year) %>%
tally() %>%
complete(sp, year, fill = list(n = 0)) %>%
reshape2::dcast(year ~ sp)
# Group the years into four categories: 1) < 1997; 2) 1997-1998; 3) 1999-2000;
# 4) 2001-2002
btb_year_fn <- function(x) {
res <- x
res[x < 1997] <- "< 1997"
res[x == 1997 | x == 1998] <- "1997-1998"
res[x == 1999 | x == 2000] <- "1999-2000"
res[x == 2001 | x == 2002] <- "2001-2002"
res
}
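A quick check of the grouping:
btb_year_fn(c(1995, 1998, 2000, 2002))
# [1] "< 1997"    "1997-1998" "1999-2000" "2001-2002"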
as.tibble(btb) %>%
mutate(period = factor(btb_year_fn(year))) -> btb
# Bar plot
as.tibble(btb.summary) %>%
reshape2::melt(id.vars = "year") %>%
ggplot(aes(year, value, col = variable)) +
geom_segment(aes(xend = year, yend = 0), size = 12) +
scale_x_continuous(breaks = seq(1989, 2002, by = 1),
minor_breaks = seq(1989, 2002, by = 1)) +
labs(x = "Year", y = "Count") +
scale_color_discrete(name = "Spoligotype") +
theme_bw() +
guides(col = guide_legend(override.aes = list(size = 6)))
# Plot of Cornwall
ggplot() +
geom_polygon(data = plot.df, aes(x, y, group = id), fill = NA, col = "grey25") +
geom_point(data = btb, aes(x, y, col = sp)) +
labs(x = "Eastings (1,000 km)", y = "Northings (1,000 km)",
col = "Spoligotype") +
scale_x_continuous(labels = function(x) x / 1000) +
scale_y_continuous(labels = function(x) x / 1000) +
theme_bw() +
theme(legend.position = c(0.98, 0.005), legend.justification = c(1, 0))
# Prepare to fit I-probit models
Spoligotype <- btb$sp
X <- scale(btb[, 1:2])
mu.x <- attr(X, "scaled:center")
sd.x <- attr(X, "scaled:scale")
year <- scale(btb$year, scale = FALSE)
mu.year <- attr(year, "scaled:center")
period <- btb$period
# Constant only model
mod0 <- iprobit(y = Spoligotype, X, control = list(int.only = TRUE, theta0 = -100))
# Spatial model
mod1 <- iprobit(y = Spoligotype, X, kernel = "fbm",
control = list(maxit = 200, restarts = TRUE))
# Spatio-period model (period is categorical)
mod2 <- iprobit(y = Spoligotype, X, period, kernel = "fbm",
interactions = "1:2", control = list(maxit = 200, restarts = TRUE))
# Spatio-temporal model (year is continuous)
mod3 <- iprobit(y = Spoligotype, X, year, kernel = "fbm",
interactions = "1:2", control = list(maxit = 200, restarts = TRUE))
# Tabulate results
rest_fn <- function(mod) {
res <- iprior::dec_plac(summary(mod)$tab[, c(1, 2)], 3)
mod.name <- deparse(substitute(mod))
if (mod.name == "mod0")
res <- rbind(res[-6, ], NA, NA)
if (mod.name == "mod1")
res <- rbind(res, NA)
pij <- fitted(mod)$p
suppressWarnings(y <- iprior::get_y(mod$ipriorKernel))
loglik <- sum(log(pij[cbind(seq_along(y), y)])) # matrix indexing
rbind(
res,
# loglik = c(iprior::dec_plac(logLik(mod), 2), NA),
loglik = c(iprior::dec_plac(loglik, 2), NA),
error = c(iprior::dec_plac(get_error_rate(mod), 2), NA),
brier = c(iprior::dec_plac(get_brier_score(mod), 3), NA)
)
}
tab.btb <- cbind(rest_fn(mod0), " " = "",
rest_fn(mod1), " " = "",
rest_fn(mod3), " " = "",
rest_fn(mod2))
rownames(tab.btb) <- c(
paste0("Intercept (", levels(btb$sp), ")"),
"Scale (spatial)", "Scale (temporal)",
"Log-likelihood", "Error rate (%)", "Brier score"
)
tab.btb
# Preparation for plotting the predicted probability surfaces. First
# obtain points inside the polygon (takes a while)
rescalee <- function(x) {
res <- x * rep(sd.x, each = nrow(x)) + rep(mu.x, each = nrow(x))
colnames(res) <- c("x", "y")
res
}
X.var <- 1:2
maxmin <- cbind(apply(X, 2, min), apply(X, 2, max))
xx <- list(NULL)
for (j in 1:2) {
mm <- maxmin[X.var[j], ]
xx[[j]] <- seq(from = mm[1] - 1, to = mm[2] + 1, length.out = 300) # change this for resolution
}
mm <- maxmin[X.var, ]
x.df.full <- expand.grid(xx[[1]], xx[[2]])
tmp <- x.df.full * rep(sd.x, each = nrow(x.df.full)) +
rep(mu.x, each = nrow(x.df.full))
isin <- inside.owin(tmp[, 1], tmp[, 2], W)
x.df <- x.df.full[isin, ]
# Calculate fitted probabilities (also takes a while!)
fill.col <- iprior::gg_col_hue(5)
N <- nrow(x.df)
a <- predict(mod1, list(x.df)) # spatial model
period.df <- data.frame(
factor(rep(levels(btb$period)[1], N), levels = levels(btb$period)),
factor(rep(levels(btb$period)[2], N), levels = levels(btb$period)),
factor(rep(levels(btb$period)[3], N), levels = levels(btb$period)),
factor(rep(levels(btb$period)[4], N), levels = levels(btb$period))
)
a1 <- predict(mod2, list(x.df, period.df[, 1]))
a2 <- predict(mod2, list(x.df, period.df[, 2]))
a3 <- predict(mod2, list(x.df, period.df[, 3]))
a4 <- predict(mod2, list(x.df, period.df[, 4]))
b <- list(NULL)
unq.year <- sort(as.numeric(unique(year)))
for (i in seq_along(unique(year))) {
b[[i]] <- predict(mod3, list(x.df, matrix(unq.year[i], nrow = N, ncol = 1)))$prob
print(i)
}
# Function to plot the SPATIAL MODEL
plot_spatial_model <- function(m = 1, method = "bottom.pieces", points = FALSE,
contour.labels = FALSE) {
current.label <- paste0("Spoligotype ", gsub("Sp", "", levels(btb$sp)[m]))
contour.df <- cbind(rescalee(x.df), prob = a$prob[, m])
ggplot(data = contour.df, aes(x, y)) +
geom_raster(aes(fill = prob)) -> v
v <- v + geom_contour(aes(z = prob), col = "grey20", size = 0.3) +
geom_polygon(data = plot.df, aes(x, y, group = id), fill = NA,
col = "grey25") +
labs(x = "Eastings (1,000 km)", y = "Northings (1,000 km)") +
scale_x_continuous(labels = function(x) x / 1000) +
scale_y_continuous(labels = function(x) x / 1000) +
scale_fill_continuous(name = NULL, low = "white", high = fill.col[m],
limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
theme_bw() +
annotate("text", x = min(plot.df$x), y = max(plot.df$y),
label = current.label, vjust = 1, hjust = 0) +
theme(panel.grid = element_blank())
if (isTRUE(points)) {
v <- v + geom_point(data = subset(btb, sp == levels(btb$sp)[m]), aes(x, y),
col = "grey30", shape = 21, fill = fill.col[m], size = 1)
}
if (isTRUE(contour.labels)) {
v <- v + directlabels::geom_dl(aes(z = prob, label = ..level..),
stat = "contour", colour = "grey20",
method = list("far.from.others.borders",
"calc.boxes", "draw.rects",
cex = 0.65)) +
guides(fill = FALSE)
} else {
v <- v + guides(fill = guide_colorbar(barwidth = 0.45, barheight = 16))
}
v
}
p1 <- plot_spatial_model(1, points = TRUE, contour.labels = TRUE)
p2 <- plot_spatial_model(2, points = TRUE, contour.labels = TRUE)
p3 <- plot_spatial_model(3, points = TRUE, contour.labels = TRUE)
p4 <- plot_spatial_model(4, points = TRUE, contour.labels = TRUE)
cowplot::plot_grid(p1, p2, p3, p4, ncol = 2,
labels = c("(a)", "(b)", "(c)", "(d)"), label_size = 10,
label_fontface = "plain")
# Function to plot for SPATIO-TEMPORAL MODEL
mean.year <- attr(year, "scaled:center")
plot_stemporal_model <- function(year = 1, points = TRUE,
mod = c("period", "temporal")) {
mod <- match.arg(mod, c("period", "temporal"))
if (mod == "period") {
current.period <- levels(btb$period)[year]
current.label <- paste0("Year: ", current.period)
alphaa <- 0.95
if (year == 1)
contour.df <- cbind(rescalee(x.df), prob = a1$prob)
if (year == 2)
contour.df <- cbind(rescalee(x.df), prob = a2$prob)
if (year == 3)
contour.df <- cbind(rescalee(x.df), prob = a3$prob)
if (year == 4)
contour.df <- cbind(rescalee(x.df), prob = a4$prob)
} else if (mod == "temporal") {
current.period <- as.integer(mean.year + unq.year[year])
current.label <- paste0("Year: ", current.period)
alphaa <- 0.95
contour.df <- cbind(rescalee(x.df), prob = b[[year]])
}
# Add first layer ------------------------------------------------------------
p <- ggplot(contour.df, aes(x, y)) +
geom_raster(aes(alpha = alphaa * contour.df[, 2 + 1]), fill = fill.col[1]) +
scale_alpha_continuous(range = c(0, alphaa))
# Add subsequent layers ------------------------------------------------------
for (j in 2:4) {
p <- p +
annotate(geom = "raster", x = contour.df$x, y = contour.df$y,
alpha = alphaa * contour.df[, 2 + j], fill = fill.col[j])
}
# Add decision boundaries ----------------------------------------------------
tmp.df <- reshape2::melt(contour.df, id.vars = c("x", "y"))
p <- p +
geom_contour(data = tmp.df, aes(x, y, z = value, col = variable), size = 1,
linetype = "dashed", binwidth = 0.1,
breaks = seq(0.5, 0.5, by = 0.1)) +
scale_colour_manual(values = iprior::gg_colour_hue(5)[1:4])
# Add points -----------------------------------------------------------------
if (isTRUE(points)) {
if (mod == "period") {
as.tibble(btb) %>%
subset(sp != "Others" & period == current.period) -> points.df
} else if (mod == "temporal") {
as.tibble(btb) %>%
subset(sp != "Others" & year == current.period) -> points.df
}
p <- p +
geom_point(data = points.df, aes(x, y, fill = sp), col = "grey15",
shape = 21, size = 2) +
scale_fill_manual(values = fill.col[1:4])
}
p +
geom_polygon(data = plot.df, aes(x, y, group = id), fill = NA,
col = "grey25") +
labs(x = "Eastings (1,000 km)", y = "Northings (1,000 km)") +
scale_x_continuous(labels = function(x) x / 1000) +
scale_y_continuous(labels = function(x) x / 1000) +
theme_bw() +
guides(colour = FALSE, alpha = FALSE,
fill = guide_legend(nrow = 2,
title = "Spoligotype",
direction = "horizontal",
title.position = "top",
override.aes = list(size = 2))) +
theme(legend.position = c(0.99, 0.01), legend.justification = c(1, 0),
legend.title = element_text(size = 8), legend.title.align = 0.5,
panel.grid = element_blank()) +
# legend.box.background = element_rect(fill = NA)) +
annotate("text", x = min(plot.df$x), y = max(plot.df$y),
label = current.label, vjust = 1, hjust = 0)
}
p1a <- plot_stemporal_model(1)
p2a <- plot_stemporal_model(2)
p3a <- plot_stemporal_model(3)
p4a <- plot_stemporal_model(4)
cowplot::plot_grid(p1a, p2a, p3a, p4a, ncol = 2,
labels = c("(a)", "(b)", "(c)", "(d)"), label_size = 10,
label_fontface = "plain")
# GIF
makeplot <- function() {
  for (i in seq_along(unq.year)) {
    p <- plot_stemporal_model(i, mod = "temporal")
    print(p)
    animation::ani.pause() # pause after each frame
  }
}
animation::saveGIF(makeplot(), interval = 1, ani.width = 600, ani.height = 550)
# Bootstrap standard errors (takes a long time). Uncomment which model to fit
B <- 30
snow.options.list <- list(progress = function(i) setTxtProgressBar(pb, i))
pb <- txtProgressBar(min = 0, max = B, style = 1)
cl <- parallel::makeCluster(parallel::detectCores())
doSNOW::registerDoSNOW(cl)
res <- foreach::`%dopar%`(
foreach::foreach(
i = seq_len(B),
.packages = c("iprior", "iprobit"),
.options.snow = snow.options.list
), {
ts <- sample(seq_len(length(Spoligotype)), replace = TRUE)
# # Constant only model
# iprobit(y = Spoligotype, X, control = list(int.only = TRUE,
# theta0 = -100))$param.full
# Spatial model
# iprobit(y = Spoligotype, X, kernel = "fbm",
# control = list(maxit = 200))$param.full
# # Spatio-period model (period is categorical)
# iprobit(y = Spoligotype, X, period, kernel = "fbm", interactions = "1:2",
# control = list(maxit = 200))$param.full
# # Spatio-temporal model (year is continuous)
# (the bootstrap resampling indices ts are applied to the data)
iprobit(y = Spoligotype[ts], X[ts, , drop = FALSE], year[ts, , drop = FALSE],
        kernel = "fbm", interactions = "1:2",
        control = list(maxit = 200))$param.full
}
)
parallel::stopCluster(cl)
apply(simplify2array(res), 1:2, sd)
Chapter 6
Bayesian variable selection uses the ipriorBVS package. It provides a wrapper function around JAGS which has the I-prior model built in. It has a convenient formula-based input, but also accepts matrix/vector input.
I-prior BVS simulations
There are two parts to the simulation study. The first part, immediately below, involves all four of the priors/methods: I-prior, g-prior, independent prior, and the Lasso. This takes a long time to run, but for brevity the number of simulations has been cut down from 100 to 4, so it shouldn’t take very long to test.
# The simulation function to run all four methods in two stages simultaneously
other_res <- function(form, dat, method = c("lasso", "gprior"), truth) {
method <- match.arg(method, c("lasso", "gprior"))
if (method == "lasso") {
X <- as.matrix(dat[, -1])
Y <- as.numeric(dat$y)
mod <- cv.glmnet(X, Y)
where.lambdamin <- which(mod$lambda == mod$lambda.min)
gamma <- as.numeric(mod$glmnet.fit$beta[, where.lambdamin] != 0) # selected if coefficient is nonzero
brier <- NA
}
if (method == "gprior") {
p <- ncol(dat) - 1
# First stage --------------------------------------------------------------
mod <- bas.lm(as.formula(form), dat, prior = "g-prior", modelprior = uniform(),
alpha = p)
tmp <- summary(mod)
pips <- tmp[(1:p) + 1, 1]
x.keep <- (pips >= 0.5)
# Second stage -------------------------------------------------------------
mod <- bas.lm(as.formula(form), dat[, c(TRUE, x.keep)], prior = "g-prior",
modelprior = uniform(), alpha = p)
tmp <- summary(mod)
gamma <- pips <- x.keep
gamma[] <- pips[] <- 0
pips[x.keep] <- tmp[seq_len(sum(x.keep)) + 1, 1]
brier <- mean((pips - truth) ^ 2)
gamma[x.keep] <- tmp[seq_len(sum(x.keep)) + 1, 2]
}
gamma.diff <- gamma - truth
no.false.inclusions <- sum(gamma.diff > 0) # false inclusions
no.false.exclusions <- sum(gamma.diff < 0) # false exclusions
no.false.choices <- sum(gamma.diff != 0) # false choices
c(false.inc = no.false.inclusions, false.exc = no.false.exclusions,
false = no.false.choices, brier = brier)
}
runjags::runjags.options(silent.jags = TRUE, silent.runjags = TRUE, modules = "lecuyer")
bvs_sim <- function(snr = 0.90, n.sim = 100, save.intermediate = TRUE) {
bvs.mods <- c("iprior_sing", "flat_prior", "gprior", "lasso")
res <- matrix(NA, nrow = n.sim, ncol = 4)
colnames(res) <- c("false.inc", "false.exc", "false", "brier")
res <- list(res, res, res, res) # one for each model
names(res) <- bvs.mods
for (sim in seq_len(n.sim)) {
cat("\n")
cat("------ SIMULATION NUMBER", sim, paste0("(SNR: ", snr, ")"), "------\n")
cat("\n")
dat <- gen_bvs(n = 150, p = 100, snr = snr)
# Bayesian variable selection models ---------------------------------------
for (i in seq_along(bvs.mods)) { #seq_along(res)
cat(bvs.mods[i])
if (i < 3) {
mod <- ipriorBVS(y ~ ., dat, model = bvs.mods[i], n.samp = 10000,
n.burnin = 500, n.adapt = 125, two.stage = TRUE
, priors = "lambda ~ dunif(0, 100)"
)
tmp <- unlist(predict(mod, dat$truth))[1:4]
} else {
tmp <- other_res(y ~ ., dat$data, bvs.mods[i], dat$truth)
cat("\n")
}
res[[i]][sim, ] <- tmp
print(res[[i]][sim, ])
cat("\n")
}
if (isTRUE(save.intermediate)) save(res, file = paste0("data/bvs-", snr))
}
push.message <- paste0(
"BVS simulations (SNR: ", snr, ") COMPLETED."
)
pushoverr::pushover(message = push.message, user = userID, app = appToken)
res
}
# Use the function above to do the simulations. Note that n.sim has been
# reduced to 4 (originally 100) to reduce runtime if anyone wants to try it out
res.90 <- bvs_sim(snr = 0.90, n.sim = 4)
res.75 <- bvs_sim(snr = 0.75, n.sim = 4)
res.50 <- bvs_sim(snr = 0.50, n.sim = 4)
res.25 <- bvs_sim(snr = 0.25, n.sim = 4)
res.10 <- bvs_sim(snr = 0.10, n.sim = 4)
res <- list("0.90" = res.90, "0.75" = res.75, "0.50" = res.50, "0.25" = res.25, "0.10" = res.10)
# Helper functions to do the analysis
count_false_choices <- function(x) {
# Counts number of false choices
x.0to2 <- mean(x <= 2)
x.6 <- mean(x >= 6)
x.3to5 <- 1 - x.0to2 - x.6
ress <- c(x.0to2, x.3to5, x.6)
names(ress) <- c("0-2", "3-5", ">5")
ress
}
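For example, four simulations with 1, 2, 4 and 7 false choices respectively:
count_false_choices(c(1, 2, 4, 7)) # 0-2: 0.50, 3-5: 0.25, >5: 0.25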
se_false_choices <- function(x) {
# Gets the S.E. for number of false choices
n <- length(x)
x.0to2 <- (sd(x <= 2) * (n - 1) / n) / sqrt(n)
x.6 <- (sd(x >= 6) * (n - 1) / n) / sqrt(n)
x.3to5 <- (x > 2) & (x < 6)
x.3to5 <- (sd(x.3to5) * (n - 1) / n) / sqrt(n)
ress <- c(x.0to2, x.3to5, x.6)
names(ress) <- c("0-2", "3-5", ">5")
ress
}
bvs_res <- function(x = res, type = mean, models = "all") {
# Get the results of BVS simulations. Choice of models are
# c("all", "iprior", "flat_prior", "gprior", "lasso")
# type can be either "mean", "count_false_choices" or "se_false_choices"
if (models == "all") {
res <- lapply(x, function(y) t(sapply(y, function(z) apply(z, 2, type))))
if (ncol(res[[1]]) > 4) {  # the binned types return 12 columns instead of 4
res <- lapply(res, function(y) {
y <- y[, 7:9]  # keep the three bins for the "false" column
colnames(y) <- c("0-2", "3-5", ">5")
y
})
}
} else {
if (models == "iprior") tmp.res <- lapply(x, function(y) y[[1]])
if (models == "flat_prior") tmp.res <- lapply(x, function(y) y[[2]])
if (models == "gprior") tmp.res <- lapply(x, function(y) y[[3]])
if (models == "lasso") tmp.res <- lapply(x, function(y) y[[4]])
res <- t(sapply(tmp.res, function(x) apply(x, 2, type)))
if (ncol(res) > 4) {
res <- res[, 7:9]
colnames(res) <- c("0-2", "3-5", ">5")
}
}
res
}
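Example calls, assuming res has been collated as above:
bvs_res(res, type = mean)  # mean error counts and Brier scores, all methods
bvs_res(res, type = count_false_choices)  # binned proportions of false choices
bvs_res(res, type = se_false_choices, models = "iprior")  # S.E.s, I-prior only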
# Run this to collate all results and plot
plot.iprior.df <- rbind(
data.frame(res[[1]][[1]], SNR = "90%"),
data.frame(res[[2]][[1]], SNR = "75%"),
data.frame(res[[3]][[1]], SNR = "50%"),
data.frame(res[[4]][[1]], SNR = "25%"),
data.frame(res[[5]][[1]], SNR = "10%")
)
plot.flat.df <- rbind(
data.frame(res[[1]][[2]], SNR = "90%"),
data.frame(res[[2]][[2]], SNR = "75%"),
data.frame(res[[3]][[2]], SNR = "50%"),
data.frame(res[[4]][[2]], SNR = "25%"),
data.frame(res[[5]][[2]], SNR = "10%")
)
plot.gprior.df <- rbind(
data.frame(res[[1]][[3]], SNR = "90%"),
data.frame(res[[2]][[3]], SNR = "75%"),
data.frame(res[[3]][[3]], SNR = "50%"),
data.frame(res[[4]][[3]], SNR = "25%"),
data.frame(res[[5]][[3]], SNR = "10%")
)
plot.lasso.df <- rbind(
data.frame(res[[1]][[4]], SNR = "90%"),
data.frame(res[[2]][[4]], SNR = "75%"),
data.frame(res[[3]][[4]], SNR = "50%"),
data.frame(res[[4]][[4]], SNR = "25%"),
data.frame(res[[5]][[4]], SNR = "10%")
)
ggplot(plot.iprior.df, aes(false, ..density.., col = SNR)) +
geom_freqpoly(binwidth = 1, size = 0.8) +
scale_y_continuous(limits = c(0, 0.85)) +
scale_x_continuous(limits = c(0, 90)) +
ggtitle("I-prior") +
labs(x = "Number of false choices", y = "Proportion") -> p1
ggplot(plot.flat.df, aes(false, ..density.., col = SNR)) +
geom_freqpoly(binwidth = 1, size = 0.8) +
scale_y_continuous(limits = c(0, 0.85)) +
scale_x_continuous(limits = c(0, 90)) +
ggtitle("Independent prior") +
labs(x = "Number of false choices", y = "Proportion") -> p2
ggplot(plot.gprior.df, aes(false, ..density.., col = SNR)) +
geom_freqpoly(binwidth = 1, size = 0.8) +
scale_y_continuous(limits = c(0, 0.85)) +
scale_x_continuous(limits = c(0, 90)) +
ggtitle("g-prior") +
labs(x = "Number of false choices", y = "Proportion") -> p3
ggplot(plot.lasso.df, aes(false, ..density.., col = SNR)) +
geom_freqpoly(binwidth = 1, size = 0.8) +
scale_y_continuous(limits = c(0, 0.85)) +
scale_x_continuous(limits = c(0, 90)) +
ggtitle("Lasso") +
labs(x = "Number of false choices", y = "Proportion") -> p4
cowplot::plot_grid(p1, p2, p3, p4, ncol = 1)
Sensitivity analysis
The second part involves only I-priors. This experiment examines how well the I-prior performs under different hyperprior settings for the inclusion probabilities (gamma) at each SNR level. Again, the number of simulations has been reduced to 4 (originally 100) to provide a proof of concept.
# Load the simulation functions
bvs_sim <- function(snr = 0.90, gamma = snr, n.sim = 100,
save.intermediate = TRUE) {
res1 <- res2 <- matrix(NA, nrow = n.sim, ncol = 4)
colnames(res1) <- colnames(res2) <- c("false.inc", "false.exc", "false", "brier")
for (sim in seq_len(n.sim)) {
cat("\n")
cat("------ SIMULATION NUMBER", sim, paste0("(SNR: ", snr, ")"), "------")
cat("\n")
dat <- gen_bvs(n = 150, p = 100, snr = snr)
# BVS I-prior (first stage) ------------------------------------------------
runjags::runjags.options(silent.jags = TRUE, silent.runjags = TRUE, modules = "lecuyer")
mod <- ipriorBVS(y ~ ., dat, n.samp = 10000, n.burnin = 500,
n.adapt = 125, gamma.prob = gamma,
priors = "lambda ~ dunif(0, 100)", two.stage = FALSE)
tmp <- unlist(predict(mod, dat$truth))[1:4]
res1[sim, ] <- tmp
cat("FIRST STAGE\n")
print(res1[sim, ])
cat("\n")
# BVS I-prior (second stage) -----------------------------------------------
runjags::runjags.options(silent.jags = TRUE, silent.runjags = TRUE, modules = "lecuyer")
mod <- ipriorBVS(y ~ ., dat, n.samp = 10000, n.burnin = 500,
n.adapt = 125, gamma.prob = gamma * get_mpm(mod),
priors = "lambda ~ dunif(0, 100)", two.stage = FALSE)
tmp <- unlist(predict(mod, dat$truth))[1:4]
res2[sim, ] <- tmp
cat("SECOND STAGE\n")
print(res2[sim, ])
cat("\n")
if (isTRUE(save.intermediate)) {
save(res1, file = paste0("data/bvs-stage1-", snr, "-gam", gamma))
save(res2, file = paste0("data/bvs-stage2-", snr, "-gam", gamma))
}
}
push.message <- paste0(
"BVS simulations (SNR: ", snr, ", gamma:", gamma, ") COMPLETED."
)
# Push a notification when done (userID and appToken defined elsewhere)
pushoverr::pushover(message = push.message, user = userID, app = appToken)
list(stage1 = res1, stage2 = res2, snr = snr, gamma = gamma)
}
# This code runs the simulations for SNR = 0.90 under all gamma choices
res.90 <- bvs_sim(snr = 0.90, gamma = 0.90, n.sim = 4)
res.75 <- bvs_sim(snr = 0.90, gamma = 0.75, n.sim = 4)
res.50 <- bvs_sim(snr = 0.90, gamma = 0.50, n.sim = 4)
res.25 <- bvs_sim(snr = 0.90, gamma = 0.25, n.sim = 4)
res.10 <- bvs_sim(snr = 0.90, gamma = 0.10, n.sim = 4)
res.snr90 <- list("0.90" = res.90[1:2],
"0.75" = res.75[1:2],
"0.50" = res.50[1:2],
"0.25" = res.25[1:2],
"0.10" = res.10[1:2])
# This code runs the simulations for SNR = 0.75 under all gamma choices
res.90 <- bvs_sim(snr = 0.75, gamma = 0.90, n.sim = 4)
res.75 <- bvs_sim(snr = 0.75, gamma = 0.75, n.sim = 4)
res.50 <- bvs_sim(snr = 0.75, gamma = 0.50, n.sim = 4)
res.25 <- bvs_sim(snr = 0.75, gamma = 0.25, n.sim = 4)
res.10 <- bvs_sim(snr = 0.75, gamma = 0.10, n.sim = 4)
res.snr75 <- list("0.90" = res.90[1:2],
"0.75" = res.75[1:2],
"0.50" = res.50[1:2],
"0.25" = res.25[1:2],
"0.10" = res.10[1:2])
# This code runs the simulations for SNR = 0.50 under all gamma choices
res.90 <- bvs_sim(snr = 0.50, gamma = 0.90, n.sim = 4)
res.75 <- bvs_sim(snr = 0.50, gamma = 0.75, n.sim = 4)
res.50 <- bvs_sim(snr = 0.50, gamma = 0.50, n.sim = 4)
res.25 <- bvs_sim(snr = 0.50, gamma = 0.25, n.sim = 4)
res.10 <- bvs_sim(snr = 0.50, gamma = 0.10, n.sim = 4)
res.snr50 <- list("0.90" = res.90[1:2],
"0.75" = res.75[1:2],
"0.50" = res.50[1:2],
"0.25" = res.25[1:2],
"0.10" = res.10[1:2])
# This code runs the simulations for SNR = 0.25 under all gamma choices
res.90 <- bvs_sim(snr = 0.25, gamma = 0.90, n.sim = 4)
res.75 <- bvs_sim(snr = 0.25, gamma = 0.75, n.sim = 4)
res.50 <- bvs_sim(snr = 0.25, gamma = 0.50, n.sim = 4)
res.25 <- bvs_sim(snr = 0.25, gamma = 0.25, n.sim = 4)
res.10 <- bvs_sim(snr = 0.25, gamma = 0.10, n.sim = 4)
res.snr25 <- list("0.90" = res.90[1:2],
"0.75" = res.75[1:2],
"0.50" = res.50[1:2],
"0.25" = res.25[1:2],
"0.10" = res.10[1:2])
# This code runs the simulations for SNR = 0.10 under all gamma choices
res.90 <- bvs_sim(snr = 0.10, gamma = 0.90, n.sim = 4)
res.75 <- bvs_sim(snr = 0.10, gamma = 0.75, n.sim = 4)
res.50 <- bvs_sim(snr = 0.10, gamma = 0.50, n.sim = 4)
res.25 <- bvs_sim(snr = 0.10, gamma = 0.25, n.sim = 4)
res.10 <- bvs_sim(snr = 0.10, gamma = 0.10, n.sim = 4)
res.snr10 <- list("0.90" = res.90[1:2],
"0.75" = res.75[1:2],
"0.50" = res.50[1:2],
"0.25" = res.25[1:2],
"0.10" = res.10[1:2])
# Function to collate and plot the results
create_bvs_plot_df <- function(res, stage = 1) {
rbind(
data.frame(res[[1]][[stage]], gamma = "0.90"),
data.frame(res[[2]][[stage]], gamma = "0.75"),
data.frame(res[[3]][[stage]], gamma = "0.50"),
data.frame(res[[4]][[stage]], gamma = "0.25"),
data.frame(res[[5]][[stage]], gamma = "0.10")
)
}
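For instance, the first-stage results for the SNR = 0.90 runs, stacked over the five gamma settings:
head(create_bvs_plot_df(res.snr90, stage = 1))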
plot_res2 <- function(stage = 2, type = "false") {
tmp <- function(res) {
res <- matrix(rapply(res, function(x) apply(x, 2, mean, na.rm = TRUE)),
ncol = 4, byrow = TRUE)
if (stage == 1) res <- res[c(1, 3, 5, 7, 9), ]
if (stage == 2) res <- res[c(2, 4, 6, 8, 10), ]
data.frame(res, gamma = c(0.90, 0.75, 0.50, 0.25, 0.10))
}
df <- data.frame(rbind(
cbind(tmp(res.snr90), SNR = "90%"),
cbind(tmp(res.snr75), SNR = "75%"),
cbind(tmp(res.snr50), SNR = "50%"),
cbind(tmp(res.snr25), SNR = "25%"),
cbind(tmp(res.snr10), SNR = "10%")
))
df <- df[, c(2, 1, 3, 4, 5, 6)]
colnames(df)[1:4] <- c("false.exc", "false.inc", "false", "brier")
df$SNR <- factor(df$SNR, levels = c("90%", "75%", "50%", "25%", "10%"))
df <- reshape2::melt(df, id = c("gamma", "SNR"))
levels(df$variable)[1:2] <- c("False exclusion", "False inclusion")
ggplot(subset(df, variable %in% c("False inclusion", "False exclusion")),
aes(x = gamma, group = SNR)) +
geom_line(aes(y = value, col = SNR)) +
facet_grid(. ~ variable) +
labs(y = "Count", x = expression(paste("Hyperprior setting for ", pi[j]))) +
theme_bw()
}
plot_res2()
Aerobic data set
Analysis of the Aerobic data set.
# Load data from ipriorBVS package
data(aerobic, package = "ipriorBVS")
colnames(aerobic)[-1] <- paste0("X", 1:6)
# Fit one-stage model
(mod1 <- ipriorBVS(Oxygen ~ ., aerobic))
plot_coef2(mod1)
# Fit second-stage model
(mod2 <- ipriorBVS(Oxygen ~ ., aerobic, two.stage = TRUE))
Mortality and air pollution
Mortality and air pollution data set (McDonald & Schwing’s ridge analysis).
# Load data set from iprior package
data("pollution", package = "iprior")
pollution.stand <- as.data.frame(scale(pollution))
# Fit the I-prior BVS two stage model
mod <- ipriorBVS(Mortality ~ ., pollution.stand, stand.x = FALSE, stand.y = FALSE,
two.stage = TRUE)
coef(mod)
Ozone data set
Ozone data set (Breiman & Friedman; Casella & Moreno). Data set obtained from the mlbench package.
# Load data and clean up and create squared and interaction terms
data(Ozone, package = "mlbench")
colnames(Ozone) <- c("Month", "DayMonth", "DayWeek", "Ozone", "PresVand",
"WindLAX", "HumLAX", "TempSand", "TempElMon", "ibhLAX",
"PresGrad", "ibtLAX", "VisLAX")
Ozone <- Ozone[complete.cases(Ozone), ]
Ozone <- as.data.frame(lapply(Ozone, as.numeric))
y <- scale(Ozone$Ozone)
X <- scale(Ozone[, -4])  # drop the response (column 4)
X.sq <- X ^ 2
colnames(X.sq) <- paste0(colnames(X), "2")
X.int.names <- X.int <- NULL
for (i in seq_len(ncol(X))) {
for (j in seq_len(ncol(X))) if (j > i) {
X.int <- cbind(X.int, X[, j] * X[, i])
X.int.names <- c(X.int.names, paste0(colnames(X)[j], "_", colnames(X)[i]))
}
}
colnames(X.int) <- X.int.names
X2 <- cbind(X, X.sq, X.int)
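As a sanity check on the expanded design matrix: 12 main effects, 12 squared terms and choose(12, 2) = 66 two-way interactions give 90 columns in total.
dim(X2)  # second entry should be 90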
n <- nrow(Ozone)
# This experiment repeats the analysis over 50 random train/test splits; each
# split holds out 25 points for testing (obtaining out-of-sample prediction
# error rates) and trains on the rest
n.sim <- 50
res1 <- res2 <- data.frame(matrix(NA, nrow = n.sim, ncol = 4))
runjags::runjags.options(silent.jags = TRUE, silent.runjags = TRUE)
for (i in seq_len(n.sim)) {
test.samp <- sample(1:n, size = 25)
train.samp <- (1:n)[-test.samp]
y.test <- y[test.samp]
y.train <- y[train.samp]
X.test <- X[test.samp, ]
X.train <- X[train.samp, ]
X2.test <- X2[test.samp, ]
X2.train <- X2[train.samp, ]
mod1 <- ipriorBVS(y.train, X.train, two.stage = TRUE)
mod2 <- ipriorBVS(y.train, X2.train, two.stage = TRUE)
hpm1 <- get_pmps(mod1)[1]
hpm2 <- get_pmps(mod2)[1]
pred1 <- get_predict(mod1, X.test, y.test)
pred2 <- get_predict(mod2, X2.test, y.test)
res1[i, ] <- c(names(hpm1), as.numeric(hpm1), get_R2(mod1), sqrt(pred1$mse))
res2[i, ] <- c(names(hpm2), as.numeric(hpm2), get_R2(mod2), sqrt(pred2$mse))
cat("SIMULATION ", i, "\n")
}
colnames(res1) <- colnames(res2) <- c("Model", "PostProb", "R2", "RMSE")
res1[, -1] <- as.data.frame(lapply(res1[, -1], as.numeric))
res2[, -1] <- as.data.frame(lapply(res2[, -1], as.numeric))
res1 %>%
group_by(Model) %>%
summarise(prob = mean(PostProb), R2 = mean(R2), RMSE = mean(RMSE),
prop = n() / 50) %>%
arrange(desc(prob)) -> res1
res2 %>%
group_by(Model) %>%
summarise(prob = mean(PostProb), R2 = mean(R2), RMSE = mean(RMSE),
prop = n() / 50) %>%
arrange(desc(prob)) -> res2
# HPM for mod1
colnames(X)[as.logical(as.numeric(strsplit(res1[[1]][[1]], "")[[1]]))]
# HPM for mod2
colnames(X2)[as.logical(as.numeric(strsplit(res2[[1]][[1]], "")[[1]]))]
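The HPM is encoded as a string of 0/1 inclusion indicators, which the two lines above map back to column names. A made-up string illustrates the decoding step:
ind <- "01100"  # hypothetical indicator string
which(as.logical(as.numeric(strsplit(ind, "")[[1]])))  # variables 2 and 3 included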
California map
The code to produce the map of the California locations pertaining to the Ozone data set. The latitude and longitude of the places are given in ozone_map.txt.
# Prepare data points using ggmap package
ozone.points <- read.table("data/ozone_map.txt", header = TRUE)
ozone.points <- cbind(ozone.points, labels = c(
"El Monte, CA",
"Sandberg, CA",
"Upland, CA",
"Vandenberg AFB",
"LAX airport",
"Dagget, CA"
))
ozone.box <- make_bbox(lat = ozone.points$lat, lon = ozone.points$lon, f = 0.2)
ozone.box["bottom"] <- ozone.box["bottom"] - 0.2
ozone.box["top"] <- ozone.box["top"] + 0.2
ozone.map <- get_map(location = ozone.box, maptype = "watercolor",
crop = TRUE, source = "stamen", zoom = 10)
# Plot
ggmap(ozone.map) +
geom_label_repel(data = ozone.points, aes(x = lon, y = lat, label = labels),
box.padding = 1, nudge_x = -0.05, col = "grey30",
segment.colour = "grey30") +
geom_point(data = ozone.points, mapping = aes(x = lon, y = lat),
fill = "grey30", colour = "grey30", size = 3,
shape = 21) +
theme_void() # ignore warning messages
Session information
My session information.
> devtools::session_info()
Session info --------------------------------------------------------------------------------
setting value
version R version 3.5.0 (2018-04-23)
system x86_64, darwin15.6.0
ui RStudio (1.1.447)
language (EN)
collate en_GB.UTF-8
tz Europe/London
date 2018-06-23
Packages ------------------------------------------------------------------------------------
package * version date source
abind 1.4-5 2016-07-21 CRAN (R 3.5.0)
ape * 5.1 2018-04-04 CRAN (R 3.5.0)
assertthat 0.2.0 2017-04-11 CRAN (R 3.5.0)
backports 1.1.2 2017-12-13 CRAN (R 3.5.0)
BAS * 1.5.0 2018-05-03 CRAN (R 3.5.0)
base * 3.5.0 2018-04-24 local
base64enc 0.1-3 2015-07-28 CRAN (R 3.5.0)
bindr 0.1.1 2018-03-13 CRAN (R 3.5.0)
bindrcpp 0.2.2 2018-03-29 CRAN (R 3.5.0)
bitops 1.0-6 2013-08-17 CRAN (R 3.5.0)
broom 0.4.4 2018-03-29 CRAN (R 3.5.0)
caret * 6.0-79 2018-03-29 CRAN (R 3.5.0)
cellranger 1.1.0 2016-07-27 CRAN (R 3.5.0)
class 7.3-14 2015-08-30 CRAN (R 3.5.0)
cli 1.0.0 2017-11-05 CRAN (R 3.5.0)
coda * 0.19-1 2016-12-08 CRAN (R 3.5.0)
codetools 0.2-15 2016-10-05 CRAN (R 3.5.0)
colorspace 1.3-2 2016-12-14 CRAN (R 3.5.0)
compiler 3.5.0 2018-04-24 local
corpcor 1.6.9 2017-04-01 CRAN (R 3.5.0)
cowplot * 0.9.2 2017-12-17 CRAN (R 3.5.0)
crayon 1.3.4 2017-09-16 CRAN (R 3.5.0)
cubature 1.3-11 2017-07-19 CRAN (R 3.5.0)
CVST 0.2-1 2013-12-10 CRAN (R 3.5.0)
data.table 1.11.0 2018-05-01 CRAN (R 3.5.0)
datasets * 3.5.0 2018-04-24 local
ddalpha 1.3.3 2018-04-30 CRAN (R 3.5.0)
deldir 0.1-15 2018-04-01 CRAN (R 3.5.0)
DEoptimR 1.0-8 2016-11-19 CRAN (R 3.5.0)
devtools * 1.13.5 2018-02-18 CRAN (R 3.5.0)
digest 0.6.15 2018-01-28 CRAN (R 3.5.0)
dimRed 0.1.0 2017-05-04 CRAN (R 3.5.0)
directlabels * 2017.03.31 2017-04-08 cran (@2017.03)
doParallel 1.0.11 2017-09-28 CRAN (R 3.5.0)
doSNOW * 1.0.16 2017-12-13 cran (@1.0.16)
dplyr * 0.7.5 2018-05-19 cran (@0.7.5)
DRR 0.0.3 2018-01-06 CRAN (R 3.5.0)
ElemStatLearn * 2015.6.26 2015-06-27 CRAN (R 3.5.0)
evaluate 0.10.1 2017-06-24 CRAN (R 3.5.0)
forcats * 0.3.0 2018-02-19 CRAN (R 3.5.0)
foreach * 1.4.4 2017-12-12 cran (@1.4.4)
foreign 0.8-70 2017-11-28 CRAN (R 3.5.0)
geometry 0.3-6 2015-09-09 CRAN (R 3.5.0)
GGally 1.3.2 2017-08-02 CRAN (R 3.5.0)
ggmap * 2.7.900 2018-06-08 Github (dkahle/ggmap@1c2fb46)
ggmcmc * 1.1 2016-06-28 CRAN (R 3.5.0)
ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.5.0)
ggrepel * 0.7.0 2017-09-29 cran (@0.7.0)
glmnet * 2.0-16 2018-04-02 CRAN (R 3.5.0)
glue 1.2.0 2017-10-29 CRAN (R 3.5.0)
goftest 1.1-1 2017-04-03 CRAN (R 3.5.0)
gower 0.1.2 2017-02-23 CRAN (R 3.5.0)
graphics * 3.5.0 2018-04-24 local
grDevices * 3.5.0 2018-04-24 local
grid 3.5.0 2018-04-24 local
gridExtra 2.3 2017-09-09 CRAN (R 3.5.0)
gtable 0.2.0 2016-02-26 CRAN (R 3.5.0)
haven 1.1.1 2018-01-18 CRAN (R 3.5.0)
highr 0.6 2016-05-09 CRAN (R 3.5.0)
hms 0.4.2 2018-03-10 CRAN (R 3.5.0)
htmltools 0.3.6 2017-04-28 CRAN (R 3.5.0)
htmlwidgets 1.2 2018-04-19 CRAN (R 3.5.0)
httr 1.3.1 2017-08-20 CRAN (R 3.5.0)
inline 0.3.14 2015-04-13 CRAN (R 3.5.0)
ipred 0.9-6 2017-03-01 CRAN (R 3.5.0)
iprior * 0.7.1.9002 2018-05-19 Github (haziqj/iprior@8992d08)
ipriorBVS * 0.1.1 2018-06-06 local
iprobit * 0.1.0.9004 2018-05-22 local
iterators * 1.0.9 2017-12-12 cran (@1.0.9)
jmcm * 0.1.8.0 2017-11-25 CRAN (R 3.5.0)
jpeg 0.1-8 2014-01-23 CRAN (R 3.5.0)
jsonlite 1.5 2017-06-01 CRAN (R 3.5.0)
kableExtra * 0.8.0 2018-04-05 CRAN (R 3.5.0)
kernlab * 0.9-26 2018-04-30 CRAN (R 3.5.0)
knitr * 1.20 2018-02-20 CRAN (R 3.5.0)
labeling 0.3 2014-08-23 CRAN (R 3.5.0)
lattice * 0.20-35 2017-03-25 CRAN (R 3.5.0)
lava 1.6.1 2018-03-28 CRAN (R 3.5.0)
lazyeval 0.2.1 2017-10-29 CRAN (R 3.5.0)
lme4 * 1.1-17 2018-04-03 CRAN (R 3.5.0)
lmerTest * 3.0-1 2018-04-23 CRAN (R 3.5.0)
lubridate 1.7.4 2018-04-11 CRAN (R 3.5.0)
magic 1.5-8 2018-01-26 CRAN (R 3.5.0)
magrittr 1.5 2014-11-22 CRAN (R 3.5.0)
MASS * 7.3-50 2018-04-30 CRAN (R 3.5.0)
Matrix * 1.2-14 2018-04-13 CRAN (R 3.5.0)
MCMCglmm * 2.25 2017-10-05 CRAN (R 3.5.0)
memisc * 0.99.14.9 2017-12-20 CRAN (R 3.5.0)
memoise 1.1.0 2017-04-21 CRAN (R 3.5.0)
methods * 3.5.0 2018-04-24 local
mgcv 1.8-23 2018-01-21 CRAN (R 3.5.0)
minqa 1.2.4 2014-10-09 CRAN (R 3.5.0)
mnormt 1.5-5 2016-10-15 CRAN (R 3.5.0)
ModelMetrics 1.1.0 2016-08-26 CRAN (R 3.5.0)
modelr 0.1.1 2017-07-24 CRAN (R 3.5.0)
munsell 0.4.3 2016-02-13 CRAN (R 3.5.0)
mvtnorm * 1.0-7 2018-01-26 cran (@1.0-7)
nlme * 3.1-137 2018-04-07 CRAN (R 3.5.0)
nloptr 1.0.4 2017-08-22 CRAN (R 3.5.0)
nnet 7.3-12 2016-02-02 CRAN (R 3.5.0)
numDeriv 2016.8-1 2016-08-27 CRAN (R 3.5.0)
pacman * 0.4.6 2017-05-14 CRAN (R 3.5.0)
parallel * 3.5.0 2018-04-24 local
pillar 1.2.2 2018-04-26 CRAN (R 3.5.0)
pkgconfig 2.0.1 2017-03-21 CRAN (R 3.5.0)
plotly * 4.7.1 2017-07-29 CRAN (R 3.5.0)
plyr 1.8.4 2016-06-08 CRAN (R 3.5.0)
png 0.1-7 2013-12-03 CRAN (R 3.5.0)
polyclip 1.6-1 2017-03-22 CRAN (R 3.5.0)
prodlim 2018.04.18 2018-04-18 CRAN (R 3.5.0)
psych 1.8.3.3 2018-03-30 CRAN (R 3.5.0)
purrr * 0.2.4 2017-10-18 CRAN (R 3.5.0)
pushoverr 1.0.0 2016-11-23 CRAN (R 3.5.0)
quadprog 1.5-5 2013-04-17 cran (@1.5-5)
R2MLwiN * 0.8-5 2017-09-01 CRAN (R 3.5.0)
R6 2.2.2 2017-06-17 CRAN (R 3.5.0)
rbugs 0.5-9 2013-04-09 CRAN (R 3.5.0)
RColorBrewer 1.1-2 2014-12-07 CRAN (R 3.5.0)
Rcpp 0.12.17 2018-05-18 cran (@0.12.17)
RcppRoll 0.2.2 2015-04-05 CRAN (R 3.5.0)
readr * 1.1.1 2017-05-16 CRAN (R 3.5.0)
readxl 1.1.0 2018-04-20 CRAN (R 3.5.0)
recipes 0.1.2 2018-01-11 CRAN (R 3.5.0)
repr 0.13 2018-04-14 CRAN (R 3.5.0)
reshape 0.8.7 2017-08-06 CRAN (R 3.5.0)
reshape2 * 1.4.3 2017-12-11 CRAN (R 3.5.0)
RgoogleMaps 1.4.2 2018-06-08 CRAN (R 3.5.0)
rjags 4-6 2016-02-19 CRAN (R 3.5.0)
rjson 0.2.20 2018-06-08 CRAN (R 3.5.0)
rlang 0.2.1 2018-05-30 cran (@0.2.1)
rmarkdown 1.9 2018-03-01 CRAN (R 3.5.0)
robustbase 0.93-0 2018-04-24 CRAN (R 3.5.0)
rpart * 4.1-13 2018-02-23 CRAN (R 3.5.0)
RPEnsemble * 0.4 2017-10-07 CRAN (R 3.5.0)
rprojroot 1.3-2 2018-01-03 CRAN (R 3.5.0)
rstan * 2.17.3 2018-01-20 CRAN (R 3.5.0)
rstudioapi 0.7 2017-09-07 CRAN (R 3.5.0)
runjags * 2.0.4-2 2016-07-25 CRAN (R 3.5.0)
rvest 0.3.2 2016-06-17 CRAN (R 3.5.0)
scales 0.5.0 2017-08-24 CRAN (R 3.5.0)
sfsmisc 1.1-2 2018-03-05 CRAN (R 3.5.0)
snow * 0.4-2 2016-10-14 cran (@0.4-2)
spatstat * 1.55-1 2018-04-05 CRAN (R 3.5.0)
spatstat.data * 1.2-0 2017-11-20 CRAN (R 3.5.0)
spatstat.utils 1.8-0 2017-11-20 CRAN (R 3.5.0)
splines 3.5.0 2018-04-24 local
StanHeaders * 2.17.2 2018-01-20 CRAN (R 3.5.0)
stats * 3.5.0 2018-04-24 local
stats4 * 3.5.0 2018-04-24 local
stringi 1.2.2 2018-05-02 CRAN (R 3.5.0)
stringr * 1.3.0 2018-02-19 CRAN (R 3.5.0)
survival 2.42-3 2018-04-16 CRAN (R 3.5.0)
tensor 1.5 2012-05-05 CRAN (R 3.5.0)
tensorA 0.36 2010-12-01 CRAN (R 3.5.0)
texreg 1.36.23 2017-03-03 CRAN (R 3.5.0)
tibble * 1.4.2 2018-01-22 CRAN (R 3.5.0)
tidyr * 0.8.0 2018-01-29 CRAN (R 3.5.0)
tidyselect 0.2.4 2018-02-26 CRAN (R 3.5.0)
tidyverse * 1.2.1 2017-11-14 CRAN (R 3.5.0)
timeDate 3043.102 2018-02-21 CRAN (R 3.5.0)
tools 3.5.0 2018-04-24 local
utils * 3.5.0 2018-04-24 local
viridisLite 0.3.0 2018-02-01 CRAN (R 3.5.0)
withr 2.1.2 2018-03-15 CRAN (R 3.5.0)
xml2 1.2.0 2018-01-24 CRAN (R 3.5.0)
yaml 2.1.19 2018-05-01 CRAN (R 3.5.0)