rmvn <- function(n, mu = 0, V = matrix(1)) {
    ## Draw n samples from N(mu, V) using the Cholesky factor of V.
    p <- length(mu)
    if (any(is.na(match(dim(V), p))))
        stop("Dimension problem!")
    D <- chol(V)
    ## Rows of the n x p matrix are independent draws; shift by mu and return p x n.
    t(matrix(rnorm(n * p), ncol = p) %*% D + rep(mu, rep(n, p)))
}
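As a quick sanity check of rmvn, the sample covariance of a large number of draws should be close to V. A small illustrative example (V.test and x.test are throwaway names, not used later):
V.test <- matrix(c(1, 0.5, 0.5, 2), 2, 2)
x.test <- t(rmvn(10000, c(0, 0), V.test))  # rmvn returns p x n, so transpose to n x p
round(cov(x.test), 2)  # should be close to V.test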
set.seed(1)
n <- 100
coords <- cbind(runif(n, 0, 1), runif(n, 0, 1))
x <- cbind(1, rnorm(n))
B <- as.matrix(c(1, 5))
sigma.sq <- 5
tau.sq <- 1
phi <- 3/0.5
D <- as.matrix(dist(coords))
R <- exp(-phi * D)
w <- rmvn(1, rep(0, n), sigma.sq * R)
y <- rnorm(n, x %*% B + w, sqrt(tau.sq))
library(spNNGP)
n.samples <- 500
starting <- list(phi = phi, sigma.sq = 5, tau.sq = 1)
tuning <- list(phi = 0.5, sigma.sq = 0.5, tau.sq = 0.5)
priors <- list(phi.Unif = c(3/1, 3/0.01), sigma.sq.IG = c(2, 5), tau.sq.IG = c(2,
1))
cov.model <- "exponential"
m.s <- spNNGP(y ~ x - 1, coords = coords, starting = starting, method = "latent",
n.neighbors = 5, tuning = tuning, priors = priors, cov.model = cov.model, n.samples = n.samples,
return.neighbor.info = TRUE, n.omp.threads = 2)
----------------------------------------
Building the neighbor list
----------------------------------------
----------------------------------------
Building the neighbors of neighbors list
----------------------------------------
----------------------------------------
Model description
----------------------------------------
NNGP Latent model fit with 100 observations.
Number of covariates 2 (including intercept if specified).
Using the exponential spatial correlation model.
Using 5 nearest neighbors.
Number of MCMC samples 500.
Priors and hyperpriors:
beta flat.
sigma.sq IG hyperpriors shape=2.00000 and scale=5.00000
tau.sq IG hyperpriors shape=2.00000 and scale=1.00000
phi Unif hyperpriors a=3.00000 and b=300.00000
Source compiled with OpenMP support and model fit using 2 thread(s).
----------------------------------------
Sampling
----------------------------------------
Sampled: 100 of 500, 20.00%
Report interval Metrop. Acceptance rate: 58.00%
Overall Metrop. Acceptance rate: 58.00%
-------------------------------------------------
Sampled: 200 of 500, 40.00%
Report interval Metrop. Acceptance rate: 53.00%
Overall Metrop. Acceptance rate: 55.50%
-------------------------------------------------
Sampled: 300 of 500, 60.00%
Report interval Metrop. Acceptance rate: 52.00%
Overall Metrop. Acceptance rate: 54.33%
-------------------------------------------------
Sampled: 400 of 500, 80.00%
Report interval Metrop. Acceptance rate: 42.00%
Overall Metrop. Acceptance rate: 51.25%
-------------------------------------------------
Sampled: 500 of 500, 100.00%
Report interval Metrop. Acceptance rate: 55.00%
Overall Metrop. Acceptance rate: 52.00%
-------------------------------------------------
round(summary(m.s$p.beta.samples)$quantiles[, c(3, 1, 5)], 2)
50% 2.5% 97.5%
x1 2.11 1.4 2.79
x2 4.87 4.5 5.23
round(summary(m.s$p.theta.samples)$quantiles[, c(3, 1, 5)], 2)
50% 2.5% 97.5%
sigma.sq 3.95 2.17 6.12
tau.sq 0.92 0.26 2.38
phi 11.77 4.45 25.91
plot(w, apply(m.s$p.w.samples, 1, median), xlab = "True w", ylab = "Posterior median w")
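To also see the uncertainty around the latent effects, we can add 95% credible intervals to the recovery plot. A minimal sketch, assuming (as in the apply() call above) that rows of p.w.samples index locations and columns index MCMC samples:
w.ci <- apply(m.s$p.w.samples, 1, quantile, probs = c(0.025, 0.975))
arrows(w, w.ci[1, ], w, w.ci[2, ], length = 0.02, angle = 90, code = 3, col = "gray")
abline(0, 1, col = "red")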
Take a look at the neighbor ordering (just for fun).
names(m.s$neighbor.info)
[1] "n.neighbors" "n.indx" "nn.indx" "nn.indx.lu"
[5] "u.indx" "u.indx.lu" "ui.indx" "ord"
[9] "nn.indx.run.time" "u.indx.run.time"
ord <- m.s$neighbor.info$ord
n.indx <- m.s$neighbor.info$n.indx
s <- 50
plot(m.s$coords[ord, ], xlab = "Easting", ylab = "Northing")
points(m.s$coords[ord, ][s, , drop = FALSE], cex = 2, col = "red")
points(m.s$coords[ord, ][n.indx[[s]], , drop = FALSE], pch = 19, col = "blue")
abline(v = m.s$coords[ord, ][s, 1], lty = 3)
# for(s in 2:n){
#   plot(m.s$coords[ord,], xlab='Easting', ylab='Northing')
#   points(m.s$coords[ord,][s,,drop=FALSE], cex=2, col='red')
#   points(m.s$coords[ord,][n.indx[[s]],,drop=FALSE], pch=19, col='blue')
#   abline(v=m.s$coords[ord,][s,1], lty=3)
#   readline(prompt='Pause. Press <Enter> to continue...')
# }
Here we consider forest canopy height (m) data measured using NASA Goddard’s LiDAR Hyperspectral and Thermal (G-LiHT) Airborne Imager over a subset of the Harvard Forest Simes Tract, MA, collected in Summer 2012. This is a sampling LiDAR system that records only strips of canopy height across the landscape. We would like to use the Harvard Forest data to assess whether the current density of LiDAR measurements can be reduced, which would allow wider strips to be collected. Ultimately, interest is in creating wall-to-wall maps of forest canopy height with associated uncertainty.
Let’s load the necessary packages and the canopy height data, which are part of the spNNGP package. Here too, we subset the data and divide it into a model set and a holdout (testing) set.
library(geoR)
library(raster)
library(leaflet)
load("CHM.rda")
CHM <- CHM[CHM[, 3] > 0, ]
set.seed(1)
mod <- sample(1:nrow(CHM), 25000)
ho <- sample((1:nrow(CHM))[-mod], 10000)
CHM.mod <- CHM[mod, ]
CHM.ho <- CHM[ho, ]
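A quick, purely descriptive look at the split and the canopy heights before modeling:
dim(CHM.mod)
dim(CHM.ho)
summary(CHM.mod[, 3])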
Let’s again start with a leaflet basemap and then overlay the canopy height data. Recall that leaflet maps expect data in a geographic coordinate system (i.e., longitude and latitude), so we first need to reproject the CHM data (just for visualization purposes; we’ll fit the model using the projected coordinates).
chm.r <- rasterFromXYZ(CHM)
proj4string(chm.r) <- "+proj=utm +zone=18 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
chm.r.ll <- projectRaster(chm.r, crs = "+proj=longlat +datum=WGS84")
pal <- colorNumeric(rev(terrain.colors(50)), domain = values(chm.r.ll), na.color = "transparent")
base.map <- leaflet(width = "100%") %>%
addProviderTiles("Esri.WorldImagery", group = "Satellite") %>%
addProviderTiles("Esri.WorldShadedRelief", group = "Terrain")
base.map %>%
addRasterImage(chm.r.ll, colors = pal, opacity = 1, group = "Canopy height") %>%
addLegend("bottomright", pal = pal, values = values(chm.r.ll), opacity = 1, title = "<center>Canopy height (m)</center>") %>%
addLayersControl(baseGroups = c("Satellite", "Terrain"), overlayGroups = c("Canopy height"),
options = layersControlOptions(collapsed = FALSE))
Let’s try fitting a variogram to the data to get a sense of the spatial structure. The variog function calculates the \(n\times n\) Euclidean distance matrix to construct the empirical variogram. When \(n\) is large you will likely run out of memory, so you might need to consider only a subset of your data.
sub <- 1:10000
# note, max intersite distance is ~1.5km
v <- variog(coords = CHM.mod[sub, 1:2], data = CHM.mod[sub, 3], uvec = (seq(0, 500,
length = 30)))
variog: computing omnidirectional variogram
plot(v, xlab = "Distance (m)")
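The empirical variogram can also suggest rough starting values for the covariance parameters used below. A minimal sketch with geoR’s variofit; the initial values are illustrative, and note that geoR’s phi is a range parameter (roughly 1/phi in spNNGP’s decay parameterization):
v.fit <- variofit(v, ini.cov.pars = c(15, 50), nugget = 2.5, cov.model = "exponential")
v.fit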
Now let’s fit some spatial regression models using NNGP random effects.
n.samples <- 1000
starting <- list(phi = 3/50, sigma.sq = 15, tau.sq = 2.5)
tuning <- list(phi = 0.05, sigma.sq = 0.01, tau.sq = 0.01)
priors <- list(phi.Unif = c(3/1000, 3/10), sigma.sq.IG = c(2, 10), tau.sq.IG = c(2,
5))
cov.model <- "exponential"
## Response model
m.r <- spNNGP(CHM.mod[, 3] ~ 1, coords = CHM.mod[, 1:2], starting = starting, method = "response",
n.neighbors = 10, tuning = tuning, priors = priors, cov.model = cov.model, n.samples = n.samples,
n.omp.threads = 2, n.report = 100)
----------------------------------------
Building the neighbor list
----------------------------------------
----------------------------------------
Model description
----------------------------------------
NNGP Response model fit with 25000 observations.
Number of covariates 1 (including intercept if specified).
Using the exponential spatial correlation model.
Using 10 nearest neighbors.
Number of MCMC samples 1000.
Priors and hyperpriors:
beta flat.
sigma.sq IG hyperpriors shape=2.00000 and scale=10.00000
tau.sq IG hyperpriors shape=2.00000 and scale=5.00000
phi Unif hyperpriors a=0.00300 and b=0.30000
Source compiled with OpenMP support and model fit using 2 thread(s).
----------------------------------------
Sampling
----------------------------------------
Sampled: 100 of 1000, 10.00%
Report interval Metrop. Acceptance rate: 39.00%
Overall Metrop. Acceptance rate: 39.00%
-------------------------------------------------
Sampled: 200 of 1000, 20.00%
Report interval Metrop. Acceptance rate: 40.00%
Overall Metrop. Acceptance rate: 39.50%
-------------------------------------------------
Sampled: 300 of 1000, 30.00%
Report interval Metrop. Acceptance rate: 35.00%
Overall Metrop. Acceptance rate: 38.00%
-------------------------------------------------
Sampled: 400 of 1000, 40.00%
Report interval Metrop. Acceptance rate: 37.00%
Overall Metrop. Acceptance rate: 37.75%
-------------------------------------------------
Sampled: 500 of 1000, 50.00%
Report interval Metrop. Acceptance rate: 47.00%
Overall Metrop. Acceptance rate: 39.60%
-------------------------------------------------
Sampled: 600 of 1000, 60.00%
Report interval Metrop. Acceptance rate: 41.00%
Overall Metrop. Acceptance rate: 39.83%
-------------------------------------------------
Sampled: 700 of 1000, 70.00%
Report interval Metrop. Acceptance rate: 46.00%
Overall Metrop. Acceptance rate: 40.71%
-------------------------------------------------
Sampled: 800 of 1000, 80.00%
Report interval Metrop. Acceptance rate: 45.00%
Overall Metrop. Acceptance rate: 41.25%
-------------------------------------------------
Sampled: 900 of 1000, 90.00%
Report interval Metrop. Acceptance rate: 45.00%
Overall Metrop. Acceptance rate: 41.67%
-------------------------------------------------
Sampled: 1000 of 1000, 100.00%
Report interval Metrop. Acceptance rate: 33.00%
Overall Metrop. Acceptance rate: 40.80%
-------------------------------------------------
round(summary(m.r$p.beta.samples)$quantiles[c(3, 1, 5)], 2)
50% 2.5% 97.5%
21.07 20.80 21.34
round(summary(m.r$p.theta.samples)$quantiles[, c(3, 1, 5)], 3)
50% 2.5% 97.5%
sigma.sq 15.422 14.390 16.377
tau.sq 2.250 1.992 2.445
phi 0.048 0.044 0.052
plot(m.r$p.beta.samples)
plot(m.r$p.theta.samples)
m.r$run.time
user system elapsed
197.858 0.027 98.959
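Before using these samples for prediction, it is worth checking how well the chains mix. A minimal sketch with coda (the summary() calls above suggest the samples are already coda mcmc objects):
library(coda)
round(effectiveSize(m.r$p.beta.samples))
round(effectiveSize(m.r$p.theta.samples))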
Now let’s predict for the holdout set.
burn.in <- floor(0.5 * n.samples)
p.r <- predict(m.r, X.0 = as.matrix(rep(1, nrow(CHM.ho))), coords.0 = CHM.ho[, c("x",
"y")], sub.sample = list(start = burn.in, thin = 2), n.report = 5000, n.omp.threads = 2)
----------------------------------------
Prediction description
----------------------------------------
NNGP Response model fit with 25000 observations.
Number of covariates 1 (including intercept if specified).
Using the exponential spatial correlation model.
Using 10 nearest neighbors.
Number of MCMC samples 251.
Predicting at 10000 locations.
Source compiled with OpenMP support and model fit using 2 threads.
-------------------------------------------------
Predicting
-------------------------------------------------
Location: 5000 of 10000, 50.00%
Location: 10000 of 10000, 100.00%
y.hat.r <- apply(p.r$p.y.0, 1, mean)
plot(CHM.ho[, 3], y.hat.r, main = "Response NNGP model", xlab = "True canopy height",
ylab = "Posterior predictive distribution mean")
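To put a number on holdout performance, we can also compute the root mean squared prediction error and the empirical coverage of 95% prediction intervals. A minimal sketch, assuming (as in the apply() call above) that rows of p.y.0 index holdout locations and columns index posterior predictive samples:
y.ho <- CHM.ho[, 3]
sqrt(mean((y.ho - y.hat.r)^2))  # RMSPE
y.ci <- apply(p.r$p.y.0, 1, quantile, probs = c(0.025, 0.975))
mean(y.ho >= y.ci[1, ] & y.ho <= y.ci[2, ])  # 95% PI coverage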