Merge pull request #78 from NIEHS/main-sciome
Parallel createUMultivariate and test fixes
kyle-messier authored Dec 6, 2024
2 parents c3937f4 + b7e6887 commit 39c0ffc
Showing 12 changed files with 136 additions and 267 deletions.
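The parallel code paths touched in this commit rely on a registered foreach backend: the %dopar% loops in impute_y_lod and the parallel = parallel arguments forwarded to cv.glmnet only run on multiple workers once a backend such as doParallel is registered in the calling session. A minimal sketch of that setup, using toy data and an assumed worker count rather than anything from this commit:

# Register a doParallel backend so foreach's %dopar% loops and
# cv.glmnet(..., parallel = TRUE) can fan work out to multiple workers.
library(doParallel)
library(glmnet)

cl <- makeCluster(4)  # assumed worker count; adjust to the machine
registerDoParallel(cl)

# Toy data, purely illustrative
set.seed(1)
X <- matrix(rnorm(200 * 10), nrow = 200)
y <- X[, 1] - 2 * X[, 2] + rnorm(200)

# With a backend registered, the cross-validation folds are fit in parallel
fit <- cv.glmnet(X, y, parallel = TRUE)

stopCluster(cl)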
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: PrestoGP
Type: Package
Title: Penalized Regression for Spatio-Temporal Outcomes via Gaussian Processes
Version: 0.2.0.9036
Version: 0.2.0.9037
Authors@R: c(
person(given = "Eric",
family = "Bair",
@@ -23,7 +23,7 @@ Description: Simultaneous variable selection and estimation of LUR models with sp
Depends:
R (>= 3.5.0)
LinkingTo:
Rcpp, RcppArmadillo
Rcpp, RcppArmadillo, BH
Imports:
GPvecchia,
fields,
19 changes: 17 additions & 2 deletions R/PrestoGP_Model.R
@@ -826,16 +826,31 @@ setMethod(
model <- specify(model)

if (is.null(beta.hat)) {
if (!quiet) {
cat("\n")
}
if (sum(!model@Y_obs) > 0 & min(lodv) < Inf) {
if (!quiet) {
cat("Imputing missing y's and estimating initial beta...", "\n")
}
cur.mi <- lod_reg_mi(model@Y_train, model@X_train, lodv, !model@Y_obs,
parallel = parallel, foldid = foldid)
parallel = parallel, foldid = foldid, verbose = verbose)
model@Y_train[!model@Y_obs] <- cur.mi$y.impute
beta.hat <- as.matrix(cur.mi$coef)
if (!quiet) {
cat("Estimation of initial beta complete", "\n")
}
} else {
if (!quiet) {
cat("Estimating initial beta...", "\n")
}
beta0.glmnet <- cv.glmnet(model@X_train, model@Y_train,
parallel = parallel, foldid = foldid)
beta.hat <- as.matrix(predict(beta0.glmnet,
type = "coefficients", s = beta0.glmnet$lambda.1se))
if (!quiet) {
cat("Estimation of initial beta complete", "\n")
}
}
}
Y.hat <- beta.hat[1, 1] + model@X_train %*% beta.hat[-1, ]
@@ -927,7 +942,7 @@ setMethod(
}
if (!quiet) {
cat("Iteration", iter, "complete", "\n")
cat("Current penalized likelihood:", model@error, "\n")
cat("Current penalized negative log likelihood:", model@error, "\n")
cat("Current MSE:", mean(model@res^2), "\n")
}
iter <- iter + 1
47 changes: 21 additions & 26 deletions R/PrestoGP_Multivariate_Vecchia.R
@@ -435,38 +435,33 @@ setMethod("impute_y_lod", "MultivariateVecchiaModel", function(model, lod,
yhat.ni <- X %*% cur.coef[-1]
yhat.ni <- yhat.ni + mean(y[!miss]) - mean(yhat.ni[!miss])

coef.mat <- matrix(nrow = n.mi, ncol = (ncol(X) + 1))
if (parallel) {
coef.mat <- foreach(i = seq_len(n.mi), .combine = rbind) %dopar% {
yi <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn, covmat = Sigma.hat)
yi <- yi + yhat.ni

tiid <- transform_miid(cbind(yi, X), vecchia.approx, params)
yt <- tiid[, 1]
Xt <- as.matrix(tiid[, -1])

cur.glmnet <- cv.glmnet(as.matrix(Xt), as.matrix(yt),
alpha = model@alpha, family = family, nfolds = nfolds,
foldid = foldid)
as.vector(coef(cur.glmnet, s = "lambda.min"))
yi <- foreach(i = seq_len(n.mi), .combine = cbind) %dopar% {
out <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn,
covmat = Sigma.hat)
out + yhat.ni
}
} else {
coef.mat <- matrix(nrow = n.mi, ncol = (ncol(X) + 1))
yi <- matrix(nrow = length(yhat.ni), ncol = n.mi)
for (i in seq_len(n.mi)) {
yi <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn, covmat = Sigma.hat)
yi <- yi + yhat.ni

tiid <- transform_miid(cbind(yi, X), vecchia.approx, params)
yt <- tiid[, 1]
Xt <- as.matrix(tiid[, -1])

cur.glmnet <- cv.glmnet(as.matrix(Xt), as.matrix(yt),
alpha = model@alpha, family = family, nfolds = nfolds,
foldid = foldid)
coef.mat[i, ] <- as.matrix(coef(cur.glmnet, s = "lambda.min"))
yi[, i] <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn,
covmat = Sigma.hat)
yi[, i] <- yi[, i] + yhat.ni
}
}
tiid <- transform_miid(cbind(yi, X), vecchia.approx, params)
yt <- tiid[, seq_len(ncol(yi))]
Xt <- as.matrix(tiid[, -(seq_len(ncol(yi)))])

for (i in seq_len(n.mi)) {
cur.glmnet <- cv.glmnet(as.matrix(Xt), as.matrix(yt[, i]),
alpha = model@alpha, family = family, nfolds = nfolds,
foldid = foldid, parallel = parallel)
coef.mat[i, ] <- as.matrix(coef(cur.glmnet, s = "lambda.min"))
}
last.coef <- cur.coef
cur.coef <- colMeans(coef.mat)
if (verbose) {
7 changes: 6 additions & 1 deletion R/PrestoGP_Util_Functions.R
@@ -351,7 +351,7 @@ eliminate_dupes <- function(locs, locs.pred = NULL) {
}

lod_reg_mi <- function(y, X, lod, miss, n.mi = 10, eps = 0.01, maxit = 10,
parallel, foldid) {
parallel, foldid, verbose) {
lod <- lod[miss]
last.coef <- rep(Inf, ncol(X) + 1)
cur.glmnet <- cv.glmnet(X, y, parallel = parallel, foldid = foldid,
@@ -375,6 +375,11 @@ lod_reg_mi <- function(y, X, lod, miss, n.mi = 10, eps = 0.01, maxit = 10,
s = "lambda.min"))
}
cur.coef <- colMeans(coef.mat)
if (verbose) {
cat("LOD imputation iteration", itn, "complete", "\n")
cat("Current coefficients:", "\n")
print(cur.coef)
}
}
miss.means <- X[miss, ] %*% cur.coef[-1] + cur.coef[1]
obs.means <- X[!miss, ] %*% cur.coef[-1] + cur.coef[1]
50 changes: 22 additions & 28 deletions R/PrestoGP_Vecchia.R
@@ -261,40 +261,34 @@ setMethod("impute_y_lod", "VecchiaModel", function(model, lod, n.mi = 10,
yhat.ni <- X %*% cur.coef[-1]
yhat.ni <- yhat.ni + mean(y[!miss]) - mean(yhat.ni[!miss])

coef.mat <- matrix(nrow = n.mi, ncol = (ncol(X) + 1))
if (parallel) {
coef.mat <- foreach(i = seq_len(n.mi), .combine = rbind) %dopar% {
yi <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn, covmat = Sigma.hat)
yi <- yi + yhat.ni

tiid <- transform_iid(cbind(yi, X), vecchia.approx, params[1:3],
params[4])
yt <- tiid[, 1]
Xt <- as.matrix(tiid[, -1])

cur.glmnet <- cv.glmnet(as.matrix(Xt), as.matrix(yt),
alpha = model@alpha, family = family, nfolds = nfolds,
foldid = foldid)
as.vector(coef(cur.glmnet, s = "lambda.min"))
yi <- foreach(i = seq_len(n.mi), .combine = cbind) %dopar% {
out <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn,
covmat = Sigma.hat)
out + yhat.ni
}
} else {
coef.mat <- matrix(nrow = n.mi, ncol = (ncol(X) + 1))
yi <- matrix(nrow = length(yhat.ni), ncol = n.mi)
for (i in seq_len(n.mi)) {
yi <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn, covmat = Sigma.hat)
yi <- yi + yhat.ni

tiid <- transform_iid(cbind(yi, X), vecchia.approx, params[1:3],
params[4])
yt <- tiid[, 1]
Xt <- as.matrix(tiid[, -1])

cur.glmnet <- cv.glmnet(as.matrix(Xt), as.matrix(yt),
alpha = model@alpha, family = family, nfolds = nfolds,
foldid = foldid)
coef.mat[i, ] <- as.matrix(coef(cur.glmnet, s = "lambda.min"))
yi[, i] <- rtmvn_snn(y - yhat.ni, rep(-Inf, length(y)),
rep(lod, length(y)) - yhat.ni, is.na(y), locs.nn,
covmat = Sigma.hat)
yi[, i] <- yi[, i] + yhat.ni
}
}
tiid <- transform_iid(cbind(yi, X), vecchia.approx, params[1:3],
params[4])
yt <- tiid[, seq_len(ncol(yi))]
Xt <- as.matrix(tiid[, -(seq_len(ncol(yi)))])

for (i in seq_len(n.mi)) {
cur.glmnet <- cv.glmnet(as.matrix(Xt), as.matrix(yt[, i]),
alpha = model@alpha, family = family, nfolds = nfolds,
foldid = foldid, parallel = parallel)
coef.mat[i, ] <- as.matrix(coef(cur.glmnet, s = "lambda.min"))
}
last.coef <- cur.coef
cur.coef <- colMeans(coef.mat)
if (verbose) {
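In both impute_y_lod methods the restructuring follows the same pattern: the truncated-normal imputation draws are generated first (in parallel under %dopar%, otherwise in a plain loop), all draws are decorrelated in a single transform_iid/transform_miid call, and each lasso fit is then handed to cv.glmnet with parallel = parallel so glmnet parallelizes over the folds itself. A minimal sketch of that three-stage pattern, with toy data and placeholder steps standing in for rtmvn_snn and the Vecchia-based transforms (the placeholders are assumptions, not the package's code):

library(foreach)
library(doParallel)
library(glmnet)

registerDoParallel(2)  # assumed worker count

set.seed(1)
n <- 100; p <- 5; n.mi <- 4
X <- matrix(rnorm(n * p), n, p)
y <- drop(X %*% rnorm(p) + rnorm(n))

# Stage 1: generate the imputed responses in parallel, one column per draw
yi <- foreach(i = seq_len(n.mi), .combine = cbind) %dopar% {
  y + rnorm(n, sd = 0.1)  # placeholder for rtmvn_snn() + yhat.ni
}

# Stage 2: decorrelate all draws at once
# (placeholder for transform_iid()/transform_miid())
tiid <- cbind(yi, X)
yt <- tiid[, seq_len(ncol(yi))]
Xt <- as.matrix(tiid[, -seq_len(ncol(yi))])

# Stage 3: one cv.glmnet per draw; parallel = TRUE reuses the same backend
coef.mat <- matrix(nrow = n.mi, ncol = p + 1)
for (i in seq_len(n.mi)) {
  cur.glmnet <- cv.glmnet(Xt, yt[, i], parallel = TRUE)
  coef.mat[i, ] <- as.matrix(coef(cur.glmnet, s = "lambda.min"))
}
cur.coef <- colMeans(coef.mat)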
89 changes: 46 additions & 43 deletions src/createUMC.cpp
@@ -1,60 +1,47 @@
#define ARMA_64BIT_WORD 1
#ifdef _OPENMP
#include <omp.h>
#endif
#include <RcppArmadillo.h>
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::plugins(cpp17)]]
// [[Rcpp::plugins(openmp)]]
#include <cmath>
#include <boost/math/special_functions/bessel.hpp>
#include <boost/math/special_functions/gamma.hpp>

double matern1(const double &x, const double &smooth, const double &alpha) {
if (smooth == 0.5) {
return exp(-x * alpha);
} else if (smooth == 1.5) {
double normcon = (pow(alpha, 3) / sqrt(M_PI)) * (tgamma(2.0) / tgamma(1.5));
double normcon = (pow(alpha, 3) / sqrt(M_PI)) *
(boost::math::tgamma(2.0) / boost::math::tgamma(1.5));
return normcon * 0.5 * M_PI * pow(alpha, -3) * exp(-x * alpha) *
(1 + x * alpha);
} else if (smooth == 2.5) {
double normcon = (pow(alpha, 5) / sqrt(M_PI)) * (tgamma(3.0) / tgamma(2.5));
double normcon = (pow(alpha, 5) / sqrt(M_PI)) *
(boost::math::tgamma(3.0) / boost::math::tgamma(2.5));
double scaled_x = x * alpha;
return normcon * 0.125 * M_PI * pow(alpha, -5) * exp(-scaled_x) *
(3 + 3 * scaled_x + scaled_x * scaled_x);
} else if (x == 0.0) {
return 1.0;
} else {
double normcon = pow(2.0, 1.0 - smooth) / tgamma(smooth);
double normcon = pow(2.0, 1.0 - smooth) / boost::math::tgamma(smooth);
double scaled_x = x * alpha;
return normcon * pow(scaled_x, smooth) * R::bessel_k(scaled_x, smooth, 1);
return normcon * pow(scaled_x, smooth) *
boost::math::cyl_bessel_k(smooth, scaled_x);
}
}

// computes Euclidean distance between 2 vectors ([slightly slower] helper
// function for rdist in C++)
double dist1(const arma::rowvec &a, const arma::rowvec &b) {
double temp = 0.0;
// loop over each element and square the distance, add to the placeholder
for (arma::uword i = 0; i < a.n_elem; i++) {
temp += (a(i) - b(i)) * (a(i) - b(i));
}
return sqrt(temp);
return arma::norm(a - b, 2);
}

// [[Rcpp::export]]
arma::vec na_omit_c(arma::vec x) {
// make placeholder vector r
arma::vec r(x.size());
// make counter for number of non-NAs
int k = 0;
for (unsigned j = 0; j < x.size(); j++) {
// if not NA, add 1 to counter and set r(j) = x(j)
if (x(j) == x(j)) {
r(j) = x(j);
k++;
}
}
// resize r to non-NA size (removing NAs)
r.resize(k);
// subtract one because C++ indexes from 0 while inputs from R index from 1
// r -= 1.0;
return r;
}
arma::vec na_omit_c(arma::vec x) { return x(arma::find_finite(x)); }

// [[Rcpp::export]]
arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx,
@@ -81,6 +68,9 @@ arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx,
// feeder(2, arma::span(0,6)) = U_beginning.t();
int ind = 7;
// int ind = 0;
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (arma::uword i = 3; i < (ondx.n_elem + 1); i++) {
double temppi = ondx(i - 1);
arma::vec cy = na_omit_c(curqys.col(i - 1));
@@ -109,10 +99,8 @@ arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx,
k2(j, j) += nugget(temppj - 1);
}
}
// //make symmetric
k2 += k2.t();
// //but diagonal is double counted
k2.diag() /= 2.0;
// make symmetric
k2 = arma::symmatu(k2);
// Rcpp::Rcout << k2;
arma::vec bi(nq);
bool status = arma::solve(bi, k2, k1);
@@ -136,18 +124,22 @@ arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx,
// arma::vec bi = arma::solve(k2, k1);
// uhat(2*i - 1, 2*i - 1) = pow(nugget(temppi - 1), -0.5);
// uhat(2*i - 2, 2*i - 1) = -1.0 * uhat(2*i - 1, 2*i - 1);

int siz = cq.n_elem + 3;
arma::umat ndx_private(2, siz);
arma::vec vals_private(siz);
// uhat(2*i - 2, 2*i - 2) = 1.0 / ri;
for (arma::uword j = 0; j < cq.n_elem; j++) {
arma::uword xind = 2 * cq(j) - 2;
if (j >= cy.n_elem) {
xind += 1;
}
// feeder.col(ind) = {(double)xind,(double) 2*i - 2, bi(j)};
ndx.col(ind) = {xind, 2 * i - 2};
vals(ind) = bi(j);
// ndx.col(ind) = {xind, 2 * i - 2};
// vals(ind) = bi(j);
ndx_private.col(j) = {xind, 2 * i - 2};
vals_private(j) = bi(j);
// vals[ind] = bi(j);
ind++;
// ind++;
// uhat(xind, 2.0*i - 2.0) = bi(j);
}
// vals[ind] = 1.0 / ri;
@@ -156,17 +148,28 @@ arma::sp_mat createU_helper_mat(const arma::mat &olocs, const arma::vec &ondx,
-0.5); // used twice so temp storage to avoid recomputation
// feeder.col(ind) = {(double)2*i - 2,(double)2*i - 2, 1.0 / ri};
// feeder.col(ind + 2) = {(double)2*i - 1,(double) 2*i - 1, feed_temp};
ndx.col(ind) = {2 * i - 2, 2 * i - 2};
vals(ind) = 1.0 / ri;
ndx.col(ind + 2) = {2 * i - 1, 2 * i - 1};
vals(ind + 2) = feed_temp;
// ndx.col(ind) = {2 * i - 2, 2 * i - 2};
// vals(ind) = 1.0 / ri;
// ndx.col(ind + 2) = {2 * i - 1, 2 * i - 1};
// vals(ind + 2) = feed_temp;
ndx_private.col(nq) = {2 * i - 2, 2 * i - 2};
vals_private(nq) = 1.0 / ri;
ndx_private.col(nq + 2) = {2 * i - 1, 2 * i - 1};
vals_private(nq + 2) = feed_temp;
// vals[ind + 2] = pow(nugget(temppi - 1), -0.5);
// vals[ind + 1] = -1.0 * vals[ind + 2];
// feeder.col(ind + 1) = {(double)2*i - 2,(double) 2*i - 1, -1.0 *
// feed_temp};
ndx.col(ind + 1) = {2 * i - 2, 2 * i - 1};
vals(ind + 1) = -1.0 * feed_temp;
ind += 3;
// ndx.col(ind + 1) = {2 * i - 2, 2 * i - 1};
// vals(ind + 1) = -1.0 * feed_temp;
// ind += 3;
ndx_private.col(nq + 1) = {2 * i - 2, 2 * i - 1};
vals_private(nq + 1) = -1.0 * feed_temp;
int local_ind;
#pragma omp atomic capture
{local_ind = ind; ind += siz;}
ndx.cols(local_ind, local_ind + siz - 1) = ndx_private;
vals.subvec(local_ind, local_ind + siz - 1) = vals_private;
}
arma::sp_mat uhat(ndx, vals);
// 0 indexing -> 1 indexing for R
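For reference, matern1 above evaluates the Matérn correlation with smoothness smooth (ν) and rate alpha (α); the switch to boost::math::cyl_bessel_k and boost::math::tgamma (and the new BH entry in LinkingTo) avoids calling the R API, which is presumably what makes the new OpenMP-parallel loop in createU_helper_mat safe. The closed forms the code implements, written out in LaTeX:

M_{\nu}(d) = \frac{2^{1-\nu}}{\Gamma(\nu)} (\alpha d)^{\nu} K_{\nu}(\alpha d)
M_{1/2}(d) = e^{-\alpha d}
M_{3/2}(d) = (1 + \alpha d)\, e^{-\alpha d}
M_{5/2}(d) = \left(1 + \alpha d + \tfrac{(\alpha d)^2}{3}\right) e^{-\alpha d}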
(Diffs for the remaining changed files are not shown.)
