diff --git a/.github/workflows/R_CMD_check_Hades.yaml b/.github/workflows/R_CMD_check_Hades.yaml index 94a9379b..04a216d4 100644 --- a/.github/workflows/R_CMD_check_Hades.yaml +++ b/.github/workflows/R_CMD_check_Hades.yaml @@ -20,12 +20,12 @@ jobs: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} # Does not appear to have Java 32-bit, hence the --no-multiarch + - {os: windows-latest, r: 'release'} - {os: macOS-latest, r: 'release'} - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} -# - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} env: + GITHUB_PAT: ${{ secrets.GH_TOKEN }} R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }} @@ -43,75 +43,51 @@ jobs: CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} + CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }} + CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }} + CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }} + CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }} + CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }} + CDM5_SPARK_USER: ${{ secrets.CDM5_SPARK_USER }} + CDM5_SPARK_PASSWORD: ${{ secrets.CDM5_SPARK_PASSWORD }} + CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }} + WEBAPI_TEST_WEBAPI_URL: ${{ secrets.WEBAPI_TEST_WEBAPI_URL }} + WEBAPI_TEST_SECURE_WEBAPI_URL: ${{ secrets.WEBAPI_TEST_SECURE_WEBAPI_URL }} + WEBAPI_TEST_ADMIN_USER_NAME: ${{ secrets.WEBAPI_TEST_ADMIN_USER_NAME }} + WEBAPI_TEST_ADMIN_USER_PASSWORD: ${{ secrets.WEBAPI_TEST_ADMIN_USER_PASSWORD }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - - uses: r-lib/actions/setup-tinytex@v1 + - uses: r-lib/actions/setup-tinytex@v2 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies + - name: Install system requirements if: runner.os == 'Linux' run: | + sudo apt-get install -y libssh-dev + Rscript -e 'install.packages("remotes")' while read -r cmd do eval sudo $cmd done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - name: Install libssh - if: runner.os == 'Linux' - run: | - sudo apt-get install libssh-dev - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE, INSTALL_opts=c("--no-multiarch")) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} - - - name: Install covr - if: runner.os == 'macOS' - run: | - remotes::install_cran("covr") - shell: Rscript {0} - - - name: Remove check folder if exists - if: runner.os == 'macOS' - run: unlink("check", recursive = TRUE) - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran", "--no-multiarch"), error_on = "warning", check_dir = "check") - shell: Rscript {0} + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check - - name: Upload check results - if: failure() - uses: actions/upload-artifact@v2 + - uses: r-lib/actions/check-r-package@v2 with: - name: ${{ runner.os }}-r${{ matrix.config.r }}-results - path: check + args: 'c("--no-manual", "--as-cran")' + build_args: 'c("--compact-vignettes=both")' + error-on: '"warning"' + check-dir: '"check"' - name: Upload source package if: success() && runner.os == 'macOS' && github.event_name != 'pull_request' && github.ref == 'refs/heads/main' @@ -120,6 +96,12 @@ jobs: name: package_tarball path: check/*.tar.gz + - name: Install covr + if: runner.os == 'macOS' + run: | + install.packages("covr") + shell: Rscript {0} + - name: Test coverage if: runner.os == 'macOS' run: covr::codecov() @@ -137,7 +119,7 @@ jobs: steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 @@ -163,7 +145,7 @@ jobs: draft: false prerelease: false - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 if: ${{ env.new_version != '' }} - name: Install drat @@ -192,3 +174,4 @@ jobs: if: ${{ env.new_version != '' }} run: | curl --data "build=true" -X POST https://registry.hub.docker.com/u/ohdsi/broadsea-methodslibrary/trigger/f0b51cec-4027-4781-9383-4b38b42dd4f5/ + diff --git a/Cyclops.Rproj b/Cyclops.Rproj index 235f524b..b68581e0 100644 --- a/Cyclops.Rproj +++ b/Cyclops.Rproj @@ -1,7 +1,7 @@ Version: 1.0 -RestoreWorkspace: Default -SaveWorkspace: Default +RestoreWorkspace: No +SaveWorkspace: No AlwaysSaveHistory: Default EnableCodeIndexing: Yes diff --git a/DESCRIPTION b/DESCRIPTION index 14d493be..f0525ba9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Cyclops Type: Package Title: Cyclic Coordinate Descent for Logistic, Poisson and Survival Analysis -Version: 3.3.1 +Version: 3.3.1.999 Authors@R: c( person("Marc A.", "Suchard", email = "msuchard@ucla.edu", role = c("aut","cre")), person("Martijn J.", "Schuemie", role = "aut"), @@ -30,7 +30,7 @@ Biarch: true URL: https://github.com/ohdsi/cyclops BugReports: https://github.com/ohdsi/cyclops/issues Depends: - R (>= 3.1.0) + R (>= 3.5.0) Imports: rlang, Matrix, @@ -41,7 +41,6 @@ Imports: survival, bit64 LinkingTo: Rcpp, - BH (>= 1.51.0), RcppEigen (>= 0.3.2) Suggests: testthat, diff --git a/NAMESPACE b/NAMESPACE index 40fdd8f3..4fc36ad5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ S3method(survfit,cyclopsFit) S3method(vcov,cyclopsFit) export(Multitype) export(convertToCyclopsData) +export(convertToTimeVaryingCoef) export(coverage) export(createAutoGridCrossValidationControl) export(createControl) @@ -20,6 +21,7 @@ export(createCyclopsData) export(createNonSeparablePrior) export(createParameterizedPrior) export(createPrior) +export(createWeightBasedSearchControl) export(finalizeSqlCyclopsData) export(fitCyclopsModel) export(fitCyclopsSimulation) @@ -39,6 +41,7 @@ export(meanLinearPredictor) export(mse) export(readCyclopsData) export(simulateCyclopsData) +export(splitTime) import(Matrix) import(Rcpp) import(dplyr) @@ -71,5 +74,6 @@ importFrom(stats,terms) importFrom(stats,time) importFrom(stats,vcov) importFrom(survival,Surv) +importFrom(survival,survSplit) importFrom(survival,survfit) useDynLib(Cyclops, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md index 4c72ca68..0c8f5c71 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,23 +1,42 @@ +Cyclops v3.4.0 +============== + +Changes: + +1. remove dependence on `BH` +2. improvements on adaptive likelihood profiling +3. add `auto` option to `cvRepetitions` +4. bumped explicit C++11 requirement up to R v4.1 +5. removed deprecated use of `dbplyr:::$.tbl_lazy` + a. breaking change in `dbplyr v2.4.0` + Cyclops v3.3.1 ============== Changes: -1. fix uninitialized value in detected in computeAsymptoticPrecisionMatrix(); value was priorType -2. fix memory leak caused by call to ::Rf_error() +1. fix uninitialized value in detected in `computeAsymptoticPrecisionMatrix()`; value was priorType +2. fix memory leak caused by call to `::Rf_error()` 3. fix line-endings on Makevar on windows Cyclops v3.3.0 ============== -Changes: - 1. bump for R 4.2 2. fix CRAN warnings a. used `minValues` 3. fix CRAN notes a. remove explicit dependence on C++11 (except for R <= 4.0) +Cyclops v3.2.1 +============== + +Changes: + +1. fix small memory leak caused by direct call to '::Rf_error()' +2. disable JVM calls on CRAN due to uninitialized memory in Java JVM + + Cyclops v3.2.0 ============== diff --git a/R/Cyclops-package.R b/R/Cyclops-package.R new file mode 100644 index 00000000..a65cf643 --- /dev/null +++ b/R/Cyclops-package.R @@ -0,0 +1,6 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL diff --git a/R/DataManagement.R b/R/DataManagement.R index c5b212e5..d0ca2ca1 100644 --- a/R/DataManagement.R +++ b/R/DataManagement.R @@ -634,8 +634,8 @@ reduce <- function(object, covariates, groupBy, power = 1) { #' \code{appendSqlCyclopsData} appends data to an OHDSI data object. #' #' @details Append data using two tables. The outcomes table is dense and contains ... The covariates table is sparse and contains ... -#' All entries in the outcome table must be sorted in increasing order by {oStratumId, oRowId}. All entries in the covariate table -#' must be sorted in increasing order by {cRowId}. Each cRowId value must match exactly one oRowId value. +#' All entries in the outcome table must be sorted in increasing order by (oStratumId, oRowId). All entries in the covariate table +#' must be sorted in increasing order by (cRowId). Each cRowId value must match exactly one oRowId value. #' #' @param object OHDSI Cyclops data object to append entries #' @param oStratumId Integer vector (optional): non-unique stratum identifier for each row in outcomes table diff --git a/R/ModelFit.R b/R/ModelFit.R index ae71ec9d..7916c4cb 100644 --- a/R/ModelFit.R +++ b/R/ModelFit.R @@ -204,6 +204,11 @@ fitCyclopsModel <- function(cyclopsData, writeLines(paste("Using cross-validation selector type", control$selectorType)) } } + + if (control$cvRepetitions == "auto") { + control$cvRepetitions <- .getNumberOfRepetitions(getNumberOfRows(cyclopsData)) + } + control <- .setControl(cyclopsData$cyclopsInterfacePtr, control) threads <- control$threads @@ -218,6 +223,7 @@ fitCyclopsModel <- function(cyclopsData, } .cyclopsSetBeta(cyclopsData$cyclopsInterfacePtr, startingCoefficients) + .cyclopsSetStartingBeta(cyclopsData$cyclopsInterfacePtr, startingCoefficients) } if (!is.null(fixedCoefficients)) { @@ -324,6 +330,11 @@ fitCyclopsModel <- function(cyclopsData, fit$scale <- cyclopsData$scale fit$threads <- threads fit$seed <- control$seed + + if (prior$useCrossValidation) { + fit$cvRepetitions <- control$cvRepetitions + } + class(fit) <- "cyclopsFit" return(fit) } @@ -847,6 +858,18 @@ confint.cyclopsFit <- function(object, parm, level = 0.95, #control, prof } +.initAdaptiveProfile <- function(object, parm, bounds, includePenalty) { + # If an MLE was found, let's not throw that bit of important information away: + if (object$return_flag == "SUCCESS" && + coef(object)[as.character(parm)] > bounds[1] && + coef(object)[as.character(parm)] < bounds[2]) { + profile <- tibble(point = coef(object)[as.character(parm)], + value = fixedGridProfileLogLikelihood(object, parm, coef(object)[as.character(parm)], includePenalty)$value) + } else { + profile <- tibble() + } +} + #' @title Profile likelihood for Cyclops model parameters #' #' @description @@ -873,7 +896,7 @@ getCyclopsProfileLogLikelihood <- function(object, tolerance = 1E-3, initialGridSize = 10, includePenalty = TRUE) { - + maxResets <- 10 if (!xor(is.null(x), is.null(bounds))) { stop("Must provide either `x` or `bounds`, but not both.") } @@ -883,26 +906,16 @@ getCyclopsProfileLogLikelihood <- function(object, if (length(bounds) != 2 || bounds[1] >= bounds[2]) { stop("Must provide bounds[1] < bounds[2]") } - - # If an MLE was found, let's not throw that bit of important information away: - if (object$return_flag == "SUCCESS" && - coef(object)[as.character(parm)] > bounds[1] && - coef(object)[as.character(parm)] < bounds[2]) { - profile <- tibble(point = coef(object)[as.character(parm)], - value = fixedGridProfileLogLikelihood(object, parm, coef(object)[as.character(parm)], includePenalty)$value) - } else { - profile <- tibble() - } + profile <- .initAdaptiveProfile(object, parm, bounds, includePenalty) # Start with sparse grid: grid <- seq(bounds[1], bounds[2], length.out = initialGridSize) # Iterate until stopping criteria met: priorMaxMaxError <- Inf + resetsPerformed <- 0 while (length(grid) != 0) { - ll <- fixedGridProfileLogLikelihood(object, parm, grid, includePenalty) - profile <- bind_rows(profile, ll) %>% arrange(.data$point) if (any(is.nan(profile$value))) { @@ -929,6 +942,16 @@ getCyclopsProfileLogLikelihood <- function(object, deltaY <- profile$value[2:nrow(profile)] - profile$value[1:(nrow(profile) - 1)] slopes <- deltaY / deltaX + if (resetsPerformed < maxResets && !all(slopes[2:length(slopes)] < slopes[1:(length(slopes)-1)])) { + warning("Coefficient drift detected. Resetting Cyclops object and recomputing all likelihood values computed so far.") + grid <- profile$point + profile <- tibble() + interface <- .cyclopsInitializeModel(object$cyclopsData$cyclopsDataPtr, modelType = object$cyclopsData$modelType, "native", computeMLE = TRUE) + assign("cyclopsInterfacePtr", interface$interface, object) + resetsPerformed <- resetsPerformed + 1 + next + } + # Compute where prior and posterior slopes intersect slopes <- c(slopes[1] + (slopes[2] - slopes[3]), slopes, @@ -1104,3 +1127,11 @@ convertToGlmnetLambda <- function(variance, nobs) { graph <- lapply(0:(nParents - 1), function(n, types) { 0:(nTypes - 1) + n * nTypes }, types = nTypes) graph } + +.getNumberOfRepetitions <- function(nrows) { + top <- 1E2 + factor <- 0.5 + pmax(pmin( + ceiling(top / nrows^(factor)) + , 10), 1) +} diff --git a/R/NewDataConversion.R b/R/NewDataConversion.R index 67a9424c..0ae9d4ef 100644 --- a/R/NewDataConversion.R +++ b/R/NewDataConversion.R @@ -253,7 +253,7 @@ convertToCyclopsData.tbl_dbi <- function(outcomes, } if (modelType == "pr" | modelType == "cpr") { - if (any(outcomes$time <= 0)) { + if (any(pull(outcomes, .data$time) <= 0)) { stop("time cannot be non-positive", call. = FALSE) } } diff --git a/R/RcppExports.R b/R/RcppExports.R index d4d5369b..702c104a 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -25,10 +25,22 @@ invisible(.Call(`_Cyclops_cyclopsSetBeta`, inRcppCcdInterface, beta)) } +.cyclopsGetBeta <- function(inRcppCcdInterface, index) { + .Call(`_Cyclops_cyclopsGetBeta`, inRcppCcdInterface, index) +} + +.cyclopsSetStartingBeta <- function(inRcppCcdInterface, inStartingBeta) { + invisible(.Call(`_Cyclops_cyclopsSetStartingBeta`, inRcppCcdInterface, inStartingBeta)) +} + .cyclopsSetFixedBeta <- function(inRcppCcdInterface, beta, fixed) { invisible(.Call(`_Cyclops_cyclopsSetFixedBeta`, inRcppCcdInterface, beta, fixed)) } +.cyclopsGetFixedBeta <- function(inRcppCcdInterface, index) { + .Call(`_Cyclops_cyclopsGetFixedBeta`, inRcppCcdInterface, index) +} + .cyclopsGetIsRegularized <- function(inRcppCcdInterface, index) { .Call(`_Cyclops_cyclopsGetIsRegularized`, inRcppCcdInterface, index) } diff --git a/R/TimeEffects.R b/R/TimeEffects.R new file mode 100644 index 00000000..3bbdb6b5 --- /dev/null +++ b/R/TimeEffects.R @@ -0,0 +1,133 @@ +#' @title Split the analysis time into several intervals for time-varying coefficients. +#' +#' @description +#' \code{splitTime} split the analysis time into several intervals for time-varying coefficients +#' +#' @param shortOut A data frame containing the outcomes with predefined columns (see below). +#' @param cut Numeric: Time points to cut at +#' +#' These columns are expected in the shortOut object: +#' \tabular{lll}{ +#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr +#' \verb{y} \tab(real) \tab Observed event status \cr +#' \verb{time} \tab(real) \tab Observed event time \cr +#' } +#' +#' @return A long outcome table for time-varying coefficients. +#' @importFrom survival survfit survSplit +#' @export +splitTime <- function(shortOut, cut) { + + if (!"time" %in% colnames(shortOut)) stop("Must provide observed event time.") + if (!"y" %in% colnames(shortOut)) stop("Must provide observed event status.") + if ("rowId" %in% colnames(shortOut)) { + shortOut <- shortOut %>% + rename(subjectId = .data$rowId) %>% + arrange(.data$subjectId) + } else { + shortOut <- shortOut %>% + mutate(subjectId = row_number()) + } + + shortOut <- collect(shortOut) + longOut <- do.call('survSplit', list(formula = Surv(shortOut$time, shortOut$y)~., + data = shortOut, + cut = cut, + episode = "stratumId", + id = "newSubjectId")) + longOut <- longOut %>% + rename(y = .data$event) %>% + mutate(time = .data$tstop - .data$tstart) %>% + select(-c(.data$newSubjectId, .data$tstart, .data$tstop)) %>% + arrange(.data$stratumId, .data$subjectId) + + # Restore rowIds + SubjectIds <- shortOut$subjectId + newSubjectId <- max(SubjectIds)+1 + longOut$rowId <-c(SubjectIds, # rowId = subjectId at 1st stratum + newSubjectId:(newSubjectId+(nrow(longOut)-length(SubjectIds))-1)) # create new distinct rowIds for other strata + + # Reorder columns + longOut <- longOut %>% + select(.data$stratumId, .data$subjectId, .data$rowId, everything()) + # select(.data$rowId, everything()) %>% + # select(.data$subjectId, everything()) %>% + # select(.data$stratumId, everything()) + + return(longOut) +} + +#' @title Convert short sparse covariate table to long sparse covariate table for time-varying coefficients. +#' +#' @description +#' \code{convertToTimeVaryingCoef} convert short sparse covariate table to long sparse covariate table for time-varying coefficients. +#' +#' @param shortCov A data frame containing the covariate with predefined columns (see below). +#' @param longOut A data frame containing the outcomes with predefined columns (see below), output of \code{splitTime}. +#' @param timeVaryCoefId Integer: A numeric identifier of a time-varying coefficient +#' +#' @details +#' These columns are expected in the shortCov object: +#' \tabular{lll}{ +#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr +#' \verb{covariateId} \tab(integer) \tab A numeric identifier of a covariate \cr +#' \verb{covariateValue} \tab(real) \tab The value of the specified covariate \cr +#' } +#' +#' These columns are expected in the longOut object: +#' \tabular{lll}{ +#' \verb{stratumId} \tab(integer) \tab Stratum ID for time-varying models \cr +#' \verb{subjectId} \tab(integer) \tab Subject ID is used to link multiple covariates (x) at different time intervals to a single subject \cr +#' \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr +#' \verb{y} \tab(real) \tab The outcome variable \cr +#' \verb{time} \tab(real) \tab For models that use time (e.g. Poisson or Cox regression) this contains time \cr +#' \tab \tab(e.g. number of days) \cr +#' } +#' @return A long sparse covariate table for time-varying coefficients. +#' @export +convertToTimeVaryingCoef <- function(shortCov, longOut, timeVaryCoefId) { + + # Process time-varying coefficients + timeVaryCoefId <- sort(unique(timeVaryCoefId)) + numTime <- length(timeVaryCoefId) # number of time-varying covariates + maxCovId <- max(shortCov$covariateId) + + # First stratum + longCov <- shortCov + longCov$stratumId <- 1 + colnames(longCov)[which(names(longCov) == "rowId")] <- "subjectId" + colnames(shortCov)[which(names(shortCov) == "rowId")] <- "subjectId" + + # Rest of strata + maxStrata <- max(longOut$stratumId) + for (st in 2:maxStrata) { + + # get valid subjects in current stratum + subId <- longOut[longOut$stratumId == st, ]$subjectId + + # get valid sparse covariates information in current stratum + curStrata <- shortCov[shortCov$subjectId %in% subId, ] + + if (any(curStrata$covariateId %in% timeVaryCoefId)) { # skip when valid subjects only have non-zero time-indep covariates + curStrata$stratumId <- st # assign current stratumId + + # recode covariateId for time-varying coefficients + # TODO update label + for (i in 1:numTime) { + curStrata[curStrata$covariateId == timeVaryCoefId[i], "covariateId"] <- maxCovId + numTime * (st - 2) + i + } + + # bind current stratum to longCov + longCov <- rbind(longCov, curStrata) + } + } + + # match rowId in longCov + longCov$rowId <- NA + for (i in 1:nrow(longCov)) { + longCov$rowId[i] <- longOut[with(longOut, subjectId == longCov$subjectId[i] & stratumId == longCov$stratumId[i]), "rowId"] + } + + return(longCov) +} + diff --git a/R/WeightBasedSearchControl.R b/R/WeightBasedSearchControl.R new file mode 100644 index 00000000..54ec29b1 --- /dev/null +++ b/R/WeightBasedSearchControl.R @@ -0,0 +1,47 @@ +#' @title Create a Cyclops control object that supports in- / out-of-sample hyperparameter search using weights +#' +#' @description +#' \code{createWeightBasedSearchControl} creates a Cyclops control object for use with \code{\link{fitCyclopsModel}} +#' that supports hyperparameter optimization through an auto-search where weight = 1 identifies in-sample observations +#' and weight = 0 identifies out-of-sample observations. +#' +#' @param cvType Must equal "auto" +#' @param initialValue Initial value for auto-search parameter +#' @param ... Additional parameters passed through to \code{\link{createControl}} +#' +#' @return +#' A Cyclops prior object of class inheriting from \code{"cyclopsControl"} +#' for use with \code{fitCyclopsModel}. +#' +#' @export +createWeightBasedSearchControl <- function(cvType = "auto", + initialValue = 1, + ...) { + if (cvType != "auto") { + stop("Only auto-search is currently implemented") + } + control <- createControl(cvType = cvType, fold = -1, cvRepetitions = 1, minCVData = 2, ...) + + control$setHook <- function(cyclopsData, prior, control, weights, ...) { + # closure to capture arguments + + if (!prior$useCrossValidation) { + stop("Prior specification must require cross-validation") + } + + if (is.null(weights)) { + stop("Must specify weights") + } + + control$setHook <- NULL # Do not re-enter call-back + + fit <- fitCyclopsModel(cyclopsData, + prior = prior, + control = control, + weights = weights, ...) + + return(fit) + } + + return(control) +} diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index e32d316c..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,42 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' - -install: - ps: Bootstrap - -# Adapt as necessary starting from here - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits diff --git a/cran-comments.md b/cran-comments.md index edfb58e1..ad72b40d 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -16,8 +16,8 @@ * win-builder (devel and release) ## R CMD check results -* There were no ERRORs or WARNINGs -* There is 1 occasional NOTE (besides days since last submission): +* There were no ERRORs or WARNINGs +* There is 1 occasional NOTE: checking installed package size ... NOTE installed size is 22.5Mb sub-directories of 1Mb or more: diff --git a/man/Cyclops-package.Rd b/man/Cyclops-package.Rd new file mode 100644 index 00000000..d726da88 --- /dev/null +++ b/man/Cyclops-package.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Cyclops-package.R +\docType{package} +\name{Cyclops-package} +\alias{Cyclops} +\alias{Cyclops-package} +\title{Cyclops: Cyclic Coordinate Descent for Logistic, Poisson and Survival Analysis} +\description{ +This model fitting tool incorporates cyclic coordinate descent and majorization-minimization approaches to fit a variety of regression models found in large-scale observational healthcare data. Implementations focus on computational optimization and fine-scale parallelization to yield efficient inference in massive datasets. Please see: Suchard, Simpson, Zorych, Ryan and Madigan (2013) \doi{10.1145/2414416.2414791}. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/ohdsi/cyclops} + \item Report bugs at \url{https://github.com/ohdsi/cyclops/issues} +} + +} +\author{ +\strong{Maintainer}: Marc A. Suchard \email{msuchard@ucla.edu} + +Authors: +\itemize{ + \item Martijn J. Schuemie + \item Trevor R. Shaddox + \item Yuxi Tian + \item Jianxiao Yang + \item Eric Kawaguchi +} + +Other contributors: +\itemize{ + \item Sushil Mittal [contributor] + \item Observational Health Data Sciences and Informatics [copyright holder] + \item Marcus Geelnard (provided the TinyThread library) [copyright holder, contributor] + \item Rutgers University (provided the HParSearch routine) [copyright holder, contributor] + \item R Development Core Team (provided the ZeroIn routine) [copyright holder, contributor] +} + +} +\keyword{internal} diff --git a/man/appendSqlCyclopsData.Rd b/man/appendSqlCyclopsData.Rd index be922eff..6b9547c3 100644 --- a/man/appendSqlCyclopsData.Rd +++ b/man/appendSqlCyclopsData.Rd @@ -37,7 +37,7 @@ appendSqlCyclopsData( } \details{ Append data using two tables. The outcomes table is dense and contains ... The covariates table is sparse and contains ... -All entries in the outcome table must be sorted in increasing order by {oStratumId, oRowId}. All entries in the covariate table -must be sorted in increasing order by {cRowId}. Each cRowId value must match exactly one oRowId value. +All entries in the outcome table must be sorted in increasing order by (oStratumId, oRowId). All entries in the covariate table +must be sorted in increasing order by (cRowId). Each cRowId value must match exactly one oRowId value. } \keyword{internal} diff --git a/man/convertToTimeVaryingCoef.Rd b/man/convertToTimeVaryingCoef.Rd new file mode 100644 index 00000000..b587f8f2 --- /dev/null +++ b/man/convertToTimeVaryingCoef.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TimeEffects.R +\name{convertToTimeVaryingCoef} +\alias{convertToTimeVaryingCoef} +\title{Convert short sparse covariate table to long sparse covariate table for time-varying coefficients.} +\usage{ +convertToTimeVaryingCoef(shortCov, longOut, timeVaryCoefId) +} +\arguments{ +\item{shortCov}{A data frame containing the covariate with predefined columns (see below).} + +\item{longOut}{A data frame containing the outcomes with predefined columns (see below), output of \code{splitTime}.} + +\item{timeVaryCoefId}{Integer: A numeric identifier of a time-varying coefficient} +} +\value{ +A long sparse covariate table for time-varying coefficients. +} +\description{ +\code{convertToTimeVaryingCoef} convert short sparse covariate table to long sparse covariate table for time-varying coefficients. +} +\details{ +These columns are expected in the shortCov object: +\tabular{lll}{ + \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr + \verb{covariateId} \tab(integer) \tab A numeric identifier of a covariate \cr + \verb{covariateValue} \tab(real) \tab The value of the specified covariate \cr +} + +These columns are expected in the longOut object: +\tabular{lll}{ + \verb{stratumId} \tab(integer) \tab Stratum ID for time-varying models \cr + \verb{subjectId} \tab(integer) \tab Subject ID is used to link multiple covariates (x) at different time intervals to a single subject \cr + \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr + \verb{y} \tab(real) \tab The outcome variable \cr + \verb{time} \tab(real) \tab For models that use time (e.g. Poisson or Cox regression) this contains time \cr + \tab \tab(e.g. number of days) \cr +} +} diff --git a/man/createWeightBasedSearchControl.Rd b/man/createWeightBasedSearchControl.Rd new file mode 100644 index 00000000..4faf3f9f --- /dev/null +++ b/man/createWeightBasedSearchControl.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/WeightBasedSearchControl.R +\name{createWeightBasedSearchControl} +\alias{createWeightBasedSearchControl} +\title{Create a Cyclops control object that supports in- / out-of-sample hyperparameter search using weights} +\usage{ +createWeightBasedSearchControl(cvType = "auto", initialValue = 1, ...) +} +\arguments{ +\item{cvType}{Must equal "auto"} + +\item{initialValue}{Initial value for auto-search parameter} + +\item{...}{Additional parameters passed through to \code{\link{createControl}}} +} +\value{ +A Cyclops prior object of class inheriting from \code{"cyclopsControl"} +for use with \code{fitCyclopsModel}. +} +\description{ +\code{createWeightBasedSearchControl} creates a Cyclops control object for use with \code{\link{fitCyclopsModel}} +that supports hyperparameter optimization through an auto-search where weight = 1 identifies in-sample observations +and weight = 0 identifies out-of-sample observations. +} diff --git a/man/splitTime.Rd b/man/splitTime.Rd new file mode 100644 index 00000000..fedc4a7e --- /dev/null +++ b/man/splitTime.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/TimeEffects.R +\name{splitTime} +\alias{splitTime} +\title{Split the analysis time into several intervals for time-varying coefficients.} +\usage{ +splitTime(shortOut, cut) +} +\arguments{ +\item{shortOut}{A data frame containing the outcomes with predefined columns (see below).} + +\item{cut}{Numeric: Time points to cut at + +These columns are expected in the shortOut object: +\tabular{lll}{ + \verb{rowId} \tab(integer) \tab Row ID is used to link multiple covariates (x) to a single outcome (y) \cr + \verb{y} \tab(real) \tab Observed event status \cr + \verb{time} \tab(real) \tab Observed event time \cr +}} +} +\value{ +A long outcome table for time-varying coefficients. +} +\description{ +\code{splitTime} split the analysis time into several intervals for time-varying coefficients +} diff --git a/src/Makevars.in b/src/Makevars.in index ee8a04ba..00abe168 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -42,6 +42,7 @@ OBJECTS.drivers = \ cyclops/drivers/BootstrapDriver.o \ cyclops/drivers/BootstrapSelector.o \ cyclops/drivers/CrossValidationSelector.o \ + cyclops/drivers/WeightBasedSelector.o \ cyclops/drivers/GridSearchCrossValidationDriver.o \ cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.o \ cyclops/drivers/HierarchyGridSearchCrossValidationDriver.o \ diff --git a/src/RcppCyclopsInterface.cpp b/src/RcppCyclopsInterface.cpp index ae8ff12e..c2ccab92 100644 --- a/src/RcppCyclopsInterface.cpp +++ b/src/RcppCyclopsInterface.cpp @@ -116,6 +116,20 @@ void cyclopsSetBeta(SEXP inRcppCcdInterface, const std::vector& beta) { interface->getCcd().setBeta(beta); } +// [[Rcpp::export(.cyclopsGetBeta)]] +double cyclopsGetBeta(SEXP inRcppCcdInterface, const int index) { + using namespace bsccs; + XPtr interface(inRcppCcdInterface); + return interface->getCcd().getBeta(index); +} + +// [[Rcpp::export(.cyclopsSetStartingBeta)]] +void cyclopsSetStartingBeta(SEXP inRcppCcdInterface, const std::vector& inStartingBeta) { + using namespace bsccs; + XPtr interface(inRcppCcdInterface); + interface->getCcd().setStartingBeta(inStartingBeta); +} + // [[Rcpp::export(.cyclopsSetFixedBeta)]] void cyclopsSetFixedBeta(SEXP inRcppCcdInterface, int beta, bool fixed) { using namespace bsccs; @@ -124,6 +138,14 @@ void cyclopsSetFixedBeta(SEXP inRcppCcdInterface, int beta, bool fixed) { interface->getCcd().setFixedBeta(beta - 1, fixed); } +// [[Rcpp::export(.cyclopsGetFixedBeta)]] +bool cyclopsGetFixedBeta(SEXP inRcppCcdInterface, const int index){ + using namespace bsccs; + XPtr interface(inRcppCcdInterface); + return interface->getCcd().getFixedBeta(index); +} + + // [[Rcpp::export(".cyclopsGetIsRegularized")]] bool cyclopsGetIsRegularized(SEXP inRcppCcdInterface, const int index) { using namespace bsccs; diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 67ad4bce..bf33c7c5 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -73,6 +73,29 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// cyclopsGetBeta +double cyclopsGetBeta(SEXP inRcppCcdInterface, const int index); +RcppExport SEXP _Cyclops_cyclopsGetBeta(SEXP inRcppCcdInterfaceSEXP, SEXP indexSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type inRcppCcdInterface(inRcppCcdInterfaceSEXP); + Rcpp::traits::input_parameter< const int >::type index(indexSEXP); + rcpp_result_gen = Rcpp::wrap(cyclopsGetBeta(inRcppCcdInterface, index)); + return rcpp_result_gen; +END_RCPP +} +// cyclopsSetStartingBeta +void cyclopsSetStartingBeta(SEXP inRcppCcdInterface, const std::vector& inStartingBeta); +RcppExport SEXP _Cyclops_cyclopsSetStartingBeta(SEXP inRcppCcdInterfaceSEXP, SEXP inStartingBetaSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type inRcppCcdInterface(inRcppCcdInterfaceSEXP); + Rcpp::traits::input_parameter< const std::vector& >::type inStartingBeta(inStartingBetaSEXP); + cyclopsSetStartingBeta(inRcppCcdInterface, inStartingBeta); + return R_NilValue; +END_RCPP +} // cyclopsSetFixedBeta void cyclopsSetFixedBeta(SEXP inRcppCcdInterface, int beta, bool fixed); RcppExport SEXP _Cyclops_cyclopsSetFixedBeta(SEXP inRcppCcdInterfaceSEXP, SEXP betaSEXP, SEXP fixedSEXP) { @@ -85,6 +108,18 @@ BEGIN_RCPP return R_NilValue; END_RCPP } +// cyclopsGetFixedBeta +bool cyclopsGetFixedBeta(SEXP inRcppCcdInterface, const int index); +RcppExport SEXP _Cyclops_cyclopsGetFixedBeta(SEXP inRcppCcdInterfaceSEXP, SEXP indexSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< SEXP >::type inRcppCcdInterface(inRcppCcdInterfaceSEXP); + Rcpp::traits::input_parameter< const int >::type index(indexSEXP); + rcpp_result_gen = Rcpp::wrap(cyclopsGetFixedBeta(inRcppCcdInterface, index)); + return rcpp_result_gen; +END_RCPP +} // cyclopsGetIsRegularized bool cyclopsGetIsRegularized(SEXP inRcppCcdInterface, const int index); RcppExport SEXP _Cyclops_cyclopsGetIsRegularized(SEXP inRcppCcdInterfaceSEXP, SEXP indexSEXP) { @@ -766,7 +801,10 @@ static const R_CallMethodDef CallEntries[] = { {"_Cyclops_cyclopsGetUseOffsetNames", (DL_FUNC) &_Cyclops_cyclopsGetUseOffsetNames, 0}, {"_Cyclops_cyclopsGetComputeDevice", (DL_FUNC) &_Cyclops_cyclopsGetComputeDevice, 1}, {"_Cyclops_cyclopsSetBeta", (DL_FUNC) &_Cyclops_cyclopsSetBeta, 2}, + {"_Cyclops_cyclopsGetBeta", (DL_FUNC) &_Cyclops_cyclopsGetBeta, 2}, + {"_Cyclops_cyclopsSetStartingBeta", (DL_FUNC) &_Cyclops_cyclopsSetStartingBeta, 2}, {"_Cyclops_cyclopsSetFixedBeta", (DL_FUNC) &_Cyclops_cyclopsSetFixedBeta, 3}, + {"_Cyclops_cyclopsGetFixedBeta", (DL_FUNC) &_Cyclops_cyclopsGetFixedBeta, 2}, {"_Cyclops_cyclopsGetIsRegularized", (DL_FUNC) &_Cyclops_cyclopsGetIsRegularized, 2}, {"_Cyclops_cyclopsSetWeights", (DL_FUNC) &_Cyclops_cyclopsSetWeights, 2}, {"_Cyclops_cyclopsSetCensorWeights", (DL_FUNC) &_Cyclops_cyclopsSetCensorWeights, 2}, diff --git a/src/cyclops/CcdInterface.cpp b/src/cyclops/CcdInterface.cpp index 5431047a..1fb2cc9c 100644 --- a/src/cyclops/CcdInterface.cpp +++ b/src/cyclops/CcdInterface.cpp @@ -29,7 +29,7 @@ #include "CyclicCoordinateDescent.h" #include "ModelData.h" -#include "boost/iterator/counting_iterator.hpp" +//#include "boost/iterator/counting_iterator.hpp" #include "Thread.h" // #include "io/InputReader.h" @@ -45,6 +45,7 @@ // #include "io/BBRInputReader.h" // #include "io/OutputWriter.h" #include "drivers/CrossValidationSelector.h" +#include "drivers/WeightBasedSelector.h" #include "drivers/GridSearchCrossValidationDriver.h" #include "drivers/HierarchyGridSearchCrossValidationDriver.h" #include "drivers/AutoSearchCrossValidationDriver.h" @@ -369,9 +370,9 @@ double CcdInterface::profileModel(CyclicCoordinateDescent *ccd, AbstractModelDat } ); } else { - auto scheduler = TaskScheduler >( - boost::make_counting_iterator(0), - boost::make_counting_iterator(static_cast(bounds.size())), + auto scheduler = TaskScheduler>( + IncrementableIterator(0), // boost::make_counting_iterator(0), + IncrementableIterator(bounds.size()), //boost::make_counting_iterator(static_cast(bounds.size())), nThreads); auto oneTask = [&getBound, &scheduler, &ccdPool, &bounds](unsigned long task) { @@ -582,8 +583,10 @@ double CcdInterface::evaluateProfileModel(CyclicCoordinateDescent *ccd, Abstract values[i] = evaluate(points[i], ccd); } } else { - auto scheduler = TaskScheduler>( - boost::make_counting_iterator(0), boost::make_counting_iterator(static_cast(points.size())), nThreads); + auto scheduler = TaskScheduler>( + IncrementableIterator(0), //boost::make_counting_iterator(0), + IncrementableIterator(points.size()), //boost::make_counting_iterator(static_cast(points.size())), + nThreads); auto oneTask = [&evaluate, &scheduler, &ccdPool, &points, &values](unsigned long task) { values[task] = evaluate(points[task], ccdPool[scheduler.getThreadIndex(task)]); @@ -746,12 +749,32 @@ double CcdInterface::runCrossValidation(CyclicCoordinateDescent *ccd, AbstractMo } } - CrossValidationSelector selector(arguments.crossValidation.fold, - modelData->getPidVectorSTL(), - selectorType, arguments.seed, logger, error, - nullptr, - (useWeights ? &weights : nullptr) - ); // TODO ERROR HERE! NOT ALL MODELS ARE SUBJECT + // TODO ADD CODE HERE + + //arguments.crossValidation.fold != -1 ? + + AbstractSelector* ptr; + if (arguments.crossValidation.fold != -1) { + ptr = new CrossValidationSelector(arguments.crossValidation.fold, + modelData->getPidVectorSTL(), + selectorType, arguments.seed, logger, error, + nullptr, + (useWeights ? &weights : nullptr)); + } else { + ptr = + + new WeightBasedSelector(1, + modelData->getPidVectorSTL(), + selectorType, arguments.seed, logger, error, + nullptr, + &weights); + arguments.crossValidation.foldToCompute = 1; + } + + std::unique_ptr selector = + bsccs::unique_ptr(ptr); + + // std::make_unique_ptr(ptr2); AbstractCrossValidationDriver* driver; if (arguments.crossValidation.useAutoSearchCV) { @@ -768,7 +791,7 @@ double CcdInterface::runCrossValidation(CyclicCoordinateDescent *ccd, AbstractMo } } - driver->drive(*ccd, selector, arguments); + driver->drive(*ccd, *selector, arguments); gettimeofday(&time2, NULL); @@ -781,7 +804,7 @@ double CcdInterface::runCrossValidation(CyclicCoordinateDescent *ccd, AbstractMo logger->writeLine(stream); } // Do full fit for optimal parameter - driver->resetForOptimal(*ccd, selector, arguments); + driver->resetForOptimal(*ccd, *selector, arguments); fitModel(ccd); if (arguments.fitMLEAtMode) { runFitMLEAtMode(ccd); diff --git a/src/cyclops/CyclicCoordinateDescent.cpp b/src/cyclops/CyclicCoordinateDescent.cpp index 6e54b9f2..cfb55506 100644 --- a/src/cyclops/CyclicCoordinateDescent.cpp +++ b/src/cyclops/CyclicCoordinateDescent.cpp @@ -207,10 +207,14 @@ void CyclicCoordinateDescent::init(bool offset) { hDelta.resize(J, static_cast(initialBound)); hBeta.resize(J, static_cast(0.0)); + // initialize starting betas to default value 0.0 + startingBeta.resize(J, static_cast(0.0)); + // hXBeta.resize(K, static_cast(0.0)); // hXBetaSave.resize(K, static_cast(0.0)); fixBeta.resize(J, false); + hWeights.resize(0); cWeights.resize(0); // ESK: For censor weights @@ -221,6 +225,7 @@ void CyclicCoordinateDescent::init(bool offset) { varianceKnown = false; if (offset) { hBeta[0] = static_cast(1); + startingBeta[0] = static_cast(1); fixBeta[0] = true; xBetaKnown = false; } else { @@ -261,7 +266,13 @@ void CyclicCoordinateDescent::resetBeta(void) { auto start = hXI.getHasOffsetCovariate() ? 1 : 0; for (auto j = start; j < J; j++) { - hBeta[j] = 0.0; + + // check if a non-zero starting beta is present + if (startingBeta[j] != 0.0) { + hBeta[j] = startingBeta[j]; + } else { + hBeta[j] = 0.0; + } } computeXBeta(); sufficientStatisticsKnown = false; @@ -474,6 +485,13 @@ void CyclicCoordinateDescent::setBeta(const std::vector& beta) { varianceKnown = false; } +void CyclicCoordinateDescent::setStartingBeta(const std::vector& inStartingBeta) { + // ToDo: This functionality could be merged into setBeta() + for (int j = 0; j < J; ++j) { + startingBeta[j] = inStartingBeta[j]; + } +} + void CyclicCoordinateDescent::setBeta(int i, double beta) { #define PROCESS_IN_MS #ifdef PROCESS_IN_MS @@ -677,7 +695,7 @@ void CyclicCoordinateDescent::kktSwindle(const ModeFindingArguments& arguments) int swindleIterationCount = 1; // int initialActiveSize = activeSet.size(); - int perPassSize = arguments.swindleMultipler; + // int perPassSize = arguments.swindleMultipler; while (!done) { @@ -805,7 +823,7 @@ void CyclicCoordinateDescent::kktSwindle(const ModeFindingArguments& arguments) } } ++swindleIterationCount; - perPassSize *= 2; + // perPassSize *= 2; logger->yield(); // This is not re-entrant safe } diff --git a/src/cyclops/CyclicCoordinateDescent.h b/src/cyclops/CyclicCoordinateDescent.h index 515adf21..9ccba618 100644 --- a/src/cyclops/CyclicCoordinateDescent.h +++ b/src/cyclops/CyclicCoordinateDescent.h @@ -133,6 +133,8 @@ class CyclicCoordinateDescent { // template void setBeta(const std::vector& beta); + void setStartingBeta(const std::vector& inStartingBeta); + void setBeta(int i, double beta); // void double getHessianComponent(int i, int j); @@ -330,6 +332,7 @@ class CyclicCoordinateDescent { typedef std::vector DoubleVector; DoubleVector hBeta; + DoubleVector startingBeta; // DoubleVector& hXBeta; // TODO Delegate to ModelSpecifics // DoubleVector& hXBetaSave; // Delegate @@ -343,7 +346,7 @@ class CyclicCoordinateDescent { string conditionId; bool computeMLE; - int priorType; + int priorType = priors::NONE; double initialBound; diff --git a/src/cyclops/Iterators.h b/src/cyclops/Iterators.h index 9b16022f..de82c796 100644 --- a/src/cyclops/Iterators.h +++ b/src/cyclops/Iterators.h @@ -1,7 +1,8 @@ #ifndef ITERATORS_H #define ITERATORS_H -#include +//#include +#include #include "CompressedDataMatrix.h" @@ -14,6 +15,47 @@ namespace bsccs { * single run-time determined iterator */ +template +class IncrementableIterator { +public: + + using iterator_category = std::forward_iterator_tag; + using value_type = T; + using difference_type = T; + using pointer = T*; + using reference = T&; + + IncrementableIterator(T value) : value(value) { } + + IncrementableIterator& operator++() { + ++value; + return *this; + } + + T operator*() const { return value; } + + bool operator==(const IncrementableIterator& rhs) const { + return value == rhs.value; + } + + bool operator!=(const IncrementableIterator& rhs) const { + return !(*this == rhs); + } + + template + IncrementableIterator operator+(V n) const { + IncrementableIterator copy = *this; + copy.value += n; + return copy; + } + + bool operator<(const IncrementableIterator& rhs) const { + return value < rhs.value; + } + +private: + T value; +}; struct IndicatorTag {}; struct SparseTag {}; @@ -31,7 +73,7 @@ class IndicatorIterator { typedef IndicatorTag tag; typedef int Index; typedef Scalar ValueType; - typedef boost::tuples::tuple XTuple; + typedef std::tuple XTuple; const static std::string name; @@ -123,7 +165,8 @@ class SparseIterator { typedef SparseTag tag; typedef int Index; typedef Scalar ValueType; - typedef boost::tuples::tuple XTuple; +// typedef boost::tuples::tuple XTuple; + typedef std::tuple XTuple; const static std::string name; @@ -300,7 +343,8 @@ class DenseIterator { typedef DenseTag tag; typedef int Index; typedef Scalar ValueType; - typedef boost::tuples::tuple XTuple; +// typedef boost::tuples::tuple XTuple; + typedef std::tuple XTuple; const static std::string name; @@ -392,7 +436,8 @@ class InterceptIterator { typedef InterceptTag tag; // TODO Fix!!! typedef int Index; typedef Scalar ValueType; - typedef boost::tuples::tuple XTuple; +// typedef boost::tuples::tuple XTuple; + typedef std::tuple XTuple; const static std::string name; diff --git a/src/cyclops/ModelData.cpp b/src/cyclops/ModelData.cpp index 00cc1be4..f877917d 100644 --- a/src/cyclops/ModelData.cpp +++ b/src/cyclops/ModelData.cpp @@ -17,8 +17,8 @@ #include #include -#include -#include +// #include +// #include #include "ModelData.h" diff --git a/src/cyclops/drivers/AbstractCrossValidationDriver.cpp b/src/cyclops/drivers/AbstractCrossValidationDriver.cpp index 33d255fc..de3246fe 100644 --- a/src/cyclops/drivers/AbstractCrossValidationDriver.cpp +++ b/src/cyclops/drivers/AbstractCrossValidationDriver.cpp @@ -7,8 +7,9 @@ #include #include +#include -#include "boost/iterator/counting_iterator.hpp" +// #include "boost/iterator/counting_iterator.hpp" #include "Types.h" #include "Thread.h" @@ -30,7 +31,7 @@ AbstractCrossValidationDriver::~AbstractCrossValidationDriver() { void AbstractCrossValidationDriver::resetForOptimal( CyclicCoordinateDescent& ccd, - CrossValidationSelector& selector, + AbstractSelector& selector, const CCDArguments& arguments) { ccd.setWeights(NULL); @@ -146,10 +147,15 @@ double AbstractCrossValidationDriver::doCrossValidationStep( auto& weightsExclude = this->weightsExclude; auto& logger = this->logger; - auto scheduler = TaskScheduler( - boost::make_counting_iterator(0), - boost::make_counting_iterator(arguments.foldToCompute), - nThreads); + // auto scheduler = TaskScheduler( + // boost::make_counting_iterator(0), + // boost::make_counting_iterator(arguments.foldToCompute), + // nThreads); + + auto scheduler = TaskScheduler>( + IncrementableIterator(0), + IncrementableIterator(arguments.foldToCompute), + nThreads); auto oneTask = [step, coldStart, nThreads, &ccdPool, &selectorPool, diff --git a/src/cyclops/drivers/AbstractCrossValidationDriver.h b/src/cyclops/drivers/AbstractCrossValidationDriver.h index 8eaf51d7..defa6581 100644 --- a/src/cyclops/drivers/AbstractCrossValidationDriver.h +++ b/src/cyclops/drivers/AbstractCrossValidationDriver.h @@ -39,7 +39,7 @@ class AbstractCrossValidationDriver : public AbstractDriver { virtual void resetForOptimal( CyclicCoordinateDescent& ccd, - CrossValidationSelector& selector, + AbstractSelector& selector, const CCDArguments& arguments); virtual void logResults(const CCDArguments& arguments) = 0; // pure virtual diff --git a/src/cyclops/drivers/AutoSearchCrossValidationDriver.cpp b/src/cyclops/drivers/AutoSearchCrossValidationDriver.cpp index 93f6a23c..db07e813 100644 --- a/src/cyclops/drivers/AutoSearchCrossValidationDriver.cpp +++ b/src/cyclops/drivers/AutoSearchCrossValidationDriver.cpp @@ -21,7 +21,7 @@ #include "AbstractSelector.h" #include "../utils/HParSearch.h" -#include "boost/iterator/counting_iterator.hpp" +// #include "boost/iterator/counting_iterator.hpp" namespace bsccs { diff --git a/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.cpp b/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.cpp index 3331a17a..46f88c39 100644 --- a/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.cpp +++ b/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.cpp @@ -41,7 +41,7 @@ HierarchyAutoSearchCrossValidationDriver::~HierarchyAutoSearchCrossValidationDri void HierarchyAutoSearchCrossValidationDriver::resetForOptimal( CyclicCoordinateDescent& ccd, - CrossValidationSelector& selector, + AbstractSelector& selector, const CCDArguments& arguments) { ccd.setWeights(NULL); diff --git a/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.h b/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.h index 59adf486..4b888480 100644 --- a/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.h +++ b/src/cyclops/drivers/HierarchyAutoSearchCrossValidationDriver.h @@ -24,7 +24,7 @@ class HierarchyAutoSearchCrossValidationDriver : public AutoSearchCrossValidatio virtual void resetForOptimal( CyclicCoordinateDescent& ccd, - CrossValidationSelector& selector, + AbstractSelector& selector, const CCDArguments& arguments); virtual void drive( diff --git a/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.cpp b/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.cpp index 68c3d1a0..f080e9b3 100644 --- a/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.cpp +++ b/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.cpp @@ -39,7 +39,7 @@ HierarchyGridSearchCrossValidationDriver::~HierarchyGridSearchCrossValidationDri void HierarchyGridSearchCrossValidationDriver::resetForOptimal( CyclicCoordinateDescent& ccd, - CrossValidationSelector& selector, + AbstractSelector& selector, const CCDArguments& arguments) { diff --git a/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.h b/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.h index 831de898..8ad768cb 100644 --- a/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.h +++ b/src/cyclops/drivers/HierarchyGridSearchCrossValidationDriver.h @@ -17,14 +17,14 @@ class HierarchyGridSearchCrossValidationDriver : public GridSearchCrossValidatio HierarchyGridSearchCrossValidationDriver( const CCDArguments& arguments, loggers::ProgressLoggerPtr _logger, - loggers::ErrorHandlerPtr _error, + loggers::ErrorHandlerPtr _error, std::vector* wtsExclude = NULL); virtual ~HierarchyGridSearchCrossValidationDriver(); virtual void resetForOptimal( CyclicCoordinateDescent& ccd, - CrossValidationSelector& selector, + AbstractSelector& selector, const CCDArguments& arguments); virtual void drive( diff --git a/src/cyclops/drivers/WeightBasedSelector.cpp b/src/cyclops/drivers/WeightBasedSelector.cpp new file mode 100644 index 00000000..462abaa3 --- /dev/null +++ b/src/cyclops/drivers/WeightBasedSelector.cpp @@ -0,0 +1,66 @@ +/* + * WeightBasedSelector.cpp + * + * Created on: Aug 26, 2022 + * Author: msuchard + */ + +#include +#include +#include +#include + +#include "WeightBasedSelector.h" + +namespace bsccs { + +using std::vector; + +WeightBasedSelector::WeightBasedSelector( + int inFold, + std::vector inIds, + SelectorType inType, + long inSeed, + loggers::ProgressLoggerPtr _logger, + loggers::ErrorHandlerPtr _error, + std::vector* wtsExclude, + std::vector* wtsOriginal) : AbstractSelector(inIds, inType, inSeed, _logger, _error) { + + std::ostringstream stream; + stream << "Performing in- / out-of-sample search based on provided weights"; + logger->writeLine(stream); + + weightsExclude = wtsExclude; + weightsOriginal = wtsOriginal; +} + +void WeightBasedSelector::reseed() { + // Do nothing +} + +WeightBasedSelector::~WeightBasedSelector() { + // Do nothing +} + +void WeightBasedSelector::getWeights(int batch, std::vector& weights) { + if (weights.size() < weightsOriginal->size()) { + weights.resize(weightsOriginal->size()); + } + std::copy(weightsOriginal->begin(), weightsOriginal->end(), weights.begin()); +} + +AbstractSelector* WeightBasedSelector::clone() const { + return new (std::nothrow) WeightBasedSelector(*this); // default copy constructor +} + +void WeightBasedSelector::getComplement(std::vector& weights) { + for (auto it = weights.begin(); it != weights.end(); it++) { + *it = 1 - *it; + } +} + +void WeightBasedSelector::permute() { + // Do nothing +} + +} // namespace diff --git a/src/cyclops/drivers/WeightBasedSelector.h b/src/cyclops/drivers/WeightBasedSelector.h new file mode 100644 index 00000000..c59c44f5 --- /dev/null +++ b/src/cyclops/drivers/WeightBasedSelector.h @@ -0,0 +1,51 @@ +/* + * WeightBasedSelector.h + * + * Created on: Aug 26, 2022 + * Author: msuchard + */ + +#ifndef WEIGHTBASED_H_ +#define WEIGHTBASED_H_ + +// #include + +#include "CrossValidationSelector.h" + +namespace bsccs { + +class WeightBasedSelector : public AbstractSelector { +public: + WeightBasedSelector( + int inFold, + std::vector inIds, + SelectorType inType, + long inSeed, + loggers::ProgressLoggerPtr _logger, + loggers::ErrorHandlerPtr _error, + std::vector* wtsExclude, + std::vector* wtsOriginal); + + virtual ~WeightBasedSelector(); + + void permute(); + + void reseed(); + + void getWeights(int batch, std::vector& weights); + + void getComplement(std::vector& weights); + + AbstractSelector* clone() const; +// +private: +// int fold; +// std::vector permutation; +// std::vector intervalStart; + std::vector* weightsExclude; + std::vector* weightsOriginal; +}; + +} // namespace + +#endif /* WEIGHTBASED_H_ */ diff --git a/src/cyclops/engine/ModelSpecifics.h b/src/cyclops/engine/ModelSpecifics.h index 88dfaa68..1397711e 100644 --- a/src/cyclops/engine/ModelSpecifics.h +++ b/src/cyclops/engine/ModelSpecifics.h @@ -1696,7 +1696,7 @@ struct TupleXGetterNew { template inline ReturnType operator()(TupleType& tuple) const { - return boost::get(tuple); + return std::get(tuple); } }; @@ -1728,7 +1728,7 @@ struct TestNumeratorKernel { template NumeratorType operator()(const NumeratorType lhs, const TupleType tuple) { - const auto expXBeta = boost::get<0>(tuple); + const auto expXBeta = std::get<0>(tuple); const auto x = getX(tuple); //boost::get<1>(tuple); return { @@ -1752,8 +1752,8 @@ struct TestGradientKernel { template GradientType operator()(const GradientType lhs, const NumeratorType numerator, const TupleType tuple) { - const auto denominator = boost::get<0>(tuple); - const auto weight = boost::get<1>(tuple); + const auto denominator = std::get<0>(tuple); + const auto weight = std::get<1>(tuple); return BaseModel::template incrementGradientAndHessian( diff --git a/src/cyclops/engine/ModelSpecifics.hpp b/src/cyclops/engine/ModelSpecifics.hpp index e08e7da1..e6e90023 100644 --- a/src/cyclops/engine/ModelSpecifics.hpp +++ b/src/cyclops/engine/ModelSpecifics.hpp @@ -19,7 +19,7 @@ #include "Recursions.hpp" #include "ParallelLoops.h" -#include "Ranges.h" +// #include "Ranges.h" #ifdef USE_RCPP_PARALLEL #include @@ -1124,8 +1124,6 @@ void ModelSpecifics::computeGradientAndHessianImpl(int index // gradient = result.real(); // hessian = result.imag(); - // using RealTypePair = std::pair; - IteratorType it(hX, index); const int end = it.size() - 1; @@ -1266,7 +1264,6 @@ void ModelSpecifics::computeThirdDerivativeImpl(int index, d #endif RealType third = static_cast(0); - // RealType hessian = static_cast(0); if (BaseModel::cumulativeGradientAndHessian) { // Compile-time switch @@ -1671,19 +1668,7 @@ inline void ModelSpecifics::updateXBetaImpl(RealType realDel #endif IteratorType it(hX, index); -// <<<<<<< HEAD -// for (; it; ++it) { -// const int k = it.index(); -// hXBeta[k] += realDelta * it.value(); // TODO Check optimization with indicator and intercept -// // Update denominators as well (denominators include (weight * offsExpXBeta)) -// if (BaseModel::likelihoodHasDenominator) { // Compile-time switch -// RealType oldEntry = Weights::isWeighted ? hKWeight[k] * offsExpXBeta[k] : offsExpXBeta[k]; // TODO Delegate condition to forming offExpXBeta -// offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), hXBeta[k], hY[k], k); // Update offsExpXBeta -// RealType newEntry = Weights::isWeighted ? hKWeight[k] * offsExpXBeta[k] : offsExpXBeta[k]; // TODO Delegate condition -// incrementByGroup(denomPid.data(), hPid, k, (newEntry - oldEntry)); // Update denominators -// } -// } -// ======= + if (BaseModel::cumulativeGradientAndHessian) { // cox for (; it; ++it) { const int k = it.index(); @@ -1710,7 +1695,6 @@ inline void ModelSpecifics::updateXBetaImpl(RealType realDel } } } -// >>>>>>> develop computeAccumlatedDenominator(Weights::isWeighted); diff --git a/src/cyclops/engine/ParallelLoops.h b/src/cyclops/engine/ParallelLoops.h index bb6acf15..6333160f 100644 --- a/src/cyclops/engine/ParallelLoops.h +++ b/src/cyclops/engine/ParallelLoops.h @@ -5,7 +5,7 @@ #include #include #include -#include +// #include // #define USE_RCPP_PARALLEL #undef USE_RCPP_PARALLEL diff --git a/tests/testthat/test-cv.R b/tests/testthat/test-cv.R index 8b47229c..3252e670 100644 --- a/tests/testthat/test-cv.R +++ b/tests/testthat/test-cv.R @@ -206,3 +206,23 @@ test_that("Seed gets returned", { control = createControl(seed = NULL), warnings = FALSE) expect_true(!is.null(fit$seed)) }) + +test_that("Auto cvRepetitions", { + data <- simulateCyclopsData(nstrata = 1, nrows = 1000, ncovars = 10, model = "logistic") + cyclopsData <- convertToCyclopsData(data$outcomes, + data$covariates, + modelType = "lr", + addIntercept = TRUE) + + prior <- createPrior("laplace", exclude = c(0), useCrossValidation = TRUE) + + control <- createControl(noiseLevel = "quiet", + cvType = "auto", + cvRepetitions = "auto") + + fit <- fitCyclopsModel(cyclopsData, + prior = prior, + control = control) + + expect_equal(fit$cvRepetitions, 4) +}) diff --git a/tests/testthat/test-dataConversionStratified.R b/tests/testthat/test-dataConversionStratified.R index 84787239..3e33e93d 100644 --- a/tests/testthat/test-dataConversionStratified.R +++ b/tests/testthat/test-dataConversionStratified.R @@ -7,6 +7,7 @@ context("test-dataConversionStratified.R") suppressWarnings(RNGversion("3.5.0")) test_that("Test clr", { + gold <- clogit(case ~ spontaneous + induced + strata(stratum), data=infert) #Convert infert dataset to Cyclops format: @@ -33,6 +34,7 @@ test_that("Test clr", { }) test_that("Test stratified cox", { + test1 <- data.frame(time=c(4,3,1,1,2,2,3), status=c(1,1,1,0,1,1,0), x=c(0,2,1,1,1,0,0), @@ -70,6 +72,7 @@ test_that("Test stratified cox", { }) test_that("Test stratified cox using lung dataset ", { + test <- lung test[is.na(test)] <- 0 # Don't want to bother with missing values diff --git a/tests/testthat/test-dataConversionUnstratified.R b/tests/testthat/test-dataConversionUnstratified.R index 9b7a12a0..aa4254b2 100644 --- a/tests/testthat/test-dataConversionUnstratified.R +++ b/tests/testthat/test-dataConversionUnstratified.R @@ -6,6 +6,7 @@ library("gnm") context("test-dataConversionUnstratified.R") test_that("Test data.frame to data for lr", { + gold <- glm(case ~ spontaneous + induced, data=infert, family="binomial") #Convert infert dataset to Cyclops format: @@ -31,6 +32,7 @@ test_that("Test data.frame to data for lr", { }) test_that("Test unstratified cox using lung dataset ", { + test <- lung test[is.na(test)] <- 0 # Don't want to bother with missing values @@ -68,6 +70,7 @@ test_that("Test unstratified cox using lung dataset ", { test_that("Test poisson regression", { + sim <- simulateCyclopsData(nstrata = 1, nrows = 10000, ncovars = 2, eCovarsPerRow = 0.5, effectSizeSd = 1,model = "poisson") covariates <- sim$covariates outcomes <- sim$outcomes diff --git a/tests/testthat/test-outOfSample.R b/tests/testthat/test-outOfSample.R new file mode 100644 index 00000000..61d2af2d --- /dev/null +++ b/tests/testthat/test-outOfSample.R @@ -0,0 +1,36 @@ +library("testthat") + +context("test-outOfSample.R") +suppressWarnings(RNGversion("3.5.0")) + +test_that("Out-of-sample optimization", { + skip_on_cran() + set.seed(666) + + nrows <- 10000 + outOfSampleProp <- 0.2 + + simulant <- simulateCyclopsData(nstrata = 1, + nrows = nrows, + ncovars = 5, + model = "logistic") + + nOutSample <- nrows * outOfSampleProp + nInSample <- nrows - nOutSample + + weights <- c(rep(1, nInSample), rep(0, nOutSample)) + + data <- convertToCyclopsData(simulant$outcomes, + simulant$covariates, + modelType = "lr", + addIntercept = TRUE) + + prior <- createPrior("laplace", variance = 1, exclude = "(Intercept)", + useCrossValidation = TRUE) + + control <- createWeightBasedSearchControl(initialValue = 1, + noiseLevel = "noisy") + + optimal <- fitCyclopsModel(data, prior, control, weights = weights) + +}) diff --git a/tests/testthat/test-predict.R b/tests/testthat/test-predict.R index 2dbdcc5a..18d90c3b 100644 --- a/tests/testthat/test-predict.R +++ b/tests/testthat/test-predict.R @@ -4,6 +4,7 @@ library("Andromeda") context("test-predict.R") test_that("Test predict for Poisson regression", { + sim <- simulateCyclopsData(nstrata = 1, nrows = 10000, ncovars = 2, eCovarsPerRow = 0.5, effectSizeSd = 1,model = "poisson") covariates <- sim$covariates outcomes <- sim$outcomes @@ -23,6 +24,7 @@ test_that("Test predict for Poisson regression", { }) test_that("Test predict for logistic regression", { + sim <- simulateCyclopsData(nstrata = 1, nrows = 10000, ncovars = 2, eCovarsPerRow = 0.5, effectSizeSd = 1,model = "logistic") covariates <- sim$covariates outcomes <- sim$outcomes @@ -42,6 +44,7 @@ test_that("Test predict for logistic regression", { }) test_that("Test predict for pr with all-zero betas", { + sim <- simulateCyclopsData(nstrata = 1, nrows = 1000, ncovars = 2, eCovarsPerRow = 0.5, effectSizeSd = 1,model = "poisson") covariates <- sim$covariates outcomes <- sim$outcomes @@ -65,6 +68,7 @@ test_that("Test predict for pr with all-zero betas", { }) test_that("Test predict for lr with all-zero betas", { + sim <- simulateCyclopsData(nstrata = 1, nrows = 1000, ncovars = 2, eCovarsPerRow = 0.5, effectSizeSd = 1,model = "logistic") covariates <- sim$covariates outcomes <- sim$outcomes diff --git a/tests/testthat/test-smallFineGray.R b/tests/testthat/test-smallFineGray.R index 5e2316f8..ee1fb9f8 100644 --- a/tests/testthat/test-smallFineGray.R +++ b/tests/testthat/test-smallFineGray.R @@ -5,6 +5,7 @@ library("cmprsk") suppressWarnings(RNGversion("3.5.0")) test_that("Check that Cox fits only have 2 outcome identifies", { + test <- read.table(header=T, sep = ",", text = " start, length, event, x1, x2 0, 4, 1,0,0 @@ -123,6 +124,7 @@ test_that("Check very small Fine-Gray example with no ties (sparse vs. dense)", }) test_that("Check very small Fine-Gray example with no ties using Andromeda", { + test <- read.table(header=T, sep = ",", text = " start, length, event, x1, x2 0, 4, 1,0,0 diff --git a/tools/configure.R b/tools/configure.R index 08ab5033..4b4222e9 100644 --- a/tools/configure.R +++ b/tools/configure.R @@ -9,11 +9,13 @@ makevars_win_out <- file.path("src", "Makevars.win") txt <- readLines(makevars_in) txt_win <- readLines(makevars_win_in) -if (getRversion() < "4.1") { +if (getRversion() < "4.3") { # macOS / linux if (!any(grepl("^CXX_STD", txt))) { txt <- c("CXX_STD = CXX11", txt) } +} +if (getRversion() < "4.2") { # Windoz if (!any(grepl("^CXX_STD", txt_win))) { txt_win <- c("CXX_STD = CXX11", txt_win) }