From fee07f5ed49b3145c263319a9dc90b4baefe9ec6 Mon Sep 17 00:00:00 2001 From: JianxiaoYang Date: Thu, 19 Oct 2023 15:58:12 -0700 Subject: [PATCH] commit code for bootstrapping differences --- R/NewDataConversion.R | 5 +- src/cyclops/CcdInterface.cpp | 1 + src/cyclops/CcdInterface.h | 1 + src/cyclops/ModelData.cpp | 7 ++- src/cyclops/drivers/BootstrapDriver.cpp | 81 ++++++++++++++++--------- 5 files changed, 65 insertions(+), 30 deletions(-) diff --git a/R/NewDataConversion.R b/R/NewDataConversion.R index 855268c5..ae62a919 100644 --- a/R/NewDataConversion.R +++ b/R/NewDataConversion.R @@ -250,7 +250,7 @@ convertToCyclopsData.data.frame <- function(outcomes, name = covarNames) if (modelType == "cox_time" && !is.null(timeEffectMap)) { - if (!all(timeEffectMap$covariateId %in% covariates$covariateId)) stop("Invalid covariateId for time effects.") + if (!all(timeEffectMap$covariateId %in% covarNames)) stop("Invalid covariateId for time effects.") loadNewSqlCyclopsDataStratTimeEffects(object = dataPtr, stratumId = outcomes$stratumId, rowId = outcomes$rowId, @@ -440,7 +440,8 @@ convertToCyclopsData.tbl_dbi <- function(outcomes, batchSize = 100000) # TODO Pick magic number if (modelType == "cox_time" && !is.null(timeEffectMap)) { - if (!all(timeEffectMap$covariateId %in% covariates$covariateId)) stop("Invalid covariateId for time effects.") + covarNames <- unique(pull(covariates, covariateId)) + if (!all(timeEffectMap$covariateId %in% covarNames)) stop("Invalid covariateId for time effects.") loadNewSqlCyclopsDataStratTimeEffects(object = dataPtr, stratumId = outcomes$stratumId, rowId = outcomes$rowId, diff --git a/src/cyclops/CcdInterface.cpp b/src/cyclops/CcdInterface.cpp index b6f6000d..1d8c695b 100644 --- a/src/cyclops/CcdInterface.cpp +++ b/src/cyclops/CcdInterface.cpp @@ -171,6 +171,7 @@ void CcdInterface::setDefaultArguments(void) { arguments.doBootstrap = false; arguments.replicates = 100; arguments.reportRawEstimates = false; + arguments.reportDifference = false; arguments.modelName = "sccs"; arguments.fileFormat = "generic"; //arguments.outputFormat = "estimates"; diff --git a/src/cyclops/CcdInterface.h b/src/cyclops/CcdInterface.h index 318dee1c..c4177c50 100644 --- a/src/cyclops/CcdInterface.h +++ b/src/cyclops/CcdInterface.h @@ -127,6 +127,7 @@ struct CCDArguments { // Needed for boot-strapping bool doBootstrap; bool reportRawEstimates; + bool reportDifference; int replicates; std::string bsFileName; bool doPartial; diff --git a/src/cyclops/ModelData.cpp b/src/cyclops/ModelData.cpp index 6efa148a..9f72c2da 100644 --- a/src/cyclops/ModelData.cpp +++ b/src/cyclops/ModelData.cpp @@ -465,7 +465,12 @@ std::vector ModelData::loadStratTimeEffects( IdType timeCovId = ++maxCovariateId; X.getColumn(index).add_label(timeCovId); // TODO return label to user or automatically exclude this column from L1 regularization timeEffectCovariates.push_back({st+1, timeEffectCovariateIds[j]}); // (stratum, timeEffectCovariateName) - //std::cout << "Create a new column with label [" << timeCovId << "] at stratum [" << st+1 << "] from cov [" << timeEffectCovariateIds[j] << "]\n"; + // std::cout << "Create a new column with label [" << timeCovId << "] at stratum [" << st+1 << "] from cov [" << timeEffectCovariateIds[j] << "]\n"; + std::ostringstream stream; + stream << "Created a new column with label " << timeCovId + << " at stratum " << st+1 + << " from cov " << timeEffectCovariateIds[j]; + log->writeLine(stream); } i++; diff --git a/src/cyclops/drivers/BootstrapDriver.cpp b/src/cyclops/drivers/BootstrapDriver.cpp index 75e6e530..396d66a4 100644 --- a/src/cyclops/drivers/BootstrapDriver.cpp +++ b/src/cyclops/drivers/BootstrapDriver.cpp @@ -133,7 +133,6 @@ void BootstrapDriver::logResults(const CCDArguments& arguments, std::vector& savedBeta, std::string treatmentId) { -// int j = J-1; int tId = 0; while (modelData->getColumnLabel(tId) != treatmentId) tId++; @@ -146,43 +145,71 @@ void BootstrapDriver::logHR(const CCDArguments& arguments, std::vector& string sep(","); // TODO Make option - // Raw estimates -// for(rvector::iterator it = estimates[j]->begin(); it != estimates[j]->end(); ++it) outLog << *it << endl; - // Stats outLog << "Drug_concept_id" << sep << "Condition_concept_id" << sep << "score" << sep << "standard_error" << sep << "bs_mean" << sep << "bs_lower" << sep << "bs_upper" << sep << "bs_prob0" << endl; for (int j = tId; j < J; ++j) { - outLog << modelData->getColumnLabel(j) << - sep << treatmentId << sep; - - double mean = 0.0; - double var = 0.0; - double prob0 = 0.0; - for (rvector::iterator it = estimates[j]->begin(); it != estimates[j]->end(); ++it) { - mean += *it; - var += *it * *it; - if (*it == 0.0) { - prob0 += 1.0; + outLog << modelData->getColumnLabel(j) << + sep << treatmentId << sep; + + double mean = 0.0; + double var = 0.0; + double prob0 = 0.0; + for (rvector::iterator it = estimates[j]->begin(); it != estimates[j]->end(); ++it) { + mean += *it; + var += *it * *it; + if (*it == 0.0) { + prob0 += 1.0; + } } - } - double size = static_cast(estimates[j]->size()); - mean /= size; - var = (var / size) - (mean * mean); - prob0 /= size; + double size = static_cast(estimates[j]->size()); + mean /= size; + var = (var / size) - (mean * mean); + prob0 /= size; + + sort(estimates[j]->begin(), estimates[j]->end()); + int offsetLower = static_cast(size * 0.025); + int offsetUpper = static_cast(size * 0.975); - sort(estimates[j]->begin(), estimates[j]->end()); - int offsetLower = static_cast(size * 0.025); - int offsetUpper = static_cast(size * 0.975); + double lower = *(estimates[j]->begin() + offsetLower); + double upper = *(estimates[j]->begin() + offsetUpper); - double lower = *(estimates[j]->begin() + offsetLower); - double upper = *(estimates[j]->begin() + offsetUpper); + outLog << savedBeta[j] << sep; + outLog << std::sqrt(var) << sep << mean << sep << lower << sep << upper << sep << prob0 << endl; + } - outLog << savedBeta[j] << sep; - outLog << std::sqrt(var) << sep << mean << sep << lower << sep << upper << sep << prob0 << endl; + if (arguments.reportDifference) { + double mean = 0.0; + double var = 0.0; + double prob0 = 0.0; + + double size = static_cast(estimates[tId]->size()); + std::vector diff(size, static_cast(0)); + for (int i = 0; i < size; i++) { + diff[i] = *(estimates[tId]->begin() + i) - *(estimates[tId+1]->begin() + i); + mean += diff[i]; + var += diff[i] * diff[i]; + if (diff[i] == 0.0) { + prob0 += 1.0; + } + } + + mean /= size; + var = (var / size) - (mean * mean); + prob0 /= size; + sort(diff.begin(), diff.end()); + + int offsetLower = static_cast(size * 0.025); + int offsetUpper = static_cast(size * 0.975); + + double lower = diff[offsetLower]; + double upper = diff[offsetUpper]; + + outLog << savedBeta[tId] - savedBeta[tId+1] << sep; + outLog << std::sqrt(var) << sep << mean << sep << lower << sep << upper << sep << prob0 << endl; } outLog.close(); }