From 353a0bf862f32e29f4e2982deba12ac78c04b057 Mon Sep 17 00:00:00 2001 From: "Marc A. Suchard" Date: Sun, 1 Dec 2024 15:51:48 -0800 Subject: [PATCH] intermediate work on Schoenfeld residual test --- R/Residuals.R | 4 +- src/cyclops/engine/ModelSpecifics.hpp | 246 ++++++++++++++------------ tests/testthat/test-residuals.R | 55 ++++++ 3 files changed, 187 insertions(+), 118 deletions(-) diff --git a/R/Residuals.R b/R/Residuals.R index a9fad514..812c2555 100644 --- a/R/Residuals.R +++ b/R/Residuals.R @@ -56,7 +56,7 @@ residuals.cyclopsFit <- function(object, parm = NULL, type = "schoenfeld", ...) tbl <- table(res$strata) if (dim(tbl) > 1) { - names(tbl) <- paste0("strata=", names(tbl)) + names(tbl) <- paste0("stratum=", names(tbl)) attr(result, "strata") <- tbl } @@ -101,7 +101,7 @@ testProportionality <- function(object, parm = NULL, transformedTimes) { res$hessian <- matrix(res$hessian, nrow = (nCovariates + 1)) if (any(abs(res$gradient[1:nCovariates]) > 1E-5)) { - stop("Internal state of Cyclops 'object' is not at its mode") + warning("Internal state of Cyclops 'object' is not at its mode") # TODO change to `stop` } u <- c(rep(0, nCovariates), res$gradient[nCovariates + 1]) diff --git a/src/cyclops/engine/ModelSpecifics.hpp b/src/cyclops/engine/ModelSpecifics.hpp index eb5da323..01ccceca 100644 --- a/src/cyclops/engine/ModelSpecifics.hpp +++ b/src/cyclops/engine/ModelSpecifics.hpp @@ -765,159 +765,173 @@ void ModelSpecifics::getSchoenfeldResidualsImpl(int index, RealType uGradient = static_cast(0); // unweighted RealType uHessian = static_cast(0); - RealType wGradient = static_cast(0); // weighted RealType wHessian = static_cast(0); - RealType xHessian = static_cast(0); // TODO: only written for accummulive models (Cox, Fine/Grey) - IteratorType it(hX, index); - - RealType resNumerator = static_cast(0); - RealType resDenominator = static_cast(0); - - RealType scoreNumerator1 = static_cast(0); - RealType scoreNumerator2 = static_cast(0); - RealType scoreDenominator = static_cast(0); - - // find start relavent accumulator reset point - auto reset = begin(accReset); - while( *reset < it.index() ) { - ++reset; - } - - for (; it; ) { - int i = it.index(); - const RealType x = it.value(); - - if (*reset <= i) { - resNumerator = static_cast(0); - resDenominator = static_cast(0); + IteratorType it(hX, index); - scoreNumerator1 = static_cast(0); - scoreNumerator2 = static_cast(0); - scoreDenominator = static_cast(0); + RealType resNumerator = static_cast(0); + RealType resDenominator = static_cast(0); + RealType scoreNumerator1 = static_cast(0); + RealType scoreNumerator2 = static_cast(0); + RealType scoreDenominator = static_cast(0); - ++reset; - } + // find start relavent accumulator reset point + auto reset = begin(accReset); + while( *reset < it.index() ) { + ++reset; + } - const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]); + auto processRow = [&](int i, RealType x) { - resNumerator += expXBeta * x; - resDenominator += expXBeta; + std::cerr << "row " << i; - if (hY[i] == 1 ) { - if (hasResiduals) { - residuals->push_back(it.value() - resNumerator / resDenominator); - } - if (hasTimes) { - times->push_back(hOffs[i]); - } - if (hasStrata) { - strata->push_back(hPidOriginal[i]); - } - } + if (*reset <= i) { - if (hasScore) { - const auto weight = covariate[i]; - // MAS does not believe reweighing is correct, but is matching cox.zph + std::cerr << " reset "; - // const auto xt = x * covariate[i]; - // MAS believes covariate should be adjusted - const auto numerator1 = expXBeta * x; // TODO not xt? - const auto numerator2 = expXBeta * x * x; // TODO not xt? + resNumerator = static_cast(0); + resDenominator = static_cast(0); + scoreNumerator1 = static_cast(0); + scoreNumerator2 = static_cast(0); + scoreDenominator = static_cast(0); - if (hY[i] == 1) { - uGradient += x; - wGradient += x * weight; // TODO not xt and no weight? - } + ++reset; + } - scoreNumerator1 += numerator1; - scoreNumerator2 += numerator2; + const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]); - const auto t = scoreNumerator1 / resDenominator; - const auto gradient = hNWeight[i] * t; - const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); + resNumerator += expXBeta * x; + resDenominator += expXBeta; - uGradient -= gradient; - wGradient -= gradient * weight; + if (hY[i] == 1 ) { + std::cerr << " " << x << " for " << resNumerator << " / " << resDenominator; + if (hasResiduals) { + const auto residual = x - resNumerator / resDenominator; + residuals->push_back(residual); + } + if (hasTimes) { + times->push_back(hOffs[i]); + } + if (hasStrata) { + strata->push_back(hPidOriginal[i]); + } + } - uHessian += hessian; - wHessian += hessian * weight * weight; - xHessian += hessian * weight; - } + if (hasScore) { + const auto weight = covariate[i]; + // MAS does not believe reweighing is correct, but is matching cox.zph - ++it; + // const auto xt = x * covariate[i]; + // MAS believes covariate should be adjusted + const auto numerator1 = expXBeta * x; // TODO not xt? + const auto numerator2 = expXBeta * x * x; // TODO not xt? - if (IteratorType::isSparse) { + if (hY[i] == 1) { + uGradient += x; + wGradient += x * weight; // TODO not xt and no weight? + } - const int next = it ? it.index() : N; - for (++i; i < next; ++i) { + scoreNumerator1 += numerator1; + scoreNumerator2 += numerator2; - if (*reset <= i) { - resNumerator = static_cast(0); - resDenominator = static_cast(0); + const auto t = scoreNumerator1 / resDenominator; + const auto gradient = hNWeight[i] * t; + const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); - scoreNumerator1 = static_cast(0); - scoreNumerator2 = static_cast(0); - scoreDenominator = static_cast(0); + uGradient -= gradient; + wGradient -= gradient * weight; - ++reset; - } + uHessian += hessian; + wHessian += hessian * weight * weight; + xHessian += hessian * weight; + } - const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]); + std::cerr << "\n"; + }; - resDenominator += expXBeta; + // main loop + for (; it; ) { - if (hY[i] == 1) { - if (hasResiduals) { - residuals->push_back(-resNumerator / resDenominator); - } - if (hasTimes) { - times->push_back(hOffs[i]); - } - if (hasStrata) { - strata->push_back(hPidOriginal[i]); - } - } - - if (hasScore) { - const auto weight = covariate[i]; - // MAS does not believe reweighing is correct, but is matching cox.zph + int i = it.index(); + const RealType x = it.value(); - // const auto xt = x * covariate[i]; - // MAS believes covariate should be adjusted - const auto numerator1 = expXBeta * x; // TODO not xt? - const auto numerator2 = expXBeta * x * x; // TODO not xt? + processRow(i, x); - if (hY[i] == 1) { - uGradient += x; - wGradient += x * weight; // TODO not xt and no weight? - } + ++it; - scoreNumerator1 += numerator1; - scoreNumerator2 += numerator2; + if (IteratorType::isSparse) { - const auto t = scoreNumerator1 / resDenominator; - const auto gradient = hNWeight[i] * t; - const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); + const int next = it ? it.index() : N; + for (++i; i < next; ++i) { - uGradient -= gradient; - wGradient -= gradient * weight; + processRow(i, static_cast(0)); - uHessian += hessian; - wHessian += hessian * weight * weight; - } - } - } - } +// if (*reset <= i) { +// resNumerator = static_cast(0); +// resDenominator = static_cast(0); +// +// scoreNumerator1 = static_cast(0); +// scoreNumerator2 = static_cast(0); +// scoreDenominator = static_cast(0); +// +// ++reset; +// } +// +// const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]); +// +// resDenominator += expXBeta; +// +// if (hY[i] == 1) { +// if (hasResiduals) { +// residuals->push_back(-resNumerator / resDenominator); +// } +// if (hasTimes) { +// times->push_back(hOffs[i]); +// } +// if (hasStrata) { +// strata->push_back(hPidOriginal[i]); +// } +// } +// +// if (hasScore) { +// const auto weight = covariate[i]; +// // MAS does not believe reweighing is correct, but is matching cox.zph +// +// // const auto xt = x * covariate[i]; +// // MAS believes covariate should be adjusted +// const auto numerator1 = expXBeta * x; // TODO not xt? +// const auto numerator2 = expXBeta * x * x; // TODO not xt? +// +// if (hY[i] == 1) { +// uGradient += x; +// wGradient += x * weight; // TODO not xt and no weight? +// } +// +// scoreNumerator1 += numerator1; +// scoreNumerator2 += numerator2; +// +// const auto t = scoreNumerator1 / resDenominator; +// const auto gradient = hNWeight[i] * t; +// const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); +// +// uGradient -= gradient; +// wGradient -= gradient * weight; +// +// uHessian += hessian; +// wHessian += hessian * weight * weight; +// } + } + } + } if (hasScore) { + score[0] = static_cast(uGradient); score[1] = static_cast(wGradient); - score[2] = static_cast(uHessian); score[3] = static_cast(xHessian); score[4] = static_cast(xHessian); diff --git a/tests/testthat/test-residuals.R b/tests/testthat/test-residuals.R index 2712d102..5094ff89 100644 --- a/tests/testthat/test-residuals.R +++ b/tests/testthat/test-residuals.R @@ -46,3 +46,58 @@ test_that("Check Schoenfeld residuals and PH test, with strata", { expect_equal(ctest$table, gtest$table) }) + +test_that("Check Schoenfeld residuals and PH test, with sparse covariates", { + test <- read.table(header=T, sep = ",", text = " +start, length, event, x1, x2 +0, 4, 1,0,0 +0, 3, 1,2,0 +0, 3, 0,0,1 +0, 2, 1,0,1 +0, 2, 1,1,1 +0, 1, 0,1,0 +0, 1, 1,1,0 +") + + gfit <- coxph(Surv(length, event) ~ x1, test, ties = "breslow") + gres <- residuals(gfit, "schoenfeld") + gtest <- cox.zph(gfit, transform = "identity", global = FALSE) + + + data <- createCyclopsData(Surv(length, event) ~ x1, + # sparseFormula = ~ x1, + data = test, modelType = "cox") + + cfit <- fitCyclopsModel(data) + cres <- residuals(cfit, "schoenfeld") # TODO broken (and not even sparse yet) + + ttimes <- test$length - mean(test$length[test$event == 1]) + ctest <- testProportionality(cfit, parm = NULL, transformedTimes = ttimes) + + + expect_equivalent(cres, gres) +}) + +test_that("Check Schoenfeld residuals and PH test, with sparse covariates", { + test <- read.table(header=T, sep = ",", text = " +start, length, event, x1, x2 +0, 4, 1,0,0 +0, 3, 1,2,0 +0, 3, 0,0,1 +0, 2, 1,0,1 +0, 2, 1,1,1 +0, 1, 0,1,0 +0, 1, 1,1,0 +") + + gfit <- coxph(Surv(length, event) ~ x1 + strata(x2), test, ties = "breslow") + gres <- residuals(gfit, "schoenfeld") + + data <- createCyclopsData(Surv(length, event) ~ x1+ strata(x2), + # sparseFormula = ~ x1, + data = test, modelType = "cox") + + cfit <- fitCyclopsModel(data) + cres <- residuals(cfit, "schoenfeld") # TODO broken (and not even sparse yet) + expect_equivalent(cres, gres) +})