From 2d969059c5b513f1ac432e9d089fafdeaaa35b86 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Sat, 9 Jul 2022 17:13:39 -0700 Subject: [PATCH] Store Cholesky of the OIM rather than separate matrices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we're storing both the (observed) Fisher information matrix and its inverse, the variance-covariance matrix, separately in the `CoxModel` object. Since we know the observed information is positive semi-definite, we can instead store a (possibly pivoted) Cholesky factorization of the information. That allows us to easily construct both the original matrix and its inverse without needing to store both. In the case of few parameters (like ≤3) and positive semi-definite information but not positive definite, this will actually use a bit more memory than storing the matrices separately. However, for any more parameters, regardless of whether the information is positive definite, this saves a good amount of memory, with the difference becoming more pronounced with more parameters. --- src/cox.jl | 28 ++++++++++++++++++++++------ test/runtests.jl | 7 ++++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/cox.jl b/src/cox.jl index 44ffcbf..61bfeab 100644 --- a/src/cox.jl +++ b/src/cox.jl @@ -78,13 +78,12 @@ end # Structure of Cox regression output -struct CoxModel{T<:Real} <: RegressionModel +struct CoxModel{T<:Real,C<:Union{Cholesky{T},CholeskyPivoted{T}}} <: RegressionModel aux::CoxAux{T} β::Vector{T} loglik::T score::Vector{T} - fischer_info::Matrix{T} - vcov::Matrix{T} + chol::C end function StatsAPI.coeftable(obj::CoxModel) @@ -119,7 +118,7 @@ StatsAPI.dof(obj::CoxModel) = length(coef(obj)) StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj) -StatsAPI.vcov(obj::CoxModel) = obj.vcov +StatsAPI.vcov(obj::CoxModel) = inv(obj.chol) StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj))) @@ -193,6 +192,19 @@ function _cox_fgh!(β, grad, hes, c::CoxAux{T}) where T return y end +function _chol_me_maybe(A) + C = cholesky(A; check=false) + if issuccess(C) + return C + else + @static if VERSION >= v"1.8.0-DEV.1139" + return cholesky(A, RowMaximum()) + else + return cholesky(A, Val(true)) + end + end +end + _coxph(X::AbstractArray{<:Integer}, s::AbstractVector; tol, l2_cost) = _coxph(float(X), s; tol=tol, l2_cost=l2_cost) function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T @@ -201,8 +213,12 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T β₀ = zeros(R, size(X, 2)) fgh! = TwiceDifferentiable(Optim.only_fgh!((f, G, H, x)->_cox_fgh!(x, G, H, c)), β₀) res = optimize(fgh!, β₀, NewtonTrustRegion(), Optim.Options(g_tol = tol)) - β, neg_ll, grad, hes = Optim.minimizer(res), Optim.minimum(res), Optim.gradient(fgh!), Optim.hessian(fgh!) - return CoxModel{R}(c, β, -neg_ll, -grad, hes, pinv(hes)) + β = Optim.minimizer(res) + neg_ll = minimum(res) + grad = Optim.gradient(fgh!) + hes = Symmetric(Optim.hessian(fgh!)) + chol = _chol_me_maybe(hes) + return CoxModel{R,typeof(chol)}(c, β, -neg_ll, -grad, chol) end StatsModels.drop_intercept(::Type{CoxModel}) = true diff --git a/test/runtests.jl b/test/runtests.jl index 94e2e46..5ff619d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -201,7 +201,7 @@ end @test modelmatrix(outcome) == modelmatrix(outcome_without_formula) @test sprint(show, outcome_without_formula) == """ -CoxModel{Float64} +CoxModel{Float64, Cholesky{Float64, Matrix{Float64}}} Coefficients: ────────────────────────────────────────────── @@ -241,8 +241,9 @@ x7 0.0914971 0.0286485 3.19378 0.0014 @test dof(outcome) == 7 @test dof_residual(outcome) == 425 @test loglikelihood(outcome) > nullloglikelihood(outcome) - @test all(x->x > 0, eigen(outcome.model.fischer_info).values) - @test outcome.model.fischer_info * vcov(outcome) ≈ I atol=1e-10 + fisher_info = Matrix(outcome.model.chol) + @test all(x->x > 0, eigen(fisher_info).values) + @test fisher_info * vcov(outcome) ≈ I atol=1e-10 @test norm(outcome.model.score) < 1e-5 @test hcat(outcome_coefmat.cols[1:3]...) ≈ expected_coefs[:,1:3] atol=1e-5