From 2d969059c5b513f1ac432e9d089fafdeaaa35b86 Mon Sep 17 00:00:00 2001
From: Alex Arslan <ararslan@comcast.net>
Date: Sat, 9 Jul 2022 17:13:39 -0700
Subject: [PATCH] Store Cholesky of the OIM rather than separate matrices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we're storing both the (observed) Fisher information matrix
and its inverse, the variance-covariance matrix, separately in the
`CoxModel` object. Since we know the observed information is positive
semi-definite, we can instead store a (possibly pivoted) Cholesky
factorization of the information. That allows us to easily construct
both the original matrix and its inverse without needing to store both.

In the case of few parameters (like ≤3) and positive semi-definite
information but not positive definite, this will actually use a bit more
memory than storing the matrices separately. However, for any more
parameters, regardless of whether the information is positive definite,
this saves a good amount of memory, with the difference becoming more
pronounced with more parameters.
---
 src/cox.jl       | 28 ++++++++++++++++++++++------
 test/runtests.jl |  7 ++++---
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/cox.jl b/src/cox.jl
index 44ffcbf..61bfeab 100644
--- a/src/cox.jl
+++ b/src/cox.jl
@@ -78,13 +78,12 @@ end
 
 # Structure of Cox regression output
 
-struct CoxModel{T<:Real} <: RegressionModel
+struct CoxModel{T<:Real,C<:Union{Cholesky{T},CholeskyPivoted{T}}} <: RegressionModel
     aux::CoxAux{T}
     β::Vector{T}
     loglik::T
     score::Vector{T}
-    fischer_info::Matrix{T}
-    vcov::Matrix{T}
+    chol::C
 end
 
 function StatsAPI.coeftable(obj::CoxModel)
@@ -119,7 +118,7 @@ StatsAPI.dof(obj::CoxModel) = length(coef(obj))
 
 StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj)
 
-StatsAPI.vcov(obj::CoxModel) = obj.vcov
+StatsAPI.vcov(obj::CoxModel) = inv(obj.chol)
 
 StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj)))
 
@@ -193,6 +192,19 @@ function _cox_fgh!(β, grad, hes, c::CoxAux{T}) where T
     return y
 end
 
+function _chol_me_maybe(A)
+    C = cholesky(A; check=false)
+    if issuccess(C)
+        return C
+    else
+        @static if VERSION >= v"1.8.0-DEV.1139"
+            return cholesky(A, RowMaximum())
+        else
+            return cholesky(A, Val(true))
+        end
+    end
+end
+
 _coxph(X::AbstractArray{<:Integer}, s::AbstractVector; tol, l2_cost) = _coxph(float(X), s; tol=tol, l2_cost=l2_cost)
 
 function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T
@@ -201,8 +213,12 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T
     β₀ = zeros(R, size(X, 2))
     fgh! = TwiceDifferentiable(Optim.only_fgh!((f, G, H, x)->_cox_fgh!(x, G, H, c)), β₀)
     res = optimize(fgh!, β₀, NewtonTrustRegion(), Optim.Options(g_tol = tol))
-    β, neg_ll, grad, hes = Optim.minimizer(res), Optim.minimum(res), Optim.gradient(fgh!), Optim.hessian(fgh!)
-    return CoxModel{R}(c, β, -neg_ll, -grad, hes, pinv(hes))
+    β = Optim.minimizer(res)
+    neg_ll = minimum(res)
+    grad = Optim.gradient(fgh!)
+    hes = Symmetric(Optim.hessian(fgh!))
+    chol = _chol_me_maybe(hes)
+    return CoxModel{R,typeof(chol)}(c, β, -neg_ll, -grad, chol)
 end
 
 StatsModels.drop_intercept(::Type{CoxModel}) = true
diff --git a/test/runtests.jl b/test/runtests.jl
index 94e2e46..5ff619d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -201,7 +201,7 @@ end
     @test modelmatrix(outcome) == modelmatrix(outcome_without_formula)
 
     @test sprint(show, outcome_without_formula) == """
-CoxModel{Float64}
+CoxModel{Float64, Cholesky{Float64, Matrix{Float64}}}
 
 Coefficients:
 ──────────────────────────────────────────────
@@ -241,8 +241,9 @@ x7   0.0914971  0.0286485   3.19378     0.0014
     @test dof(outcome) == 7
     @test dof_residual(outcome) == 425
     @test loglikelihood(outcome) > nullloglikelihood(outcome)
-    @test all(x->x > 0, eigen(outcome.model.fischer_info).values)
-    @test outcome.model.fischer_info * vcov(outcome) ≈ I atol=1e-10
+    fisher_info = Matrix(outcome.model.chol)
+    @test all(x->x > 0, eigen(fisher_info).values)
+    @test fisher_info * vcov(outcome) ≈ I atol=1e-10
     @test norm(outcome.model.score) < 1e-5
     @test hcat(outcome_coefmat.cols[1:3]...) ≈ expected_coefs[:,1:3] atol=1e-5