From 2146c2c8fa9cea381056c0565789d070c53c126d Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Mon, 18 Jul 2022 18:25:40 -0700 Subject: [PATCH] Extend functions from StatsAPI instead of StatsBase (#42) Also define methods for `modelmatrix` and `dof_residual`. The direct dependency on StatsBase may actually be unnecessary now but I didn't bother to verify. --- Project.toml | 2 ++ docs/Project.toml | 1 + docs/make.jl | 2 +- docs/src/cox.md | 2 +- docs/src/km.md | 4 ++-- docs/src/na.md | 4 ++-- src/Survival.jl | 3 +++ src/cox.jl | 23 ++++++++++++++--------- src/estimator.jl | 8 ++++---- src/kaplanmeier.jl | 6 +++--- src/nelsonaalen.jl | 6 +++--- test/runtests.jl | 6 ++++++ 12 files changed, 42 insertions(+), 25 deletions(-) diff --git a/Project.toml b/Project.toml index 062ac55..934359f 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" @@ -16,6 +17,7 @@ Compat = "3.43, 4" DataFrames = "1" Distributions = "0.20, 0.21, 0.22, 0.23, 0.24, 0.25" Optim = "1" +StatsAPI = "1" StatsBase = "0.30, 0.31, 0.32, 0.33" StatsModels = "0.6" julia = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index 00d07c1..97e4e6b 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,6 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Survival = "8a913413-2070-5976-9d4c-2b364fdc2f7f" diff --git a/docs/make.jl b/docs/make.jl index 4c08352..5a39886 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -using Survival, Documenter, StatsBase +using Survival, Documenter, StatsBase, StatsAPI makedocs( modules = [Survival], diff --git a/docs/src/cox.md b/docs/src/cox.md index 444d968..40fcb8a 100644 --- a/docs/src/cox.md +++ b/docs/src/cox.md @@ -16,7 +16,7 @@ the vector of coefficients in the model. ## API ```@docs -StatsBase.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; kwargs...) +StatsAPI.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; kwargs...) ``` ## References diff --git a/docs/src/km.md b/docs/src/km.md index 01f31dc..3c7db40 100644 --- a/docs/src/km.md +++ b/docs/src/km.md @@ -24,8 +24,8 @@ using Greenwood's formula: ```@docs Survival.KaplanMeier -StatsBase.fit(::Type{KaplanMeier}, ::Any, ::Any) -StatsBase.confint(::KaplanMeier, ::Float64) +StatsAPI.fit(::Type{KaplanMeier}, ::Any, ::Any) +StatsAPI.confint(::KaplanMeier, ::Float64) ``` ## References diff --git a/docs/src/na.md b/docs/src/na.md index f0cc2d9..5b32258 100644 --- a/docs/src/na.md +++ b/docs/src/na.md @@ -24,8 +24,8 @@ from ``n_i`` samples: ```@docs Survival.NelsonAalen -StatsBase.fit(::Type{NelsonAalen}, ::Any, ::Any) -StatsBase.confint(::NelsonAalen, ::Float64) +StatsAPI.fit(::Type{NelsonAalen}, ::Any, ::Any) +StatsAPI.confint(::NelsonAalen, ::Float64) ``` ## References diff --git a/src/Survival.jl b/src/Survival.jl index 0f1f948..c5ec85f 100644 --- a/src/Survival.jl +++ b/src/Survival.jl @@ -4,6 +4,7 @@ using Compat using Distributions using LinearAlgebra using Optim +using StatsAPI using StatsBase using StatsModels @@ -21,10 +22,12 @@ export CoxModel, coxph, coef, + modelmatrix, loglikelihood, nullloglikelihood, nobs, dof, + dof_residual, vcov, stderror diff --git a/src/cox.jl b/src/cox.jl index a43eb34..44ffcbf 100644 --- a/src/cox.jl +++ b/src/cox.jl @@ -87,7 +87,7 @@ struct CoxModel{T<:Real} <: RegressionModel vcov::Matrix{T} end -function StatsBase.coeftable(obj::CoxModel) +function StatsAPI.coeftable(obj::CoxModel) β = coef(obj) se = stderror(obj) z_score = β ./ se @@ -105,18 +105,23 @@ function Base.show(io::IO, model::CoxModel) show(io, ct) end -StatsBase.coef(obj::CoxModel) = obj.β +StatsAPI.coef(obj::CoxModel) = obj.β -StatsBase.loglikelihood(obj::CoxModel) = obj.loglik -StatsBase.nullloglikelihood(obj::CoxModel{T}) where {T} = -_cox_f(obj.β * zero(T), obj.aux) +StatsAPI.modelmatrix(obj::CoxModel) = obj.aux.X -StatsBase.nobs(obj::CoxModel) = size(obj.aux.X, 1) +StatsAPI.loglikelihood(obj::CoxModel) = obj.loglik -StatsBase.dof(obj::CoxModel) = length(obj.β) +StatsAPI.nullloglikelihood(obj::CoxModel) = -_cox_f(zero(coef(obj)), obj.aux) -StatsBase.vcov(obj::CoxModel) = obj.vcov +StatsAPI.nobs(obj::CoxModel) = size(modelmatrix(obj), 1) -StatsBase.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj))) +StatsAPI.dof(obj::CoxModel) = length(coef(obj)) + +StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj) + +StatsAPI.vcov(obj::CoxModel) = obj.vcov + +StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj))) #compute negative loglikelihood @@ -209,7 +214,7 @@ Given a matrix `M` of predictors and a corresponding vector of events, compute t Cox proportional hazard model estimate of coefficients. Returns a `CoxModel` object. """ -function StatsBase.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; tol=1e-4, l2_cost=0) +function StatsAPI.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; tol=1e-4, l2_cost=0) index_perm = sortperm(y) X = M[index_perm,:] s = y[index_perm] diff --git a/src/estimator.jl b/src/estimator.jl index 553860a..9fe9ef9 100644 --- a/src/estimator.jl +++ b/src/estimator.jl @@ -75,9 +75,9 @@ function _estimator(::Type{S}, ets::AbstractVector{EventTime{T}}) where {S,T} return S{T}(times, nevents, ncensor, natrisk, estimator, stderr) end -function StatsBase.fit(::Type{S}, - times::AbstractVector{T}, - status::AbstractVector{<:Integer}) where {S<:NonparametricEstimator,T} +function StatsAPI.fit(::Type{S}, + times::AbstractVector{T}, + status::AbstractVector{<:Integer}) where {S<:NonparametricEstimator,T} ntimes = length(times) nstatus = length(status) if ntimes != nstatus @@ -87,7 +87,7 @@ function StatsBase.fit(::Type{S}, return fit(S, map(EventTime{T}, times, status)) end -function StatsBase.fit(::Type{S}, ets::AbstractVector{<:EventTime}) where S<:NonparametricEstimator +function StatsAPI.fit(::Type{S}, ets::AbstractVector{<:EventTime}) where S<:NonparametricEstimator isempty(ets) && throw(ArgumentError("can't compute $(nameof(S)) from 0 observations")) return _estimator(S, issorted(ets) ? ets : sort(ets)) end diff --git a/src/kaplanmeier.jl b/src/kaplanmeier.jl index 3a2f0f1..f3fda0c 100644 --- a/src/kaplanmeier.jl +++ b/src/kaplanmeier.jl @@ -36,7 +36,7 @@ stderr_update(::Type{KaplanMeier}, gw, dᵢ, nᵢ) = gw + dᵢ / (nᵢ * (nᵢ - Compute the pointwise log-log transformed confidence intervals for the survivor function as a vector of tuples. """ -function StatsBase.confint(km::KaplanMeier, α::Float64=0.05) +function StatsAPI.confint(km::KaplanMeier, α::Float64=0.05) q = quantile(Normal(), 1 - α/2) return map(km.survival, km.stderr) do srv, se l = log(-log(srv)) @@ -52,7 +52,7 @@ Given a vector of times to events and a corresponding vector of indicators that denote whether each time is an observed event or is right censored, compute the Kaplan-Meier estimate of the survivor function. """ -StatsBase.fit(::Type{KaplanMeier}, times, status) +StatsAPI.fit(::Type{KaplanMeier}, times, status) """ fit(KaplanMeier, ets) -> KaplanMeier @@ -60,4 +60,4 @@ StatsBase.fit(::Type{KaplanMeier}, times, status) Compute the Kaplan-Meier estimate of the survivor function from a vector of [`EventTime`](@ref) values. """ -StatsBase.fit(::Type{KaplanMeier}, ets) +StatsAPI.fit(::Type{KaplanMeier}, ets) diff --git a/src/nelsonaalen.jl b/src/nelsonaalen.jl index c3107e0..2daf7fc 100644 --- a/src/nelsonaalen.jl +++ b/src/nelsonaalen.jl @@ -35,7 +35,7 @@ stderr_update(::Type{NelsonAalen}, gw, dᵢ, nᵢ) = gw + dᵢ * (nᵢ - dᵢ) / Compute the pointwise confidence intervals for the cumulative hazard function as a vector of tuples. """ -function StatsBase.confint(na::NelsonAalen, α::Float64=0.05) +function StatsAPI.confint(na::NelsonAalen, α::Float64=0.05) q = quantile(Normal(), 1 - α/2) return map(na.chaz, na.stderr) do srv, se srv - q * se, srv + q * se @@ -49,7 +49,7 @@ Given a vector of times to events and a corresponding vector of indicators that denote whether each time is an observed event or is right censored, compute the Nelson-Aalen estimate of the cumulative hazard rate function. """ -StatsBase.fit(::Type{NelsonAalen}, times, status) +StatsAPI.fit(::Type{NelsonAalen}, times, status) """ fit(NelsonAalen, ets) -> NelsonAalen @@ -57,4 +57,4 @@ StatsBase.fit(::Type{NelsonAalen}, times, status) Compute the Nelson-Aalen estimate of the cumulative hazard rate function from a vector of [`EventTime`](@ref) values. """ -StatsBase.fit(::Type{NelsonAalen}, ets) +StatsAPI.fit(::Type{NelsonAalen}, ets) diff --git a/test/runtests.jl b/test/runtests.jl index b17c8eb..94e2e46 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -198,6 +198,8 @@ end outcome_without_formula = coxph(regressor_matrix, event_vector) + @test modelmatrix(outcome) == modelmatrix(outcome_without_formula) + @test sprint(show, outcome_without_formula) == """ CoxModel{Float64} @@ -215,6 +217,9 @@ x7 0.0914971 0.0286485 3.19378 0.0014 ──────────────────────────────────────────────""" coef_matrix = ModelMatrix(ModelFrame(@formula(event ~ 0 + fin + age + race + wexp + mar + paro + prio), rossi)).m + + @test modelmatrix(outcome) == coef_matrix[sortperm(event_vector), :] + outcome_from_matrix = coxph(coef_matrix, rossi.event; tol=1e-8, l2_cost=0) outcome_from_matrix32 = coxph(Float32.(coef_matrix), rossi.event; tol=1e-5) outcome_from_matrix_int = coxph(Int64.(coef_matrix), rossi.event; tol=1e-6, l2_cost=0.0) @@ -234,6 +239,7 @@ x7 0.0914971 0.0286485 3.19378 0.0014 @test coef(outcome_from_matrix) ≈ coef(outcome_from_matrix_int) atol=1e-5 @test nobs(outcome) == size(rossi, 1) @test dof(outcome) == 7 + @test dof_residual(outcome) == 425 @test loglikelihood(outcome) > nullloglikelihood(outcome) @test all(x->x > 0, eigen(outcome.model.fischer_info).values) @test outcome.model.fischer_info * vcov(outcome) ≈ I atol=1e-10