From 668cc433cbde2174c2f9e02150ea06830edc2114 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Wed, 14 Feb 2024 17:20:04 +0100
Subject: [PATCH 1/7] remove matrix data, interface with glm more directly

---
 src/MLJGLMInterface.jl | 139 ++++++++++++++---------------------------
 1 file changed, 47 insertions(+), 92 deletions(-)

diff --git a/src/MLJGLMInterface.jl b/src/MLJGLMInterface.jl
index 7a682e3..1869f31 100644
--- a/src/MLJGLMInterface.jl
+++ b/src/MLJGLMInterface.jl
@@ -97,20 +97,6 @@ const GLM_MODELS = Union{
 ## Helper functions
 ###
 
-"""
-    augment_X(X::AbstractMatrix, b::Bool)
-
-Augment the matrix `X` with a column of ones if the intercept should
-be fitted (`b==true`) and return `X` otherwise.
-"""
-function augment_X(X::AbstractMatrix, b::Bool)
-    if b
-        return hcat(X, ones(float(Int), size(X, 1), 1))
-    else
-        return X
-    end
-end
-
 _to_vector(v::Vector) = v
 _to_vector(v) = collect(v)
 _to_array(v::AbstractArray) = v
@@ -203,38 +189,15 @@ function check_sample_size(model, n, p)
     return nothing
 end
 
-function _matrix_and_features(model, Xcols, handle_intercept=false)
-    col_names = Tables.columnnames(Xcols)
-    n, p = Tables.rowcount(Xcols), length(col_names)
-    augment = handle_intercept && model.fit_intercept
-
-    if !handle_intercept # i.e This only runs during `fit`
-        check_sample_size(model, n, p)
-    end
-
-    if p == 0
-        Xmatrix = Matrix{float(Int)}(undef, n, p)
-    else
-        Xmatrix = Tables.matrix(Xcols)
-    end
-
-    Xmatrix = augment_X(Xmatrix, augment)
-
-    return Xmatrix, col_names
-end
-
-_to_columns(t::Tables.AbstractColumns) = t
-_to_columns(t) = Tables.Columns(t)
-
 """
     prepare_inputs(model, X; handle_intercept=false)
 
 Handle `model.offsetcol` and `model.fit_intercept` if `handle_intercept=true`.
 `handle_intercept` is disabled for fitting since the StatsModels.@formula handles the intercept.
 """
-function prepare_inputs(model, X; handle_intercept=false)
-    Xcols = _to_columns(X)
-    table_features = Tables.columnnames(Xcols)
+function prepare_inputs(model, X)
+    Xcols = Tables.columntable(X)
+    table_features = Base.keys(Xcols)
     p = length(table_features)
     p >= 1 || throw(
         ArgumentError("`X` must contain at least one feature column.")
     )
@@ -253,9 +216,12 @@ function prepare_inputs(model, X; handle_intercept=false)
         end
     end
     Xminoffset, offset = split_X_offset(Xcols, model.offsetcol)
-    Xminoffset_cols = _to_columns(Xminoffset)
-    Xmatrix, features = _matrix_and_features(model, Xminoffset_cols , handle_intercept)
-    return Xmatrix, offset, _to_array(features)
+    features = Tables.columnnames(Xminoffset)
+
+    @show first(Xminoffset)
+    check_sample_size(model, length(first(Xminoffset)), p)
+
+    return Xminoffset, offset, _to_array(features)
 end
 
 """
@@ -285,14 +251,6 @@ function glm_report(glm_model, features, reportkeys)
     end
     if :coef_table in reportkeys
         coef_table = GLM.coeftable(glm_model)
-        # Update the variable names in the `coef_table` with the actual variable
-        # names seen during fit.
-        if length(coef_table.rownms) == length(features)
-            # This means `fit_intercept` is false
-            coef_table.rownms = string.(features)
-        else
-            coef_table.rownms = [string.(features); "(Intercept)"]
-        end
         report_dict[:coef_table] = coef_table
     end
     if :glm_model in reportkeys
@@ -376,31 +334,31 @@ end
 dispersion(fr::FitResult) = fr.dispersion
 params(fr::FitResult) = fr.params
 
+
+
 function MMI.fit(model::LinearRegressor, verbosity::Int, X, y, w=nothing)
     # apply the model
-    Xmatrix, offset, features = prepare_inputs(model, X)
+    X_col_table, offset, features = prepare_inputs(model, X)
     y_ = isempty(offset) ? y : y .- offset
     wts = check_weights(w, y_)
-    data = glm_data(model, Xmatrix, y_, features)
+    data = merge(X_col_table, (; y = y_))
     form = glm_formula(model, features)
-    fitted_lm = GLM.lm(form, data; model.dropcollinear, wts).model
-    fitresult = FitResult(
-        GLM.coef(fitted_lm), GLM.dispersion(fitted_lm), (features = features,)
-    )
+    fitted_lm = GLM.lm(form, data; model.dropcollinear, wts)
+
     # form the report
     report = glm_report(fitted_lm, features, model.report_keys)
     cache = nothing
     # return
-    return fitresult, cache, report
+    return fitted_lm, cache, report
 end
 
 function MMI.fit(model::LinearCountRegressor, verbosity::Int, X, y, w=nothing)
     # apply the model
-    Xmatrix, offset, features = prepare_inputs(model, X)
-    data = glm_data(model, Xmatrix, y, features)
+    X_col_table, offset, features = prepare_inputs(model, X)
+    data = merge(X_col_table, (; y))
     wts = check_weights(w, y)
     form = glm_formula(model, features)
-    fitted_glm_frame = GLM.glm(
+    fitted_glm = GLM.glm(
         form, data, model.distribution, model.link;
         offset,
         model.maxiter,
@@ -409,26 +367,23 @@ function MMI.fit(model::LinearCountRegressor, verbosity::Int, X, y, w=nothing)
         model.minstepfac,
         wts
     )
-    fitted_glm = fitted_glm_frame.model
-    fitresult = FitResult(
-        GLM.coef(fitted_glm), GLM.dispersion(fitted_glm), (features = features,)
-    )
+
     # form the report
     report = glm_report(fitted_glm, features, model.report_keys)
     cache = nothing
     # return
-    return fitresult, cache, report
+    return fitted_glm, cache, report
 end
 
 function MMI.fit(model::LinearBinaryClassifier, verbosity::Int, X, y, w=nothing)
     # apply the model
-    decode = y[1]
+    decode = MMI.classes(y)
     y_plain = MMI.int(y) .- 1 # 0, 1 of type Int
     wts = check_weights(w, y_plain)
-    Xmatrix, offset, features = prepare_inputs(model, X)
-    data = glm_data(model, Xmatrix, y_plain, features)
+    X_col_table, offset, features = prepare_inputs(model, X)
+    data = merge(X_col_table, (; y = y_plain))
     form = glm_formula(model, features)
-    fitted_glm_frame = GLM.glm(
+    fitted_glm = GLM.glm(
         form, data, Bernoulli(), model.link;
         offset,
         model.maxiter,
@@ -437,27 +392,22 @@ function MMI.fit(model::LinearBinaryClassifier, verbosity::Int, X, y, w=nothing)
         model.minstepfac,
         wts
     )
-    fitted_glm = fitted_glm_frame.model
-    fitresult = FitResult(
-        GLM.coef(fitted_glm), GLM.dispersion(fitted_glm), (features = features,)
-    )
+
     # form the report
     report = glm_report(fitted_glm, features, model.report_keys)
    cache = nothing
     # return
-    return (fitresult, decode), cache, report
+    return (fitted_glm, decode), cache, report
 end
 
 glm_fitresult(::LinearRegressor, fitresult) = fitresult
 glm_fitresult(::LinearCountRegressor, fitresult) = fitresult
 glm_fitresult(::LinearBinaryClassifier, fitresult) = fitresult[1]
 
-coefs(fr::FitResult) = fr.coefs
-
 function MMI.fitted_params(model::GLM_MODELS, fitresult)
     result = glm_fitresult(model, fitresult)
-    coef = coefs(result)
-    features = copy(params(result).features)
+    coef = GLM.coef(result)
+    features = Symbol.(filter(x -> x!="(Intercept)", GLM.coefnames(result)))
     if model.fit_intercept
         intercept = coef[end]
         coef_ = coef[1:end-1]
@@ -468,11 +418,9 @@ function MMI.fitted_params(model::GLM_MODELS, fitresult)
     return (; features, coef=coef_, intercept)
 end
 
-
 ####
 #### PREDICT FUNCTIONS
 ####
-
 glm_link(model) = model.link
 glm_link(::LinearRegressor) = GLM.IdentityLink()
 
@@ -493,16 +441,22 @@ function MMI.predict_mean(model::GLM_MODELS, fitresult, Xnew)
     return glm_predict(link, coef, Xmatrix, model.offsetcol, offset)
 end
 
-# barrier function to aid performance
-function glm_predict(link, coef, Xmatrix, offsetcol, offset)
-    η = offsetcol === nothing ? (Xmatrix * coef) : (Xmatrix * coef .+ offset)
-    μ = GLM.linkinv.(link, η)
-    return μ
+
+# More efficient fallback. mean is not defined for LinearBinaryClassifier
+function MMI.predict_mean(model::LinearRegressor, fitresult, Xnew)
+    X_col_table, offset, features = prepare_inputs(model, Xnew)
+    p = GLM.predict(fitresult, X_col_table)
+    isempty(offset) ? p : p .+ offset
+end
+
+function MMI.predict_mean(model::LinearCountRegressor, fitresult, Xnew)
+    X_col_table, offset, features = prepare_inputs(model, Xnew)
+    return GLM.predict(fitresult, X_col_table; offset = offset)
 end
 
 function MMI.predict(model::LinearRegressor, fitresult, Xnew)
     μ = MMI.predict_mean(model, fitresult, Xnew)
-    σ̂ = dispersion(fitresult)
+    σ̂ = GLM.dispersion(fitresult.model)
     return [GLM.Normal(μᵢ, σ̂) for μᵢ ∈ μ]
 end
 
@@ -512,8 +466,9 @@ function MMI.predict(model::LinearCountRegressor, fitresult, Xnew)
 end
 
 function MMI.predict(model::LinearBinaryClassifier, (fitresult, decode), Xnew)
-    π = MMI.predict_mean(model, (fitresult, decode), Xnew)
-    return MMI.UnivariateFinite(MMI.classes(decode), π, augment=true)
+    X_col_table, offset, features = prepare_inputs(model, Xnew)
+    π = GLM.predict(fitresult, X_col_table; offset)
+    return MMI.UnivariateFinite(decode, π, augment=true)
 end
 
 # NOTE: predict_mode uses MLJBase's fallback
@@ -539,7 +494,7 @@ metadata_pkg.(
 
 metadata_model(
     LinearRegressor,
-    input = Table(Continuous),
+    input = Table(Continuous, Finite),
     target = AbstractVector{Continuous},
     supports_weights = true,
     path = "$PKG.LinearRegressor"
@@ -547,7 +502,7 @@ metadata_model(
 
 metadata_model(
     LinearBinaryClassifier,
-    input = Table(Continuous),
+    input = Table(Continuous, Finite),
     target = AbstractVector{<:Finite{2}},
     supports_weights = true,
     path = "$PKG.LinearBinaryClassifier"
@@ -555,7 +510,7 @@ metadata_model(
 
 metadata_model(
     LinearCountRegressor,
-    input = Table(Continuous),
+    input = Table(Continuous, Finite),
     target = AbstractVector{Count},
     supports_weights = true,
     path = "$PKG.LinearCountRegressor"
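
[Note on PATCH 1/7] The hand-built design matrix (augment_X, _matrix_and_features)
is replaced by GLM.jl's StatsModels formula interface: fit merges the target into
the feature table and hands GLM a formula, so GLM constructs the model matrix,
the intercept column, and any categorical dummy coding itself. A minimal sketch
of the call pattern, with illustrative names and data not taken from the diff:

    using GLM, StatsModels, Tables

    X    = (x1 = [1.0, 2.0, 3.0],)                  # any Tables.jl table
    data = merge(Tables.columntable(X), (; y = [2.0, 4.0, 6.0]))
    form = FormulaTerm(Term(:y), sum(term.([:x1])) + term(1))  # y ~ x1 + 1
    fitted = GLM.lm(form, data)   # formula-aware wrapper; no `.model` stripping
    GLM.coef(fitted)              # coefficient vector, intercept included

Keeping the wrapper whole (rather than extracting `.model`) preserves the
formula that later prediction relies on.
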
From 068ba59f5f3c60900c18abf358b1e392adc74b73 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Wed, 14 Feb 2024 17:20:10 +0100
Subject: [PATCH 2/7] tweak the tests

---
 test/runtests.jl | 52 ++++++++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 28 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index a266fc6..6df6da7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -49,7 +49,7 @@ expit(X) = 1 ./ (1 .+ exp.(-X))
 
     # test `predict` object
     p_distr = predict(atom_ols, fitresult, selectrows(X, test))
-    dispersion = MLJGLMInterface.dispersion(fitresult)
+    dispersion = GLM.dispersion(fitresult.model)
     @test p_distr[1] == Normal(p[1], dispersion)
 
     # test metadata
@@ -103,7 +103,7 @@ end
 
     # check predict on `Xnew` with wrong dims
     Xnew = MLJBase.table(Tables.matrix(X)[:, 1:3], names=Tables.columnnames(X)[1:3])
-    @test_throws DimensionMismatch predict(lr, fitresult, Xnew)
+    @test_throws ErrorException predict(lr, fitresult, Xnew)
 
     fitted_params(pr, fitresult)
 
@@ -158,7 +158,7 @@ end
     Xnew = MLJBase.table(
         Tables.matrix(XTable)[:, 1:3], names=Tables.columnnames(XTable)[1:3]
     )
-    @test_throws DimensionMismatch predict(lcr, fitresult, Xnew)
+    @test_throws ErrorException predict(lcr, fitresult, Xnew)
 
     # Test metadata
     model = lcr
@@ -194,31 +194,14 @@ modeltypes = [LinearRegressor, LinearBinaryClassifier, LinearCountRegressor]
         end
     end
 
-
-    @testset "intercept/offsetcol" for mt in modeltypes
-        X = (x1=[1,2,3], x2=[4,5,6])
-        m = mt(fit_intercept=true, offsetcol=:x2)
-        r = MLJGLMInterface.prepare_inputs(m, X; handle_intercept=true)
-        Xmatrix, offset, features = r
-        @test offset == [4, 5, 6]
-        @test Xmatrix== [1 1;
-                         2 1;
-                         3 1]
-        @test features == [:x1]
-
-        m1 = mt(fit_intercept=true, offsetcol=:x3)
-        @test_throws ArgumentError MLJGLMInterface.prepare_inputs(m1, X)
-    end
-
+    
     @testset "no intercept/no offsetcol" for mt in modeltypes
         X = (x1=[1,2,3], x2=[4,5,6])
         m = mt(fit_intercept=false)
-        r = MLJGLMInterface.prepare_inputs(m, X; handle_intercept=true)
-        Xmatrix, offset, features = r
+        r = MLJGLMInterface.prepare_inputs(m, X)
+        Xcols, offset, features = r
        @test offset == []
-        @test Xmatrix == [1 4;
-                          2 5;
-                          3 6]
+        @test Xcols == (x1 = [1, 2, 3], x2 = [4, 5, 6])
         @test features == [:x1, :x2]
 
         X1 = NamedTuple()
@@ -228,10 +211,10 @@ modeltypes = [LinearRegressor, LinearBinaryClassifier, LinearCountRegressor]
     @testset "offsetcol but no intercept" for mt in modeltypes
         X = (x1=[1,2,3], x2=[4,5,6])
         m = mt(offsetcol=:x1, fit_intercept=false)
-        Xmatrix, offset, features = MLJGLMInterface.prepare_inputs(m, X)
+        Xcols, offset, features = MLJGLMInterface.prepare_inputs(m, X)
 
         @test offset == [1, 2, 3]
-        @test Xmatrix == permutedims([4 5 6])
+        @test Xcols == (x2 = [4, 5, 6],)
         @test features == [:x2]
 
         # throw error for tables with just one column.
@@ -343,7 +326,7 @@ end
     @test :stderror in keys(report)
     @test :dof_residual ∉ keys(report)
     @test :glm_model in keys(report)
-    @test report.glm_model isa GLM.GeneralizedLinearModel
+    @test report.glm_model.model isa GLM.GeneralizedLinearModel
 
     # check that an empty `NamedTuple` is output for
     # `report_params === nothing`
@@ -352,8 +335,21 @@ end
     @test report === NamedTuple()
 end
 
+@testset "Categorical predictors" begin
+    X = (x1=[1.,2.,3.], x2 = categorical([0,1,0]))
+    y = categorical([false, false, true])
+
+    mach = machine(LinearBinaryClassifier(), X, y)
+    fit!(mach)
+
+    fp = fitted_params(mach)
+
+    @test fp.features == Symbol.(["x1", "x2: 0", "x2: 1"])
+    @test_throws KeyError predict(mach, (x1 = [2,3,4], x2 = categorical([0,1,2])))
+    @test all(isapprox.(pdf.(predict(mach, X), true), [0,0,1], atol = 1e-3))
+end
+
 @testset "Issue 27" begin
-    @inferred MLJGLMInterface.augment_X(rand(2, 2), false)
     n, p = (100, 2)
     Xmat = rand(p, n)' # note using adjoint
     X = MLJBase.table(Xmat)
From 4e05569cc401805dbf5b5f796ff8cf1d2704a5b2 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Thu, 15 Feb 2024 10:23:02 +0100
Subject: [PATCH 3/7] add FitResult again

---
 src/MLJGLMInterface.jl | 75 +++++++++++++++++++++---------------------
 test/runtests.jl       |  4 +--
 2 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/src/MLJGLMInterface.jl b/src/MLJGLMInterface.jl
index 1869f31..cad5b59 100644
--- a/src/MLJGLMInterface.jl
+++ b/src/MLJGLMInterface.jl
@@ -18,7 +18,7 @@ import MLJModelInterface
 import MLJModelInterface: metadata_pkg, metadata_model, Table, Continuous,
     Count, Finite, OrderedFactor, Multiclass, @mlj_model
 using Distributions: Bernoulli, Distribution, Poisson
-using StatsModels: ConstantTerm, Term, FormulaTerm, term
+using StatsModels: ConstantTerm, Term, FormulaTerm, term, modelcols
 using Tables
 import GLM
@@ -218,7 +218,6 @@ function prepare_inputs(model, X)
     Xminoffset, offset = split_X_offset(Xcols, model.offsetcol)
     features = Tables.columnnames(Xminoffset)
 
-    @show first(Xminoffset)
     check_sample_size(model, length(first(Xminoffset)), p)
 
     return Xminoffset, offset, _to_array(features)
 end
@@ -322,7 +321,9 @@ end
 #### FIT FUNCTIONS
 ####
 
-struct FitResult{V<:AbstractVector, T, R}
+struct FitResult{F, V<:AbstractVector, T, R}
+    "Formula containing all coefficients and their types"
+    formula::F
     "Vector containing coefficients of the predictors and intercept"
     coefs::V
     "An estimate of the dispersion parameter of the glm model. "
     dispersion::T
@@ -331,10 +332,11 @@ struct FitResult{V<:AbstractVector, T, R}
     params::R
 end
 
+FitResult(fitted_glm, features) = FitResult(GLM.formula(fitted_glm), GLM.coef(fitted_glm), GLM.dispersion(fitted_glm.model), (features = features,))
+
 dispersion(fr::FitResult) = fr.dispersion
 params(fr::FitResult) = fr.params
+coefs(fr::FitResult) = fr.coefs
 
 function MMI.fit(model::LinearRegressor, verbosity::Int, X, y, w=nothing)
     # apply the model
@@ -345,11 +347,13 @@ function MMI.fit(model::LinearRegressor, verbosity::Int, X, y, w=nothing)
     form = glm_formula(model, features)
     fitted_lm = GLM.lm(form, data; model.dropcollinear, wts)
 
+    fitresult = FitResult(fitted_lm, features)
+
     # form the report
     report = glm_report(fitted_lm, features, model.report_keys)
     cache = nothing
     # return
-    return fitted_lm, cache, report
+    return fitresult, cache, report
 end
 
@@ -368,11 +372,13 @@ function MMI.fit(model::LinearCountRegressor, verbosity::Int, X, y, w=nothing)
         model.minstepfac,
         wts
     )
 
+    fitresult = FitResult(fitted_glm, features)
+
     # form the report
     report = glm_report(fitted_glm, features, model.report_keys)
     cache = nothing
     # return
-    return fitted_glm, cache, report
+    return fitresult, cache, report
 end
 
@@ -393,21 +399,24 @@ function MMI.fit(model::LinearBinaryClassifier, verbosity::Int, X, y, w=nothing)
         model.minstepfac,
         wts
     )
 
+    fitresult = FitResult(fitted_glm, features)
+
     # form the report
     report = glm_report(fitted_glm, features, model.report_keys)
     cache = nothing
     # return
-    return (fitted_glm, decode), cache, report
+    return (fitresult, decode), cache, report
 end
 
 glm_fitresult(::LinearRegressor, fitresult) = fitresult
 glm_fitresult(::LinearCountRegressor, fitresult) = fitresult
 glm_fitresult(::LinearBinaryClassifier, fitresult) = fitresult[1]
 
+
 function MMI.fitted_params(model::GLM_MODELS, fitresult)
     result = glm_fitresult(model, fitresult)
-    coef = GLM.coef(result)
-    features = Symbol.(filter(x -> x!="(Intercept)", GLM.coefnames(result)))
+    coef = coefs(result)
+    features = copy(params(result).features)
     if model.fit_intercept
         intercept = coef[end]
         coef_ = coef[1:end-1]
@@ -424,39 +433,30 @@ end
 glm_link(model) = model.link
 glm_link(::LinearRegressor) = GLM.IdentityLink()
 
-# more efficient than MLJBase fallback
-function MMI.predict_mean(model::GLM_MODELS, fitresult, Xnew)
-    Xmatrix, offset, _ = prepare_inputs(model, Xnew; handle_intercept=true)
-    result = glm_fitresult(model, fitresult) # ::FitResult
-    coef = coefs(result)
-    p = size(Xmatrix, 2)
-    if p != length(coef)
-        throw(
-            DimensionMismatch(
-                "The number of features in training and prediction datasets must be equal"
-            )
-        )
-    end
-    link = glm_link(model)
-    return glm_predict(link, coef, Xmatrix, model.offsetcol, offset)
+function glm_predict(link, terms, coef, offsetcol::Nothing, Xnew)
+    mm = modelcols(terms, Xnew)
+    η = mm * coef
+    μ = GLM.linkinv.(link, η)
+    return μ
 end
 
+function glm_predict(link, terms, coef, offsetcol::Symbol, Xnew)
+    mm = modelcols(terms, Xnew)
+    offset = Tables.getcolumn(Xnew, offsetcol)
+    η = mm * coef .+ offset
+    μ = GLM.linkinv.(link, η)
+    return μ
+end
 
-# More efficient fallback. mean is not defined for LinearBinaryClassifier
-function MMI.predict_mean(model::LinearRegressor, fitresult, Xnew)
-    X_col_table, offset, features = prepare_inputs(model, Xnew)
-    p = GLM.predict(fitresult, X_col_table)
-    isempty(offset) ? p : p .+ offset
-end
-
-function MMI.predict_mean(model::LinearCountRegressor, fitresult, Xnew)
-    X_col_table, offset, features = prepare_inputs(model, Xnew)
-    return GLM.predict(fitresult, X_col_table; offset = offset)
+# More efficient fallback. predict_mean is not defined for LinearBinaryClassifier
+function MMI.predict_mean(model::Union{LinearRegressor, LinearCountRegressor}, fitresult, Xnew)
+    p = glm_predict(glm_link(model), fitresult.formula.rhs, fitresult.coefs, model.offsetcol, Xnew)
+    return p
 end
 
 function MMI.predict(model::LinearRegressor, fitresult, Xnew)
     μ = MMI.predict_mean(model, fitresult, Xnew)
-    σ̂ = GLM.dispersion(fitresult.model)
+    σ̂ = dispersion(fitresult)
     return [GLM.Normal(μᵢ, σ̂) for μᵢ ∈ μ]
 end
 
@@ -466,9 +466,8 @@ function MMI.predict(model::LinearCountRegressor, fitresult, Xnew)
 end
 
 function MMI.predict(model::LinearBinaryClassifier, (fitresult, decode), Xnew)
-    X_col_table, offset, features = prepare_inputs(model, Xnew)
-    π = GLM.predict(fitresult, X_col_table; offset)
-    return MMI.UnivariateFinite(decode, π, augment=true)
+    p = glm_predict(glm_link(model), fitresult.formula.rhs, fitresult.coefs, model.offsetcol, Xnew)
+    return MMI.UnivariateFinite(decode, p, augment=true)
 end
 
 # NOTE: predict_mode uses MLJBase's fallback
diff --git a/test/runtests.jl b/test/runtests.jl
index 6df6da7..bb383f2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -49,7 +49,7 @@ expit(X) = 1 ./ (1 .+ exp.(-X))
 
     # test `predict` object
     p_distr = predict(atom_ols, fitresult, selectrows(X, test))
-    dispersion = GLM.dispersion(fitresult.model)
+    dispersion = MLJGLMInterface.dispersion(fitresult)
     @test p_distr[1] == Normal(p[1], dispersion)
@@ -344,7 +344,7 @@ end
 
     fp = fitted_params(mach)
 
-    @test fp.features == Symbol.(["x1", "x2: 0", "x2: 1"])
+    @test fp.features == [:x1, :x2]
     @test_throws KeyError predict(mach, (x1 = [2,3,4], x2 = categorical([0,1,2])))
     @test all(isapprox.(pdf.(predict(mach, X), true), [0,0,1], atol = 1e-3))
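
[Note on PATCH 3/7] FitResult returns, now carrying the fitted formula so that
prediction can rebuild the design matrix with StatsModels.modelcols instead of
round-tripping through prepare_inputs. The glm_predict pair above boils down to
the following sketch (names illustrative; `rhs` is the schema-applied right-hand
side stored in fitresult.formula.rhs):

    mm = modelcols(rhs, Tables.columntable(Xnew))  # design matrix incl. dummies
    η  = mm * fitresult.coefs                      # linear predictor
    μ  = GLM.linkinv.(link, η)                     # mean response

Applying the stored schema is also what makes prediction throw a KeyError when
Xnew contains categorical levels unseen during fit, as the tests assert.
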
From 8984846e4ed0123c493d57dead84b2a44f2d590c Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Fri, 16 Feb 2024 13:49:25 +0100
Subject: [PATCH 4/7] intercept as first term

---
 src/MLJGLMInterface.jl | 18 ++++--------------
 test/runtests.jl       |  9 ++-------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/src/MLJGLMInterface.jl b/src/MLJGLMInterface.jl
index cad5b59..a30d077 100644
--- a/src/MLJGLMInterface.jl
+++ b/src/MLJGLMInterface.jl
@@ -259,15 +259,6 @@ function glm_report(glm_model, features, reportkeys)
     return NamedTuple{Tuple(keys(report_dict))}(values(report_dict))
 end
 
-function _ensure_intercept_at_end(form::FormulaTerm)
-    fixed_rhs = if first(form.rhs) isa ConstantTerm
-        (form.rhs[2:end]..., form.rhs[1])
-    else
-        form.rhs
-    end
-    return FormulaTerm(form.lhs, fixed_rhs)
-end
-
 """
     glm_formula(model, features::AbstractVector{Symbol}) -> FormulaTerm
 
@@ -278,9 +269,8 @@ function glm_formula(model, features::AbstractVector{Symbol})::FormulaTerm
     # Adding a zero term explicitly disables the intercept.
     # See the StatsModels.jl tests for more information.
     intercept_term = model.fit_intercept ? 1 : 0
-    form = FormulaTerm(Term(:y), sum(term.(features)) + term(intercept_term))
-    fixed_form = _ensure_intercept_at_end(form)
-    return fixed_form
+    form = FormulaTerm(Term(:y), term(intercept_term) + sum(term.(features)))
+    return form
 end
 
@@ -418,8 +408,8 @@ function MMI.fitted_params(model::GLM_MODELS, fitresult)
     coef = coefs(result)
     features = copy(params(result).features)
     if model.fit_intercept
-        intercept = coef[end]
-        coef_ = coef[1:end-1]
+        intercept = coef[1]
+        coef_ = coef[2:end]
     else
         intercept = zero(eltype(coef))
         coef_ = copy(coef)
diff --git a/test/runtests.jl b/test/runtests.jl
index bb383f2..f5ead7e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -294,8 +294,8 @@ end
     fitresult, _, report = fit(lr, 1, X, y)
     ctable = last(report)
     parameters = ctable.rownms # Row names.
-    @test parameters == ["a", "b", "c", "(Intercept)"]
-    intercept = ctable.cols[1][4]
+    @test parameters == ["(Intercept)", "a", "b", "c"]
+    intercept = ctable.cols[1][1]
 
     yhat = predict(lr, fitresult, X)
     @test cross_entropy(yhat, y) < 0.6
@@ -359,8 +359,3 @@ end
     fit(lr, 1, X, y)
 end
 
-@testset "Issue 34" begin
-    model = LinearRegressor()
-    form = MLJGLMInterface.glm_formula(model, [:a, :b]) |> string
-    @test form == "y ~ a + b + 1"
-end
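
[Note on PATCH 4/7] Emitting the intercept as the first formula term makes the
reshuffling helper unnecessary and moves the intercept from coef[end] to
coef[1]. Sketch of the difference (illustrative feature names):

    # before: y ~ a + b + 1  ->  coefficients ordered [a, b, intercept]
    # after:  y ~ 1 + a + b  ->  coefficients ordered [intercept, a, b]
    form = FormulaTerm(Term(:y), term(1) + sum(term.([:a, :b])))

which is why fitted_params and the coeftable test now index the intercept at
position 1, and why the old "y ~ a + b + 1" string test is dropped.
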
From 88da66166bdb81df415573bf82c2f1b2bd36f948 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Fri, 16 Feb 2024 14:03:38 +0100
Subject: [PATCH 5/7] more tests for categorical variables

---
 test/runtests.jl | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index f5ead7e..22bccbf 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -336,17 +336,27 @@ end
 
 @testset "Categorical predictors" begin
-    X = (x1=[1.,2.,3.], x2 = categorical([0,1,0]))
-    y = categorical([false, false, true])
+    X = (x1=[1.,2.,3.,4.], x2 = categorical([0,1,0,0]), x3 = categorical([1, 0, 1,0], ordered=true))
+    y = categorical([false, false, true, true])
 
     mach = machine(LinearBinaryClassifier(), X, y)
     fit!(mach)
-
     fp = fitted_params(mach)
 
-    @test fp.features == [:x1, :x2]
-    @test_throws KeyError predict(mach, (x1 = [2,3,4], x2 = categorical([0,1,2])))
-    @test all(isapprox.(pdf.(predict(mach, X), true), [0,0,1], atol = 1e-3))
+    @test fp.features == [:x1, :x2, :x3]
+    @test_throws KeyError predict(mach, (x1 = [2,3,4], x2 = categorical([0,1,2]), x3 = categorical([1,0,1], ordered=true)))
+    @test all(isapprox.(pdf.(predict(mach, X), true), [0,0,1,1], atol = 1e-3))
+
+    # only a categorical variable, with and without intercept
+    X2 = (; x = X.x2)
+    y2 = [0., 2., 1., 2.]
+
+    fitresult, _, report = fit(LinearRegressor(), 1, X2, y2)
+    pred = predict_mean(LinearRegressor(), fitresult, X2)
+    fitresult_nointercept, _, report = fit(LinearRegressor(fit_intercept = false), 1, X2, y2)
+
+    @test all(isapprox.(fitresult.coefs, [1.0, 1.0]))
+    @test all(isapprox.(fitresult_nointercept.coefs, [1.0, 2.0]))
+
 end
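
[Note on PATCH 5/7] The expected coefficients in the categorical-only test
follow from group means. For x = categorical([0,1,0,0]) and y2 = [0., 2., 1., 2.],
level 0 covers rows 1, 3, 4 and level 1 covers row 2, so (illustrative check):

    using Statistics
    mean([0., 1., 2.])   # 1.0 -> intercept (level-0 baseline) with fit_intercept=true
    2.0 - 1.0            # 1.0 -> level-1 dummy coefficient, hence coefs ≈ [1.0, 1.0]
                         # without an intercept both level means appear: ≈ [1.0, 2.0]
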
From 84b41d4793cfea4eba02b83ef447fb7085012252 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Fri, 16 Feb 2024 14:17:11 +0100
Subject: [PATCH 6/7] remove unused function glm_data

---
 src/MLJGLMInterface.jl | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/MLJGLMInterface.jl b/src/MLJGLMInterface.jl
index a30d077..668097c 100644
--- a/src/MLJGLMInterface.jl
+++ b/src/MLJGLMInterface.jl
@@ -273,16 +273,6 @@ function glm_formula(model, features::AbstractVector{Symbol})::FormulaTerm
     return form
 end
 
-"""
-    glm_data(model, Xmatrix, y, features)
-
-Return data which is ready to be passed to `fit(form, data, ...)`.
-"""
-function glm_data(model, Xmatrix, y, features)
-    data = Tables.table([Xmatrix y]; header=[features...; :y])
-    return data
-end
-
 """
     check_weights(w, y)
From 51d7d52d06bb24f6583c53ced6715914ce8aabe9 Mon Sep 17 00:00:00 2001
From: tiemvanderdeure
Date: Sun, 25 Feb 2024 14:49:50 +0300
Subject: [PATCH 7/7] add a test for predict with offset

---
 test/runtests.jl | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 22bccbf..b4b8631 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -229,12 +229,17 @@ end
 
 @testset "Test offsetting models" begin
     @testset "Test split_X_offset" begin
-        rng = StableRNGs.StableRNG(123)
-        N = 100
         X = (x1=[1,2,3], x2=[4,5,6])
         @test MLJGLMInterface.split_X_offset(X, nothing) == (X, Float64[])
         @test MLJGLMInterface.split_X_offset(X, :x1) == ((x2=[4,5,6],), [1,2,3])
 
+        lr = LinearRegressor(fit_intercept = false, offsetcol = :x1)
+        fitresult, _, report = fit(lr, 1, X, [5, 7, 9])
+        yhat = predict_mean(lr, fitresult, (x1 = [2, 3, 4], x2 = [5, 6, 7]))
+        @test yhat == [7.0, 9.0, 11.0]
+
+        rng = StableRNGs.StableRNG(123)
+        N = 100
         X = MLJBase.table(rand(rng, N, 3))
         Xnew, offset = MLJGLMInterface.split_X_offset(X, :x2)
         @test offset isa Vector
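
[Note on PATCH 7/7] The new offset assertion can be checked by hand: with
fit_intercept=false and offsetcol=:x1, training solves y .- x1 == [4, 5, 6]
against x2 == [4, 5, 6], giving a single coefficient of 1.0. Predicting on
x2 = [5, 6, 7] with offset x1 = [2, 3, 4] then yields (sketch):

    1.0 .* [5, 6, 7] .+ [2, 3, 4]   # == [7.0, 9.0, 11.0], matching the test
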