From 2fb0d614c9728f77f40156b90ab5d2b9441d9602 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 27 Feb 2024 11:26:53 +1300 Subject: [PATCH 1/4] add tests for categorical features --- test/runtests.jl | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 32c4df3..4a70962 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,7 @@ using MLJGLMInterface using GLM: coeftable import GLM import MLJTestInterface +using Tables using Distributions: Normal, Poisson, Uniform import StableRNGs @@ -15,12 +16,33 @@ using Tables expit(X) = 1 ./ (1 .+ exp.(-X)) -# TODO: Add more datasets to the following generic interface tests after #45 is merged +# synthesize small data sets with mixed features: + +n = 100 +X_regression, y_regression = MLJBase.make_regression(n, 3) +outlook = categorical(rand(["sunny", "overcast", "rainy"], n)) +temperature = categorical( + rand(["cold", "mild", "hot"], n), + ordered=true, + levels=["cold", "mild", "hot"], + ) +X = merge( + Tables.columntable(X_regression), + (; outlook, temperature), +) +y_binary = (temperature .== "mild" .|| outlook .== "sunny") |> categorical +y_count = map(X.x1) do x + floor(Int, 10*abs(x)) +end +mixed_binary = (X, y_binary) +mixed_count = (X, y_count) +mixed_regression = (X, y_regression) @testset "generic interface tests" begin @testset "LinearRegressor" begin for data in [ - MLJTestInterface.make_regression(), + MLJTestInterface.make_regression(), + mixed_regression, ] failures, summary = MLJTestInterface.test( [LinearRegressor,], @@ -36,6 +58,7 @@ expit(X) = 1 ./ (1 .+ exp.(-X)) @testset "LinearCountRegressor" begin for data in [ MLJTestInterface.make_count(), + mixed_count, ] failures, summary = MLJTestInterface.test( [LinearCountRegressor,], @@ -51,6 +74,7 @@ expit(X) = 1 ./ (1 .+ exp.(-X)) @testset "LinearBinaryClassifier" begin for data in [ MLJTestInterface.make_binary(), + mixed_binary, ] failures, summary = 
MLJTestInterface.test( [LinearBinaryClassifier,], From 9d32af8c2fa4e7060b75cbb39ed1e0a47ce70e4a Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 28 Feb 2024 17:15:37 +1300 Subject: [PATCH 2/4] try to fix problem with new test in julia 1.6 --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 4a70962..98320e1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,7 +30,7 @@ X = merge( Tables.columntable(X_regression), (; outlook, temperature), ) -y_binary = (temperature .== "mild" .|| outlook .== "sunny") |> categorical +y_binary = categorical(temperature .== "mild" .|| outlook .== "sunny") y_count = map(X.x1) do x floor(Int, 10*abs(x)) end From 048c55b27176bd9db9994e361cdf4f93cada0e1d Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 28 Feb 2024 17:20:06 +1300 Subject: [PATCH 3/4] replace `.||` with `.|` in y_binary (second attempt at julia 1.6 fix) --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 98320e1..351a895 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,7 +30,7 @@ X = merge( Tables.columntable(X_regression), (; outlook, temperature), ) -y_binary = categorical(temperature .== "mild" .|| outlook .== "sunny") +y_binary = categorical(temperature .== "mild" .| outlook .== "sunny") y_count = map(X.x1) do x floor(Int, 10*abs(x)) end From 886deee0e5b3ac25f365403a738ed983e69ef7f1 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 28 Feb 2024 18:07:05 +1300 Subject: [PATCH 4/4] parenthesize comparisons in y_binary so `.|` combines their results
--- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 351a895..8d475dc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,7 +30,7 @@ X = merge( Tables.columntable(X_regression), (; outlook, temperature), ) -y_binary = categorical(temperature .== "mild" .| outlook .== "sunny") +y_binary = categorical((temperature .== "mild") .| (outlook .== "sunny")) y_count = map(X.x1) do x floor(Int, 10*abs(x)) end