TuringLang · devmotion · Jan 17, 2023 · Jan 3, 2023 · Jan 4, 2023 · Jan 4, 2023
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,14 +1,14 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0"
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
-MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
 Documenter = "0.27"
+EvoTrees = "0.14"
 MCMCDiagnosticTools = "0.2"
 MLJBase = "0.19, 0.20, 0.21"
-MLJXGBoostInterface = "0.1, 0.2, 0.3"
 julia = "1.3"
diff --git a/src/rstar.jl b/src/rstar.jl
@@ -40,21 +40,22 @@ function rstar(
         throw(ArgumentError("training and test data subsets must not be empty"))
 
     xtable = _astable(x)
+    ycategorical = MLJModelInterface.categorical(ysplit)
 
     # train classifier on training data
-    ycategorical = MLJModelInterface.categorical(ysplit)
-    xtrain = MLJModelInterface.selectrows(xtable, train_ids)
-    fitresult, _ = MLJModelInterface.fit(
-        classifier, verbosity, xtrain, ycategorical[train_ids]
+    xtrain, ytrain = MLJModelInterface.reformat(
+        classifier, MLJModelInterface.selectrows(xtable, train_ids), ycategorical[train_ids]
     )
+    fitresult, _ = MLJModelInterface.fit(classifier, verbosity, xtrain, ytrain)
 
     # compute predictions on test data
-    xtest = MLJModelInterface.selectrows(xtable, test_ids)
+    xtest, ytest = MLJModelInterface.reformat(
+        classifier, MLJModelInterface.selectrows(xtable, test_ids), ycategorical[train_ids]
+    )
-    xtest, ytest = MLJModelInterface.reformat(
-        classifier, MLJModelInterface.selectrows(xtable, test_ids), ycategorical[train_ids]
-    )
+    xtest, = MLJModelInterface.selectrows(classifier, test_ids, xdata)
+    ytest = ycategorical[test_ids]
-    xtest, ytest = MLJModelInterface.reformat(
-        classifier, MLJModelInterface.selectrows(xtable, test_ids), ycategorical[train_ids]
-    )
+    xtest, ytest = MLJModelInterface.selectrows(classifier, test_ids, xdata, ydata)
-    xtest, ytest = MLJModelInterface.reformat(
-        classifier, MLJModelInterface.selectrows(xtable, test_ids), ycategorical[train_ids]
-    )
+    xtest, = MLJModelInterface.selectrows(classifier, test_ids, xdata)
+    ytest = ycategorical[test_ids]
-    xtest, ytest = MLJModelInterface.reformat(
-        classifier, MLJModelInterface.selectrows(xtable, test_ids), ycategorical[train_ids]
-    )
+    xtest, ytest = MLJModelInterface.selectrows(classifier, test_ids, xdata, ydata)
     predictions = _predict(classifier, fitresult, xtest)
 
     # compute statistic
-    ytest = ycategorical[test_ids]
-    result = _rstar(predictions, ytest)
+    result = _rstar(classifier, predictions, ytest)
 
     return result
 end
@@ -109,7 +110,7 @@ is returned (algorithm 2).
 # Examples
 
 ```jldoctest rstar; setup = :(using Random; Random.seed!(101))
-julia> using MLJBase, MLJXGBoostInterface, Statistics
+julia> using MLJBase, EvoTrees, Statistics
 
 julia> samples = fill(4.0, 100, 3, 2);
 ```
@@ -118,7 +119,7 @@ One can compute the distribution of the ``R^*`` statistic (algorithm 2) with the
 probabilistic classifier.
 
 ```jldoctest rstar
-julia> distribution = rstar(XGBoostClassifier(), samples);
+julia> distribution = rstar(EvoTreeClassifier(), samples);
 
 julia> isapprox(mean(distribution), 1; atol=0.1)
 true
@@ -129,9 +130,9 @@ Deterministic classifiers can also be derived from probabilistic classifiers by
 predicting the mode. In MLJ this corresponds to a pipeline of models.
 
 ```jldoctest rstar
-julia> xgboost_deterministic = Pipeline(XGBoostClassifier(); operation=predict_mode);
+julia> evotree_deterministic = Pipeline(EvoTreeClassifier(); operation=predict_mode);
 
-julia> value = rstar(xgboost_deterministic, samples);
+julia> value = rstar(evotree_deterministic, samples);
 
 julia> isapprox(value, 1; atol=0.2)
 true
@@ -161,7 +162,7 @@ function rstar(classif::MLJModelInterface.Supervised, x::AbstractArray{<:Any,3};
 end
 
 # R⋆ for deterministic predictions (algorithm 1)
-function _rstar(predictions::AbstractVector{T}, ytest::AbstractVector{T}) where {T}
+function _rstar(::MLJModelInterface.Deterministic, predictions::AbstractVector, ytest::AbstractVector)
     length(predictions) == length(ytest) ||
         error("numbers of predictions and targets must be equal")
     mean_accuracy = Statistics.mean(p == y for (p, y) in zip(predictions, ytest))
@@ -170,7 +171,7 @@ function _rstar(predictions::AbstractVector{T}, ytest::AbstractVector{T}) where
 end
 
 # R⋆ for probabilistic predictions (algorithm 2)
-function _rstar(predictions::AbstractVector, ytest::AbstractVector)
+function _rstar(::MLJModelInterface.Probabilistic, predictions::AbstractVector, ytest::AbstractVector)
     length(predictions) == length(ytest) ||
         error("numbers of predictions and targets must be equal")
 

diff --git a/test/Project.toml b/test/Project.toml
@@ -1,22 +1,21 @@
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 MCMCDiagnosticTools = "be115224-59cd-429b-ad48-344e309966f0"
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 MLJLIBSVMInterface = "61c7150f-6c77-4bb1-949c-13197eac2a52"
-MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
-Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
 Distributions = "0.25"
+EvoTrees = "0.14"
 FFTW = "1.1"
 MCMCDiagnosticTools = "0.2"
 MLJBase = "0.19, 0.20, 0.21"
 MLJLIBSVMInterface = "0.1, 0.2"
-MLJXGBoostInterface = "0.1, 0.2, 0.3"
 Tables = "1"
 julia = "1.3"
diff --git a/test/rstar.jl b/test/rstar.jl
@@ -1,18 +1,18 @@
 using MCMCDiagnosticTools
 
 using Distributions
+using EvoTrees
 using MLJBase
 using MLJLIBSVMInterface
-using MLJXGBoostInterface
 using Tables
 
 using Random
 using Test
 
-const xgboost_deterministic = Pipeline(XGBoostClassifier(); operation=predict_mode)
+const evotree_deterministic = Pipeline(EvoTreeClassifier(); operation=predict_mode)
 
 @testset "rstar.jl" begin
-    classifiers = (XGBoostClassifier(), xgboost_deterministic, SVC())
+    classifiers = (EvoTreeClassifier(), evotree_deterministic, SVC())
     N = 1_000
 
     @testset "samples input type: $wrapper" for wrapper in [Vector, Array, Tables.table]

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,5 +1,3 @@
-using Pkg
-
 using MCMCDiagnosticTools
 using FFTW
 
@@ -40,11 +38,6 @@ Random.seed!(1)
         include("rafterydiag.jl")
     end
     @testset "R⋆ diagnostic" begin
-        # XGBoost errors on 32bit systems: https://github.com/dmlc/XGBoost.jl/issues/92
-        if Sys.WORD_SIZE == 64
-            include("rstar.jl")
-        else
-            @info "R⋆ not tested: requires 64bit architecture"
-        end
+        include("rstar.jl")
     end
 end