From 54df09e734cbc98f261bf4a108e781e41d591914 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Wed, 12 Jan 2022 21:16:09 -0600
Subject: [PATCH] update multiclass tests

---
 tests/python_package_test/test_dask.py | 36 ++++++++++++++------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 8910a5a3b210..3ff84f6329ac 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -493,15 +493,16 @@ def test_classifier_custom_objective(output, task, cluster):
     with Client(cluster) as client:
         X, y, w, _, dX, dy, dw, _ = _create_data(
             objective=task,
-            output=output
+            output=output,
         )
 
         params = {
             "n_estimators": 50,
             "num_leaves": 31,
-            "min_data": 1,
             "verbose": -1,
-            "learning_rate": 0.01,
+            "seed": 708,
+            "deterministic": True,
+            "force_col_wise": True
         }
 
         if task == 'binary-classification':
@@ -522,25 +523,26 @@ def test_classifier_custom_objective(output, task, cluster):
         )
         dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw)
         dask_classifier_local = dask_classifier.to_local()
-        p1_proba = dask_classifier.predict_proba(dX).compute()
-        p1_proba_local = dask_classifier_local.predict_proba(X)
+        p1_raw = dask_classifier.predict(dX, raw_score=True).compute()
+        p1_raw_local = dask_classifier_local.predict(X, raw_score=True)
 
         # with a custom objective, prediction result is a raw score instead of predicted class
-        p1_class = (1.0 / (1.0 + np.exp(-p1_proba))) > 0.5
-        p1_class = p1_class.astype(np.int64)
-        p1_class_local = (1.0 / (1.0 + np.exp(-p1_proba_local))) > 0.5
-        p1_class_local = p1_class_local.astype(np.int64)
+        p1_proba = 1.0 / (1.0 + np.exp(-p1_raw))
+        p1_proba_local = 1.0 / (1.0 + np.exp(-p1_raw_local))
 
         local_classifier = lgb.LGBMClassifier(**params)
         local_classifier.fit(X, y, sample_weight=w)
-        p2_proba = local_classifier.predict_proba(X)
-        p2_class = (1.0 / (1.0 + np.exp(-p1_proba))) > 0.5
-        p2_class = p2_class.astype(np.int64)
+        p2_raw = local_classifier.predict(X, raw_score=True)
+        p2_proba = 1.0 / (1.0 + np.exp(-p2_raw))
 
-        if task == 'multiclass-classification':
-            p1_class = p1_class.argmax(axis=1)
-            p1_class_local = p1_class_local.argmax(axis=1)
-            p2_class = p2_class.argmax(axis=1)
+        if task == 'binary-classification':
+            p1_class = (p1_proba > 0.5).astype(np.int64)
+            p1_class_local = (p1_proba_local > 0.5).astype(np.int64)
+            p2_class = (p2_proba > 0.5).astype(np.int64)
+        elif task == 'multiclass-classification':
+            p1_class = p1_proba.argmax(axis=1)
+            p1_class_local = p1_proba_local.argmax(axis=1)
+            p2_class = p2_proba.argmax(axis=1)
 
         # function should have been preserved
         assert callable(dask_classifier.objective_)
@@ -552,7 +554,7 @@ def test_classifier_custom_objective(output, task, cluster):
         assert_eq(p2_class, y)
 
         # probability estimates should be similar
-        assert_eq(p1_proba, p2_proba, atol=0.03)
+        assert_eq(p1_proba, p2_proba, atol=0.04)
 
 
 def test_group_workers_by_host():