Add loss value metric based on optimal performance definition #66

Merged: 40 commits, Oct 31, 2024 (changes shown are from 10 commits)

Commits (40):
6c77820  add loss value metric (jteijema, Aug 27, 2024)
760125e  Update reqs (jteijema, Aug 27, 2024)
567ca37  Merge branch 'main' into loss-metric (jteijema, Sep 4, 2024)
11589a6  sort imports (jteijema, Sep 4, 2024)
8db2ce5  Merge branch 'main' into loss-metric (J535D165, Sep 26, 2024)
cd1d84f  remove useless function (jteijema, Oct 24, 2024)
62c295f  Normalize loss function between worst and best (jteijema, Oct 24, 2024)
9e51c7b  Add loss tests (jteijema, Oct 24, 2024)
5fa3410  remove metrics import (jteijema, Oct 24, 2024)
18bb2e2  remove sklearn from deps (jteijema, Oct 24, 2024)
c6b18f4  Add line between imports and first func (jteijema, Oct 24, 2024)
0454e5d  Ruff! (jteijema, Oct 24, 2024)
dda0bcf  add breaks in loss value function (jteijema, Oct 24, 2024)
019cb58  Remove prints from loss test (jteijema, Oct 24, 2024)
7b69304  Add comments to algorithm for loss (jteijema, Oct 24, 2024)
9ccf0cd  Add new tests (jteijema, Oct 30, 2024)
71c6bdd  Update algorithm (jteijema, Oct 30, 2024)
66c3445  Remove leftover debugging message (jteijema, Oct 30, 2024)
a6527bf  Remove decimal args from tests (jteijema, Oct 30, 2024)
d4c5cbd  Add new value error for invalid set (jteijema, Oct 31, 2024)
868a3bf  Refactor loss tests (jteijema, Oct 31, 2024)
b4f17c4  Change api usage with cumsum (jteijema, Oct 31, 2024)
07653f3  Merge branch 'loss-metric' of https://github.com/jteijema/asreview-in… (jteijema, Oct 31, 2024)
ceaf9b3  Simplify denominator (jteijema, Oct 31, 2024)
ebdfd83  Return formula instead of deriving in code. (jteijema, Oct 31, 2024)
0e3116f  Simplify formula and update the comments (jteijema, Oct 31, 2024)
b042b5b  update metrics loss docstring (jteijema, Oct 31, 2024)
f75c499  change best to optimal (jteijema, Oct 31, 2024)
74a1e84  Update tests file (jteijema, Oct 31, 2024)
1621f8c  Refactor the docstring for loss function (jteijema, Oct 31, 2024)
85dfd97  Value error update (jteijema, Oct 31, 2024)
867c6bf  Add instance type check (jteijema, Oct 31, 2024)
8a3d4ef  Merge branch 'loss-metric' of https://github.com/jteijema/asreview-in… (jteijema, Oct 31, 2024)
0884240  Linter (jteijema, Oct 31, 2024)
da331ad  Add loss to readme (jteijema, Oct 31, 2024)
5dc96f9  Add loss to output metrics (jteijema, Oct 31, 2024)
017afba  Add to inline explanation (jteijema, Oct 31, 2024)
511503a  Update formulas in readme (jteijema, Oct 31, 2024)
cc9c569  Update explanation in readme (jteijema, Oct 31, 2024)
08b70cc  Change readme explanation (jteijema, Oct 31, 2024)
34 changes: 28 additions & 6 deletions asreviewcontrib/insights/algorithms.py
@@ -1,5 +1,4 @@
 import numpy as np
-from sklearn import metrics
 
 
 def _recall_values(labels, x_absolute=False, y_absolute=False):
@@ -21,11 +20,34 @@ def _recall_values(labels, x_absolute=False, y_absolute=False):
 
 
 def _loss_value(labels):
-    positive_doc_ratio = sum(labels) / len(labels)
-    triangle_before_perfect_recall = positive_doc_ratio * 0.5
-    aera_under_recall_curve = metrics.auc(*_recall_values(labels))
-
-    return 1 - (triangle_before_perfect_recall + aera_under_recall_curve)
+    Ny = sum(labels)
+    Nx = len(labels)
+
+    # The best AUC represents the entire area under the perfect curve, which is
+    # the total area Nx * Ny, minus the area above the perfect curve (which is
+    # the sum of a series with a formula (Ny * Ny) / 2) plus 0.5 to account for
+    # the boundary.
+    best_auc = Nx * Ny - (((Ny * Ny) / 2) + 0.5)
+
+    # Compute recall values (y) based on the provided labels. We don't need x
+    # values because the points are uniformly spaced.
+    y = np.array(_recall_values(labels, x_absolute=True, y_absolute=True)[1])
+
+    # The actual AUC is calculated by approximating the area under the curve
+    # using the trapezoidal rule. (y[1:] + y[:-1]) / 2 takes the average height
+    # between consecutive y values, and we sum them up.
+    actual_auc = np.sum((y[1:] + y[:-1]) / 2)
+
+    # The worst AUC represents the area under the worst-case step curve, which
+    # is simply the area under the recall curve where all positive labels are
+    # clumped at the end, calculated as (Ny * Ny) / 2.
+    worst_auc = ((Ny * Ny) / 2)
+
+    # The normalized loss is the difference between the best AUC and the actual
+    # AUC, normalized by the range between the best and worst AUCs.
+    normalized_loss = (best_auc - actual_auc) / (best_auc - worst_auc) if best_auc != worst_auc else 0  # noqa: E501
+
+    return normalized_loss
 
 
 def _wss_values(labels, x_absolute=False, y_absolute=False):
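For intuition, here is a small self-contained sketch of the normalization above (not part of the diff). It assumes _recall_values(labels, x_absolute=True, y_absolute=True)[1] is equivalent to np.cumsum(labels), and it omits the best_auc != worst_auc guard:

import numpy as np

def loss_sketch(labels):
    # Mirrors _loss_value above; np.cumsum stands in for _recall_values
    # (an assumption made so this snippet runs on its own).
    Ny, Nx = sum(labels), len(labels)
    y = np.cumsum(labels)
    best_auc = Nx * Ny - ((Ny * Ny) / 2 + 0.5)
    worst_auc = (Ny * Ny) / 2
    actual_auc = np.sum((y[1:] + y[:-1]) / 2)
    return (best_auc - actual_auc) / (best_auc - worst_auc)

print(loss_sketch([1, 1, 0]))  # 0.0: positives first, actual_auc = best_auc = 3.5
print(loss_sketch([0, 1, 1]))  # 1.0: positives last, actual_auc = worst_auc = 2.0

The two extreme orderings recover the bounds of the metric: screening all relevant records first gives a loss of 0, screening them all last gives a loss of 1.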
4 changes: 0 additions & 4 deletions asreviewcontrib/insights/metrics.py
@@ -183,12 +183,8 @@ def loss(state_obj, priors=False):
     """
     labels = _pad_simulation_labels(state_obj, priors=priors)
 
-    return _loss(labels)
-
-def _loss(labels):
     return _loss_value(labels)
-
 
 def get_metrics(
     state_obj,
     recall=None,
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -15,7 +15,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11"
 ]
 license = {text = "Apache-2.0"}
-dependencies = ["numpy", "matplotlib", "asreview>=1,<2", "scikit-learn"]
+dependencies = ["numpy", "matplotlib", "asreview>=1,<2"]
 dynamic = ["version"]
 requires-python = ">=3.7"

36 changes: 36 additions & 0 deletions tests/test_metrics.py
@@ -4,9 +4,11 @@
 from numpy import array_equal
 from numpy.testing import assert_almost_equal
 
+from asreviewcontrib.insights.algorithms import _loss_value
 from asreviewcontrib.insights.metrics import _recall
 from asreviewcontrib.insights.metrics import _time_to_discovery
 from asreviewcontrib.insights.metrics import get_metrics
+from asreviewcontrib.insights.metrics import loss
 from asreviewcontrib.insights.metrics import recall
 
 TEST_ASREVIEW_FILES = Path(Path(__file__).parent, "asreview_files")
@@ -111,3 +113,37 @@ def test_label_padding():
         stop_if_full = get_metrics(s)
 
     assert stop_if_min == stop_if_full
+
+def test_loss():
+    with open_state(
+        Path(TEST_ASREVIEW_FILES, "sim_van_de_schoot_2017_stop_if_min.asreview")
+    ) as s:
+        loss_value = loss(s)
+        assert_almost_equal(loss_value, 0.011590940352087164, decimal=6)
+
+def test_loss_value_function():
+    labels = [1, 0]
+    loss_value = _loss_value(labels)
+    assert_almost_equal(loss_value, 0, decimal=6)
+
+    labels = [0, 1]
+    loss_value = _loss_value(labels)
+    assert_almost_equal(loss_value, 1, decimal=6)
+
+    labels = [1, 1, 0, 0, 0]
+    loss_value = _loss_value(labels)
+    assert_almost_equal(loss_value, 0, decimal=6)
+
+    labels = [0, 0, 0, 1, 1]
+    loss_value = _loss_value(labels)
+    assert_almost_equal(loss_value, 1, decimal=6)
+
+    import random
+    for i in range(100):
+        length = random.randint(2, 100)
+        labels = [random.randint(0, 1) for _ in range(length)]
+        loss_value = _loss_value(labels)
+        if not (0 <= loss_value <= 1):
+            print(f"Test {i+1}: Labels: {labels}, Loss: {loss_value}")
+        assert 0 <= loss_value <= 1, f"Loss value {loss_value} not between 0 and 1 for \
+            labels {labels}"
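For reference, a minimal sketch of how the new metric is used outside the test suite; the file path is illustrative, while open_state and loss are the same APIs exercised in test_loss above:

from asreview import open_state
from asreviewcontrib.insights.metrics import loss

# Illustrative path; any completed simulation .asreview file works here.
with open_state("sim_van_de_schoot_2017_stop_if_min.asreview") as state:
    print(loss(state))  # 0.0 is an optimal ranking, 1.0 the worst possible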