Preference Learning now tested and functional!

tournesol-app · Jan 10, 2025 · a8468bd · a8468bd
1 parent 977cf57
commit a8468bd
Show file tree

Hide file tree

Showing 24 changed files with 1,217 additions and 184 deletions.
diff --git a/solidago/experiments/toy.py b/solidago/experiments/toy.py
@@ -20,69 +20,20 @@
 # s = generative_model()
 # s.save("tests/pipeline/saved")
 
-s = State.load("tests/pipeline/saved")
+# s = State.load("tests/pipeline/saved")
 pipeline= Sequential.load("tests/pipeline/test_pipeline.json")
+states = [ State.load(f"tests/pipeline/saved_{seed}") for seed in range(5) ]
+
+for seed in range(5):
+    s = states[seed]
+    s = pipeline.trust_propagation.state2state_function(s, save_directory=f"tests/pipeline/saved_{seed}")
+    s = pipeline.preference_learning.state2state_function(s, save_directory=f"tests/pipeline/saved_{seed}")
 
 # s = pipeline(s, "tests/pipeline/saved")
 
-# s = pipeline.trust_propagation.state2state_function(s, save_directory="tests/pipeline/saved")
-# s = pipeline.preference_learning.state2state_function(s, save_directory="tests/pipeline/saved")
 # s = pipeline.voting_rights.state2state_function(s, save_directory="tests/pipeline/saved")
 # s = pipeline.scaling.state2state_function(s, save_directory="tests/pipeline/saved")
 # s = pipeline.aggregation.state2state_function(s, save_directory="tests/pipeline/saved")
 # s = pipeline.post_process.state2state_function(s, save_directory="tests/pipeline/saved")
 
-self = NumbaUniformGBT()
-
-assessments = s.assessments.reorder_keys(["username", "criterion", "entity_name"])
-comparisons = s.comparisons.reorder_keys(["username", "criterion", "left_name", "right_name"])
-user = next(iter(s.users))
-assessments = assessments[user]
-comparisons = comparisons[user]
-
-compared_entity_names = comparisons.get_set("left_name") | comparisons.get_set("right_name")
-entities = s.entities.get(compared_entity_names)
-init = s.user_models[user](entities).reorder_keys(["criterion", "entity_name"])
-comparisons = comparisons.reorder_keys(["criterion", "left_name", "right_name"])
-criteria = comparisons.get_set("criterion") | init.get_set("criterion")
-criterion = next(iter(criteria))
-comparisons = comparisons[criterion]
-init = init[criterion]
-
-entity_name2index = { str(entity): index for index, entity in enumerate(entities) }
-comparisons = comparisons.order_by_entities()
-
-entity_index = np.random.randint(len(entities))
-entity_name = entities.iloc[entity_index].name
-scores = np.arange(len(entities), dtype=np.float64)
-
-def get_partial_derivative_args(entity_index: int, scores: np.ndarray) -> tuple:
-    entity_name = entities.iloc[entity_index].name
-    normalized_comparisons = comparisons[entity_name].normalized_comparisons(self.last_comparison_only)
-    df = comparisons[entity_name].to_df(last_row_only=self.last_comparison_only)
-    indices = df["other_name"].map(entity_name2index)
-    return scores[indices], np.array(normalized_comparisons)
-
-get_partial_derivative_args(entity_index, scores)
-
-empty_function = lambda coordinate, variable: tuple()
-get_update_coordinate_function_args = empty_function
-
-def coordinate_function(coordinate: int, variable: np.ndarray[np.float64]):
-    @njit
-    def f(value: np.float64, *partial_derivative_args) -> np.float64:
-        return self.partial_derivative(coordinate, np.array([ 
-            variable[i] if i != coordinate else value
-            for i in range(len(variable))
-        ], dtype=np.float64), *partial_derivative_args)
-    return f
 
-coordinate_optimization_xtol = 1e-5
-def update_coordinate_function(coordinate: int, variable: np.ndarray[np.float64], *coordinate_update_args) -> float:
-    return njit_brentq(
-        f=coordinate_function(coordinate, variable),
-        args=get_partial_derivative_args(coordinate, variable, *coordinate_update_args),
-        xtol=coordinate_optimization_xtol,
-        a=variable[coordinate] - 1.0,
-        b=variable[coordinate] + 1.0
-    )
diff --git a/solidago/src/solidago/_pipeline/_preference_learning/generalized_bradley_terry.py b/solidago/src/solidago/_pipeline/_preference_learning/generalized_bradley_terry.py
@@ -288,23 +288,24 @@ def __init__(self,
             max_uncertainty=max_uncertainty,
             last_comparison_only=last_comparison_only,
         )
-
+    
     def cumulant_generating_function(self, score_diffs: npt.NDArray) -> npt.NDArray:
         """ The cgf of UniformGBT is simply log( sinh(score_diff) / score_diff ).
         However, numerical accuracy requires care in the cases 
         where abs(score_diff) is small (because of division by zero)
         or where it is large (because sinh explodes).
         """
         score_diffs_abs = np.abs(score_diffs)
-        return np.where(
-            score_diffs_abs > 1,
-            np.where(
-                score_diffs_abs < 10.0,
-                np.log(np.sinh(score_diffs) / score_diffs),
-                score_diffs_abs - np.log(2) - np.log(score_diffs_abs),
-            ),
-            score_diffs_abs ** 2 / 6 - score_diffs_abs ** 4 / 180,
-        )
+        with np.errstate(all='ignore'):
+            return np.where(
+                score_diffs_abs > 1e-1,
+                np.where(
+                    score_diffs_abs < 20.0,
+                    np.log(np.sinh(score_diffs) / score_diffs),
+                    score_diffs_abs - np.log(2) - np.log(score_diffs_abs),
+                ),
+                score_diffs_abs ** 2 / 6 - score_diffs_abs ** 4 / 180,
+            )
 
     def cumulant_generating_function_derivative(self, score_diffs: npt.NDArray) -> npt.NDArray:
         """ The cgf derivative of UniformGBT is simply 

diff --git a/solidago/src/solidago/_pipeline/_preference_learning/lbfgs_generalized_bradley_terry.py b/solidago/src/solidago/_pipeline/_preference_learning/lbfgs_generalized_bradley_terry.py
@@ -66,7 +66,7 @@ def __init__(self,
         self.device = device
 
     @abstractmethod
-    def cumulant_generating_function(self, score_diffs: torch.Tensor) -> torch.Tensor:
+    def torch_cumulant_generating_function(self, score_diffs: torch.Tensor) -> torch.Tensor:
         """ To use the cumulant generating function in the context of pytorch,
         it is sufficient to write the cumulant generating function.
         This function must however be written as a torch function,
@@ -83,18 +83,27 @@ def cumulant_generating_function(self, score_diffs: torch.Tensor) -> torch.Tenso
             cgf[i] is the cumulant-generating function at score_diffs[i]
         """
 
+    def init_scores(self, 
+        entity_name2index: dict[str, int],
+        init_multiscores: MultiScore, # key_names == "entity_name"
+    ) -> torch.Tensor:
+        """ To avoid nan errors in autograd, we initialize at nonzero values """
+        scores = 1e-5 * torch.normal(0, 1, (len(entity_name2index),))
+        for entity, init_score in init_multiscores:
+            if not init_score.isnan():
+                scores[entity_name2index[str(entity)]] += init_score.value
+        scores.requires_grad = True
+        scores = scores.to(self.device)
+        return scores
+
     def compute_scores(self, 
         entities: Entities,
         entity_name2index: dict[str, int],
         comparisons: Comparisons, # key_names == ["left_name, right_name"]
         init_multiscores : MultiScore, # key_names == ["entity_name"]
     ) -> npt.NDArray:
         """ Computes the scores given comparisons """
-        scores = self.init_scores(entity_name2index, init_multiscores)
-        scores = torch.tensor(scores, dtype=torch.float64)
-        scores.requires_grad = True
-        scores = scores.to(self.device)
-
+        scores = self.init_scores(entity_name2index, init_multiscores)        
         lbfgs = torch.optim.LBFGS(
             (scores,),
             max_iter=self.max_iter,
@@ -129,8 +138,8 @@ def negative_log_posterior(self,
         indices = comparisons.compared_entity_indices(entity_name2index, self.last_comparison_only)
         score_diffs = scores[indices["left"]] - scores[indices["right"]]
         normalized_comparisons = comparisons.normalized_comparisons(self.last_comparison_only)
-        loss = self.cumulant_generating_function(score_diffs).sum()
-        loss -= (score_diffs * torch.tensor(normalized_comparisons)).sum()
+        loss = self.torch_cumulant_generating_function(score_diffs).sum()
+        loss += (score_diffs * torch.tensor(normalized_comparisons)).sum()
         return loss + (scores**2).sum() / (2 * self.prior_std_dev**2)
 
 
@@ -158,7 +167,7 @@ def __init__(self,
             last_comparison_only=last_comparison_only
         )
 
-    def cumulant_generating_function(self, score_diffs: torch.Tensor) -> torch.Tensor:
+    def torch_cumulant_generating_function(self, score_diffs: torch.Tensor) -> torch.Tensor:
         """ Vectorized cumulant generating function adapted for pytorch
 
         Parameters
@@ -171,7 +180,7 @@ def cumulant_generating_function(self, score_diffs: torch.Tensor) -> torch.Tenso
         cgf: torch.Tensor
             cgf[i] is the cgf of score_diffs[i]
         """
-        score_diffs_abs = score_diffs.abs()
+        score_diffs_abs = torch.abs(score_diffs)
         return torch.where(
             score_diffs_abs > 1e-1,
             torch.where(

diff --git a/solidago/src/solidago/_pipeline/_preference_learning/numba_generalized_bradley_terry.py b/solidago/src/solidago/_pipeline/_preference_learning/numba_generalized_bradley_terry.py
@@ -55,7 +55,7 @@ def __init__(self,
     def cumulant_generating_function_derivative(self) -> Callable[[npt.NDArray], npt.NDArray]:
         """ To use numba, instead of defining directly the cgf derivative,
         it is useful to instead define this method as a property,
-        which outputs a callable function decorated with @njit.
+        which outputs a jitted callable function.
         This callable function must have the following annotations.
         
         Parameters
@@ -79,19 +79,30 @@ def compute_scores(self,
     ) -> npt.NDArray:
         """ Computes the scores given comparisons """
         comparisons = comparisons.order_by_entities()
-        def get_partial_derivative_args(entity_index: int, scores: np.ndarray) -> tuple:
-            entity_name = entities.iloc[entity_index].name
-            normalized_comparisons = comparisons[entity_name].normalized_comparisons(self.last_comparison_only)
-            df = comparisons[entity_name].to_df(last_row_only=self.last_comparison_only)
-            indices = df["other_name"].map(entity_name2index)
-            return scores[indices], np.array(normalized_comparisons)
-
+
         return coordinate_descent(
             self.partial_derivative,
-            get_partial_derivative_args=get_partial_derivative_args,
-            initialization=self.init_scores(entity_name2index, init_multiscores),
+            self.init_scores(entity_name2index, init_multiscores),
+            self.get_partial_derivative_args(entities, entity_name2index, comparisons),
             error=self.convergence_error,
         )
+
+    def get_partial_derivative_args(self, 
+        entities: Entities, 
+        entity_name2index: dict[str, int],
+        entity_ordered_comparisons: Comparisons, # key_names == ["entity_name", "other_name"]
+    ) -> Callable[[int, np.ndarray], tuple[np.ndarray, np.ndarray]]:
+
+        def f(entity_index: int, scores: np.ndarray) -> tuple:
+            entity_name = entities.iloc[entity_index].name
+            comparisons = entity_ordered_comparisons[entity_name]
+            normalized_comparisons = comparisons.normalized_comparisons(self.last_comparison_only)
+            df = comparisons.to_df(last_row_only=self.last_comparison_only)
+            indices = df["other_name"].map(entity_name2index)
+            return scores[indices], np.array(normalized_comparisons)
+
+        return f
+
 
     @cached_property
     def partial_derivative(self) -> Callable[[int, np.ndarray[np.float64], dict, dict], float]:
@@ -104,15 +115,15 @@ def partial_derivative(self) -> Callable[[int, np.ndarray[np.float64], dict, dic
         prior_var = self.prior_std_dev**2
         cfg_deriv = self.cumulant_generating_function_derivative
 
-        @njit
+        # @njit
         def njit_partial_derivative(
             entity_index: int,
             scores: float,
             compared_scores: npt.NDArray, 
             normalized_comparisons: npt.NDArray, 
         ) -> npt.NDArray:
             score_diffs = scores[entity_index] - compared_scores
-            nll_derivative = np.sum(cfg_deriv(score_diffs) - normalized_comparisons)
+            nll_derivative = np.sum(cfg_deriv(score_diffs) + normalized_comparisons)
             prior_derivative = scores[entity_index] / prior_var
             return prior_derivative + nll_derivative
 
@@ -149,12 +160,13 @@ def cumulant_generating_function_derivative(self) -> Callable[[npt.NDArray], npt
         and as it must be njit to be used by coordinate_descent,
         we write it as a cached property njit function.
         """
-        @njit
+        # @njit
         def njit_cumulant_generating_function_derivative(score_diffs: npt.NDArray):
-            return np.where(
-                np.abs(score_diffs) < 1e-2,
-                score_diffs / 3,
-                1 / np.tanh(score_diffs) - 1 / score_diffs,
-            )
+            with np.errstate(all='ignore'):
+                return np.where(
+                    np.abs(score_diffs) < 1e-2,
+                    score_diffs / 3,
+                    1 / np.tanh(score_diffs) - 1 / score_diffs,
+                )
 
         return njit_cumulant_generating_function_derivative
diff --git a/solidago/src/solidago/_state/_comparisons/base.py b/solidago/src/solidago/_state/_comparisons/base.py
@@ -46,6 +46,11 @@ def order_by_entities(self) -> "Comparisons": # key_names == ["entity_name", "ot
             return self.reorder_keys(key_names)
         assert "left_name" in self.key_names and "right_name" in self.key_names, "" \
             "Comparisons must have columns `left_name` and `right_name`"
+
+        def invert(comparison):
+            if "comparison" in comparison:
+                comparison["comparison"] = - comparison["comparison"]
+            return comparison
         key_names = ["entity_name", "other_name"] + [ 
             kn for kn in self.key_names if kn not in ("left_name", "right_name") 
         ]
@@ -65,7 +70,7 @@ def order_by_entities(self) -> "Comparisons": # key_names == ["entity_name", "ot
             )
             result.add_row(
                 [right_name, left_name] + non_entity_keys,
-                new_comparison | dict(location="right")
+                invert(new_comparison) | dict(location="right")
             )
         return result
 

diff --git a/solidago/src/solidago/_state/_models/score.py b/solidago/src/solidago/_state/_models/score.py
@@ -159,7 +159,7 @@ def process_stored_value(self, keys: list[str], stored_value: tuple[float, float
     def sanitize(self, value: Union[tuple, Score, dict]) -> tuple[float, float, float]:
         if isinstance(value, (list, tuple)):
             assert len(value) == 3
-            return value
+            return tuple(float(v) for v in value)
         if isinstance(value, Score):
             return value.to_triplet()
         assert isinstance(value, (dict, Series))

diff --git a/solidago/src/solidago/primitives/optimize.py b/solidago/src/solidago/primitives/optimize.py
@@ -22,7 +22,7 @@
 _rtol = 4 * np.finfo(float).eps
 
 
-@njit
+# @njit
 def _bisect_interval(a, b, fa, fb) -> Tuple[float, int]:
     """Conditional checks for intervals in methods involving bisection"""
     if fa * fb > 0:
@@ -41,7 +41,7 @@ def _bisect_interval(a, b, fa, fb) -> Tuple[float, int]:
     return root, status
 
 
-@njit
+# @njit
 def njit_brentq(
     f,
     args=(),
@@ -289,7 +289,7 @@ def coordinate_function(
         coordinate: int, 
         variable: np.ndarray[np.float64],
     ) -> Callable[[float, Tuple], float]:
-        @njit
+        # @njit
         def f(value: np.float64, *partial_derivative_args) -> np.float64:
             return partial_derivative(coordinate, np.array([ 
                 variable[i] if i != coordinate else value

diff --git a/solidago/tests/pipeline/saved_0/state.json b/solidago/tests/pipeline/saved_0/state.json
@@ -36,8 +36,51 @@
     "user_models": [
         "UserModels",
         {
-            "users": {},
-            "dataframes": {},
+            "users": {
+                "user_0": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_1": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_2": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_3": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_4": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_5": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_6": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_7": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_8": [
+                    "DirectScoring",
+                    {}
+                ],
+                "user_9": [
+                    "DirectScoring",
+                    {}
+                ]
+            },
+            "dataframes": {
+                "directs": "tests/pipeline/saved_0/user_directs.csv"
+            },
             "default_model_cls": "DirectScoring"
         }
     ],