From 9af1c77cb68a31258a476e234a67d3f5e9587f53 Mon Sep 17 00:00:00 2001
From: Paroma Varma
Date: Tue, 10 Sep 2019 11:30:08 -0700
Subject: [PATCH] Ignore abstains in Scorer, change LabelModel default tie break policy (#1450)

---
 snorkel/analysis/scorer.py              |  6 +++++-
 snorkel/labeling/model/label_model.py   | 11 ++++++++---
 test/analysis/test_scorer.py            |  8 +++++++-
 test/labeling/model/test_label_model.py | 20 ++++++++++++++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/snorkel/analysis/scorer.py b/snorkel/analysis/scorer.py
index 131e0e86c..b8d8f43e5 100644
--- a/snorkel/analysis/scorer.py
+++ b/snorkel/analysis/scorer.py
@@ -49,7 +49,11 @@ def __init__(
             if metric not in METRICS:
                 raise ValueError(f"Unrecognized metric: {metric}")
 
-        filter_dict = {} if abstain_label is None else {"golds": [abstain_label]}
+        filter_dict = (
+            {}
+            if abstain_label is None
+            else {"golds": [abstain_label], "preds": [abstain_label]}
+        )
         self.metrics = {
             m: partial(metric_score, metric=m, filter_dict=filter_dict)
             for m in metrics
diff --git a/snorkel/labeling/model/label_model.py b/snorkel/labeling/model/label_model.py
index bbe743e51..1519066e2 100644
--- a/snorkel/labeling/model/label_model.py
+++ b/snorkel/labeling/model/label_model.py
@@ -396,7 +396,7 @@ def predict(
         self,
         L: np.ndarray,
         return_probs: Optional[bool] = False,
-        tie_break_policy: str = "random",
+        tie_break_policy: str = "abstain",
     ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
         """Return predicted labels, with ties broken according to policy.
 
@@ -446,7 +446,7 @@ def score(
         L: np.ndarray,
         Y: np.ndarray,
         metrics: Optional[List[str]] = ["accuracy"],
-        tie_break_policy: str = "random",
+        tie_break_policy: str = "abstain",
     ) -> Dict[str, float]:
         """Calculate one or more scores from user-specified and/or user-defined metrics.
 
@@ -455,7 +455,7 @@ def score(
         L
             An [n,m] matrix with values in {-1,0,1,...,k-1}
         Y
-            Gold labels associated with datapoints in L
+            Gold labels associated with data points in L
         metrics
             A list of metric names
         tie_break_policy
@@ -477,6 +477,11 @@ def score(
        >>> label_model.score(L, Y=np.array([1, 1, 1]), metrics=["f1"])
        {'f1': 0.8}
        """
+        if tie_break_policy == "abstain":  # pragma: no cover
+            logging.warning(
+                "Metrics calculated over data points with non-abstain labels only"
+            )
+
         Y_pred, Y_prob = self.predict(
             L, return_probs=True, tie_break_policy=tie_break_policy
         )
diff --git a/test/analysis/test_scorer.py b/test/analysis/test_scorer.py
index 3cf40d7c4..2de9ae4e2 100644
--- a/test/analysis/test_scorer.py
+++ b/test/analysis/test_scorer.py
@@ -68,12 +68,18 @@ def test_abstain_labels(self) -> None:
         results_expected = dict(accuracy=0.6)
         self.assertEqual(results, results_expected)
 
-        # Test abstain=-1
+        # Test abstain=-1 for gold
         scorer = Scorer(metrics=["accuracy"], abstain_label=-1)
         results = scorer.score(golds, preds, probs)
         results_expected = dict(accuracy=0.75)
         self.assertEqual(results, results_expected)
 
+        # Test abstain=-1 for preds and gold
+        abstain_preds = np.array([-1, -1, 1, 1, 0])
+        results = scorer.score(golds, abstain_preds)
+        results_expected = dict(accuracy=0.5)
+        self.assertEqual(results, results_expected)
+
         # Test abstain set to different value
         scorer = Scorer(metrics=["accuracy"], abstain_label=10)
         results = scorer.score(golds, preds, probs)
diff --git a/test/labeling/model/test_label_model.py b/test/labeling/model/test_label_model.py
index 2f9a24e2f..ede1bbc8b 100644
--- a/test/labeling/model/test_label_model.py
+++ b/test/labeling/model/test_label_model.py
@@ -240,6 +240,14 @@ def test_predict_proba(self):
         np.testing.assert_array_almost_equal(probs, true_probs)
 
     def test_predict(self):
+        # 3 LFs that always disagree/abstain leads to all abstains
+        L = np.array([[-1, 1, 0], [0, -1, 1], [1, 0, -1]])
+        label_model = LabelModel(cardinality=2, verbose=False)
+        label_model.fit(L, n_epochs=100)
+        np.testing.assert_array_almost_equal(
+            label_model.predict(L), np.array([-1, -1, -1])
+        )
+
         L = np.array([[0, 1, 0], [0, 1, 0]])
         label_model = self._set_up_model(L)
 
@@ -254,6 +262,18 @@ def test_predict(self):
         np.testing.assert_array_almost_equal(probs, true_probs)
 
     def test_score(self):
+        L = np.array([[1, 1, 0], [-1, -1, -1], [1, 0, 1]])
+        Y = np.array([1, 0, 1])
+        label_model = LabelModel(cardinality=2, verbose=False)
+        label_model.fit(L, n_epochs=100)
+        results = label_model.score(L, Y)
+        np.testing.assert_array_almost_equal(
+            label_model.predict(L), np.array([1, -1, 1])
+        )
+
+        results_expected = dict(accuracy=1.0)
+        self.assertEqual(results, results_expected)
+
         L = np.array([[1, 0, 1], [1, 0, 1]])
         label_model = self._set_up_model(L)
         label_model.mu = nn.Parameter(label_model.mu_init.clone().clamp(0.01, 0.99))
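
A minimal usage sketch of the behavior this patch introduces (not part of the commit): with abstain_label=-1 the Scorer now filters abstains from both golds and preds before computing any metric, and LabelModel.predict/score default to tie_break_policy="abstain", so tied data points come back as -1 and score logs a warning that metrics cover non-abstain points only. The example data below is illustrative, not taken from the test suite; the import uses the module path shown in the diff.

    import numpy as np
    from snorkel.analysis.scorer import Scorer  # module path from the patched file

    golds = np.array([0, 1, 1, 0])
    preds = np.array([-1, 1, 0, 0])  # the first prediction abstains (-1)

    # abstain_label=-1 now drops points whose gold OR predicted label is -1
    scorer = Scorer(metrics=["accuracy"], abstain_label=-1)
    results = scorer.score(golds, preds)
    # accuracy is computed over the 3 remaining points: 2 correct -> ~0.667
    print(results)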