From 52162f3f0b984a2032667a2332bf95f6aabcf033 Mon Sep 17 00:00:00 2001
From: Salman Faroz
Date: Mon, 23 Dec 2024 19:59:58 +0530
Subject: [PATCH] Adding evaluation checks to prevent Transformer ValueError (#3105)

* Adding evaluation checks to prevent Transformer ValueError

* Update trainer.py

* Update trainer.py

* Update trainer.py

* Rewrite comments & ValueError somewhat

* Fix a typo that I introduced

* Extend the no_eval_dataset_with_eval_strategy test with newly updated edge case

* Fix tests for Python 3.9, 3.10

---------

Co-authored-by: Tom Aarsen
---
 sentence_transformers/trainer.py | 18 +++++++++++++++---
 tests/test_trainer.py            |  9 ++++++++-
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/sentence_transformers/trainer.py b/sentence_transformers/trainer.py
index 182908656..b8d09fd4f 100644
--- a/sentence_transformers/trainer.py
+++ b/sentence_transformers/trainer.py
@@ -206,6 +206,10 @@ def __init__(
             train_dataset = DatasetDict(train_dataset)
         if isinstance(eval_dataset, dict) and not isinstance(eval_dataset, DatasetDict):
             eval_dataset = DatasetDict(eval_dataset)
+
+        # Transformers v4.46.0 introduced a ValueError if `eval_dataset` is None while eval_strategy is not "no",
+        # but in Sentence Transformers you can also evaluate without an eval_dataset via an evaluator, so we set
+        # it to "dummy" in that case to avoid the ValueError
         super_kwargs = {
             "model": None if self.model_init else model,
             "args": args,
@@ -223,10 +227,18 @@
             super_kwargs["processing_class"] = tokenizer
         else:
             super_kwargs["tokenizer"] = tokenizer
+
+        # super.__init__() will still raise a ValueError if `eval_dataset` is None, `evaluator` is None,
+        # while eval_strategy is not "no", so let's get ahead of it with a more useful ST-specific error message
+        if eval_dataset is None and evaluator is None and args.eval_strategy != "no":
+            raise ValueError(
+                f"You have set `args.eval_strategy` to {args.eval_strategy}, but you didn't provide an `eval_dataset` or an `evaluator`. "
+                "Either provide an `eval_dataset` or an `evaluator` to `SentenceTransformerTrainer`, "
+                "or set `args.eval_strategy='no'` to skip evaluation."
+            )
+
         super().__init__(**super_kwargs)
 
-        # Transformers v4.46.0 introduced a ValueError if `eval_dataset` is None while eval_strategy is not "no",
-        # but in Sentence Transformers you can also evaluate without an eval_dataset via an evaluator, so we set
-        # it to "dummy" in that case to avoid the ValueError
+        # If the eval_dataset is "dummy", then we set it back to None
         if self.eval_dataset == "dummy":
             self.eval_dataset = None
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index a978d8f5e..29476f65c 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -645,7 +645,14 @@ def test_trainer_no_eval_dataset_with_eval_strategy(
         kwargs["evaluator"] = evaluator
 
     if not use_eval_dataset and not use_evaluator:
-        context = pytest.raises(ValueError, match=".*`args.eval_strategy`.*")
+        context = pytest.raises(
+            ValueError,
+            match=(
+                "You have set `args.eval_strategy` to (IntervalStrategy.STEPS|steps), but you didn't provide an "
+                "`eval_dataset` or an `evaluator`. Either provide an `eval_dataset` or an `evaluator` "
+                "to `SentenceTransformerTrainer`, or set `args.eval_strategy='no'` to skip evaluation."
+            ),
+        )
     else:
         context = nullcontext()
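
Note, not part of the patch: a minimal sketch of the behaviour the new check enforces, assuming the public SentenceTransformerTrainer API; the model name, dataset contents, and output directory are placeholders.

from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    losses,
)

model = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model
train_dataset = Dataset.from_dict(
    {
        "anchor": ["It's nice weather outside today.", "He drove to work."],
        "positive": ["It's so sunny.", "He took the car to the office."],
    }
)
loss = losses.MultipleNegativesRankingLoss(model)

# Evaluation is requested every few steps ...
args = SentenceTransformerTrainingArguments(output_dir="out", eval_strategy="steps")

# ... but neither `eval_dataset` nor `evaluator` is passed, so with this patch the
# constructor raises the ST-specific ValueError instead of the generic Transformers one.
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)

Passing an eval_dataset or an evaluator (for example an EmbeddingSimilarityEvaluator) with otherwise identical arguments constructs the trainer without error, since Sentence Transformers can evaluate via an evaluator even without an eval_dataset.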