
Commit

Adding evaluation checks to prevent Transformer ValueError (#3105)
* Adding evaluation checks to prevent Transformer ValueError

* Update trainer.py

* Update trainer.py

* Update trainer.py

* Rewrite comments & ValueError somewhat

* Fix a typo that I introduced

* Extend the no_eval_dataset_with_eval_strategy test with newly updated edge case

* Fix tests for Python 3.9, 3.10

---------

Co-authored-by: Tom Aarsen <[email protected]>
stsfaroz and tomaarsen authored Dec 23, 2024
1 parent cfb883c commit 52162f3
Showing 2 changed files with 23 additions and 4 deletions.
18 changes: 15 additions & 3 deletions sentence_transformers/trainer.py
@@ -206,6 +206,10 @@ def __init__(
             train_dataset = DatasetDict(train_dataset)
         if isinstance(eval_dataset, dict) and not isinstance(eval_dataset, DatasetDict):
             eval_dataset = DatasetDict(eval_dataset)
+
+        # Transformers v4.46.0 introduced a ValueError if `eval_dataset` is None while eval_strategy is not "no",
+        # but in Sentence Transformers you can also evaluate without an eval_dataset via an evaluator, so we set
+        # it to "dummy" in that case to avoid the ValueError
         super_kwargs = {
             "model": None if self.model_init else model,
             "args": args,
@@ -223,10 +227,18 @@ def __init__(
             super_kwargs["processing_class"] = tokenizer
         else:
             super_kwargs["tokenizer"] = tokenizer
+
+        # super().__init__() will still raise a ValueError if `eval_dataset` and `evaluator` are both None
+        # while eval_strategy is not "no", so let's get ahead of it with a more useful ST-specific error message
+        if eval_dataset is None and evaluator is None and args.eval_strategy != "no":
+            raise ValueError(
+                f"You have set `args.eval_strategy` to {args.eval_strategy}, but you didn't provide an `eval_dataset` or an `evaluator`. "
+                "Either provide an `eval_dataset` or an `evaluator` to `SentenceTransformerTrainer`, "
+                "or set `args.eval_strategy='no'` to skip evaluation."
+            )
+
         super().__init__(**super_kwargs)
-        # Transformers v4.46.0 introduced a ValueError if `eval_dataset` is None while eval_strategy is not "no",
-        # but in Sentence Transformers you can also evaluate without an eval_dataset via an evaluator, so we set
-        # it to "dummy" in that case to avoid the ValueError
+        # If the eval_dataset is "dummy", then we set it back to None
         if self.eval_dataset == "dummy":
             self.eval_dataset = None

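For illustration, here is a minimal usage-level sketch (not part of the commit) of what the trainer.py change means: requesting evaluation without providing either an `eval_dataset` or an `evaluator` now fails fast with the ST-specific message above, instead of the generic Transformers error. The model name, dataset columns, and output directory below are placeholders, and it assumes a transformers version that accepts the `eval_strategy` argument name (>= 4.41).

from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss

model = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model
train_dataset = Dataset.from_dict(
    {
        "anchor": ["A plane takes off", "A man plays guitar"],
        "positive": ["An airplane is taking off", "A man is playing a guitar"],
    }
)

# Evaluation every few steps is requested, but no `eval_dataset` or `evaluator` is passed,
# so construction raises the new ValueError instead of failing later inside transformers.
args = SentenceTransformerTrainingArguments(output_dir="tmp_trainer", eval_strategy="steps")
try:
    SentenceTransformerTrainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        loss=MultipleNegativesRankingLoss(model),
    )
except ValueError as exc:
    print(exc)

Passing an `evaluator` without an `eval_dataset` remains valid: per the comments in the diff, `eval_dataset` is internally set to the "dummy" sentinel so that `transformers.Trainer.__init__` does not complain, then reset to None right after.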
9 changes: 8 additions & 1 deletion tests/test_trainer.py
@@ -645,7 +645,14 @@ def test_trainer_no_eval_dataset_with_eval_strategy(
         kwargs["evaluator"] = evaluator

     if not use_eval_dataset and not use_evaluator:
-        context = pytest.raises(ValueError, match=".*`args.eval_strategy`.*")
+        context = pytest.raises(
+            ValueError,
+            match=(
+                "You have set `args.eval_strategy` to (IntervalStrategy.STEPS|steps), but you didn't provide an "
+                "`eval_dataset` or an `evaluator`. Either provide an `eval_dataset` or an `evaluator` "
+                "to `SentenceTransformerTrainer`, or set `args.eval_strategy='no'` to skip evaluation."
+            ),
+        )
     else:
         context = nullcontext()

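A standalone sketch (not part of the commit) of why the match pattern accepts both spellings: depending on the Python and transformers versions, rendering `args.eval_strategy` in an f-string yields either the qualified enum member ("IntervalStrategy.STEPS") or its value ("steps"), which is what the "Fix tests for Python 3.9, 3.10" step in the commit message addressed.

import re

# The same alternation used in the test; pytest.raises(match=...) applies re.search.
pattern = (
    "You have set `args.eval_strategy` to (IntervalStrategy.STEPS|steps), "
    "but you didn't provide an `eval_dataset` or an `evaluator`."
)

for rendered in ("IntervalStrategy.STEPS", "steps"):
    message = (
        f"You have set `args.eval_strategy` to {rendered}, but you didn't provide an "
        "`eval_dataset` or an `evaluator`. Either provide an `eval_dataset` or an `evaluator` "
        "to `SentenceTransformerTrainer`, or set `args.eval_strategy='no'` to skip evaluation."
    )
    assert re.search(pattern, message) is not None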
