Skip to content

Commit

Permalink
Made result loading more permissive, changed eval splits for HotpotQA and DBPedia (#1554)
Browse files Browse the repository at this point in the history

* Removed train and dev from eval splits on HotpotQA

* Removed dev from eval splits on DBPedia

* Made task_results validation more permissive

* Re-added exception in get_score

* Ran linting
  • Loading branch information
x-tabdeveloping authored Dec 6, 2024
1 parent 2ee8d44 commit 2905813
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
4 changes: 3 additions & 1 deletion mteb/load_results/task_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,9 @@ def validate_and_filter_scores(self, task: AbsTask | None = None) -> AbsTask:
)
seen_splits.add(split)
if seen_splits != set(splits):
raise ValueError(f"Missing splits {set(splits) - seen_splits}")
logger.warning(
f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}"
)
new_res = {**self.to_dict(), "scores": new_scores}
new_res = TaskResult.from_validated(**new_res)
return new_res
2 changes: 1 addition & 1 deletion mteb/tasks/Retrieval/eng/DBPediaRetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DBPedia(AbsTaskRetrieval):
type="Retrieval",
category="s2p",
modalities=["text"],
eval_splits=["dev", "test"],
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=("2017-01-01", "2017-01-01"), # best guess: based on publication date
Expand Down
2 changes: 1 addition & 1 deletion mteb/tasks/Retrieval/eng/HotpotQARetrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class HotpotQA(AbsTaskRetrieval):
type="Retrieval",
category="s2p",
modalities=["text"],
eval_splits=["train", "dev", "test"],
eval_splits=["test"],
eval_langs=["eng-Latn"],
main_score="ndcg_at_10",
date=("2018-01-01", "2018-12-31"), # best guess: based on publication date
Expand Down

0 comments on commit 2905813

Please sign in to comment.