fix: Add listing all available benchmarks CLI option (#1256)
* add benchmarks.md in README

* add cli option

* add benchmark cli test case

* correct typo
isaac-chung authored Sep 29, 2024
1 parent f04279d commit 5e1e290
Showing 5 changed files with 42 additions and 1 deletion.
2 changes: 2 additions & 0 deletions README.md
@@ -378,6 +378,7 @@ df = results_to_dataframe(results)
| Documentation | |
| ------------------------------ | ---------------------- |
| 📋 [Tasks] | Overview of available tasks |
| 📐 [Benchmarks] | Overview of available benchmarks |
| 📈 [Leaderboard] | The interactive leaderboard of the benchmark |
| 🤖 [Adding a model] | Information related to how to submit a model to the leaderboard |
| 👩‍🔬 [Reproducible workflows] | Information related to how to reproduce and create reproducible workflows with MTEB |
@@ -387,6 +388,7 @@ df = results_to_dataframe(results)
| 🌐 [MMTEB] | An open-source effort to extend MTEB to cover a broad set of languages |

[Tasks]: docs/tasks.md
[Benchmarks]: docs/benchmarks.md
[Contributing]: CONTRIBUTING.md
[Adding a model]: docs/adding_a_model.md
[Adding a dataset]: docs/adding_a_dataset.md
2 changes: 1 addition & 1 deletion docs/benchmarks.md
@@ -1,5 +1,5 @@
## Available benchmarks
-The following tables give you an overview of the benchmarks in MTEB.
+The following table gives you an overview of the benchmarks in MTEB.

<details>

24 changes: 24 additions & 0 deletions mteb/cli.py
@@ -30,6 +30,14 @@
mteb available_tasks --task_types Clustering # list tasks of type Clustering
```
## Listing Available Benchmarks
To list the available benchmarks within MTEB, use the `mteb available_benchmarks` command. For example:
```bash
mteb available_benchmarks # list all available benchmarks
```
## Creating Model Metadata
@@ -144,6 +152,12 @@ def run(args: argparse.Namespace) -> None:
_save_model_metadata(model, Path(args.output_folder))


def available_benchmarks(args: argparse.Namespace) -> None:
benchmarks = mteb.get_benchmarks()
eval = mteb.MTEB(tasks=benchmarks)
eval.mteb_benchmarks()


def available_tasks(args: argparse.Namespace) -> None:
tasks = mteb.get_tasks(
categories=args.categories,
@@ -198,6 +212,15 @@ def add_available_tasks_parser(subparsers) -> None:
parser.set_defaults(func=available_tasks)


def add_available_benchmarks_parser(subparsers) -> None:
parser = subparsers.add_parser(
"available_benchmarks", help="List the available benchmarks within MTEB"
)
add_task_selection_args(parser)

parser.set_defaults(func=available_benchmarks)


def add_run_parser(subparsers) -> None:
parser = subparsers.add_parser("run", help="Run a model on a set of tasks")

@@ -321,6 +344,7 @@ def main():
)
add_run_parser(subparsers)
add_available_tasks_parser(subparsers)
add_available_benchmarks_parser(subparsers)
add_create_meta_parser(subparsers)

args = parser.parse_args()
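The cli.py additions follow the existing subcommand pattern: a small parser-factory function registers a subcommand and binds its handler via `set_defaults(func=...)`, and `main()` dispatches through `args.func(args)`. A minimal, self-contained sketch of that wiring — `list_benchmarks` and the benchmark names here are illustrative stand-ins, not mteb's real data:

```python
import argparse


def list_benchmarks(args: argparse.Namespace) -> None:
    # Stand-in for the real handler, which would call mteb.get_benchmarks().
    for name in ["MTEB(eng)", "MTEB(Scandinavian)"]:
        print(name)


def add_available_benchmarks_parser(subparsers) -> None:
    # Same shape as the parser factories in mteb/cli.py: register the
    # subcommand, then bind the handler so main() can dispatch generically.
    parser = subparsers.add_parser(
        "available_benchmarks", help="List the available benchmarks"
    )
    parser.set_defaults(func=list_benchmarks)


def main(argv=None) -> None:
    parser = argparse.ArgumentParser(prog="demo")
    subparsers = parser.add_subparsers(dest="command", required=True)
    add_available_benchmarks_parser(subparsers)
    args = parser.parse_args(argv)
    args.func(args)  # dispatch to whichever handler set_defaults bound


main(["available_benchmarks"])
```

Because every subcommand binds its own `func`, `main()` never needs to grow a dispatch table — adding a command is one `add_*_parser` call, exactly as this commit does.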
6 changes: 6 additions & 0 deletions mteb/evaluation/MTEB.py
@@ -168,6 +168,12 @@ def _display_tasks(self, task_list, name=None):
console.print(f"{prefix}{name}{category}{multilingual}")
console.print("\n")

def mteb_benchmarks(self):
"""Get all benchmarks available in the MTEB."""
for benchmark in self._tasks:
name = benchmark.name
self._display_tasks(benchmark.tasks, name=name)

@classmethod
def mteb_tasks(cls):
"""Get all tasks available in the MTEB."""
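The new `mteb_benchmarks` method simply walks the benchmark objects in `self._tasks` and hands each benchmark's task list to `_display_tasks` under the benchmark's name. The grouping logic in isolation, with plain dataclasses standing in for mteb's real `Benchmark` and task types (the names below are assumptions for illustration):

```python
from dataclasses import dataclass, field


@dataclass
class FakeTask:
    name: str


@dataclass
class FakeBenchmark:
    # Mirrors the two attributes the diff relies on: .name and .tasks
    name: str
    tasks: list = field(default_factory=list)


def display_benchmarks(benchmarks) -> list[str]:
    # Same loop shape as mteb_benchmarks(): one header line per benchmark,
    # then its tasks, rather than one flat undifferentiated task list.
    lines = []
    for benchmark in benchmarks:
        lines.append(benchmark.name)
        for task in benchmark.tasks:
            lines.append(f"  - {task.name}")
    return lines


demo = [FakeBenchmark("MTEB(eng)", [FakeTask("Banking77Classification")])]
for line in display_benchmarks(demo):
    print(line)
```

Grouping by benchmark is what makes the CLI output scannable: the same task can appear under several benchmarks, so a flat list would lose that membership information.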
9 changes: 9 additions & 0 deletions tests/test_cli.py
@@ -22,6 +22,15 @@ def test_available_tasks():
), "Sample task Banking77Classification task not found in available tasks"


def test_available_benchmarks():
command = f"{sys.executable} -m mteb available_benchmarks"
result = subprocess.run(command, shell=True, capture_output=True, text=True)
assert result.returncode == 0, "Command failed"
assert (
"MTEB(eng)" in result.stdout
), "Sample benchmark MTEB(eng) task not found in available bencmarks"


run_task_fixures = [
(
"average_word_embeddings_komninos",
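The new test reuses the subprocess pattern from `test_available_tasks`: launch the CLI in a child process with the same interpreter, capture its output as text, and assert on the exit code and stdout. The pattern in isolation, running a trivial inline script instead of the real `mteb` entry point (the `run_cli` helper is illustrative, not part of the test suite):

```python
import subprocess
import sys


def run_cli(code: str) -> subprocess.CompletedProcess:
    # sys.executable guarantees the child uses the same interpreter as the
    # test run; capture_output + text gives stdout/stderr as str, not bytes.
    return subprocess.run(
        [sys.executable, "-c", code], capture_output=True, text=True
    )


result = run_cli("print('MTEB(eng)')")
assert result.returncode == 0, "Command failed"
assert "MTEB(eng)" in result.stdout
```

Exercising the CLI through a subprocess, rather than calling `main()` in-process, also covers the `python -m mteb` entry point and argument parsing end to end.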
