From 05b79e7cd064e0aa505c7449d4383c395f044a69 Mon Sep 17 00:00:00 2001 From: Nandan Thakur Date: Wed, 14 Jul 2021 12:44:12 +0200 Subject: [PATCH 1/7] add: top-k retrieval accuracy metric added, commonly used for DPR model evaluation --- beir/retrieval/custom_metrics.py | 32 ++++++++++++++++++- beir/retrieval/evaluation.py | 9 ++++-- .../custom/evaluate_custom_metrics.py | 1 + 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/beir/retrieval/custom_metrics.py b/beir/retrieval/custom_metrics.py index 57c970c..df51452 100644 --- a/beir/retrieval/custom_metrics.py +++ b/beir/retrieval/custom_metrics.py @@ -84,4 +84,34 @@ def hole(qrels: Dict[str, Dict[str, int]], Hole[f"Hole@{k}"] = round(Hole[f"Hole@{k}"]/len(results), 5) logging.info("Hole@{}: {:.4f}".format(k, Hole[f"Hole@{k}"])) - return Hole \ No newline at end of file + return Hole + +def top_k_accuracy( + qrels: Dict[str, Dict[str, int]], + results: Dict[str, Dict[str, float]], + k_values: List[int]) -> Tuple[Dict[str, float]]: + + top_k_acc = {} + + for k in k_values: + top_k_acc[f"Accuracy@{k}"] = 0.0 + + k_max, top_hits = max(k_values), {} + logging.info("\n") + + for query_id, doc_scores in results.items(): + top_hits[query_id] = [item[0] for item in sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[0:k_max]] + + for query_id in qrels: + query_relevant_docs = set([doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0]) + for k in k_values: + for relevant_doc_id in query_relevant_docs: + if relevant_doc_id in top_hits[query_id][0:k]: + top_k_acc[f"Accuracy@{k}"] += 1.0 + break + + for k in k_values: + top_k_acc[f"Accuracy@{k}"] = round(top_k_acc[f"Accuracy@{k}"]/len(qrels), 5) + logging.info("Accuracy@{}: {:.4f}".format(k, top_k_acc[f"Accuracy@{k}"])) + + return top_k_acc \ No newline at end of file diff --git a/beir/retrieval/evaluation.py b/beir/retrieval/evaluation.py index 4af0a1c..918929a 100644 --- a/beir/retrieval/evaluation.py +++ b/beir/retrieval/evaluation.py @@ -5,7 +5,7 @@ from 
.search.dense import DenseRetrievalFaissSearch as DRFS from .search.lexical import BM25Search as BM25 from .search.sparse import SparseSearch as SS -from .custom_metrics import mrr, recall_cap, hole +from .custom_metrics import mrr, recall_cap, hole, top_k_accuracy logger = logging.getLogger(__name__) @@ -86,7 +86,7 @@ def evaluate(qrels: Dict[str, Dict[str, int]], @staticmethod def evaluate_custom(qrels: Dict[str, Dict[str, int]], results: Dict[str, Dict[str, float]], - k_values: List[int], metric: str in ["mrr", "r_cap", "hole"]) -> Tuple[Dict[str, float]]: + k_values: List[int], metric: str) -> Tuple[Dict[str, float]]: if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]: return mrr(qrels, results, k_values) @@ -95,4 +95,7 @@ def evaluate_custom(qrels: Dict[str, Dict[str, int]], return recall_cap(qrels, results, k_values) elif metric.lower() in ["hole", "hole@k"]: - return hole(qrels, results, k_values) \ No newline at end of file + return hole(qrels, results, k_values) + + elif metric.lower() in ["acc", "top_k_acc", "accuracy", "accuracy@k", "top_k_accuracy"]: + return top_k_accuracy(qrels, results, k_values) \ No newline at end of file diff --git a/examples/retrieval/evaluation/custom/evaluate_custom_metrics.py b/examples/retrieval/evaluation/custom/evaluate_custom_metrics.py index bd95d25..899c076 100644 --- a/examples/retrieval/evaluation/custom/evaluate_custom_metrics.py +++ b/examples/retrieval/evaluation/custom/evaluate_custom_metrics.py @@ -50,6 +50,7 @@ mrr = retriever.evaluate_custom(qrels, results, retriever.k_values, metric="mrr") recall_cap = retriever.evaluate_custom(qrels, results, retriever.k_values, metric="recall_cap") hole = retriever.evaluate_custom(qrels, results, retriever.k_values, metric="hole") +top_k_accuracy = retriever.evaluate_custom(qrels, results, retriever.k_values, metric="top_k_accuracy") #### Print top-k documents retrieved #### top_k = 10 From d4f9690eee1aa07387d8fa41633dde13185f30b2 Mon Sep 17 00:00:00 2001 From: Nandan Thakur 
Date: Wed, 14 Jul 2021 12:45:14 +0200 Subject: [PATCH 2/7] add: sorting document lengths by size, helps encode faster in exact search --- beir/retrieval/search/dense/exact_search.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/beir/retrieval/search/dense/exact_search.py b/beir/retrieval/search/dense/exact_search.py index 423906c..e75a4cf 100644 --- a/beir/retrieval/search/dense/exact_search.py +++ b/beir/retrieval/search/dense/exact_search.py @@ -14,6 +14,7 @@ def __init__(self, model, batch_size: int = 128, corpus_chunk_size: int = 50000, self.model = model self.batch_size = batch_size self.score_functions = {'cos_sim': cos_sim, 'dot': dot_score} + self.score_function_desc = {'cos_sim': "Cosine Similarity", 'dot': "Dot Product"} self.corpus_chunk_size = corpus_chunk_size self.show_progress_bar = True #TODO: implement no progress bar if false self.convert_to_tensor = True @@ -38,10 +39,14 @@ def search(self, query_embeddings = self.model.encode_queries( queries, batch_size=self.batch_size, show_progress_bar=self.show_progress_bar, convert_to_tensor=self.convert_to_tensor) - logger.info("Encoding Corpus in batches... Warning: This might take a while!") - corpus_ids = list(corpus.keys()) + logger.info("Sorting Corpus by document length (Longest first)...") + + corpus_ids = sorted(corpus, key=lambda k: len(corpus[k].get("title", "") + corpus[k].get("text", "")), reverse=True) corpus = [corpus[cid] for cid in corpus_ids] + logger.info("Encoding Corpus in batches... 
Warning: This might take a while!") + logger.info("Scoring Function: {} ({})".format(self.score_function_desc[score_function], score_function)) + itr = range(0, len(corpus), self.corpus_chunk_size) for batch_num, corpus_start_idx in enumerate(itr): From d58e91cb4e3df710dd849fee5a36d567706791a5 Mon Sep 17 00:00:00 2001 From: Nandan Thakur Date: Wed, 14 Jul 2021 12:47:27 +0200 Subject: [PATCH 3/7] fix and added PCA with rotation (OPQ) and SQ (fp-16) faiss search --- beir/retrieval/search/dense/__init__.py | 2 +- beir/retrieval/search/dense/faiss_index.py | 21 +------- beir/retrieval/search/dense/faiss_search.py | 60 +++++++++++++++++---- 3 files changed, 53 insertions(+), 30 deletions(-) diff --git a/beir/retrieval/search/dense/__init__.py b/beir/retrieval/search/dense/__init__.py index 2942fa3..b69e37a 100644 --- a/beir/retrieval/search/dense/__init__.py +++ b/beir/retrieval/search/dense/__init__.py @@ -1,2 +1,2 @@ from .exact_search import DenseRetrievalExactSearch -from .faiss_search import DenseRetrievalFaissSearch, BinaryFaissSearch, PQFaissSearch, HNSWFaissSearch, FlatIPFaissSearch, PCAFaissSearch \ No newline at end of file +from .faiss_search import DenseRetrievalFaissSearch, BinaryFaissSearch, PQFaissSearch, HNSWFaissSearch, FlatIPFaissSearch, PCAFaissSearch, SQFaissSearch \ No newline at end of file diff --git a/beir/retrieval/search/dense/faiss_index.py b/beir/retrieval/search/dense/faiss_index.py index 0d0e94f..9aa0beb 100644 --- a/beir/retrieval/search/dense/faiss_index.py +++ b/beir/retrieval/search/dense/faiss_index.py @@ -77,7 +77,7 @@ def build( passage_embeddings = np.hstack((passage_embeddings, aux_dims.reshape(-1, 1))) return super().build(passage_ids, passage_embeddings, index, buffer_size) -class FaissPQIndex(FaissIndex): +class FaissTrainIndex(FaissIndex): def search(self, query_embeddings: np.ndarray, k: int, **kwargs) -> Tuple[np.ndarray, np.ndarray]: return super().search(query_embeddings, k) @@ -95,25 +95,6 @@ def build( 
index.train(passage_embeddings) return super().build(passage_ids, passage_embeddings, index, buffer_size) -class FaissPCAIndex(FaissIndex): - def search(self, query_embeddings: np.ndarray, k: int, **kwargs) -> Tuple[np.ndarray, np.ndarray]: - return super().search(query_embeddings, k) - - def save(self, output_path: str): - super().save(output_path) - - @classmethod - def build( - cls, - passage_ids: List[int], - passage_embeddings: np.ndarray, - index: Optional[faiss.Index] = None, - buffer_size: int = 50000, - ): - index.train(passage_embeddings) - return super().build(passage_ids, passage_embeddings, index, buffer_size) - - class FaissBinaryIndex(FaissIndex): def __init__(self, index: faiss.Index, passage_ids: List[int] = None, passage_embeddings: np.ndarray = None): self.index = index diff --git a/beir/retrieval/search/dense/faiss_search.py b/beir/retrieval/search/dense/faiss_search.py index c7f03e4..f15042c 100644 --- a/beir/retrieval/search/dense/faiss_search.py +++ b/beir/retrieval/search/dense/faiss_search.py @@ -1,5 +1,5 @@ from .util import cos_sim, dot_score, normalize, save_dict_to_tsv, load_tsv_to_dict -from .faiss_index import FaissBinaryIndex, FaissPQIndex, FaissHNSWIndex, FaissPCAIndex, FaissIndex +from .faiss_index import FaissBinaryIndex, FaissTrainIndex, FaissHNSWIndex, FaissIndex import logging import sys import torch @@ -137,6 +137,7 @@ def load(self, input_dir: str, prefix: str = "my-index", ext: str = "bin"): def index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None): faiss_ids, corpus_embeddings = super()._index(corpus, score_function) logger.info("Using Binary Hashing in Flat Mode!") + logger.info("Output Dimension: {}".format(self.dim_size)) base_index = faiss.IndexBinaryFlat(self.dim_size * 8) self.faiss_index = FaissBinaryIndex.build(faiss_ids, corpus_embeddings, base_index) @@ -154,25 +155,34 @@ def search(self, class PQFaissSearch(DenseRetrievalFaissSearch): def __init__(self, model, batch_size: int = 128, 
corpus_chunk_size: int = 50000, num_of_centroids: int = 96, - code_size: int = 8, similarity_metric=faiss.METRIC_INNER_PRODUCT, **kwargs): + code_size: int = 8, similarity_metric=faiss.METRIC_INNER_PRODUCT, use_rotation: bool = False, **kwargs): super(PQFaissSearch, self).__init__(model, batch_size, corpus_chunk_size, **kwargs) self.num_of_centroids = num_of_centroids self.code_size = code_size self.similarity_metric = similarity_metric + self.use_rotation = use_rotation def load(self, input_dir: str, prefix: str = "my-index", ext: str = "pq"): input_faiss_path, passage_ids = super()._load(input_dir, prefix, ext) base_index = faiss.read_index(input_faiss_path) - self.faiss_index = FaissPQIndex(base_index, passage_ids) + self.faiss_index = FaissTrainIndex(base_index, passage_ids) def index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None, **kwargs): - faiss_ids, corpus_embeddings = super()._index(corpus, score_function, **kwargs) + faiss_ids, corpus_embeddings = super()._index(corpus, score_function, **kwargs) + logger.info("Using Product Quantization (PQ) in Flat mode!") logger.info("Parameters Used: num_of_centroids: {} ".format(self.num_of_centroids)) - logger.info("Parameters Used: code_size: {}".format(self.code_size)) - + logger.info("Parameters Used: code_size: {}".format(self.code_size)) + base_index = faiss.IndexPQ(self.dim_size, self.num_of_centroids, self.code_size, self.similarity_metric) - self.faiss_index = FaissPQIndex.build(faiss_ids, corpus_embeddings, base_index) + + if self.use_rotation: + logger.info("Rotating data before encoding it with a product quantizer...") + logger.info("Creating OPQ Matrix...") + opq_matrix = faiss.OPQMatrix(self.dim_size, self.code_size) + base_index = faiss.IndexPreTransform(opq_matrix, base_index) + + self.faiss_index = FaissTrainIndex.build(faiss_ids, corpus_embeddings, base_index) def save(self, output_dir: str, prefix: str = "my-index", ext: str = "pq"): super().save(output_dir, prefix, ext) @@ 
-256,7 +266,7 @@ def __init__(self, model, base_index: faiss.Index, output_dimension: int, batch_ def load(self, input_dir: str, prefix: str = "my-index", ext: str = "pca"): input_faiss_path, passage_ids = super()._load(input_dir, prefix, ext) base_index = faiss.read_index(input_faiss_path) - self.faiss_index = FaissPCAIndex(base_index, passage_ids) + self.faiss_index = FaissTrainIndex(base_index, passage_ids) def index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None, **kwargs): faiss_ids, corpus_embeddings = super()._index(corpus, score_function, **kwargs) @@ -264,11 +274,43 @@ def index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None, * logger.info("Input Dimension: {}, Output Dimension: {}".format(self.dim_size, self.output_dim)) pca_matrix = faiss.PCAMatrix(self.dim_size, self.output_dim, 0, True) final_index = faiss.IndexPreTransform(pca_matrix, self.base_index) - self.faiss_index = FaissPCAIndex.build(faiss_ids, corpus_embeddings, final_index) + self.faiss_index = FaissTrainIndex.build(faiss_ids, corpus_embeddings, final_index) def save(self, output_dir: str, prefix: str = "my-index", ext: str = "pca"): super().save(output_dir, prefix, ext) + def search(self, + corpus: Dict[str, Dict[str, str]], + queries: Dict[str, str], + top_k: int, + score_function: str = None, **kwargs) -> Dict[str, Dict[str, float]]: + + return super().search(corpus, queries, top_k, score_function, **kwargs) + +class SQFaissSearch(DenseRetrievalFaissSearch): + def __init__(self, model, batch_size: int = 128, corpus_chunk_size: int = 50000, + similarity_metric=faiss.METRIC_INNER_PRODUCT, quantizer_type: str = "QT_fp16", **kwargs): + super(PCAFaissSearch, self).__init__(model, batch_size, corpus_chunk_size, **kwargs) + self.similarity_metric = similarity_metric + self.qtype = quantizer_type + + def load(self, input_dir: str, prefix: str = "my-index", ext: str = "sq"): + input_faiss_path, passage_ids = super()._load(input_dir, prefix, ext) + base_index = 
faiss.read_index(input_faiss_path) + self.faiss_index = FaissTrainIndex(base_index, passage_ids) + + def index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None, **kwargs): + faiss_ids, corpus_embeddings = super()._index(corpus, score_function, **kwargs) + + logger.info("Using Scalar Quantizer in Flat Mode!") + logger.info("Parameters Used: quantizer_type: {}".format(self.qtype)) + + base_index = faiss.IndexScalarQuantizer(self.dim_size, self.qtype, self.similarity_metric) + self.faiss_index = FaissTrainIndex.build(faiss_ids, corpus_embeddings, base_index) + + def save(self, output_dir: str, prefix: str = "my-index", ext: str = "sq"): + super().save(output_dir, prefix, ext) + def search(self, corpus: Dict[str, Dict[str, str]], queries: Dict[str, str], From d799cdafecb9ede35fb87bad9ec60c0ce4e77d1d Mon Sep 17 00:00:00 2001 From: Nandan Thakur Date: Thu, 15 Jul 2021 11:41:07 +0200 Subject: [PATCH 4/7] fix: SQ faiss search and add: sorting of documents by longest first --- beir/retrieval/search/dense/faiss_search.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/beir/retrieval/search/dense/faiss_search.py b/beir/retrieval/search/dense/faiss_search.py index f15042c..70e16e4 100644 --- a/beir/retrieval/search/dense/faiss_search.py +++ b/beir/retrieval/search/dense/faiss_search.py @@ -61,12 +61,14 @@ def save(self, output_dir: str, prefix: str, ext: str): def _index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None): - logger.info("Encoding Corpus in batches... 
Warning: This might take a while!") - corpus_ids = list(corpus.keys()) + logger.info("Sorting Corpus by document length (Longest first)...") + corpus_ids = sorted(corpus, key=lambda k: len(corpus[k].get("title", "") + corpus[k].get("text", "")), reverse=True) self._create_mapping_ids(corpus_ids) corpus = [corpus[cid] for cid in corpus_ids] normalize_embeddings = True if score_function == "cos_sim" else False + logger.info("Encoding Corpus in batches... Warning: This might take a while!") + itr = range(0, len(corpus), self.corpus_chunk_size) for batch_num, corpus_start_idx in enumerate(itr): @@ -290,9 +292,9 @@ def search(self, class SQFaissSearch(DenseRetrievalFaissSearch): def __init__(self, model, batch_size: int = 128, corpus_chunk_size: int = 50000, similarity_metric=faiss.METRIC_INNER_PRODUCT, quantizer_type: str = "QT_fp16", **kwargs): - super(PCAFaissSearch, self).__init__(model, batch_size, corpus_chunk_size, **kwargs) + super(SQFaissSearch, self).__init__(model, batch_size, corpus_chunk_size, **kwargs) self.similarity_metric = similarity_metric - self.qtype = quantizer_type + self.qname = quantizer_type def load(self, input_dir: str, prefix: str = "my-index", ext: str = "sq"): input_faiss_path, passage_ids = super()._load(input_dir, prefix, ext) @@ -303,9 +305,10 @@ def index(self, corpus: Dict[str, Dict[str, str]], score_function: str = None, * faiss_ids, corpus_embeddings = super()._index(corpus, score_function, **kwargs) logger.info("Using Scalar Quantizer in Flat Mode!") - logger.info("Parameters Used: quantizer_type: {}".format(self.qtype)) + logger.info("Parameters Used: quantizer_type: {}".format(self.qname)) - base_index = faiss.IndexScalarQuantizer(self.dim_size, self.qtype, self.similarity_metric) + qtype = getattr(faiss.ScalarQuantizer, self.qname) + base_index = faiss.IndexScalarQuantizer(self.dim_size, qtype, self.similarity_metric) self.faiss_index = FaissTrainIndex.build(faiss_ids, corpus_embeddings, base_index) def save(self, output_dir: 
str, prefix: str = "my-index", ext: str = "sq"): From 6c630d5691536b3b87db748194e1cfa20ddc19bb Mon Sep 17 00:00:00 2001 From: Nandan Thakur Date: Mon, 19 Jul 2021 16:58:19 +0200 Subject: [PATCH 5/7] fix: add exception if key not present in corpus while training --- beir/retrieval/train.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/beir/retrieval/train.py b/beir/retrieval/train.py index a7391df..60af1c2 100644 --- a/beir/retrieval/train.py +++ b/beir/retrieval/train.py @@ -9,6 +9,7 @@ from typing import Dict, Type, List, Callable, Iterable, Tuple import logging import time +import difflib logger = logging.getLogger(__name__) @@ -29,9 +30,12 @@ def load_train(self, corpus: Dict[str, Dict[str, str]], queries: Dict[str, str], for query_id in query_ids_batch: for corpus_id, score in qrels[query_id].items(): if score >= 1: # if score = 0, we don't consider for training - s1 = queries[query_id] - s2 = corpus[corpus_id].get("title") + " " + corpus[corpus_id].get("text") - train_samples.append(InputExample(guid=idx, texts=[s1, s2], label=1)) + try: + s1 = queries[query_id] + s2 = corpus[corpus_id].get("title") + " " + corpus[corpus_id].get("text") + train_samples.append(InputExample(guid=idx, texts=[s1, s2], label=1)) + except KeyError: + logging.error("Error: Key {} not present in corpus!".format(corpus_id)) logger.info("Loaded {} training pairs.".format(len(train_samples))) return train_samples From 7e4fc71049dfbe39651c290431edde735fe93902 Mon Sep 17 00:00:00 2001 From: Nandan Thakur Date: Mon, 19 Jul 2021 16:59:54 +0200 Subject: [PATCH 6/7] add: New metric and new script provided by @joshdevins in #28 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 2558e7d..2e4afef 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ To easily understand and get your hands dirty with BEIR, we invite you to try ou | ------------------------------------------- | ---------- | | Hybrid sparse retrieval 
using SPARTA | [evaluate_sparta.py](https://github.com/UKPLab/beir/blob/main/examples/retrieval/evaluation/sparse/evaluate_sparta.py) | | Sparse retrieval using docT5query and Pyserini | [evaluate_anserini_docT5query.py](https://github.com/UKPLab/beir/blob/main/examples/retrieval/evaluation/sparse/evaluate_anserini_docT5query.py) | +| Sparse retrieval using docT5query (MultiGPU) and Pyserini | [evaluate_anserini_docT5query_parallel.py](https://github.com/UKPLab/beir/blob/main/examples/retrieval/evaluation/sparse/evaluate_anserini_docT5query_parallel.py) :new: | | Sparse retrieval using DeepCT and Pyserini :new: | [evaluate_deepct.py](https://github.com/UKPLab/beir/blob/main/examples/retrieval/evaluation/sparse/evaluate_deepct.py) | ### :beers: Reranking (Evaluation) @@ -429,6 +430,7 @@ We also include custom-metrics now which can be used for evaluation, please refe - MRR (``MRR@k``) - Capped Recall (``R_cap@k``) - Hole (``Hole@k``): % of top-k docs retrieved unseen by annotators +- Top-K Accuracy (``Accuracy@k``): % of relevant docs present in top-k results ## :beers: Citing & Authors From 17a5a630dd849570da12be300647d0ea091dc35a Mon Sep 17 00:00:00 2001 From: Nandan Thakur Date: Mon, 19 Jul 2021 17:18:51 +0200 Subject: [PATCH 7/7] fix: training doc-ids present in qrels in fever not present in corpus, problems with special characters solved --- README.md | 4 +++- examples/dataset/md5.csv | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2e4afef..1dd6ea6 100644 --- a/README.md +++ b/README.md @@ -213,6 +213,8 @@ For other datasets, just use one of the datasets names, mention below. ## :beers: Available Datasets +Command to generate the md5 hash of a dataset using the terminal: ``md5sum filename.zip``. 
+ | Dataset | Website| BEIR-Name | Queries | Corpus | Rel D/Q | Down-load | md5 | | -------- | -----| ---------| ----------- | ---------| ---------| :----------: | :------:| | MSMARCO | [Homepage](https://microsoft.github.io/msmarco/)| ``msmarco`` | 6,980 | 8.84M | 1.1 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/msmarco.zip) | ``444067daf65d982533ea17ebd59501e4`` | @@ -230,7 +232,7 @@ For other datasets, just use one of the datasets names, mention below. | Quora| [Homepage](https://www.quora.com/q/quoradata/First-Quora-Dataset-Release-Question-Pairs) | ``quora``| 10,000 | 523K | 1.6 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/quora.zip) | ``18fb154900ba42a600f84b839c173167`` | | DBPedia | [Homepage](https://github.com/iai-group/DBpedia-Entity/) | ``dbpedia-entity``| 400 | 4.63M | 38.2 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/dbpedia-entity.zip) | ``c2a39eb420a3164af735795df012ac2c`` | | SCIDOCS| [Homepage](https://allenai.org/data/scidocs) | ``scidocs``| 1,000 | 25K | 4.9 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/scidocs.zip) | ``38121350fc3a4d2f48850f6aff52e4a9`` | -| FEVER| [Homepage](http://fever.ai) | ``fever``| 6,666 | 5.42M | 1.2| [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/fever.zip) | ``88591ef8eb2913126d0c93ecbde6285f`` | +| FEVER | [Homepage](http://fever.ai) | ``fever``| 6,666 | 5.42M | 1.2| [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/fever.zip) | ``5a818580227bfb4b35bb6fa46d9b6c03`` | | Climate-FEVER| [Homepage](http://climatefever.ai) | ``climate-fever``| 1,535 | 5.42M | 3.0 | [Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/climate-fever.zip) | ``8b66f0a9126c521bae2bde127b4dc99d`` | | SciFact| [Homepage](https://github.com/allenai/scifact) | ``scifact``| 300 | 5K | 1.1 | 
[Link](https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/scifact.zip) | ``5f7d1de60b170fc8027bb7898e2efca1`` | | Robust04 | [Homepage](https://trec.nist.gov/data/robust/04.guidelines.html) | ``robust04``| 249 | 528K | 69.9 | No | [How to Reproduce?](https://github.com/UKPLab/beir/blob/main/examples/dataset#3-robust04) | diff --git a/examples/dataset/md5.csv b/examples/dataset/md5.csv index 1a4128b..b8b518f 100644 --- a/examples/dataset/md5.csv +++ b/examples/dataset/md5.csv @@ -11,7 +11,7 @@ cqadupstack.zip,4e41456d7df8ee7760a7f866133bda78 quora.zip,18fb154900ba42a600f84b839c173167 dbpedia-entity.zip,c2a39eb420a3164af735795df012ac2c scidocs.zip,38121350fc3a4d2f48850f6aff52e4a9 -fever.zip,88591ef8eb2913126d0c93ecbde6285f +fever.zip,5a818580227bfb4b35bb6fa46d9b6c03 climate-fever.zip,8b66f0a9126c521bae2bde127b4dc99d scifact.zip,5f7d1de60b170fc8027bb7898e2efca1 germanquad.zip,95a581c3162d10915a418609bcce851b