Simple utilities for piece-wise evaluation of LLM-based chat and retrieval systems
Build from source
pip install -e .
ragged --quickstart vectordb
from ragged.dataset import LlamaIndexDataset
from ragged.metrics.retriever import HitRate
from ragged.search_utils import QueryType
from lancedb.rerankers import CohereReranker
# 1. Select dataset
# Automatically downloads the dataset from llama-hub, or pass an existing path="/path/to/dataset"
dataset = LlamaIndexDataset("Uber10KDataset2021")
# 2. Select eval metrics
hit_rate = HitRate(
    dataset,
    embedding_registry_id="sentence-transformers",
    embed_model_kwarg={"name": "BAAI/bge-small-en-v1.5"},
    reranker=CohereReranker(),
)
# 3. Evaluate on desired query types
#print(hit_rate.evaluate(top_k=5, query_type=QueryType.VECTOR)) # Evaluate vector search
print(hit_rate.evaluate(top_k=5, query_type="all")) # Evaluate all possible query types
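On repeated runs against the same dataset you can skip re-embedding and re-ingesting it. A minimal sketch, assuming evaluate() accepts the use_existing_table flag used in the benchmarking loop below:
# Reuse the vector table built by the earlier evaluate() call instead of
# re-embedding the dataset (use_existing_table mirrors the wandb example below)
print(hit_rate.evaluate(top_k=5, query_type=QueryType.VECTOR, use_existing_table=True))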
from ragged.dataset import SquadDataset
from ragged.metrics.retriever.hit_rate import HitRate
from lancedb.rerankers import LinearCombinationReranker
from ragged.search_utils import QueryType
import wandb
dataset = SquadDataset()
reranker = LinearCombinationReranker()
hit_rate = HitRate(
    dataset,
    embedding_registry_id="sentence-transformers",
    embed_model_kwarg={"name": "tuned_model_4", "device": "cuda"},
    reranker=reranker,
)
query_types = [QueryType.VECTOR]
use_existing_table = False
for query_type in query_types:
    run = wandb.init(project="ragged_bench", name="Base_4")
    hr = hit_rate.evaluate(5, query_type=query_type, use_existing_table=use_existing_table)
    run.log({f"{query_type}": hr.model_dump()[f"{query_type}"]})
    use_existing_table = True  # reuse the ingested table on subsequent iterations
    wandb.finish()
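The same loop pattern extends to comparing rerankers. A minimal sketch, reusing the HitRate keyword arguments from the examples above; the run names here are arbitrary labels:
from lancedb.rerankers import CohereReranker, CrossEncoderReranker

for name, rr in [("cohere", CohereReranker()), ("cross-encoder", CrossEncoderReranker())]:
    run = wandb.init(project="ragged_bench", name=f"reranker_{name}")
    hit_rate = HitRate(
        dataset,
        embedding_registry_id="sentence-transformers",
        embed_model_kwarg={"name": "BAAI/bge-small-en-v1.5"},
        reranker=rr,
    )
    hr = hit_rate.evaluate(5, query_type=QueryType.VECTOR)
    run.log({f"{QueryType.VECTOR}": hr.model_dump()[f"{QueryType.VECTOR}"]})
    wandb.finish()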
Most popular toy datasets are not semantically challenging enough to evaluate the performance of LLM-based retrieval systems; most of them can be handled well by a simple BM25-based retriever. To generate a custom, semantically challenging dataset, you can use the following code snippet.
NOTE: The directory can contain PDFs, txt files, or any other file format that the LlamaIndex directory reader can handle.
from ragged.dataset.gen.gen_retrieval_data import gen_query_context_dataset
from ragged.dataset.gen.llm_calls import OpenAIInferenceClient
client = OpenAIInferenceClient()
df = gen_query_context_dataset(directory="data/source_files", inference_client=client)
print(df.head())
# save the dataframe
df.to_csv("data.csv")
Now you can evaluate this dataset using the ragged --quickstart vectordb GUI or via the API:
from ragged.dataset.csv import CSVDataset
from ragged.metrics.retriever import HitRate
from lancedb.rerankers import CohereReranker
data = CSVDataset(path="data.csv")
reranker = CohereReranker()
hit_rate = HitRate(
    data,
    reranker=reranker,
    embedding_registry_id="openai",
    embed_model_kwarg={"model": "text-embedding-3-small"},
)
res = hit_rate.evaluate(top_k=5, query_type="all")
print(res)
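CohereReranker requires a Cohere API key. For a fully local run, LanceDB's CrossEncoderReranker can be swapped in; a minimal sketch with the same HitRate arguments, using a local sentence-transformers embedding instead of OpenAI:
from lancedb.rerankers import CrossEncoderReranker

# CrossEncoderReranker runs a local cross-encoder model, so no API key is needed
hit_rate = HitRate(
    data,
    reranker=CrossEncoderReranker(),
    embedding_registry_id="sentence-transformers",
    embed_model_kwarg={"name": "BAAI/bge-small-en-v1.5"},
)
print(hit_rate.evaluate(top_k=5, query_type="all"))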