From 63824779e39cd7ee175ba71f328fbdf5130892bc Mon Sep 17 00:00:00 2001
From: Ivar Holmlund <89637692+ivholmlu@users.noreply.github.com>
Date: Thu, 27 Jun 2024 00:02:02 +0200
Subject: [PATCH] Added utility for saving results to specified folder (#14375)

---
 .../llama_index/packs/rag_evaluator/base.py | 18 ++++++++++++++++--
 .../pyproject.toml                          |  2 +-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/llama-index-packs/llama-index-packs-rag-evaluator/llama_index/packs/rag_evaluator/base.py b/llama-index-packs/llama-index-packs-rag-evaluator/llama_index/packs/rag_evaluator/base.py
index 0fc5c7ee68ba7..973baaab43b7b 100644
--- a/llama-index-packs/llama-index-packs-rag-evaluator/llama_index/packs/rag_evaluator/base.py
+++ b/llama-index-packs/llama-index-packs-rag-evaluator/llama_index/packs/rag_evaluator/base.py
@@ -4,6 +4,8 @@
 import warnings
 from collections import deque
 from typing import Any, List, Optional
+import os
+from pathlib import Path
 
 import pandas as pd
 import tqdm
@@ -44,6 +46,7 @@ def __init__(
         judge_llm: Optional[LLM] = None,
         embed_model: Optional[BaseEmbedding] = None,
         show_progress: bool = True,
+        result_path: Optional[str] = None,
     ):
         self.query_engine = query_engine
         self.rag_dataset = rag_dataset
@@ -64,6 +67,15 @@ def __init__(
         }
         self.eval_queue = deque(range(len(rag_dataset.examples)))
         self.prediction_dataset = None
+        if result_path is None:
+            self.result_path = Path.cwd()
+        else:
+            self.result_path = Path(result_path)
+            if not self.result_path.is_absolute():
+                self.result_path = Path.cwd() / self.result_path
+
+        if not os.path.exists(self.result_path):
+            os.makedirs(self.result_path)
 
     async def _amake_predictions(
         self,
@@ -223,7 +235,9 @@ def _save_evaluations(self):
             "relevancy": [e.dict() for e in self.evals["relevancy"]],
         }
 
-        with open("_evaluations.json", "w") as json_file:
+        with open(
+            os.path.join(self.result_path, "_evaluations.json"), "w"
+        ) as json_file:
             json.dump(evaluations_objects, json_file)
 
     def _prepare_and_save_benchmark_results(self):
@@ -263,7 +277,7 @@ def _prepare_and_save_benchmark_results(self):
         mean_scores_df.index = mean_scores_df.index.set_names(["metrics"])
 
         # save mean_scores_df
-        mean_scores_df.to_csv("benchmark.csv")
+        mean_scores_df.to_csv(os.path.join(self.result_path, "benchmark.csv"))
         return mean_scores_df
 
     def _make_evaluations(
diff --git a/llama-index-packs/llama-index-packs-rag-evaluator/pyproject.toml b/llama-index-packs/llama-index-packs-rag-evaluator/pyproject.toml
index b91ae04d35261..55a8a92cd82a0 100644
--- a/llama-index-packs/llama-index-packs-rag-evaluator/pyproject.toml
+++ b/llama-index-packs/llama-index-packs-rag-evaluator/pyproject.toml
@@ -29,7 +29,7 @@ license = "MIT"
 maintainers = ["nerdai"]
 name = "llama-index-packs-rag-evaluator"
 readme = "README.md"
-version = "0.1.5"
+version = "0.1.6"
 
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
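
Usage note (not part of the diff above): this patch adds an optional
result_path argument to the pack's constructor. A relative path is
resolved against the current working directory, and the target folder is
created if it does not exist. A minimal sketch, assuming the class
exported from this module is RagEvaluatorPack with the standard pack
run() entry point, and that query_engine and rag_dataset already exist
(illustrative names, not defined in this patch):

    from llama_index.packs.rag_evaluator import RagEvaluatorPack

    # "_evaluations.json" and "benchmark.csv" are written into
    # <cwd>/eval_results; the folder is created on first use.
    rag_evaluator = RagEvaluatorPack(
        query_engine=query_engine,
        rag_dataset=rag_dataset,
        result_path="eval_results",
    )

    # Runs prediction and evaluation, then saves both result files.
    rag_evaluator.run()

Leaving result_path unset keeps the previous behavior of writing both
files to the current working directory.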