diff --git a/autorag/nodes/generator/vllm.py b/autorag/nodes/generator/vllm.py index d61285708..c6684d83f 100644 --- a/autorag/nodes/generator/vllm.py +++ b/autorag/nodes/generator/vllm.py @@ -41,7 +41,8 @@ def __del__(self): if torch.cuda.is_available(): from vllm.distributed.parallel_state import ( - destroy_model_parallel, destroy_distributed_environment + destroy_model_parallel, + destroy_distributed_environment, ) destroy_model_parallel() diff --git a/autorag/nodes/passagefilter/similarity_percentile_cutoff.py b/autorag/nodes/passagefilter/similarity_percentile_cutoff.py index 9dd6ebb94..cbed737c7 100644 --- a/autorag/nodes/passagefilter/similarity_percentile_cutoff.py +++ b/autorag/nodes/passagefilter/similarity_percentile_cutoff.py @@ -11,7 +11,7 @@ embedding_query_content, ) from autorag.utils import result_to_dataframe -from autorag.utils.util import empty_cuda_cache +from autorag.utils.util import empty_cuda_cache, pop_params class SimilarityPercentileCutoff(BasePassageFilter): @@ -21,7 +21,7 @@ def __init__(self, project_dir: Union[str, Path], *args, **kwargs): :param project_dir: The project directory to use for initializing the module :param embedding_model: The embedding model string to use for calculating similarity - Default is "openai" which is OpenAI text-embedding-ada-002 embedding model. + Default is "openai" which is OpenAI text-embedding-ada-002 embedding model. 
""" super().__init__(project_dir, *args, **kwargs) embedding_model_str = kwargs.pop("embedding_model", "openai") @@ -34,9 +34,10 @@ def __del__(self): empty_cuda_cache() @result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"]) - def pure(self, previous_result: pd.DataFrame, *args, **kwargs): + def pure(self, previous_result: pd.DataFrame, **kwargs): queries, contents, scores, ids = self.cast_to_run(previous_result) - return self._pure(queries, contents, scores, ids, *args, **kwargs) + kwargs = pop_params(self._pure, kwargs) + return self._pure(queries, contents, scores, ids, **kwargs) def _pure( self, diff --git a/autorag/nodes/passagefilter/similarity_threshold_cutoff.py b/autorag/nodes/passagefilter/similarity_threshold_cutoff.py index 96f99f1ce..29f0b2e59 100644 --- a/autorag/nodes/passagefilter/similarity_threshold_cutoff.py +++ b/autorag/nodes/passagefilter/similarity_threshold_cutoff.py @@ -8,8 +8,9 @@ from autorag.nodes.passagefilter.base import BasePassageFilter from autorag.utils.util import ( embedding_query_content, - result_to_dataframe, empty_cuda_cache, + result_to_dataframe, + pop_params, ) @@ -20,10 +21,10 @@ def __init__(self, project_dir: str, *args, **kwargs): :param project_dir: The project directory to use for initializing the module :param embedding_model: The embedding model string to use for calculating similarity - Default is "openai" which is OpenAI text-embedding-ada-002 embedding model. + Default is "openai" which is OpenAI text-embedding-ada-002 embedding model. 
""" super().__init__(project_dir, *args, **kwargs) - embedding_model_str = kwargs.pop("embedding_model", "openai") + embedding_model_str = kwargs.get("embedding_model", "openai") self.embedding_model = embedding_models[embedding_model_str]() def __del__(self): @@ -33,6 +34,7 @@ def __del__(self): @result_to_dataframe(["retrieved_contents", "retrieved_ids", "retrieve_scores"]) def pure(self, previous_result: pd.DataFrame, *args, **kwargs): + kwargs = pop_params(self._pure, kwargs) queries, contents, scores, ids = self.cast_to_run(previous_result) return self._pure(queries, contents, scores, ids, *args, **kwargs) diff --git a/autorag/vectordb/milvus.py b/autorag/vectordb/milvus.py index 7a4a1c9f0..0bc4103bc 100644 --- a/autorag/vectordb/milvus.py +++ b/autorag/vectordb/milvus.py @@ -32,7 +32,7 @@ def __init__( user: str = "", password: str = "", timeout: Optional[float] = None, - params: Dict[str, Any] = {}, + params: Dict[str, Any] = {}, ): super().__init__(embedding_model, similarity_metric, embedding_batch) @@ -49,7 +49,7 @@ def __init__( self.timeout = timeout self.params = params self.index_type = index_type - + # Set Collection if not utility.has_collection(collection_name, timeout=timeout): # Get the dimension of the embeddings diff --git a/tests/autorag/nodes/passagefilter/test_similarity_percentile_cutoff.py b/tests/autorag/nodes/passagefilter/test_similarity_percentile_cutoff.py index 7b638f041..7e8ca80fa 100644 --- a/tests/autorag/nodes/passagefilter/test_similarity_percentile_cutoff.py +++ b/tests/autorag/nodes/passagefilter/test_similarity_percentile_cutoff.py @@ -56,6 +56,9 @@ def test_similarity_percentile_cutoff(similarity_percentile_cutoff_instance): ) def test_similarity_percentile_cutoff_node(): result_df = SimilarityPercentileCutoff.run_evaluator( - project_dir=project_dir, previous_result=previous_result, percentile=0.9 + project_dir=project_dir, + previous_result=previous_result, + percentile=0.9, + embedding_model="openai_embed_3_large", ) 
base_passage_filter_node_test(result_df) diff --git a/tests/autorag/nodes/passagefilter/test_similarity_threshold_cutoff.py b/tests/autorag/nodes/passagefilter/test_similarity_threshold_cutoff.py index 224891b4a..495a46688 100644 --- a/tests/autorag/nodes/passagefilter/test_similarity_threshold_cutoff.py +++ b/tests/autorag/nodes/passagefilter/test_similarity_threshold_cutoff.py @@ -49,6 +49,9 @@ def test_similarity_threshold_cutoff(similarity_threshold_cutoff_instance): ) def test_similarity_threshold_cutoff_node(): result_df = SimilarityThresholdCutoff.run_evaluator( - project_dir=project_dir, previous_result=previous_result, threshold=0.9 + project_dir=project_dir, + previous_result=previous_result, + threshold=0.9, + embedding_model="openai_embed_3_large", ) base_passage_filter_node_test(result_df)