Merge branch 'main' into Feature/#517
vkehfdl1 authored Jun 30, 2024
2 parents 9e8ccff + 8850fa4 commit 9183b82
Showing 19 changed files with 151 additions and 12 deletions.
2 changes: 1 addition & 1 deletion autorag/VERSION
@@ -1 +1 @@
0.2.7
0.2.8
9 changes: 8 additions & 1 deletion autorag/evaluation/metric/generation.py
@@ -16,11 +16,12 @@

from autorag import embedding_models
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.utils.util import process_batch, openai_truncate_by_token
from autorag.utils.util import process_batch, openai_truncate_by_token, convert_inputs_to_list


def generation_metric(func):
@functools.wraps(func)
@convert_inputs_to_list
def wrapper(generation_gt: List[List[str]], generations: List[str], **kwargs) -> List[float]:
"""
Compute generation metric.
@@ -39,6 +40,7 @@ def wrapper(generation_gt: List[List[str]], generations: List[str], **kwargs) ->
return wrapper


@convert_inputs_to_list
def huggingface_evaluate(instance, key: str,
generation_gt: List[List[str]], generations: List[str],
**kwargs) -> List[float]:
@@ -83,6 +85,7 @@ def bleu(generation_gt: List[List[str]], generations: [str], tokenize: str|None
return result


@convert_inputs_to_list
def meteor(generation_gt: List[List[str]], generations: List[str],
alpha: float = 0.9,
beta: float = 3.0,
@@ -110,6 +113,7 @@ def meteor(generation_gt: List[List[str]], generations: List[str],
return result


@convert_inputs_to_list
def rouge(generation_gt: List[List[str]], generations: List[str],
rouge_type: Optional[str] = 'rougeL',
use_stemmer: bool = False,
@@ -154,6 +158,7 @@ async def compute(gt: List[str], pred: str) -> float:
return result


@convert_inputs_to_list
def sem_score(generation_gt: List[List[str]], generations: List[str],
embedding_model: Optional[BaseEmbedding] = None,
batch: int = 128) -> List[float]:
@@ -207,6 +212,7 @@ def sem_score(generation_gt: List[List[str]], generations: List[str],
return result


@convert_inputs_to_list
def g_eval(generation_gt: List[List[str]], generations: List[str],
metrics: Optional[List[str]] = None,
model: str = 'gpt-4-0125-preview',
@@ -296,6 +302,7 @@ def get_g_eval_score(responses, max_score: int = 5) -> int:
return sum(g_eval_scores) / len(g_eval_scores)


@convert_inputs_to_list
def bert_score(generation_gt: List[List[str]], generations: List[str],
lang: str = 'en',
batch: int = 128,
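
The decorator is attached in two ways in this file: inside `generation_metric` it wraps the inner `wrapper`, while `huggingface_evaluate`, `meteor`, `rouge`, `sem_score`, `g_eval`, and `bert_score` get it applied directly. Either way, arguments are normalized to plain Python lists before the metric body runs. A stripped-down sketch of the stacking — the wrapper body and the toy `exact_match` metric below are illustrative stand-ins, not the code from this diff:

```python
import functools
from typing import List

import numpy as np

from autorag.utils.util import convert_inputs_to_list


def generation_metric(func):
    @functools.wraps(func)
    @convert_inputs_to_list  # converts numpy arrays / pandas Series to lists at call time
    def wrapper(generation_gt: List[List[str]], generations: List[str], **kwargs) -> List[float]:
        # Simplified stand-in body: score each generation against its ground-truth list.
        return [func(gt, gen, **kwargs) for gt, gen in zip(generation_gt, generations)]
    return wrapper


@generation_metric
def exact_match(gt: List[str], gen: str) -> float:  # toy metric for illustration only
    return float(gen in gt)


# numpy inputs are accepted transparently thanks to convert_inputs_to_list
print(exact_match(generation_gt=np.array([['a'], ['b']]), generations=np.array(['a', 'c'])))  # [1.0, 0.0]
```
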
6 changes: 5 additions & 1 deletion autorag/evaluation/metric/retrieval.py
@@ -1,11 +1,15 @@
import functools
import itertools
import math
from typing import List

import math

from autorag.utils.util import convert_inputs_to_list


def retrieval_metric(func):
@functools.wraps(func)
@convert_inputs_to_list
def wrapper(retrieval_gt: List[List[List[str]]], pred_ids: List[List[str]]) -> List[float]:
results = []
for gt, pred in zip(retrieval_gt, pred_ids):
3 changes: 2 additions & 1 deletion autorag/evaluation/metric/retrieval_contents.py
@@ -9,11 +9,12 @@

import numpy as np

from autorag.utils.util import normalize_string
from autorag.utils.util import normalize_string, convert_inputs_to_list


def retrieval_contents_metric(func):
@functools.wraps(func)
@convert_inputs_to_list
def wrapper(gt_contents: List[List[str]], pred_contents: List[List[str]]) -> List[float]:
results = []
for gt, pred in zip(gt_contents, pred_contents):
4 changes: 2 additions & 2 deletions autorag/nodes/passagereranker/flag_embedding.py
@@ -1,4 +1,4 @@
from typing import List, Tuple
from typing import List, Tuple, Iterable

import pandas as pd
import torch
@@ -58,7 +58,7 @@ def flag_embedding_run_model(input_texts, model, batch_size: int):
for batch_texts in tqdm(batch_input_texts):
with torch.no_grad():
pred_scores = model.compute_score(sentence_pairs=batch_texts)
if batch_size == 1:
if batch_size == 1 or not isinstance(pred_scores, Iterable):
results.append(pred_scores)
else:
results.extend(pred_scores)
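
The new `Iterable` check above handles rerankers whose `compute_score` returns a bare float for a single sentence pair rather than a list of scores. A minimal standalone sketch of the same normalization, under that assumption (the helper name is illustrative):

```python
from typing import Iterable, List, Union


def normalize_scores(pred_scores: Union[float, Iterable[float]]) -> List[float]:
    # Mirrors the guard in the diff: a scalar result becomes a one-element list,
    # while an iterable of scores is kept as a flat list.
    if not isinstance(pred_scores, Iterable):
        return [float(pred_scores)]
    return [float(score) for score in pred_scores]


print(normalize_scores(0.87))          # [0.87]
print(normalize_scores([0.87, 0.12]))  # [0.87, 0.12]
```
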
3 changes: 3 additions & 0 deletions autorag/nodes/queryexpansion/base.py
@@ -42,6 +42,9 @@ def wrapper(
prompt=prompt,
generator_func=generator_callable,
generator_params=generator_param)
# delete empty string in the nested expanded queries list
expanded_queries = [list(map(lambda x: x.strip(), sublist)) for sublist in expanded_queries]
expanded_queries = [list(filter(lambda x: bool(x), sublist)) for sublist in expanded_queries]
return expanded_queries

return wrapper
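
The two comprehensions added above first trim whitespace from every expanded query and then drop the empty strings, so whitespace-only generations never reach retrieval. A small sketch of the same transformation on toy data (the sample queries are illustrative):

```python
expanded_queries = [['  what is AutoRAG?  ', '', 'how is it evaluated?'], ['   ']]

# delete empty strings in the nested expanded queries list (same steps as the diff)
expanded_queries = [list(map(lambda x: x.strip(), sublist)) for sublist in expanded_queries]
expanded_queries = [list(filter(lambda x: bool(x), sublist)) for sublist in expanded_queries]

print(expanded_queries)  # [['what is AutoRAG?', 'how is it evaluated?'], []]
```
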
2 changes: 1 addition & 1 deletion autorag/nodes/queryexpansion/query_decompose.py
@@ -47,7 +47,7 @@
Question: {question}
Decompositions:"
Decompositions:
"""


5 changes: 3 additions & 2 deletions autorag/nodes/queryexpansion/run.py
@@ -68,7 +68,7 @@ def run_query_expansion_node(modules: List[Callable],
# Run evaluation when there are more than one module.
if len(modules) > 1:
# pop general keys from strategies (e.g. metrics, speed_threshold)
general_key = ['metrics', 'speed_threshold']
general_key = ['metrics', 'speed_threshold', 'strategy']
general_strategy = dict(filter(lambda x: x[0] in general_key, strategies.items()))
extra_strategy = dict(filter(lambda x: x[0] not in general_key, strategies.items()))

@@ -93,7 +93,8 @@ def run_query_expansion_node(modules: List[Callable],
# run evaluation
evaluation_results = list(map(lambda result: evaluate_one_query_expansion_node(
retrieval_callables, retrieval_params, result['queries'].tolist(), retrieval_gt,
general_strategy['metrics'], project_dir, previous_result, strategies.get('strategy', 'mean')), results))
general_strategy['metrics'], project_dir, previous_result, general_strategy.get('strategy', 'mean')),
results))

evaluation_df = pd.DataFrame({
'filename': filenames,
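
Adding `'strategy'` to `general_key` keeps the aggregation strategy in `general_strategy`, where it is read with a `'mean'` default, rather than leaving it in `extra_strategy`. A small sketch of that split on a sample strategies dict (the values are illustrative, taken from the tests further down):

```python
strategies = {
    'metrics': ['retrieval_f1', 'retrieval_recall'],
    'speed_threshold': 5,
    'strategy': 'rank',
    'top_k': 4,  # not a general key, so it stays in extra_strategy
}

general_key = ['metrics', 'speed_threshold', 'strategy']
general_strategy = dict(filter(lambda x: x[0] in general_key, strategies.items()))
extra_strategy = dict(filter(lambda x: x[0] not in general_key, strategies.items()))

print(general_strategy.get('strategy', 'mean'))  # rank
print(extra_strategy)                            # {'top_k': 4}
```
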
34 changes: 32 additions & 2 deletions autorag/utils/util.py
@@ -8,13 +8,16 @@
import os
import re
import string
import unicodedata
from copy import deepcopy
from typing import List, Callable, Dict, Optional, Any, Collection
from typing import List, Callable, Dict, Optional, Any, Collection, Iterable

import numpy as np
import pandas as pd
import tiktoken
import unicodedata
from llama_index.embeddings.openai import OpenAIEmbedding
from pydantic import BaseModel as BM
from pydantic.v1 import BaseModel

logger = logging.getLogger("AutoRAG")

@@ -454,3 +457,30 @@ def embedding_query_content(queries: List[str], contents_list: List[List[str]],
content_embeddings_flatten = embedding_model.get_text_embedding_batch(flatten_contents)
content_embeddings = reconstruct_list(content_embeddings_flatten, content_lengths)
return query_embeddings, content_embeddings


def to_list(item):
"""Recursively convert collections to Python lists."""
if isinstance(item, np.ndarray):
# Convert numpy array to list and recursively process each element
return [to_list(sub_item) for sub_item in item.tolist()]
elif isinstance(item, pd.Series):
# Convert pandas Series to list and recursively process each element
return [to_list(sub_item) for sub_item in item.tolist()]
elif isinstance(item, Iterable) and not isinstance(item, (str, bytes, BaseModel, BM)):
# Recursively process each element in other iterables
return [to_list(sub_item) for sub_item in item]
else:
return item


def convert_inputs_to_list(func):
"""Decorator to convert all function inputs to Python lists."""

@functools.wraps(func)
def wrapper(*args, **kwargs):
new_args = [to_list(arg) for arg in args]
new_kwargs = {k: to_list(v) for k, v in kwargs.items()}
return func(*new_args, **new_kwargs)

return wrapper
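
A usage sketch of the new helpers: decorating a function with `convert_inputs_to_list` lets code written against plain lists accept numpy arrays or pandas Series transparently (the toy function below is illustrative, not part of the diff):

```python
import numpy as np
import pandas as pd

from autorag.utils.util import convert_inputs_to_list


@convert_inputs_to_list
def count_items(nested) -> int:
    # By the time the body runs, every array/Series has been converted to a plain list.
    assert isinstance(nested, list)
    return sum(len(sub) for sub in nested)


print(count_items(np.array([[1, 2], [3, 4]])))      # 4
print(count_items([pd.Series([1, 2, 3]), (4, 5)]))  # 5
```
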
52 changes: 52 additions & 0 deletions docs/source/nodes/query_expansion/query_decompose.md
@@ -31,3 +31,55 @@ modules:
llm: openai
model: [ gpt-3.5-turbo-16k, gpt-3.5-turbo-1106 ]
```
## Default Prompt
When a question doesn't need decomposition, the LLM must return "The question needs no decomposition".
Also, each question is substituted into the `{question}` placeholder, so your prompt must include it.

```
Decompose a question in self-contained sub-questions. Use \"The question needs no decomposition\" when no decomposition is needed.

Example 1:

Question: Is Hamlet more common on IMDB than Comedy of Errors?
Decompositions:
1: How many listings of Hamlet are there on IMDB?
2: How many listing of Comedy of Errors is there on IMDB?

Example 2:

Question: Are birds important to badminton?

Decompositions:
The question needs no decomposition

Example 3:

Question: Is it legal for a licensed child driving Mercedes-Benz to be employed in US?

Decompositions:
1: What is the minimum driving age in the US?
2: What is the minimum age for someone to be employed in the US?

Example 4:

Question: Are all cucumbers the same texture?

Decompositions:
The question needs no decomposition

Example 5:

Question: Hydrogen's atomic number squared exceeds number of Spice Girls?

Decompositions:
1: What is the atomic number of hydrogen?
2: How many Spice Girls are there?

Example 6:

Question: {question}

Decompositions:
```
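
A minimal sketch of how the `{question}` placeholder gets filled, assuming plain `str.format` substitution (the shortened prompt string is illustrative):

```python
prompt = ("Decompose a question in self-contained sub-questions. "
          "Use \"The question needs no decomposition\" when no decomposition is needed.\n\n"
          "Question: {question}\n"
          "Decompositions:")

print(prompt.format(question="Are birds important to badminton?"))
```
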
10 changes: 10 additions & 0 deletions tests/autorag/evaluate/metric/test_retrieval_metric.py
@@ -1,3 +1,4 @@
import numpy as np
import pytest

from autorag.evaluation.metric import (retrieval_f1, retrieval_precision, retrieval_recall, retrieval_ndcg,
@@ -34,6 +35,15 @@ def test_retrieval_f1():
assert gt == pytest.approx(res, rel=1e-4)


def test_numpy_retrieval_metric():
retrieval_gt_np = [[np.array(['test-1', 'test-4'])], np.array([['test-2']])]
pred_np = np.array([['test-2', 'test-3', 'test-1'], ['test-5', 'test-6', 'test-8']])
solution = [1.0, 0.0]
result = retrieval_recall(retrieval_gt=retrieval_gt_np, pred_ids=pred_np)
for gt, res in zip(solution, result):
assert gt == pytest.approx(res, rel=1e-4)


def test_retrieval_recall():
solution = [0.5, 1 / 3, 1, 2 / 3, 1, None, None, 1]
result = retrieval_recall(retrieval_gt=retrieval_gt, pred_ids=pred)
@@ -29,6 +29,7 @@ def test_run_passage_augmenter_node(node_line_dir):
module_params = [{'top_k': 2, 'num_passages': 1}]
strategies = {
'metrics': ['retrieval_f1', 'retrieval_recall'],
'strategy': 'rank',
}
best_result = run_passage_augmenter_node(modules, module_params, previous_result, node_line_dir, strategies)
assert os.path.exists(os.path.join(node_line_dir, "passage_augmenter"))
@@ -82,6 +82,7 @@ def test_run_passage_compressor_node(node_line_dir):
{'llm': 'mock', 'model': 'gpt-3.5-turbo'}]
strategies = {
'metrics': ['retrieval_token_f1', 'retrieval_token_precision'],
'strategy': 'normalize_mean',
'speed_threshold': 5,
}
best_result = run_passage_compressor_node(modules, module_params, previous_result, node_line_dir, strategies)
@@ -29,6 +29,7 @@ def test_run_passage_filter_node(node_line_dir):
module_params = [{'threshold': 0.87}]
strategies = {
'metrics': ['retrieval_f1', 'retrieval_recall'],
'strategy': 'rank',
}
best_result = run_passage_filter_node(modules, module_params, previous_result, node_line_dir, strategies)
assert os.path.exists(os.path.join(node_line_dir, "passage_filter"))
@@ -82,6 +82,7 @@ def test_run_passage_reranker_node(node_line_dir):
module_params = [{'top_k': 4, 'model_name': 'castorini_monot5-3b-msmarco-10k'}]
strategies = {
'metrics': ['retrieval_f1', 'retrieval_recall'],
'strategy': 'rank',
}
best_result = run_passage_reranker_node(modules, module_params, previous_result, node_line_dir, strategies)
assert os.path.exists(os.path.join(node_line_dir, "passage_reranker"))
1 change: 1 addition & 0 deletions tests/autorag/nodes/promptmaker/test_prompt_maker_run.py
@@ -105,6 +105,7 @@ def test_run_prompt_maker_node(node_line_dir):
'speed_threshold': 5,
'token_threshold': 25,
'tokenizer': 'gpt-3.5-turbo',
'strategy': 'rank',
'generator_modules': [{
'module_type': 'llama_index_llm',
'llm': 'mock',
@@ -100,6 +100,7 @@ def test_run_query_expansion_node(node_line_dir):
'metrics': metrics,
'speed_threshold': 5,
'top_k': 4,
'strategy': 'rank',
'retrieval_modules': [{'module_type': 'bm25', 'bm25_tokenizer': 'gpt2'}],
}
best_result = run_query_expansion_node(modules, module_params, previous_result, node_line_dir, strategies)
1 change: 1 addition & 0 deletions tests/autorag/nodes/retrieval/test_run_retrieval_node.py
@@ -52,6 +52,7 @@ def test_run_retrieval_node(node_line_dir):
qa_path = os.path.join(project_dir, "data", "qa.parquet")
strategies = {
'metrics': ['retrieval_f1', 'retrieval_recall'],
'strategy': 'normalize_mean',
'speed_threshold': 5,
}
previous_result = pd.read_parquet(qa_path)
26 changes: 25 additions & 1 deletion tests/autorag/utils/test_util.py
@@ -5,16 +5,20 @@
import tempfile
from datetime import datetime, date

import numpy as np
import pandas as pd
import pytest
import tiktoken
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.llms import CompletionResponse
from llama_index.embeddings.openai import OpenAIEmbedding

from autorag.utils import fetch_contents
from autorag.utils.util import load_summary_file, result_to_dataframe, \
make_combinations, explode, replace_value_in_dict, normalize_string, convert_string_to_tuple_in_dict, process_batch, \
convert_env_in_dict, openai_truncate_by_token, convert_datetime_string, split_dataframe, find_trial_dir, \
find_node_summary_files, normalize_unicode, dict_to_markdown, dict_to_markdown_table
find_node_summary_files, normalize_unicode, dict_to_markdown, dict_to_markdown_table, convert_inputs_to_list, \
to_list
from tests.mock import MockLLM

root_dir = pathlib.PurePath(os.path.dirname(os.path.realpath(__file__))).parent.parent
@@ -407,3 +411,23 @@ def test_dict_to_markdown_table():
| key2 | value2 |
"""
assert result == result_text


@convert_inputs_to_list
def convert_inputs_to_list_function(int_type, str_type, iterable_type, iterable_type2):
assert isinstance(int_type, int)
assert isinstance(str_type, str)
assert isinstance(iterable_type, list)
assert isinstance(iterable_type2, list)


def test_convert_inputs_to_list():
convert_inputs_to_list_function(1, 'jax', (2, 3), (5, 6, [4, 66]))
convert_inputs_to_list_function(1, 'jax', np.array([3, 4]), [pd.Series([12, 13]), 14])
convert_inputs_to_list_function(4, 'jax', pd.Series([7, 8, 9]), np.array([[3, 4], [4, 5]]))


def test_to_list():
embedding_model = OpenAIEmbedding()
new_model = to_list(embedding_model)
assert isinstance(new_model, BaseEmbedding)
