Commit

change package name to 'evaluation' and add llama-index-readers-file to fix bug on pytest (#516)

Co-authored-by: jeffrey <[email protected]>
vkehfdl1 and jeffrey authored Jun 22, 2024
1 parent a51344d commit 0964abb
Showing 30 changed files with 29 additions and 25 deletions.
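
In practice, the rename means every import from the old autorag.evaluate package moves to autorag.evaluation; the module layout underneath it stays the same, as the per-file hunks below show. A minimal before/after sketch of the affected import paths (the names are taken directly from the hunks in this commit):

# Old import paths, removed by this commit:
# from autorag.evaluate import evaluate_generation
# from autorag.evaluate.metric import retrieval_recall, retrieval_precision
# from autorag.evaluate.util import cast_metrics

# New import paths, added by this commit:
from autorag.evaluation import evaluate_generation
from autorag.evaluation.metric import retrieval_recall, retrieval_precision
from autorag.evaluation.util import cast_metrics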
File renamed without changes.
@@ -4,8 +4,8 @@

import pandas as pd

-from autorag.evaluate.metric.generation import bleu, meteor, rouge, sem_score, g_eval, bert_score
-from autorag.evaluate.util import cast_metrics
+from autorag.evaluation.metric.generation import bleu, meteor, rouge, sem_score, g_eval, bert_score
+from autorag.evaluation.util import cast_metrics

GENERATION_METRIC_FUNC_DICT = {func.__name__: func for func in
[bleu, meteor, rouge, sem_score, g_eval, bert_score]}
File renamed without changes.
@@ -15,7 +15,7 @@
from rouge_score.rouge_scorer import RougeScorer

from autorag import embedding_models
-from autorag.evaluate.metric.util import calculate_cosine_similarity
+from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.utils.util import process_batch, openai_truncate_by_token


File renamed without changes.
File renamed without changes.
@@ -4,9 +4,10 @@

import pandas as pd

-from autorag.evaluate.metric import (retrieval_recall, retrieval_precision, retrieval_f1, retrieval_ndcg, retrieval_mrr,
-                                     retrieval_map)
-from autorag.evaluate.util import cast_metrics
+from autorag.evaluation.metric import (retrieval_recall, retrieval_precision, retrieval_f1, retrieval_ndcg,
+                                       retrieval_mrr,
+                                       retrieval_map)
+from autorag.evaluation.util import cast_metrics

RETRIEVAL_METRIC_FUNC_DICT = {func.__name__: func for func in
[retrieval_recall, retrieval_precision, retrieval_f1, retrieval_ndcg, retrieval_mrr,
@@ -3,7 +3,7 @@

import pandas as pd

-from autorag.evaluate.metric import retrieval_token_f1, retrieval_token_precision, retrieval_token_recall
+from autorag.evaluation.metric import retrieval_token_f1, retrieval_token_precision, retrieval_token_recall


def evaluate_retrieval_contents(retrieval_gt: List[List[str]], metrics: List[str]):
File renamed without changes.
4 changes: 2 additions & 2 deletions autorag/nodes/generator/run.py
@@ -4,8 +4,8 @@

import pandas as pd

-from autorag.evaluate import evaluate_generation
-from autorag.evaluate.util import cast_metrics
+from autorag.evaluation import evaluate_generation
+from autorag.evaluation.util import cast_metrics
from autorag.strategy import measure_speed, filter_by_threshold, select_best


2 changes: 1 addition & 1 deletion autorag/nodes/passageaugmenter/base.py
@@ -10,7 +10,7 @@
import torch

from autorag import embedding_models
-from autorag.evaluate.metric.util import calculate_cosine_similarity
+from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.utils import (result_to_dataframe, validate_qa_dataset, fetch_contents, sort_by_scores,
validate_corpus_dataset, cast_corpus_dataset)
from autorag.utils.util import reconstruct_list, filter_dict_keys, select_top_k
2 changes: 1 addition & 1 deletion autorag/nodes/passagecompressor/run.py
@@ -5,7 +5,7 @@

import pandas as pd

-from autorag.evaluate.metric import retrieval_token_recall, retrieval_token_precision, retrieval_token_f1
+from autorag.evaluation.metric import retrieval_token_recall, retrieval_token_precision, retrieval_token_f1
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import fetch_contents

@@ -3,7 +3,7 @@
import numpy as np
import torch.cuda

-from autorag.evaluate.metric.util import calculate_cosine_similarity
+from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passagefilter.base import passage_filter_node
from autorag.nodes.passagefilter.similarity_threshold_cutoff import embedding_query_content

2 changes: 1 addition & 1 deletion autorag/nodes/passagefilter/similarity_threshold_cutoff.py
@@ -5,7 +5,7 @@
import torch.cuda

from autorag import embedding_models
-from autorag.evaluate.metric.util import calculate_cosine_similarity
+from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passagefilter.base import passage_filter_node
from autorag.utils.util import reconstruct_list

4 changes: 2 additions & 2 deletions autorag/nodes/promptmaker/run.py
@@ -6,8 +6,8 @@
import pandas as pd
import tokenlog

-from autorag.evaluate import evaluate_generation
-from autorag.evaluate.util import cast_metrics
+from autorag.evaluation import evaluate_generation
+from autorag.evaluation.util import cast_metrics
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.support import get_support_modules
from autorag.utils import validate_qa_dataset
2 changes: 1 addition & 1 deletion autorag/nodes/retrieval/run.py
@@ -5,7 +5,7 @@

import pandas as pd

-from autorag.evaluate import evaluate_retrieval
+from autorag.evaluation import evaluate_retrieval
from autorag.strategy import measure_speed, filter_by_threshold, select_best
from autorag.utils.util import load_summary_file

2 changes: 2 additions & 0 deletions requirements.txt
@@ -34,6 +34,8 @@ llmlingua # for longllmlingua
### LlamaIndex ###
llama-index>=0.10.1
llama-index-core>=0.10.1
+# readers
+llama-index-readers-file
# Embeddings
llama-index-embeddings-openai
llama-index-embeddings-huggingface
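
Aside from the rename, the only functional change is the new llama-index-readers-file entry above. The commit message only says it fixes a bug on pytest; a plausible reading, stated here as an assumption rather than something the diff confirms, is that llama-index 0.10.x ships its file readers in this separate package, so a test that loads documents fails with an ImportError when only llama-index-core is installed. A minimal sanity check under that assumption:

# Assumption: llama-index-readers-file exposes its readers under llama_index.readers.file
# (as in llama-index 0.10.x). If the dependency is missing, this import raises ImportError
# during test collection, which would match the "bug on pytest" mentioned in the commit message.
from llama_index.readers.file import PDFReader  # provided by llama-index-readers-file

The remaining hunks below are the same mechanical evaluate -> evaluation path update applied to the test suite.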
2 changes: 1 addition & 1 deletion tests/autorag/evaluate/metric/test_generation_metric.py
@@ -1,7 +1,7 @@
import pytest
from llama_index.embeddings.openai import OpenAIEmbedding

-from autorag.evaluate.metric import bleu, meteor, rouge, sem_score, g_eval, bert_score
+from autorag.evaluation.metric import bleu, meteor, rouge, sem_score, g_eval, bert_score
from tests.delete_tests import is_github_action

generation_gts = [
@@ -1,6 +1,6 @@
import pytest

-from autorag.evaluate.metric.retrieval_contents import single_token_f1, retrieval_token_f1, retrieval_token_precision, \
+from autorag.evaluation.metric.retrieval_contents import single_token_f1, retrieval_token_f1, retrieval_token_precision, \
retrieval_token_recall

gt = [
5 changes: 3 additions & 2 deletions tests/autorag/evaluate/metric/test_retrieval_metric.py
@@ -1,7 +1,8 @@
import pytest

-from autorag.evaluate.metric import (retrieval_f1, retrieval_precision, retrieval_recall, retrieval_ndcg, retrieval_mrr,
-                                     retrieval_map)
+from autorag.evaluation.metric import (retrieval_f1, retrieval_precision, retrieval_recall, retrieval_ndcg,
+                                       retrieval_mrr,
+                                       retrieval_map)

retrieval_gt = [
[['test-1', 'test-2'], ['test-3']],
2 changes: 1 addition & 1 deletion tests/autorag/evaluate/test_evaluate_util.py
@@ -1,5 +1,5 @@
from autorag import embedding_models
-from autorag.evaluate.util import cast_metrics
+from autorag.evaluation.util import cast_metrics


def test_cast_metrics():
2 changes: 1 addition & 1 deletion tests/autorag/evaluate/test_generation_evaluate.py
@@ -9,7 +9,7 @@
from openai.types.chat.chat_completion_token_logprob import TopLogprob
from transformers import AutoTokenizer

-from autorag.evaluate.generation import evaluate_generation
+from autorag.evaluation.generation import evaluate_generation

generation_gts = [
['The dog had bit the man.', 'The man had bitten the dog.'],
2 changes: 1 addition & 1 deletion tests/autorag/evaluate/test_retrieval_contents_evaluate.py
@@ -3,7 +3,7 @@
import pandas as pd
import pytest

-from autorag.evaluate import evaluate_retrieval_contents
+from autorag.evaluation import evaluate_retrieval_contents

gt = [
['Enough for drinking water', 'Just looking for a water bottle'],
2 changes: 1 addition & 1 deletion tests/autorag/evaluate/test_retrieval_evaluate.py
@@ -3,7 +3,7 @@

import pandas as pd

-from autorag.evaluate import evaluate_retrieval
+from autorag.evaluation import evaluate_retrieval

retrieval_gt = [[[f'test{i}-{j}'] for i in range(2)] for j in range(4)]
queries_example = ["Query 1", "Query 2", "Query 3", "Query 4"]
2 changes: 1 addition & 1 deletion tests/autorag/nodes/promptmaker/test_prompt_maker_run.py
@@ -9,7 +9,7 @@
from llama_index.llms.openai import OpenAI

from autorag import generator_models
-from autorag.evaluate.util import cast_metrics
+from autorag.evaluation.util import cast_metrics
from autorag.nodes.generator import llama_index_llm
from autorag.nodes.promptmaker import fstring
from autorag.nodes.promptmaker.run import evaluate_generator_result, evaluate_one_prompt_maker_node, \