Skip to content

Commit

Permalink
fix: update import path of embedding_models from autorag to autorag.embedding.base
Browse files Browse the repository at this point in the history
  • Loading branch information
Um Changyong committed Dec 19, 2024
1 parent 214dc13 commit 306a3e2
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 59 deletions.
17 changes: 17 additions & 0 deletions autorag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,23 @@
# Populate the package version from the text file at ``version_path``,
# dropping surrounding whitespace such as a trailing newline.
with open(version_path, "r") as f:
    __version__ = f.read().strip()

class LazyInit:
    """Defer construction of an object until it is first needed.

    The wrapped ``factory`` is invoked with the stored positional and keyword
    arguments the first time the wrapper is called or an attribute is looked
    up on it. The resulting object is cached and reused afterwards.
    """

    def __init__(self, factory, *args, **kwargs):
        """Remember ``factory`` and its arguments without calling it.

        Args:
            factory: Callable that builds the real object.
            *args: Positional arguments forwarded to ``factory``.
            **kwargs: Keyword arguments forwarded to ``factory``.
        """
        self._factory = factory
        self._args = args
        self._kwargs = kwargs
        self._instance = None

    def __call__(self):
        """Return the wrapped object, building it on first use."""
        if self._instance is None:
            self._instance = self._factory(*self._args, **self._kwargs)
        return self._instance

    def __getattr__(self, name):
        """Forward attribute lookups that miss on the wrapper to the wrapped object.

        Raises:
            AttributeError: If ``name`` is one of the wrapper's own fields and
                that field has not been set, or if the wrapped object has no
                attribute ``name``.
        """
        # __getattr__ only runs when normal lookup fails. If one of our own
        # fields is missing (for example when copy/pickle probe an object
        # created through __new__ without __init__), reading self._instance
        # below would re-enter __getattr__ without end, so fail fast instead.
        if name in ("_factory", "_args", "_kwargs", "_instance"):
            raise AttributeError(name)
        return getattr(self(), name)

rich_format = "[%(filename)s:%(lineno)s] >> %(message)s"
logging.basicConfig(
level="INFO", format=rich_format, handlers=[RichHandler(rich_tracebacks=True)]
Expand Down
2 changes: 1 addition & 1 deletion autorag/data/chunk/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

from autorag import embedding_models
from autorag.embedding.base import embedding_models
from autorag.data import chunk_modules, sentence_splitter_modules
from autorag.utils import result_to_dataframe

Expand Down
87 changes: 36 additions & 51 deletions autorag/embedding/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,9 @@
from langchain_openai.embeddings import OpenAIEmbeddings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

logger = logging.getLogger("AutoRAG")


class LazyInit:
    """Defer construction of an object until it is first needed.

    The wrapped ``factory`` is invoked with the stored positional and keyword
    arguments the first time the wrapper is called or an attribute is looked
    up on it. The resulting object is cached and reused afterwards.
    """

    def __init__(self, factory, *args, **kwargs):
        """Remember ``factory`` and its arguments without calling it.

        Args:
            factory: Callable that builds the real object.
            *args: Positional arguments forwarded to ``factory``.
            **kwargs: Keyword arguments forwarded to ``factory``.
        """
        self._factory = factory
        self._args = args
        self._kwargs = kwargs
        self._instance = None

    def __call__(self):
        """Return the wrapped object, building it on first use."""
        if self._instance is None:
            self._instance = self._factory(*self._args, **self._kwargs)
        return self._instance

    def __getattr__(self, name):
        """Forward attribute lookups that miss on the wrapper to the wrapped object.

        Raises:
            AttributeError: If ``name`` is one of the wrapper's own fields and
                that field has not been set, or if the wrapped object has no
                attribute ``name``.
        """
        # __getattr__ only runs when normal lookup fails. If one of our own
        # fields is missing (for example when copy/pickle probe an object
        # created through __new__ without __init__), reading self._instance
        # below would re-enter __getattr__ without end, so fail fast instead.
        if name in ("_factory", "_args", "_kwargs", "_instance"):
            raise AttributeError(name)
        return getattr(self(), name)
from autorag import LazyInit

logger = logging.getLogger("AutoRAG")

class MockEmbeddingRandom(MockEmbedding):
"""Mock embedding with random vectors."""
Expand All @@ -37,43 +20,45 @@ def _get_vector(self) -> List[float]:
return [random() for _ in range(self.embed_dim)]


# Registry of supported embedding models, keyed by a short name.
# Every value is a LazyInit wrapper, so no model is constructed at import
# time; calling the wrapper produces (and caches) the model instance.
embedding_models = {
    # --- llama index ---
    # No model_name is passed here; per the original note the default model
    # is OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002.
    "openai": LazyInit(OpenAIEmbedding),
    "openai_embed_3_large": LazyInit(
        OpenAIEmbedding,
        model_name=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE,
    ),
    "openai_embed_3_small": LazyInit(
        OpenAIEmbedding,
        model_name=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL,
    ),
    "mock": LazyInit(MockEmbeddingRandom, embed_dim=768),
    # --- langchain ---
    "openai_langchain": LazyInit(OpenAIEmbeddings),
    # --- local (HuggingFace) models ---
    "huggingface_baai_bge_small": LazyInit(
        HuggingFaceEmbedding,
        model_name="BAAI/bge-small-en-v1.5",
    ),
    "huggingface_cointegrated_rubert_tiny2": LazyInit(
        HuggingFaceEmbedding,
        model_name="cointegrated/rubert-tiny2",
    ),
    "huggingface_all_mpnet_base_v2": LazyInit(
        HuggingFaceEmbedding,
        model_name="sentence-transformers/all-mpnet-base-v2",
        max_length=512,
    ),
    "huggingface_bge_m3": LazyInit(
        HuggingFaceEmbedding,
        model_name="BAAI/bge-m3",
    ),
    "huggingface_multilingual_e5_large": LazyInit(
        HuggingFaceEmbedding,
        model_name="intfloat/multilingual-e5-large-instruct",
    ),
}


class EmbeddingModel:
    """Resolve embedding-model names to model instances."""

    # Alias of the module-level ``embedding_models`` registry. Keeping the
    # attribute preserves ``EmbeddingModel.embedding_models`` for existing
    # callers while leaving a single registry (and a single set of lazily
    # built instances) instead of two duplicated dict literals.
    embedding_models = embedding_models

    @staticmethod
    def load(name: str = ""):
        """Return the embedding model registered under ``name``.

        Args:
            name: Key in the module-level ``embedding_models`` registry.

        Returns:
            The object produced by calling the registered lazy wrapper.

        Raises:
            ValueError: If ``name`` is not a key of the registry (this
                includes the default empty string).
        """
        # Only the registry lookup is guarded: a KeyError raised while the
        # model itself is being constructed must not be reported as an
        # unsupported model name.
        try:
            lazy_model = embedding_models[name]
        except KeyError:
            raise ValueError(f"Embedding model '{name}' is not supported") from None
        return lazy_model()

Expand Down
2 changes: 1 addition & 1 deletion autorag/evaluation/metric/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from rouge_score.rouge_scorer import RougeScorer
from sacrebleu.metrics.bleu import BLEU

from autorag import embedding_models
from autorag.embedding.base import embedding_models
from autorag.evaluation.metric.deepeval_prompt import FaithfulnessTemplate
from autorag.evaluation.metric.util import (
autorag_metric_loop,
Expand Down
2 changes: 1 addition & 1 deletion autorag/evaluation/util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from copy import deepcopy
from typing import Union, List, Dict, Tuple, Any

from autorag import embedding_models
from autorag.embedding.base import embedding_models


def cast_metrics(
Expand Down
2 changes: 1 addition & 1 deletion autorag/nodes/passageaugmenter/prev_next_augmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pandas as pd

from autorag import embedding_models
from autorag.embedding.base import embedding_models
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passageaugmenter.base import BasePassageAugmenter
from autorag.utils.util import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pandas as pd

from autorag import embedding_models
from autorag.embedding.base import embedding_models
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.nodes.passagefilter.similarity_threshold_cutoff import (
Expand Down
2 changes: 1 addition & 1 deletion autorag/nodes/passagefilter/similarity_threshold_cutoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pandas as pd

from autorag import embedding_models
from autorag.embedding.base import embedding_models
from autorag.evaluation.metric.util import calculate_cosine_similarity
from autorag.nodes.passagefilter.base import BasePassageFilter
from autorag.utils.util import (
Expand Down
2 changes: 1 addition & 1 deletion tests/autorag/evaluate/test_evaluate_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from autorag import embedding_models
from autorag.embedding.base import embedding_models
from autorag.evaluation.util import cast_metrics


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from llama_index.core.base.llms.types import CompletionResponse
from llama_index.llms.openai import OpenAI

from autorag import embedding_models, MockEmbeddingRandom, LazyInit
from autorag.embedding.base import embedding_models, MockEmbeddingRandom, LazyInit
from autorag.nodes.queryexpansion import QueryDecompose, HyDE
from autorag.nodes.queryexpansion.run import evaluate_one_query_expansion_node
from autorag.nodes.queryexpansion.run import run_query_expansion_node
Expand Down

0 comments on commit 306a3e2

Please sign in to comment.