Skip to content

Commit

Permalink
fix: import err
Browse files Browse the repository at this point in the history
  • Loading branch information
e7217 committed Dec 19, 2024
1 parent 306a3e2 commit 159496e
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 32 deletions.
2 changes: 2 additions & 0 deletions autorag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
with open(version_path, "r") as f:
__version__ = f.read().strip()


class LazyInit:
def __init__(self, factory, *args, **kwargs):
self._factory = factory
Expand All @@ -39,6 +40,7 @@ def __getattr__(self, name):
self._instance = self._factory(*self._args, **self._kwargs)
return getattr(self._instance, name)


rich_format = "[%(filename)s:%(lineno)s] >> %(message)s"
logging.basicConfig(
level="INFO", format=rich_format, handlers=[RichHandler(rich_tracebacks=True)]
Expand Down
85 changes: 53 additions & 32 deletions autorag/embedding/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import sys

from random import random
from typing import List
Expand All @@ -7,12 +8,12 @@
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbeddingModelType
from langchain_openai.embeddings import OpenAIEmbeddings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from autorag import LazyInit

logger = logging.getLogger("AutoRAG")


class MockEmbeddingRandom(MockEmbedding):
"""Mock embedding with random vectors."""

Expand All @@ -21,40 +22,50 @@ def _get_vector(self) -> List[float]:


embedding_models = {
# llama index
"openai": LazyInit(
OpenAIEmbedding
), # default model is OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002
"openai_embed_3_large": LazyInit(
OpenAIEmbedding, model_name=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE
),
"openai_embed_3_small": LazyInit(
OpenAIEmbedding, model_name=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL
),
"mock": LazyInit(MockEmbeddingRandom, embed_dim=768),
# langchain
"openai_langchain": LazyInit(OpenAIEmbeddings),
# local model
"huggingface_baai_bge_small": LazyInit(
HuggingFaceEmbedding, model_name="BAAI/bge-small-en-v1.5"
),
"huggingface_cointegrated_rubert_tiny2": LazyInit(
HuggingFaceEmbedding, model_name="cointegrated/rubert-tiny2"
),
"huggingface_all_mpnet_base_v2": LazyInit(
HuggingFaceEmbedding,
model_name="sentence-transformers/all-mpnet-base-v2",
max_length=512,
),
"huggingface_bge_m3": LazyInit(HuggingFaceEmbedding, model_name="BAAI/bge-m3"),
"huggingface_multilingual_e5_large": LazyInit(
HuggingFaceEmbedding, model_name="intfloat/multilingual-e5-large-instruct"
),
# llama index
"openai": LazyInit(
OpenAIEmbedding
), # default model is OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002
"openai_embed_3_large": LazyInit(
OpenAIEmbedding, model_name=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE
),
"openai_embed_3_small": LazyInit(
OpenAIEmbedding, model_name=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL
),
"mock": LazyInit(MockEmbeddingRandom, embed_dim=768),
# langchain
"openai_langchain": LazyInit(OpenAIEmbeddings),
}

try:
    # Local (HuggingFace) embedding models are an optional dependency:
    # on API-only installs this import raises ImportError and we keep only
    # the API-based models already registered in ``embedding_models``.
    # you can use your own model in this way.
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    # registry key -> kwargs forwarded to HuggingFaceEmbedding via LazyInit
    _local_models = {
        "huggingface_baai_bge_small": {"model_name": "BAAI/bge-small-en-v1.5"},
        "huggingface_cointegrated_rubert_tiny2": {
            "model_name": "cointegrated/rubert-tiny2"
        },
        "huggingface_all_mpnet_base_v2": {
            "model_name": "sentence-transformers/all-mpnet-base-v2",
            "max_length": 512,
        },
        "huggingface_bge_m3": {"model_name": "BAAI/bge-m3"},
        "huggingface_multilingual_e5_large": {
            "model_name": "intfloat/multilingual-e5-large-instruct"
        },
    }
    for _name, _kwargs in _local_models.items():
        embedding_models[_name] = LazyInit(HuggingFaceEmbedding, **_kwargs)
except ImportError:
    # NOTE: the original adjacent string literals concatenated without a
    # separator ("...AutoRAG.To use..."); fixed and aligned with the same
    # message emitted by _get_huggingface_class().
    logger.info(
        "You are using API version of AutoRAG. "
        "To use local version, run `pip install 'AutoRAG[gpu]'`."
    )

class EmbeddingModel:

class EmbeddingModel:
@staticmethod
def load(name: str = ""):
try:
Expand All @@ -76,6 +87,16 @@ def _check_keys(target: dict):
f"Embedding model type '{target['type']}' is not supported"
)

def _get_huggingface_class():
module = sys.modules.get("llama_index.embeddings.huggingface")
if not module:
logger.info(
"You are using API version of AutoRAG. "
"To use local version, run `pip install 'AutoRAG[gpu]'`."
)
return None
return getattr(module, "HuggingFaceEmbedding", None)

_check_one_item(option)
_check_keys(option[0])

Expand All @@ -84,8 +105,8 @@ def _check_keys(target: dict):

embedding_map = {
"openai": OpenAIEmbedding,
"huggingface": HuggingFaceEmbedding,
"mock": MockEmbeddingRandom,
"huggingface": _get_huggingface_class(),
}

embedding_class = embedding_map.get(model_type)
Expand Down

0 comments on commit 159496e

Please sign in to comment.