diff --git a/docs/examples/embeddings/nemo.ipynb b/docs/examples/embeddings/nemo.ipynb
new file mode 100644
index 0000000000000..8f7f8c1a677fa
--- /dev/null
+++ b/docs/examples/embeddings/nemo.ipynb
@@ -0,0 +1,91 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Nvidia NeMo embeddings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Connect to NVIDIA's embedding service using the NeMoEmbedding class."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install llama-index-embeddings-nemo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install llama-index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# imports\n",
+    "from llama_index.embeddings.nemo import NeMoEmbedding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 16\n",
+    "model = \"NV-Embed-QA-003\"\n",
+    "api_endpoint_url = \"http://localhost:8080/v1/embeddings\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embedding_model = NeMoEmbedding(\n",
+    "    embed_batch_size=batch_size, model_name=model, api_endpoint_url=api_endpoint_url\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embedding_model.get_query_embedding(\"Hello world\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore
b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md
new file mode 100644
index 0000000000000..89f9945dfeb71
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md
@@ -0,0 +1 @@
+# LlamaIndex Embeddings Integration: Nemo
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD
new file mode 100644
index 0000000000000..db46e8d6c978c
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD
@@ -0,0 +1 @@
+python_sources()
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py
new file mode 100644
index 0000000000000..c955855037f0d
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py
@@ -0,0 +1,3 @@
+from llama_index.embeddings.nemo.base import NeMoEmbedding
+
+__all__ = ["NeMoEmbedding"]
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py
new file mode 100644
index 0000000000000..f6f7f6a9f8adf
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py
@@ -0,0 +1,83 @@
+"""NeMo embeddings file."""
+
+import json
+import requests
+import aiohttp
+from typing import Any, List, Optional
+
+from llama_index.core.base.embeddings.base import (
+    DEFAULT_EMBED_BATCH_SIZE,
+    BaseEmbedding,
+)
+from llama_index.core.callbacks.base import CallbackManager
+
+
+class NeMoEmbedding(BaseEmbedding):
+    """Nvidia NeMo embeddings."""
+
+    api_endpoint_url: str = "http://localhost:8088/v1/embeddings"  # declared as a pydantic field so BaseEmbedding accepts it
+
+    def __init__(
+        self,
+        model_name: str = "NV-Embed-QA-003",
+        api_endpoint_url: str = "http://localhost:8088/v1/embeddings",
+        embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
+        callback_manager: Optional[CallbackManager] = None,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            model_name=model_name,
+            api_endpoint_url=api_endpoint_url,
+            embed_batch_size=embed_batch_size,
+            callback_manager=callback_manager,
+            **kwargs,
+        )
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "NeMoEmbedding"
+
+    def _get_embedding(self, text: str, input_type: str) -> List[float]:
+        payload = json.dumps(
+            {"input": text, "model": self.model_name, "input_type": input_type}
+        )
+        headers = {"Content-Type": "application/json"}
+
+        response = requests.request(
+            "POST", self.api_endpoint_url, headers=headers, data=payload
+        )
+        response = json.loads(response.text)
+
+        return response["data"][0]["embedding"]
+
+    async def _aget_embedding(self, session: Any, text: str, input_type: str) -> List[float]:
+        headers = {"Content-Type": "application/json"}
+
+        async with session.post(
+            self.api_endpoint_url,
+            json={"input": text, "model": self.model_name, "input_type": input_type},
+            headers=headers,
+        ) as response:
+            response.raise_for_status()
+            answer = await response.text()
+            answer = json.loads(answer)
+            return answer["data"][0]["embedding"]
+
+    def _get_query_embedding(self, query: str) -> List[float]:
+        return self._get_embedding(query, input_type="query")
+
+    def _get_text_embedding(self, text: str) -> List[float]:
+        return self._get_embedding(text, input_type="passage")
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        return [self._get_embedding(text, input_type="passage") for text in texts]
+
+    async def _aget_query_embedding(self, query: str) -> List[float]:
+        async with aiohttp.ClientSession() as session:
+            embedding = await self._aget_embedding(session, query, "query")
+            return embedding
+
+    async def _aget_text_embedding(self, text: str) -> List[float]:
+        async with aiohttp.ClientSession() as session:
+            embedding = await self._aget_embedding(session, text, "passage")
+            return embedding
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml
new file mode 100644
index 0000000000000..a3015c680d6a7
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml
@@ -0,0 +1,54 @@
+[build-system]
+build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core"]
+
+[tool.codespell]
+check-filenames = true
+check-hidden = true
+skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
+
+[tool.mypy]
+disallow_untyped_defs = true
+exclude = ["_static", "build", "examples", "notebooks", "venv"]
+ignore_missing_imports = true
+python_version = "3.8"
+
+[tool.poetry]
+authors = ["Your Name "]
+description = "llama-index embeddings nemo integration"
+license = "MIT"
+name = "llama-index-embeddings-nemo"
+readme = "README.md"
+version = "0.1.0"
+
+[tool.poetry.dependencies]
+python = ">=3.8.1,<3.12"
+llama-index-core = "0.10.0"
+
+[tool.poetry.group.dev.dependencies]
+ipython = "8.10.0"
+jupyter = "^1.0.0"
+mypy = "0.991"
+pre-commit = "3.2.0"
+pylint = "2.15.10"
+pytest = "7.2.1"
+pytest-mock = "3.11.1"
+ruff = "0.0.292"
+tree-sitter-languages = "^1.8.0"
+types-Deprecated = ">=0.1.0"
+types-PyYAML = "^6.0.12.12"
+types-protobuf = "^4.24.0.4"
+types-redis = "4.5.5.0"
+types-requests = "2.28.11.8"
+types-setuptools = "67.1.0.0"
+
+[tool.poetry.group.dev.dependencies.black]
+extras = ["jupyter"]
+version = "<=23.9.1,>=23.7.0"
+
+[tool.poetry.group.dev.dependencies.codespell]
+extras = ["toml"]
+version = ">=v2.2.6"
+
+[[tool.poetry.packages]]
+include = "llama_index/"
diff --git
a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD
new file mode 100644
index 0000000000000..dabf212d7e716
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD
@@ -0,0 +1 @@
+python_tests()
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py
new file mode 100644
index 0000000000000..c5e3f69f89082
--- /dev/null
+++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py
@@ -0,0 +1,7 @@
+from llama_index.core.base.embeddings.base import BaseEmbedding
+from llama_index.embeddings.nemo import NeMoEmbedding
+
+
+def test_embedding_class():
+    emb = NeMoEmbedding()
+    assert isinstance(emb, BaseEmbedding)
diff --git a/llama_index/embeddings/nemo.py b/llama_index/embeddings/nemo.py
new file mode 100644
index 0000000000000..fdc00d0bb4ddb
--- /dev/null
+++ b/llama_index/embeddings/nemo.py
@@ -0,0 +1,62 @@
+"""NeMo embeddings file."""
+
+import json
+import requests
+from typing import Any, List, Optional
+
+from llama_index.core.base.embeddings.base import (
+    DEFAULT_EMBED_BATCH_SIZE,
+    BaseEmbedding,
+)
+from llama_index.core.callbacks.base import CallbackManager
+
+
+class NemoEmbedding(BaseEmbedding):
+    """Nvidia NeMo embeddings."""
+
+    api_endpoint_url: str = "http://localhost:8088/v1/embeddings"  # declared as a pydantic field so BaseEmbedding accepts it
+
+    def __init__(
+        self,
+        model_name: str = "NV-Embed-QA-003",
+        api_endpoint_url: str = "http://localhost:8088/v1/embeddings",
+        embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
+        callback_manager: Optional[CallbackManager] = None,
+        **kwargs: Any,
+    ):
+        super().__init__(
+            model_name=model_name,
+            api_endpoint_url=api_endpoint_url,
+            embed_batch_size=embed_batch_size,
+            callback_manager=callback_manager,
+            **kwargs,
+        )
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "NemoEmbedding"
+
+    def _get_embedding(self, text: str, input_type: str):
+        payload = json.dumps({
+            "input": text,
+            "model": self.model_name,
+            "input_type": input_type
+        })
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request(
+            "POST", self.api_endpoint_url, headers=headers, data=payload)
+        response = json.loads(response.text)
+
+        return response["data"][0]["embedding"]
+
+    def _get_query_embedding(self, query: str) -> List[float]:
+        return self._get_embedding(query, input_type="query")
+
+    def _get_text_embedding(self, text: str) -> List[float]:
+        return self._get_embedding(text, input_type="passage")
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        return [self._get_embedding(text, input_type="passage") for text in texts]