From 4a463caa1df73068f80d8036135e9003b829d9f0 Mon Sep 17 00:00:00 2001 From: Chris Jarrett Date: Mon, 12 Feb 2024 15:15:15 -0800 Subject: [PATCH 1/5] Add nemo retriever embeddings integration --- .../llama-index-embeddings-nemo/.gitignore | 153 ++++++++++++++++++ .../llama-index-embeddings-nemo/BUILD | 3 + .../llama-index-embeddings-nemo/Makefile | 17 ++ .../llama-index-embeddings-nemo/README.md | 1 + .../llama_index/embeddings/nemo/BUILD | 1 + .../llama_index/embeddings/nemo/__init__.py | 3 + .../llama_index/embeddings/nemo/base.py | 62 +++++++ .../pyproject.toml | 54 +++++++ 8 files changed, 294 insertions(+) create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ 
+eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md new file mode 100644 index 0000000000000..89f9945dfeb71 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/README.md @@ -0,0 +1 @@ +# LlamaIndex Embeddings Integration: Nemo diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py new file mode 100644 index 0000000000000..c955855037f0d --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/__init__.py @@ -0,0 +1,3 @@ +from llama_index.embeddings.nemo.base import NemoEmbedding + +__all__ = ["NemoEmbedding"] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py new file mode 100644 index 0000000000000..fdc00d0bb4ddb --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py @@ -0,0 +1,62 @@ +"""NeMo embeddings file.""" + +import json +import requests +from typing import Any, List, Optional + +from llama_index.core.base.embeddings.base import ( + DEFAULT_EMBED_BATCH_SIZE, + BaseEmbedding, +) 
+from llama_index.core.callbacks.base import CallbackManager + + +class NemoEmbedding(BaseEmbedding): + """Nvidia NeMo embeddings. + """ + + def __init__( + self, + model_name: str = "NV-Embed-QA-003", + api_endpoint_url: str = "http://localhost:8088/v1/embeddings", + embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ): + self.api_endpoint_url = api_endpoint_url + + super().__init__( + model_name=model_name, + embed_batch_size=embed_batch_size, + callback_manager=callback_manager, + **kwargs, + ) + + @classmethod + def class_name(cls) -> str: + return "NemoEmbedding" + + def _get_embedding(self, text: str, input_type: str): + payload = json.dumps({ + "input": text, + "model": self.model_name, + "input_type": input_type + }) + headers = { + 'Content-Type': 'application/json' + } + + response = requests.request( + "POST", self.api_endpoint_url, headers=headers, data=payload) + response = json.loads(response.text) + + return response["data"][0]["embedding"] + + def _get_query_embedding(self, query: str) -> List[float]: + return self._get_embedding(text, input_type="query") + + def _get_text_embedding(self, text: str) -> List[float]: + return self._get_embedding(text, input_type="passage") + + def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: + return [self._get_embedding(text, input_type="passage") for text in texts] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml new file mode 100644 index 0000000000000..a3015c680d6a7 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/pyproject.toml @@ -0,0 +1,54 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.mypy] 
+disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Your Name "] +description = "llama-index embeddings nemo integration" +license = "MIT" +name = "llama-index-embeddings-nemo" +readme = "README.md" +version = "0.1.0" + +[tool.poetry.dependencies] +python = ">=3.8.1,<3.12" +llama-index-core = "0.10.0" + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" From f6bb399627570b72dccc04f7ca98d0e9093f4c71 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Wed, 28 Feb 2024 10:21:51 -0600 Subject: [PATCH 2/5] linting --- .../llama_index/embeddings/nemo/base.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py index fdc00d0bb4ddb..68c83b0e5675d 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py @@ -12,8 +12,7 @@ class NemoEmbedding(BaseEmbedding): - """Nvidia NeMo embeddings. 
- """ + """Nvidia NeMo embeddings.""" def __init__( self, @@ -37,24 +36,21 @@ def class_name(cls) -> str: return "NemoEmbedding" def _get_embedding(self, text: str, input_type: str): - payload = json.dumps({ - "input": text, - "model": self.model_name, - "input_type": input_type - }) - headers = { - 'Content-Type': 'application/json' - } + payload = json.dumps( + {"input": text, "model": self.model_name, "input_type": input_type} + ) + headers = {"Content-Type": "application/json"} response = requests.request( - "POST", self.api_endpoint_url, headers=headers, data=payload) + "POST", self.api_endpoint_url, headers=headers, data=payload + ) response = json.loads(response.text) return response["data"][0]["embedding"] def _get_query_embedding(self, query: str) -> List[float]: return self._get_embedding(text, input_type="query") - + def _get_text_embedding(self, text: str) -> List[float]: return self._get_embedding(text, input_type="passage") From a25b14a5a14abfca7cd47cce4eba8854f375442e Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Wed, 28 Feb 2024 10:23:23 -0600 Subject: [PATCH 3/5] bug --- .../llama_index/embeddings/nemo/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py index 68c83b0e5675d..1e48899c67d5e 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py @@ -49,7 +49,7 @@ def _get_embedding(self, text: str, input_type: str): return response["data"][0]["embedding"] def _get_query_embedding(self, query: str) -> List[float]: - return self._get_embedding(text, input_type="query") + return self._get_embedding(query, input_type="query") def _get_text_embedding(self, text: str) -> 
List[float]: return self._get_embedding(text, input_type="passage") From bf4feb9dca5c27db4433802b34a92966aa039225 Mon Sep 17 00:00:00 2001 From: Chris Jarrett Date: Fri, 1 Mar 2024 16:28:26 -0800 Subject: [PATCH 4/5] Add async function --- docs/examples/embeddings/nemo.ipynb | 91 +++++++++++++++++++ .../llama_index/embeddings/nemo/base.py | 31 ++++++- llama_index/embeddings/nemo.py | 62 +++++++++++++ 3 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 docs/examples/embeddings/nemo.ipynb create mode 100644 llama_index/embeddings/nemo.py diff --git a/docs/examples/embeddings/nemo.ipynb b/docs/examples/embeddings/nemo.ipynb new file mode 100644 index 0000000000000..8f7f8c1a677fa --- /dev/null +++ b/docs/examples/embeddings/nemo.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Nvidia NeMo embeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Connect to NVIDIA's embedding service using the NeMoEmbeddings class." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-index-embeddings-nemo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install llama-index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "from llama_index.embeddings.nemo import NeMoEmbedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 16\n", + "model = \"NV-Embed-QA-003\"\n", + "api_endpoint_url = \"http://localhost:8080/v1/embeddings\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embedding_model = NeMoEmbedding(\n", + " batch_size=batch_size, model=model, api_endpoint_url=api_endpoint_url\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embedding_model.embed_query(\"Hello world\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py index 1e48899c67d5e..f6f7f6a9f8adf 100644 --- a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/llama_index/embeddings/nemo/base.py @@ -2,6 +2,7 @@ import json import requests +import aiohttp from typing import Any, List, Optional from llama_index.core.base.embeddings.base import ( @@ -11,7 +12,7 @@ from llama_index.core.callbacks.base import CallbackManager -class NemoEmbedding(BaseEmbedding): +class 
NeMoEmbedding(BaseEmbedding): """Nvidia NeMo embeddings.""" def __init__( @@ -33,9 +34,9 @@ def __init__( @classmethod def class_name(cls) -> str: - return "NemoEmbedding" + return "NeMoEmbedding" - def _get_embedding(self, text: str, input_type: str): + def _get_embedding(self, text: str, input_type: str) -> List[float]: payload = json.dumps( {"input": text, "model": self.model_name, "input_type": input_type} ) @@ -48,6 +49,20 @@ def _get_embedding(self, text: str, input_type: str): return response["data"][0]["embedding"] + async def _aget_embedding(self, session: Any, text: str, input_type: str) -> List[float]: + + headers = {"Content-Type": "application/json"} + + async with session.post( + self.api_endpoint_url, + json={"input": text, "model": self.model_name, "input_type": input_type}, + headers=headers, + ) as response: + response.raise_for_status() + answer = await response.text() + answer = json.loads(answer) + return answer["data"][0]["embedding"] + def _get_query_embedding(self, query: str) -> List[float]: return self._get_embedding(query, input_type="query") @@ -56,3 +71,13 @@ def _get_text_embedding(self, text: str) -> List[float]: return [self._get_embedding(text, input_type="passage") for text in texts] + + async def _aget_query_embedding(self, query: str) -> List[float]: + async with aiohttp.ClientSession() as session: + embedding = await self._aget_embedding(session, query, "query") + return embedding + + async def _aget_text_embedding(self, text: str) -> List[float]: + async with aiohttp.ClientSession() as session: + embedding = await self._aget_embedding(session, text, "passage") + return embedding diff --git a/llama_index/embeddings/nemo.py b/llama_index/embeddings/nemo.py new file mode 100644 index 0000000000000..fdc00d0bb4ddb --- /dev/null +++ b/llama_index/embeddings/nemo.py @@ -0,0 +1,62 @@ +"""NeMo embeddings file.""" + +import json +import requests +from typing import Any, List, Optional
+ +from llama_index.core.base.embeddings.base import ( + DEFAULT_EMBED_BATCH_SIZE, + BaseEmbedding, +) +from llama_index.core.callbacks.base import CallbackManager + + +class NemoEmbedding(BaseEmbedding): + """Nvidia NeMo embeddings. + """ + + def __init__( + self, + model_name: str = "NV-Embed-QA-003", + api_endpoint_url: str = "http://localhost:8088/v1/embeddings", + embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ): + self.api_endpoint_url = api_endpoint_url + + super().__init__( + model_name=model_name, + embed_batch_size=embed_batch_size, + callback_manager=callback_manager, + **kwargs, + ) + + @classmethod + def class_name(cls) -> str: + return "NemoEmbedding" + + def _get_embedding(self, text: str, input_type: str): + payload = json.dumps({ + "input": text, + "model": self.model_name, + "input_type": input_type + }) + headers = { + 'Content-Type': 'application/json' + } + + response = requests.request( + "POST", self.api_endpoint_url, headers=headers, data=payload) + response = json.loads(response.text) + + return response["data"][0]["embedding"] + + def _get_query_embedding(self, query: str) -> List[float]: + return self._get_embedding(text, input_type="query") + + def _get_text_embedding(self, text: str) -> List[float]: + return self._get_embedding(text, input_type="passage") + + def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: + return [self._get_embedding(text, input_type="passage") for text in texts] From fe9fd1d345cf18ac835aa7aa6e90652fbc28f76a Mon Sep 17 00:00:00 2001 From: Chris Jarrett Date: Fri, 1 Mar 2024 16:34:44 -0800 Subject: [PATCH 5/5] Add tests --- .../embeddings/llama-index-embeddings-nemo/tests/BUILD | 1 + .../llama-index-embeddings-nemo/tests/__init__.py | 0 .../tests/test_embeddings_nemo.py | 7 +++++++ 3 files changed, 8 insertions(+) create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD create mode 
100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/__init__.py create mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py new file mode 100644 index 0000000000000..c5e3f69f89082 --- /dev/null +++ b/llama-index-integrations/embeddings/llama-index-embeddings-nemo/tests/test_embeddings_nemo.py @@ -0,0 +1,7 @@ +from llama_index.core.base.embeddings.base import BaseEmbedding +from llama_index.embeddings.nemo import NeMoEmbedding + + +def test_embedding_class(): + emb = NeMoEmbedding() + assert isinstance(emb, BaseEmbedding)