From 3177eb98260186e68bc7a04849067c1a75ff0538 Mon Sep 17 00:00:00 2001 From: nnabar Date: Thu, 15 Feb 2024 16:36:19 +0000 Subject: [PATCH 1/9] Databricks Vector Search integration from BAM Elevate --- .../.gitignore | 153 +++++++ .../BUILD | 3 + .../Makefile | 17 + .../README.md | 1 + .../databricks-vector-search/BUILD | 1 + .../databricks-vector-search/__init__.py | 5 + .../databricks-vector-search/base.py | 388 ++++++++++++++++++ .../databricks-vector-search/utils.py | 15 + .../pyproject.toml | 60 +++ .../tests/BUILD | 1 + .../tests/__init__.py | 0 ..._vector_stores_databricks_vector_search.py | 7 + 12 files changed, 651 insertions(+) create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/.gitignore create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/Makefile create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/README.md create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/BUILD create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/BUILD create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/__init__.py create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/test_vector_stores_databricks_vector_search.py diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/.gitignore b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/.gitignore new file mode 100644 index 0000000000000..990c18de22908 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/.gitignore @@ -0,0 +1,153 @@ +llama_index/_static +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +bin/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +etc/ +include/ +lib/ +lib64/ +parts/ +sdist/ +share/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +.ruff_cache + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints +notebooks/ + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Jetbrains +.idea +modules/ +*.swp + +# VsCode +.vscode + +# pipenv +Pipfile +Pipfile.lock + +# pyright +pyrightconfig.json diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD new file mode 100644 index 0000000000000..0896ca890d8bf --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD @@ -0,0 +1,3 @@ +poetry_requirements( + name="poetry", +) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/Makefile b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/Makefile new file mode 100644 index 0000000000000..b9eab05aa3706 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/Makefile @@ -0,0 +1,17 @@ +GIT_ROOT ?= $(shell git rev-parse --show-toplevel) + +help: ## Show all Makefile targets. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' + +format: ## Run code autoformatters (black). + pre-commit install + git ls-files | xargs pre-commit run black --files + +lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy + pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files + +test: ## Run tests via pytest. + pytest tests + +watch-docs: ## Build and watch documentation. 
+ sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/README.md b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/README.md new file mode 100644 index 0000000000000..837b6aaec7fbe --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/README.md @@ -0,0 +1 @@ +# LlamaIndex Vector_Stores Integration: Databricks Vector Search diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/BUILD new file mode 100644 index 0000000000000..db46e8d6c978c --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/BUILD @@ -0,0 +1 @@ +python_sources() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py new file mode 100644 index 0000000000000..041494b47fb05 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py @@ -0,0 +1,5 @@ +from llama_index.vector_stores.databricks_vector_search.base import ( + DatabricksVectorSearch, +) + +__all__ = ["DatabricksVectorSearch"] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py new file mode 100644 index 0000000000000..9ef9d0298d3d8 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py @@ -0,0 +1,388 @@ +""" +Databricks Vector Search index. + +Supports Delta Sync indexes and Direct Access indexes in Databricks Vector Search. 
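+
+A Direct Access index is written to directly by the caller (the
+self-managed-embeddings path used by this store's ``add`` method), while a
+Delta Sync index is kept in sync with a source Delta table, optionally with
+Databricks-managed embeddings. An illustrative sketch of creating a Direct
+Access index (mirroring the demo notebook added later in this series; all
+names are placeholders):
+
+    from databricks.vector_search.client import VectorSearchClient
+
+    client = VectorSearchClient()
+    index = client.create_direct_access_index(
+        endpoint_name="my_endpoint",
+        index_name="my_catalog.my_schema.my_table",
+        primary_key="id",
+        embedding_dimension=1536,
+        embedding_vector_column="vector",
+        schema={"id": "string", "vector": "array<float>", "text": "string"},
+    )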
+""" + +import json +import logging +from typing import ( + Any, + List, + Dict, + Optional, + TYPE_CHECKING, + cast, +) +from enum import Enum + +from pydantic import BaseModel, Field + +from llama_index.core.vector_stores.types import ( + BasePydanticVectorStore, + MetadataFilters, + FilterCondition, + FilterOperator, + VectorStoreQuery, + VectorStoreQueryResult, + VectorStoreQueryMode, +) +from llama_index.core.vector_stores.utils import node_to_metadata_dict +from llama_index.core.schema import TextNode, BaseNode +from llama_index.core.bridge.pydantic import PrivateAttr + +from llama_index.vector_stores.databricks_vector_search.utils import _import_databricks + + +if TYPE_CHECKING: + from databricks.vector_search.client import VectorSearchIndex + + +class _DatabricksIndexType(str, Enum): + DIRECT_ACCESS = "DIRECT_ACCESS" + DELTA_SYNC = "DELTA_SYNC" + + +class _DatabricksIndexDescription(BaseModel): + primary_key: str + index_type: _DatabricksIndexType + delta_sync_index_spec: Dict = Field(default_factory=dict) + direct_access_index_spec: Dict = Field(default_factory=dict) + + +_logger = logging.getLogger(__name__) + + +_filter_translation = { + FilterOperator.EQ: "", + FilterOperator.GT: ">", + FilterOperator.LT: "<", + FilterOperator.NE: "NOT", + FilterOperator.GTE: ">=", + FilterOperator.LTE: "<=", + FilterOperator.IN: "", + FilterOperator.NIN: "NOT", +} + + +def _transform_databricks_filter_operator(operator: FilterOperator) -> str: + try: + return _filter_translation[operator] + + except KeyError as e: + raise ValueError(f"filter operator {operator} is not supported") + + +def _to_databricks_filter(standard_filters: MetadataFilters) -> dict: + """Convert from standard dataclass to databricks filter dict.""" + filters = {} + + condition = standard_filters.condition or FilterOperator.AND + + for filter in standard_filters.filters: + value = filter.value if isinstance(filter.value, list) else [filter.value] + + transformed_operator = _transform_databricks_filter_operator(filter.operator) + + if transformed_operator == "": + key = filter.key + + else: + key = f"{filter.key} {transformed_operator}" + + if key in filters: + raise ValueError(f"filter condition already exists for {key}") + + filters[key] = value + + if condition == FilterCondition.AND: + return filters + + elif condition == FilterCondition.OR: + keys, values = zip(*filters.items()) + return {" OR ".join(keys): values} + + raise ValueError(f"condition {condition} is not supported") + + +class DatabricksVectorSearch(BasePydanticVectorStore): + """ + Vector store for Databricks Vector Search. 
+ + Install ``databricks-vectorsearch`` package using the following in a Databricks notebook: + %pip install databricks-vectorsearch + dbutils.library.restartPython() + """ + + stores_text: bool = True + text_column: Optional[str] + columns: Optional[List[str]] + + _index: VectorSearchIndex = PrivateAttr() + _primary_key: str = PrivateAttr() + _index_type: str = PrivateAttr() + _delta_sync_index_spec: dict = PrivateAttr() + _direct_access_index_spec: dict = PrivateAttr() + + def __init__( + self, + index: VectorSearchIndex, + text_column: Optional[str] = None, + columns: Optional[List[str]] = None, + ) -> None: + _import_databricks() + + if not isinstance(index, VectorSearchIndex): + raise TypeError( + f"index must be of type `VectorSearchIndex`, not {type(index)}" + ) + + self._index = index + + # unpack the index spec + index_description = _DatabricksIndexDescription.parse_obj( + self._index.describe() + ) + + self._primary_key = index_description.primary_key + self._index_type = index_description.index_type + self._delta_sync_index_spec = index_description.delta_sync_index_spec + self._direct_access_index_spec = index_description.direct_access_index_spec + + super().__init__( + text_column=text_column, columns=columns, + ) + + # initialize the column name for the text column in the delta table + if self._is_databricks_managed_embeddings(): + index_source_column = self._embedding_source_column_name() + + # check if input text column matches the source column of the index + if text_column is not None and text_column != index_source_column: + raise ValueError( + f"text_column '{text_column}' does not match with the " + f"source column of the index: '{index_source_column}'." + ) + + self.text_column = index_source_column + else: + if text_column is None: + raise ValueError("text_column is required for self-managed embeddings.") + self.text_column = text_column + + # Fold primary key and text column into columns if they're not empty. + columns_to_add = set(columns or []) + columns_to_add.add(self._primary_key) + columns_to_add.add(self.text_column) + columns_to_add -= {"", None} + + self.columns = list(columns_to_add) + + # If the index schema is known, all our columns should be in that index. + # Validate specified columns are in the index + index_schema = self._index_schema() + + if self._is_direct_access_index() and index_schema: + missing_columns = columns_to_add - set(index_schema.keys()) + + if missing_columns: + raise ValueError( + f"columns missing from schema: {', '.join(missing_columns)}" + ) + + def add(self, nodes: List[BaseNode], **add_kwargs: Any,) -> List[str]: + """Add nodes to index. + + Args: + nodes: List[BaseNode]: list of nodes with embeddings + + """ + if self._is_databricks_managed_embeddings(): + raise ValueError( + "Adding nodes is not supported for Databricks-managed embeddings." 
+ ) + + # construct the entries to upsert + entries = [] + ids = [] + for node in nodes: + node_id = node.node_id + metadata = node_to_metadata_dict(node, remove_text=True, flat_metadata=True) + entry = { + self._primary_key: node_id, + self.text_column: node.get_content(), + self._embedding_vector_column_name(): node.get_embedding(), + **{ + col: metadata.get(col) + for col in filter( + lambda column: column + not in (self._primary_key, self.text_column), + self.columns or [], + ) + }, + } + + entries.append(entry) + ids.append(node_id) + + # attempt the upsert + upsert_resp = self._index.upsert(entries,) + + # return the successful IDs + response_status = upsert_resp.get("status") + + failed_ids = ( + set(upsert_resp["result"]["failed_primary_keys"] or []) + if "result" in upsert_resp + and "failed_primary_keys" in upsert_resp["result"] + else set() + ) + + if response_status not in ("PARTIAL_SUCCESS", "FAILURE") or not failed_ids: + return ids + + elif response_status == "PARTIAL_SUCCESS": + _logger.warning( + "failed to add %d out of %d texts to the index", + len(failed_ids), + len(ids), + ) + + elif response_status == "FAILURE": + _logger.error("failed to add all %d texts to the index", len(ids)) + + return list(filter(lambda id_: id_ not in failed_ids, ids)) + + def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: + """ + Delete nodes with ref_doc_id. + + Args: + ref_doc_id (str): The doc_id of the document to delete. + + """ + self._index.delete(primary_keys=[ref_doc_id],) + + def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: + """Query index for top k most similar nodes. + """ + if self._is_databricks_managed_embeddings(): + query_text = query.query_str + query_vector = None + else: + query_text = None + query_vector = cast(List[float], query.query_embedding) + + if query.mode not in ( + VectorStoreQueryMode.DEFAULT, + VectorStoreQueryMode.HYBRID, + ): + raise ValueError( + "Only DEFAULT and HYBRID modes are supported for Databricks Vector Search." + ) + + if query.filters is not None: + filters = _to_databricks_filter(query.filters) + else: + filters = None + + search_resp = self._index.similarity_search( + columns=self.columns, + query_text=query_text, + query_vector=query_vector, + filters=filters, + num_results=query.similarity_top_k, + ) + + columns = [ + col["name"] + for col in search_resp.get("manifest", dict()).get("columns", []) + ] + top_k_nodes = [] + top_k_ids = [] + top_k_scores = [] + for result in search_resp.get("result", dict()).get("data_array", []): + doc_id = result[columns.index(self._primary_key)] + text_content = result[columns.index(self.text_column)] + metadata = { + col: value + for col, value in zip(columns[:-1], result[:-1]) + if col not in [self._primary_key, self.text_column] + } + metadata[self._primary_key] = doc_id + score = result[-1] + node = TextNode( + text=text_content, id_=doc_id, metadata=metadata + ) # TODO star_char, end_char, relationships? 
https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/vector_stores/llama-index-vector-stores-pinecone/llama_index/vector_stores/pinecone/base.py + + top_k_ids.append(doc_id) + top_k_nodes.append(node) + top_k_scores.append(score) + + return VectorStoreQueryResult( + nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids + ) + + @property + def client(self) -> Any: + """Return VectorStoreIndex""" + return self._index + + # The remaining utilities (and snippets of the above) are taken from + # https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/vectorstores/databricks_vector_search.py + def _index_schema(self) -> Optional[dict]: + """Return the index schema as a dictionary. + Return None if no schema found. + """ + if self._is_direct_access_index(): + schema_json = self._direct_access_index_spec.get("schema_json") + if schema_json is not None: + return json.loads(schema_json) + return None + + def _embedding_vector_column_name(self) -> Optional[str]: + """Return the name of the embedding vector column. + None if the index is not a self-managed embedding index. + """ + return self._embedding_vector_column().get("name") + + def _embedding_vector_column(self) -> dict: + """Return the embedding vector column configs as a dictionary. + Empty if the index is not a self-managed embedding index. + """ + index_spec = ( + self._delta_sync_index_spec + if self._is_delta_sync_index() + else self._direct_access_index_spec + ) + return next(iter(index_spec.get("embedding_vector_columns") or list()), dict()) + + def _embedding_source_column_name(self) -> Optional[str]: + """Return the name of the embedding source column. + None if the index is not a Databricks-managed embedding index. + """ + return self._embedding_source_column().get("name") + + def _embedding_source_column(self) -> dict: + """Return the embedding source column configs as a dictionary. + Empty if the index is not a Databricks-managed embedding index. + """ + return next( + iter(self._delta_sync_index_spec.get("embedding_source_columns") or list()), + dict(), + ) + + def _is_delta_sync_index(self) -> bool: + """Return True if the index is a delta-sync index.""" + return self._index_type == _DatabricksIndexType.DELTA_SYNC + + def _is_direct_access_index(self) -> bool: + """Return True if the index is a direct-access index.""" + return self._index_type == _DatabricksIndexType.DIRECT_ACCESS + + def _is_databricks_managed_embeddings(self) -> bool: + """Return True if the embeddings are managed by Databricks Vector Search.""" + return ( + self._is_delta_sync_index() + and self._embedding_source_column_name() is not None + ) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py new file mode 100644 index 0000000000000..e2fef63a28558 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py @@ -0,0 +1,15 @@ +from typing import Any + + +def _import_databricks() -> Any: + """ + Try to import databricks.vector_search.client.VectorSearchIndex. If databricks module it's not already installed, instruct user how to install. 
+ """ + + try: + from databricks.vector_search.client import VectorSearchIndex + except ImportError: + raise ImportError( + "`databricks-vectorsearch` package not found: " + "please run `pip install databricks-vectorsearch`" + ) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml new file mode 100644 index 0000000000000..d2edc205d0252 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml @@ -0,0 +1,60 @@ +[build-system] +build-backend = "poetry.core.masonry.api" +requires = ["poetry-core"] + +[tool.codespell] +check-filenames = true +check-hidden = true +skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" + +[tool.llamahub] +classes = ["DatabricksVectorSearch"] +contains_example = false +import_path = "llama_index.vector_stores.databricks_vector_search" + +[tool.mypy] +disallow_untyped_defs = true +exclude = ["_static", "build", "examples", "notebooks", "venv"] +ignore_missing_imports = true +python_version = "3.8" + +[tool.poetry] +authors = ["Nickhil Nabar "] +description = "llama-index vector_stores databricks vector search integration" +license = "MIT" +name = "llama-index-vector-stores-databricks-vector-search" +readme = "README.md" +version = "0.1.1" + +[tool.poetry.dependencies] +python = ">=3.8.1,<3.12" +llama-index-core = "^0.10.1" +databricks-vectorsearch + +[tool.poetry.group.dev.dependencies] +ipython = "8.10.0" +jupyter = "^1.0.0" +mypy = "0.991" +pre-commit = "3.2.0" +pylint = "2.15.10" +pytest = "7.2.1" +pytest-mock = "3.11.1" +ruff = "0.0.292" +tree-sitter-languages = "^1.8.0" +types-Deprecated = ">=0.1.0" +types-PyYAML = "^6.0.12.12" +types-protobuf = "^4.24.0.4" +types-redis = "4.5.5.0" +types-requests = "2.28.11.8" +types-setuptools = "67.1.0.0" + +[tool.poetry.group.dev.dependencies.black] +extras = ["jupyter"] +version = "<=23.9.1,>=23.7.0" + +[tool.poetry.group.dev.dependencies.codespell] +extras = ["toml"] +version = ">=v2.2.6" + +[[tool.poetry.packages]] +include = "llama_index/" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/BUILD new file mode 100644 index 0000000000000..dabf212d7e716 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/BUILD @@ -0,0 +1 @@ +python_tests() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/test_vector_stores_databricks_vector_search.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/test_vector_stores_databricks_vector_search.py new file mode 100644 index 0000000000000..d8b07700ce3e6 --- /dev/null +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/tests/test_vector_stores_databricks_vector_search.py @@ -0,0 +1,7 @@ +from llama_index.core.vector_stores.types import BasePydanticVectorStore +from 
llama_index.vector_stores.databricks_vector_search import DatabricksVectorSearch + + +def test_class(): + names_of_base_classes = [b.__name__ for b in DatabricksVectorSearch.__mro__] + assert BasePydanticVectorStore.__name__ in names_of_base_classes From 18d700c1ae32b98646caac2361b3dbf0f6a2dddf Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Fri, 16 Feb 2024 00:29:22 -0500 Subject: [PATCH 2/9] cr --- .../pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml index d2edc205d0252..5296cebaae2a0 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml @@ -19,7 +19,7 @@ ignore_missing_imports = true python_version = "3.8" [tool.poetry] -authors = ["Nickhil Nabar "] +authors = ["Alberto Da Costa ", "Nickhil Nabar Date: Fri, 16 Feb 2024 10:11:10 -0500 Subject: [PATCH 3/9] cr --- .../databricks-vector-search/base.py | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py index 9ef9d0298d3d8..1089f770b5dbb 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py @@ -112,6 +112,7 @@ class DatabricksVectorSearch(BasePydanticVectorStore): Install ``databricks-vectorsearch`` package using the following in a Databricks notebook: %pip install databricks-vectorsearch dbutils.library.restartPython() + """ stores_text: bool = True @@ -150,7 +151,8 @@ def __init__( self._direct_access_index_spec = index_description.direct_access_index_spec super().__init__( - text_column=text_column, columns=columns, + text_column=text_column, + columns=columns, ) # initialize the column name for the text column in the delta table @@ -190,7 +192,11 @@ def __init__( f"columns missing from schema: {', '.join(missing_columns)}" ) - def add(self, nodes: List[BaseNode], **add_kwargs: Any,) -> List[str]: + def add( + self, + nodes: List[BaseNode], + **add_kwargs: Any, + ) -> List[str]: """Add nodes to index. Args: @@ -226,7 +232,9 @@ def add(self, nodes: List[BaseNode], **add_kwargs: Any,) -> List[str]: ids.append(node_id) # attempt the upsert - upsert_resp = self._index.upsert(entries,) + upsert_resp = self._index.upsert( + entries, + ) # return the successful IDs response_status = upsert_resp.get("status") @@ -261,11 +269,12 @@ def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: ref_doc_id (str): The doc_id of the document to delete. """ - self._index.delete(primary_keys=[ref_doc_id],) + self._index.delete( + primary_keys=[ref_doc_id], + ) def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: - """Query index for top k most similar nodes. 
- """ + """Query index for top k most similar nodes.""" if self._is_databricks_managed_embeddings(): query_text = query.query_str query_vector = None @@ -295,13 +304,12 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul ) columns = [ - col["name"] - for col in search_resp.get("manifest", dict()).get("columns", []) + col["name"] for col in search_resp.get("manifest", {}).get("columns", []) ] top_k_nodes = [] top_k_ids = [] top_k_scores = [] - for result in search_resp.get("result", dict()).get("data_array", []): + for result in search_resp.get("result", {}).get("data_array", []): doc_id = result[columns.index(self._primary_key)] text_content = result[columns.index(self.text_column)] metadata = { @@ -325,7 +333,7 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul @property def client(self) -> Any: - """Return VectorStoreIndex""" + """Return VectorStoreIndex.""" return self._index # The remaining utilities (and snippets of the above) are taken from @@ -355,7 +363,7 @@ def _embedding_vector_column(self) -> dict: if self._is_delta_sync_index() else self._direct_access_index_spec ) - return next(iter(index_spec.get("embedding_vector_columns") or list()), dict()) + return next(iter(index_spec.get("embedding_vector_columns") or []), {}) def _embedding_source_column_name(self) -> Optional[str]: """Return the name of the embedding source column. @@ -368,8 +376,8 @@ def _embedding_source_column(self) -> dict: Empty if the index is not a Databricks-managed embedding index. """ return next( - iter(self._delta_sync_index_spec.get("embedding_source_columns") or list()), - dict(), + iter(self._delta_sync_index_spec.get("embedding_source_columns") or []), + {}, ) def _is_delta_sync_index(self) -> bool: From d45cc3dcb9b1216fd2933056c0d9c861c95e38f2 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Fri, 16 Feb 2024 23:19:02 -0500 Subject: [PATCH 4/9] cr --- .../databricks-vector-search/base.py | 11 +++++++---- .../databricks-vector-search/utils.py | 15 --------------- 2 files changed, 7 insertions(+), 19 deletions(-) delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py index 1089f770b5dbb..3f3ec169ee20e 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py @@ -31,8 +31,6 @@ from llama_index.core.schema import TextNode, BaseNode from llama_index.core.bridge.pydantic import PrivateAttr -from llama_index.vector_stores.databricks_vector_search.utils import _import_databricks - if TYPE_CHECKING: from databricks.vector_search.client import VectorSearchIndex @@ -131,8 +129,13 @@ def __init__( text_column: Optional[str] = None, columns: Optional[List[str]] = None, ) -> None: - _import_databricks() - + try: + from databricks.vector_search.client import VectorSearchIndex + except ImportError: + raise ImportError( + "`databricks-vectorsearch` package not found: " + "please 
run `pip install databricks-vectorsearch`" + ) if not isinstance(index, VectorSearchIndex): raise TypeError( f"index must be of type `VectorSearchIndex`, not {type(index)}" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py deleted file mode 100644 index e2fef63a28558..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/utils.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Any - - -def _import_databricks() -> Any: - """ - Try to import databricks.vector_search.client.VectorSearchIndex. If databricks module it's not already installed, instruct user how to install. - """ - - try: - from databricks.vector_search.client import VectorSearchIndex - except ImportError: - raise ImportError( - "`databricks-vectorsearch` package not found: " - "please run `pip install databricks-vectorsearch`" - ) From 17e9639dadf768bc1b20634abeeb44a0cec6a3dd Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Tue, 20 Feb 2024 20:44:13 -0600 Subject: [PATCH 5/9] pyproject.toml --- .../pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml index 5296cebaae2a0..8151bb9da3521 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml @@ -8,10 +8,12 @@ check-hidden = true skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" [tool.llamahub] -classes = ["DatabricksVectorSearch"] contains_example = false import_path = "llama_index.vector_stores.databricks_vector_search" +[tool.llamahub.class_authors] +DatabricksVectorSearch = "NickhilN" + [tool.mypy] disallow_untyped_defs = true exclude = ["_static", "build", "examples", "notebooks", "venv"] From 258d65a6dc003fae31086ac1954a69496f8b6b56 Mon Sep 17 00:00:00 2001 From: Nickhil Nabar Date: Tue, 12 Mar 2024 17:45:59 +0000 Subject: [PATCH 6/9] added linting and documentation/examples to the DatabricksVectorSearch module --- .../DatabricksVectorSearchDemo.ipynb | 289 ++++++++++++++++++ docs/module_guides/storing/vector_stores.md | 2 + 2 files changed, 291 insertions(+) create mode 100644 docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb diff --git a/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb b/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb new file mode 100644 index 0000000000000..f5b043fc80d2d --- /dev/null +++ b/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb @@ -0,0 +1,289 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "2f685925-940a-418f-9b00-5500f8878fc3", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Databricks Vector Search\n", + "\n", + "Databricks Vector Search is a vector database that is built into the Databricks Intelligence Platform and integrated with its governance and productivity tools. 
Full docs here: https://docs.databricks.com/en/generative-ai/vector-search.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install llama-index and databricks-vectorsearch. You must be inside a Databricks runtime to use the Vector Search python client." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "8289764f-1001-4eb7-b162-92490746ebe8", + "showTitle": true, + "title": "Install llama-index and databricks-vectorsearch client" + } + }, + "outputs": [], + "source": [ + "%pip install llama-index\n", + "%pip install databricks-vectorsearch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import databricks dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "75dd1adb-1937-49d2-aef1-393886271d46", + "showTitle": true, + "title": "Import Databricks dependencies" + } + }, + "outputs": [], + "source": [ + "from databricks.vector_search.client import VectorSearchIndex, VectorSearchClient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import LlamaIndex dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "b4ca851b-b0ee-4ea6-a31c-755c07e16d51", + "showTitle": true, + "title": "Import LlamaIndex dependencies" + } + }, + "outputs": [], + "source": [ + "from llama_index.core import (\n", + " VectorStoreIndex,\n", + " SimpleDirectoryReader,\n", + " ServiceContext,\n", + " StorageContext,\n", + ")\n", + "from llama_index.vector_stores.databricks_vector_search import DatabricksVectorSearch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load example data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "dd06759d-0070-48a8-aa74-3d46b12457f8", + "showTitle": true, + "title": "Load example data" + } + }, + "outputs": [], + "source": [ + "!mkdir -p 'data/paul_graham/'\n", + "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "7a82b624-bffb-453b-b5c6-f8414566dc2f", + "showTitle": true, + "title": "Read the data" + } + }, + "outputs": [], + "source": [ + "# load documents\n", + "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()\n", + "print(f\"Total documents: {len(documents)}\")\n", + "print(f\"First document, id: {documents[0].doc_id}\")\n", + "print(f\"First document, hash: {documents[0].hash}\")\n", + "print(\n", + " \"First document, text\"\n", + " f\" ({len(documents[0].text)} characters):\\n{'='*20}\\n{documents[0].text[:360]} ...\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a Databricks Vector 
Search endpoint which will serve the index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "99c56854-c182-4dfe-bc08-cee8263461ee", + "showTitle": true, + "title": "Create the Databricks Vector Search endpoint" + } + }, + "outputs": [], + "source": [ + "# Create a vector search endpoint\n", + "client = VectorSearchClient()\n", + "client.create_endpoint(\n", + " name=\"llamaindex_dbx_vector_store_test_endpoint\", endpoint_type=\"STANDARD\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create the Databricks Vector Search index, and build it from the documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6abe427b-79ca-4c0c-8e58-ba5f670294ae", + "showTitle": true, + "title": "Build the index from the documents" + } + }, + "outputs": [], + "source": [ + "# Create a vector search index\n", + "# it must be placed inside a Unity Catalog-enabled schema\n", + "\n", + "# We'll use self-managed embeddings (i.e. managed by LlamaIndex) rather than a Databricks-managed index\n", + "databricks_index = client.create_direct_access_index(\n", + " endpoint_name=\"llamaindex_dbx_vector_store_test_endpoint\",\n", + " index_name=\"my_catalog.my_schema.my_test_table\",\n", + " primary_key=\"my_primary_key_name\",\n", + " embedding_dimension=1536, # match the embeddings model dimension you're going to use\n", + " embedding_vector_column=\"my_embedding_vector_column_name\", # you name this anything you want - it'll be picked up by the LlamaIndex class\n", + " schema={\n", + " \"my_primary_key_name\": \"string\",\n", + " \"my_embedding_vector_column_name\": \"array\",\n", + " \"text\": \"string\", # one column must match the text_column in the DatabricksVectorSearch instance created below; this will hold the raw node text.\n", + " # add any other metadata you may have in your nodes (Databricks Vector Search supports metadata filtering)\n", + " },\n", + ")\n", + "\n", + "databricks_vector_store = DatabricksVectorSearch(\n", + " index=databricks_index, text_column=\"text\"\n", + ") # text_column is required for self-managed embeddings\n", + "storage_context = StorageContext.from_defaults(vector_store=databricks_vector_store)\n", + "index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Query the index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3e8c18f7-db8c-45c1-bb82-b75ad2307824", + "showTitle": true, + "title": "Query using the index" + } + }, + "outputs": [], + "source": [ + "query_engine = index.as_query_engine()\n", + "response = query_engine.query(\"Why did the author choose to work on AI?\")\n", + "\n", + "print(response.response)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 4 + }, + "notebookName": "Databricks Vector Search Demo (LlamaIndex Integration)", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff 
--git a/docs/module_guides/storing/vector_stores.md b/docs/module_guides/storing/vector_stores.md
index e09e63318e9db..97a3d98a04a59 100644
--- a/docs/module_guides/storing/vector_stores.md
+++ b/docs/module_guides/storing/vector_stores.md
@@ -22,6 +22,7 @@ We are actively adding more integrations and improving feature coverage for each
 | ChatGPT Retrieval Plugin | aggregator | | | ✓ | ✓ | |
 | Chroma | self-hosted | ✓ | | ✓ | ✓ | |
 | DashVector | cloud | ✓ | ✓ | ✓ | ✓ | |
+| Databricks | cloud | ✓ | | ✓ | ✓ |
 | Deeplake | self-hosted / cloud | ✓ | | ✓ | ✓ | |
 | DocArray | aggregator | ✓ | | ✓ | ✓ | |
 | DynamoDB | cloud | | | ✓ | | |
@@ -67,6 +68,7 @@ maxdepth: 1
 /examples/vector_stores/ChromaIndexDemo.ipynb
 /examples/vector_stores/DashvectorIndexDemo.ipynb
 /examples/vector_stores/DashvectorIndexDemo-Hybrid.ipynb
+/examples/vector_stores/DatabricksVectorSearchDemo.ipynb
 /examples/vector_stores/DeepLakeIndexDemo.ipynb
 /examples/vector_stores/DocArrayHnswIndexDemo.ipynb
 /examples/vector_stores/DocArrayInMemoryIndexDemo.ipynb

From 0c54272fb2f019467804c822efb035a4d1cda970 Mon Sep 17 00:00:00 2001
From: Nickhil Nabar
Date: Wed, 13 Mar 2024 00:01:12 +0000
Subject: [PATCH 7/9] updated delete functionality to track document ID to
 node ID mappings and delete appropriately. clarified documentation around
 declaring doc_id and other metadata fields in the schema and columns keyword
 argument

---
 .../DatabricksVectorSearchDemo.ipynb | 7 +++++--
 .../databricks-vector-search/base.py | 19 +++++++++++++++----
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb b/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb
index f5b043fc80d2d..58044d3d54c5c 100644
--- a/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb
+++ b/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb
@@ -230,13 +230,16 @@
 " schema={\n",
 " \"my_primary_key_name\": \"string\",\n",
 " \"my_embedding_vector_column_name\": \"array\",\n",
- " \"text\": \"string\", # one column must match the text_column in the DatabricksVectorSearch instance created below; this will hold the raw node text.\n",
+ " \"text\": \"string\", # one column must match the text_column in the DatabricksVectorSearch instance created below; this will hold the raw node text,\n",
+ " \"doc_id\": \"string\", # one column must contain the reference document ID (this will be populated by LlamaIndex automatically)\n",
 " # add any other metadata you may have in your nodes (Databricks Vector Search supports metadata filtering)\n",
+ " # NOTE THAT THESE FIELDS MUST BE ADDED EXPLICITLY TO BE USED FOR METADATA FILTERING\n",
 " },\n",
 ")\n",
 "\n",
 "databricks_vector_store = DatabricksVectorSearch(\n",
- " index=databricks_index, text_column=\"text\"\n",
+ " index=databricks_index, text_column=\"text\",\n",
+ " columns=None, # YOU MUST ALSO RECORD YOUR METADATA FIELD NAMES HERE\n",
 ") # text_column is required for self-managed embeddings\n",
 "storage_context = StorageContext.from_defaults(vector_store=databricks_vector_store)\n",
 "index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)"

diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py
index 3f3ec169ee20e..7178c9a18a77e 100644
---
a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py @@ -122,6 +122,7 @@ class DatabricksVectorSearch(BasePydanticVectorStore): _index_type: str = PrivateAttr() _delta_sync_index_spec: dict = PrivateAttr() _direct_access_index_spec: dict = PrivateAttr() + _doc_id_to_pk: dict = PrivateAttr() def __init__( self, @@ -152,7 +153,12 @@ def __init__( self._index_type = index_description.index_type self._delta_sync_index_spec = index_description.delta_sync_index_spec self._direct_access_index_spec = index_description.direct_access_index_spec + self._doc_id_to_pk = {} + if columns is None: + columns = [] + if "doc_id" not in columns: + columns = columns[:19] + ["doc_id"] super().__init__( text_column=text_column, columns=columns, @@ -226,10 +232,12 @@ def add( for col in filter( lambda column: column not in (self._primary_key, self.text_column), - self.columns or [], + self.columns + ["doc_id"] or ["doc_id"], # explicitly record doc_id as metadata (for delete) ) }, } + doc_id = metadata.get("doc_id") + self._doc_id_to_pk[doc_id] = list(set(self._doc_id_to_pk.get(doc_id, []) + [node_id])) # associate this node_id with this doc_id entries.append(entry) ids.append(node_id) @@ -272,9 +280,12 @@ def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: ref_doc_id (str): The doc_id of the document to delete. """ - self._index.delete( - primary_keys=[ref_doc_id], - ) + primary_keys = self._doc_id_to_pk.get(ref_doc_id, None) # get the node_ids associated with the doc_id + if primary_keys is not None: + self._index.delete( + primary_keys=primary_keys, + ) + self._doc_id_to_pk.pop(ref_doc_id) # remove this doc_id from the doc_id-to-node_id map def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: """Query index for top k most similar nodes.""" From 50b4d1183f6c8126b378808be73cc897b74ec6fb Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Thu, 14 Mar 2024 16:31:53 -0600 Subject: [PATCH 8/9] nits --- .../DatabricksVectorSearchDemo.ipynb | 129 ++++-------------- .../BUILD | 3 - .../databricks-vector-search/__init__.py | 5 - .../.gitignore | 0 .../BUILD | 4 + .../Makefile | 0 .../README.md | 0 .../vector_stores/databricks}/BUILD | 0 .../vector_stores/databricks/__init__.py | 5 + .../vector_stores/databricks}/base.py | 28 ++-- .../pyproject.toml | 4 +- .../tests/BUILD | 0 .../tests/__init__.py | 0 ..._vector_stores_databricks_vector_search.py | 2 +- 14 files changed, 55 insertions(+), 125 deletions(-) delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => llama-index-vector-stores-databricks}/.gitignore (100%) create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/BUILD rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => llama-index-vector-stores-databricks}/Makefile (100%) rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => 
llama-index-vector-stores-databricks}/README.md (100%) rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search => llama-index-vector-stores-databricks/llama_index/vector_stores/databricks}/BUILD (100%) create mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/__init__.py rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search => llama-index-vector-stores-databricks/llama_index/vector_stores/databricks}/base.py (94%) rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => llama-index-vector-stores-databricks}/pyproject.toml (91%) rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => llama-index-vector-stores-databricks}/tests/BUILD (100%) rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => llama-index-vector-stores-databricks}/tests/__init__.py (100%) rename llama-index-integrations/vector_stores/{llama-index-vector-stores-databricks-vector-search => llama-index-vector-stores-databricks}/tests/test_vector_stores_databricks_vector_search.py (73%) diff --git a/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb b/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb index 58044d3d54c5c..e48cfd9267a49 100644 --- a/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb +++ b/docs/examples/vector_stores/DatabricksVectorSearchDemo.ipynb @@ -2,15 +2,7 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": {}, - "inputWidgets": {}, - "nuid": "2f685925-940a-418f-9b00-5500f8878fc3", - "showTitle": false, - "title": "" - } - }, + "metadata": {}, "source": [ "# Databricks Vector Search\n", "\n", @@ -27,21 +19,10 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "8289764f-1001-4eb7-b162-92490746ebe8", - "showTitle": true, - "title": "Install llama-index and databricks-vectorsearch client" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%pip install llama-index\n", + "%pip install llama-index llama-index-vector-stores-databricks\n", "%pip install databricks-vectorsearch" ] }, @@ -55,21 +36,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "75dd1adb-1937-49d2-aef1-393886271d46", - "showTitle": true, - "title": "Import Databricks dependencies" - } - }, + "metadata": {}, "outputs": [], "source": [ - "from databricks.vector_search.client import VectorSearchIndex, VectorSearchClient" + "from databricks.vector_search.client import (\n", + " VectorSearchIndex,\n", + " VectorSearchClient,\n", + ")" ] }, { @@ -82,18 +55,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "b4ca851b-b0ee-4ea6-a31c-755c07e16d51", - "showTitle": true, - "title": "Import LlamaIndex dependencies" - } - }, + "metadata": {}, "outputs": [], "source": [ "from llama_index.core 
import (\n",
@@ -102,7 +64,7 @@
     "    ServiceContext,\n",
     "    StorageContext,\n",
     ")\n",
-    "from llama_index.vector_stores.databricks_vector_search import DatabricksVectorSearch"
+    "from llama_index.vector_stores.databricks import DatabricksVectorSearch"
    ]
   },
   {
@@ -115,15 +77,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {},
-     "inputWidgets": {},
-     "nuid": "dd06759d-0070-48a8-aa74-3d46b12457f8",
-     "showTitle": true,
-     "title": "Load example data"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "!mkdir -p 'data/paul_graham/'\n",
@@ -140,15 +94,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {},
-     "inputWidgets": {},
-     "nuid": "7a82b624-bffb-453b-b5c6-f8414566dc2f",
-     "showTitle": true,
-     "title": "Read the data"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# load documents\n",
@@ -172,18 +118,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {
-      "byteLimit": 2048000,
-      "rowLimit": 10000
-     },
-     "inputWidgets": {},
-     "nuid": "99c56854-c182-4dfe-bc08-cee8263461ee",
-     "showTitle": true,
-     "title": "Create the Databricks Vector Search endpoint"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Create a vector search endpoint\n",
@@ -203,18 +138,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {
-      "byteLimit": 2048000,
-      "rowLimit": 10000
-     },
-     "inputWidgets": {},
-     "nuid": "6abe427b-79ca-4c0c-8e58-ba5f670294ae",
-     "showTitle": true,
-     "title": "Build the index from the documents"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Create a vector search index\n",
@@ -231,18 +155,23 @@
     "    \"my_primary_key_name\": \"string\",\n",
     "    \"my_embedding_vector_column_name\": \"array<double>\",\n",
     "    \"text\": \"string\",  # one column must match the text_column in the DatabricksVectorSearch instance created below; this will hold the raw node text\n",
-    "    \"doc_id\": \"string\", # one column must contain the reference document ID (this will be populated by LlamaIndex automatically)\n",
+    "    \"doc_id\": \"string\",  # one column must contain the reference document ID (this will be populated by LlamaIndex automatically)\n",
     "    # add any other metadata you may have in your nodes (Databricks Vector Search supports metadata filtering)\n",
     "    # NOTE THAT THESE FIELDS MUST BE ADDED EXPLICITLY TO BE USED FOR METADATA FILTERING\n",
     "    },\n",
     ")\n",
     "\n",
     "databricks_vector_store = DatabricksVectorSearch(\n",
-    "    index=databricks_index, text_column=\"text\",\n",
-    "    columns=None,  # YOU MUST ALSO RECORD YOUR METADATA FIELD NAMES HERE\n",
+    "    index=databricks_index,\n",
+    "    text_column=\"text\",\n",
+    "    columns=None,  # YOU MUST ALSO RECORD YOUR METADATA FIELD NAMES HERE\n",
     ")  # text_column is required for self-managed embeddings\n",
-    "storage_context = StorageContext.from_defaults(vector_store=databricks_vector_store)\n",
-    "index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)"
+    "storage_context = StorageContext.from_defaults(\n",
+    "    vector_store=databricks_vector_store\n",
+    ")\n",
+    "index = VectorStoreIndex.from_documents(\n",
+    "    documents, storage_context=storage_context\n",
+    ")"
    ]
   },
   {
@@ -255,15 +184,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {},
-     "inputWidgets": {},
-     "nuid": "3e8c18f7-db8c-45c1-bb82-b75ad2307824",
-     "showTitle": true,
-     "title": "Query using the index"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "query_engine = index.as_query_engine()\n",
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py
deleted file mode 100644
index 041494b47fb05..0000000000000
--- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from llama_index.vector_stores.databricks_vector_search.base import (
-    DatabricksVectorSearch,
-)
-
-__all__ = ["DatabricksVectorSearch"]
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/.gitignore b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/.gitignore
similarity index 100%
rename from llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/.gitignore
rename to llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/.gitignore
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/BUILD
new file mode 100644
index 0000000000000..05444d69d26e8
--- /dev/null
+++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/BUILD
@@ -0,0 +1,4 @@
+poetry_requirements(
+    name="poetry",
+    module_mapping={"databricks-vectorsearch": ["databricks"]}
+)
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/Makefile b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/Makefile
similarity index 100%
rename from llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/Makefile
rename to llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/Makefile
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/README.md b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/README.md
similarity index 100%
rename from llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/README.md
rename to llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/README.md
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/BUILD
similarity index 100%
rename from llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/BUILD
rename to llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/BUILD
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/__init__.py
new file mode 100644
index 0000000000000..3d63d6acf2fa4
--- /dev/null
+++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/__init__.py
@@ -0,0 +1,5 @@
+from llama_index.vector_stores.databricks.base import (
+    DatabricksVectorSearch,
+)
+
+__all__ = ["DatabricksVectorSearch"]
diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/base.py
similarity index 94%
rename from llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py
rename to llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/base.py
index 7178c9a18a77e..bd7f5ae328218 100644
--- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/llama_index/vector_stores/databricks-vector-search/base.py
+++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/llama_index/vector_stores/databricks/base.py
@@ -11,13 +11,13 @@
     List,
     Dict,
     Optional,
-    TYPE_CHECKING,
     cast,
 )
 from enum import Enum
 
-from pydantic import BaseModel, Field
+from databricks.vector_search.client import VectorSearchIndex
+from llama_index.core.bridge.pydantic import BaseModel, Field, PrivateAttr
 from llama_index.core.vector_stores.types import (
     BasePydanticVectorStore,
     MetadataFilters,
@@ -32,10 +32,6 @@
 
 from llama_index.core.bridge.pydantic import PrivateAttr
 
-if TYPE_CHECKING:
-    from databricks.vector_search.client import VectorSearchIndex
-
-
 class _DatabricksIndexType(str, Enum):
     DIRECT_ACCESS = "DIRECT_ACCESS"
     DELTA_SYNC = "DELTA_SYNC"
@@ -223,6 +219,12 @@ def add(
         for node in nodes:
             node_id = node.node_id
             metadata = node_to_metadata_dict(node, remove_text=True, flat_metadata=True)
+
+            metadata_columns = list(self.columns or [])  # copy so repeated adds do not mutate self.columns
+
+            # explicitly record doc_id as metadata (for delete)
+            metadata_columns.append("doc_id")
+
             entry = {
                 self._primary_key: node_id,
                 self.text_column: node.get_content(),
@@ -232,12 +234,14 @@
                     for col in filter(
                         lambda column: column
                         not in (self._primary_key, self.text_column),
-                        self.columns + ["doc_id"] or ["doc_id"],  # explicitly record doc_id as metadata (for delete)
+                        metadata_columns,
                     )
                 },
             }
             doc_id = metadata.get("doc_id")
-            self._doc_id_to_pk[doc_id] = list(set(self._doc_id_to_pk.get(doc_id, []) + [node_id]))  # associate this node_id with this doc_id
+            self._doc_id_to_pk[doc_id] = list(
+                set(self._doc_id_to_pk.get(doc_id, []) + [node_id])  # noqa: RUF005
+            )  # associate this node_id with this doc_id
             entries.append(entry)
             ids.append(node_id)
 
@@ -280,12 +284,16 @@ def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
         """Delete nodes with ref_doc_id.
 
         Args:
             ref_doc_id (str): The doc_id of the document to delete.
""" - primary_keys = self._doc_id_to_pk.get(ref_doc_id, None) # get the node_ids associated with the doc_id + primary_keys = self._doc_id_to_pk.get( + ref_doc_id, None + ) # get the node_ids associated with the doc_id if primary_keys is not None: self._index.delete( primary_keys=primary_keys, ) - self._doc_id_to_pk.pop(ref_doc_id) # remove this doc_id from the doc_id-to-node_id map + self._doc_id_to_pk.pop( + ref_doc_id + ) # remove this doc_id from the doc_id-to-node_id map def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: """Query index for top k most similar nodes.""" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/pyproject.toml similarity index 91% rename from llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml rename to llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/pyproject.toml index 8151bb9da3521..0b45006cde64b 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks-vector-search/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-databricks/pyproject.toml @@ -9,7 +9,7 @@ skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" [tool.llamahub] contains_example = false -import_path = "llama_index.vector_stores.databricks_vector_search" +import_path = "llama_index.vector_stores.databricks" [tool.llamahub.class_authors] DatabricksVectorSearch = "NickhilN" @@ -24,7 +24,7 @@ python_version = "3.8" authors = ["Alberto Da Costa ", "Nickhil Nabar Date: Thu, 14 Mar 2024 16:40:55 -0600 Subject: [PATCH 9/9] linting --- docs/module_guides/storing/vector_stores.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/module_guides/storing/vector_stores.md b/docs/module_guides/storing/vector_stores.md index ba27da0932367..8a6a39e1a1384 100644 --- a/docs/module_guides/storing/vector_stores.md +++ b/docs/module_guides/storing/vector_stores.md @@ -23,7 +23,7 @@ We are actively adding more integrations and improving feature coverage for each | ChatGPT Retrieval Plugin | aggregator | | | ✓ | ✓ | | | Chroma | self-hosted | ✓ | | ✓ | ✓ | | | DashVector | cloud | ✓ | ✓ | ✓ | ✓ | | -| Databricks | cloud | ✓ | | ✓ | ✓ | | +| Databricks | cloud | ✓ | | ✓ | ✓ | | | Deeplake | self-hosted / cloud | ✓ | | ✓ | ✓ | | | DocArray | aggregator | ✓ | | ✓ | ✓ | | | DuckDB | in-memory / self-hosted | ✓ | | ✓ | ✓ | |