Skip to content

Commit

Permalink
Implemented get_nodes() in deeplake vector store (#14388)
Browse files Browse the repository at this point in the history
  • Loading branch information
nvoxland-al authored Jun 26, 2024
1 parent 5373d9f commit 720e4b4
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
BasePydanticVectorStore,
VectorStoreQuery,
VectorStoreQueryResult,
MetadataFilters,
FilterCondition,
FilterOperator,
)
from llama_index.core.vector_stores.utils import (
metadata_dict_to_node,
Expand Down Expand Up @@ -137,6 +140,64 @@ def client(self) -> Any:
"""
return self._vectorstore.dataset

def summary(self) -> None:
    """Invoke ``summary()`` on the wrapped vector store.

    Whatever the wrapped store's ``summary()`` returns is discarded; this
    method itself returns ``None``.
    """
    store = self._vectorstore
    store.summary()

def get_nodes(
    self,
    node_ids: Optional[List[str]] = None,
    filters: Optional[MetadataFilters] = None,
) -> List[BaseNode]:
    """Get nodes from the vector store.

    Rows are fetched from the underlying store first (optionally restricted
    to ``node_ids``), converted to nodes, and then the metadata filters are
    evaluated in Python against each node's ``metadata`` mapping.

    Args:
        node_ids: IDs of the nodes to fetch. ``None`` or an empty list
            fetches every stored node.
        filters: Metadata filters to apply to the fetched nodes. ``None`` or
            a ``MetadataFilters`` with no entries applies no filtering.

    Returns:
        The fetched nodes that satisfy ``filters``.

    Raises:
        ValueError: If a filter uses an operator this method does not handle.
    """
    if node_ids:
        # Set membership keeps the search predicate cheap for long ID lists.
        wanted_ids = set(node_ids)
        data = self._vectorstore.search(
            filter=lambda x: x.id.text() in wanted_ids
        )
    else:
        data = self._vectorstore.search(filter=lambda x: True)

    nodes = [metadata_dict_to_node(metadata) for metadata in data["metadata"]]

    # An absent or empty filter set imposes no constraint. Without this
    # guard an empty ``filters.filters`` would fall through to
    # ``return found_one`` below and silently drop every node.
    if filters is None or not filters.filters:
        return nodes

    def filter_matches(node, f):
        """Return True when ``node``'s metadata satisfies the single filter ``f``."""
        try:
            value = node.metadata[f.key]
        except KeyError:
            # A node that lacks the filtered key cannot satisfy the filter;
            # treat it as a non-match instead of aborting the whole query.
            return False

        # Plain ``==`` comparisons against the enum members are kept (rather
        # than a dict dispatch) so the match does not depend on enum hashing.
        if f.operator == FilterOperator.EQ:
            return value == f.value
        if f.operator == FilterOperator.GT:
            return value > f.value
        if f.operator == FilterOperator.GTE:
            return value >= f.value
        if f.operator == FilterOperator.LT:
            return value < f.value
        if f.operator == FilterOperator.LTE:
            return value <= f.value
        if f.operator == FilterOperator.NE:
            return value != f.value
        if f.operator == FilterOperator.IN:
            return value in f.value
        if f.operator == FilterOperator.NOT_IN:
            return value not in f.value
        if f.operator == FilterOperator.TEXT_MATCH:
            return f.value in value
        raise ValueError(f"Unsupported filter operator: {f.operator}")

    def node_matches(node):
        """Combine the individual filter results according to ``filters.condition``."""
        found_one = False
        for f in filters.filters:
            if filter_matches(node, f):
                found_one = True
                # OR: the first satisfied filter is enough.
                if filters.condition == FilterCondition.OR:
                    return True
            elif filters.condition == FilterCondition.AND:
                # AND: the first unsatisfied filter rejects the node.
                return False

        # AND reaches here only when every filter passed; OR only when none did.
        return found_one

    return [node for node in nodes if node_matches(node)]

def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
"""Add the embeddings and their nodes into DeepLake.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,18 @@ ignore_missing_imports = true
python_version = "3.8"

[tool.poetry]
authors = ["Your Name <[email protected]>"]
authors = ["Activeloop <[email protected]>"]
description = "llama-index vector_stores deeplake integration"
exclude = ["**/BUILD"]
license = "MIT"
name = "llama-index-vector-stores-deeplake"
readme = "README.md"
version = "0.1.2"
version = "0.1.3"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
llama-index-core = "^0.10.1"
deeplake = ">=3.9.0"

[tool.poetry.group.dev.dependencies]
ipython = "8.10.0"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,102 @@
from llama_index.core.vector_stores.types import BasePydanticVectorStore
from llama_index.core import Document
from llama_index.core.vector_stores.types import (
BasePydanticVectorStore,
MetadataFilter,
MetadataFilters,
FilterCondition,
FilterOperator,
)
from llama_index.vector_stores.deeplake import DeepLakeVectorStore


def test_class():
    """Check that ``BasePydanticVectorStore`` appears (by name) in the MRO."""
    expected_base = BasePydanticVectorStore.__name__
    mro_names = [klass.__name__ for klass in DeepLakeVectorStore.__mro__]
    assert expected_base in mro_names


def test_e2e():
    """Exercise ``add()`` and ``get_nodes()`` against an in-memory dataset.

    Covers lookup by node ID, an unknown ID, equality filters combined with
    the default condition and with OR, and numeric comparison operators.
    """
    vs = DeepLakeVectorStore(dataset_path="mem://test", overwrite=True)
    ids = vs.add(
        nodes=[
            Document(text="Doc 1", embedding=[1, 2, 1], metadata={"a": "1", "b": 10}),
            Document(text="Doc 2", embedding=[1, 2, 2], metadata={"a": "2", "b": 11}),
            Document(text="Doc 3", embedding=[1, 2, 3], metadata={"a": "3", "b": 12}),
        ]
    )

    def texts(**kwargs):
        """Return the text of every node ``get_nodes`` yields for ``kwargs``."""
        return [node.text for node in vs.get_nodes(**kwargs)]

    # Lookup by node ID, including an ID that does not exist.
    assert texts(node_ids=[ids[0], ids[2]]) == ["Doc 1", "Doc 3"]
    assert texts(node_ids=["a"]) == []

    a_is_2 = MetadataFilter(key="a", value="2")
    a_is_3 = MetadataFilter(key="a", value="3")

    # Single equality filter.
    assert texts(filters=MetadataFilters(filters=[a_is_2])) == ["Doc 2"]

    # No condition given: two contradictory filters are expected to match nothing.
    assert texts(filters=MetadataFilters(filters=[a_is_2, a_is_3])) == []

    # Explicit OR: either filter is enough.
    assert texts(
        filters=MetadataFilters(
            condition=FilterCondition.OR,
            filters=[a_is_2, a_is_3],
        )
    ) == ["Doc 2", "Doc 3"]

    # Numeric comparison operators.
    assert texts(
        filters=MetadataFilters(
            filters=[MetadataFilter(key="b", value=10, operator=FilterOperator.GT)]
        )
    ) == ["Doc 2", "Doc 3"]

    assert texts(
        filters=MetadataFilters(
            filters=[MetadataFilter(key="b", value=11, operator=FilterOperator.LTE)]
        )
    ) == ["Doc 1", "Doc 2"]

0 comments on commit 720e4b4

Please sign in to comment.