From b9d8c3935ab142461aed7e615a98f6408aa4d6df Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Wed, 7 Feb 2024 19:46:15 -0600 Subject: [PATCH 1/5] wip --- llama_index/core/base_retriever.py | 27 +++++++++++++++++++++++++-- llama_index/indices/base.py | 7 ++++++- llama_index/schema.py | 27 ++++++++++++++++++++++++++- llama_index/vector_stores/qdrant.py | 4 ++++ 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/llama_index/core/base_retriever.py b/llama_index/core/base_retriever.py index f2900c6429f82..ea3baa01d7f8d 100644 --- a/llama_index/core/base_retriever.py +++ b/llama_index/core/base_retriever.py @@ -1,4 +1,5 @@ """Base retriever.""" + from abc import abstractmethod from typing import Any, Dict, List, Optional @@ -72,7 +73,29 @@ def _retrieve_from_object( f"Retrieving from object {obj.__class__.__name__} with query {query_bundle.query_str}\n", color="llama_pink", ) - if isinstance(obj, NodeWithScore): + + if isinstance(obj, str): + return [ + NodeWithScore( + node=TextNode(text=obj), + score=score, + ) + ] + elif isinstance(obj, dict): + from llama_index.storage.docstore.utils import json_to_doc + + # check if its a node, else assume string + try: + node = json_to_doc(obj) + return [NodeWithScore(node=node, score=score)] + except Exception: + return [ + NodeWithScore( + node=TextNode(text=str(obj)), + score=score, + ) + ] + elif isinstance(obj, NodeWithScore): return [obj] elif isinstance(obj, BaseNode): return [NodeWithScore(node=obj, score=score)] @@ -144,7 +167,7 @@ def _handle_recursive_retrieval( node = n.node score = n.score or 1.0 if isinstance(node, IndexNode): - obj = self.object_map.get(node.index_id, None) + obj = node.obj or self.object_map.get(node.index_id, None) if obj is not None: if self._verbose: print_text( diff --git a/llama_index/indices/base.py b/llama_index/indices/base.py index 8cfd2e9f65668..058339599e90b 100644 --- a/llama_index/indices/base.py +++ b/llama_index/indices/base.py @@ -1,4 +1,5 @@ """Base index classes.""" + import logging from abc import ABC, abstractmethod from typing import Any, Dict, Generic, List, Optional, Sequence, Type, TypeVar, cast @@ -66,7 +67,11 @@ def __init__( self._graph_store = self._storage_context.graph_store objects = objects or [] - self._object_map = {obj.index_id: obj.obj for obj in objects} + self._object_map = {} + for obj in objects: + self._object_map[obj.index_id] = obj.obj + obj.obj = None # clear the object avoid serialization issues + with self._service_context.callback_manager.as_trace("index_construction"): if index_struct is None: nodes = nodes or [] diff --git a/llama_index/schema.py b/llama_index/schema.py index e3382bae136f7..e85861bf5344f 100644 --- a/llama_index/schema.py +++ b/llama_index/schema.py @@ -1,4 +1,5 @@ """Base schema for data structures.""" + import json import textwrap import uuid @@ -501,7 +502,31 @@ class IndexNode(TextNode): """ index_id: str - obj: Any = Field(exclude=True) + obj: Any = None + + def dict(self, **kwargs: Any) -> Dict[str, Any]: + from llama_index.storage.docstore.utils import doc_to_json + + data = super().dict(**kwargs) + + is_obj_serializable = False + try: + if self.obj is None: + data["obj"] = None + elif isinstance(self.obj, BaseNode): + data["obj"] = doc_to_json(self.obj) + elif isinstance(self.obj, BaseModel): + data["obj"] = self.obj.dict() + else: + data["obj"] = json.dumps(self.obj) + is_obj_serializable = True + except Exception: + pass + + if not is_obj_serializable: + raise ValueError("IndexNode obj is not serializable: " + str(self.obj)) + + return data @classmethod def from_text_node( diff --git a/llama_index/vector_stores/qdrant.py b/llama_index/vector_stores/qdrant.py index 16dc728f120be..c5b807eac0fc4 100644 --- a/llama_index/vector_stores/qdrant.py +++ b/llama_index/vector_stores/qdrant.py @@ -4,6 +4,7 @@ An index that is built on top of an existing Qdrant collection. """ + import logging from typing import Any, List, Optional, Tuple, cast @@ -716,6 +717,9 @@ def parse_to_query_result(self, response: List[Any]) -> VectorStoreQueryResult: similarities = [] ids = [] + import pdb + + pdb.set_trace() for point in response: payload = cast(Payload, point.payload) try: From 8c3e77374d89e56af630605898082b09e099a6dc Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Sun, 11 Feb 2024 22:21:37 -0600 Subject: [PATCH 2/5] address comments --- llama_index/core/base_retriever.py | 23 +---------------------- llama_index/schema.py | 27 ++++++++++++++++++++++----- llama_index/vector_stores/utils.py | 6 +++--- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/llama_index/core/base_retriever.py b/llama_index/core/base_retriever.py index ea3baa01d7f8d..051b613aee95f 100644 --- a/llama_index/core/base_retriever.py +++ b/llama_index/core/base_retriever.py @@ -74,28 +74,7 @@ def _retrieve_from_object( color="llama_pink", ) - if isinstance(obj, str): - return [ - NodeWithScore( - node=TextNode(text=obj), - score=score, - ) - ] - elif isinstance(obj, dict): - from llama_index.storage.docstore.utils import json_to_doc - - # check if its a node, else assume string - try: - node = json_to_doc(obj) - return [NodeWithScore(node=node, score=score)] - except Exception: - return [ - NodeWithScore( - node=TextNode(text=str(obj)), - score=score, - ) - ] - elif isinstance(obj, NodeWithScore): + if isinstance(obj, NodeWithScore): return [obj] elif isinstance(obj, BaseNode): return [NodeWithScore(node=obj, score=score)] diff --git a/llama_index/schema.py b/llama_index/schema.py index e85861bf5344f..7b687d0b1b576 100644 --- a/llama_index/schema.py +++ b/llama_index/schema.py @@ -509,7 +509,6 @@ def dict(self, **kwargs: Any) -> Dict[str, Any]: data = super().dict(**kwargs) - is_obj_serializable = False try: if self.obj is None: data["obj"] = None @@ -519,11 +518,7 @@ def dict(self, **kwargs: Any) -> Dict[str, Any]: data["obj"] = self.obj.dict() else: data["obj"] = json.dumps(self.obj) - is_obj_serializable = True except Exception: - pass - - if not is_obj_serializable: raise ValueError("IndexNode obj is not serializable: " + str(self.obj)) return data @@ -541,6 +536,28 @@ def from_text_node( index_id=index_id, ) + # TODO: return type here not supported by current mypy version + @classmethod + def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self: # type: ignore + output = super().from_dict(data, **kwargs) + + obj = data.get("obj", None) + parsed_obj = None + if isinstance(obj, str): + parsed_obj = (TextNode(text=obj),) + elif isinstance(obj, dict): + from llama_index.storage.docstore.utils import json_to_doc + + # check if its a node, else assume string + try: + parsed_obj = json_to_doc(obj) + except Exception: + parsed_obj = TextNode(text=str(obj)) + + output.obj = parsed_obj + + return output + @classmethod def get_type(cls) -> str: return ObjectType.INDEX diff --git a/llama_index/vector_stores/utils.py b/llama_index/vector_stores/utils.py index d3aa069fda646..919c2f44d71a3 100644 --- a/llama_index/vector_stores/utils.py +++ b/llama_index/vector_stores/utils.py @@ -71,11 +71,11 @@ def metadata_dict_to_node(metadata: dict, text: Optional[str] = None) -> BaseNod node: BaseNode if node_type == IndexNode.class_name(): - node = IndexNode.parse_raw(node_json) + node = IndexNode.from_json(node_json) elif node_type == ImageNode.class_name(): - node = ImageNode.parse_raw(node_json) + node = ImageNode.from_json(node_json) else: - node = TextNode.parse_raw(node_json) + node = TextNode.from_json(node_json) if text is not None: node.set_content(text) From 7e327e6410016ac925f1aaa25b07cc18207e70b1 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Thu, 15 Feb 2024 23:02:09 -0600 Subject: [PATCH 3/5] remove pdb --- llama-index-legacy/llama_index/legacy/vector_stores/qdrant.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/llama-index-legacy/llama_index/legacy/vector_stores/qdrant.py b/llama-index-legacy/llama_index/legacy/vector_stores/qdrant.py index 4e1b37de648fe..9007e7b4c4e2b 100644 --- a/llama-index-legacy/llama_index/legacy/vector_stores/qdrant.py +++ b/llama-index-legacy/llama_index/legacy/vector_stores/qdrant.py @@ -717,9 +717,6 @@ def parse_to_query_result(self, response: List[Any]) -> VectorStoreQueryResult: similarities = [] ids = [] - import pdb - - pdb.set_trace() for point in response: payload = cast(Payload, point.payload) try: From aeb8e1e1801ddaedeb4ddd800841596b45023642 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Thu, 15 Feb 2024 23:03:38 -0600 Subject: [PATCH 4/5] fix small errors --- llama-index-core/llama_index/core/schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama-index-core/llama_index/core/schema.py b/llama-index-core/llama_index/core/schema.py index 13bb8482f2ec8..4ecda5b9475b9 100644 --- a/llama-index-core/llama_index/core/schema.py +++ b/llama-index-core/llama_index/core/schema.py @@ -503,7 +503,7 @@ class IndexNode(TextNode): obj: Any = None def dict(self, **kwargs: Any) -> Dict[str, Any]: - from llama_index.storage.docstore.utils import doc_to_json + from llama_index.core.storage.docstore.utils import doc_to_json data = super().dict(**kwargs) @@ -542,11 +542,11 @@ def from_dict(cls, data: Dict[str, Any], **kwargs: Any) -> Self: # type: ignore obj = data.get("obj", None) parsed_obj = None if isinstance(obj, str): - parsed_obj = (TextNode(text=obj),) + parsed_obj = TextNode(text=obj) elif isinstance(obj, dict): - from llama_index.storage.docstore.utils import json_to_doc + from llama_index.core.storage.docstore.utils import json_to_doc - # check if its a node, else assume string + # check if its a node, else assume stringable try: parsed_obj = json_to_doc(obj) except Exception: From affd5b235a1b995ae51e9223c43f9ca938f4d6f1 Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Fri, 16 Feb 2024 22:28:59 -0600 Subject: [PATCH 5/5] fix litellm tests --- llama-index-integrations/llms/llama-index-llms-litellm/BUILD | 4 ++++ .../llms/llama-index-llms-litellm/pyproject.toml | 2 +- .../llms/llama-index-llms-litellm/tests/BUILD | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/BUILD b/llama-index-integrations/llms/llama-index-llms-litellm/BUILD index 0896ca890d8bf..a8f4940ed6efe 100644 --- a/llama-index-integrations/llms/llama-index-llms-litellm/BUILD +++ b/llama-index-integrations/llms/llama-index-llms-litellm/BUILD @@ -1,3 +1,7 @@ poetry_requirements( name="poetry", ) + +python_sources( + interpreter_constraints=["==3.9.*", "==3.10.*"], +) diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml index 8165327c55ffb..fec49c9e66f13 100644 --- a/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml +++ b/llama-index-integrations/llms/llama-index-llms-litellm/pyproject.toml @@ -27,7 +27,7 @@ readme = "README.md" version = "0.1.1" [tool.poetry.dependencies] -python = ">=3.8.1,<3.12" +python = ">=3.9,<3.12" llama-index-core = "^0.10.1" litellm = "^1.18.13" diff --git a/llama-index-integrations/llms/llama-index-llms-litellm/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-litellm/tests/BUILD index dabf212d7e716..5cd7615688ba0 100644 --- a/llama-index-integrations/llms/llama-index-llms-litellm/tests/BUILD +++ b/llama-index-integrations/llms/llama-index-llms-litellm/tests/BUILD @@ -1 +1,3 @@ -python_tests() +python_tests( + interpreter_constraints=["==3.9.*", "==3.10.*"], +)