Skip to content

Commit

Permalink
improve multi doc retrieval (run-llama#11346)
Browse files: browse the repository at this point in the history.
* improve multi doc retrieval

* cr

* cr

* cr

* cr
  • Loading branch information
hatianzhang authored and Dominastorm committed Feb 28, 2024
1 parent dfaee2b commit b7aec36
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 4 deletions.
1 change: 1 addition & 0 deletions llama-index-core/llama_index/core/node_parser/interface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Node parser interface."""

from abc import ABC, abstractmethod
from typing import Any, Callable, List, Sequence

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,11 @@ def _get_nodes_from_buffer(
doc = Document(text="\n\n".join(list(buffer)))
return node_parser.get_nodes_from_documents([doc])

def get_nodes_from_elements(self, elements: List[Element]) -> List[BaseNode]:
def get_nodes_from_elements(
self,
elements: List[Element],
metadata_inherited: Optional[Dict[str, Any]] = None,
) -> List[BaseNode]:
"""Get nodes and mappings."""
from llama_index.core.node_parser import SentenceSplitter

Expand Down Expand Up @@ -342,5 +346,8 @@ def get_nodes_from_elements(self, elements: List[Element]) -> List[BaseNode]:
nodes.extend(cur_text_nodes)
cur_text_el_buffer = []

# remove empty nodes
# remove empty nodes and keep node original metadata inherited from parent nodes
for node in nodes:
if metadata_inherited:
node.metadata.update(metadata_inherited)
return [node for node in nodes if len(node.text) > 0]
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]:
self.extract_table_summaries(table_elements)
# convert into nodes
# will return a list of Nodes and Index Nodes
return self.get_nodes_from_elements(elements)
return self.get_nodes_from_elements(elements, node.metadata)

def extract_elements(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_nodes_from_node(self, node: TextNode) -> List[BaseNode]:
self.extract_table_summaries(table_elements)
# convert into nodes
# will return a list of Nodes and Index Nodes
return self.get_nodes_from_elements(elements)
return self.get_nodes_from_elements(elements, node.metadata)

def extract_elements(
self, text: str, table_filters: Optional[List[Callable]] = None, **kwargs: Any
Expand Down

0 comments on commit b7aec36

Please sign in to comment.