Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ChatKnowledge):Add knowledge Reference && Deploy Document Refactor #773

Merged
merged 38 commits into from
Nov 4, 2023
Merged
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
fc656e1
feat:knowledge rag graph
Aries-ckt Oct 10, 2023
e2a1990
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 10, 2023
eb2c220
feat:rag graph component
Aries-ckt Oct 12, 2023
8a5e35c
feat:knowledge rag graph
Aries-ckt Oct 12, 2023
fa6a904
feat:knowledge rag graph
Aries-ckt Oct 13, 2023
2f82f98
feat:knowledge rag graph
Aries-ckt Oct 13, 2023
37bcbff
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 15, 2023
71c31c3
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 16, 2023
b63fa2d
feat:rag graph
Aries-ckt Oct 16, 2023
68c9010
feat:rag graph
Aries-ckt Oct 16, 2023
f93af98
feat:rag graph
Aries-ckt Oct 17, 2023
f6694d9
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 18, 2023
aff0553
style:fmt
Aries-ckt Oct 19, 2023
39219a4
feat:rag_graph
Aries-ckt Oct 19, 2023
93de9c7
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 19, 2023
318979a
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 22, 2023
724456d
feat:extract summary
Aries-ckt Oct 25, 2023
e4c96e3
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 28, 2023
53b1fc4
feat:document summary
Aries-ckt Oct 30, 2023
07ad8fa
Merge remote-tracking branch 'origin/main' into feat_rag_graph
Aries-ckt Oct 31, 2023
dca3ddb
feat:add summary
Aries-ckt Oct 31, 2023
16dd8e3
feat:document summary
Aries-ckt Oct 31, 2023
523838f
feat:document summary
Aries-ckt Oct 31, 2023
7dcfa19
feat:document summary
Aries-ckt Oct 31, 2023
b3dbf31
feat:document summary
Aries-ckt Oct 31, 2023
40eed54
chore:discord expire
Aries-ckt Oct 31, 2023
04dcd90
feat:document summary
Aries-ckt Oct 31, 2023
de90244
feat:document summary
Aries-ckt Oct 31, 2023
67f4155
feat:mapreduce summary
Aries-ckt Oct 31, 2023
be1e1cb
feat:document summary set max iteration
Aries-ckt Oct 31, 2023
606d384
feat:add knowledge reference
Aries-ckt Nov 1, 2023
9cc6386
doc:update deploy doc
Aries-ckt Nov 3, 2023
6fe7bfd
feat:merge main branch
Aries-ckt Nov 3, 2023
0874fa2
doc:update deploy doc
Aries-ckt Nov 3, 2023
3111760
doc:update document
Aries-ckt Nov 3, 2023
3ea85a6
feat:web add reference
Aries-ckt Nov 3, 2023
463b97f
doc:css
Aries-ckt Nov 3, 2023
76975d9
feat:knowledge reference
Aries-ckt Nov 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
style:fmt
Aries-ckt committed Oct 19, 2023
commit aff0553b7ecb5d2173b45995fdf08ca1409ca75f
21 changes: 17 additions & 4 deletions pilot/graph_engine/graph_engine.py
Original file line number Diff line number Diff line change
@@ -107,15 +107,26 @@ def _build_index_from_docs(self, documents: List[Document]) -> KG:
"""Build the index from nodes."""
index_struct = self.index_struct_cls()
num_threads = 5
chunk_size = len(documents) if (len(documents) < num_threads) else len(documents) // num_threads
chunk_size = (
len(documents)
if (len(documents) < num_threads)
else len(documents) // num_threads
)

import concurrent

future_tasks = []
with concurrent.futures.ThreadPoolExecutor() as executor:
for i in range(num_threads):
start = i * chunk_size
end = start + chunk_size if i < num_threads - 1 else None
future_tasks.append(executor.submit(self._extract_triplets_task, documents[start:end][0], index_struct))
future_tasks.append(
executor.submit(
self._extract_triplets_task,
documents[start:end][0],
index_struct,
)
)

result = [future.result() for future in future_tasks]
return index_struct
@@ -132,7 +143,6 @@ def _build_index_from_docs(self, documents: List[Document]) -> KG:
#
# return index_struct


def search(self, query):
from pilot.graph_engine.graph_search import RAGGraphSearch

@@ -141,14 +151,17 @@ def search(self, query):

def _extract_triplets_task(self, doc, index_struct):
import threading

thread_id = threading.get_ident()
print(f"current thread-{thread_id} begin extract triplets task")
triplets = self._extract_triplets(doc.page_content)
if len(triplets) == 0:
triplets = []
text_node = TextNode(text=doc.page_content, metadata=doc.metadata)
logger.info(f"extracted knowledge triplets: {triplets}")
print(f"current thread-{thread_id} end extract triplets tasks, triplets-{triplets}")
print(
f"current thread-{thread_id} end extract triplets tasks, triplets-{triplets}"
)
for triplet in triplets:
subj, _, obj = triplet
self.graph_store.upsert_triplet(*triplet)
1 change: 1 addition & 0 deletions pilot/scene/base_chat.py
Original file line number Diff line number Diff line change
@@ -107,6 +107,7 @@ def get_llm_speak(self, prompt_define_response):

async def __call_base(self):
import inspect

input_values = (
await self.generate_input_values()
if inspect.isawaitable(self.generate_input_values())
3 changes: 0 additions & 3 deletions pilot/server/knowledge/service.py
Original file line number Diff line number Diff line change
@@ -258,9 +258,6 @@ def sync_knowledge_document(self, space_name, sync_request: DocumentSyncRequest)
ComponentType.RAG_GRAPH_DEFAULT.value, RAGGraphFactory
).create()
rag_engine.knowledge_graph(docs=chunk_docs)
# docs = engine.search(
# "Comparing Curry and James in terms of their positions, playing styles, and achievements in the NBA"
# )
# update document status
doc.status = SyncStatus.RUNNING.name
doc.chunk_size = len(chunk_docs)
3 changes: 2 additions & 1 deletion pilot/vector_store/weaviate_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import logging
#import weaviate

# import weaviate
from langchain.schema import Document
from pilot.configs.config import Config
from pilot.configs.model_config import KNOWLEDGE_UPLOAD_ROOT_PATH