Skip to content

Commit

Permalink
FIX:1598 Use PageTextSplitter for DatasourceKnowledge (#1599)
Browse files Browse the repository at this point in the history
Co-authored-by: shenk-b <[email protected]>
Co-authored-by: aries_ckt <[email protected]>
  • Loading branch information
3 people authored Jun 6, 2024
1 parent 59b7aa7 commit 20e7ccc
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
10 changes: 10 additions & 0 deletions dbgpt/rag/knowledge/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def support_chunk_strategy(cls) -> List[ChunkStrategy]:
return [
ChunkStrategy.CHUNK_BY_SIZE,
ChunkStrategy.CHUNK_BY_SEPARATOR,
ChunkStrategy.CHUNK_BY_PAGE,
]

@classmethod
Expand All @@ -63,3 +64,12 @@ def type(cls) -> KnowledgeType:
def document_type(cls) -> DocumentType:
    """Return the document type, which is ``DocumentType.DATASOURCE``."""
    doc_type = DocumentType.DATASOURCE
    return doc_type

@classmethod
def default_chunk_strategy(cls) -> ChunkStrategy:
    """Return the default chunk strategy of this class.

    Returns:
        ChunkStrategy: always ``ChunkStrategy.CHUNK_BY_PAGE``.
    """
    strategy = ChunkStrategy.CHUNK_BY_PAGE
    return strategy
4 changes: 3 additions & 1 deletion dbgpt/rag/summary/db_summary_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,10 @@ def init_db_profile(self, db_summary_client, dbname):
from dbgpt.rag.assembler.db_schema import DBSchemaAssembler

db_assembler = DBSchemaAssembler.load_from_connection(
connector=db_summary_client.db, vector_store_connector=vector_connector
connector=db_summary_client.db,
vector_store_connector=vector_connector,
)

if len(db_assembler.get_chunks()) > 0:
db_assembler.persist()
else:
Expand Down

0 comments on commit 20e7ccc

Please sign in to comment.