feat(ChatKnowledge): Support Financial Report Analysis #1702

Merged on Jul 26, 2024 (31 commits)

Commits
0c393a5  feat:financial report chat (Aries-ckt, Jun 28, 2024)
e918ae3  feat: differentiated response type (2976151305, Jun 28, 2024)
f143751  Merge remote-tracking branch 'main-com/feat/fetch-event' into feat_fi… (Aries-ckt, Jun 29, 2024)
934300b  feat: add financial chat (Aries-ckt, Jul 4, 2024)
0fb1ce3  feat: add financial report chat (Aries-ckt, Jul 5, 2024)
7e989be  feat: add web statics (Aries-ckt, Jul 5, 2024)
dc21e0e  doc:add chat financial report document. (Aries-ckt, Jul 7, 2024)
6ee2ba1  docs:add examples cases. (Aries-ckt, Jul 7, 2024)
0c6344d  fix:update indicator formula (Aries-ckt, Jul 9, 2024)
7fa29fa  style:fmt (Aries-ckt, Jul 9, 2024)
9ce4d5c  feat:financial report -> dbgpts awel flow (Aries-ckt, Jul 18, 2024)
003424a  fix: solve conflict (Aries-ckt, Jul 19, 2024)
5693b47  fix: chat_dag_stream error. (Aries-ckt, Jul 19, 2024)
8970e93  feat(core): Load dag by domain type (fangyinc, Jul 20, 2024)
7d8f1f5  feat(rag): New knowledge config api (fangyinc, Jul 20, 2024)
bd0090e  feat: add space config api (lcxadml, Jul 22, 2024)
9bcb4da  feat:add domain type (Aries-ckt, Jul 23, 2024)
04e01bc  feat:change prompt (Aries-ckt, Jul 23, 2024)
2a4f072  feat:update classifier model (Aries-ckt, Jul 23, 2024)
85714d2  feat: Install and uninstall multiple dbgpts at one time (fangyinc, Jul 24, 2024)
17bcd72  fix: async persist problem. (Aries-ckt, Jul 24, 2024)
a86f6dc  Merge branch 'feat_fin_report' of https://github.com/Aries-ckt/DB-GPT… (Aries-ckt, Jul 24, 2024)
1d678f5  doc:add how to use financial chat (Aries-ckt, Jul 25, 2024)
3114f94  chore:delete unusual files (Aries-ckt, Jul 25, 2024)
8ed791c  chore: Remove unused code (fangyinc, Jul 25, 2024)
befc740  chore: Remove unused model (fangyinc, Jul 25, 2024)
21460e4  Merge remote-tracking branch 'origin/main' into feat_fin_report (Aries-ckt, Jul 25, 2024)
0a1c5a7  chore:add v0.5.10 tag (Aries-ckt, Jul 25, 2024)
db9b37a  chore:wechat update (Aries-ckt, Jul 25, 2024)
325797c  doc:upgrade sql script (Aries-ckt, Jul 26, 2024)
957672c  feat:web domain i18n (Aries-ckt, Jul 26, 2024)

Files changed

7 changes: 6 additions & 1 deletion .env.template
@@ -292,4 +292,9 @@ DBGPT_LOG_LEVEL=INFO
# OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE=
# OTEL_EXPORTER_OTLP_TRACES_HEADERS=
# OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=
# OTEL_EXPORTER_OTLP_TRACES_COMPRESSION=
# OTEL_EXPORTER_OTLP_TRACES_COMPRESSION=

#*******************************************************************#
#** FINANCIAL CHAT Config **#
#*******************************************************************#
# FIN_REPORT_MODEL=/app/models/bge-large-zh
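The new FIN_REPORT_MODEL setting is surfaced at runtime through the global Config object (see the dbgpt/_private/config.py change below). A minimal sketch, assuming the variable is set in .env and reusing the sample path from this template:

import os

from dbgpt._private.config import Config

# Assumption: mirrors the commented sample above; point this at a local embedding model.
os.environ.setdefault("FIN_REPORT_MODEL", "/app/models/bge-large-zh")

CFG = Config()
print(CFG.FIN_REPORT_MODEL)  # "/app/models/bge-large-zh", or None when the variable is unset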
1 change: 1 addition & 0 deletions assets/schema/dbgpt.sql
@@ -17,6 +17,7 @@ CREATE TABLE IF NOT EXISTS `knowledge_space`
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`name` varchar(100) NOT NULL COMMENT 'knowledge space name',
`vector_type` varchar(50) NOT NULL COMMENT 'vector type',
`domain_type` varchar(50) NOT NULL COMMENT 'domain type',
`desc` varchar(500) NOT NULL COMMENT 'description',
`owner` varchar(100) DEFAULT NULL COMMENT 'owner',
`context` TEXT DEFAULT NULL COMMENT 'context argument',
3 changes: 3 additions & 0 deletions assets/schema/upgrade/v0_5_10/upgrade_to_v0.5.10.sql
@@ -0,0 +1,3 @@
USE dbgpt;
ALTER TABLE knowledge_space
ADD COLUMN `domain_type` varchar(50) null comment 'space domain type' after `vector_type`;
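A quick way to confirm the upgrade took effect; this is only a sketch, assuming a local MySQL instance, the pymysql driver, and credentials of your own:

import pymysql  # assumption: MySQL backend, matching assets/schema/dbgpt.sql

conn = pymysql.connect(
    host="127.0.0.1", port=3306, user="root", password="<your-password>", database="dbgpt"
)
try:
    with conn.cursor() as cur:
        cur.execute("SHOW COLUMNS FROM knowledge_space LIKE 'domain_type'")
        # Expect one row describing a varchar(50) column placed after vector_type.
        print(cur.fetchone())
finally:
    conn.close()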
396 changes: 396 additions & 0 deletions assets/schema/upgrade/v0_5_10/v0.5.9.sql

Large diffs are not rendered by default.

Binary file modified assets/wechat.jpg
28 changes: 15 additions & 13 deletions dbgpt/_private/config.py
@@ -166,18 +166,18 @@ def __init__(self) -> None:
self.execute_local_commands = (
os.getenv("EXECUTE_LOCAL_COMMANDS", "False").lower() == "true"
)
### message stor file
# message stor file
self.message_dir = os.getenv("MESSAGE_HISTORY_DIR", "../../message")

### Native SQL Execution Capability Control Configuration
# Native SQL Execution Capability Control Configuration
self.NATIVE_SQL_CAN_RUN_DDL = (
os.getenv("NATIVE_SQL_CAN_RUN_DDL", "True").lower() == "true"
)
self.NATIVE_SQL_CAN_RUN_WRITE = (
os.getenv("NATIVE_SQL_CAN_RUN_WRITE", "True").lower() == "true"
)

### dbgpt meta info database connection configuration
# dbgpt meta info database connection configuration
self.LOCAL_DB_HOST = os.getenv("LOCAL_DB_HOST")
self.LOCAL_DB_PATH = os.getenv("LOCAL_DB_PATH", "data/default_sqlite.db")
self.LOCAL_DB_TYPE = os.getenv("LOCAL_DB_TYPE", "sqlite")
@@ -193,13 +193,13 @@ def __init__(self) -> None:

self.CHAT_HISTORY_STORE_TYPE = os.getenv("CHAT_HISTORY_STORE_TYPE", "db")

### LLM Model Service Configuration
# LLM Model Service Configuration
self.LLM_MODEL = os.getenv("LLM_MODEL", "glm-4-9b-chat")
self.LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH")

### Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
### When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
### We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.
# Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
# When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
# We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.
self.PROXYLLM_BACKEND = None
if self.LLM_MODEL == "proxyllm":
self.PROXYLLM_BACKEND = os.getenv("PROXYLLM_BACKEND")
@@ -211,7 +211,7 @@ def __init__(self) -> None:
"MODEL_SERVER", "http://127.0.0.1" + ":" + str(self.MODEL_PORT)
)

### Vector Store Configuration
# Vector Store Configuration
self.VECTOR_STORE_TYPE = os.getenv("VECTOR_STORE_TYPE", "Chroma")
self.MILVUS_URL = os.getenv("MILVUS_URL", "127.0.0.1")
self.MILVUS_PORT = os.getenv("MILVUS_PORT", "19530")
@@ -223,7 +223,7 @@ def __init__(self) -> None:
self.ELASTICSEARCH_USERNAME = os.getenv("ELASTICSEARCH_USERNAME", None)
self.ELASTICSEARCH_PASSWORD = os.getenv("ELASTICSEARCH_PASSWORD", None)

## OceanBase Configuration
# OceanBase Configuration
self.OB_HOST = os.getenv("OB_HOST", "127.0.0.1")
self.OB_PORT = int(os.getenv("OB_PORT", "2881"))
self.OB_USER = os.getenv("OB_USER", "root")
@@ -245,7 +245,7 @@ def __init__(self) -> None:
os.environ["load_8bit"] = str(self.IS_LOAD_8BIT)
os.environ["load_4bit"] = str(self.IS_LOAD_4BIT)

### EMBEDDING Configuration
# EMBEDDING Configuration
self.EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text2vec")
# Rerank model configuration
self.RERANK_MODEL = os.getenv("RERANK_MODEL")
@@ -276,17 +276,17 @@ def __init__(self) -> None:
os.getenv("KNOWLEDGE_CHAT_SHOW_RELATIONS", "False").lower() == "true"
)

### SUMMARY_CONFIG Configuration
# SUMMARY_CONFIG Configuration
self.SUMMARY_CONFIG = os.getenv("SUMMARY_CONFIG", "FAST")

self.MAX_GPU_MEMORY = os.getenv("MAX_GPU_MEMORY", None)

### Log level
# Log level
self.DBGPT_LOG_LEVEL = os.getenv("DBGPT_LOG_LEVEL", "INFO")

self.SYSTEM_APP: Optional["SystemApp"] = None

### Temporary configuration
# Temporary configuration
self.USE_FASTCHAT: bool = os.getenv("USE_FASTCHAT", "True").lower() == "true"

self.MODEL_CACHE_ENABLE: bool = (
@@ -312,6 +312,8 @@ def __init__(self) -> None:
self.DBGPT_APP_SCENE_NON_STREAMING_PARALLELISM_BASE = int(
os.getenv("DBGPT_APP_SCENE_NON_STREAMING_PARALLELISM_BASE", 1)
)
# experimental financial report model configuration
self.FIN_REPORT_MODEL = os.getenv("FIN_REPORT_MODEL", None)

@property
def local_db_manager(self) -> "ConnectorManager":
2 changes: 1 addition & 1 deletion dbgpt/_version.py
@@ -1 +1 @@
version = "0.5.9"
version = "0.5.10"
88 changes: 63 additions & 25 deletions dbgpt/app/knowledge/api.py
@@ -23,19 +23,27 @@
from dbgpt.app.knowledge.service import KnowledgeService
from dbgpt.app.openapi.api_v1.api_v1 import no_stream_generator, stream_generator
from dbgpt.app.openapi.api_view_model import Result
from dbgpt.configs import TAG_KEY_KNOWLEDGE_FACTORY_DOMAIN_TYPE
from dbgpt.configs.model_config import (
EMBEDDING_MODEL_CONFIG,
KNOWLEDGE_UPLOAD_ROOT_PATH,
)
from dbgpt.core.awel.dag.dag_manager import DAGManager
from dbgpt.rag import ChunkParameters
from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
from dbgpt.rag.knowledge.base import ChunkStrategy
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.rag.retriever.embedding import EmbeddingRetriever
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
from dbgpt.serve.rag.api.schemas import (
KnowledgeConfigResponse,
KnowledgeDomainType,
KnowledgeStorageType,
KnowledgeSyncRequest,
)
from dbgpt.serve.rag.connector import VectorStoreConnector
from dbgpt.serve.rag.service.service import Service
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.util.i18n_utils import _
from dbgpt.util.tracer import SpanType, root_tracer

logger = logging.getLogger(__name__)
@@ -52,6 +60,11 @@ def get_rag_service() -> Service:
return Service.get_instance(CFG.SYSTEM_APP)


def get_dag_manager() -> DAGManager:
"""Get DAG Manager."""
return DAGManager.get_instance(CFG.SYSTEM_APP)


@router.post("/knowledge/space/add")
def space_add(request: KnowledgeSpaceRequest):
print(f"/space/add params: {request}")
@@ -147,6 +160,55 @@ def chunk_strategies():
return Result.failed(code="E000X", msg=f"chunk strategies error {e}")


@router.get("/knowledge/space/config", response_model=Result[KnowledgeConfigResponse])
async def space_config() -> Result[KnowledgeConfigResponse]:
"""Get space config"""
try:
storage_list: List[KnowledgeStorageType] = []
dag_manager: DAGManager = get_dag_manager()
# Vector Storage
vs_domain_types = [KnowledgeDomainType(name="Normal", desc="Normal")]
dag_map = dag_manager.get_dags_by_tag_key(TAG_KEY_KNOWLEDGE_FACTORY_DOMAIN_TYPE)
for domain_type, dags in dag_map.items():
vs_domain_types.append(
KnowledgeDomainType(
name=domain_type, desc=dags[0].description or domain_type
)
)

storage_list.append(
KnowledgeStorageType(
name="VectorStore",
desc=_("Vector Store"),
domain_types=vs_domain_types,
)
)
# Graph Storage
storage_list.append(
KnowledgeStorageType(
name="KnowledgeGraph",
desc=_("Knowledge Graph"),
domain_types=[KnowledgeDomainType(name="Normal", desc="Normal")],
)
)
# Full Text
storage_list.append(
KnowledgeStorageType(
name="FullText",
desc=_("Full Text"),
domain_types=[KnowledgeDomainType(name="Normal", desc="Normal")],
)
)

return Result.succ(
KnowledgeConfigResponse(
storage=storage_list,
)
)
except Exception as e:
return Result.failed(code="E000X", msg=f"space config error {e}")


@router.post("/knowledge/{space_name}/document/list")
def document_list(space_name: str, query_request: DocumentQueryRequest):
print(f"/document/list params: {space_name}, {query_request}")
@@ -350,27 +412,3 @@ async def document_summary(request: DocumentSummaryRequest):
)
except Exception as e:
return Result.failed(code="E000X", msg=f"document summary error {e}")


@router.post("/knowledge/entity/extract")
async def entity_extract(request: EntityExtractRequest):
logger.info(f"Received params: {request}")
try:
import uuid

from dbgpt.app.scene import ChatScene
from dbgpt.util.chat_util import llm_chat_response_nostream

chat_param = {
"chat_session_id": uuid.uuid1(),
"current_user_input": request.text,
"select_param": "entity",
"model_name": request.model_name,
}

res = await llm_chat_response_nostream(
ChatScene.ExtractEntity.value(), **{"chat_param": chat_param}
)
return Result.succ(res)
except Exception as e:
return Result.failed(code="E000X", msg=f"entity extract error {e}")
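A hedged usage sketch for the new space-config endpoint, assuming a locally running DB-GPT webserver on the default port 5670 with this router mounted under /api/v1; the response follows the Result[KnowledgeConfigResponse] shape above, and a financial-report domain type only shows up once the corresponding dbgpts flow is installed:

import requests  # any HTTP client works; requests is used here for brevity

resp = requests.get("http://127.0.0.1:5670/api/v1/knowledge/space/config", timeout=10)
resp.raise_for_status()
payload = resp.json()  # Result wrapper; storage types are expected under payload["data"]["storage"]
for storage in payload["data"]["storage"]:
    domain_names = [d["name"] for d in storage["domain_types"]]
    print(storage["name"], domain_names)  # e.g. VectorStore ['Normal', ...]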
9 changes: 9 additions & 0 deletions dbgpt/app/knowledge/request/request.py
@@ -1,3 +1,4 @@
from enum import Enum
from typing import List, Optional

from dbgpt._private.pydantic import BaseModel, ConfigDict
@@ -19,12 +20,20 @@ class KnowledgeSpaceRequest(BaseModel):
name: str = None
"""vector_type: vector type"""
vector_type: str = None
"""vector_type: vector type"""
domain_type: str = "normal"
"""desc: description"""
desc: str = None
"""owner: owner"""
owner: str = None


class BusinessFieldType(Enum):
"""BusinessFieldType"""

NORMAL = "Normal"


class KnowledgeDocumentRequest(BaseModel):
"""doc_name: doc path"""

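For illustration, a sketch of building a space request with the new field; the "FinancialReport" string is an assumption based on the DOMAIN_TYPE_FINANCIAL_REPORT constant used in dbgpt/app/knowledge/service.py below, and every other value is hypothetical:

from dbgpt.app.knowledge.request.request import KnowledgeSpaceRequest

request = KnowledgeSpaceRequest(
    name="fin_report_2024",         # hypothetical space name
    vector_type="Chroma",           # default vector store type
    domain_type="FinancialReport",  # assumed value of DOMAIN_TYPE_FINANCIAL_REPORT
    desc="Knowledge space for quarterly financial report analysis",
    owner="analyst",
)
print(request.domain_type)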
2 changes: 2 additions & 0 deletions dbgpt/app/knowledge/request/response.py
@@ -33,6 +33,8 @@ class SpaceQueryResponse(BaseModel):
name: str = None
"""vector_type: vector type"""
vector_type: str = None
"""field_type: field type"""
domain_type: str = None
"""desc: description"""
desc: str = None
"""context: context"""
6 changes: 6 additions & 0 deletions dbgpt/app/knowledge/service.py
@@ -23,6 +23,7 @@
SpaceQueryResponse,
)
from dbgpt.component import ComponentType
from dbgpt.configs import DOMAIN_TYPE_FINANCIAL_REPORT
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
from dbgpt.core import LLMClient
from dbgpt.model import DefaultLLMClient
@@ -133,6 +134,7 @@ def get_knowledge_space(self, request: KnowledgeSpaceRequest):
res.id = space.id
res.name = space.name
res.vector_type = space.vector_type
res.domain_type = space.domain_type
res.desc = space.desc
res.owner = space.owner
res.gmt_created = space.gmt_created
@@ -299,6 +301,10 @@ def delete_space(self, space_name: str):
llm_client=self.llm_client,
model_name=None,
)
if space.domain_type == DOMAIN_TYPE_FINANCIAL_REPORT:
conn_manager = CFG.local_db_manager
conn_manager.delete_db(f"{space.name}_fin_report")

vector_store_connector = VectorStoreConnector(
vector_store_type=space.vector_type, vector_store_config=config
)