From 2decf02c12efbc92cfa960e34c852c09908c7e07 Mon Sep 17 00:00:00 2001 From: slp <296940174@qq.com> Date: Mon, 18 Nov 2024 09:46:03 +0800 Subject: [PATCH 1/3] fix(milvus): Fix milvus store search with topk always set to 4 (#2131) --- dbgpt/storage/vector_store/milvus_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbgpt/storage/vector_store/milvus_store.py b/dbgpt/storage/vector_store/milvus_store.py index 6b2d89f50..3763f14f8 100644 --- a/dbgpt/storage/vector_store/milvus_store.py +++ b/dbgpt/storage/vector_store/milvus_store.py @@ -458,7 +458,7 @@ def similar_search_with_scores( # convert to milvus expr filter. milvus_filter_expr = self.convert_metadata_filters(filters) if filters else None _, docs_and_scores = self._search( - query=text, topk=topk, expr=milvus_filter_expr + query=text, k=topk, expr=milvus_filter_expr ) if any(score < 0.0 or score > 1.0 for _, score, id in docs_and_scores): logger.warning( From b392d51adfbfe84364b83c83b463b7c6fa4312bc Mon Sep 17 00:00:00 2001 From: Aries-ckt <916701291@qq.com> Date: Mon, 18 Nov 2024 09:46:18 +0800 Subject: [PATCH 2/3] fix:core agent dependent prompt service bug (#2129) --- dbgpt/agent/core/plan/awel/agent_operator.py | 2 +- dbgpt/agent/core/plan/awel/agent_operator_resource.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dbgpt/agent/core/plan/awel/agent_operator.py b/dbgpt/agent/core/plan/awel/agent_operator.py index 650c92cf0..8a68d307a 100644 --- a/dbgpt/agent/core/plan/awel/agent_operator.py +++ b/dbgpt/agent/core/plan/awel/agent_operator.py @@ -22,7 +22,6 @@ # TODO: Don't dependent on MixinLLMOperator from dbgpt.model.operators.llm_operator import MixinLLMOperator -from dbgpt.serve.prompt.api.endpoints import get_service from dbgpt.util.i18n_utils import _ from .... import ActionOutput @@ -291,6 +290,7 @@ async def get_agent( prompt_template = None if self.awel_agent.agent_prompt: + from dbgpt.serve.prompt.api.endpoints import get_service prompt_service = get_service() prompt_template = prompt_service.get_template( self.awel_agent.agent_prompt.code diff --git a/dbgpt/agent/core/plan/awel/agent_operator_resource.py b/dbgpt/agent/core/plan/awel/agent_operator_resource.py index 226274f8c..bbdbbfa68 100644 --- a/dbgpt/agent/core/plan/awel/agent_operator_resource.py +++ b/dbgpt/agent/core/plan/awel/agent_operator_resource.py @@ -12,8 +12,6 @@ ResourceCategory, register_resource, ) -from dbgpt.serve.prompt.api.endpoints import get_service - from ....resource.base import AgentResource, ResourceType from ....resource.manage import get_resource_manager from ....util.llm.llm import LLMConfig, LLMStrategyType @@ -21,6 +19,7 @@ def _agent_resource_prompt_values() -> List[OptionValue]: + from dbgpt.serve.prompt.api.endpoints import get_service prompt_service = get_service() prompts = prompt_service.get_target_prompt() return [ From 780ce803e325b87f4ddfbe5824451e379aeee56c Mon Sep 17 00:00:00 2001 From: Raphael Date: Sun, 17 Nov 2024 17:46:33 -0800 Subject: [PATCH 3/3] Fix Critical Directory Traversal Vulnerability (#2098) --- dbgpt/app/knowledge/api.py | 113 +++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/dbgpt/app/knowledge/api.py b/dbgpt/app/knowledge/api.py index cb5c8b370..9e00db3ec 100644 --- a/dbgpt/app/knowledge/api.py +++ b/dbgpt/app/knowledge/api.py @@ -3,8 +3,9 @@ import shutil import tempfile from typing import List +from pathlib import Path -from fastapi import APIRouter, Depends, File, Form, UploadFile +from fastapi import APIRouter, Depends, File, Form, UploadFile, HTTPException from dbgpt._private.config import Config from dbgpt.app.knowledge.request.request import ( @@ -332,54 +333,70 @@ def document_delete(space_name: str, query_request: DocumentQueryRequest): @router.post("/knowledge/{space_name}/document/upload") async def document_upload( - space_name: str, - doc_name: str = Form(...), - doc_type: str = Form(...), - doc_file: UploadFile = File(...), + space_name: str, + doc_name: str = Form(...), + doc_type: str = Form(...), + doc_file: UploadFile = File(...), ): - print(f"/document/upload params: {space_name}") - try: - if doc_file: - if not os.path.exists(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name)): - os.makedirs(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name)) - # We can not move temp file in windows system when we open file in context of `with` - tmp_fd, tmp_path = tempfile.mkstemp( - dir=os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name) - ) - with os.fdopen(tmp_fd, "wb") as tmp: - tmp.write(await doc_file.read()) - shutil.move( - tmp_path, - os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename), - ) - request = KnowledgeDocumentRequest() - request.doc_name = doc_name - request.doc_type = doc_type - request.content = os.path.join( - KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename - ) - space_res = knowledge_space_service.get_knowledge_space( - KnowledgeSpaceRequest(name=space_name) - ) - if len(space_res) == 0: - # create default space - if "default" != space_name: - raise Exception(f"you have not create your knowledge space.") - knowledge_space_service.create_knowledge_space( - KnowledgeSpaceRequest( - name=space_name, - desc="first db-gpt rag application", - owner="dbgpt", - ) - ) - return Result.succ( - knowledge_space_service.create_knowledge_document( - space=space_name, request=request - ) - ) - return Result.failed(code="E000X", msg=f"doc_file is None") - except Exception as e: - return Result.failed(code="E000X", msg=f"document add error {e}") + print(f"/document/upload params: {space_name}") + try: + if doc_file: + # Sanitize inputs to prevent path traversal + safe_space_name = os.path.basename(space_name) + safe_filename = os.path.basename(doc_file.filename) + + # Create absolute paths and verify they are within allowed directory + upload_dir = os.path.abspath(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, safe_space_name)) + target_path = os.path.abspath(os.path.join(upload_dir, safe_filename)) + + if not os.path.abspath(KNOWLEDGE_UPLOAD_ROOT_PATH) in target_path: + raise HTTPException(status_code=400, detail="Invalid path detected") + + if not os.path.exists(upload_dir): + os.makedirs(upload_dir) + + # Create temp file + tmp_fd, tmp_path = tempfile.mkstemp(dir=upload_dir) + + try: + with os.fdopen(tmp_fd, "wb") as tmp: + tmp.write(await doc_file.read()) + + shutil.move(tmp_path, target_path) + + request = KnowledgeDocumentRequest() + request.doc_name = doc_name + request.doc_type = doc_type + request.content = target_path + + space_res = knowledge_space_service.get_knowledge_space( + KnowledgeSpaceRequest(name=safe_space_name) + ) + if len(space_res) == 0: + # create default space + if "default" != safe_space_name: + raise Exception(f"you have not create your knowledge space.") + knowledge_space_service.create_knowledge_space( + KnowledgeSpaceRequest( + name=safe_space_name, + desc="first db-gpt rag application", + owner="dbgpt", + ) + ) + return Result.succ( + knowledge_space_service.create_knowledge_document( + space=safe_space_name, request=request + ) + ) + except Exception as e: + # Clean up temp file if anything goes wrong + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise e + + return Result.failed(code="E000X", msg=f"doc_file is None") + except Exception as e: + return Result.failed(code="E000X", msg=f"document add error {e}") @router.post("/knowledge/{space_name}/document/sync")