From 2c146aff74bac427f0a4921971d8472097410bdf Mon Sep 17 00:00:00 2001 From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Date: Tue, 14 May 2024 09:46:19 +0800 Subject: [PATCH 1/4] path traversal bug in api /knowledge_base/download_doc(#4008) (#4009) close #4008 --- server/knowledge_base/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index f2ddbfd012..b6772cd2e5 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -42,7 +42,10 @@ def get_vs_path(knowledge_base_name: str, vector_name: str): def get_file_path(knowledge_base_name: str, doc_name: str): - return os.path.join(get_doc_path(knowledge_base_name), doc_name) + doc_path = Path(get_doc_path(knowledge_base_name)) + file_path = doc_path / doc_name + if file_path.is_relative_to(doc_path): + return str(file_path) def list_kbs_from_folder(): From ede538bfe2db5cd6b480a1519c6881d78d3a9d8c Mon Sep 17 00:00:00 2001 From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Date: Tue, 14 May 2024 18:48:55 +0800 Subject: [PATCH 2/4] =?UTF-8?q?update=20"path=20traversal=20bug=20in=20api?= =?UTF-8?q?=20/knowledge=5Fbase/download=5Fdoc(#4008)=E2=80=A6=20(#4017)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "path traversal bug in api /knowledge_base/download_doc(#4008) (#4009)" This reverts commit 2c146aff74bac427f0a4921971d8472097410bdf. * Update utils.py --- server/knowledge_base/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index b6772cd2e5..cfd8d1d7fd 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -43,7 +43,7 @@ def get_vs_path(knowledge_base_name: str, vector_name: str): def get_file_path(knowledge_base_name: str, doc_name: str): doc_path = Path(get_doc_path(knowledge_base_name)) - file_path = doc_path / doc_name + file_path = (doc_path / doc_name).resolve() if file_path.is_relative_to(doc_path): return str(file_path) From 8ca9e8ff280e869b144b81e04af87d158002ad01 Mon Sep 17 00:00:00 2001 From: liunux4odoo <41217877+liunux4odoo@users.noreply.github.com> Date: Thu, 16 May 2024 11:06:22 +0800 Subject: [PATCH 3/4] replace Path.is_relative_to to make py38 compatible (#4029) --- server/knowledge_base/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/knowledge_base/utils.py b/server/knowledge_base/utils.py index cfd8d1d7fd..d83124ac06 100644 --- a/server/knowledge_base/utils.py +++ b/server/knowledge_base/utils.py @@ -42,9 +42,9 @@ def get_vs_path(knowledge_base_name: str, vector_name: str): def get_file_path(knowledge_base_name: str, doc_name: str): - doc_path = Path(get_doc_path(knowledge_base_name)) + doc_path = Path(get_doc_path(knowledge_base_name)).resolve() file_path = (doc_path / doc_name).resolve() - if file_path.is_relative_to(doc_path): + if str(file_path).startswith(str(doc_path)): return str(file_path) From b9827529aad9d565e2f2ef90e96632ed628a7d40 Mon Sep 17 00:00:00 2001 From: af su <53895794+vegetablest@users.noreply.github.com> Date: Mon, 20 May 2024 14:44:02 +0800 Subject: [PATCH 4/4] fix: incorrect variable usage in update_doc_by_ids method (#4048) Co-authored-by: saf --- server/knowledge_base/kb_service/base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/knowledge_base/kb_service/base.py b/server/knowledge_base/kb_service/base.py index c83753d54c..5990903e65 100644 --- a/server/knowledge_base/kb_service/base.py +++ b/server/knowledge_base/kb_service/base.py @@ -194,14 +194,14 @@ def update_doc_by_ids(self, docs: Dict[str, Document]) -> bool: 如果对应 doc_id 的值为 None,或其 page_content 为空,则删除该文档 ''' self.del_doc_by_ids(list(docs.keys())) - docs = [] + pending_docs = [] ids = [] - for k, v in docs.items(): - if not v or not v.page_content.strip(): + for _id, doc in docs.items(): + if not doc or not doc.page_content.strip(): continue - ids.append(k) - docs.append(v) - self.do_add_doc(docs=docs, ids=ids) + ids.append(_id) + pending_docs.append(doc) + self.do_add_doc(docs=pending_docs, ids=ids) return True def list_docs(self, file_name: str = None, metadata: Dict = {}) -> List[DocumentWithVSId]: