From 416bd9ebc7564ea4b081906e0c3bcc37d965ef63 Mon Sep 17 00:00:00 2001 From: yuehuazhang Date: Tue, 7 Jan 2025 09:07:39 +0800 Subject: [PATCH] fix: SyntaxWarning: invalid escape sequence '\s' --- .../server/file_rag/text_splitter/ali_text_splitter.py | 4 ++-- .../text_splitter/chinese_recursive_text_splitter.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chatchat-server/chatchat/server/file_rag/text_splitter/ali_text_splitter.py b/chatchat-server/chatchat/server/file_rag/text_splitter/ali_text_splitter.py index 9def31a..85d9526 100644 --- a/chatchat-server/chatchat/server/file_rag/text_splitter/ali_text_splitter.py +++ b/chatchat-server/chatchat/server/file_rag/text_splitter/ali_text_splitter.py @@ -15,8 +15,8 @@ def split_text(self, text: str) -> List[str]: # 考虑到使用了三个模型,可能对于低配置gpu不太友好,因此这里将模型load进cpu计算,有需要的话可以替换device为自己的显卡id if self.pdf: text = re.sub(r"\n{3,}", r"\n", text) - text = re.sub("\s", " ", text) - text = re.sub("\n\n", "", text) + text = re.sub(r"\s", " ", text) + text = re.sub(r"\n\n", "", text) try: from modelscope.pipelines import pipeline except ImportError: diff --git a/chatchat-server/chatchat/server/file_rag/text_splitter/chinese_recursive_text_splitter.py b/chatchat-server/chatchat/server/file_rag/text_splitter/chinese_recursive_text_splitter.py index 5445dd7..aabe19b 100644 --- a/chatchat-server/chatchat/server/file_rag/text_splitter/chinese_recursive_text_splitter.py +++ b/chatchat-server/chatchat/server/file_rag/text_splitter/chinese_recursive_text_splitter.py @@ -41,10 +41,10 @@ def __init__( self._separators = separators or [ "\n\n", "\n", - "。|!|?", - "\.\s|\!\s|\?\s", - ";|;\s", - ",|,\s", + r"。|!|?", + r"\.\s|\!\s|\?\s", + r";|;\s", + r",|,\s", ] self._is_separator_regex = is_separator_regex