Skip to content

Commit

Permalink
fix(tools): fix fix_encode error
Browse files Browse the repository at this point in the history
  • Loading branch information
ma-pony committed Nov 8, 2023
1 parent 97b1e62 commit c9cf8c5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
8 changes: 6 additions & 2 deletions spider_brew_kit/tools/text_encode.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
from spider_brew_kit.contants import ENCODINGS


def fix_encode(garbled_text: str, decoding='utf-8') -> (str, str):
def fix_encode(garbled_text: str, decoding='utf-8', return_encoding=False):
"""
修复乱码
:param garbled_text: 乱码文本
:param decoding: 解码方式
:param return_encoding: 是否返回编码
:return: 正常文本, 编码
"""
for encoding in ENCODINGS:
try:
res = garbled_text.encode(encoding).decode(decoding)
return res, encoding
if return_encoding:
return res, encoding
else:
return res
except (UnicodeEncodeError, UnicodeDecodeError):
continue
4 changes: 4 additions & 0 deletions test/unit/tools/text_encode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,9 @@ def test_fix_encode(self):
garbled_text = "äº¿å…ƒ"
expected_text = "亿元"
expected_encoding = "cp1252"

result = fix_encode(garbled_text)
assert result == expected_text

result = fix_encode(garbled_text, return_encoding=True)
assert result == (expected_text, expected_encoding)

0 comments on commit c9cf8c5

Please sign in to comment.