From 683237c0f99ae96c78ee2f10b9d774e188885ab5 Mon Sep 17 00:00:00 2001
From: Ahmed TAHRI
Date: Sat, 30 Sep 2023 07:24:40 +0200
Subject: [PATCH] :bug: Fix unreachable code in the sorting algorithm of CharsetMatch

---
 CHANGELOG.md                 | 3 +++
 charset_normalizer/models.py | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d9e1ed2..e0e8609e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
 - Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.7
 
+### Fixed
+- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
+
 ## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
 
 ### Changed
diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index 7f8ca389..f3f7bcc8 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -54,16 +54,16 @@ def __lt__(self, other: object) -> bool:
 
         # Below 1% difference --> Use Coherence
         if chaos_difference < 0.01 and coherence_difference > 0.02:
-            # When having a tough decision, use the result that decoded as many multi-byte as possible.
-            if chaos_difference == 0.0 and self.coherence == other.coherence:
-                return self.multi_byte_usage > other.multi_byte_usage
             return self.coherence > other.coherence
+        elif chaos_difference < 0.01 and coherence_difference <= 0.02:
+            # When having a difficult decision, use the result that decoded as many multi-byte as possible.
+            return self.multi_byte_usage > other.multi_byte_usage
 
         return self.chaos < other.chaos
 
     @property
     def multi_byte_usage(self) -> float:
-        return 1.0 - len(str(self)) / len(self.raw)
+        return 1.0 - (len(str(self)) / len(self.raw))
 
     def __str__(self) -> str:
         # Lazy Str Loading