diff --git a/CHANGELOG.md b/CHANGELOG.md index 12a27fd7..ffbccaf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31) + +### Fixed +- Unintentional memory usage regression when using large payloads that match several encodings (#376) + + ## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22) ### Changed diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py index f3f7bcc8..a760b9c5 100644 --- a/charset_normalizer/models.py +++ b/charset_normalizer/models.py @@ -57,6 +57,9 @@ def __lt__(self, other: object) -> bool: return self.coherence > other.coherence elif chaos_difference < 0.01 and coherence_difference <= 0.02: # When having a difficult decision, use the result that decoded as many multi-byte as possible. + # preserve RAM usage! + if len(self._payload) >= TOO_BIG_SEQUENCE: + return self.chaos < other.chaos return self.multi_byte_usage > other.multi_byte_usage return self.chaos < other.chaos diff --git a/charset_normalizer/version.py b/charset_normalizer/version.py index 83683f4c..5a4da4ff 100644 --- a/charset_normalizer/version.py +++ b/charset_normalizer/version.py @@ -2,5 +2,5 @@ Expose version """ -__version__ = "3.3.1" +__version__ = "3.3.2" VERSION = __version__.split(".")