From e8d70d338dd50671d6648f25201a32afa69797dc Mon Sep 17 00:00:00 2001
From: TAHRI Ahmed R
Date: Tue, 19 Mar 2024 09:07:17 +0100
Subject: [PATCH] :bug: relax TypeError with a CharsetMatch instance when trying to compare it with anything else than a CharsetMatch instance (#444)

---
 CHANGELOG.md                 |  8 ++++++++
 charset_normalizer/models.py |  8 +++-----
 tests/test_base_detection.py | 11 +++++++++++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index de66da4f..404f7e6e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,14 @@
 All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
+## [3.3.3](https://github.com/Ousret/charset_normalizer/compare/3.3.2...master) (2024-03-??)
+
+### Fixed
+- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
+
+### Changed
+- Optional mypyc compilation upgraded to version 1.9.0 for Python >= 3.8
+
 ## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
 
 ### Fixed
diff --git a/charset_normalizer/models.py b/charset_normalizer/models.py
index a760b9c5..4d2ce867 100644
--- a/charset_normalizer/models.py
+++ b/charset_normalizer/models.py
@@ -35,11 +35,9 @@ def __init__(
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, CharsetMatch):
-            raise TypeError(
-                "__eq__ cannot be invoked on {} and {}.".format(
-                    str(other.__class__), str(self.__class__)
-                )
-            )
+            if isinstance(other, str):
+                return iana_name(other) == self.encoding
+            return False
         return self.encoding == other.encoding and self.fingerprint == other.fingerprint
 
     def __lt__(self, other: object) -> bool:
diff --git a/tests/test_base_detection.py b/tests/test_base_detection.py
index 3180a500..e5d774d3 100644
--- a/tests/test_base_detection.py
+++ b/tests/test_base_detection.py
@@ -123,3 +123,14 @@ def test_doc_example_short_cp1251():
     ).best()
 
     assert best_guess.encoding == "cp1251"
+
+
+def test_direct_cmp_charset_match():
+    best_guess = from_bytes(
+        "😀 Hello World! How affairs are going? 😀".encode("utf_8")
+    ).best()
+
+    assert best_guess == "utf_8"
+    assert best_guess == "utf-8"
+    assert best_guess != 8
+    assert best_guess != None