Skip to content

Commit

Permalink
✔️ Add tests for previous issue 520, 509 and 498
Browse files Browse the repository at this point in the history
  • Loading branch information
Ousret committed Sep 28, 2024
1 parent 39b6f5c commit 46d5ae5
Showing 1 changed file with 30 additions and 0 deletions.
30 changes: 30 additions & 0 deletions tests/test_edge_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,33 @@ def test_unicode_edge_case():

assert best_guess is not None, "Payload should have given something, detection failure"
assert best_guess.encoding == "utf_8", "UTF-8 payload wrongly detected"


def test_issue_gh520():
    """Verify that minorities do not strip Basic Latin characters.

    Regression test for issue 520: the payload is an ASCII path containing
    two non-ASCII bytes, and "Basic Latin" must still be listed among the
    alphabets of the best match.
    """
    payload = b"/includes/webform.compon\xd2\xaants.inc/"

    best_guess = from_bytes(payload).best()

    assert (
        best_guess is not None
    ), "Payload should have given something, detection failure"
    # Report what was actually detected so a regression is easy to diagnose.
    assert (
        "Basic Latin" in best_guess.alphabets
    ), f"Basic Latin missing from detected alphabets: {best_guess.alphabets}"


def test_issue_gh509():
    """Two common ASCII punctuation marks should render as-is.

    Regression test for issue 509: the two-byte payload ``);`` must be
    detected as plain ASCII.
    """
    payload = b");"

    best_guess = from_bytes(payload).best()

    assert (
        best_guess is not None
    ), "Payload should have given something, detection failure"
    # Value under test on the left, expected constant on the right.
    assert (
        best_guess.encoding == "ascii"
    ), f"ASCII payload wrongly detected as {best_guess.encoding}"


def test_issue_gh498():
    """This case was mistaken for utf-16-le; this should never happen again.

    Regression test for issue 498: the payload starts with eight non-ASCII
    bytes followed by an ASCII file name. The best match must not be
    utf-16-le and must report "Cyrillic" among its alphabets.
    """
    payload = b'\x84\xae\xaa\xe3\xac\xa5\xad\xe2 Microsoft Word.docx'

    best_guess = from_bytes(payload).best()

    assert (
        best_guess is not None
    ), "Payload should have given something, detection failure"
    # Pin the exact regression named in the docstring, not only its symptom.
    assert (
        best_guess.encoding != "utf_16_le"
    ), "Payload wrongly detected as utf-16-le"
    assert (
        "Cyrillic" in best_guess.alphabets
    ), f"Cyrillic missing from detected alphabets: {best_guess.alphabets}"

0 comments on commit 46d5ae5

Please sign in to comment.