From 5fc961e081e542bd63edcc873d39371a29381d74 Mon Sep 17 00:00:00 2001 From: Carbon225 Date: Fri, 1 Dec 2023 13:30:18 +0100 Subject: [PATCH] add ruff --- .github/workflows/python-app.yml | 29 +- .github/workflows/ruff.yml | 8 + .pre-commit-config.yaml | 7 + ens_normalize/normalization.py | 528 +++++++++++++++++++------------ poetry.lock | 228 ++++++++++++- pyproject.toml | 6 + tests/test_normalization.py | 436 +++++++++++++------------ tools/updater/update_ens.py | 32 +- 8 files changed, 833 insertions(+), 441 deletions(-) create mode 100644 .github/workflows/ruff.yml create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 0ea5ac3..4844b4c 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -22,27 +22,26 @@ jobs: steps: - uses: actions/checkout@v3 with: - ref: ${{ github.event.pull_request.head.ref }} + ref: ${{ github.event.pull_request.head.ref }} + + - name: Install poetry + run: pipx install poetry + - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + cache: "poetry" + - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install flake8 pytest pytest-cov pytest-mock coverage-badge - pip install . - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + run: poetry install + - name: Test with pytest - run: | - pytest -vv --cov-report term-missing --cov=ens_normalize tests/ + run: poetry run pytest -vv --cov-report term-missing --cov=ens_normalize tests/ + - name: Coverage - run: coverage-badge -f -o coverage_badge.svg + run: poetry run coverage-badge -f -o coverage_badge.svg + - name: Commit changess uses: EndBug/add-and-commit@v9 with: diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..563b87d --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,8 @@ +name: Ruff +on: [push, pull_request] +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..6b6a871 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/ens_normalize/normalization.py b/ens_normalize/normalization.py index 3101296..f070fe1 100644 --- a/ens_normalize/normalization.py +++ b/ens_normalize/normalization.py @@ -1,4 +1,14 @@ -from typing import Callable, Dict, List, NamedTuple, Set, Optional, Tuple, Union, Iterable +from typing import ( + Callable, + Dict, + List, + NamedTuple, + Set, + Optional, + Tuple, + Union, + Iterable, +) from enum import Enum import re import json @@ -8,14 +18,14 @@ import warnings -SPEC_PICKLE_PATH = os.path.join(os.path.dirname(__file__), 'spec.pickle') +SPEC_PICKLE_PATH = os.path.join(os.path.dirname(__file__), "spec.pickle") class DisallowedSequenceTypeBase(Enum): - ''' + """ Base class for disallowed sequence types. See README: Glossary -> Sequences. - ''' + """ def __new__(cls, *args): value = len(cls.__members__) + 1 @@ -32,10 +42,10 @@ def code(self) -> str: class CurableSequenceTypeBase(Enum): - ''' + """ Base class for curable sequence types. See README: Glossary -> Sequences. - ''' + """ def __new__(cls, *args): value = len(cls.__members__) + 1 @@ -66,7 +76,9 @@ class DisallowedSequenceType(DisallowedSequenceTypeBase): # CONFUSABLES ---------- - CONF_WHOLE = "Contains visually confusing characters from {script1} and {script2} scripts" + CONF_WHOLE = ( + "Contains visually confusing characters from {script1} and {script2} scripts" + ) class CurableSequenceType(CurableSequenceTypeBase): @@ -77,46 +89,65 @@ class CurableSequenceType(CurableSequenceTypeBase): # GENERIC ---------------- - UNDERSCORE = "Contains an underscore in a disallowed position", \ - "An underscore is only allowed at the start of a label" + UNDERSCORE = ( + "Contains an underscore in a disallowed position", + "An underscore is only allowed at the start of a label", + ) - HYPHEN = "Contains the sequence '--' in a disallowed position", \ - "Hyphens are disallowed at the 2nd and 3rd positions of a label" + HYPHEN = ( + "Contains the sequence '--' in a disallowed position", + "Hyphens are disallowed at the 2nd and 3rd positions of a label", + ) - EMPTY_LABEL = "Contains a disallowed empty label", \ - "Empty labels are not allowed, e.g. abc..eth" + EMPTY_LABEL = ( + "Contains a disallowed empty label", + "Empty labels are not allowed, e.g. abc..eth", + ) # CM --------------------- - CM_START = "Contains a combining mark in a disallowed position at the start of the label", \ - "A combining mark is disallowed at the start of a label" + CM_START = ( + "Contains a combining mark in a disallowed position at the start of the label", + "A combining mark is disallowed at the start of a label", + ) - CM_EMOJI = "Contains a combining mark in a disallowed position after an emoji", \ - "A combining mark is disallowed after an emoji" + CM_EMOJI = ( + "Contains a combining mark in a disallowed position after an emoji", + "A combining mark is disallowed after an emoji", + ) # TOKENS ----------------- - DISALLOWED = "Contains a disallowed character", \ - "This character is disallowed" + DISALLOWED = "Contains a disallowed character", "This character is disallowed" - INVISIBLE = "Contains a disallowed invisible character", \ - "This invisible character is disallowed" + INVISIBLE = ( + "Contains a disallowed invisible character", + "This invisible character is disallowed", + ) # FENCED ---------------- - FENCED_LEADING = "Contains a disallowed character at the start of a label", \ - "This character is disallowed at the start of a label" + FENCED_LEADING = ( + "Contains a disallowed character at the start of a label", + "This character is disallowed at the start of a label", + ) - FENCED_MULTI = "Contains a disallowed consecutive sequence of characters", \ - "Characters in this sequence cannot be placed next to each other" + FENCED_MULTI = ( + "Contains a disallowed consecutive sequence of characters", + "Characters in this sequence cannot be placed next to each other", + ) - FENCED_TRAILING = "Contains a disallowed character at the end of a label", \ - "This character is disallowed at the end of a label" + FENCED_TRAILING = ( + "Contains a disallowed character at the end of a label", + "This character is disallowed at the end of a label", + ) # CONFUSABLES ---------- - CONF_MIXED = "Contains visually confusing characters from multiple scripts ({scripts})", \ - "This character{script1} is disallowed because it is visually confusing with another character{script2}" + CONF_MIXED = ( + "Contains visually confusing characters from multiple scripts ({scripts})", + "This character{script1} is disallowed because it is visually confusing with another character{script2}", + ) class NormalizableSequenceType(CurableSequenceTypeBase): @@ -125,24 +156,32 @@ class NormalizableSequenceType(CurableSequenceTypeBase): See README: Glossary -> Sequences. """ - IGNORED = "Contains disallowed \"ignored\" characters that have been removed", \ - "This character is ignored during normalization and has been automatically removed" + IGNORED = ( + 'Contains disallowed "ignored" characters that have been removed', + "This character is ignored during normalization and has been automatically removed", + ) - MAPPED = "Contains a disallowed character that has been replaced by a normalized sequence", \ - "This character is disallowed and has been automatically replaced by a normalized sequence" + MAPPED = ( + "Contains a disallowed character that has been replaced by a normalized sequence", + "This character is disallowed and has been automatically replaced by a normalized sequence", + ) - FE0F = "Contains a disallowed variant of an emoji which has been replaced by an equivalent normalized emoji", \ - "This emoji has been automatically fixed to remove an invisible character" + FE0F = ( + "Contains a disallowed variant of an emoji which has been replaced by an equivalent normalized emoji", + "This emoji has been automatically fixed to remove an invisible character", + ) - NFC = "Contains a disallowed sequence that is not \"NFC normalized\" which has been replaced by an equivalent normalized sequence", \ - "This sequence has been automatically normalized into NFC canonical form" + NFC = ( + 'Contains a disallowed sequence that is not "NFC normalized" which has been replaced by an equivalent normalized sequence', + "This sequence has been automatically normalized into NFC canonical form", + ) class DisallowedSequence(Exception): - ''' + """ An unnormalized sequence without any normalization suggestion. See README: Glossary -> Sequences. - ''' + """ def __init__(self, type: DisallowedSequenceType, meta: Dict[str, str] = {}): super().__init__(type.general_info) @@ -171,17 +210,19 @@ def general_info(self) -> str: class CurableSequence(DisallowedSequence): - ''' + """ An unnormalized sequence containing a normalization suggestion that is automatically applied using `ens_cure`. See README: Glossary -> Sequences. - ''' - - def __init__(self, - type: CurableSequenceType, - index: int, - sequence: str, - suggested: str, - meta: Dict[str, str] = {}): + """ + + def __init__( + self, + type: CurableSequenceType, + index: int, + sequence: str, + suggested: str, + meta: Dict[str, str] = {}, + ): super().__init__(type, meta) self.type = type self.index = index @@ -204,28 +245,30 @@ def sequence_info(self) -> str: class NormalizableSequence(CurableSequence): - ''' + """ An unnormalized sequence containing a normalization suggestion that is automatically applied using `ens_normalize` and `ens_cure`. See README: Glossary -> Sequences. - ''' - - def __init__(self, - type: NormalizableSequenceType, - index: int, - sequence: str, - suggested: str, - meta: Dict[str, str] = {}): + """ + + def __init__( + self, + type: NormalizableSequenceType, + index: int, + sequence: str, + suggested: str, + meta: Dict[str, str] = {}, + ): super().__init__(type, index, sequence, suggested, meta) self.type = type -TY_VALID = 'valid' -TY_MAPPED = 'mapped' -TY_IGNORED = 'ignored' -TY_DISALLOWED = 'disallowed' -TY_EMOJI = 'emoji' -TY_STOP = 'stop' -TY_NFC = 'nfc' +TY_VALID = "valid" +TY_MAPPED = "mapped" +TY_IGNORED = "ignored" +TY_DISALLOWED = "disallowed" +TY_EMOJI = "emoji" +TY_STOP = "stop" +TY_NFC = "nfc" CP_STOP = 0x2E @@ -308,28 +351,33 @@ def cps2str(cps: List[int]) -> str: """ Convert a list of integer codepoints to string. """ - return ''.join(chr(cp) for cp in cps) + return "".join(chr(cp) for cp in cps) def filter_fe0f(text: str) -> str: """ Remove all FE0F from text. """ - return text.replace('\uFE0F', '') + return text.replace("\uFE0F", "") def create_emoji_regex_pattern(emojis: List[str]) -> str: - fe0f = re.escape('\uFE0F') + fe0f = re.escape("\uFE0F") + def make_emoji(emoji: str) -> str: # make FE0F optional - return re.escape(emoji).replace(fe0f, f'{fe0f}?') + return re.escape(emoji).replace(fe0f, f"{fe0f}?") + # sort to match the longest first def order(emoji: str) -> int: # emojis with FE0F need to be pushed back because the FE0F would trap the regex matching # re.search(r'AF?|AB', '_AB_') # >>> return len(filter_fe0f(emoji)) - return '|'.join(make_emoji(emoji) for emoji in sorted(emojis, key=order, reverse=True)) + + return "|".join( + make_emoji(emoji) for emoji in sorted(emojis, key=order, reverse=True) + ) def create_emoji_fe0f_lookup(emojis: List[str]) -> Dict[str, str]: @@ -340,27 +388,27 @@ def create_emoji_fe0f_lookup(emojis: List[str]) -> Dict[str, str]: def compute_valid(groups: List[Dict]) -> Set[int]: - ''' + """ Compute the set of valid codepoints from the spec.json file. - ''' + """ valid = set() for g in groups: - valid.update(g['V']) - valid.update(map(ord, NFD(''.join(map(chr, valid))))) + valid.update(g["V"]) + valid.update(map(ord, NFD("".join(map(chr, valid))))) return valid def read_groups(groups: List[Dict]) -> List[Dict]: - ''' + """ Read and parse the groups field from the spec.json file. - ''' + """ return [ { - 'name': g['name'], - 'P': set(g['primary']), - 'Q': set(g['secondary']), - 'V': set(g['primary'] + g['secondary']), - 'M': 'cm' not in g, + "name": g["name"], + "P": set(g["primary"]), + "Q": set(g["secondary"]), + "V": set(g["primary"] + g["secondary"]), + "M": "cm" not in g, } for g in groups ] @@ -374,66 +422,74 @@ def try_str_to_int(x): def dict_keys_to_int(d): - ''' + """ Recursively convert dictionary keys to integers (for JSON parsing). - ''' + """ if isinstance(d, dict): return {try_str_to_int(k): dict_keys_to_int(v) for k, v in d.items()} return d def find_group_id(groups, name): - ''' + """ Find the index of a group by name. - ''' + """ for i, g in enumerate(groups): - if g['name'] == name: + if g["name"] == name: return i def group_names_to_ids(groups, whole_map): - ''' + """ Convert group names to group ids in the whole_map for faster lookup. - ''' + """ for v in whole_map.values(): if isinstance(v, dict): - for k in v['M']: - for i in range(len(v['M'][k])): - id = find_group_id(groups, v['M'][k][i]) + for k in v["M"]: + for i in range(len(v["M"][k])): + id = find_group_id(groups, v["M"][k][i]) assert id is not None - v['M'][k][i] = id + v["M"][k][i] = id class NormalizationData: def __init__(self, spec_json_path: str): - with open(spec_json_path, encoding='utf-8') as f: + with open(spec_json_path, encoding="utf-8") as f: spec = json.load(f) - self.unicode_version: str = spec['unicode'] - self.ignored: Set[int] = set(spec['ignored']) - self.mapped: Dict[int, List[int]] = {cp_src: mapping for cp_src, mapping in spec['mapped']} - self.cm: Set[int] = set(spec['cm']) - self.emoji: List[List[int]] = spec['emoji'] - self.nfc_check: Set[int] = set(spec['nfc_check']) - self.fenced: Dict[int, str] = {x[0]: x[1] for x in spec['fenced']} - self.groups: List[Dict] = read_groups(spec['groups']) + self.unicode_version: str = spec["unicode"] + self.ignored: Set[int] = set(spec["ignored"]) + self.mapped: Dict[int, List[int]] = { + cp_src: mapping for cp_src, mapping in spec["mapped"] + } + self.cm: Set[int] = set(spec["cm"]) + self.emoji: List[List[int]] = spec["emoji"] + self.nfc_check: Set[int] = set(spec["nfc_check"]) + self.fenced: Dict[int, str] = {x[0]: x[1] for x in spec["fenced"]} + self.groups: List[Dict] = read_groups(spec["groups"]) self.valid: Set[int] = compute_valid(self.groups) - self.whole_map: Dict = dict_keys_to_int(spec['whole_map']) + self.whole_map: Dict = dict_keys_to_int(spec["whole_map"]) group_names_to_ids(self.groups, self.whole_map) - self.nsm_max: int = spec['nsm_max'] - self.nsm: Set[int] = set(spec['nsm']) + self.nsm_max: int = spec["nsm_max"] + self.nsm: Set[int] = set(spec["nsm"]) self.cm.remove(CP_FE0F) - self.emoji_fe0f_lookup = create_emoji_fe0f_lookup([''.join(chr(cp) for cp in cps) for cps in self.emoji]) - self.emoji_regex = re.compile(create_emoji_regex_pattern([''.join(chr(cp) for cp in cps) for cps in self.emoji])) + self.emoji_fe0f_lookup = create_emoji_fe0f_lookup( + ["".join(chr(cp) for cp in cps) for cps in self.emoji] + ) + self.emoji_regex = re.compile( + create_emoji_regex_pattern( + ["".join(chr(cp) for cp in cps) for cps in self.emoji] + ) + ) def load_normalization_data_pickle(spec_pickle_path: str) -> NormalizationData: """ Loads `NormalizationData` from a pickle file. """ - with open(spec_pickle_path, 'rb') as f: + with open(spec_pickle_path, "rb") as f: return pickle.load(f) @@ -443,9 +499,9 @@ def load_normalization_data_pickle(spec_pickle_path: str) -> NormalizationData: def check_spec_unicode_version(): if not NORMALIZATION.unicode_version.startswith(UNICODE_VERSION): warnings.warn( - f'Unicode version mismatch: ' - f'pyunormalize is using {UNICODE_VERSION}, ' - f'but the ENS Normalization spec is for {NORMALIZATION.unicode_version}', + f"Unicode version mismatch: " + f"pyunormalize is using {UNICODE_VERSION}, " + f"but the ENS Normalization spec is for {NORMALIZATION.unicode_version}", UnicodeWarning, ) @@ -464,9 +520,11 @@ def collapse_valid_tokens(tokens: List[Token]) -> List[Token]: j = i + 1 while j < len(tokens) and tokens[j].type == TY_VALID: j += 1 - out.append(TokenValid( - cps = [cp for k in range(i, j) for cp in tokens[k].cps], - )) + out.append( + TokenValid( + cps=[cp for k in range(i, j) for cp in tokens[k].cps], + ) + ) i = j else: out.append(tokens[i]) @@ -499,16 +557,23 @@ def normalize_tokens(tokens: List[Token]) -> List[Token]: if start < 0: start = i slice = tokens[start:end] - cps = [cp for tok in slice if tok.type in (TY_VALID, TY_MAPPED) for cp in tok.cps] + cps = [ + cp + for tok in slice + if tok.type in (TY_VALID, TY_MAPPED) + for cp in tok.cps + ] str0 = cps2str(cps) str = NFC(str0) if str0 == str: i = end - 1 else: - tokens[start:end] = [TokenNFC( - input = cps, - cps = str2cps(str), - )] + tokens[start:end] = [ + TokenNFC( + input=cps, + cps=str2cps(str), + ) + ] i = start start = -1 else: @@ -526,56 +591,60 @@ def post_check_empty(name: str, input: str) -> Optional[CurableSequence]: CurableSequenceType.EMPTY_LABEL, index=0, sequence=input, - suggested='', + suggested="", ) - if name[0] == '.': + if name[0] == ".": return CurableSequence( CurableSequenceType.EMPTY_LABEL, index=0, - sequence='.', - suggested='', + sequence=".", + suggested="", ) - if name[-1] == '.': + if name[-1] == ".": return CurableSequence( CurableSequenceType.EMPTY_LABEL, index=len(name) - 1, - sequence='.', - suggested='', + sequence=".", + suggested="", ) - i = name.find('..') + i = name.find("..") if i >= 0: return CurableSequence( CurableSequenceType.EMPTY_LABEL, index=i, - sequence='..', - suggested='.', + sequence="..", + suggested=".", ) def post_check_underscore(label: str) -> Optional[CurableSequence]: in_middle = False for i, c in enumerate(label): - if c != '_': + if c != "_": in_middle = True elif in_middle: cnt = 1 - while i + cnt < len(label) and label[i + cnt] == '_': + while i + cnt < len(label) and label[i + cnt] == "_": cnt += 1 return CurableSequence( CurableSequenceType.UNDERSCORE, index=i, - sequence='_' * cnt, - suggested='', + sequence="_" * cnt, + suggested="", ) def post_check_hyphen(label: str) -> Optional[CurableSequence]: - if len(label) >= 4 and all(ord(cp) < 0x80 for cp in label) and '-' == label[2] == label[3]: + if ( + len(label) >= 4 + and all(ord(cp) < 0x80 for cp in label) + and "-" == label[2] == label[3] + ): return CurableSequence( CurableSequenceType.HYPHEN, index=2, - sequence='--', - suggested='', + sequence="--", + suggested="", ) @@ -587,7 +656,7 @@ def post_check_cm_leading_emoji(cps: List[int]) -> Optional[CurableSequence]: CurableSequenceType.CM_START, index=i, sequence=chr(cps[i]), - suggested='', + suggested="", ) else: prev = cps[i - 1] @@ -598,12 +667,12 @@ def post_check_cm_leading_emoji(cps: List[int]) -> Optional[CurableSequence]: # we cannot report the emoji because it was replaced with FE0F index=i, sequence=chr(cps[i]), - suggested='', + suggested="", ) def make_fenced_error(cps: List[int], start: int, end: int) -> CurableSequence: - suggested = '' + suggested = "" if start == 0: type_ = CurableSequenceType.FENCED_LEADING elif end == len(cps): @@ -614,7 +683,7 @@ def make_fenced_error(cps: List[int], start: int, end: int) -> CurableSequence: return CurableSequence( type_, index=start, - sequence=''.join(map(chr, cps[start:end])), + sequence="".join(map(chr, cps[start:end])), suggested=suggested, ) @@ -639,7 +708,9 @@ def post_check_fenced(cps: List[int]) -> Optional[CurableSequence]: return make_fenced_error(cps, n - 1, n) -def post_check_group_whole(cps: List[int], is_greek: List[bool]) -> Optional[Union[DisallowedSequence, CurableSequence]]: +def post_check_group_whole( + cps: List[int], is_greek: List[bool] +) -> Optional[Union[DisallowedSequence, CurableSequence]]: cps_no_fe0f = [cp for cp in cps if cp != CP_FE0F] unique = set(cps_no_fe0f) # we pass cps with fe0f to align error position with the original input @@ -648,52 +719,51 @@ def post_check_group_whole(cps: List[int], is_greek: List[bool]) -> Optional[Uni return e g = g[0] # pass is_greek up to the caller - is_greek[0] = g['name'] == 'Greek' - return ( - post_check_group(g, cps_no_fe0f, cps) - or post_check_whole(g, unique) - ) + is_greek[0] = g["name"] == "Greek" + return post_check_group(g, cps_no_fe0f, cps) or post_check_whole(g, unique) def meta_for_conf_mixed(g, cp): - ''' + """ Create metadata for the CONF_MIXED error. - ''' - s1 = [g['name'] for g in NORMALIZATION.groups if cp in g['V']] + """ + s1 = [g["name"] for g in NORMALIZATION.groups if cp in g["V"]] s1 = s1[0] if s1 else None - s2 = g['name'] + s2 = g["name"] if s1 is not None: return { - 'scripts': f'{s1}/{s2}', - 'script1': f' from the {s1} script', - 'script2': f' from the {s2} script', + "scripts": f"{s1}/{s2}", + "script1": f" from the {s1} script", + "script2": f" from the {s2} script", } else: return { - 'scripts': f'{s2} plus other scripts', - 'script1': '', - 'script2': f' from the {s2} script', + "scripts": f"{s2} plus other scripts", + "script1": "", + "script2": f" from the {s2} script", } -def determine_group(unique: Iterable[int], cps: List[int]) -> Tuple[Optional[List[Dict]], Optional[CurableSequence]]: +def determine_group( + unique: Iterable[int], cps: List[int] +) -> Tuple[Optional[List[Dict]], Optional[CurableSequence]]: groups = NORMALIZATION.groups for cp in unique: - gs = [g for g in groups if cp in g['V']] + gs = [g for g in groups if cp in g["V"]] if len(gs) == 0: if groups == NORMALIZATION.groups: return None, CurableSequence( CurableSequenceType.DISALLOWED, index=cps.index(cp), sequence=chr(cp), - suggested='', + suggested="", ) else: return None, CurableSequence( CurableSequenceType.CONF_MIXED, index=cps.index(cp), sequence=chr(cp), - suggested='', + suggested="", meta=meta_for_conf_mixed(groups[0], cp), ) groups = gs @@ -702,15 +772,17 @@ def determine_group(unique: Iterable[int], cps: List[int]) -> Tuple[Optional[Lis return groups, None -def post_check_group(g, cps: List[int], input: List[int]) -> Optional[Union[DisallowedSequence, CurableSequence]]: - v, m = g['V'], g['M'] +def post_check_group( + g, cps: List[int], input: List[int] +) -> Optional[Union[DisallowedSequence, CurableSequence]]: + v, m = g["V"], g["M"] for cp in cps: if cp not in v: return CurableSequence( CurableSequenceType.CONF_MIXED, index=input.index(cp), sequence=chr(cp), - suggested='', + suggested="", meta=meta_for_conf_mixed(g, cp), ) if m: @@ -725,7 +797,9 @@ def post_check_group(g, cps: List[int], input: List[int]) -> Optional[Union[Disa return DisallowedSequence(DisallowedSequenceType.NSM_TOO_MANY) for k in range(i, j): if decomposed[k] == decomposed[j]: - return DisallowedSequence(DisallowedSequenceType.NSM_REPEATED) + return DisallowedSequence( + DisallowedSequenceType.NSM_REPEATED + ) j += 1 i = j i += 1 @@ -740,7 +814,7 @@ def post_check_whole(group, cps: Iterable[int]) -> Optional[DisallowedSequence]: if whole == 1: return None if whole is not None: - set_ = whole['M'].get(cp) + set_ = whole["M"].get(cp) if maker is not None: maker = [g for g in maker if g in set_] else: @@ -752,17 +826,19 @@ def post_check_whole(group, cps: Iterable[int]) -> Optional[DisallowedSequence]: if maker is not None: for g_ind in maker: g = NORMALIZATION.groups[g_ind] - if all(cp in g['V'] for cp in shared): + if all(cp in g["V"] for cp in shared): return DisallowedSequence( DisallowedSequenceType.CONF_WHOLE, meta={ - 'script1': group['name'], - 'script2': g['name'], + "script1": group["name"], + "script2": g["name"], }, ) -def post_check(name: str, label_is_greek: List[bool], input: str) -> Optional[Union[DisallowedSequence, CurableSequence]]: +def post_check( + name: str, label_is_greek: List[bool], input: str +) -> Optional[Union[DisallowedSequence, CurableSequence]]: # name has emojis replaced with a single FE0F if len(input) == 0: return None @@ -770,7 +846,7 @@ def post_check(name: str, label_is_greek: List[bool], input: str) -> Optional[Un if e is not None: return e label_offset = 0 - for label in name.split('.'): + for label in name.split("."): # will be set inside post_check_group_whole is_greek = [False] cps = str2cps(label) @@ -785,7 +861,9 @@ def post_check(name: str, label_is_greek: List[bool], input: str) -> Optional[Un label_is_greek.append(is_greek[0]) if e is not None: # post_checks are called on a single label and need an offset - if isinstance(e, CurableSequence): # or NormalizableSequence because of inheritance + if isinstance( + e, CurableSequence + ): # or NormalizableSequence because of inheritance e.index = label_offset + e.index if e.index is not None else None return e label_offset += len(label) + 1 @@ -808,7 +886,7 @@ def find_normalizations(tokens: List[Token]) -> List[NormalizableSequence]: elif tok.type == TY_IGNORED: warning = NormalizableSequenceType.IGNORED disallowed = chr(tok.cp) - suggestion = '' + suggestion = "" scanned = 1 elif tok.type == TY_EMOJI: if tok.input != tok.cps: @@ -826,13 +904,18 @@ def find_normalizations(tokens: List[Token]) -> List[NormalizableSequence]: else: # TY_STOP scanned = 1 if warning is not None: - warnings.append(NormalizableSequence(warning, start, disallowed, suggestion)) + warnings.append( + NormalizableSequence(warning, start, disallowed, suggestion) + ) warning = None start += scanned return warnings -def tokens2str(tokens: List[Token], emoji_fn: Callable[[TokenEmoji], str] = lambda tok: cps2str(tok.cps)) -> str: +def tokens2str( + tokens: List[Token], + emoji_fn: Callable[[TokenEmoji], str] = lambda tok: cps2str(tok.cps), +) -> str: t = [] for tok in tokens: if tok.type in (TY_IGNORED, TY_DISALLOWED): @@ -843,7 +926,7 @@ def tokens2str(tokens: List[Token], emoji_fn: Callable[[TokenEmoji], str] = lamb t.append(chr(tok.cp)) else: t.append(cps2str(tok.cps)) - return ''.join(t) + return "".join(t) def tokens2beautified(tokens: List[Token], label_is_greek: List[bool]) -> str: @@ -865,25 +948,34 @@ def tokens2beautified(tokens: List[Token], label_is_greek: List[bool]) -> str: s.append(chr(tok.cp)) else: if not label_is_greek[label_index]: - s.append(cps2str([CP_XI_CAPITAL if cp == CP_XI_SMALL else cp for cp in tok.cps])) + s.append( + cps2str( + [ + CP_XI_CAPITAL if cp == CP_XI_SMALL else cp + for cp in tok.cps + ] + ) + ) else: s.append(cps2str(tok.cps)) label_start = i label_index += 1 - return ''.join(s) + return "".join(s) -SIMPLE_NAME_REGEX = re.compile(r'^[a-z0-9]+(?:\.[a-z0-9]+)*$') +SIMPLE_NAME_REGEX = re.compile(r"^[a-z0-9]+(?:\.[a-z0-9]+)*$") -def ens_process(input: str, - do_normalize: bool = False, - do_beautify: bool = False, - do_tokenize: bool = False, - do_normalizations: bool = False, - do_cure: bool = False) -> ENSProcessResult: +def ens_process( + input: str, + do_normalize: bool = False, + do_beautify: bool = False, + do_tokenize: bool = False, + do_normalizations: bool = False, + do_cure: bool = False, +) -> ENSProcessResult: """ Used to compute @@ -948,14 +1040,16 @@ def ens_process(input: str, emoji_no_fe0f = filter_fe0f(emoji) emoji_fe0f = NORMALIZATION.emoji_fe0f_lookup[emoji_no_fe0f] - tokens.append(TokenEmoji( - # 'pretty' version - emoji = str2cps(emoji_fe0f), - # raw input - input = str2cps(emoji), - # text version - cps = str2cps(emoji_no_fe0f), - )) + tokens.append( + TokenEmoji( + # 'pretty' version + emoji=str2cps(emoji_fe0f), + # raw input + input=str2cps(emoji), + # text version + cps=str2cps(emoji_no_fe0f), + ) + ) continue @@ -968,37 +1062,45 @@ def ens_process(input: str, continue if cp in NORMALIZATION.valid: - tokens.append(TokenValid( - cps = [cp], - )) + tokens.append( + TokenValid( + cps=[cp], + ) + ) continue if cp in NORMALIZATION.ignored: - tokens.append(TokenIgnored( - cp = cp, - )) + tokens.append( + TokenIgnored( + cp=cp, + ) + ) continue mapping = NORMALIZATION.mapped.get(cp) if mapping is not None: - tokens.append(TokenMapped( - cp = cp, - cps = mapping, - )) + tokens.append( + TokenMapped( + cp=cp, + cps=mapping, + ) + ) continue error = error or CurableSequence( CurableSequenceType.INVISIBLE - if c in ('\u200d', '\u200c') + if c in ("\u200d", "\u200c") else CurableSequenceType.DISALLOWED, index=input_cur - 1, sequence=c, - suggested='', + suggested="", ) - tokens.append(TokenDisallowed( - cp = cp, - )) + tokens.append( + TokenDisallowed( + cp=cp, + ) + ) tokens = normalize_tokens(tokens) @@ -1006,17 +1108,19 @@ def ens_process(input: str, if error is None: # run post checks - emojis_as_fe0f = ''.join(tokens2str(tokens, lambda _: '\uFE0F')) + emojis_as_fe0f = "".join(tokens2str(tokens, lambda _: "\uFE0F")) # true for each label that is greek # will be set by post_check() label_is_greek = [] error = post_check(emojis_as_fe0f, label_is_greek, input) - if isinstance(error, CurableSequence): # or NormalizableSequence because of inheritance + if isinstance( + error, CurableSequence + ): # or NormalizableSequence because of inheritance offset_err_start(error, tokens) # else: - # only the result of post_check() is not input aligned - # so we do not offset the error set by the input scanning loop + # only the result of post_check() is not input aligned + # so we do not offset the error set by the input scanning loop if error is not None: normalized = None @@ -1106,11 +1210,13 @@ def _ens_cure(text: str) -> Tuple[str, List[CurableSequence]]: try: return ens_normalize(text), cures except CurableSequence as e: - text = text[:e.index] + e.suggested + text[e.index + len(e.sequence):] + text = text[: e.index] + e.suggested + text[e.index + len(e.sequence) :] cures.append(e) # DisallowedSequence is not caught here because it is not curable # this should never happen - raise Exception('ens_cure() exceeded max iterations. Please report this as a bug along with the input string.') + raise Exception( + "ens_cure() exceeded max iterations. Please report this as a bug along with the input string." + ) def ens_cure(text: str) -> str: diff --git a/poetry.lock b/poetry.lock index 5ea1363..21aa7aa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,15 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. + +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] [[package]] name = "colorama" @@ -78,6 +89,31 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "coverage-badge" +version = "1.1.0" +description = "Generate coverage badges for Coverage.py." +optional = false +python-versions = "*" +files = [ + {file = "coverage-badge-1.1.0.tar.gz", hash = "sha256:c824a106503e981c02821e7d32f008fb3984b2338aa8c3800ec9357e33345b78"}, + {file = "coverage_badge-1.1.0-py2.py3-none-any.whl", hash = "sha256:e365d56e5202e923d1b237f82defd628a02d1d645a147f867ac85c58c81d7997"}, +] + +[package.dependencies] +coverage = "*" + +[[package]] +name = "distlib" +version = "0.3.7" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, + {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, +] + [[package]] name = "exceptiongroup" version = "1.1.3" @@ -92,6 +128,36 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "filelock" +version = "3.13.1" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"}, + {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +typing = ["typing-extensions (>=4.8)"] + +[[package]] +name = "identify" +version = "2.5.32" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.5.32-py2.py3-none-any.whl", hash = "sha256:0b7656ef6cba81664b783352c73f8c24b39cf82f926f78f4550eda928e5e0545"}, + {file = "identify-2.5.32.tar.gz", hash = "sha256:5d9979348ec1a21c768ae07e0a652924538e8bce67313a73cb0f681cf08ba407"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -103,6 +169,20 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "packaging" version = "23.2" @@ -114,6 +194,21 @@ files = [ {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, ] +[[package]] +name = "platformdirs" +version = "4.0.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-4.0.0-py3-none-any.whl", hash = "sha256:118c954d7e949b35437270383a3f2531e99dd93cf7ce4dc8340d3356d30f173b"}, + {file = "platformdirs-4.0.0.tar.gz", hash = "sha256:cb633b2bcf10c51af60beb0ab06d2f1d69064b43abf4c185ca6b28865f3f9731"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + [[package]] name = "pluggy" version = "1.3.0" @@ -129,6 +224,24 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pre-commit" +version = "3.5.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pre_commit-3.5.0-py2.py3-none-any.whl", hash = "sha256:841dc9aef25daba9a0238cd27984041fa0467b4199fc4852e27950664919f660"}, + {file = "pre_commit-3.5.0.tar.gz", hash = "sha256:5804465c675b659b0862f07907f96295d490822a450c4c40e747d0b1c6ebcb32"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "pytest" version = "7.4.3" @@ -196,6 +309,97 @@ files = [ {file = "pyunormalize-15.1.0.tar.gz", hash = "sha256:cf4a87451a0f1cb76911aa97f432f4579e1f564a2f0c84ce488c73a73901b6c1"}, ] +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "ruff" +version = "0.1.6" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:88b8cdf6abf98130991cbc9f6438f35f6e8d41a02622cc5ee130a02a0ed28703"}, + {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c549ed437680b6105a1299d2cd30e4964211606eeb48a0ff7a93ef70b902248"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf5f701062e294f2167e66d11b092bba7af6a057668ed618a9253e1e90cfd76"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05991ee20d4ac4bb78385360c684e4b417edd971030ab12a4fbd075ff535050e"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87455a0c1f739b3c069e2f4c43b66479a54dea0276dd5d4d67b091265f6fd1dc"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:683aa5bdda5a48cb8266fcde8eea2a6af4e5700a392c56ea5fb5f0d4bfdc0240"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:137852105586dcbf80c1717facb6781555c4e99f520c9c827bd414fac67ddfb6"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd98138a98d48a1c36c394fd6b84cd943ac92a08278aa8ac8c0fdefcf7138f35"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0cd909d25f227ac5c36d4e7e681577275fb74ba3b11d288aff7ec47e3ae745"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8fd1c62a47aa88a02707b5dd20c5ff20d035d634aa74826b42a1da77861b5ff"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd89b45d374935829134a082617954120d7a1470a9f0ec0e7f3ead983edc48cc"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:491262006e92f825b145cd1e52948073c56560243b55fb3b4ecb142f6f0e9543"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea284789861b8b5ca9d5443591a92a397ac183d4351882ab52f6296b4fdd5462"}, + {file = "ruff-0.1.6-py3-none-win32.whl", hash = "sha256:1610e14750826dfc207ccbcdd7331b6bd285607d4181df9c1c6ae26646d6848a"}, + {file = "ruff-0.1.6-py3-none-win_amd64.whl", hash = "sha256:4558b3e178145491e9bc3b2ee3c4b42f19d19384eaa5c59d10acf6e8f8b57e33"}, + {file = "ruff-0.1.6-py3-none-win_arm64.whl", hash = "sha256:03910e81df0d8db0e30050725a5802441c2022ea3ae4fe0609b76081731accbc"}, + {file = "ruff-0.1.6.tar.gz", hash = "sha256:1b09f29b16c6ead5ea6b097ef2764b42372aebe363722f1605ecbcd2b9207184"}, +] + +[[package]] +name = "setuptools" +version = "69.0.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"}, + {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "tomli" version = "2.0.1" @@ -207,7 +411,27 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "virtualenv" +version = "20.24.7" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.24.7-py3-none-any.whl", hash = "sha256:a18b3fd0314ca59a2e9f4b556819ed07183b3e9a3702ecfe213f593d44f7b3fd"}, + {file = "virtualenv-20.24.7.tar.gz", hash = "sha256:69050ffb42419c91f6c1284a7b24e0475d793447e35929b488bf6a0aade39353"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "870f19563c9e828fb081ebcc707181ac54ddafcb27b5f61aacc77d5af9659be1" +content-hash = "b171f8c59ff2005462ec4841aade37892716efcaea0d2a4543f99fbecd32dd50" diff --git a/pyproject.toml b/pyproject.toml index 51c4641..3bdc978 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,12 @@ pyunormalize = "^15.1.0" pytest = "^7.2.1" pytest-cov = "^4.0.0" pytest-mock = "^3.10.0" +ruff = "^0.1.6" +pre-commit = "^3.5.0" +coverage-badge = "^1.1.0" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] [build-system] requires = ["poetry-core"] diff --git a/tests/test_normalization.py b/tests/test_normalization.py index aa61eb4..1965792 100644 --- a/tests/test_normalization.py +++ b/tests/test_normalization.py @@ -22,27 +22,27 @@ import pickletools -TESTS_PATH = os.path.join(os.path.dirname(__file__), 'ens-normalize-tests.json') +TESTS_PATH = os.path.join(os.path.dirname(__file__), "ens-normalize-tests.json") @pytest.mark.parametrize( - 'fn,field', + "fn,field", [ - (ens_normalize, 'norm'), - (ens_beautify, 'beautified'), - ] + (ens_normalize, "norm"), + (ens_beautify, "beautified"), + ], ) def test_ens_normalize_full(fn, field): - with open(TESTS_PATH, encoding='utf-8') as f: + with open(TESTS_PATH, encoding="utf-8") as f: data = json.load(f) good = 0 bad = 0 for test in data: - name = test['name'] + name = test["name"] - if 'error' in test: + if "error" in test: try: fn(name) bad += 1 @@ -50,7 +50,7 @@ def test_ens_normalize_full(fn, field): except DisallowedSequence: good += 1 else: - test['norm'] = test.get('norm', name) + test["norm"] = test.get("norm", name) expected = test[field] try: @@ -64,34 +64,34 @@ def test_ens_normalize_full(fn, field): bad += 1 print(f'! "{name}" threw "{e}"') - assert bad == 0, f'{100 * good / (good + bad):.2f}%, {bad} failing' + assert bad == 0, f"{100 * good / (good + bad):.2f}%, {bad} failing" def test_ens_beautify_xi(): - assert ens_beautify('ξabc') == 'Ξabc' - assert ens_beautify('ξλφα') == 'ξλφα' - assert ens_beautify('ξabc.ξλφα.ξabc.ξλφα') == 'Ξabc.ξλφα.Ξabc.ξλφα' + assert ens_beautify("ξabc") == "Ξabc" + assert ens_beautify("ξλφα") == "ξλφα" + assert ens_beautify("ξabc.ξλφα.ξabc.ξλφα") == "Ξabc.ξλφα.Ξabc.ξλφα" def test_ens_tokenize_full(): - with open(TESTS_PATH, encoding='utf-8') as f: + with open(TESTS_PATH, encoding="utf-8") as f: data = json.load(f) good = 0 bad = 0 for test in data: - if 'tokenized' not in test: + if "tokenized" not in test: continue - name = test['name'] - expected = test['tokenized'] + name = test["name"] + expected = test["tokenized"] # we do not keep track of which tokens were changed for t in expected: - if t['type'] == 'nfc': - del t['tokens'] - del t['tokens0'] + if t["type"] == "nfc": + del t["tokens"] + del t["tokens0"] res = [t._asdict() for t in ens_tokenize(name)] @@ -101,63 +101,78 @@ def test_ens_tokenize_full(): bad += 1 print(f'! "{name}" tokenized incorrectly') - assert bad == 0, f'{100 * good / (good + bad):.2f}%, {bad} failing' - - -@pytest.mark.parametrize('label,error,start,disallowed,suggested', [ - ('good', None, None, None, None), - - # underscore - ('a_a', CurableSequenceType.UNDERSCORE, 1, '_', ''), - # -- - ('aa--a', CurableSequenceType.HYPHEN, 2, '--', ''), - # empty - ("a..b", CurableSequenceType.EMPTY_LABEL, 1, '..', '.'), - (".ab", CurableSequenceType.EMPTY_LABEL, 0, '.', ''), - ("ab.", CurableSequenceType.EMPTY_LABEL, 2, '.', ''), - - # combining mark at the beginning - ('\u0327a', CurableSequenceType.CM_START, 0, '\u0327', ''), - ('\u0327\u0327', CurableSequenceType.CM_START, 0, '\u0327', ''), - # combining mark after emoji - ('a👩🏿‍🦲\u0327\u0327', CurableSequenceType.CM_EMOJI, len('a👩🏿‍🦲'), '\u0327', ''), - - # disallowed - ('a?', CurableSequenceType.DISALLOWED, 1, '?', ''), - # disallowed/ignored invisible - ('a\u200d', CurableSequenceType.INVISIBLE, 1, '\u200d', ''), - # ignored - (f'a{chr(173)}', NormalizableSequenceType.IGNORED, 1, chr(173), ''), # invisible "soft hyphen" - # mapped - ('aA', NormalizableSequenceType.MAPPED, 1, 'A', 'a'), - # FE0F emoji - ('a🚴‍♂️', NormalizableSequenceType.FE0F, 1, '🚴‍♂️', '🚴‍♂'), - # not NFC normalized - ('aa\u0300b', NormalizableSequenceType.NFC, 1, 'a\u0300', 'à'), - - # fenced - # leading - ("'ab", CurableSequenceType.FENCED_LEADING, 0, "’", ""), - # ("·ab", CurableSequenceType.FENCED_LEADING, 0, "·", ""), # was disallowed - ("⁄ab", CurableSequenceType.FENCED_LEADING, 0, "⁄", ""), - # multi - ("a''b", CurableSequenceType.FENCED_MULTI, 1, "’’", "’"), - # ("a··b", CurableSequenceType.FENCED_MULTI, 1, "··", "·"), - ("a⁄⁄b", CurableSequenceType.FENCED_MULTI, 1, "⁄⁄", "⁄"), - ("a'⁄b", CurableSequenceType.FENCED_MULTI, 1, "’⁄", "’"), - # trailing - ("ab'", CurableSequenceType.FENCED_TRAILING, 2, "’", ""), - # ("ab·", CurableSequenceType.FENCED_TRAILING, 2, "·", ""), - ("ab⁄", CurableSequenceType.FENCED_TRAILING, 2, "⁄", ""), - - # confusables - ('bitcoin.bitcοin.bi̇tcoin.bitсoin', CurableSequenceType.CONF_MIXED, 12, 'ο', ''), - ('0x.0χ.0х', DisallowedSequenceType.CONF_WHOLE, None, None, None), - - # NSM - ('-إؐؑؐ-.eth', DisallowedSequenceType.NSM_REPEATED, None, None, None), - ('-إؐؑؒؓؔ-.eth', DisallowedSequenceType.NSM_TOO_MANY, None, None, None), -]) + assert bad == 0, f"{100 * good / (good + bad):.2f}%, {bad} failing" + + +@pytest.mark.parametrize( + "label,error,start,disallowed,suggested", + [ + ("good", None, None, None, None), + # underscore + ("a_a", CurableSequenceType.UNDERSCORE, 1, "_", ""), + # -- + ("aa--a", CurableSequenceType.HYPHEN, 2, "--", ""), + # empty + ("a..b", CurableSequenceType.EMPTY_LABEL, 1, "..", "."), + (".ab", CurableSequenceType.EMPTY_LABEL, 0, ".", ""), + ("ab.", CurableSequenceType.EMPTY_LABEL, 2, ".", ""), + # combining mark at the beginning + ("\u0327a", CurableSequenceType.CM_START, 0, "\u0327", ""), + ("\u0327\u0327", CurableSequenceType.CM_START, 0, "\u0327", ""), + # combining mark after emoji + ( + "a👩🏿‍🦲\u0327\u0327", + CurableSequenceType.CM_EMOJI, + len("a👩🏿‍🦲"), + "\u0327", + "", + ), + # disallowed + ("a?", CurableSequenceType.DISALLOWED, 1, "?", ""), + # disallowed/ignored invisible + ("a\u200d", CurableSequenceType.INVISIBLE, 1, "\u200d", ""), + # ignored + ( + f"a{chr(173)}", + NormalizableSequenceType.IGNORED, + 1, + chr(173), + "", + ), # invisible "soft hyphen" + # mapped + ("aA", NormalizableSequenceType.MAPPED, 1, "A", "a"), + # FE0F emoji + ("a🚴‍♂️", NormalizableSequenceType.FE0F, 1, "🚴‍♂️", "🚴‍♂"), + # not NFC normalized + ("aa\u0300b", NormalizableSequenceType.NFC, 1, "a\u0300", "à"), + # fenced + # leading + ("'ab", CurableSequenceType.FENCED_LEADING, 0, "’", ""), + # ("·ab", CurableSequenceType.FENCED_LEADING, 0, "·", ""), # was disallowed + ("⁄ab", CurableSequenceType.FENCED_LEADING, 0, "⁄", ""), + # multi + ("a''b", CurableSequenceType.FENCED_MULTI, 1, "’’", "’"), + # ("a··b", CurableSequenceType.FENCED_MULTI, 1, "··", "·"), + ("a⁄⁄b", CurableSequenceType.FENCED_MULTI, 1, "⁄⁄", "⁄"), + ("a'⁄b", CurableSequenceType.FENCED_MULTI, 1, "’⁄", "’"), + # trailing + ("ab'", CurableSequenceType.FENCED_TRAILING, 2, "’", ""), + # ("ab·", CurableSequenceType.FENCED_TRAILING, 2, "·", ""), + ("ab⁄", CurableSequenceType.FENCED_TRAILING, 2, "⁄", ""), + # confusables + ( + "bitcoin.bitcοin.bi̇tcoin.bitсoin", + CurableSequenceType.CONF_MIXED, + 12, + "ο", + "", + ), + ("0x.0χ.0х", DisallowedSequenceType.CONF_WHOLE, None, None, None), + # NSM + ("-إؐؑؐ-.eth", DisallowedSequenceType.NSM_REPEATED, None, None, None), + ("-إؐؑؒؓؔ-.eth", DisallowedSequenceType.NSM_TOO_MANY, None, None, None), + ], +) def test_ens_normalization_reason(label, error, start, disallowed, suggested): res = ens_process(label, do_normalizations=True) if error is None: @@ -176,71 +191,69 @@ def test_ens_normalization_reason(label, error, start, disallowed, suggested): @pytest.mark.parametrize( - 'error_type, code', + "error_type, code", [ - (CurableSequenceType.UNDERSCORE, 'UNDERSCORE'), - (CurableSequenceType.HYPHEN, 'HYPHEN'), - (CurableSequenceType.CM_START, 'CM_START'), - (CurableSequenceType.CM_EMOJI, 'CM_EMOJI'), - (CurableSequenceType.DISALLOWED, 'DISALLOWED'), - (CurableSequenceType.INVISIBLE, 'INVISIBLE'), - (NormalizableSequenceType.IGNORED, 'IGNORED'), - (NormalizableSequenceType.MAPPED, 'MAPPED'), - (NormalizableSequenceType.FE0F, 'FE0F'), - (NormalizableSequenceType.NFC, 'NFC'), - ] + (CurableSequenceType.UNDERSCORE, "UNDERSCORE"), + (CurableSequenceType.HYPHEN, "HYPHEN"), + (CurableSequenceType.CM_START, "CM_START"), + (CurableSequenceType.CM_EMOJI, "CM_EMOJI"), + (CurableSequenceType.DISALLOWED, "DISALLOWED"), + (CurableSequenceType.INVISIBLE, "INVISIBLE"), + (NormalizableSequenceType.IGNORED, "IGNORED"), + (NormalizableSequenceType.MAPPED, "MAPPED"), + (NormalizableSequenceType.FE0F, "FE0F"), + (NormalizableSequenceType.NFC, "NFC"), + ], ) def test_normalization_error_type_code(error_type: DisallowedSequenceType, code: str): assert error_type.code == code -@pytest.mark.parametrize('text', [ - # multi char emoji - 'abc👩🏿‍🦲', - # NFC - 'aa\u0300b', - # mapped - 'a¼b' - # ignored - 'a\xadb', - # multi label - 'abc.abc.abc.abc', - 'abc.abc.abc👩🏿‍🦲.aa\u0300b.a¼b.a\xadb', -]) +@pytest.mark.parametrize( + "text", + [ + # multi char emoji + "abc👩🏿‍🦲", + # NFC + "aa\u0300b", + # mapped + "a¼b" + # ignored + "a\xadb", + # multi label + "abc.abc.abc.abc", + "abc.abc.abc👩🏿‍🦲.aa\u0300b.a¼b.a\xadb", + ], +) def test_ens_norm_error_pos(text): - ret = ens_process(text + '_') + ret = ens_process(text + "_") assert ret.error.type == CurableSequenceType.UNDERSCORE assert ret.error.index == len(text) - assert ret.error.sequence == '_' - assert ret.error.suggested == '' + assert ret.error.sequence == "_" + assert ret.error.suggested == "" def test_ens_norm_error_pos_disallowed(): - t = 'abc.abc.abc👩🏿‍🦲.aa\u0300b.a¼b.a\xadb' - ret = ens_process(t + '?') + t = "abc.abc.abc👩🏿‍🦲.aa\u0300b.a¼b.a\xadb" + ret = ens_process(t + "?") assert ret.error.type == CurableSequenceType.DISALLOWED assert ret.error.index == len(t) - assert ret.error.sequence == '?' - assert ret.error.suggested == '' + assert ret.error.sequence == "?" + assert ret.error.suggested == "" def test_ens_norm_error_pos_nfc(): - t = 'abc.abc.abc👩🏿‍🦲.ab.ab.ab' - ret = ens_process(t + 'a\u0300', do_normalizations=True) + t = "abc.abc.abc👩🏿‍🦲.ab.ab.ab" + ret = ens_process(t + "a\u0300", do_normalizations=True) e = ret.normalizations[0] assert e.type == NormalizableSequenceType.NFC assert e.index == len(t) - assert e.sequence == 'a\u0300' - assert e.suggested == 'à' + assert e.sequence == "a\u0300" + assert e.suggested == "à" def test_ens_warnings_many(): - t = ( - f'a{chr(173)}' + - 'aA.' + - 'a🚴‍♂️' + - 'aa\u0300b' - ) + t = f"a{chr(173)}" + "aA." + "a🚴‍♂️" + "aa\u0300b" warnings = ens_normalizations(t) assert len(warnings) == 4 @@ -249,185 +262,212 @@ def test_ens_warnings_many(): assert e.type == NormalizableSequenceType.IGNORED assert e.index == 1 assert e.sequence == chr(173) - assert e.suggested == '' + assert e.suggested == "" e = warnings[1] assert e.type == NormalizableSequenceType.MAPPED assert e.index == 3 - assert e.sequence == 'A' - assert e.suggested == 'a' + assert e.sequence == "A" + assert e.suggested == "a" e = warnings[2] assert e.type == NormalizableSequenceType.FE0F assert e.index == 6 - assert e.sequence == '🚴‍♂️' - assert e.suggested == '🚴‍♂' + assert e.sequence == "🚴‍♂️" + assert e.suggested == "🚴‍♂" e = warnings[3] assert e.type == NormalizableSequenceType.NFC assert e.index == 11 - assert e.sequence == 'a\u0300' - assert e.suggested == 'à' + assert e.sequence == "a\u0300" + assert e.suggested == "à" def test_throws(): - t = 'a_b' + t = "a_b" with pytest.raises(CurableSequence) as e: ens_normalize(t) assert e.value.type == CurableSequenceType.UNDERSCORE assert e.value.index == 1 - assert e.value.sequence == '_' - assert e.value.suggested == '' + assert e.value.sequence == "_" + assert e.value.suggested == "" with pytest.raises(CurableSequence) as e: ens_beautify(t) assert e.value.type == CurableSequenceType.UNDERSCORE assert e.value.index == 1 - assert e.value.sequence == '_' - assert e.value.suggested == '' + assert e.value.sequence == "_" + assert e.value.suggested == "" with pytest.raises(CurableSequence) as e: ens_normalizations(t) assert e.value.type == CurableSequenceType.UNDERSCORE assert e.value.index == 1 - assert e.value.sequence == '_' - assert e.value.suggested == '' + assert e.value.sequence == "_" + assert e.value.suggested == "" def test_ens_is_normalized(): - assert is_ens_normalized('a') - assert not is_ens_normalized('a_b') - assert not is_ens_normalized('Abc') - assert is_ens_normalized('') + assert is_ens_normalized("a") + assert not is_ens_normalized("a_b") + assert not is_ens_normalized("Abc") + assert is_ens_normalized("") def test_normalization_error_object(): - t = 'a_b' + t = "a_b" try: ens_normalize(t) except CurableSequence as e: assert e.type == CurableSequenceType.UNDERSCORE assert e.index == 1 - assert e.sequence == '_' - assert e.suggested == '' + assert e.sequence == "_" + assert e.suggested == "" assert e.code == CurableSequenceType.UNDERSCORE.code assert e.general_info == CurableSequenceType.UNDERSCORE.general_info assert e.sequence_info == CurableSequenceType.UNDERSCORE.sequence_info assert str(e) == e.general_info - assert repr(e) == 'CurableSequence(code="UNDERSCORE", index=1, sequence="_", suggested="")' + assert ( + repr(e) + == 'CurableSequence(code="UNDERSCORE", index=1, sequence="_", suggested="")' + ) try: - ens_normalize('0х0') + ens_normalize("0х0") except DisallowedSequence as e: assert e.type == DisallowedSequenceType.CONF_WHOLE assert e.code == DisallowedSequenceType.CONF_WHOLE.code - assert e.general_info == DisallowedSequenceType.CONF_WHOLE.general_info.format(script1='Cyrillic', script2='Latin') + assert e.general_info == DisallowedSequenceType.CONF_WHOLE.general_info.format( + script1="Cyrillic", script2="Latin" + ) assert str(e) == e.general_info assert repr(e) == 'DisallowedSequence(code="CONF_WHOLE")' def test_error_is_exception(): with pytest.raises(Exception): - ens_normalize('0х0') + ens_normalize("0х0") def test_str_repr(): - e = ens_process('a_').error + e = ens_process("a_").error assert str(e) == CurableSequenceType.UNDERSCORE.general_info - assert repr(e) == 'CurableSequence(code="UNDERSCORE", index=1, sequence="_", suggested="")' + assert ( + repr(e) + == 'CurableSequence(code="UNDERSCORE", index=1, sequence="_", suggested="")' + ) def test_ens_cure(): - assert ens_cure('Ab') == 'ab' - assert ens_cure('a_b') == 'ab' - assert ens_cure('a\'\'b') == 'a’b' - assert ens_cure('bitcoin.bitcοin.bi̇tcoin') == 'bitcoin.bitcin.bitcoin' + assert ens_cure("Ab") == "ab" + assert ens_cure("a_b") == "ab" + assert ens_cure("a''b") == "a’b" + assert ens_cure("bitcoin.bitcοin.bi̇tcoin") == "bitcoin.bitcin.bitcoin" with pytest.raises(DisallowedSequence) as e: - ens_cure('0x.0χ.0х') + ens_cure("0x.0χ.0х") assert e.value.type == DisallowedSequenceType.CONF_WHOLE - assert ens_cure('?') == '' - assert ens_cure('abc.?') == 'abc' - assert ens_cure('abc.?.xyz') == 'abc.xyz' - assert ens_cure('?.xyz') == 'xyz' - assert ens_cure('abc..?.xyz') == 'abc.xyz' + assert ens_cure("?") == "" + assert ens_cure("abc.?") == "abc" + assert ens_cure("abc.?.xyz") == "abc.xyz" + assert ens_cure("?.xyz") == "xyz" + assert ens_cure("abc..?.xyz") == "abc.xyz" def test_ens_process_cure(): - ret = ens_process('a_..b', do_cure=True) - assert ret.cured == 'a.b' - assert [e.code for e in ret.cures] == ['EMPTY_LABEL', 'UNDERSCORE'] - ret = ens_process('', do_cure=True) - assert ret.cured == '' + ret = ens_process("a_..b", do_cure=True) + assert ret.cured == "a.b" + assert [e.code for e in ret.cures] == ["EMPTY_LABEL", "UNDERSCORE"] + ret = ens_process("", do_cure=True) + assert ret.cured == "" assert ret.cures == [] - ret = ens_process('0х0', do_cure=True) + ret = ens_process("0х0", do_cure=True) assert ret.cured is None assert ret.cures is None def test_error_meta(): # mixed - e: CurableSequence = ens_process('bitcoin.bitcοin.bi̇tcoin.bitсoin').error - assert e.general_info == 'Contains visually confusing characters from multiple scripts (Greek/Latin)' - assert e.sequence_info == 'This character from the Greek script is disallowed because it is visually confusing with another character from the Latin script' - assert e.sequence == 'ο' + e: CurableSequence = ens_process("bitcoin.bitcοin.bi̇tcoin.bitсoin").error + assert ( + e.general_info + == "Contains visually confusing characters from multiple scripts (Greek/Latin)" + ) + assert ( + e.sequence_info + == "This character from the Greek script is disallowed because it is visually confusing with another character from the Latin script" + ) + assert e.sequence == "ο" # whole - e = ens_process('0x.0χ.0х').error - assert e.general_info == 'Contains visually confusing characters from Cyrillic and Latin scripts' + e = ens_process("0x.0χ.0х").error + assert ( + e.general_info + == "Contains visually confusing characters from Cyrillic and Latin scripts" + ) # unknown script for character c = chr(771) - e: CurableSequence = ens_process(f'bitcoin.bitcin.bi̇tcin.bitсin{c}').error - assert e.general_info == 'Contains visually confusing characters from multiple scripts (Latin plus other scripts)' - assert e.sequence_info == 'This character is disallowed because it is visually confusing with another character from the Latin script' + e: CurableSequence = ens_process(f"bitcoin.bitcin.bi̇tcin.bitсin{c}").error + assert ( + e.general_info + == "Contains visually confusing characters from multiple scripts (Latin plus other scripts)" + ) + assert ( + e.sequence_info + == "This character is disallowed because it is visually confusing with another character from the Latin script" + ) def test_unicode_version_check(mocker): - mocker.patch('ens_normalize.normalization.UNICODE_VERSION', '15.0.1') - warnings.filterwarnings('error') - with pytest.raises(UnicodeWarning, match=r'Unicode version mismatch'): + mocker.patch("ens_normalize.normalization.UNICODE_VERSION", "15.0.1") + warnings.filterwarnings("error") + with pytest.raises(UnicodeWarning, match=r"Unicode version mismatch"): ens_normalize_module.normalization.check_spec_unicode_version() def test_ens_cure_max_iters(mocker): - mocker.patch('ens_normalize.normalization.ens_normalize', lambda _: ens_normalize('?')) - with pytest.raises(Exception, match=r'ens_cure\(\) exceeded max iterations'): - ens_cure('???') + mocker.patch( + "ens_normalize.normalization.ens_normalize", lambda _: ens_normalize("?") + ) + with pytest.raises(Exception, match=r"ens_cure\(\) exceeded max iterations"): + ens_cure("???") def test_data_creation(): - data = ens_normalize_module.normalization.NormalizationData(os.path.join(os.path.dirname(__file__), '..', 'tools', 'updater', 'spec.json')) + data = ens_normalize_module.normalization.NormalizationData( + os.path.join(os.path.dirname(__file__), "..", "tools", "updater", "spec.json") + ) buf1 = pickletools.optimize(pickle.dumps(data, protocol=5)) - with open(ens_normalize_module.normalization.SPEC_PICKLE_PATH, 'rb') as f: + with open(ens_normalize_module.normalization.SPEC_PICKLE_PATH, "rb") as f: buf2 = f.read() assert buf1 == buf2 def test_empty_name(): - assert ens_normalize('') == '' - assert ens_beautify('') == '' - assert ens_tokenize('') == [] - assert ens_cure('') == '' + assert ens_normalize("") == "" + assert ens_beautify("") == "" + assert ens_tokenize("") == [] + assert ens_cure("") == "" def test_ignorable_name(): - assert ens_process('').error is None - e = ens_process('\ufe0f\ufe0f').error + assert ens_process("").error is None + e = ens_process("\ufe0f\ufe0f").error assert e.type == CurableSequenceType.EMPTY_LABEL assert e.index == 0 - assert e.sequence == '\ufe0f\ufe0f' + assert e.sequence == "\ufe0f\ufe0f" def test_is_normalizable(): - assert is_ens_normalizable('nick.eth') - assert not is_ens_normalizable('ni_ck.eth') - assert is_ens_normalizable('') + assert is_ens_normalizable("nick.eth") + assert not is_ens_normalizable("ni_ck.eth") + assert is_ens_normalizable("") def test_simple_name_optimization(): r = ens_process( - 'abc123.eth', + "abc123.eth", do_normalize=False, do_beautify=False, do_tokenize=False, @@ -443,14 +483,14 @@ def test_simple_name_optimization(): assert r.normalizations is None r = ens_process( - 'abc123.eth', + "abc123.eth", do_normalize=True, do_beautify=False, do_tokenize=False, do_normalizations=False, do_cure=False, ) - assert r.normalized == 'abc123.eth' + assert r.normalized == "abc123.eth" assert r.beautified is None assert r.tokens is None assert r.cured is None @@ -459,7 +499,7 @@ def test_simple_name_optimization(): assert r.normalizations is None r = ens_process( - 'abc123.eth', + "abc123.eth", do_normalize=False, do_beautify=True, do_tokenize=False, @@ -467,7 +507,7 @@ def test_simple_name_optimization(): do_cure=False, ) assert r.normalized is None - assert r.beautified == 'abc123.eth' + assert r.beautified == "abc123.eth" assert r.tokens is None assert r.cured is None assert r.cures is None @@ -475,7 +515,7 @@ def test_simple_name_optimization(): assert r.normalizations is None r = ens_process( - 'abc123.eth', + "abc123.eth", do_normalize=False, do_beautify=False, do_tokenize=True, @@ -491,7 +531,7 @@ def test_simple_name_optimization(): assert r.normalizations is None r = ens_process( - 'abc123.eth', + "abc123.eth", do_normalize=False, do_beautify=False, do_tokenize=False, @@ -509,7 +549,7 @@ def test_simple_name_optimization(): assert len(r.normalizations) == 0 r = ens_process( - 'abc123.eth', + "abc123.eth", do_normalize=False, do_beautify=False, do_tokenize=False, @@ -519,7 +559,7 @@ def test_simple_name_optimization(): assert r.normalized is None assert r.beautified is None assert r.tokens is None - assert r.cured == 'abc123.eth' + assert r.cured == "abc123.eth" assert r.cures is not None assert len(r.cures) == 0 assert r.error is None diff --git a/tools/updater/update_ens.py b/tools/updater/update_ens.py index 07aa029..ff6a6a3 100644 --- a/tools/updater/update_ens.py +++ b/tools/updater/update_ens.py @@ -5,15 +5,17 @@ from ens_normalize.normalization import NormalizationData -SPEC_JSON_PATH = os.path.join(os.path.dirname(__file__), 'spec.json') -SPEC_PICKLE_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'ens_normalize', 'spec.pickle') +SPEC_JSON_PATH = os.path.join(os.path.dirname(__file__), "spec.json") +SPEC_PICKLE_PATH = os.path.join( + os.path.dirname(__file__), "..", "..", "ens_normalize", "spec.pickle" +) INDEX_JS_PATH = os.path.join( os.path.dirname(__file__), - 'node_modules', - '@adraffy', - 'ens-normalize', - 'dist', - 'index.mjs', + "node_modules", + "@adraffy", + "ens-normalize", + "dist", + "index.mjs", ) @@ -22,23 +24,23 @@ def generate_pickle(): # Python >= 3.8 is required for protocol 5 buf = pickle.dumps(data, protocol=5) buf = pickletools.optimize(buf) - with open(SPEC_PICKLE_PATH, 'wb') as f: + with open(SPEC_PICKLE_PATH, "wb") as f: f.write(buf) def add_whole_map_export(): - with open(INDEX_JS_PATH, encoding='utf-8') as f: + with open(INDEX_JS_PATH, encoding="utf-8") as f: content = f.read() - content += '\n\n// added by update_ens.py\ninit();\nexport {WHOLE_MAP};\n' + content += "\n\n// added by update_ens.py\ninit();\nexport {WHOLE_MAP};\n" - with open(INDEX_JS_PATH, 'w', encoding='utf-8') as f: + with open(INDEX_JS_PATH, "w", encoding="utf-8") as f: f.write(content) -if __name__ == '__main__': - shutil.rmtree('node_modules', ignore_errors=True) - os.system('npm install') +if __name__ == "__main__": + shutil.rmtree("node_modules", ignore_errors=True) + os.system("npm install") add_whole_map_export() - os.system('node update-ens.js') + os.system("node update-ens.js") generate_pickle()