Skip to content

Commit

Permalink
Refactor: Move some code to new files for reuse (#3434)
Browse files Browse the repository at this point in the history
  • Loading branch information
nthykier authored May 28, 2024
1 parent a31bdc4 commit e89fd5b
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 62 deletions.
65 changes: 3 additions & 62 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
Tuple,
)

from ._spellchecker import Misspelling, build_dict
from ._text_util import fix_case

# autogenerated by setuptools_scm
from ._version import ( # type: ignore[import-not-found]
__version__ as VERSION, # noqa: N812
Expand All @@ -52,9 +55,6 @@
"(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
"\\b[\\w.%+-]+@[\\w.-]+\\b)"
)
# Pass all misspellings through this translation table to generate
# alternative misspellings and fixes.
# Each entry is a (character, alternative) pair: build_dict() turns every
# pair into a str.maketrans() table and, for each dictionary key that
# contains the character, also registers the translated key and fixes.
alt_chars = (("'", "’"),) # noqa: RUF001
inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
Expand Down Expand Up @@ -167,13 +167,6 @@ def match(self, filename: str) -> bool:
return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)


class Misspelling:
    """One dictionary entry: the suggested fix(es) for a misspelled word.

    Attributes are plain and public:

    * ``data``   -- the replacement text stored for the misspelling.
    * ``fix``    -- whether the entry is flagged as directly fixable.
    * ``reason`` -- free-form explanation attached to the entry (may be empty).
    """

    def __init__(self, data: str, fix: bool, reason: str) -> None:
        self.data = data
        self.fix = fix
        self.reason = reason

    def __repr__(self) -> str:
        # Readable representation for debugging and assertion messages.
        return (
            f"{type(self).__name__}(data={self.data!r}, "
            f"fix={self.fix!r}, reason={self.reason!r})"
        )


class TermColors:
def __init__(self) -> None:
self.FILE = "\033[33m"
Expand Down Expand Up @@ -703,48 +696,6 @@ def build_ignore_words(
)


def add_misspelling(
    key: str,
    data: str,
    misspellings: Dict[str, Misspelling],
) -> None:
    """Parse one dictionary value and store it in *misspellings* under *key*.

    *data* is stripped of surrounding whitespace first.  If it contains no
    comma, it is stored whole as a fixable entry with an empty reason.
    Otherwise the text after the last comma (left-stripped) becomes the
    reason, the text before it becomes the data, and the entry is stored as
    not fixable.
    """
    entry = data.strip()
    # rpartition splits on the LAST comma; the separator is empty when
    # there is no comma at all.
    candidates, comma, trailer = entry.rpartition(",")

    if not comma:
        misspellings[key] = Misspelling(entry, True, "")
        return

    misspellings[key] = Misspelling(candidates, False, trailer.lstrip())


def build_dict(
    filename: str,
    misspellings: Dict[str, Misspelling],
    ignore_words: Set[str],
) -> None:
    """Load the ``key->data`` lines of *filename* into *misspellings*.

    Every line is split on ``"->"`` into exactly two parts (anything else
    raises ``ValueError`` from the unpacking) and both parts are lowercased.
    The entry is registered through ``add_misspelling`` unless the key is in
    *ignore_words*.  For each ``alt_chars`` pair whose first character occurs
    in the key, the translated key/data pair is registered as well, subject
    to the same ignore check.
    """

    def register(word: str, replacement: str) -> None:
        # Ignored words never make it into the dictionary.
        if word not in ignore_words:
            add_misspelling(word, replacement, misspellings)

    with open(filename, encoding="utf-8") as dict_file:
        alt_tables = [(src, str.maketrans(src, dst)) for src, dst in alt_chars]
        for entry in dict_file:
            wrong, right = entry.split("->")
            # TODO: For now, convert both to lower.
            # Someday we can maybe add support for fixing caps.
            wrong = wrong.lower()
            right = right.lower()
            register(wrong, right)
            # generate alternative misspellings/fixes
            for src, table in alt_tables:
                if src not in wrong:
                    continue
                register(wrong.translate(table), right.translate(table))


def is_hidden(filename: str, check_hidden: bool) -> bool:
bfilename = os.path.basename(filename)

Expand All @@ -759,16 +710,6 @@ def is_text_file(filename: str) -> bool:
return b"\x00" not in s


def fix_case(word: str, fixword: str) -> str:
    """Return *fixword* with its letter case adapted to that of *word*.

    * If *word* equals its own ``capitalize()`` form, *fixword* is split on
      commas and each stripped piece is capitalized, then re-joined with
      ``", "``.
    * Otherwise, if *word* equals its own ``upper()`` form, *fixword* is
      upper-cased as a whole.
    * In every other case *fixword* is returned unchanged.
    """
    looks_capitalized = word == word.capitalize()
    if looks_capitalized:
        suggestions = [piece.strip().capitalize() for piece in fixword.split(",")]
        return ", ".join(suggestions)

    # Either both are lower case, or the casing is not recognisable.
    return fixword.upper() if word == word.upper() else fixword


def ask_for_word_fix(
line: str,
match: Match[str],
Expand Down
75 changes: 75 additions & 0 deletions codespell_lib/_spellchecker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see
# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
"""
Copyright (C) 2010-2011 Lucas De Marchi <[email protected]>
Copyright (C) 2011 ProFUSION embedded systems
"""

from typing import (
Dict,
Set,
)

# Pass all misspellings through this translation table to generate
# alternative misspellings and fixes.
# Each entry is a (character, alternative) pair: build_dict() turns every
# pair into a str.maketrans() table and, for each dictionary key that
# contains the character, also registers the translated key and fixes.
alt_chars = (("'", "’"),) # noqa: RUF001


class Misspelling:
    """One dictionary entry: the suggested fix(es) for a misspelled word.

    Attributes are plain and public:

    * ``data``   -- the replacement text stored for the misspelling.
    * ``fix``    -- whether the entry is flagged as directly fixable.
    * ``reason`` -- free-form explanation attached to the entry (may be empty).
    """

    def __init__(self, data: str, fix: bool, reason: str) -> None:
        self.data = data
        self.fix = fix
        self.reason = reason

    def __repr__(self) -> str:
        # Readable representation for debugging and assertion messages.
        return (
            f"{type(self).__name__}(data={self.data!r}, "
            f"fix={self.fix!r}, reason={self.reason!r})"
        )


def add_misspelling(
    key: str,
    data: str,
    misspellings: Dict[str, Misspelling],
) -> None:
    """Parse one dictionary value and store it in *misspellings* under *key*.

    *data* is stripped of surrounding whitespace first.  If it contains no
    comma, it is stored whole as a fixable entry with an empty reason.
    Otherwise the text after the last comma (left-stripped) becomes the
    reason, the text before it becomes the data, and the entry is stored as
    not fixable.
    """
    entry = data.strip()
    # rpartition splits on the LAST comma; the separator is empty when
    # there is no comma at all.
    candidates, comma, trailer = entry.rpartition(",")

    if not comma:
        misspellings[key] = Misspelling(entry, True, "")
        return

    misspellings[key] = Misspelling(candidates, False, trailer.lstrip())


def build_dict(
    filename: str,
    misspellings: Dict[str, Misspelling],
    ignore_words: Set[str],
) -> None:
    """Load the ``key->data`` lines of *filename* into *misspellings*.

    Every line is split on ``"->"`` into exactly two parts (anything else
    raises ``ValueError`` from the unpacking) and both parts are lowercased.
    The entry is registered through ``add_misspelling`` unless the key is in
    *ignore_words*.  For each ``alt_chars`` pair whose first character occurs
    in the key, the translated key/data pair is registered as well, subject
    to the same ignore check.
    """

    def register(word: str, replacement: str) -> None:
        # Ignored words never make it into the dictionary.
        if word not in ignore_words:
            add_misspelling(word, replacement, misspellings)

    with open(filename, encoding="utf-8") as dict_file:
        alt_tables = [(src, str.maketrans(src, dst)) for src, dst in alt_chars]
        for entry in dict_file:
            wrong, right = entry.split("->")
            # TODO: For now, convert both to lower.
            # Someday we can maybe add support for fixing caps.
            wrong = wrong.lower()
            right = right.lower()
            register(wrong, right)
            # generate alternative misspellings/fixes
            for src, table in alt_tables:
                if src not in wrong:
                    continue
                register(wrong.translate(table), right.translate(table))
27 changes: 27 additions & 0 deletions codespell_lib/_text_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see
# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
"""
Copyright (C) 2010-2011 Lucas De Marchi <[email protected]>
Copyright (C) 2011 ProFUSION embedded systems
"""


def fix_case(word: str, fixword: str) -> str:
    """Return *fixword* with its letter case adapted to that of *word*.

    * If *word* equals its own ``capitalize()`` form, *fixword* is split on
      commas and each stripped piece is capitalized, then re-joined with
      ``", "``.
    * Otherwise, if *word* equals its own ``upper()`` form, *fixword* is
      upper-cased as a whole.
    * In every other case *fixword* is returned unchanged.
    """
    looks_capitalized = word == word.capitalize()
    if looks_capitalized:
        suggestions = [piece.strip().capitalize() for piece in fixword.split(",")]
        return ", ".join(suggestions)

    # Either both are lower case, or the casing is not recognisable.
    return fixword.upper() if word == word.upper() else fixword

0 comments on commit e89fd5b

Please sign in to comment.