diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 64db267776..5598e6b11d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -58,7 +58,7 @@ repos:
           - -d
           - "{extends: relaxed, rules: {line-length: {max: 90}}}"
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.4
+    rev: v0.4.5
     hooks:
       - id: ruff
       - id: ruff-format
@@ -68,7 +68,7 @@ repos:
       - id: prettier
         types_or: [yaml, markdown, html, css, scss, javascript, json]
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
     hooks:
       - id: codespell
         args: [--toml, pyproject-codespell.precommit-toml]
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 62a51b75b3..2ea9764e63 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -39,6 +39,9 @@
     Tuple,
 )
 
+from ._spellchecker import DetectedMisspelling, LineTokenizer, Spellchecker
+from ._text_util import fix_case
+
 # autogenerated by setuptools_scm
 from ._version import (  # type: ignore[import-not-found]
     __version__ as VERSION,  # noqa: N812
@@ -52,10 +55,6 @@
     "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|"
     "\\b[\\w.%+-]+@[\\w.-]+\\b)"
 )
-# Pass all misspellings through this translation table to generate
-# alternative misspellings and fixes.
-alt_chars = (("'", "’"),)  # noqa: RUF001
-inline_ignore_regex = re.compile(r"[^\w\s]\s?codespell:ignore\b(\s+(?P<words>[\w,]*))?")
 USAGE = """
 \t%prog [OPTIONS] [file1 file2 ... fileN]
 """
@@ -167,13 +166,6 @@ def match(self, filename: str) -> bool:
         return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)
 
 
-class Misspelling:
-    def __init__(self, data: str, fix: bool, reason: str) -> None:
-        self.data = data
-        self.fix = fix
-        self.reason = reason
-
-
 class TermColors:
     def __init__(self) -> None:
         self.FILE = "\033[33m"
@@ -703,48 +695,6 @@ def build_ignore_words(
         )
 
 
-def add_misspelling(
-    key: str,
-    data: str,
-    misspellings: Dict[str, Misspelling],
-) -> None:
-    data = data.strip()
-
-    if "," in data:
-        fix = False
-        data, reason = data.rsplit(",", 1)
-        reason = reason.lstrip()
-    else:
-        fix = True
-        reason = ""
-
-    misspellings[key] = Misspelling(data, fix, reason)
-
-
-def build_dict(
-    filename: str,
-    misspellings: Dict[str, Misspelling],
-    ignore_words: Set[str],
-) -> None:
-    with open(filename, encoding="utf-8") as f:
-        translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
-        for line in f:
-            [key, data] = line.split("->")
-            # TODO: For now, convert both to lower.
-            #       Someday we can maybe add support for fixing caps.
-            key = key.lower()
-            data = data.lower()
-            if key not in ignore_words:
-                add_misspelling(key, data, misspellings)
-            # generate alternative misspellings/fixes
-            for x, table in translate_tables:
-                if x in key:
-                    alt_key = key.translate(table)
-                    alt_data = data.translate(table)
-                    if alt_key not in ignore_words:
-                        add_misspelling(alt_key, alt_data, misspellings)
-
-
 def is_hidden(filename: str, check_hidden: bool) -> bool:
     bfilename = os.path.basename(filename)
 
@@ -759,26 +709,18 @@ def is_text_file(filename: str) -> bool:
     return b"\x00" not in s
 
 
-def fix_case(word: str, fixword: str) -> str:
-    if word == word.capitalize():
-        return ", ".join(w.strip().capitalize() for w in fixword.split(","))
-    if word == word.upper():
-        return fixword.upper()
-    # they are both lower case
-    # or we don't have any idea
-    return fixword
-
-
 def ask_for_word_fix(
     line: str,
-    match: Match[str],
-    misspelling: Misspelling,
+    issue: "DetectedMisspelling[re.Match[str]]",
     interactivity: int,
     colors: TermColors,
-) -> Tuple[bool, str]:
-    wrongword = match.group()
+) -> Tuple[bool, Sequence[str]]:
+    wrongword = issue.word
+    misspelling = issue.misspelling
     if interactivity <= 0:
-        return misspelling.fix, fix_case(wrongword, misspelling.data)
+        return misspelling.fix, fix_case(wrongword, misspelling.candidates)
+
+    match = issue.token
 
     line_ui = (
         f"{line[:match.start()]}"
@@ -788,7 +730,8 @@ def ask_for_word_fix(
 
     if misspelling.fix and interactivity & 1:
         r = ""
-        fixword = fix_case(wrongword, misspelling.data)
+        candidates = fix_case(wrongword, misspelling.candidates)
+        fixword = candidates[0]
         while not r:
             print(f"{line_ui}\t{wrongword} ==> {fixword} (Y/n) ", end="", flush=True)
             r = sys.stdin.readline().strip().upper()
@@ -806,12 +749,12 @@ def ask_for_word_fix(
         # we ask the user which word to use
 
         r = ""
-        opt = [w.strip() for w in misspelling.data.split(",")]
+        opt = misspelling.candidates
         while not r:
             print(f"{line_ui} Choose an option (blank for none): ", end="")
-            for i, o in enumerate(opt):
-                fixword = fix_case(wrongword, o)
-                print(f" {i}) {fixword}", end="")
+            cased_candidates = fix_case(wrongword, opt)
+            for i, candidates in enumerate(cased_candidates):
+                print(f" {i}) {candidates}", end="")
             print(": ", end="", flush=True)
 
             n = sys.stdin.readline().strip()
@@ -826,9 +769,9 @@ def ask_for_word_fix(
 
         if r:
             misspelling.fix = True
-            misspelling.data = r
+            misspelling.candidates = (r,)
 
-    return misspelling.fix, fix_case(wrongword, misspelling.data)
+    return misspelling.fix, fix_case(wrongword, misspelling.candidates)
 
 
 def print_context(
@@ -888,12 +831,39 @@ def apply_uri_ignore_words(
     return check_matches
 
 
+def line_tokenizer_factory(
+    uri_ignore_words: Set[str],
+    uri_regex: Pattern[str],
+    word_regex: Pattern[str],
+    ignore_word_regex: Optional[Pattern[str]],
+) -> "LineTokenizer[re.Match[str]]":
+    """Build the tokenizer that ``parse_file`` passes to the spellchecker."""
+
+    def line_tokenizer(line: str) -> Iterable[Match[str]]:
+        if "*" in uri_ignore_words:
+            # Every URI misspelling is ignored: blank out the URIs up front so
+            # that no word is ever extracted from them.
+            line = uri_regex.sub(" ", line)
+            return extract_words_iter(line, word_regex, ignore_word_regex)
+        # Otherwise filter after extraction, so that a URI ignore word which
+        # also occurs outside of a URI is still reported as a spelling error.
+        return apply_uri_ignore_words(
+            extract_words_iter(line, word_regex, ignore_word_regex),
+            line,
+            word_regex,
+            ignore_word_regex,
+            uri_regex,
+            uri_ignore_words,
+        )
+
+    return line_tokenizer
+
+
 def parse_file(
     filename: str,
     colors: TermColors,
     summary: Optional[Summary],
-    misspellings: Dict[str, Misspelling],
-    ignore_words_cased: Set[str],
+    spellchecker: Spellchecker,
     exclude_lines: Set[str],
     file_opener: FileOpener,
     word_regex: Pattern[str],
@@ -914,22 +884,23 @@ def parse_file(
     else:
         if options.check_filenames:
             for word in extract_words(filename, word_regex, ignore_word_regex):
-                if word in ignore_words_cased:
+                if word in spellchecker.ignore_words_cased:
                     continue
                 lword = word.lower()
-                if lword not in misspellings:
+                misspelling = spellchecker.check_lower_cased_word(lword)
+                if misspelling is None:
                     continue
-                fix = misspellings[lword].fix
-                fixword = fix_case(word, misspellings[lword].data)
+                fix = misspelling.fix
+                candidates = fix_case(word, misspelling.candidates)
 
                 if summary and fix:
                     summary.update(lword)
 
                 cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
                 cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
-                crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"
+                crightword = f"{colors.FWORD}{', '.join(candidates)}{colors.DISABLE}"
 
-                reason = misspellings[lword].reason
+                reason = misspelling.reason
                 if reason:
                     if options.quiet_level & QuietLevels.DISABLED_FIXES:
                         continue
@@ -964,127 +935,90 @@ def parse_file(
         except OSError:
             return bad_count
 
+    line_tokenizer = line_tokenizer_factory(
+        uri_ignore_words,
+        uri_regex,
+        word_regex,
+        ignore_word_regex,
+    )
+
     for i, line in enumerate(lines):
-        if line.rstrip() in exclude_lines:
+        line = line.rstrip()
+        if not line or line in exclude_lines:
             continue
 
-        extra_words_to_ignore = set()
-        match = inline_ignore_regex.search(line)
-        if match:
-            extra_words_to_ignore = set(
-                filter(None, (match.group("words") or "").split(","))
-            )
-            if not extra_words_to_ignore:
-                continue
-
         fixed_words = set()
         asked_for = set()
 
-        # If all URI spelling errors will be ignored, erase any URI before
-        # extracting words. Otherwise, apply ignores after extracting words.
-        # This ensures that if a URI ignore word occurs both inside a URI and
-        # outside, it will still be a spelling error.
-        if "*" in uri_ignore_words:
-            line = uri_regex.sub(" ", line)
-        check_matches = extract_words_iter(line, word_regex, ignore_word_regex)
-        if "*" not in uri_ignore_words:
-            check_matches = apply_uri_ignore_words(
-                check_matches,
-                line,
-                word_regex,
-                ignore_word_regex,
-                uri_regex,
-                uri_ignore_words,
-            )
-        for match in check_matches:
-            word = match.group()
-            if word in ignore_words_cased:
-                continue
-            lword = word.lower()
-            if lword in misspellings and lword not in extra_words_to_ignore:
-                # Sometimes we find a 'misspelling' which is actually a valid word
-                # preceded by a string escape sequence.  Ignore such cases as
-                # they're usually false alarms; see issue #17 among others.
-                char_before_idx = match.start() - 1
-                if (
-                    char_before_idx >= 0
-                    and line[char_before_idx] == "\\"
-                    # bell, backspace, formfeed, newline, carriage-return, tab, vtab.
-                    and word.startswith(("a", "b", "f", "n", "r", "t", "v"))
-                    and lword[1:] not in misspellings
-                ):
-                    continue
+        for issue in spellchecker.spellcheck_line(line, line_tokenizer):
+            misspelling = issue.misspelling
+            word = issue.word
+            lword = issue.lword
 
-                context_shown = False
-                fix = misspellings[lword].fix
-                fixword = fix_case(word, misspellings[lword].data)
-
-                if options.interactive and lword not in asked_for:
-                    if context is not None:
-                        context_shown = True
-                        print_context(lines, i, context)
-                    fix, fixword = ask_for_word_fix(
-                        lines[i],
-                        match,
-                        misspellings[lword],
-                        options.interactive,
-                        colors=colors,
-                    )
-                    asked_for.add(lword)
+            context_shown = False
+            fix = misspelling.fix
+            candidates = fix_case(word, misspelling.candidates)
 
-                if summary and fix:
-                    summary.update(lword)
+            if options.interactive and lword not in asked_for:
+                if context is not None:
+                    context_shown = True
+                    print_context(lines, i, context)
+                fix, candidates = ask_for_word_fix(
+                    lines[i],
+                    issue,
+                    options.interactive,
+                    colors=colors,
+                )
+                asked_for.add(lword)
 
-                if word in fixed_words:  # can skip because of re.sub below
-                    continue
+            if summary and fix:
+                summary.update(lword)
 
-                if options.write_changes and fix:
-                    changed = True
-                    lines[i] = re.sub(rf"\b{word}\b", fixword, lines[i])
-                    fixed_words.add(word)
-                    continue
+            if word in fixed_words:  # can skip because of re.sub below
+                continue
 
-                # otherwise warning was explicitly set by interactive mode
-                if (
-                    options.interactive & 2
-                    and not fix
-                    and not misspellings[lword].reason
-                ):
-                    continue
+            if options.write_changes and fix:
+                changed = True
+                lines[i] = re.sub(rf"\b{word}\b", candidates[0], lines[i])
+                fixed_words.add(word)
+                continue
 
-                cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
-                cline = f"{colors.FILE}{i + 1}{colors.DISABLE}"
-                cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
-                crightword = f"{colors.FWORD}{fixword}{colors.DISABLE}"
+            # otherwise warning was explicitly set by interactive mode
+            if options.interactive & 2 and not fix and not misspelling.reason:
+                continue
 
-                reason = misspellings[lword].reason
-                if reason:
-                    if options.quiet_level & QuietLevels.DISABLED_FIXES:
-                        continue
-                    creason = f"  | {colors.FILE}{reason}{colors.DISABLE}"
-                else:
-                    if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
-                        continue
-                    creason = ""
+            cfilename = f"{colors.FILE}{filename}{colors.DISABLE}"
+            cline = f"{colors.FILE}{i + 1}{colors.DISABLE}"
+            cwrongword = f"{colors.WWORD}{word}{colors.DISABLE}"
+            crightword = f"{colors.FWORD}{', '.join(candidates)}{colors.DISABLE}"
 
-                # If we get to this point (uncorrected error) we should change
-                # our bad_count and thus return value
-                bad_count += 1
+            reason = misspelling.reason
+            if reason:
+                if options.quiet_level & QuietLevels.DISABLED_FIXES:
+                    continue
+                creason = f"  | {colors.FILE}{reason}{colors.DISABLE}"
+            else:
+                if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
+                    continue
+                creason = ""
 
-                if (not context_shown) and (context is not None):
-                    print_context(lines, i, context)
-                if filename != "-":
-                    print(
-                        f"{cfilename}:{cline}: {cwrongword} "
-                        f"==> {crightword}{creason}"
-                    )
-                elif options.stdin_single_line:
-                    print(f"{cline}: {cwrongword} ==> {crightword}{creason}")
-                else:
-                    print(
-                        f"{cline}: {line.strip()}\n\t{cwrongword} "
-                        f"==> {crightword}{creason}"
-                    )
+            # If we get to this point (uncorrected error) we should change
+            # our bad_count and thus return value
+            bad_count += 1
+
+            if (not context_shown) and (context is not None):
+                print_context(lines, i, context)
+            if filename != "-":
+                print(
+                    f"{cfilename}:{cline}: {cwrongword} " f"==> {crightword}{creason}"
+                )
+            elif options.stdin_single_line:
+                print(f"{cline}: {cwrongword} ==> {crightword}{creason}")
+            else:
+                print(
+                    f"{cline}: {line.strip()}\n\t{cwrongword} "
+                    f"==> {crightword}{creason}"
+                )
 
     if changed:
         if filename == "-":
@@ -1229,9 +1163,10 @@ def main(*args: str) -> int:
                 parser.print_help()
                 return EX_USAGE
             use_dictionaries.append(dictionary)
-    misspellings: Dict[str, Misspelling] = {}
+    spellchecker = Spellchecker()
+    spellchecker.ignore_words_cased = ignore_words_cased
     for dictionary in use_dictionaries:
-        build_dict(dictionary, misspellings, ignore_words)
+        spellchecker.add_from_file(dictionary, ignore_words=ignore_words)
     colors = TermColors()
     if not options.colors:
         colors.disable()
@@ -1306,8 +1241,7 @@ def main(*args: str) -> int:
                         fname,
                         colors,
                         summary,
-                        misspellings,
-                        ignore_words_cased,
+                        spellchecker,
                         exclude_lines,
                         file_opener,
                         word_regex,
@@ -1331,8 +1265,7 @@ def main(*args: str) -> int:
                 filename,
                 colors,
                 summary,
-                misspellings,
-                ignore_words_cased,
+                spellchecker,
                 exclude_lines,
                 file_opener,
                 word_regex,
diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py
new file mode 100644
index 0000000000..ac43074798
--- /dev/null
+++ b/codespell_lib/_spellchecker.py
@@ -0,0 +1,298 @@
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see
+# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+"""
+Copyright (C) 2010-2011  Lucas De Marchi <lucas.de.marchi@gmail.com>
+Copyright (C) 2011  ProFUSION embedded systems
+"""
+
+import os
+import re
+from typing import (
+    Container,
+    Dict,
+    FrozenSet,
+    Generic,
+    Iterable,
+    Optional,
+    Protocol,
+    Sequence,
+    TypeVar,
+)
+
+# Pass all misspellings through this translation table to generate
+# alternative misspellings and fixes.
+alt_chars = (("'", "’"),)  # noqa: RUF001
+
+T_co = TypeVar("T_co", bound="Token", covariant=True)
+
+
+supported_languages_en = ("en", "en_GB", "en_US", "en_CA", "en_AU")
+supported_languages = supported_languages_en
+
+# Users might want to link this file into /usr/local/bin, so we resolve the
+# symbolic link path to the real path if necessary.
+_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
+_builtin_dictionaries = (
+    # Fields: name, description, suffix, err in aspell, correction in aspell,
+    # err dictionary array, rep dictionary array.
+    # The arrays must contain the names of aspell dictionaries.
+    # The aspell tests here aren't the ideal state, but the Nones are
+    # realistic for obscure words.
+    ("clear", "for unambiguous errors", "", False, None, supported_languages_en, None),
+    (
+        "rare",
+        "for rare (but valid) words that are likely to be errors",
+        "_rare",
+        None,
+        None,
+        None,
+        None,
+    ),
+    (
+        "informal",
+        "for making informal words more formal",
+        "_informal",
+        True,
+        True,
+        supported_languages_en,
+        supported_languages_en,
+    ),
+    (
+        "usage",
+        "for replacing phrasing with recommended terms",
+        "_usage",
+        None,
+        None,
+        None,
+        None,
+    ),
+    (
+        "code",
+        "for words from code and/or mathematics that are likely to be typos in other contexts (such as uint)",  # noqa: E501
+        "_code",
+        None,
+        None,
+        None,
+        None,
+    ),
+    (
+        "names",
+        "for valid proper names that might be typos",
+        "_names",
+        None,
+        None,
+        None,
+        None,
+    ),
+    (
+        "en-GB_to_en-US",
+        "for corrections from en-GB to en-US",
+        "_en-GB_to_en-US",
+        True,
+        True,
+        ("en_GB",),
+        ("en_US",),
+    ),
+)
+_builtin_default = "clear,rare"
+
+_builtin_default_as_tuple = tuple(_builtin_default.split(","))
+
+_codespell_ignore_tag = "codespell:ignore"
+_inline_ignore_regex = re.compile(
+    rf"[^\w\s]\s?{_codespell_ignore_tag}\b(\s+(?P<words>[\w,]*))?"
+)
+
+
+class UnknownBuiltinDictionaryError(ValueError):
+    def __init__(self, name: str) -> None:  # name: the unrecognised dictionary
+        super().__init__(f"Unknown built-in dictionary: {name}")
+
+
+class BuiltinDictionariesAlreadyLoadedError(TypeError):
+    def __init__(self) -> None:
+        super().__init__(
+            "load_builtin_dictionaries must not be called more than once",
+        )  # NOTE(review): load_builtin_dictionaries is not defined in this file
+
+
+class LineTokenizer(Protocol[T_co]):
+    """Callable that splits a line into the tokens that will be spellchecked
+
+    For simple cases a regex will do. A minimal (probably too simple) example:
+
+        >>> tokenizer = re.compile(r"[^ ]+").finditer
+
+    For more complex cases, use either a more elaborate regex or custom
+    tokenization code.
+    """
+
+    def __call__(self, line: str) -> Iterable[T_co]: ...
+
+
+class Token(Protocol):
+    """Describes a token
+
+    This is a protocol to support `re.Match[str]` (which codespell uses) and any
+    other tokenization method that our API consumers might be using.
+    """
+
+    def group(self) -> str: ...  # the text of the token (the word to check)
+
+    def start(self) -> int: ...  # index of the token's first character in the line
+
+
+class Misspelling:
+    def __init__(self, candidates: Sequence[str], fix: bool, reason: str) -> None:
+        self.candidates = candidates  # suggested replacements, in dictionary order
+        self.fix = fix  # whether the correction may be applied automatically
+        self.reason = reason  # explanation from the dictionary entry; "" when none
+
+
+class DetectedMisspelling(Generic[T_co]):
+    def __init__(
+        self,
+        word: str,
+        lword: str,
+        misspelling: Misspelling,
+        token: T_co,
+    ) -> None:
+        self.word = word  # the word exactly as the tokenizer returned it
+        self.lword = lword  # lower-cased ``word``, i.e. the dictionary key
+        self.misspelling = misspelling  # the dictionary entry that matched
+        self.token = token  # the tokenizer's token that yielded ``word``
+
+
+class Spellchecker:
+    def __init__(self) -> None:
+        self._misspellings: Dict[str, Misspelling] = {}  # lower-cased typo -> entry
+        self.ignore_words_cased: Container[str] = frozenset()  # case-sensitive ignores
+
+    def _parse_inline_ignore(self, line: str) -> Optional[FrozenSet[str]]:
+        if _codespell_ignore_tag not in line:  # cheap check before the regex
+            return frozenset()
+        inline_ignore_match = _inline_ignore_regex.search(line)
+        if inline_ignore_match:
+            words = frozenset(
+                filter(None, (inline_ignore_match.group("words") or "").split(","))
+            )
+            return words if words else None  # None: tag without words
+        return frozenset()
+
+    def spellcheck_line(
+        self,
+        line: str,
+        tokenizer: LineTokenizer[T_co],
+        *,
+        respect_inline_ignore: bool = True,
+    ) -> Iterable[DetectedMisspelling[T_co]]:
+        """Tokenize and spellcheck a line
+
+        Split the line into tokens using the provided tokenizer. See the doc
+        string of `LineTokenizer` for an example.
+
+        :param line: The line to spellcheck.
+        :param tokenizer: A callable that will tokenize the line
+        :param respect_inline_ignore: Whether to check the line for
+           `codespell:ignore` instructions
+        :returns: An iterable of discovered typos.
+        """
+        misspellings = self._misspellings
+        ignore_words_cased = self.ignore_words_cased
+
+        extra_words_to_ignore = (
+            self._parse_inline_ignore(line) if respect_inline_ignore else frozenset()
+        )
+        if extra_words_to_ignore is None:
+            return  # tag without words: the whole line is ignored
+
+        for token in tokenizer(line):
+            word = token.group()
+            if word in ignore_words_cased:
+                continue
+            lword = word.lower()
+            misspelling = misspellings.get(lword)
+            if misspelling is not None and lword not in extra_words_to_ignore:
+                # Sometimes we find a 'misspelling' which is actually a valid word
+                # preceded by a string escape sequence.  Ignore such cases as
+                # they're usually false alarms; see issue #17 among others.
+                char_before_idx = token.start() - 1
+                if (
+                    char_before_idx >= 0
+                    and line[char_before_idx] == "\\"
+                    # bell, backspace, formfeed, newline, carriage-return, tab, vtab.
+                    and word.startswith(("a", "b", "f", "n", "r", "t", "v"))
+                    and lword[1:] not in misspellings
+                ):
+                    continue
+                yield DetectedMisspelling(word, lword, misspelling, token)
+
+    def check_lower_cased_word(self, word: str) -> Optional[Misspelling]:
+        """Check a given word against the loaded dictionaries
+
+        :param word: The word to check. This should be all lower-case.
+        """
+        return self._misspellings.get(word)
+
+    def add_from_file(
+        self,
+        filename: str,
+        *,
+        ignore_words: Container[str] = frozenset(),
+    ) -> None:
+        """Parse a codespell dictionary
+
+        :param filename: The codespell dictionary file to parse
+        :param ignore_words: Words to ignore from this dictionary.
+        """
+        misspellings = self._misspellings
+        with open(filename, encoding="utf-8") as f:
+            translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars]
+            for line in f:
+                [key, data] = line.split("->")  # ValueError unless exactly one "->"
+                # TODO: For now, convert both to lower.
+                #       Someday we can maybe add support for fixing caps.
+                key = key.lower()
+                data = data.lower()
+                if key not in ignore_words:
+                    _add_misspelling(key, data, misspellings)
+                # generate alternative misspellings/fixes
+                for x, table in translate_tables:
+                    if x in key:
+                        alt_key = key.translate(table)
+                        alt_data = data.translate(table)
+                        if alt_key not in ignore_words:
+                            _add_misspelling(alt_key, alt_data, misspellings)
+
+
+def _add_misspelling(
+    key: str,
+    data: str,
+    misspellings: Dict[str, Misspelling],
+) -> None:
+    """Parse the right-hand side of a dictionary entry and register it.
+
+    Any comma in ``data`` disables automatic fixing; the text following the
+    last comma is the (possibly empty) reason, the rest are the candidates.
+    """
+    data = data.strip()
+    fix = "," not in data
+    reason = ""
+    if not fix:
+        # Everything after the last comma is the reason, never a candidate.
+        data, reason = data.rsplit(",", 1)
+        reason = reason.lstrip()
+
+    candidates = tuple(c.strip() for c in data.split(","))
+    misspellings[key] = Misspelling(candidates, fix, reason)
diff --git a/codespell_lib/_text_util.py b/codespell_lib/_text_util.py
new file mode 100644
index 0000000000..33e6d7e033
--- /dev/null
+++ b/codespell_lib/_text_util.py
@@ -0,0 +1,29 @@
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see
+# https://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+"""
+Copyright (C) 2010-2011  Lucas De Marchi <lucas.de.marchi@gmail.com>
+Copyright (C) 2011  ProFUSION embedded systems
+"""
+
+from typing import Sequence
+
+
+def fix_case(word: str, candidates: Sequence[str]) -> Sequence[str]:
+    """Return ``candidates`` re-cased to mirror the casing of ``word``."""
+    if word == word.capitalize():
+        return tuple(map(str.capitalize, candidates))
+    if word == word.upper():
+        return tuple(map(str.upper, candidates))
+    # Lower-case or mixed-case word: hand the candidates back untouched.
+    return candidates