Skip to content

ENH: Added downloading a dictionary from a URL. #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions comment_spell_check/comment_spell_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ def spell_check_comment(
prefixes = prefixes or []
error_word = remove_prefix(error_word, prefixes)

if len(error_word) == 0 or error_word in spell or error_word.lower() in spell:
if not error_word:
continue
if error_word in spell or error_word.lower() in spell:
continue

# Try splitting camel case words and checking each sub-word
Expand Down Expand Up @@ -322,10 +324,7 @@ def build_dictionary_list(args):
if not isinstance(args.dict, list):
return dict_list

for d in args.dict:
dpath = Path(d)
if dpath.exists():
dict_list.append(dpath)
dict_list.extend(args.dict)

return dict_list

Expand Down Expand Up @@ -362,7 +361,8 @@ def output_results(args, bad_words):
print(f"vim +{line_num} {found_file}", file=sys.stderr)
else:
print(
f"file: {found_file:30} line: {line_num:3d} word: {misspelled_word}",
f"file: {found_file:30} line: {line_num:3d} ",
f"word: {misspelled_word}",
file=sys.stderr,
)

Expand Down Expand Up @@ -490,6 +490,7 @@ def comment_spell_check(args):


def main():
"""Parse the command line arguments and call the spell checking function."""
args = parseargs.parse_args()
comment_spell_check(args)

Expand Down
33 changes: 27 additions & 6 deletions comment_spell_check/utils/create_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import importlib.resources
import spellchecker
import requests


def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
Expand All @@ -20,15 +21,35 @@ def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
# load the English dictionary
lib_path = importlib.resources.files(spellchecker)
english_dict = str(lib_path) + "/resources/en.json.gz"
logger.info("Loading English dictionary from: %s", english_dict)
checker.word_frequency.load_dictionary(english_dict)
logger.info("Loaded %s", english_dict)
logger.info("%d words", checker.word_frequency.unique_words)

# load the additional dictionaries
if not isinstance(dict_list, list):
if not isinstance(dict_list, list) or not dict_list:
return checker
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would do:

if dict_list is None:
   return checker

I don't think the `if len(dict_list) > 0` check below is needed.

if len(dict_list) > 0:
for d in dict_list:
logger.info("Loading additional dictionary from: %s", d)
checker.word_frequency.load_text_file(d)

for d in dict_list:

# load dictionary from URL
try:
response = requests.get(d)
response.raise_for_status()
checker.word_frequency.load_text(response.text)

except requests.exceptions.MissingSchema:
# URL didn't work so assume it's a local file path
try:
checker.word_frequency.load_text_file(d)
except IOError:
logger.error("Error loading %s", d)
continue

except requests.exceptions.RequestException as e:
logger.error("Error loading dictionary from URL %s: %s", d, e)
continue

logger.info("Loaded %s", d)
logger.info("%d words", checker.word_frequency.unique_words)

return checker
6 changes: 5 additions & 1 deletion comment_spell_check/utils/parseargs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""command line argument parser for comment_spell_check."""

import argparse
from importlib.metadata import version, PackageNotFoundError

Expand All @@ -11,6 +13,7 @@


def create_parser():
"""Create an argument parser for the command-line interface."""
parser = argparse.ArgumentParser()

parser.add_argument("filenames", nargs="*")
Expand Down Expand Up @@ -60,7 +63,8 @@ def create_parser():
dest="dict",
help="File that contains words that will be ignored."
" Argument can be passed multiple times."
" File must contain 1 word per line.",
" File must contain 1 word per line."
" Argument can also be a URL to a text file with words.",
)

parser.add_argument(
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
comment_parser
pyspellchecker
bibtexparser
requests
30 changes: 29 additions & 1 deletion tests/test_comment_spell_check.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Test suite for the comment_spell_check command line tool."""

# ==========================================================================
#
# Copyright NumFOCUS
Expand All @@ -21,9 +23,12 @@


class TestCommentSpellCheck(unittest.TestCase):
"""Test class for comment_spell_check command line tool."""

@classmethod
def setUpClass(self):
def setUpClass(cls):
"""Setting up comment_spell_check tests"""
return cls()

@classmethod
def tearDownClass(cls):
Expand All @@ -43,6 +48,7 @@ def test_basic(self):
],
cwd="comment_spell_check",
stdout=subprocess.PIPE,
check=False,
)
self.assertEqual(runresult.returncode, 0, runresult.stdout)

Expand All @@ -62,6 +68,7 @@ def test_codebase(self):
],
cwd="comment_spell_check",
stdout=subprocess.PIPE,
check=False,
)
self.assertEqual(runresult.returncode, 0, runresult.stdout)

Expand All @@ -74,6 +81,7 @@ def test_version(self):
],
cwd="comment_spell_check",
stdout=subprocess.PIPE,
check=False,
)
self.assertEqual(runresult.returncode, 0)

Expand All @@ -93,6 +101,26 @@ def test_bibtex(self):
],
cwd="comment_spell_check",
stdout=subprocess.PIPE,
check=False,
)
self.assertEqual(runresult.returncode, 0, runresult.stdout)

def test_url(self):
"""URL test"""
url = (
"https://raw.githubusercontent.com/SimpleITK/SimpleITK/"
"refs/heads/master/.github/workflows/additional_dictionary.txt"
)
runresult = subprocess.run(
[
"comment_spell_check",
"--dict",
url,
"../tests/urltest.py",
],
cwd="comment_spell_check",
stdout=subprocess.PIPE,
check=False,
)
self.assertEqual(runresult.returncode, 0, runresult.stdout)

Expand Down
4 changes: 4 additions & 0 deletions tests/urltest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# visualstudio. This word is in the SimpleITK dictionary but not the
# local one.
#
print("Hi Mom!")