diff --git a/comment_spell_check/comment_spell_check.py b/comment_spell_check/comment_spell_check.py index 64b9a12..adf0445 100755 --- a/comment_spell_check/comment_spell_check.py +++ b/comment_spell_check/comment_spell_check.py @@ -222,7 +222,9 @@ def spell_check_comment( prefixes = prefixes or [] error_word = remove_prefix(error_word, prefixes) - if len(error_word) == 0 or error_word in spell or error_word.lower() in spell: + if not error_word: + continue + if error_word in spell or error_word.lower() in spell: continue # Try splitting camel case words and checking each sub-word @@ -322,10 +324,7 @@ def build_dictionary_list(args): if not isinstance(args.dict, list): return dict_list - for d in args.dict: - dpath = Path(d) - if dpath.exists(): - dict_list.append(dpath) + dict_list.extend(args.dict) return dict_list @@ -362,7 +361,8 @@ def output_results(args, bad_words): print(f"vim +{line_num} {found_file}", file=sys.stderr) else: print( - f"file: {found_file:30} line: {line_num:3d} word: {misspelled_word}", + f"file: {found_file:30} line: {line_num:3d} ", + f"word: {misspelled_word}", file=sys.stderr, ) @@ -490,6 +490,7 @@ def comment_spell_check(args): def main(): + """Parse the command line arguments and call the spell checking function.""" args = parseargs.parse_args() comment_spell_check(args) diff --git a/comment_spell_check/utils/create_checker.py b/comment_spell_check/utils/create_checker.py index 3111e85..b6d51fc 100644 --- a/comment_spell_check/utils/create_checker.py +++ b/comment_spell_check/utils/create_checker.py @@ -5,6 +5,7 @@ import logging import importlib.resources import spellchecker +import requests def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker: @@ -20,15 +21,35 @@ def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker: # load the English dictionary lib_path = importlib.resources.files(spellchecker) english_dict = str(lib_path) + "/resources/en.json.gz" - logger.info("Loading English dictionary from: %s", english_dict) checker.word_frequency.load_dictionary(english_dict) + logger.info("Loaded %s", english_dict) + logger.info("%d words", checker.word_frequency.unique_words) # load the additional dictionaries - if not isinstance(dict_list, list): + if not isinstance(dict_list, list) or not dict_list: return checker - if len(dict_list) > 0: - for d in dict_list: - logger.info("Loading additional dictionary from: %s", d) - checker.word_frequency.load_text_file(d) + + for d in dict_list: + + # load dictionary from URL + try: + response = requests.get(d) + response.raise_for_status() + checker.word_frequency.load_text(response.text) + + except requests.exceptions.MissingSchema: + # URL didn't work so assume it's a local file path + try: + checker.word_frequency.load_text_file(d) + except IOError: + logger.error("Error loading %s", d) + continue + + except requests.exceptions.RequestException as e: + logger.error("Error loading dictionary from URL %s: %s", d, e) + continue + + logger.info("Loaded %s", d) + logger.info("%d words", checker.word_frequency.unique_words) return checker diff --git a/comment_spell_check/utils/parseargs.py b/comment_spell_check/utils/parseargs.py index 8b84dd1..4df319f 100644 --- a/comment_spell_check/utils/parseargs.py +++ b/comment_spell_check/utils/parseargs.py @@ -1,3 +1,5 @@ +"""command line argument parser for comment_spell_check.""" + import argparse from importlib.metadata import version, PackageNotFoundError @@ -11,6 +13,7 @@ def create_parser(): + """Create an argument parser for the command-line interface.""" parser = argparse.ArgumentParser() parser.add_argument("filenames", nargs="*") @@ -60,7 +63,8 @@ def create_parser(): dest="dict", help="File that contains words that will be ignored." " Argument can be passed multiple times." - " File must contain 1 word per line.", + " File must contain 1 word per line." + " Argument can also be a URL to a text file with words.", ) parser.add_argument( diff --git a/requirements.txt b/requirements.txt index 552927d..c10f7a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ comment_parser pyspellchecker bibtexparser +requests diff --git a/tests/test_comment_spell_check.py b/tests/test_comment_spell_check.py index b745b2b..67ceec5 100644 --- a/tests/test_comment_spell_check.py +++ b/tests/test_comment_spell_check.py @@ -1,3 +1,5 @@ +"""Test suite for the comment_spell_check command line tool.""" + # ========================================================================== # # Copyright NumFOCUS @@ -21,9 +23,12 @@ class TestCommentSpellCheck(unittest.TestCase): + """Test class for comment_spell_check command line tool.""" + @classmethod - def setUpClass(self): + def setUpClass(cls): """Setting up comment_spell_check tests""" + return cls() @classmethod def tearDownClass(cls): @@ -43,6 +48,7 @@ def test_basic(self): ], cwd="comment_spell_check", stdout=subprocess.PIPE, + check=False, ) self.assertEqual(runresult.returncode, 0, runresult.stdout) @@ -62,6 +68,7 @@ def test_codebase(self): ], cwd="comment_spell_check", stdout=subprocess.PIPE, + check=False, ) self.assertEqual(runresult.returncode, 0, runresult.stdout) @@ -74,6 +81,7 @@ def test_version(self): ], cwd="comment_spell_check", stdout=subprocess.PIPE, + check=False, ) self.assertEqual(runresult.returncode, 0) @@ -93,6 +101,26 @@ def test_bibtex(self): ], cwd="comment_spell_check", stdout=subprocess.PIPE, + check=False, + ) + self.assertEqual(runresult.returncode, 0, runresult.stdout) + + def test_url(self): + """URL test""" + url = ( + "https://raw.githubusercontent.com/SimpleITK/SimpleITK/" + "refs/heads/master/.github/workflows/additional_dictionary.txt" + ) + runresult = subprocess.run( + [ + "comment_spell_check", + "--dict", + url, + "../tests/urltest.py", + ], + cwd="comment_spell_check", + stdout=subprocess.PIPE, + check=False, ) self.assertEqual(runresult.returncode, 0, runresult.stdout) diff --git a/tests/urltest.py b/tests/urltest.py new file mode 100644 index 0000000..d30be5e --- /dev/null +++ b/tests/urltest.py @@ -0,0 +1,4 @@ +# visualstudio. This word is in the SimpleITK dictionary but not the +# local one. +# +print("Hi Mom!")