ENH: Added downloading a dictionary from a URL.

dave3d · dave3d · commit a088484c76f0 · 2025-07-11T11:18:43.000-04:00
diff --git a/comment_spell_check/comment_spell_check.py b/comment_spell_check/comment_spell_check.py
@@ -222,7 +222,9 @@ def spell_check_comment(
         prefixes = prefixes or []
         error_word = remove_prefix(error_word, prefixes)
 
-        if len(error_word) == 0 or error_word in spell or error_word.lower() in spell:
+        if not error_word:
+            continue
+        if error_word in spell or error_word.lower() in spell:
             continue
 
         # Try splitting camel case words and checking each sub-word
@@ -322,10 +324,7 @@ def build_dictionary_list(args):
     if not isinstance(args.dict, list):
         return dict_list
 
-    for d in args.dict:
-        dpath = Path(d)
-        if dpath.exists():
-            dict_list.append(dpath)
+    dict_list.extend(args.dict)
 
     return dict_list
 
@@ -362,7 +361,8 @@ def output_results(args, bad_words):
             print(f"vim +{line_num} {found_file}", file=sys.stderr)
         else:
             print(
-                f"file: {found_file:30}  line: {line_num:3d}  word: {misspelled_word}",
+                f"file: {found_file:30}  line: {line_num:3d}  ",
+                f"word: {misspelled_word}",
                 file=sys.stderr,
             )
 
@@ -490,6 +490,7 @@ def comment_spell_check(args):
 
 
 def main():
+    """Parse the command line arguments and call the spell checking function."""
     args = parseargs.parse_args()
     comment_spell_check(args)
 
diff --git a/comment_spell_check/utils/create_checker.py b/comment_spell_check/utils/create_checker.py
@@ -5,6 +5,7 @@
 import logging
 import importlib.resources
 import spellchecker
+import requests
 
 
 def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
@@ -20,15 +21,35 @@ def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
     # load the English dictionary
     lib_path = importlib.resources.files(spellchecker)
     english_dict = str(lib_path) + "/resources/en.json.gz"
-    logger.info("Loading English dictionary from: %s", english_dict)
     checker.word_frequency.load_dictionary(english_dict)
+    logger.info("Loaded %s", english_dict)
+    logger.info("%d words", checker.word_frequency.unique_words)
 
     # load the additional dictionaries
-    if not isinstance(dict_list, list):
+    if not isinstance(dict_list, list) or not dict_list:
         return checker
-    if len(dict_list) > 0:
-        for d in dict_list:
-            logger.info("Loading additional dictionary from: %s", d)
-            checker.word_frequency.load_text_file(d)
+
+    for d in dict_list:
+
+        # load dictionary from URL
+        try:
+            response = requests.get(d)
+            response.raise_for_status()
+            checker.word_frequency.load_text(response.text)
+
+        except requests.exceptions.MissingSchema:
+            # URL didn't work so assume it's a local file path
+            try:
+                checker.word_frequency.load_text_file(d)
+            except IOError:
+                logger.error("Error loading %s", d)
+                continue
+
+        except requests.exceptions.RequestException as e:
+            logger.error("Error loading dictionary from URL %s: %s", d, e)
+            continue
+
+        logger.info("Loaded %s", d)
+        logger.info("%d words", checker.word_frequency.unique_words)
 
     return checker
diff --git a/comment_spell_check/utils/parseargs.py b/comment_spell_check/utils/parseargs.py
@@ -1,3 +1,5 @@
+"""command line argument parser for comment_spell_check."""
+
 import argparse
 from importlib.metadata import version, PackageNotFoundError
 
@@ -11,6 +13,7 @@
 
 
 def create_parser():
+    """Create an argument parser for the command-line interface."""
     parser = argparse.ArgumentParser()
 
     parser.add_argument("filenames", nargs="*")
@@ -60,7 +63,8 @@ def create_parser():
         dest="dict",
         help="File that contains words that will be ignored."
         " Argument can be passed multiple times."
-        " File must contain 1 word per line.",
+        " File must contain 1 word per line."
+        " Argument can also be a URL to a text file with words.",
     )
 
     parser.add_argument(
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 comment_parser
 pyspellchecker
 bibtexparser
+requests
diff --git a/tests/test_comment_spell_check.py b/tests/test_comment_spell_check.py
@@ -1,3 +1,5 @@
+"""Test suite for the comment_spell_check command line tool."""
+
 # ==========================================================================
 #
 #   Copyright NumFOCUS
@@ -21,9 +23,12 @@
 
 
 class TestCommentSpellCheck(unittest.TestCase):
+    """Test class for comment_spell_check command line tool."""
+
     @classmethod
-    def setUpClass(self):
+    def setUpClass(cls):
         """Setting up comment_spell_check tests"""
+        return cls()
 
     @classmethod
     def tearDownClass(cls):
@@ -43,6 +48,7 @@ def test_basic(self):
             ],
             cwd="comment_spell_check",
             stdout=subprocess.PIPE,
+            check=False,
         )
         self.assertEqual(runresult.returncode, 0, runresult.stdout)
 
@@ -62,6 +68,7 @@ def test_codebase(self):
             ],
             cwd="comment_spell_check",
             stdout=subprocess.PIPE,
+            check=False,
         )
         self.assertEqual(runresult.returncode, 0, runresult.stdout)
 
@@ -74,6 +81,7 @@ def test_version(self):
             ],
             cwd="comment_spell_check",
             stdout=subprocess.PIPE,
+            check=False,
         )
         self.assertEqual(runresult.returncode, 0)
 
@@ -93,6 +101,26 @@ def test_bibtex(self):
             ],
             cwd="comment_spell_check",
             stdout=subprocess.PIPE,
+            check=False,
+        )
+        self.assertEqual(runresult.returncode, 0, runresult.stdout)
+
+    def test_url(self):
+        """URL test"""
+        url = (
+            "https://raw.githubusercontent.com/SimpleITK/SimpleITK/"
+            "refs/heads/master/.github/workflows/additional_dictionary.txt"
+        )
+        runresult = subprocess.run(
+            [
+                "comment_spell_check",
+                "--dict",
+                url,
+                "../tests/urltest.py",
+            ],
+            cwd="comment_spell_check",
+            stdout=subprocess.PIPE,
+            check=False,
         )
         self.assertEqual(runresult.returncode, 0, runresult.stdout)
 
diff --git a/tests/urltest.py b/tests/urltest.py
@@ -0,0 +1,4 @@
+# visualstudio.  This word is in the SimpleITK dictionary but not the
+# local one.
+#
+print("Hi Mom!")