Skip to content

Commit 0577277

Browse files
committed
ENH: Added downloading a dictionary from a URL.
1 parent b8508ed commit 0577277

File tree

6 files changed

+81
-10
lines changed

6 files changed

+81
-10
lines changed

comment_spell_check/comment_spell_check.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,9 @@ def spell_check_comment(
222222
prefixes = prefixes or []
223223
error_word = remove_prefix(error_word, prefixes)
224224

225-
if len(error_word) == 0 or error_word in spell or error_word.lower() in spell:
225+
if len(error_word) == 0:
226+
continue
227+
if error_word in spell or error_word.lower() in spell:
226228
continue
227229

228230
# Try splitting camel case words and checking each sub-word
@@ -322,10 +324,7 @@ def build_dictionary_list(args):
322324
if not isinstance(args.dict, list):
323325
return dict_list
324326

325-
for d in args.dict:
326-
dpath = Path(d)
327-
if dpath.exists():
328-
dict_list.append(dpath)
327+
dict_list.extend(args.dict)
329328

330329
return dict_list
331330

@@ -362,7 +361,8 @@ def output_results(args, bad_words):
362361
print(f"vim +{line_num} {found_file}", file=sys.stderr)
363362
else:
364363
print(
365-
f"file: {found_file:30} line: {line_num:3d} word: {misspelled_word}",
364+
f"file: {found_file:30} line: {line_num:3d} ",
365+
f"word: {misspelled_word}",
366366
file=sys.stderr,
367367
)
368368

@@ -490,6 +490,7 @@ def comment_spell_check(args):
490490

491491

492492
def main():
493+
"""Main function to run the spell checker."""
493494
args = parseargs.parse_args()
494495
comment_spell_check(args)
495496

comment_spell_check/utils/create_checker.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
additional dictionaries if provided.
33
"""
44

5+
import pathlib
56
import logging
67
import importlib.resources
78
import spellchecker
9+
import requests
810

911

1012
def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
@@ -22,13 +24,44 @@ def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
2224
english_dict = str(lib_path) + "/resources/en.json.gz"
2325
logger.info("Loading English dictionary from: %s", english_dict)
2426
checker.word_frequency.load_dictionary(english_dict)
27+
logger.info("# of words: %d", checker.word_frequency.unique_words)
2528

2629
# load the additional dictionaries
2730
if not isinstance(dict_list, list):
2831
return checker
2932
if len(dict_list) > 0:
3033
for d in dict_list:
31-
logger.info("Loading additional dictionary from: %s", d)
32-
checker.word_frequency.load_text_file(d)
34+
if isinstance(d, pathlib.PosixPath):
35+
# local file path
36+
try:
37+
checker.word_frequency.load_text_file(d)
38+
logger.info("Loading dictionary: %s", d)
39+
except IsADirectoryError:
40+
# if a directory is provided, load all text files in it
41+
for file in d.glob("*.txt"):
42+
try:
43+
checker.word_frequency.load_text_file(file)
44+
logger.info("Loading dictionary: %s", file)
45+
except FileNotFoundError:
46+
logger.error("File not found: %s", file)
47+
continue
48+
else:
49+
# load dictionary from URL
50+
try:
51+
response = requests.get(d)
52+
response.raise_for_status()
53+
checker.word_frequency.load_text(response.text)
54+
logger.info("Loading dictionary URL: %s", d)
55+
except requests.exceptions.MissingSchema:
56+
# URL didn't work so assume it's a local file path
57+
try:
58+
checker.word_frequency.load_text_file(d)
59+
logger.info("Loading dictionary: %s", d)
60+
except FileNotFoundError:
61+
logger.error("File not found: %s", d)
62+
continue
63+
except requests.exceptions.RequestException as e:
64+
logger.error("Error loading dictionary from URL %s: %s", d, e)
65+
logger.info("# of words: %d", checker.word_frequency.unique_words)
3366

3467
return checker

comment_spell_check/utils/parseargs.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""command line argument parser for comment_spell_check."""
2+
13
import argparse
24
from importlib.metadata import version, PackageNotFoundError
35

@@ -11,6 +13,7 @@
1113

1214

1315
def create_parser():
16+
"""Create an argument parser for the command-line interface."""
1417
parser = argparse.ArgumentParser()
1518

1619
parser.add_argument("filenames", nargs="*")
@@ -60,7 +63,8 @@ def create_parser():
6063
dest="dict",
6164
help="File that contains words that will be ignored."
6265
" Argument can be passed multiple times."
63-
" File must contain 1 word per line.",
66+
" File must contain 1 word per line."
67+
" Argument can also be a URL to a text file with words.",
6468
)
6569

6670
parser.add_argument(

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
comment_parser
22
pyspellchecker
33
bibtexparser
4+
requests

tests/test_comment_spell_check.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Test suite for the comment_spell_check command line tool."""
2+
13
# ==========================================================================
24
#
35
# Copyright NumFOCUS
@@ -21,9 +23,12 @@
2123

2224

2325
class TestCommentSpellCheck(unittest.TestCase):
26+
"""Test class for comment_spell_check command line tool."""
27+
2428
@classmethod
25-
def setUpClass(self):
29+
def setUpClass(cls):
2630
"""Setting up comment_spell_check tests"""
31+
return cls()
2732

2833
@classmethod
2934
def tearDownClass(cls):
@@ -43,6 +48,7 @@ def test_basic(self):
4348
],
4449
cwd="comment_spell_check",
4550
stdout=subprocess.PIPE,
51+
check=False,
4652
)
4753
self.assertEqual(runresult.returncode, 0, runresult.stdout)
4854

@@ -62,6 +68,7 @@ def test_codebase(self):
6268
],
6369
cwd="comment_spell_check",
6470
stdout=subprocess.PIPE,
71+
check=False,
6572
)
6673
self.assertEqual(runresult.returncode, 0, runresult.stdout)
6774

@@ -74,6 +81,7 @@ def test_version(self):
7481
],
7582
cwd="comment_spell_check",
7683
stdout=subprocess.PIPE,
84+
check=False,
7785
)
7886
self.assertEqual(runresult.returncode, 0)
7987

@@ -93,6 +101,26 @@ def test_bibtex(self):
93101
],
94102
cwd="comment_spell_check",
95103
stdout=subprocess.PIPE,
104+
check=False,
105+
)
106+
self.assertEqual(runresult.returncode, 0, runresult.stdout)
107+
108+
def test_url(self):
109+
"""URL test"""
110+
url = (
111+
"https://raw.githubusercontent.com/SimpleITK/SimpleITK/"
112+
"refs/heads/master/.github/workflows/additional_dictionary.txt"
113+
)
114+
runresult = subprocess.run(
115+
[
116+
"comment_spell_check",
117+
"--dict",
118+
url,
119+
"../tests/urltest.py",
120+
],
121+
cwd="comment_spell_check",
122+
stdout=subprocess.PIPE,
123+
check=False,
96124
)
97125
self.assertEqual(runresult.returncode, 0, runresult.stdout)
98126

tests/urltest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# visualstudio. This word is in the SimpleITK dictionary but not the
2+
# local one.
3+
#
4+
print("Hi Mom!")

0 commit comments

Comments
 (0)