Skip to content

Commit e44ae2d

Browse files
committed
ENH: Added downloading a dictionary from a URL.
1 parent b8508ed commit e44ae2d

File tree

6 files changed

+42
-6
lines changed

6 files changed

+42
-6
lines changed

comment_spell_check/comment_spell_check.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,7 @@ def build_dictionary_list(args):
322322
if not isinstance(args.dict, list):
323323
return dict_list
324324

325-
for d in args.dict:
326-
dpath = Path(d)
327-
if dpath.exists():
328-
dict_list.append(dpath)
325+
dict_list.extend(args.dict)
329326

330327
return dict_list
331328

comment_spell_check/utils/create_checker.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import logging
66
import importlib.resources
77
import spellchecker
8+
import requests
9+
import pathlib
810

911

1012
def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
@@ -22,13 +24,26 @@ def create_checker(dict_list: list[str] = None) -> spellchecker.SpellChecker:
2224
english_dict = str(lib_path) + "/resources/en.json.gz"
2325
logger.info("Loading English dictionary from: %s", english_dict)
2426
checker.word_frequency.load_dictionary(english_dict)
27+
logger.info("number of words: %s", checker.word_frequency.unique_words)
2528

2629
# load the additional dictionaries
2730
if not isinstance(dict_list, list):
2831
return checker
2932
if len(dict_list) > 0:
3033
for d in dict_list:
3134
logger.info("Loading additional dictionary from: %s", d)
32-
checker.word_frequency.load_text_file(d)
35+
if isinstance(d, pathlib.PosixPath):
36+
# assume it's a local file path
37+
checker.word_frequency.load_text_file(d)
38+
else:
39+
# load dictionary from URL
40+
if d.startswith("http://") or d.startswith("https://"):
41+
response = requests.get(d)
42+
response.raise_for_status()
43+
checker.word_frequency.load_text(response.text)
44+
else:
45+
# assume it's a local file path
46+
checker.word_frequency.load_text_file(d)
47+
logger.info("# of words: %s", checker.word_frequency.unique_words)
3348

3449
return checker

comment_spell_check/utils/parseargs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ def create_parser():
6060
dest="dict",
6161
help="File that contains words that will be ignored."
6262
" Argument can be passed multiple times."
63-
" File must contain 1 word per line.",
63+
" File must contain 1 word per line."
64+
" Argument can also be a URL to a text file with words.",
6465
)
6566

6667
parser.add_argument(

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
comment_parser
22
pyspellchecker
33
bibtexparser
4+
requests

tests/test_comment_spell_check.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,24 @@ def test_bibtex(self):
9696
)
9797
self.assertEqual(runresult.returncode, 0, runresult.stdout)
9898

99+
def test_url(self):
100+
"""URL test"""
101+
url = (
102+
"https://raw.githubusercontent.com/SimpleITK/SimpleITK/"
103+
"refs/heads/master/.github/workflows/additional_dictionary.txt"
104+
)
105+
runresult = subprocess.run(
106+
[
107+
"comment_spell_check",
108+
"--dict",
109+
url,
110+
"../tests/urltest.py",
111+
],
112+
cwd="comment_spell_check",
113+
stdout=subprocess.PIPE,
114+
)
115+
self.assertEqual(runresult.returncode, 0, runresult.stdout)
116+
99117

100118
if __name__ == "__main__":
101119
unittest.main()

tests/urltest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# visualstudio. This word is in the SimpleITK dictionary but not the
2+
# local one.
3+
#
4+
print("Hi Mom!")

0 commit comments

Comments
 (0)