Skip to content

Commit ee2537a

Browse files
authored
Perform update in parallel (#110)
The update command implementation runs over files that are independent from each other. As such, the overall update operation can be trivially parallelised to speed things up. This change introduces that parallelisation: the list of files that need to be compared/updated is collected in a first pass. This list is then given to a multiprocessing pool to farm out the actual update of each individual file. The amount of parallelism is controlled through a new "jobs" parameter, command line option and environment variable. If no value is given for this option, all CPUs are used. I noticed this chance for improvement when doing a test run of the update of .po files for the Spanish translation of the CPython documentation. Local numbers on my 8-core, hyper-threaded AMD Ryzen 7 5825U: -j 1 (same as old behaviour): real 12m5.402s, user 12m4.942s, sys 0m0.273s; -j 8: real 2m23.609s, user 17m45.201s, sys 0m0.460s; <no value given>: real 1m57.398s, user 26m22.654s, sys 0m0.989s. Signed-off-by: Rodrigo Tobar <[email protected]>
1 parent 390a46b commit ee2537a

File tree

2 files changed

+90
-37
lines changed

2 files changed

+90
-37
lines changed

sphinx_intl/basic.py

Lines changed: 70 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import dataclasses
2+
import multiprocessing as mp
13
import os
24
from glob import glob
5+
from typing import Optional
36

47
import click
58

@@ -24,7 +27,56 @@ def get_lang_dirs(path):
2427
# commands
2528

2629

27-
def update(locale_dir, pot_dir, languages, line_width=76, ignore_obsolete=False):
30+
@dataclasses.dataclass(frozen=True)
31+
class UpdateItem:
32+
po_file: str
33+
pot_file: str
34+
lang: str
35+
line_width: int
36+
ignore_obsolete: bool
37+
38+
39+
@dataclasses.dataclass(frozen=True)
40+
class UpdateResult:
41+
po_file: str
42+
status: str
43+
added: Optional[int] = 0
44+
deleted: Optional[int] = 0
45+
46+
47+
def _update_single_file(update_item: UpdateItem):
48+
cat_pot = c.load_po(update_item.pot_file)
49+
if os.path.exists(update_item.po_file):
50+
cat = c.load_po(update_item.po_file)
51+
msgids = {m.id for m in cat if m.id}
52+
c.update_with_fuzzy(cat, cat_pot)
53+
new_msgids = {m.id for m in cat if m.id}
54+
if msgids != new_msgids:
55+
added = new_msgids - msgids
56+
deleted = msgids - new_msgids
57+
c.dump_po(
58+
update_item.po_file,
59+
cat,
60+
width=update_item.line_width,
61+
ignore_obsolete=update_item.ignore_obsolete,
62+
)
63+
return UpdateResult(update_item.po_file, "update", len(added), len(deleted))
64+
else:
65+
return UpdateResult(update_item.po_file, "notchanged")
66+
else: # new po file
67+
cat_pot.locale = update_item.lang
68+
c.dump_po(
69+
update_item.po_file,
70+
cat_pot,
71+
width=update_item.line_width,
72+
ignore_obsolete=update_item.ignore_obsolete,
73+
)
74+
return UpdateResult(update_item.po_file, "create")
75+
76+
77+
def update(
78+
locale_dir, pot_dir, languages, line_width=76, ignore_obsolete=False, jobs=0
79+
):
2880
"""
2981
Update specified language's po files from pot.
3082
@@ -33,6 +85,7 @@ def update(locale_dir, pot_dir, languages, line_width=76, ignore_obsolete=False)
3385
:param tuple languages: languages to update po files
3486
:param number line_width: maximum line width of po files
3587
:param bool ignore_obsolete: ignore obsolete entries in po files
88+
:param number jobs: number of CPUs to use
3689
:return: {'create': 0, 'update': 0, 'notchanged': 0}
3790
:rtype: dict
3891
"""
@@ -42,6 +95,7 @@ def update(locale_dir, pot_dir, languages, line_width=76, ignore_obsolete=False)
4295
"notchanged": 0,
4396
}
4497

98+
to_translate = []
4599
for dirpath, dirnames, filenames in os.walk(pot_dir):
46100
for filename in filenames:
47101
pot_file = os.path.join(dirpath, filename)
@@ -52,40 +106,21 @@ def update(locale_dir, pot_dir, languages, line_width=76, ignore_obsolete=False)
52106
for lang in languages:
53107
po_dir = os.path.join(locale_dir, lang, "LC_MESSAGES")
54108
po_file = os.path.join(po_dir, basename + ".po")
55-
cat_pot = c.load_po(pot_file)
56-
if os.path.exists(po_file):
57-
cat = c.load_po(po_file)
58-
msgids = {m.id for m in cat if m.id}
59-
c.update_with_fuzzy(cat, cat_pot)
60-
new_msgids = {m.id for m in cat if m.id}
61-
if msgids != new_msgids:
62-
added = new_msgids - msgids
63-
deleted = msgids - new_msgids
64-
status["update"] += 1
65-
click.echo(
66-
"Update: {} +{}, -{}".format(
67-
po_file, len(added), len(deleted)
68-
)
69-
)
70-
c.dump_po(
71-
po_file,
72-
cat,
73-
width=line_width,
74-
ignore_obsolete=ignore_obsolete,
75-
)
76-
else:
77-
status["notchanged"] += 1
78-
click.echo(f"Not Changed: {po_file}")
79-
else: # new po file
80-
status["create"] += 1
81-
click.echo(f"Create: {po_file}")
82-
cat_pot.locale = lang
83-
c.dump_po(
84-
po_file,
85-
cat_pot,
86-
width=line_width,
87-
ignore_obsolete=ignore_obsolete,
88-
)
109+
to_translate.append(
110+
UpdateItem(po_file, pot_file, lang, line_width, ignore_obsolete)
111+
)
112+
113+
with mp.Pool(processes=jobs or None) as pool:
114+
for result in pool.imap_unordered(_update_single_file, to_translate):
115+
status[result.status] += 1
116+
if result.status == "update":
117+
click.echo(
118+
f"Update: {result.po_file} +{result.added}, -{result.deleted}"
119+
)
120+
elif result.status == "create":
121+
click.echo(f"Create: {result.po_file}")
122+
else:
123+
click.echo(f"Not Changed: {result.po_file}")
89124

90125
return status
91126

sphinx_intl/commands.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,18 @@ def convert(self, value, param, ctx):
157157
"disable line wrapping",
158158
)
159159

160+
option_jobs = click.option(
161+
"-j",
162+
"--jobs",
163+
envvar=ENVVAR_PREFIX + "_JOBS",
164+
type=int,
165+
default=0,
166+
metavar="<JOBS>",
167+
show_default=True,
168+
multiple=False,
169+
help="The number of CPUs to use for updates. 0 means all",
170+
)
171+
160172
option_no_obsolete = click.option(
161173
"--no-obsolete",
162174
envvar=ENVVAR_PREFIX + "_NO_OBSOLETE",
@@ -271,7 +283,8 @@ def main(ctx, config, tag):
271283
@option_language
272284
@option_line_width
273285
@option_no_obsolete
274-
def update(locale_dir, pot_dir, language, line_width, no_obsolete):
286+
@option_jobs
287+
def update(locale_dir, pot_dir, language, line_width, no_obsolete, jobs):
275288
"""
276289
Update specified language's po files from pot.
277290
@@ -300,7 +313,12 @@ def update(locale_dir, pot_dir, language, line_width, no_obsolete):
300313
raise click.BadParameter(msg, param_hint="language")
301314

302315
basic.update(
303-
locale_dir, pot_dir, languages, line_width, ignore_obsolete=no_obsolete
316+
locale_dir,
317+
pot_dir,
318+
languages,
319+
line_width,
320+
ignore_obsolete=no_obsolete,
321+
jobs=jobs,
304322
)
305323

306324

0 commit comments

Comments
 (0)