
Commit 5aa8192

update: Add references only if definition visible in version
Fixes #292. The previous attempt didn't actually work and caused references to be missing if the definition was in an older file.
1 parent d72291d commit 5aa8192
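
To make the failure mode concrete, here is a minimal, self-contained sketch (all names and values below are hypothetical; only the gating rule mirrors the diff that follows). A definition living in a blob that was already indexed for an older tag is not re-parsed for the new tag, so a cache filled only during this run never sees it; checking the definition's blob index against the full set of blobs in the current version keeps its references.

# Hypothetical data, for illustration only.
# Blob 1 is unchanged since an older tag (not re-parsed); blob 7 is new in this tag.
blobs_in_this_version = {1: (b'hash1', 'old.c', False),
                         7: (b'hash7', 'new.c', True)}
defs_in_database = {b'foo': [(1, 'function', 42, 'C')]}   # 'foo' defined in the old blob

def definition_visible(ident):
    # New rule: any definition whose blob belongs to the current version counts.
    return any(idx in blobs_in_this_version
               for idx, _, _, _ in defs_in_database.get(ident, []))

parsed_this_run = {7}                 # Old rule: only blobs parsed in this run counted,
assert definition_visible(b'foo')     # so references to 'foo' were wrongly dropped.
assert all(idx not in parsed_this_run for idx, *_ in defs_in_database[b'foo'])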

File tree: elixir/update.py (1 file changed, +43 −28 lines)


elixir/update.py

Lines changed: 43 additions & 28 deletions
@@ -2,7 +2,7 @@
 import logging
 from multiprocessing import cpu_count
 from multiprocessing.pool import Pool
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, Set
 
 from find_compatible_dts import FindCompatibleDTS
 
@@ -29,57 +29,70 @@
 # References parsing output, ident -> (file_idx, family) -> list of lines
 RefsDict = Dict[bytes, Dict[Tuple[int, str], List[int]]]
 
-# Cache of definitions found in current tag, ident -> list of (file_idx, line)
-DefCache = Dict[bytes, List[Tuple[int, int]]]
-
 # Generic dictionary of ident -> list of lines
 LinesListDict = Dict[str, List[int]]
 
+# File idx -> (hash, filename, is a new file?)
+IdxCache = Dict[int, Tuple[bytes, str, bool]]
+
+# Check if definition for ident is visible in current version
+def def_in_version(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, ident: bytes) -> bool:
+    if ident in def_cache:
+        return True
+
+    defs_this_ident = db.defs.get(ident)
+    if not defs_this_ident:
+        return False
+
+    for def_idx, _, _, _ in defs_this_ident.iter():
+        if def_idx in idx_to_hash_and_filename:
+            def_cache.add(ident)
+            return True
+
+    return False
+
 # Add definitions to database
-def add_defs(db: DB, def_cache: DefCache, defs: DefsDict):
+def add_defs(db: DB, defs: DefsDict):
     for ident, occ_list in defs.items():
         obj = db.defs.get(ident)
         if obj is None:
             obj = DefList()
 
-        if ident in def_cache:
-            lines_list = def_cache[ident]
-        else:
-            lines_list = []
-            def_cache[ident] = lines_list
-
         for (idx, type, line, family) in occ_list:
             obj.append(idx, type, line, family)
-            lines_list.append((idx, line))
 
         db.defs.put(ident, obj)
 
 # Add references to database
-def add_refs(db: DB, def_cache: DefCache, refs: RefsDict):
+def add_refs(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, refs: RefsDict):
     for ident, idx_to_lines in refs.items():
-        # Skip reference if definition was not collected in this tag
-        deflist = def_cache.get(ident)
-        if deflist is None:
+        deflist = db.defs.get(ident)
+        in_version = def_in_version(db, def_cache, idx_to_hash_and_filename, ident)
+
+        if deflist is None or not in_version:
             continue
 
-        def deflist_exists(idx, n):
-            for didx, dn in deflist:
-                if didx == idx and dn == n:
+        def deflist_exists(idx: int, line: int):
+            for def_idx, _, def_line, _ in deflist.iter():
+                if def_idx == idx and def_line == line:
                     return True
             return False
 
         obj = db.refs.get(ident)
         if obj is None:
             obj = RefList()
 
+        modified = False
         for (idx, family), lines in idx_to_lines.items():
-            lines = [n for n in lines if not deflist_exists(str(idx).encode(), n)]
+            lines = [n for n in lines if not deflist_exists(idx, n)]
 
             if len(lines) != 0:
                 lines_str = ','.join((str(n) for n in lines))
                 obj.append(idx, lines_str, family)
+                modified = True
 
-        db.refs.put(ident, obj)
+        if modified:
+            db.refs.put(ident, obj)
 
 # Add documentation references to database
 def add_docs(db: DB, idx: int, family: str, docs: Dict[str, List[int]]):
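
A minimal usage sketch of the new def_in_version check follows; the stub classes are hypothetical stand-ins for the database objects, and the (file_idx, type, line, family) layout of DefList entries is assumed from how the diff unpacks defs_this_ident.iter(). Only positive lookups are added to def_cache, so a miss is simply re-checked the next time the identifier appears. Note also that add_refs now writes a RefList back only when modified is set, i.e. when at least one non-definition line was actually appended.

from typing import List, Set, Tuple

class StubDefList:                       # stands in for DefList as stored in db.defs
    def __init__(self, entries: List[Tuple[int, str, int, str]]):
        self.entries = entries
    def iter(self):
        return iter(self.entries)

class StubTable:                         # stands in for one database table
    def __init__(self, data): self.data = data
    def get(self, key): return self.data.get(key)

class StubDB:                            # only the .defs attribute is needed here
    def __init__(self, defs): self.defs = StubTable(defs)

# Blob 3 belongs to the version being indexed, blob 9 only to some other version.
idx_cache = {3: (b'hash3', 'sched.c', False)}
db = StubDB({b'foo': StubDefList([(3, 'function', 10, 'C')]),
             b'bar': StubDefList([(9, 'macro', 5, 'C')])})

def_cache: Set[bytes] = set()
assert def_in_version(db, def_cache, idx_cache, b'foo')        # visible in this version
assert b'foo' in def_cache                                     # memoized for later chunks
assert not def_in_version(db, def_cache, idx_cache, b'bar')    # defined only elsewhere
assert not def_in_version(db, def_cache, idx_cache, b'baz')    # no definition at all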
@@ -111,7 +124,7 @@ def add_to_lineslist(db_file: BsdDB, idx: int, family: str, to_add: Dict[str, Li
 
 
 # Adds blob list to database, returns blob id -> (hash, filename) dict
-def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
+def collect_blobs(db: DB, tag: bytes) -> IdxCache:
     idx = db.vars.get('numBlobs')
     if idx is None:
         idx = 0
@@ -126,11 +139,14 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
         hash, path = blob.split(b' ',maxsplit=1)
         filename = os.path.basename(path.decode())
         blob_idx = db.blob.get(hash)
+
         if blob_idx is not None:
             versionBuf.append((blob_idx, path))
+            if blob_idx not in idx_to_hash_and_filename:
+                idx_to_hash_and_filename[blob_idx] = (hash, filename, False)
         else:
             versionBuf.append((idx, path))
-            idx_to_hash_and_filename[idx] = (hash, filename)
+            idx_to_hash_and_filename[idx] = (hash, filename, True)
             db.blob.put(hash, idx)
             db.hash.put(idx, hash)
             db.file.put(idx, filename)
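
For reference, a sketch of the shape collect_blobs now returns (the hashes and filenames below are made up): each blob index maps to its hash, its basename, and a flag saying whether the blob was first seen in this run. Only flagged blobs are parsed again, but the whole mapping defines which blobs are "in" the version when def_in_version checks visibility.

# IdxCache = Dict[int, Tuple[bytes, str, bool]] -- illustrative values only
idx_to_hash_and_filename = {
    17: (b'9f3c0d', 'irq.c',   False),   # hash already known: blob reused from an earlier tag
    42: (b'b81e77', 'timer.c', True),    # hash not seen before: stored and parsed in this run
}

# update_version() schedules only the new blobs for parsing (see the hunk further down) ...
idxes = [(idx, h, name) for idx, (h, name, new) in idx_to_hash_and_filename.items() if new]
assert [i for i, _, _ in idxes] == [42]

# ... while visibility checks consult the full mapping, old blobs included.
assert 17 in idx_to_hash_and_filename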
@@ -271,18 +287,17 @@ def get_comps_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]:
 # Update a single version - collects data from all the stages and saves it in the database
 def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
     idx_to_hash_and_filename = collect_blobs(db, tag)
-    def_cache = {}
 
     # Collect blobs to process and split list of blobs into chunks
-    idxes = [(idx, hash, filename) for (idx, (hash, filename)) in idx_to_hash_and_filename.items()]
+    idxes = [(idx, hash, filename) for (idx, (hash, filename, new)) in idx_to_hash_and_filename.items() if new]
     chunksize = int(len(idxes) / cpu_count())
     chunksize = min(max(1, chunksize), 100)
 
     logger.info("collecting blobs done")
 
     for result in pool.imap_unordered(get_defs, idxes, chunksize):
         if result is not None:
-            add_defs(db, def_cache, result)
+            add_defs(db, result)
 
     logger.info("defs done")
 
@@ -305,16 +320,16 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
 
     logger.info("dts comps docs done")
 
+    def_cache = set()
     for result in pool.imap_unordered(get_refs, idxes, chunksize):
         if result is not None:
-            add_refs(db, def_cache, result)
+            add_refs(db, def_cache, idx_to_hash_and_filename, result)
 
     logger.info("refs done")
 
     generate_defs_caches(db)
     logger.info("update done")
 
-
 if __name__ == "__main__":
     dts_comp_support = bool(int(script('dts-comp')))
     db = None
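
A simplified sketch of the per-version reference pass this hunk wires up, run sequentially instead of through the multiprocessing pool; parsed_refs_chunks is a hypothetical stand-in for the results of pool.imap_unordered(get_refs, ...). The set is only an optimization: correctness comes from def_in_version consulting db.defs and the version's blob mapping.

def index_refs_for_version(db, idx_to_hash_and_filename, parsed_refs_chunks):
    # A fresh memoization set per version: once def_in_version() confirms an
    # identifier's definition is visible in this version's blob set, later
    # chunks that reference the same identifier skip the db.defs scan.
    def_cache = set()
    for refs in parsed_refs_chunks:            # each item is a RefsDict or None
        if refs is not None:
            add_refs(db, def_cache, idx_to_hash_and_filename, refs)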
