22import  logging 
33from  multiprocessing  import  cpu_count 
44from  multiprocessing .pool  import  Pool 
5- from  typing  import  Dict , Iterable , List , Optional , Tuple 
5+ from  typing  import  Dict , Iterable , List , Optional , Tuple ,  Set 
66
77from  find_compatible_dts  import  FindCompatibleDTS 
88
# References parsing output: ident -> (file_idx, family) -> list of line numbers
# where the ident is referenced.
RefsDict = Dict[bytes, Dict[Tuple[int, str], List[int]]]

# Generic dictionary of ident -> list of lines.
LinesListDict = Dict[str, List[int]]

# Blob index -> (git blob hash, basename of the file, True if the blob was
# first seen in the currently-processed tag).
IdxCache = Dict[int, Tuple[bytes, str, bool]]
# Check if definition for ident is visible in current version
def def_in_version(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, ident: bytes) -> bool:
    """Return True if at least one definition of *ident* lives in a blob
    that belongs to the version currently being indexed.

    Positive answers are memoized in ``def_cache``; negative answers are
    not cached, since later blobs of the version may still add a definition.
    """
    if ident in def_cache:
        return True

    candidates = db.defs.get(ident)
    if not candidates:
        return False

    # A definition counts only if its blob index is part of this version.
    visible = any(
        file_idx in idx_to_hash_and_filename
        for file_idx, _, _, _ in candidates.iter()
    )
    if visible:
        def_cache.add(ident)
    return visible
53+ 
# Add definitions to database
def add_defs(db: DB, defs: DefsDict):
    """Merge collected definition occurrences into the defs database.

    For each ident, the existing DefList is fetched (or a fresh one is
    created), every occurrence is appended, and the list is stored back.
    """
    for ident, occurrences in defs.items():
        def_list = db.defs.get(ident)
        if def_list is None:
            def_list = DefList()

        for file_idx, def_type, line_no, family in occurrences:
            def_list.append(file_idx, def_type, line_no, family)

        db.defs.put(ident, def_list)
5665
# Add references to database
def add_refs(db: DB, def_cache: Set[bytes], idx_to_hash_and_filename: IdxCache, refs: RefsDict):
    """Merge collected references into the refs database.

    References to idents that have no definition at all, or whose definitions
    are not visible in the current version, are skipped. Lines that coincide
    with a definition of the same ident in the same file are filtered out so
    a definition is not also recorded as a reference to itself.
    """
    for ident, idx_to_lines in refs.items():
        deflist = db.defs.get(ident)
        # Cheap check first: no definitions anywhere means nothing to do,
        # and it avoids a redundant db.defs lookup inside def_in_version.
        if deflist is None:
            continue
        if not def_in_version(db, def_cache, idx_to_hash_and_filename, ident):
            continue

        # Precompute (file_idx, line) of every known definition once per
        # ident; the previous per-line rescan of deflist was O(refs * defs).
        def_positions = {
            (def_idx, def_line)
            for def_idx, _, def_line, _ in deflist.iter()
        }

        obj = db.refs.get(ident)
        if obj is None:
            obj = RefList()

        modified = False
        for (idx, family), lines in idx_to_lines.items():
            kept = [n for n in lines if (idx, n) not in def_positions]

            if kept:
                obj.append(idx, ','.join(str(n) for n in kept), family)
                modified = True

        # Only write back when something was actually appended.
        if modified:
            db.refs.put(ident, obj)
8396
8497# Add documentation references to database 
8598def  add_docs (db : DB , idx : int , family : str , docs : Dict [str , List [int ]]):
@@ -111,7 +124,7 @@ def add_to_lineslist(db_file: BsdDB, idx: int, family: str, to_add: Dict[str, Li
111124
112125
113126# Adds blob list to database, returns blob id -> (hash, filename) dict 
114- def  collect_blobs (db : DB , tag : bytes ) ->  Dict [ int ,  Tuple [ bytes ,  str ]] :
127+ def  collect_blobs (db : DB , tag : bytes ) ->  IdxCache :
115128    idx  =  db .vars .get ('numBlobs' )
116129    if  idx  is  None :
117130        idx  =  0 
@@ -126,11 +139,14 @@ def collect_blobs(db: DB, tag: bytes) -> Dict[int, Tuple[bytes, str]]:
126139        hash , path  =  blob .split (b' ' ,maxsplit = 1 )
127140        filename  =  os .path .basename (path .decode ())
128141        blob_idx  =  db .blob .get (hash )
142+ 
129143        if  blob_idx  is  not None :
130144            versionBuf .append ((blob_idx , path ))
145+             if  blob_idx  not  in idx_to_hash_and_filename :
146+                 idx_to_hash_and_filename [blob_idx ] =  (hash , filename , False )
131147        else :
132148            versionBuf .append ((idx , path ))
133-             idx_to_hash_and_filename [idx ] =  (hash , filename )
149+             idx_to_hash_and_filename [idx ] =  (hash , filename ,  True )
134150            db .blob .put (hash , idx )
135151            db .hash .put (idx , hash )
136152            db .file .put (idx , filename )
@@ -271,18 +287,17 @@ def get_comps_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]:
271287# Update a single version - collects data from all the stages and saves it in the database 
272288def  update_version (db : DB , tag : bytes , pool : Pool , dts_comp_support : bool ):
273289    idx_to_hash_and_filename  =  collect_blobs (db , tag )
274-     def_cache  =  {}
275290
276291    # Collect blobs to process and split list of blobs into chunks 
277-     idxes  =  [(idx , hash , filename ) for  (idx , (hash , filename )) in  idx_to_hash_and_filename .items ()]
292+     idxes  =  [(idx , hash , filename ) for  (idx , (hash , filename ,  new )) in  idx_to_hash_and_filename .items ()  if   new ]
278293    chunksize  =  int (len (idxes ) /  cpu_count ())
279294    chunksize  =  min (max (1 , chunksize ), 100 )
280295
281296    logger .info ("collecting blobs done" )
282297
283298    for  result  in  pool .imap_unordered (get_defs , idxes , chunksize ):
284299        if  result  is  not None :
285-             add_defs (db , def_cache ,  result )
300+             add_defs (db , result )
286301
287302    logger .info ("defs done" )
288303
@@ -305,16 +320,16 @@ def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
305320
306321        logger .info ("dts comps docs done" )
307322
323+     def_cache  =  set ()
308324    for  result  in  pool .imap_unordered (get_refs , idxes , chunksize ):
309325        if  result  is  not None :
310-             add_refs (db , def_cache , result )
326+             add_refs (db , def_cache , idx_to_hash_and_filename ,  result )
311327
312328    logger .info ("refs done" )
313329
314330    generate_defs_caches (db )
315331    logger .info ("update done" )
316332
317- 
318333if  __name__  ==  "__main__" :
319334    dts_comp_support  =  bool (int (script ('dts-comp' )))
320335    db  =  None 
0 commit comments