@@ -45,13 +45,16 @@ def calc_hash(row, hash_fields):
4545 return hash
4646
4747
48- def get_all_existing_ids (connection_string , db_table , key_fields , hash_fields , schema ):
48+ def get_all_existing_ids (connection_string , db_table , key_fields , hash_fields , array_fields ):
4949 ret = DB ()
5050 storage = Storage (create_engine (connection_string ))
5151
5252 if db_table in storage .buckets :
53- descriptor = storage .describe (db_table , schema )
54- db_fields = [f ['name' ] for f in descriptor ['fields' ]]
53+ descriptor = storage .describe (db_table )
54+ for field in descriptor ['fields' ]:
55+ if field ['name' ] in array_fields :
56+ field ['type' ] = 'array'
57+ descriptor = storage .describe (db_table , descriptor )
5558 for rec in storage .iter (db_table ):
5659 rec = dict (zip (db_fields , rec ))
5760 existing_id = dict (
@@ -121,6 +124,7 @@ def main():
121124 resource_name = parameters ['resource-name' ]
122125 input_key_fields = parameters ['key-fields' ]
123126 input_hash_fields = parameters .get ('hash-fields' )
127+ array_fields = parameters .get ('array-fields' , [])
124128
125129 for res in dp ['resources' ]:
126130 if resource_name == res ['name' ]:
@@ -133,12 +137,18 @@ def main():
133137 db_key_fields = parameters .get ('db-key-fields' , input_key_fields )
134138 db_hash_fields = parameters .get ('db-hash-fields' , input_hash_fields )
135139
140+ schema_array_fields = [
141+ field ['name' ]
142+ for field in res ['schema' ]['fields' ]
143+ if field ['type' ] == 'array'
144+ ]
145+
136146 existing_ids = \
137147 get_all_existing_ids (connection_string ,
138148 parameters ['db-table' ],
139149 db_key_fields ,
140150 db_hash_fields ,
141- res [ 'schema' ] )
151+ array_fields + schema_array_fields )
142152 break
143153
144154 assert existing_ids is not None
0 commit comments