Skip to content

Commit dca0f3b

Browse files
committed
Another fix for manage_revisions
1 parent 817477a commit dca0f3b

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

datapackage_pipelines_budgetkey/pipelines/procurement/spending/fix-report-values.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ def boolean(x):
1313

1414

1515
def date(x):
16+
if x is None:
17+
return None
18+
1619
try:
1720
x = float(x)
1821
return datetime.date(1900, 1, 1) + datetime.timedelta(days=int(x))

datapackage_pipelines_budgetkey/processors/manage-revisions.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,16 @@ def calc_hash(row, hash_fields):
4545
return hash
4646

4747

48-
def get_all_existing_ids(connection_string, db_table, key_fields, hash_fields, schema):
48+
def get_all_existing_ids(connection_string, db_table, key_fields, hash_fields, array_fields):
4949
ret = DB()
5050
storage = Storage(create_engine(connection_string))
5151

5252
if db_table in storage.buckets:
53-
descriptor = storage.describe(db_table, schema)
54-
db_fields = [f['name'] for f in descriptor['fields']]
53+
descriptor = storage.describe(db_table)
54+
for field in descriptor['fields']:
55+
if field['name'] in array_fields:
56+
field['type'] = 'array'
57+
descriptor = storage.describe(db_table, descriptor)
5558
for rec in storage.iter(db_table):
5659
rec = dict(zip(db_fields, rec))
5760
existing_id = dict(
@@ -121,6 +124,7 @@ def main():
121124
resource_name = parameters['resource-name']
122125
input_key_fields = parameters['key-fields']
123126
input_hash_fields = parameters.get('hash-fields')
127+
array_fields = parameters.get('array-fields', [])
124128

125129
for res in dp['resources']:
126130
if resource_name == res['name']:
@@ -133,12 +137,18 @@ def main():
133137
db_key_fields = parameters.get('db-key-fields', input_key_fields)
134138
db_hash_fields = parameters.get('db-hash-fields', input_hash_fields)
135139

140+
schema_array_fields = [
141+
field['name']
142+
for field in res['schema']['fields']
143+
if field['type'] == 'array'
144+
]
145+
136146
existing_ids = \
137147
get_all_existing_ids(connection_string,
138148
parameters['db-table'],
139149
db_key_fields,
140150
db_hash_fields,
141-
res['schema'])
151+
array_fields + schema_array_fields)
142152
break
143153

144154
assert existing_ids is not None

0 commit comments

Comments
 (0)