Skip to content

Commit 9fe9876

Browse files
committed
Another fix for manage_revisions
1 parent 817477a commit 9fe9876

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

datapackage_pipelines_budgetkey/pipelines/procurement/spending/fix-report-values.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ def boolean(x):
1313

1414

1515
def date(x):
16+
if x is None:
17+
return None
18+
1619
try:
1720
x = float(x)
1821
return datetime.date(1900, 1, 1) + datetime.timedelta(days=int(x))

datapackage_pipelines_budgetkey/processors/manage-revisions.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,16 @@ def calc_hash(row, hash_fields):
4545
return hash
4646

4747

48-
def get_all_existing_ids(connection_string, db_table, key_fields, hash_fields, schema):
48+
def get_all_existing_ids(connection_string, db_table, key_fields, hash_fields, array_fields):
4949
ret = DB()
5050
storage = Storage(create_engine(connection_string))
5151

5252
if db_table in storage.buckets:
53-
descriptor = storage.describe(db_table, schema)
53+
descriptor = storage.describe(db_table)
54+
for field in descriptor['fields']:
55+
if field['name'] in array_fields:
56+
field['type'] = 'array'
57+
storage.describe(db_table, descriptor)
5458
db_fields = [f['name'] for f in descriptor['fields']]
5559
for rec in storage.iter(db_table):
5660
rec = dict(zip(db_fields, rec))
@@ -121,6 +125,7 @@ def main():
121125
resource_name = parameters['resource-name']
122126
input_key_fields = parameters['key-fields']
123127
input_hash_fields = parameters.get('hash-fields')
128+
array_fields = parameters.get('array-fields', [])
124129

125130
for res in dp['resources']:
126131
if resource_name == res['name']:
@@ -133,12 +138,18 @@ def main():
133138
db_key_fields = parameters.get('db-key-fields', input_key_fields)
134139
db_hash_fields = parameters.get('db-hash-fields', input_hash_fields)
135140

141+
schema_array_fields = [
142+
field['name']
143+
for field in res['schema']['fields']
144+
if field['type'] == 'array'
145+
]
146+
136147
existing_ids = \
137148
get_all_existing_ids(connection_string,
138149
parameters['db-table'],
139150
db_key_fields,
140151
db_hash_fields,
141-
res['schema'])
152+
array_fields + schema_array_fields)
142153
break
143154

144155
assert existing_ids is not None

0 commit comments

Comments
 (0)