Skip to content

Commit 895e26c

Browse files
committed
Simpler revised filling mechanism
1 parent 57d9a45 commit 895e26c

File tree

3 files changed

+72
-50
lines changed

3 files changed

+72
-50
lines changed

datapackage_pipelines_budgetkey/pipelines/budget/national/processed/combine-immediate-children.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ def nop(x):
66
return x
77

88

9-
FIELDS = ['code', 'title', 'net_revised']
9+
FIELDS = ['code', 'title', 'net_allocated', 'net_revised']
1010

1111

1212
params, dp, res_iter = ingest()
@@ -15,9 +15,13 @@ def nop(x):
1515

1616
def combine_immediate_children(rows):
1717
for row in rows:
18-
row['children-net_revised'] = map(int, row['children-net_revised'])
18+
if row['children-net_revised'] is None:
19+
logging.warning('children-net_revised is undefined, %r', row)
20+
row['children-net_revised'] = row['children-net_allocated']
21+
for f in ('children-net_revised', 'children-net_allocated'):
22+
row[f] = map(int, row[f])
1923
try:
20-
children = zip(*(row['children-'+x] for x in FIELDS))
24+
children = zip(*(row.get('children-'+x, []) for x in FIELDS))
2125
except TypeError:
2226
logging.error('Failed to process children for row %r', row)
2327
raise

datapackage_pipelines_budgetkey/pipelines/budget/national/processed/join_phases.py

Lines changed: 57 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
import copy
2+
13
from datapackage_pipelines.wrapper import spew, ingest
24
from decimal import Decimal
35

46
parameters, datapackage, res_iter = ingest()
57

8+
last_completed_year = str(parameters['last-completed-year'])
9+
610
amounts = [
711
'net',
812
'gross',
@@ -69,56 +73,64 @@
6973
resource['schema']['fields'] = new_fields
7074

7175

76+
def process_row(row, phase_key):
    """Expand one input row into the series of output rows derived from it.

    The same ``row`` dict is mutated in place and yielded several times, so
    a consumer that keeps a reference to a yielded row will observe the
    later mutations.

    Args:
        row: Mutable mapping holding every key named in the module-level
            ``amounts`` and ``codes_and_titles`` sequences, plus
            ``func_cls_code_1``, ``func_cls_title_1``, ``func_cls_code_2``
            and ``func_cls_title_2``.
        phase_key: Suffix appended (after an underscore) to each amount
            name when the amount columns are re-keyed.

    Yields:
        ``row`` once per ``(code_key, title_key)`` pair in
        ``codes_and_titles``, then once for each of the two ``'C...'``
        codes, and finally once more with code ``'00'``.
    """
    # Re-key each amount column as '<amount>_<phase_key>'; non-blank values
    # are converted to Decimal and scaled by the matching factor, while
    # None / blank strings are carried over unchanged.
    for amount_name, multiplier in zip(amounts, factors):
        raw = row[amount_name]
        is_blank = raw is None or raw.strip() == ''
        row[amount_name + '_' + phase_key] = (
            raw if is_blank else Decimal(raw) * multiplier
        )
        del row[amount_name]

    # Move the per-level code/title columns off the row, left-padding
    # odd-length codes with a single '0' so every code has even length.
    stash = {}
    for code_key, title_key in codes_and_titles:
        code = row[code_key]
        if len(code) % 2 == 1:
            code = '0' + code
        stash[code_key] = code
        stash[title_key] = row[title_key]
        del row[code_key]
        del row[title_key]

    # Rows whose first-level code is numerically non-zero are placed under
    # the '00' root entry; the others start with an empty ancestor chain.
    top_code_key = codes_and_titles[0][0]
    ancestors = [['00', 'המדינה']] if int(stash[top_code_key]) != 0 else []

    for level, (code_key, title_key) in enumerate(codes_and_titles):
        # Level 0 codes are padded to 4 characters, level 1 to 6, and so on.
        width = 4 + 2 * level
        row['code'] = stash[code_key].rjust(width, '0')
        row['title'] = stash[title_key] or 'לא ידוע'
        # The list object is shared, not copied: it keeps growing after
        # each yield as the current level is appended below.
        row['hierarchy'] = ancestors
        row['parent'] = ancestors[-1][0] if ancestors else None
        yield row
        ancestors.append([row['code'], row['title']])

    row['hierarchy'] = None
    row['parent'] = None

    # First-level 'C' code row.
    row['code'] = 'C%d' % (int(row['func_cls_code_1']),)
    row['title'] = '%s' % (row['func_cls_title_1'],)
    yield row

    # Second-level 'C' code row: first-level code followed by the
    # zero-padded two-digit second-level code.
    row['code'] = 'C%d%02d' % (int(row['func_cls_code_1']), int(row['func_cls_code_2']))
    row['title'] = '%s/%s' % (row['func_cls_title_1'], row['func_cls_title_2'])
    yield row

    # NOTE(review): row['code'] was just assigned a 'C...' value above, so
    # unless a consumer rewrites it between yields this condition looks
    # always true -- confirm whether the check was meant to apply to the
    # padded budget code instead.
    if not row['code'].startswith('0000'):
        row['code'] = '00'
        row['title'] = 'המדינה'
        row['hierarchy'] = []
        yield row
122+
123+
72124
def process_first(rows):
73-
for row in rows:
125+
for i, row in enumerate(rows):
74126
phase_key = phases[row['phase']]
75127
del row['phase']
76128

77-
for amount, factor in zip(amounts, factors):
78-
value = row[amount]
79-
if value is not None and value.strip() != '':
80-
value = Decimal(row[amount]) * factor
81-
row[amount + '_' + phase_key] = value
82-
del row[amount]
83-
84-
save = {}
85-
for code_key, title_key in codes_and_titles:
86-
save[code_key] = row[code_key]
87-
if len(save[code_key]) % 2 == 1:
88-
save[code_key] = '0' + save[code_key]
89-
save[title_key] = row[title_key]
90-
del row[code_key]
91-
del row[title_key]
92-
93-
if int(save[codes_and_titles[0][0]]) != 0:
94-
hierarchy = [['00', 'המדינה']]
95-
else:
96-
hierarchy = []
97-
for i, (code_key, title_key) in enumerate(codes_and_titles):
98-
expected_length = i*2 + 4
99-
row['code'] = '0'*(expected_length-len(save[code_key])) + save[code_key]
100-
row['title'] = save[title_key] or 'לא ידוע'
101-
row['hierarchy'] = hierarchy
102-
row['parent'] = None if len(hierarchy) == 0 else hierarchy[-1][0]
103-
yield row
104-
hierarchy.append([row['code'], row['title']])
105-
106-
row['hierarchy'] = None
107-
row['parent'] = None
108-
109-
row['code'] = 'C%d' % (int(row['func_cls_code_1']),)
110-
row['title'] = '%s' % (row['func_cls_title_1'],)
111-
yield row
112-
113-
row['code'] = 'C%d%02d' % (int(row['func_cls_code_1']), int(row['func_cls_code_2']))
114-
row['title'] = '%s/%s' % (row['func_cls_title_1'], row['func_cls_title_2'])
115-
yield row
129+
row_ = copy.deepcopy(row)
130+
yield from process_row(row_, phase_key)
116131

117-
if not row['code'].startswith('0000'):
118-
row['code'] = '00'
119-
row['title'] = 'המדינה'
120-
row['hierarchy'] = []
121-
yield row
132+
if row['year'] > last_completed_year:
133+
yield from process_row(row, 'revised')
122134

123135

124136
def process_resources(res_iter_):

datapackage_pipelines_budgetkey/pipelines/budget/national/processed/pipeline-spec.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ aggregated-yearly:
8585
commitment_balance:
8686
- יתרת התחיבויות
8787
- run: join_phases
88+
parameters:
89+
last-completed-year: 2016
90+
8891
- run: topical_sums
8992
- run: join_categories
9093
- run: set_types
@@ -235,8 +238,8 @@ aggregated-yearly:
235238
name: "econ_cls_title_1_\u05e9\u05db\u05e8"
236239
aggregate: sum
237240

238-
- run: fill_in_revised
239-
241+
# - run: fill_in_revised
242+
#
240243
- run: join
241244
parameters:
242245
source:
@@ -255,6 +258,9 @@ aggregated-yearly:
255258
children-title:
256259
name: title
257260
aggregate: array
261+
children-net_allocated:
262+
name: net_allocated
263+
aggregate: array
258264
children-net_revised:
259265
name: net_revised
260266
aggregate: array

0 commit comments

Comments
 (0)