Skip to content

Commit e49ec29

Browse files
authored
Merge pull request #1065 from no23reason/dho/cq-1387-pandas
feat: add on_execution_submitted to gooddata_pandas
2 parents be33aeb + a5eef85 commit e49ec29

File tree

3 files changed

+96
-33
lines changed

3 files changed

+96
-33
lines changed

gooddata-pandas/gooddata_pandas/data_access.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
# (C) 2021 GoodData Corporation
22
from __future__ import annotations
33

4-
from typing import Any, Optional, Union
4+
from typing import Any, Callable, Optional, Union
55

66
from gooddata_sdk import (
77
Attribute,
88
AttributeFilter,
99
CatalogAttribute,
10+
Execution,
1011
ExecutionDefinition,
11-
ExecutionResponse,
1212
Filter,
1313
GoodDataSdk,
1414
Metric,
@@ -257,7 +257,7 @@ def _compute(
257257
columns: ColumnsDef,
258258
index_by: Optional[IndexDef] = None,
259259
filter_by: Optional[Union[Filter, list[Filter]]] = None,
260-
) -> tuple[ExecutionResponse, dict[str, int], dict[str, int], dict[str, int]]:
260+
) -> tuple[Execution, dict[str, int], dict[str, int], dict[str, int]]:
261261
"""
262262
Internal function that computes an execution-by-convention to retrieve data for a data frame with the provided
263263
columns, optionally indexed by the index_by label and optionally filtered.
@@ -271,7 +271,7 @@ def _compute(
271271
272272
Returns:
273273
tuple: A tuple containing the following elements:
274-
- ExecutionResponse: The execution response.
274+
- Execution: The submitted execution from which results can be read.
275275
- dict[str, int]: A mapping of pandas column names to attribute dimension indices.
276276
- dict[str, int]: A mapping of pandas column names to metric dimension indices.
277277
- dict[str, int]: A mapping of pandas index names to attribute dimension indices.
@@ -299,20 +299,20 @@ def _compute(
299299
#
300300

301301

302-
def _extract_for_metrics_only(response: ExecutionResponse, cols: list, col_to_metric_idx: dict) -> dict:
302+
def _extract_for_metrics_only(execution: Execution, cols: list, col_to_metric_idx: dict) -> dict:
303303
"""
304304
Internal function that extracts data for metrics-only columns when there are no attribute columns.
305305
306306
Args:
307-
response (ExecutionResponse): The execution response to extract data from.
307+
execution (Execution): The execution to extract data from.
308308
cols (list): A list of column names.
309309
col_to_metric_idx (dict): A mapping of pandas column names to metric dimension indices.
310310
311311
Returns:
312312
dict: A dictionary containing the extracted data.
313313
"""
314-
exec_def = response.exec_def
315-
result = response.read_result(len(exec_def.metrics))
314+
exec_def = execution.exec_def
315+
result = execution.read_result(len(exec_def.metrics))
316316
if len(result.data) == 0:
317317
return {col: [] for col in cols}
318318

@@ -345,7 +345,7 @@ def _typed_result(attributes: list[CatalogAttribute], attribute: Attribute, resu
345345

346346

347347
def _extract_from_attributes_and_maybe_metrics(
348-
response: ExecutionResponse,
348+
execution: Execution,
349349
attributes: list[CatalogAttribute],
350350
cols: list[str],
351351
col_to_attr_idx: dict[str, int],
@@ -357,7 +357,7 @@ def _extract_from_attributes_and_maybe_metrics(
357357
optionally metrics columns.
358358
359359
Args:
360-
response (ExecutionResponse): The execution response to extract data from.
360+
execution (Execution): The execution to extract data from.
361361
attributes (list[CatalogAttribute]): The catalog of attributes.
362362
cols (list[str]): A list of column names.
363363
col_to_attr_idx (dict[str, int]): A mapping of pandas column names to attribute dimension indices.
@@ -370,11 +370,11 @@ def _extract_from_attributes_and_maybe_metrics(
370370
- dict: A dictionary containing the extracted data.
371371
- dict: A dictionary containing the extracted index data.
372372
"""
373-
exec_def = response.exec_def
373+
exec_def = execution.exec_def
374374
offset = [0 for _ in exec_def.dimensions]
375375
limit = [len(exec_def.metrics), _RESULT_PAGE_LEN] if exec_def.has_metrics() else [_RESULT_PAGE_LEN]
376376
attribute_dim = 1 if exec_def.has_metrics() else 0
377-
result = response.read_result(limit=limit, offset=offset)
377+
result = execution.read_result(limit=limit, offset=offset)
378378
safe_index_to_attr_idx = index_to_attr_idx if index_to_attr_idx is not None else dict()
379379

380380
# mappings from column name to Attribute
@@ -401,7 +401,7 @@ def _extract_from_attributes_and_maybe_metrics(
401401
break
402402

403403
offset[attribute_dim] = result.next_page_start(attribute_dim)
404-
result = response.read_result(limit=limit, offset=offset)
404+
result = execution.read_result(limit=limit, offset=offset)
405405

406406
return data, index
407407

@@ -412,6 +412,7 @@ def compute_and_extract(
412412
columns: ColumnsDef,
413413
index_by: Optional[IndexDef] = None,
414414
filter_by: Optional[Union[Filter, list[Filter]]] = None,
415+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
415416
) -> tuple[dict, dict]:
416417
"""
417418
Convenience function that computes and extracts data from the execution response.
@@ -422,14 +423,16 @@ def compute_and_extract(
422423
columns (ColumnsDef): The columns definition.
423424
index_by (Optional[IndexDef]): The index definition, if any.
424425
filter_by (Optional[Union[Filter, list[Filter]]]): A filter or a list of filters, if any.
426+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
427+
submitted to the backend.
425428
426429
Returns:
427430
tuple: A tuple containing the following dictionaries:
428431
- dict: A dictionary with data for each column in `columns`.
429432
- dict: A dictionary with data for constructing index(es) for each index in index_by.
430433
431-
Note: For convenience it is possible to pass just single index. in that case the index dict will contain exactly
432-
one key of '0' (just get first value from dict when consuming the result).
434+
Note: For convenience, it is possible to pass just a single index. In that case, the index dict will contain exactly
435+
one key of '0' (just get the first value from dict when consuming the result).
433436
"""
434437
result = _compute(
435438
sdk=sdk,
@@ -439,17 +442,20 @@ def compute_and_extract(
439442
filter_by=filter_by,
440443
)
441444

442-
response, col_to_attr_idx, col_to_metric_idx, index_to_attr_idx = result
445+
execution, col_to_attr_idx, col_to_metric_idx, index_to_attr_idx = result
443446

444-
exec_def = response.exec_def
447+
if on_execution_submitted is not None:
448+
on_execution_submitted(execution)
449+
450+
exec_def = execution.exec_def
445451
cols = list(columns.keys())
446452

447453
if not exec_def.has_attributes():
448-
return _extract_for_metrics_only(response, cols, col_to_metric_idx), dict()
454+
return _extract_for_metrics_only(execution, cols, col_to_metric_idx), dict()
449455
else:
450456
attributes = get_catalog_attributes_for_extract(sdk, workspace_id, exec_def.attributes)
451457
return _extract_from_attributes_and_maybe_metrics(
452-
response,
458+
execution,
453459
attributes,
454460
cols,
455461
col_to_attr_idx,

gooddata-pandas/gooddata_pandas/dataframe.py

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# (C) 2021 GoodData Corporation
22
from __future__ import annotations
33

4-
from typing import Optional, Union
4+
from typing import Callable, Optional, Union
55

66
import pandas
77
from gooddata_api_client import models
88
from gooddata_sdk import (
99
Attribute,
1010
BareExecutionResponse,
11+
Execution,
1112
ExecutionDefinition,
1213
Filter,
1314
GoodDataSdk,
@@ -68,19 +69,25 @@ def __init__(self, sdk: GoodDataSdk, workspace_id: str) -> None:
6869
self._workspace_id = workspace_id
6970

7071
def indexed(
71-
self, index_by: IndexDef, columns: ColumnsDef, filter_by: Optional[Union[Filter, list[Filter]]] = None
72+
self,
73+
index_by: IndexDef,
74+
columns: ColumnsDef,
75+
filter_by: Optional[Union[Filter, list[Filter]]] = None,
76+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
7277
) -> pandas.DataFrame:
7378
"""
7479
Creates a data frame indexed by values of the label. The data frame columns will be created from either
7580
metrics or other label values.
7681
77-
Note that depending on composition of the labels, the DataFrame's index may or may not be unique.
82+
Note that depending on the composition of the labels, the DataFrame's index may or may not be unique.
7883
7984
Args:
8085
index_by (IndexDef): One or more labels to index by.
8186
columns (ColumnsDef): Dictionary mapping column name to its definition.
8287
filter_by (Optional[Union[Filter, list[Filter]]]):
8388
Optional filters to apply during computation on the server.
89+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
90+
submitted to the backend.
8491
8592
Returns:
8693
pandas.DataFrame: A DataFrame instance.
@@ -91,14 +98,18 @@ def indexed(
9198
columns=columns,
9299
index_by=index_by,
93100
filter_by=filter_by,
101+
on_execution_submitted=on_execution_submitted,
94102
)
95103

96104
_idx = make_pandas_index(index)
97105

98106
return pandas.DataFrame(data=data, index=_idx)
99107

100108
def not_indexed(
101-
self, columns: ColumnsDef, filter_by: Optional[Union[Filter, list[Filter]]] = None
109+
self,
110+
columns: ColumnsDef,
111+
filter_by: Optional[Union[Filter, list[Filter]]] = None,
112+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
102113
) -> pandas.DataFrame:
103114
"""
104115
Creates a data frame with columns created from metrics and/or labels.
@@ -107,28 +118,42 @@ def not_indexed(
107118
columns (ColumnsDef): Dictionary mapping column name to its definition.
108119
filter_by (Optional[Union[Filter, list[Filter]]]): Optionally specify filters to apply during
109120
computation on the server.
121+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
122+
submitted to the backend.
110123
111124
Returns:
112125
pandas.DataFrame: A DataFrame instance.
113126
"""
114127

115-
data, _ = compute_and_extract(self._sdk, self._workspace_id, columns=columns, filter_by=filter_by)
128+
data, _ = compute_and_extract(
129+
self._sdk,
130+
self._workspace_id,
131+
columns=columns,
132+
filter_by=filter_by,
133+
on_execution_submitted=on_execution_submitted,
134+
)
116135

117136
return pandas.DataFrame(data=data)
118137

119138
def for_items(
120-
self, items: ColumnsDef, filter_by: Optional[Union[Filter, list[Filter]]] = None, auto_index: bool = True
139+
self,
140+
items: ColumnsDef,
141+
filter_by: Optional[Union[Filter, list[Filter]]] = None,
142+
auto_index: bool = True,
143+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
121144
) -> pandas.DataFrame:
122145
"""
123146
Creates a data frame for named items. This is a convenience method that will create DataFrame with or
124-
without index based on the context of the items that you pass.
147+
without an index based on the context of the items that you pass.
125148
126149
Args:
127150
items (ColumnsDef): Dictionary mapping item name to its definition.
128151
filter_by (Optional[Union[Filter, list[Filter]]]): Optionally specify filters to apply during computation
129152
on the server.
130153
auto_index (bool): Default True. Enables creation of DataFrame with index depending on the contents
131154
of the items.
155+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
156+
submitted to the backend.
132157
133158
Returns:
134159
pandas.DataFrame: A DataFrame instance.
@@ -157,16 +182,24 @@ def for_items(
157182
index_by=resolved_attr_cols,
158183
columns=resolved_measure_cols,
159184
filter_by=filter_by,
185+
on_execution_submitted=on_execution_submitted,
160186
)
161187

162-
def for_visualization(self, visualization_id: str, auto_index: bool = True) -> pandas.DataFrame:
188+
def for_visualization(
189+
self,
190+
visualization_id: str,
191+
auto_index: bool = True,
192+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
193+
) -> pandas.DataFrame:
163194
"""
164195
Creates a data frame with columns based on the content of the visualization with the provided identifier.
165196
166197
Args:
167198
visualization_id (str): Visualization identifier.
168199
auto_index (bool): Default True. Enables creation of DataFrame with index depending on the contents
169200
of the visualization.
201+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
202+
submitted to the backend.
170203
171204
Returns:
172205
pandas.DataFrame: A DataFrame instance.
@@ -181,22 +214,31 @@ def for_visualization(self, visualization_id: str, auto_index: bool = True) -> p
181214
**{naming.col_name_for_metric(m): m.as_computable() for m in visualization.metrics},
182215
}
183216

184-
return self.for_items(columns, filter_by=filter_by, auto_index=auto_index)
217+
return self.for_items(
218+
columns, filter_by=filter_by, auto_index=auto_index, on_execution_submitted=on_execution_submitted
219+
)
185220

186221
def for_created_visualization(
187-
self, created_visualizations_response: dict
222+
self,
223+
created_visualizations_response: dict,
224+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
188225
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
189226
"""
190227
Creates a data frame using a created visualization.
191228
192229
Args:
193230
created_visualizations_response (dict): Created visualization response.
231+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
232+
submitted to the backend.
194233
195234
Returns:
196235
tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
197236
"""
198237
execution_definition = self._sdk.compute.build_exec_def_from_chat_result(created_visualizations_response)
199-
return self.for_exec_def(exec_def=execution_definition)
238+
return self.for_exec_def(
239+
exec_def=execution_definition,
240+
on_execution_submitted=on_execution_submitted,
241+
)
200242

201243
def result_cache_metadata_for_exec_result_id(self, result_id: str) -> ResultCacheMetadata:
202244
"""
@@ -217,6 +259,7 @@ def for_exec_def(
217259
result_size_dimensions_limits: ResultSizeDimensions = (),
218260
result_size_bytes_limit: Optional[int] = None,
219261
page_size: int = _DEFAULT_PAGE_SIZE,
262+
on_execution_submitted: Optional[Callable[[Execution], None]] = None,
220263
) -> tuple[pandas.DataFrame, DataFrameMetadata]:
221264
"""
222265
Creates a data frame using an execution definition.
@@ -247,6 +290,8 @@ def for_exec_def(
247290
result_size_dimensions_limits (ResultSizeDimensions): A tuple containing maximum size of result dimensions.
248291
result_size_bytes_limit (Optional[int]): Maximum size of result in bytes.
249292
page_size (int): Number of records per page.
293+
on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
294+
submitted to the backend.
250295
251296
Returns:
252297
Tuple[pandas.DataFrame, DataFrameMetadata]: Tuple holding DataFrame and DataFrame metadata.
@@ -257,6 +302,9 @@ def for_exec_def(
257302
execution = self._sdk.compute.for_exec_def(workspace_id=self._workspace_id, exec_def=exec_def)
258303
result_cache_metadata = self.result_cache_metadata_for_exec_result_id(execution.result_id)
259304

305+
if on_execution_submitted is not None:
306+
on_execution_submitted(execution)
307+
260308
return convert_execution_response_to_dataframe(
261309
execution_response=execution.bare_exec_response,
262310
result_cache_metadata=result_cache_metadata,
@@ -302,7 +350,7 @@ def for_exec_result_id(
302350
label_overrides (Optional[LabelOverrides]): Label overrides for metrics and attributes.
303351
result_cache_metadata (Optional[ResultCacheMetadata]): Cache metadata for the execution result.
304352
result_size_dimensions_limits (ResultSizeDimensions): A tuple containing maximum size of result dimensions.
305-
result_size_bytes_limit (Optional[int]): Maximum size of result in bytes.
353+
result_size_bytes_limit (Optional[int]): Maximum size of the result in bytes.
306354
use_local_ids_in_headers (bool): Use local identifier in headers.
307355
use_primary_labels_in_attributes (bool): Use primary labels in attributes.
308356
page_size (int): Number of records per page.

0 commit comments

Comments
 (0)