Skip to content

Commit 06f0718

Browse files
Introduce dataframe to ExperimentData (step1) (#1133)
### Summary This PR executes the approved design proposal in https://github.com/Qiskit/rfcs/blob/master/0007-experiment-dataframe.md. The introduction of the `Artifact` will be done in a follow-up. This PR introduces a dataframe to `ExperimentData` and replaces the conventional [AnalysisResult](https://github.com/Qiskit/qiskit-experiments/blob/main/qiskit_experiments/framework/analysis_result.py) with it. With this PR, an experimentalist can visualize all analysis results in a single HTML table by setting `dataframe=True`. ![image](https://user-images.githubusercontent.com/39517270/230959307-d5a4d471-1659-4cf4-974e-cdb21e625ad8.png) One can control the columns to show in the HTML table with the `columns` option. ### Details and comments Experiment analysis is a local operation on the client side, and data generated there must be agnostic to the payload format of the remote service. In the current implementation, `AnalysisResult` appears in the `BaseAnalysis` and the data generated by the analysis is implicitly typecast to this class. Note that `AnalysisResult` *not only* defines the canonical data format *but also* implements the API for the IBM Experiment service. This tight coupling to the IBM Experiment service may limit data expressibility, and an analysis class author always needs to be aware of the data structure of the remote service. In this PR, the internal data structure is replaced with a flat dataframe, and one should be able to add arbitrary key-value pairs to the analysis result without worrying about the data model. An added key creates a new column in the table, which must be internally formatted and absorbed by the extra field of the payload at a later time when these results are saved. This is implemented by `qiskit_experiments.framework.experiment_data._series_to_service_result`. Bypassing the generation of the `AnalysisResult` object also drastically improves the performance of heavily parallelized analysis that generates a huge amount of data. --------- Co-authored-by: Helena Zhang <[email protected]>
1 parent 1c7ed5b commit 06f0718

File tree

13 files changed

+1243
-178
lines changed

13 files changed

+1243
-178
lines changed

qiskit_experiments/database_service/device_component.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ def to_component(string: str) -> DeviceComponent:
8181
Raises:
8282
ValueError: If input string is not a valid device component.
8383
"""
84+
if isinstance(string, DeviceComponent):
85+
return string
8486
if string.startswith("Q"):
8587
return Qubit(int(string[1:]))
86-
elif string.startswith("R"):
88+
if string.startswith("R"):
8789
return Resonator(int(string[1:]))
88-
else:
89-
return UnknownComponent(string)
90+
return UnknownComponent(string)

qiskit_experiments/database_service/utils.py

Lines changed: 199 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919
from abc import ABC, abstractmethod
2020
from collections import OrderedDict
2121
from datetime import datetime, timezone
22-
from typing import Callable, Tuple, Dict, Any, Union, Type, Optional
22+
from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional
2323
import json
2424

25+
import pandas as pd
2526
import dateutil.parser
2627
import pkg_resources
2728
from dateutil import tz
29+
2830
from qiskit.version import __version__ as terra_version
2931

3032
from qiskit_ibm_experiment import (
@@ -276,3 +278,199 @@ def append(self, value):
276278
"""Append to the list."""
277279
with self._lock:
278280
self._container.append(value)
281+
282+
283+
class ThreadSafeDataFrame(ThreadSafeContainer):
    """Thread safe data frame.

    This class wraps a pandas dataframe with predefined column labels,
    which are specified by the class method `_default_columns`.
    A subclass can override this method to provide default labels specific
    to its data structure.

    Columns added at runtime with :meth:`add_columns` are additionally recorded
    as "extra" columns, so that :meth:`container` can hide them on request.

    This object is expected to be used internally in the ExperimentData.
    """

    def __init__(self, init_values=None):
        """ThreadSafeDataFrame constructor.

        Args:
            init_values: Initial table content. Either ``None``, a dictionary
                keyed by row index, or a pandas dataframe.
                See :meth:`_init_container` for how each type is handled.
        """
        # The column bookkeeping must exist before the container is built,
        # because `_init_container` reads it through `get_columns`.
        # NOTE(review): the parent constructor presumably creates `self._lock`
        # and calls `_init_container` -- confirm against ThreadSafeContainer.
        self._columns = self._default_columns()
        self._extra = []
        super().__init__(init_values)

    @classmethod
    def _default_columns(cls) -> List[str]:
        """Return the default column names. This base implementation defines none."""
        return []

    def _init_container(self, init_values: Optional[Union[Dict, pd.DataFrame]] = None):
        """Initialize the container.

        Args:
            init_values: ``None`` to create an empty table, a dataframe whose
                columns exactly match the current column names (order sensitive),
                or a dictionary mapping a row index to the row data.

        Returns:
            Pandas dataframe used as the internal container.

        Raises:
            ValueError: When an input dataframe has columns that differ from
                the current column names.
            TypeError: When the input is none of the supported data types.
        """
        if init_values is None:
            return pd.DataFrame(columns=self.get_columns())
        if isinstance(init_values, pd.DataFrame):
            input_columns = list(init_values.columns)
            if input_columns != self.get_columns():
                raise ValueError(
                    f"Input data frame contains unexpected columns {input_columns}. "
                    f"{self.__class__.__name__} defines {self.get_columns()} as default columns."
                )
            return init_values
        if isinstance(init_values, dict):
            return pd.DataFrame.from_dict(
                data=init_values,
                orient="index",
                columns=self.get_columns(),
            )
        raise TypeError(f"Initial value of {type(init_values)} is not valid data type.")

    def get_columns(self) -> List[str]:
        """Return current column names.

        Returns:
            List of column names. This is a copy; mutating it doesn't affect the table.
        """
        with self._lock:
            return self._columns.copy()

    def add_columns(self, *new_columns: str, default_value: Any = None):
        """Add new columns to the table.

        This operation mutates the current container.
        Names that already exist in the table, and names repeated within
        a single call, are ignored.

        Args:
            new_columns: Name of columns to add.
            default_value: Default value to fill added columns.
        """
        with self._lock:
            existing = set(self._columns)
            # Order sensitive. dict.fromkeys drops repeated names while keeping the
            # caller's order; without it a repeated name would be inserted once and
            # then make DataFrame.insert raise, leaving the bookkeeping out of sync.
            to_add = [name for name in dict.fromkeys(new_columns) if name not in existing]
            if not to_add:
                return

            # Update columns
            for name in to_add:
                self._container.insert(len(self._container.columns), name, default_value)
            self._columns.extend(to_add)
            self._extra.extend(to_add)

    def clear(self):
        """Remove all elements from this container.

        Extra columns are discarded as well; the table returns to the default columns.
        """
        with self._lock:
            # Reset the column bookkeeping first. `_init_container` builds the new
            # frame from `get_columns`, so doing it the other way around would keep
            # the extra columns in the frame while forgetting them in `_columns`.
            self._columns = self._default_columns()
            self._extra = []
            self._container = self._init_container()

    def container(
        self,
        collapse_extra: bool = True,
    ) -> pd.DataFrame:
        """Return bare pandas dataframe.

        Args:
            collapse_extra: Set True to show only default columns.

        Returns:
            Bare pandas dataframe. This object is no longer thread safe.
        """
        with self._lock:
            container = self._container.copy()

        if collapse_extra:
            return container[self._default_columns()]
        return container

    def drop_entry(
        self,
        index: str,
    ):
        """Drop entry from the dataframe.

        Args:
            index: Name of entry to drop.

        Raises:
            ValueError: When index is not in this table.
        """
        with self._lock:
            if index not in self._container.index:
                raise ValueError(f"Table index {index} doesn't exist in this table.")
            self._container.drop(index, inplace=True)

    def get_entry(
        self,
        index: str,
    ) -> pd.Series:
        """Get entry from the dataframe.

        Args:
            index: Name of entry to acquire.

        Returns:
            Pandas Series of acquired entry. This doesn't mutate the table.

        Raises:
            ValueError: When index is not in this table.
        """
        with self._lock:
            if index not in self._container.index:
                raise ValueError(f"Table index {index} doesn't exist in this table.")

            return self._container.loc[index]

    def add_entry(
        self,
        index: str,
        **kwargs,
    ) -> pd.Series:
        """Add new entry to the dataframe.

        Keys of ``kwargs`` that are not yet columns of the table are added
        as extra columns before the entry is written.

        Args:
            index: Name of this entry. Must be unique in this table.
                A non-string value is converted with ``str`` before use.
            kwargs: Description of new entry to register.

        Returns:
            Pandas Series of added entry. This doesn't mutate the table.

        Raises:
            ValueError: When index is not unique in this table.
        """
        with self._lock:
            # Normalize before the uniqueness check, so that the check is done on
            # the label that is actually stored. Otherwise e.g. the integer 5 passes
            # the check and then silently overwrites the existing row "5".
            if not isinstance(index, str):
                index = str(index)

            if index in self._container.index:
                raise ValueError(f"Table index {index} already exists in the table.")

            # Already existing names are filtered out inside add_columns.
            self.add_columns(*kwargs)

            # Build a full row in column order; columns not given are filled with None.
            template = dict.fromkeys(self.get_columns())
            template.update(kwargs)
            self._container.loc[index] = list(template.values())

            return self._container.loc[index]

    def _repr_html_(self) -> Union[str, None]:
        """Return HTML representation of this dataframe."""
        with self._lock:
            return self._container._repr_html_()

    def __json_encode__(self) -> Dict[str, Any]:
        """Return the dictionary representation used for JSON serialization."""
        with self._lock:
            return {
                "class": "ThreadSafeDataFrame",
                "data": self._container.to_dict(orient="index"),
                # Copies, so that the payload doesn't alias the internal lists.
                "columns": list(self._columns),
                "extra": list(self._extra),
            }

    @classmethod
    def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame":
        """Rebuild an instance from the dictionary created by ``__json_encode__``.

        Args:
            value: Decoded dictionary with "class", "data", "columns", and "extra" keys.

        Returns:
            Restored instance.

        Raises:
            ValueError: When the "class" entry is not "ThreadSafeDataFrame".
        """
        if value.get("class", None) != "ThreadSafeDataFrame":
            raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.")

        # The constructor is bypassed because the columns must be restored
        # before the container is created.
        instance = object.__new__(cls)
        # Need to update self._columns first to set extra columns in the dataframe container.
        instance._columns = list(value.get("columns", cls._default_columns()))
        instance._extra = list(value.get("extra", []))
        instance._lock = threading.RLock()
        instance._container = instance._init_container(init_values=value.get("data", {}))
        return instance

qiskit_experiments/framework/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
AnalysisStatus
8787
AnalysisResult
8888
AnalysisResultData
89+
AnalysisResultTable
8990
ExperimentConfig
9091
AnalysisConfig
9192
ExperimentEncoder
@@ -137,6 +138,7 @@
137138
from .backend_timing import BackendTiming
138139
from .configs import ExperimentConfig, AnalysisConfig
139140
from .analysis_result_data import AnalysisResultData
141+
from .analysis_result_table import AnalysisResultTable
140142
from .experiment_data import ExperimentData
141143
from .composite import (
142144
ParallelExperiment,

qiskit_experiments/framework/analysis_result_data.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,80 @@
1616
import logging
1717
from typing import Optional, Dict, Any, List
1818

19+
from qiskit_experiments.database_service.device_component import DeviceComponent
20+
21+
1922
LOG = logging.getLogger(__name__)
2023

2124

2225
@dataclasses.dataclass
2326
class AnalysisResultData:
2427
"""Dataclass for experiment analysis results"""
2528

26-
# TODO: move stderr and unit into custom value class
2729
name: str
2830
value: Any
31+
experiment: str = None
2932
chisq: Optional[float] = None
3033
quality: Optional[str] = None
34+
experiment_id: Optional[str] = None
35+
result_id: Optional[str] = None
36+
tags: List = dataclasses.field(default_factory=list)
37+
backend: Optional[str] = None
38+
run_time: Optional[str] = None
39+
created_time: Optional[str] = None
3140
extra: Dict[str, Any] = dataclasses.field(default_factory=dict, hash=False, compare=False)
3241
device_components: List = dataclasses.field(default_factory=list)
3342

43+
@classmethod
44+
def from_table_element(
45+
cls,
46+
name: str,
47+
value: Any,
48+
experiment: Optional[str] = None,
49+
components: Optional[List[DeviceComponent]] = None,
50+
quality: Optional[str] = None,
51+
experiment_id: Optional[str] = None,
52+
result_id: Optional[str] = None,
53+
tags: Optional[List[str]] = None,
54+
backend: Optional[str] = None,
55+
run_time: Optional[str] = None,
56+
created_time: Optional[str] = None,
57+
**extra,
58+
):
59+
"""A factory method of AnalysisResultData from a single element in AnalysisResultTable.
60+
61+
Args:
62+
name: Name of this entity.
63+
value: Result value.
64+
experiment: Type of experiment.
65+
components: Device component that the experiment was run on.
66+
quality: Quality of this result.
67+
experiment_id: ID of associated experiment.
68+
result_id: Unique ID of this data entry in the storage.
69+
tags: List of tags.
70+
backend: Device name that the experiment was run on.
71+
run_time: A time at the experiment was run.
72+
created_time: A time at this value was computed.
73+
**extra: Extra information.
74+
"""
75+
chisq = extra.pop("chisq", None)
76+
77+
return AnalysisResultData(
78+
name=name,
79+
value=value,
80+
experiment=experiment,
81+
chisq=chisq,
82+
quality=quality,
83+
experiment_id=experiment_id,
84+
result_id=result_id,
85+
tags=tags,
86+
backend=backend,
87+
run_time=run_time,
88+
created_time=created_time,
89+
device_components=components,
90+
extra=extra,
91+
)
92+
3493
def __str__(self):
3594
out = f"{self.name}:"
3695
out += f"\n- value:{self.value}"
@@ -47,3 +106,35 @@ def __str__(self):
47106
def __iter__(self):
48107
"""Return iterator of data fields (attr, value)"""
49108
return iter((field.name, getattr(self, field.name)) for field in dataclasses.fields(self))
109+
110+
111+
def as_table_element(
    result_data: AnalysisResultData,
) -> Dict[str, Any]:
    """Convert an AnalysisResultData into a flat dictionary for AnalysisResultTable.

    This is similar to the dataclass as_dict function, but the output uses the
    column names of the table; the ``device_components`` attribute is stored
    under the "components" key.

    Args:
        result_data: AnalysisResultData dataclass to format.

    Returns:
        Formatted data representation in dictionary format.
    """
    # Pairs of (table column name, dataclass attribute name), in output order.
    column_to_attribute = (
        ("name", "name"),
        ("experiment", "experiment"),
        ("components", "device_components"),
        ("value", "value"),
        ("quality", "quality"),
        ("experiment_id", "experiment_id"),
        ("result_id", "result_id"),
        ("tags", "tags"),
        ("backend", "backend"),
        ("run_time", "run_time"),
        ("created_time", "created_time"),
    )
    element = {
        column: getattr(result_data, attribute) for column, attribute in column_to_attribute
    }

    # The chi-squared value gets a column only when it is available.
    chisq = result_data.chisq
    if chisq is not None:
        element["chisq"] = chisq

    # Extra entries are flattened into the same dictionary.
    element.update(result_data.extra)

    return element

0 commit comments

Comments
 (0)