Skip to content

Commit f5c759b

Browse files
Reimplement AnalysisResultTable (#1252)
### Summary Follow-up of #1133 for the data frame. ### Details and comments I realized I made a mistake when I wrote `ThreadSafeDataFrame` as a base class. We plan to move the curve data (XY data) to the artifact for better reusability of the data points. In addition, the [CurveData](https://github.com/Qiskit-Extensions/qiskit-experiments/blob/c66034c90dad73d705af25be7e9ed9617e7eb2ef/qiskit_experiments/curve_analysis/curve_data.py#L88-L113) object, which is a container of the XY data, will also be replaced with a data frame in a follow-up PR. Since this table should contain predefined columns such as `xval`, `yval`, and `yerr`, I assumed this container could be a subclass of `ThreadSafeDataFrame` -- but this was not the right assumption. Since the curve analysis always runs on a single thread (whereas analysis callbacks might run on multiple threads, and thus `AnalysisResultTable` must be thread-safe), the curve data frame doesn't need to be a thread-safe object. In this PR, the functionality to define default columns of the table is reimplemented as a Python mixin class, so that the function to add default columns can also be used for the curve data frame without introducing unnecessary thread-safety mechanisms. `AnalysisResultTable` is a thread-safe container, but the functionality to manage default columns is delegated to the mixin.
1 parent 1acc02d commit f5c759b

File tree

6 files changed

+340
-324
lines changed

6 files changed

+340
-324
lines changed

qiskit_experiments/database_service/utils.py

Lines changed: 1 addition & 198 deletions
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,9 @@
1919
from abc import ABC, abstractmethod
2020
from collections import OrderedDict
2121
from datetime import datetime, timezone
22-
from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional
22+
from typing import Callable, Tuple, Dict, Any, Union, Type, Optional
2323
import json
2424

25-
import pandas as pd
2625
import dateutil.parser
2726
import pkg_resources
2827
from dateutil import tz
@@ -278,199 +277,3 @@ def append(self, value):
278277
"""Append to the list."""
279278
with self._lock:
280279
self._container.append(value)
281-
282-
283-
class ThreadSafeDataFrame(ThreadSafeContainer):
284-
"""Thread safe data frame.
285-
286-
This class wraps pandas dataframe with predefined column labels,
287-
which is specified by the class method `_default_columns`.
288-
Subclass can override this method to provide default labels specific to its data structure.
289-
290-
This object is expected to be used internally in the ExperimentData.
291-
"""
292-
293-
def __init__(self, init_values=None):
294-
"""ThreadSafeContainer constructor."""
295-
self._columns = self._default_columns()
296-
self._extra = []
297-
super().__init__(init_values)
298-
299-
@classmethod
300-
def _default_columns(cls) -> List[str]:
301-
return []
302-
303-
def _init_container(self, init_values: Optional[Union[Dict, pd.DataFrame]] = None):
304-
"""Initialize the container."""
305-
if init_values is None:
306-
return pd.DataFrame(columns=self.get_columns())
307-
if isinstance(init_values, pd.DataFrame):
308-
input_columns = list(init_values.columns)
309-
if input_columns != self.get_columns():
310-
raise ValueError(
311-
f"Input data frame contains unexpected columns {input_columns}. "
312-
f"{self.__class__.__name__} defines {self.get_columns()} as default columns."
313-
)
314-
return init_values
315-
if isinstance(init_values, dict):
316-
return pd.DataFrame.from_dict(
317-
data=init_values,
318-
orient="index",
319-
columns=self.get_columns(),
320-
)
321-
raise TypeError(f"Initial value of {type(init_values)} is not valid data type.")
322-
323-
def get_columns(self) -> List[str]:
324-
"""Return current column names.
325-
326-
Returns:
327-
List of column names.
328-
"""
329-
with self._lock:
330-
return self._columns.copy()
331-
332-
def add_columns(self, *new_columns: str, default_value: Any = None):
333-
"""Add new columns to the table.
334-
335-
This operation mutates the current container.
336-
337-
Args:
338-
new_columns: Name of columns to add.
339-
default_value: Default value to fill added columns.
340-
"""
341-
with self._lock:
342-
# Order sensitive
343-
new_columns = [c for c in new_columns if c not in self.get_columns()]
344-
if len(new_columns) == 0:
345-
return
346-
347-
# Update columns
348-
for new_column in new_columns:
349-
self._container.insert(len(self._container.columns), new_column, default_value)
350-
self._columns.extend(new_columns)
351-
self._extra.extend(new_columns)
352-
353-
def clear(self):
354-
"""Remove all elements from this container."""
355-
with self._lock:
356-
self._container = self._init_container()
357-
self._columns = self._default_columns()
358-
self._extra = []
359-
360-
def container(
361-
self,
362-
collapse_extra: bool = True,
363-
) -> pd.DataFrame:
364-
"""Return bare pandas dataframe.
365-
366-
Args:
367-
collapse_extra: Set True to show only default columns.
368-
369-
Returns:
370-
Bare pandas dataframe. This object is no longer thread safe.
371-
"""
372-
with self._lock:
373-
container = self._container.copy()
374-
375-
if collapse_extra:
376-
return container[self._default_columns()]
377-
return container
378-
379-
def drop_entry(
380-
self,
381-
index: str,
382-
):
383-
"""Drop entry from the dataframe.
384-
385-
Args:
386-
index: Name of entry to drop.
387-
388-
Raises:
389-
ValueError: When index is not in this table.
390-
"""
391-
with self._lock:
392-
if index not in self._container.index:
393-
raise ValueError(f"Table index {index} doesn't exist in this table.")
394-
self._container.drop(index, inplace=True)
395-
396-
def get_entry(
397-
self,
398-
index: str,
399-
) -> pd.Series:
400-
"""Get entry from the dataframe.
401-
402-
Args:
403-
index: Name of entry to acquire.
404-
405-
Returns:
406-
Pandas Series of acquired entry. This doesn't mutate the table.
407-
408-
Raises:
409-
ValueError: When index is not in this table.
410-
"""
411-
with self._lock:
412-
if index not in self._container.index:
413-
raise ValueError(f"Table index {index} doesn't exist in this table.")
414-
415-
return self._container.loc[index]
416-
417-
def add_entry(
418-
self,
419-
index: str,
420-
**kwargs,
421-
) -> pd.Series:
422-
"""Add new entry to the dataframe.
423-
424-
Args:
425-
index: Name of this entry. Must be unique in this table.
426-
kwargs: Description of new entry to register.
427-
428-
Returns:
429-
Pandas Series of added entry. This doesn't mutate the table.
430-
431-
Raises:
432-
ValueError: When index is not unique in this table.
433-
"""
434-
with self._lock:
435-
if index in self._container.index:
436-
raise ValueError(f"Table index {index} already exists in the table.")
437-
438-
if kwargs.keys() - set(self.get_columns()):
439-
self.add_columns(*kwargs.keys())
440-
441-
template = dict.fromkeys(self.get_columns())
442-
template.update(kwargs)
443-
444-
if not isinstance(index, str):
445-
index = str(index)
446-
self._container.loc[index] = list(template.values())
447-
448-
return self._container.iloc[-1]
449-
450-
def _repr_html_(self) -> Union[str, None]:
451-
"""Return HTML representation of this dataframe."""
452-
with self._lock:
453-
# Remove underscored columns.
454-
return self._container._repr_html_()
455-
456-
def __json_encode__(self) -> Dict[str, Any]:
457-
with self._lock:
458-
return {
459-
"class": "ThreadSafeDataFrame",
460-
"data": self._container.to_dict(orient="index"),
461-
"columns": self._columns,
462-
"extra": self._extra,
463-
}
464-
465-
@classmethod
466-
def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame":
467-
if not value.get("class", None) == "ThreadSafeDataFrame":
468-
raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.")
469-
470-
instance = object.__new__(cls)
471-
# Need to update self._columns first to set extra columns in the dataframe container.
472-
instance._columns = value.get("columns", cls._default_columns())
473-
instance._extra = value.get("extra", [])
474-
instance._lock = threading.RLock()
475-
instance._container = instance._init_container(init_values=value.get("data", {}))
476-
return instance

0 commit comments

Comments
 (0)