Commit f5c2bab

Drop __getattr__ from ThreadSafeDataFrame. It was implemented to provide convenient access to the internal dataframe container, so that a ThreadSafeDataFrame could be used much like a pandas DataFrame (pandas indeed implements many convenient methods for manipulating data). However, this is dangerous in a multithreaded environment: the lock is released as soon as the acquired item (which may be a method that manipulates data) is returned, so its holder can still mutate the internal container asynchronously, outside the lock. To avoid this problem, __getattr__ is dropped and all necessary operations are implemented as explicit methods that hold the reentrant lock; the sketch below illustrates the hazard.
1 parent ac29818 commit f5c2bab
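
The hazard is easiest to see in a minimal sketch (illustrative class names only, not the library's real API): with a delegating __getattr__, the lock guards only the attribute lookup, so the bound method it hands back mutates the shared DataFrame after the lock has already been released.

import threading

import pandas as pd


class UnsafeTable:
    """Illustration of the dropped pattern, not the real ThreadSafeDataFrame."""

    def __init__(self):
        self._lock = threading.RLock()
        self._container = pd.DataFrame(columns=["value"])

    def __getattr__(self, item):
        # The lock is held only while the attribute is looked up.
        with self._lock:
            return getattr(self._container, item)
        # By the time the caller invokes the returned bound method,
        # e.g. table.drop("x", inplace=True), the lock is gone and the
        # mutation races with other threads.


class SafeTable(UnsafeTable):
    """Illustration of the approach taken here: explicit locked methods."""

    def drop_entry(self, index: str):
        # The whole check-and-mutate sequence runs under the reentrant lock.
        with self._lock:
            if index not in self._container.index:
                raise ValueError(f"Table index {index} doesn't exist in this table.")
            self._container.drop(index, inplace=True)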

File tree

5 files changed (+134, -57 lines)


qiskit_experiments/database_service/utils.py

Lines changed: 44 additions & 11 deletions
@@ -376,17 +376,58 @@ def container(
             return container[self._default_columns()]
         return container
 
+    def drop_entry(
+        self,
+        index: str,
+    ):
+        """Drop entry from the dataframe.
+
+        Args:
+            index: Name of entry to drop.
+
+        Raises:
+            ValueError: When index is not in this table.
+        """
+        with self._lock:
+            if index not in self._container.index:
+                raise ValueError(f"Table index {index} doesn't exist in this table.")
+            self._container.drop(index, inplace=True)
+
+    def get_entry(
+        self,
+        index: str,
+    ) -> pd.Series:
+        """Get entry from the dataframe.
+
+        Args:
+            index: Name of entry to acquire.
+
+        Returns:
+            Pandas Series of acquired entry. This doesn't mutate the table.
+
+        Raises:
+            ValueError: When index is not in this table.
+        """
+        with self._lock:
+            if index not in self._container.index:
+                raise ValueError(f"Table index {index} doesn't exist in this table.")
+
+            return self._container.loc[index]
+
     def add_entry(
         self,
         index: str,
         **kwargs,
-    ):
+    ) -> pd.Series:
         """Add new entry to the dataframe.
 
         Args:
             index: Name of this entry. Must be unique in this table.
             kwargs: Description of new entry to register.
 
+        Returns:
+            Pandas Series of added entry. This doesn't mutate the table.
+
         Raises:
             ValueError: When index is not unique in this table.
         """
@@ -406,22 +447,14 @@ def add_entry(
         index = str(index)
         self._container.loc[index] = list(template.values())
 
+        return self._container.iloc[-1]
+
     def _repr_html_(self) -> Union[str, None]:
         """Return HTML representation of this dataframe."""
         with self._lock:
             # Remove underscored columns.
             return self._container._repr_html_()
 
-    def __getattr__(self, item):
-        lock = object.__getattribute__(self, "_lock")
-
-        with lock:
-            # Lock when access to container's member.
-            container = object.__getattribute__(self, "_container")
-            if hasattr(container, item):
-                return getattr(container, item)
-        raise AttributeError(f"'ThreadSafeDataFrame' object has no attribute '{item}'")
-
     def __json_encode__(self) -> Dict[str, Any]:
         with self._lock:
             return {
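
For reference, a short usage sketch of the new accessors (the MyTable subclass below is hypothetical; ThreadSafeDataFrame expects subclasses to declare their default columns, as in the diff above):

from typing import List

from qiskit_experiments.database_service.utils import ThreadSafeDataFrame


class MyTable(ThreadSafeDataFrame):
    """Hypothetical three-column table, for illustration only."""

    @classmethod
    def _default_columns(cls) -> List[str]:
        return ["value1", "value2", "value3"]


table = MyTable()

# Every call below holds the reentrant lock for its full duration.
added = table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)  # returns the new row as a Series
entry = table.get_entry("x")   # read-only access, doesn't mutate the table
table.drop_entry("x")          # raises ValueError if "x" is not in the table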

qiskit_experiments/framework/analysis_result_table.py

Lines changed: 45 additions & 33 deletions
@@ -17,6 +17,8 @@
 import warnings
 from typing import List, Union, Optional
 
+import pandas as pd
+
 from qiskit_experiments.database_service.utils import ThreadSafeDataFrame
 
 LOG = logging.getLogger(__name__)
@@ -52,6 +54,11 @@ def _default_columns(cls) -> List[str]:
             "created_time",
         ]
 
+    def result_ids(self) -> List[str]:
+        """Return all result IDs in this table."""
+        with self._lock:
+            return self._container["result_id"].to_list()
+
     def filter_columns(self, columns: Union[str, List[str]]) -> List[str]:
         """Filter columns names available in this table.
@@ -68,35 +75,37 @@ def filter_columns(self, columns: Union[str, List[str]]) -> List[str]:
         Raises:
             ValueError: When column is given in string which doesn't match with any builtin group.
         """
-        if columns == "all":
-            return self._columns
-        if columns == "default":
-            return [
-                "name",
-                "experiment",
-                "components",
-                "value",
-                "quality",
-                "backend",
-                "run_time",
-            ] + self._extra
-        if columns == "minimal":
-            return [
-                "name",
-                "components",
-                "value",
-                "quality",
-            ] + self._extra
-        if not isinstance(columns, str):
-            out = []
-            for column in columns:
-                if column in self._columns:
-                    out.append(column)
-                else:
-                    warnings.warn(
-                        f"Specified column name {column} does not exist in this table.", UserWarning
-                    )
-            return out
+        with self._lock:
+            if columns == "all":
+                return self._columns
+            if columns == "default":
+                return [
+                    "name",
+                    "experiment",
+                    "components",
+                    "value",
+                    "quality",
+                    "backend",
+                    "run_time",
+                ] + self._extra
+            if columns == "minimal":
+                return [
+                    "name",
+                    "components",
+                    "value",
+                    "quality",
+                ] + self._extra
+            if not isinstance(columns, str):
+                out = []
+                for column in columns:
+                    if column in self._columns:
+                        out.append(column)
+                    else:
+                        warnings.warn(
+                            f"Specified column name {column} does not exist in this table.",
+                            UserWarning,
+                        )
+                return out
         raise ValueError(
             f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'."
         )
@@ -106,16 +115,19 @@ def add_entry(
         self,
         result_id: Optional[str] = None,
         **kwargs,
-    ):
+    ) -> pd.Series:
         """Add new entry to the table.
 
         Args:
             result_id: Result ID. Automatically generated when not provided.
                 This must be valid hexadecimal UUID string.
             kwargs: Description of new entry to register.
+
+        Returns:
+            Pandas Series of added entry. This doesn't mutate the table.
         """
         if result_id:
-            with self.lock:
+            with self._lock:
                 if result_id[:8] in self._container.index:
                     raise ValueError(
                         f"The short ID of the result_id '{result_id[:8]}' already exists in the "
@@ -129,15 +141,15 @@ def add_entry(
         # This mechanism is similar with the github commit hash.
         short_index = result_id[:8]
 
-        super().add_entry(
+        return super().add_entry(
             index=short_index,
             result_id=result_id,
             **kwargs,
         )
 
     def _unique_table_index(self):
         """Generate unique UUID which is unique in the table with first 8 characters."""
-        with self.lock:
+        with self._lock:
             n = 0
             while n < 1000:
                 tmp_id = uuid.uuid4().hex
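
A brief sketch of how the table-level additions are meant to be consumed (the result ID is made up, and the import path is inferred from the file location above):

from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable

table = AnalysisResultTable()

# add_entry now returns the stored row as a pandas Series, so callers
# no longer need to reach into the container with iloc[-1] afterwards.
series = table.add_entry(result_id="9a0bdec8c0104ef7bb7db84939717a6b", value=0.123)
print(series.value)        # 0.123

# result_ids() reads the IDs under the lock without exposing the container.
print(table.result_ids())  # ['9a0bdec8c0104ef7bb7db84939717a6b']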

qiskit_experiments/framework/experiment_data.py

Lines changed: 4 additions & 6 deletions
@@ -686,7 +686,7 @@ def _set_hgp_from_provider(self, provider):
     def _clear_results(self):
         """Delete all currently stored analysis results and figures"""
         # Schedule existing analysis results for deletion next save call
-        self._deleted_analysis_results.extend(list(self._analysis_results["result_id"]))
+        self._deleted_analysis_results.extend(list(self._analysis_results.result_ids()))
         self._analysis_results.clear()
         # Schedule existing figures for deletion next save call
         for key in self._figures.keys():
@@ -1375,7 +1375,7 @@ def add_analysis_results(
         tags = tags or []
         backend = backend or self.backend_name
 
-        self._analysis_results.add_entry(
+        series = self._analysis_results.add_entry(
            result_id=result_id,
            name=name,
            value=value,
@@ -1391,7 +1391,7 @@
         )
         if self.auto_save:
             service_result = _series_to_service_result(
-                series=self._analysis_results.iloc[-1],
+                series=series,
                 service=self._service,
                 auto_save=False,
             )
@@ -1426,7 +1426,7 @@ def delete_analysis_result(
                 "Try another key that can uniquely determine entry to delete."
             )
 
-        self._analysis_results.drop(to_delete.name, inplace=True)
+        self._analysis_results.drop_entry(str(to_delete.name))
         if self._service and self.auto_save:
             with service_exception_to_warning():
                 self.service.delete_analysis_result(result_id=to_delete.result_id)
@@ -1697,8 +1697,6 @@ def save(
                     json_encoder=self._json_encoder,
                     max_workers=max_workers,
                 )
-                for result in self._analysis_results.values():
-                    result._created_in_db = True
             except Exception as ex:  # pylint: disable=broad-except
                 # Don't automatically fail the experiment just because its data cannot be saved.
                 LOG.error("Unable to save the experiment data: %s", traceback.format_exc())

test/extended_equality.py

Lines changed: 4 additions & 0 deletions
@@ -281,6 +281,10 @@ def _check_dataframes(
     **kwargs,
 ):
     """Check equality of data frame which may involve Qiskit Experiments class value."""
+    if isinstance(data1, ThreadSafeDataFrame):
+        data1 = data1.container(collapse_extra=False)
+    if isinstance(data2, ThreadSafeDataFrame):
+        data2 = data2.container(collapse_extra=False)
     return is_equivalent(
         data1.to_dict(orient="index"),
         data2.to_dict(orient="index"),
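
Without __getattr__, a ThreadSafeDataFrame no longer forwards calls like to_dict to the inner DataFrame, so the equality helper has to unwrap the wrapper explicitly, as the diff above does. A standalone sketch of the same idea (a duck-typed check is used here instead of the real isinstance test):

def to_plain_dataframe(data):
    """Return the underlying pandas DataFrame if data is a thread-safe wrapper."""
    # Stand-in for isinstance(data, ThreadSafeDataFrame).
    if hasattr(data, "container"):
        return data.container(collapse_extra=False)
    return data


def dataframes_equal(data1, data2) -> bool:
    """Compare two (possibly wrapped) dataframes by their row dictionaries."""
    df1 = to_plain_dataframe(data1)
    df2 = to_plain_dataframe(data2)
    return df1.to_dict(orient="index") == df2.to_dict(orient="index")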

test/framework/test_data_table.py

Lines changed: 37 additions & 7 deletions
@@ -14,6 +14,7 @@
 
 from test.base import QiskitExperimentsTestCase
 
+import uuid
 import numpy as np
 import pandas as pd
 
@@ -58,28 +59,33 @@ def test_raises_initializing_with_wrong_table(self):
             # columns doesn't match with default_columns
             TestBaseTable.TestTable(wrong_table)
 
+    def test_get_entry(self):
+        """Test getting an entry from the table."""
+        table = TestBaseTable.TestTable({"x": [1.0, 2.0, 3.0]})
+        self.assertListEqual(table.get_entry("x").to_list(), [1.0, 2.0, 3.0])
+
     def test_add_entry(self):
         """Test adding data with default keys to table."""
         table = TestBaseTable.TestTable()
         table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)
 
-        self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0])
+        self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0])
 
     def test_add_entry_with_missing_key(self):
         """Test adding entry with partly specified keys."""
         table = TestBaseTable.TestTable()
         table.add_entry(index="x", value1=0.0, value3=2.0)
 
         # NaN value cannot be compared with assert
-        np.testing.assert_equal(table.loc["x"].to_list(), [0.0, float("nan"), 2.0])
+        np.testing.assert_equal(table.get_entry("x").to_list(), [0.0, float("nan"), 2.0])
 
     def test_add_entry_with_new_key(self):
         """Test adding data with new keys to table."""
         table = TestBaseTable.TestTable()
         table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0)
 
         self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"])
-        self.assertListEqual(table.loc["x"].to_list(), [0.0, 1.0, 2.0, 3.0])
+        self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, 3.0])
 
     def test_add_entry_with_new_key_with_existing_entry(self):
         """Test adding new key will expand existing entry."""
@@ -88,10 +94,24 @@ def test_add_entry_with_new_key_with_existing_entry(self):
         table.add_entry(index="y", value1=0.0, value2=1.0, value3=2.0, extra=3.0)
 
         self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"])
-        self.assertListEqual(table.loc["y"].to_list(), [0.0, 1.0, 2.0, 3.0])
+        self.assertListEqual(table.get_entry("y").to_list(), [0.0, 1.0, 2.0, 3.0])
 
         # NaN value cannot be compared with assert
-        np.testing.assert_equal(table.loc["x"].to_list(), [0.0, 1.0, 2.0, float("nan")])
+        np.testing.assert_equal(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, float("nan")])
+
+    def test_drop_entry(self):
+        """Test drop entry from the table."""
+        table = TestBaseTable.TestTable()
+        table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0)
+        table.drop_entry("x")
+
+        self.assertEqual(len(table), 0)
+
+    def test_drop_non_existing_entry(self):
+        """Test dropping non-existing entry raises ValueError."""
+        table = TestBaseTable.TestTable()
+        with self.assertRaises(ValueError):
+            table.drop_entry("x")
 
     def test_return_only_default_columns(self):
         """Test extra entry is correctly recognized."""
@@ -131,7 +151,7 @@ def test_container_is_immutable(self):
         self.assertListEqual(dataframe.loc["x"].to_list(), [100, 0.2, 0.3])
 
         # Original object in the experiment payload is preserved
-        self.assertListEqual(table.loc["x"].to_list(), [0.1, 0.2, 0.3])
+        self.assertListEqual(table.get_entry("x").to_list(), [0.1, 0.2, 0.3])
 
     def test_round_trip(self):
         """Test JSON roundtrip serialization with the experiment encoder."""
@@ -149,7 +169,7 @@ def test_add_entry_with_result_id(self):
         """Test adding entry with result_id. Index is created by truncating long string."""
         table = AnalysisResultTable()
         table.add_entry(result_id="9a0bdec8c0104ef7bb7db84939717a6b", value=0.123)
-        self.assertEqual(table.loc["9a0bdec8"].value, 0.123)
+        self.assertEqual(table.get_entry("9a0bdec8").value, 0.123)
 
     def test_extra_column_name_is_always_returned(self):
         """Test extra column names are always returned in filtered column names."""
@@ -165,6 +185,16 @@ def test_extra_column_name_is_always_returned(self):
         all_columns = table.filter_columns("all")
         self.assertTrue("extra" in all_columns)
 
+    def test_listing_result_id(self):
+        """Test returning result IDs of all stored entries."""
+        table = AnalysisResultTable()
+
+        ref_ids = [uuid.uuid4().hex for _ in range(10)]
+        for ref_id in ref_ids:
+            table.add_entry(result_id=ref_id, value=0)
+
+        self.assertListEqual(table.result_ids(), ref_ids)
+
     def test_no_overlap_result_id(self):
         """Test automatically prepare unique result IDs for sufficient number of entries."""
         table = AnalysisResultTable()
