TRI-ML
diff --git a/‎sequentialized_barnard_tests/step.py‎
Lines changed: 85 additions & 26 deletions b/‎sequentialized_barnard_tests/step.py‎
Lines changed: 85 additions & 26 deletions
diff --git a/‎tests/eval_data/TRI_FOLD_RED_TOWEL.npy‎
928 Bytes b/‎tests/eval_data/TRI_FOLD_RED_TOWEL.npy‎
928 Bytes
diff --git a/‎tests/eval_data/TRI_SIM_EGGPLANT_IN_BASKET.npy‎
7.94 KB b/‎tests/eval_data/TRI_SIM_EGGPLANT_IN_BASKET.npy‎
7.94 KB
diff --git a/‎tests/eval_data/TRI_SIM_SPOON_ON_TOWEL.npy‎
7.94 KB b/‎tests/eval_data/TRI_SIM_SPOON_ON_TOWEL.npy‎
7.94 KB
diff --git a/‎tests/eval_data/TRI_SIM_STACK_CUBE.npy‎
7.94 KB b/‎tests/eval_data/TRI_SIM_STACK_CUBE.npy‎
7.94 KB
diff --git a/‎tests/sequentialized_barnard_tests/test_step.py‎
Lines changed: 67 additions & 25 deletions b/‎tests/sequentialized_barnard_tests/test_step.py‎
Lines changed: 67 additions & 25 deletions
@@ -166,21 +166,18 @@ def step(
         x = int(self._state[0])
         y = int(self._state[1])
 
-        if (y > x and self.alternative == Hypothesis.P0LessThanP1) or (
-            x > y and self.alternative == Hypothesis.P0MoreThanP1
-        ):
-            if y > x:
-                x_absolute = x
-                y_absolute = y
-            else:
-                x_absolute = y
-                y_absolute = x
+        if y > x:
+            #     and self.alternative == Hypothesis.P0LessThanP1) or (
+            #     x > y and self.alternative == Hypothesis.P0MoreThanP1
+            # ):
+            x_absolute = x
+            y_absolute = y
 
             # New policy > old policy (empirically)
             # Therefore, look only to REJECT in standard setting
 
             # Extract relevant component of policy
-            decision_array = self.policy[self._t - 1][x_absolute]
+            decision_array = self.policy[self._t][x_absolute]
 
             # Number of non-zero / non-unity policy bins at this x and t
             L = decision_array.shape[0] - 1
@@ -189,16 +186,19 @@ def step(
             critical_zero_y = int(decision_array[0])
 
             if y_absolute <= critical_zero_y:  # Current state cannot be significant
-                info = {"Time": self._t, "State": self._state}
+                info = {"Time": self._t + 1, "State": self._state}
                 result = TestResult(self._current_decision, info)
 
                 return result
 
             elif (
                 y_absolute > critical_zero_y + L
             ):  # Current state is definitely significant
-                self._current_decision = Decision.AcceptAlternative
-                info = {"Time": self._t, "State": self._state}
+                if self.alternative == Hypothesis.P0LessThanP1:
+                    self._current_decision = Decision.AcceptAlternative
+                else:
+                    self._current_decision = Decision.FailToDecide
+                info = {"Time": self._t + 1, "State": self._state}
                 result = TestResult(self._current_decision, info)
 
                 return result
@@ -212,7 +212,65 @@ def step(
                 if (
                     random_scalar <= comparator_rv
                 ):  # Then we have probabilistically rejected
+                    if self.alternative == Hypothesis.P0LessThanP1:
+                        self._current_decision = Decision.AcceptAlternative
+                    else:
+                        self._current_decision = Decision.FailToDecide
+                    info = {"Time": self._t + 1, "State": self._state}
+                    result = TestResult(self._current_decision, info)
+                else:  # Then we have probabilistically continued
+                    info = {"Time": self._t + 1, "State": self._state}
+                    result = TestResult(self._current_decision, info)
+
+                return result
+
+        elif x > y:
+            x_absolute = y
+            y_absolute = x
+
+            # New policy > old policy (empirically)
+            # Therefore, look only to REJECT in reverse setting
+
+            # Extract relevant component of policy
+            decision_array = self.policy[self._t][x_absolute]
+
+            # Number of non-zero / non-unity policy bins at this x and t
+            L = decision_array.shape[0] - 1
+
+            # Highest value of y for which we CONTINUE [i.e., policy = 0]
+            critical_zero_y = int(decision_array[0])
+
+            if y_absolute <= critical_zero_y:  # Current state cannot be significant
+                info = {"Time": self._t + 1, "State": self._state}
+                result = TestResult(self._current_decision, info)
+
+                return result
+
+            elif (
+                y_absolute > critical_zero_y + L
+            ):  # Current state is definitely significant
+                if self.alternative == Hypothesis.P0MoreThanP1:
                     self._current_decision = Decision.AcceptAlternative
+                else:
+                    self._current_decision = Decision.FailToDecide
+                info = {"Time": self._t + 1, "State": self._state}
+                result = TestResult(self._current_decision, info)
+
+                return result
+
+            else:  # Current state is in probabilistic regime
+                # random_scalar = np.random.rand(
+                #     1
+                # )  # TODO: add some kind of seeding procedure to ensure repeatability
+                random_scalar = self.rng.random(1)
+                comparator_rv = decision_array[y_absolute - critical_zero_y]
+                if (
+                    random_scalar <= comparator_rv
+                ):  # Then we have probabilistically rejected
+                    if self.alternative == Hypothesis.P0MoreThanP1:
+                        self._current_decision = Decision.AcceptAlternative
+                    else:
+                        self._current_decision = Decision.FailToDecide
                     info = {"Time": self._t, "State": self._state}
                     result = TestResult(self._current_decision, info)
                 else:  # Then we have probabilistically continued
@@ -221,8 +279,8 @@ def step(
 
                 return result
         else:
-            # Cannot reject; as test is one-sided, can only continue!
-            info = {"Time": self._t, "State": self._state}
+            # Cannot reject because delta is exactly 0; can only continue!
+            info = {"Time": self._t + 1, "State": self._state}
             result = TestResult(self._current_decision, info)
 
             return result
@@ -238,7 +296,7 @@ def reset(
             verbose (bool, optional): If True, print the outputs to stdout.
                 Defaults to False.
         """
-        self._state = np.zeros(2)
+        self._state = np.zeros(2).astype(int)
         self._t = int(0)
         self._current_decision = Decision.FailToDecide
 
@@ -377,19 +435,20 @@ def step(
                 )
             )
 
-        # Iterate time state
-        self._t += 1
-
         # Handle case in which we have exceeded n_max
         if self._t > self.n_max:
             warnings.warn(
                 "Have exceeded the allowed number of evals; not updating internal states."
             )
+            self._t += 1
             info = {"Time": self._t, "State": self._state}
             result = TestResult(self._current_decision, info)
 
             return result
 
+        # Iterate time state
+        self._t += 1
+
         if self.policy is None:
             # warnings.warn(
             #     "No policy assigned, so will default to Fail to Decide. Ensure "
@@ -431,7 +490,7 @@ def step(
             critical_zero_y = int(decision_array[0])
 
             if y_absolute <= critical_zero_y:  # Current state cannot be significant
-                info = {"Time": self._t, "State": self._state}
+                info = {"Time": self._t + 1, "State": self._state}
                 result = TestResult(self._current_decision, info)
 
                 return result
@@ -443,7 +502,7 @@ def step(
                     self._current_decision = Decision.AcceptAlternative
                 else:
                     self._current_decision = Decision.AcceptNull
-                info = {"Time": self._t, "State": self._state}
+                info = {"Time": self._t + 1, "State": self._state}
                 result = TestResult(self._current_decision, info)
 
                 return result
@@ -461,10 +520,10 @@ def step(
                         self._current_decision = Decision.AcceptAlternative
                     else:
                         self._current_decision = Decision.AcceptNull
-                    info = {"Time": self._t, "State": self._state}
+                    info = {"Time": self._t + 1, "State": self._state}
                     result = TestResult(self._current_decision, info)
                 else:  # Then we have probabilistically continued
-                    info = {"Time": self._t, "State": self._state}
+                    info = {"Time": self._t + 1, "State": self._state}
                     result = TestResult(self._current_decision, info)
 
                 return result
@@ -486,7 +545,7 @@ def step(
             critical_zero_y = int(decision_array[0])
 
             if y_absolute <= critical_zero_y:  # Current state cannot be significant
-                info = {"Time": self._t, "State": self._state}
+                info = {"Time": self._t + 1, "State": self._state}
                 result = TestResult(self._current_decision, info)
 
                 return result
@@ -498,7 +557,7 @@ def step(
                     self._current_decision = Decision.AcceptAlternative
                 else:
                     self._current_decision = Decision.AcceptNull
-                info = {"Time": self._t, "State": self._state}
+                info = {"Time": self._t + 1, "State": self._state}
                 result = TestResult(self._current_decision, info)
 
                 return result
@@ -525,7 +584,7 @@ def step(
                 return result
         else:
             # Cannot reject because delta is exactly 0; can only continue!
-            info = {"Time": self._t, "State": self._state}
+            info = {"Time": self._t + 1, "State": self._state}
             result = TestResult(self._current_decision, info)
 
             return result
@@ -18,7 +18,21 @@
         )
     ).resolve()
 )
-eval_trajectories = np.load(f"{paper_data_path}/TRI_CLEAN_SPILL_v2.npy")
+eval_clean_up_spill = np.load(
+    f"{paper_data_path}/TRI_CLEAN_SPILL_v2.npy"
+)  # Must be flipped for standard form
+eval_fold_red_towel = np.load(
+    f"{paper_data_path}/TRI_FOLD_RED_TOWEL.npy"
+)  # ALREADY in standard form
+eval_sim_spoon_on_towel = np.load(
+    f"{paper_data_path}/TRI_SIM_SPOON_ON_TOWEL.npy"
+)  # Must be flipped for standard form
+eval_sim_eggplant_in_basket = np.load(
+    f"{paper_data_path}/TRI_SIM_EGGPLANT_IN_BASKET.npy"
+)  # Must be flipped for standard form
+eval_sim_stack_cube = np.load(
+    f"{paper_data_path}/TRI_SIM_STACK_CUBE.npy"
+)  # Must be flipped for standard form
 
 
 @pytest.fixture(scope="module")
@@ -63,10 +77,10 @@ def test_step_input_value_error(step):
         (Hypothesis.P0MoreThanP1, np.zeros(15), np.ones(15), Decision.FailToDecide),
         (Hypothesis.P0LessThanP1, np.ones(15), np.zeros(15), Decision.FailToDecide),
         (Hypothesis.P0MoreThanP1, np.ones(15), np.zeros(15), Decision.AcceptAlternative),
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], Decision.AcceptAlternative),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], Decision.FailToDecide),
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], Decision.FailToDecide),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], Decision.AcceptAlternative),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], Decision.AcceptAlternative),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], Decision.FailToDecide),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], Decision.FailToDecide),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], Decision.AcceptAlternative),
         # fmt: on
     ],
     indirect=["step"],
@@ -80,17 +94,21 @@ def test_step(step, sequence_0, sequence_1, expected):
     ("step", "sequence_0", "sequence_1", "expected"),
     [
         # fmt: off
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 22.5),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 50),
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 50),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 22.5),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 23),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 50),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 50),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 23),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 21.5),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 50),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 50),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 21.5),
         # fmt: on
     ],
     indirect=["step"],
 )
 def test_step_time(step, sequence_0, sequence_1, expected):
     result = step.run_on_sequence(sequence_0, sequence_1)
-    assert np.abs(float(result.info["Time"]) - expected) <= 3.0
+    assert np.abs(float(result.info["Time"]) - expected) <= 1.2
 
 
 @pytest.fixture(scope="module")
@@ -108,17 +126,21 @@ def step500(request):
     ("step500", "sequence_0", "sequence_1", "expected"),
     [
         # fmt: off
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 33),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 50),
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 50),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 33),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 25.5),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 50),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 50),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 25.5),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 23.5),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 50),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 50),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 23.5),
         # fmt: on
     ],
     indirect=["step500"],
 )
 def test_step500_time(step500, sequence_0, sequence_1, expected):
     result = step500.run_on_sequence(sequence_0, sequence_1)
-    assert np.abs(result.info["Time"] - expected) <= 1.5
+    assert np.abs(result.info["Time"] - expected) <= 0.6
 
 
 ##### Mirrored STEP Test #####
@@ -160,17 +182,21 @@ def test_mirrored_step(mirrored_step, sequence_0, sequence_1, expected):
     ("mirrored_step", "sequence_0", "sequence_1", "expected"),
     [
         # fmt: off
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 25),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 25),
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 25),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 25),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 23.5),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 23.5),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 23.5),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 23.5),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 21.5),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 21.5),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 21.5),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 21.5),
         # fmt: on
     ],
     indirect=["mirrored_step"],
 )
 def test_mirrored_step_time(mirrored_step, sequence_0, sequence_1, expected):
     result = mirrored_step.run_on_sequence(sequence_0, sequence_1)
-    assert np.abs(result.info["Time"] - expected) <= 1.5
+    assert np.abs(result.info["Time"] - expected) <= 0.6
 
 
 @pytest.fixture(scope="module")
@@ -188,14 +214,30 @@ def mirrored_step500(request):
     ("mirrored_step500", "sequence_0", "sequence_1", "expected"),
     [
         # fmt: off
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 33),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 1], eval_trajectories[:, 0], 33),
-        (Hypothesis.P0LessThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 33),
-        (Hypothesis.P0MoreThanP1, eval_trajectories[:, 0], eval_trajectories[:, 1], 33),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 25.5),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 25.5),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 25.5),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 25.5),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 23.5),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 23.5),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 23.5),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 23.5),
+        (Hypothesis.P0LessThanP1, eval_sim_spoon_on_towel[:, 1], eval_sim_spoon_on_towel[:, 0], 32.5),
+        (Hypothesis.P0MoreThanP1, eval_sim_spoon_on_towel[:, 1], eval_sim_spoon_on_towel[:, 0], 32.5),
+        (Hypothesis.P0LessThanP1, eval_sim_spoon_on_towel[:, 0], eval_sim_spoon_on_towel[:, 1], 32.5),
+        (Hypothesis.P0MoreThanP1, eval_sim_spoon_on_towel[:, 0], eval_sim_spoon_on_towel[:, 1], 32.5),
+        (Hypothesis.P0LessThanP1, eval_sim_eggplant_in_basket[:, 1], eval_sim_eggplant_in_basket[:, 0], 119.5),
+        (Hypothesis.P0MoreThanP1, eval_sim_eggplant_in_basket[:, 1], eval_sim_eggplant_in_basket[:, 0], 119.5),
+        (Hypothesis.P0LessThanP1, eval_sim_eggplant_in_basket[:, 0], eval_sim_eggplant_in_basket[:, 1], 119.5),
+        (Hypothesis.P0MoreThanP1, eval_sim_eggplant_in_basket[:, 0], eval_sim_eggplant_in_basket[:, 1], 119.5),
+        (Hypothesis.P0LessThanP1, eval_sim_stack_cube[:, 1], eval_sim_stack_cube[:, 0], 172.5),
+        (Hypothesis.P0MoreThanP1, eval_sim_stack_cube[:, 1], eval_sim_stack_cube[:, 0], 172.5),
+        (Hypothesis.P0LessThanP1, eval_sim_stack_cube[:, 0], eval_sim_stack_cube[:, 1], 172.5),
+        (Hypothesis.P0MoreThanP1, eval_sim_stack_cube[:, 0], eval_sim_stack_cube[:, 1], 172.5),
         # fmt: on
     ],
     indirect=["mirrored_step500"],
 )
 def test_mirrored_step500_time(mirrored_step500, sequence_0, sequence_1, expected):
     result = mirrored_step500.run_on_sequence(sequence_0, sequence_1)
-    assert np.abs(result.info["Time"] - expected) <= 1.5
+    assert np.abs(result.info["Time"] - expected) <= 0.6