Add paper results to (and fix) pytest schedule for Lai and STEP

DavidSnyder-TRI · DavidSnyder-TRI · commit 036d6b4bb874 · 2025-05-15T19:33:26.000-04:00
diff --git a/scripts/paper_results/confirm_paper_results.py b/scripts/paper_results/confirm_paper_results.py
@@ -1,14 +1,10 @@
 """Script to print out all (most) paper results from camera-ready version.
 """
 
-import copy
 import os
 from pathlib import Path
 
 import numpy as np
-from matplotlib import pyplot as plt
-from numpy.typing import ArrayLike
-from tqdm import tqdm
 
 from sequentialized_barnard_tests import (
     Hypothesis,
@@ -60,23 +56,27 @@
         alternative=Hypothesis.P0LessThanP1, n_max=50, alpha=0.05
     )
     lai_hardware_50.calibrate_c()
+    print("Lai-50 value of c: ", lai_hardware_50.c)
 
     lai_hardware_200 = MirroredLaiTest(
         alternative=Hypothesis.P0LessThanP1, n_max=200, alpha=0.05
     )
     lai_hardware_200.calibrate_c()
+    print("Lai-200 value of c: ", lai_hardware_200.c)
     # lai_hardware_200.set_c(0.00014741399676752065)
 
     lai_hardware_500 = MirroredLaiTest(
         alternative=Hypothesis.P0LessThanP1, n_max=500, alpha=0.05
     )
     lai_hardware_500.calibrate_c()
+    print("Lai-500 value of c: ", lai_hardware_500.c)
     # lai_hardware_500.set_c(5.349419043278717e-05)
 
     lai_simulation_500 = MirroredLaiTest(
         alternative=Hypothesis.P0LessThanP1, n_max=500, alpha=0.01
     )
     lai_simulation_500.calibrate_c()
+    print("Lai-500 SIM value of c: ", lai_simulation_500.c)
     # lai_simulation_500.set_c(1.184327928758278e-05)
 
     # Load STEP tests
diff --git a/tests/sequentialized_barnard_tests/test_lai.py b/tests/sequentialized_barnard_tests/test_lai.py
@@ -1,12 +1,38 @@
 """Unit tests for the Lai procedure"""
 
+import os
+from pathlib import Path
+
 import numpy as np
 import pytest
 
 from sequentialized_barnard_tests import Decision, Hypothesis
 from sequentialized_barnard_tests.lai import LaiTest, MirroredLaiTest
 
 ##### Lai Test #####
+paper_data_path = str(  # absolute path of ../eval_data relative to this file
+    Path(
+        os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            "../eval_data/",
+        )
+    ).resolve()
+)
+eval_clean_up_spill = np.load(  # loaded once at import; indexed by column below
+    f"{paper_data_path}/TRI_CLEAN_SPILL_v4.npy"
+)  # Must be flipped for standard form (presumably: swap columns; confirm)
+eval_fold_red_towel = np.load(
+    f"{paper_data_path}/TRI_FOLD_RED_TOWEL.npy"
+)  # Already in standard form (presumably: columns used as-is; confirm)
+eval_sim_spoon_on_towel = np.load(
+    f"{paper_data_path}/TRI_SIM_SPOON_ON_TOWEL.npy"
+)  # Must be flipped for standard form (presumably: swap columns; confirm)
+eval_sim_eggplant_in_basket = np.load(
+    f"{paper_data_path}/TRI_SIM_EGGPLANT_IN_BASKET.npy"
+)  # Must be flipped for standard form (presumably: swap columns; confirm)
+eval_sim_stack_cube = np.load(
+    f"{paper_data_path}/TRI_SIM_STACK_CUBE.npy"
+)  # Must be flipped for standard form (presumably: swap columns; confirm)
 
 
 @pytest.fixture(scope="module")
@@ -18,7 +44,7 @@ def lai(request):
         calibrate_regularizer=False,
         use_offline_calibration=False,
     )
-    test.set_c(4.3320915613895993e-05)
+    test.set_c(5.3077895340120925e-05)
     return test
 
 
@@ -62,6 +88,70 @@ def test_lai(lai, sequence_0, sequence_1, expected):
     assert result.decision == expected
 
 
+@pytest.fixture(scope="module")
+def lai200(request):  # LaiTest with n_max=200; request.param is the alternative
+    test = LaiTest(
+        alternative=request.param,
+        n_max=200,
+        alpha=0.05,
+    )  # NOTE(review): no calibrate_regularizer/use_offline_calibration here; confirm
+    test.set_c(0.00014121395942619315)  # fixed c (presumably pre-calibrated; confirm)
+    return test
+
+
+@pytest.mark.parametrize(
+    ("lai200", "sequence_0", "sequence_1", "expected"),
+    [
+        # fmt: off
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 13),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 50),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 50),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 13),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 21),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 50),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 50),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 21),
+        # fmt: on
+    ],
+    indirect=["lai200"],  # first tuple entry is routed to the lai200 fixture
+)
+def test_lai200_time(lai200, sequence_0, sequence_1, expected):  # expected: info["Time"]
+    result = lai200.run_on_sequence(sequence_0, sequence_1)
+    assert np.abs(result.info["Time"] - expected) <= 0.6  # within 0.6 of expected
+
+
+@pytest.fixture(scope="module")
+def lai50(request):  # LaiTest with n_max=50; request.param is the alternative
+    test = LaiTest(
+        alternative=request.param,
+        n_max=50,
+        alpha=0.05,
+    )  # NOTE(review): no calibrate_regularizer/use_offline_calibration here; confirm
+    test.set_c(0.000561395711114114)  # fixed c (presumably pre-calibrated; confirm)
+    return test
+
+
+@pytest.mark.parametrize(
+    ("lai50", "sequence_0", "sequence_1", "expected"),
+    [
+        # fmt: off
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 8),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 50),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 50),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 8),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 17),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 50),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 50),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 17),
+        # fmt: on
+    ],
+    indirect=["lai50"],  # first tuple entry is routed to the lai50 fixture
+)
+def test_lai50_time(lai50, sequence_0, sequence_1, expected):  # expected: info["Time"]
+    result = lai50.run_on_sequence(sequence_0, sequence_1)
+    assert np.abs(result.info["Time"] - expected) <= 0.6  # within 0.6 of expected
+
+
 ##### Mirrored Lai Test #####
 
 
@@ -99,6 +189,108 @@ def test_mirrored_lai(mirrored_lai, sequence_0, sequence_1, expected):
     assert result.decision == expected
 
 
+@pytest.fixture(scope="module")
+def mirrored_lai200(request):  # MirroredLaiTest, n_max=200; param is the alternative
+    test = MirroredLaiTest(
+        alternative=request.param,
+        n_max=200,
+        alpha=0.05,
+    )  # NOTE(review): no calibrate_regularizer/use_offline_calibration here; confirm
+    test.set_c(0.00014121395942619315)  # fixed c (presumably pre-calibrated; confirm)
+    return test
+
+
+@pytest.mark.parametrize(
+    ("mirrored_lai200", "sequence_0", "sequence_1", "expected"),
+    [
+        # fmt: off
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 13),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 13),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 13),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 13),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 21),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 21),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 21),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 21),
+        # fmt: on
+    ],
+    indirect=["mirrored_lai200"],  # first tuple entry is routed to the fixture
+)
+def test_mirrored_lai200_time(mirrored_lai200, sequence_0, sequence_1, expected):  # expected: info["Time"]
+    result = mirrored_lai200.run_on_sequence(sequence_0, sequence_1)
+    assert np.abs(result.info["Time"] - expected) <= 0.6  # within 0.6 of expected
+
+
+@pytest.fixture(scope="module")
+def mirrored_lai50(request):  # MirroredLaiTest, n_max=50; param is the alternative
+    test = MirroredLaiTest(
+        alternative=request.param,
+        n_max=50,
+        alpha=0.05,
+    )  # NOTE(review): no calibrate_regularizer/use_offline_calibration here; confirm
+    test.set_c(0.000561395711114114)  # fixed c (presumably pre-calibrated; confirm)
+    return test
+
+
+@pytest.mark.parametrize(
+    ("mirrored_lai50", "sequence_0", "sequence_1", "expected"),
+    [
+        # fmt: off
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 8),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 1], eval_clean_up_spill[:, 0], 8),
+        (Hypothesis.P0LessThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 8),
+        (Hypothesis.P0MoreThanP1, eval_clean_up_spill[:, 0], eval_clean_up_spill[:, 1], 8),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 17),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 0], eval_fold_red_towel[:, 1], 17),
+        (Hypothesis.P0LessThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 17),
+        (Hypothesis.P0MoreThanP1, eval_fold_red_towel[:, 1], eval_fold_red_towel[:, 0], 17),
+        # fmt: on
+    ],
+    indirect=["mirrored_lai50"],  # first tuple entry is routed to the fixture
+)
+def test_mirrored_lai50_time(mirrored_lai50, sequence_0, sequence_1, expected):  # expected: info["Time"]
+    result = mirrored_lai50.run_on_sequence(sequence_0, sequence_1)
+    assert np.abs(result.info["Time"] - expected) <= 0.6  # within 0.6 of expected
+
+
+@pytest.fixture(scope="module")
+def mirrored_lai500(request):  # MirroredLaiTest, n_max=500, alpha=0.01
+    test = MirroredLaiTest(
+        alternative=request.param,  # alternative hypothesis from the parametrization
+        n_max=500,
+        alpha=0.01,
+        calibrate_regularizer=False,
+        use_offline_calibration=False,
+    )
+    test.set_c(1.013009359863071e-05)  # fixed c (presumably pre-calibrated; confirm)
+    return test
+
+
+@pytest.mark.parametrize(
+    ("mirrored_lai500", "sequence_0", "sequence_1", "expected"),
+    [
+        # fmt: off
+        (Hypothesis.P0LessThanP1, eval_sim_spoon_on_towel[:, 1], eval_sim_spoon_on_towel[:, 0], 36),
+        (Hypothesis.P0MoreThanP1, eval_sim_spoon_on_towel[:, 1], eval_sim_spoon_on_towel[:, 0], 36),
+        (Hypothesis.P0LessThanP1, eval_sim_spoon_on_towel[:, 0], eval_sim_spoon_on_towel[:, 1], 36),
+        (Hypothesis.P0MoreThanP1, eval_sim_spoon_on_towel[:, 0], eval_sim_spoon_on_towel[:, 1], 36),
+        (Hypothesis.P0LessThanP1, eval_sim_eggplant_in_basket[:, 1], eval_sim_eggplant_in_basket[:, 0], 125),
+        (Hypothesis.P0MoreThanP1, eval_sim_eggplant_in_basket[:, 1], eval_sim_eggplant_in_basket[:, 0], 125),
+        (Hypothesis.P0LessThanP1, eval_sim_eggplant_in_basket[:, 0], eval_sim_eggplant_in_basket[:, 1], 125),
+        (Hypothesis.P0MoreThanP1, eval_sim_eggplant_in_basket[:, 0], eval_sim_eggplant_in_basket[:, 1], 125),
+        (Hypothesis.P0LessThanP1, eval_sim_stack_cube[:, 1], eval_sim_stack_cube[:, 0], 417),
+        (Hypothesis.P0MoreThanP1, eval_sim_stack_cube[:, 1], eval_sim_stack_cube[:, 0], 417),
+        (Hypothesis.P0LessThanP1, eval_sim_stack_cube[:, 0], eval_sim_stack_cube[:, 1], 417),
+        (Hypothesis.P0MoreThanP1, eval_sim_stack_cube[:, 0], eval_sim_stack_cube[:, 1], 417),
+        # fmt: on
+    ],
+    indirect=["mirrored_lai500"],  # first tuple entry is routed to the fixture
+)
+def test_mirrored_lai500_time(mirrored_lai500, sequence_0, sequence_1, expected):  # expected: info["Time"]
+    result = mirrored_lai500.run_on_sequence(sequence_0, sequence_1)
+    assert np.abs(result.info["Time"] - expected) <= 0.6  # within 0.6 of expected
+
+
 ##### Offline Calibration Test #####
 @pytest.mark.parametrize(
     ("alpha", "n_max"),
diff --git a/tests/sequentialized_barnard_tests/test_step.py b/tests/sequentialized_barnard_tests/test_step.py