From 9c3d6bbfe257bd4b74c7f40f5fd99822b304bbf5 Mon Sep 17 00:00:00 2001
From: DavidSnyder-TRI
Date: Thu, 27 Mar 2025 14:42:20 -0400
Subject: [PATCH 1/3] Add sequentialized Barnard procedure

---
 .../barnard_sequential.py      | 329 ++++++++++++++++++
 .../test_barnard_sequential.py | 166 +++++++++
 2 files changed, 495 insertions(+)
 create mode 100644 sequentialized_barnard_tests/barnard_sequential.py
 create mode 100644 tests/sequentialized_barnard_tests/test_barnard_sequential.py

diff --git a/sequentialized_barnard_tests/barnard_sequential.py b/sequentialized_barnard_tests/barnard_sequential.py
new file mode 100644
index 0000000..60105ed
--- /dev/null
+++ b/sequentialized_barnard_tests/barnard_sequential.py
@@ -0,0 +1,329 @@
+"""Module implementing a rectified (valid) sequential Barnard procedure.
+
+This module incorporates the base (batch) Barnard procedure into a sequentialized
+method using the Bonferroni (union bound) correction.
+"""
+
+import warnings
+from typing import Any, Optional, Tuple, Type, Union
+
+import numpy as np
+from numpy.typing import ArrayLike
+from scipy.stats import barnard_exact
+
+from sequentialized_barnard_tests.base import (
+    Decision,
+    Hypothesis,
+    MirroredTestMixin,
+    SequentialTestBase,
+    TestResult,
+)
+from sequentialized_barnard_tests.batch import (
+    BarnardExactTest,
+    MirroredBarnardExactTest,
+)
+
+
+class BarnardSequentialTest(SequentialTestBase):
+    """_summary_
+
+    Args:
+        SequentialTestBase (_type_): _description_
+    """
+
+    def __init__(
+        self,
+        alternative: Hypothesis,
+        alpha: float,
+        n_max: int,
+        times_of_evaluation: ArrayLike,
+        weights_of_evaluation: Optional[ArrayLike] = None,
+    ) -> None:
+        """_summary_
+
+        Args:
+            alternative (Hypothesis): _description_
+            alpha (float): _description_
+            n_max (int): _description_
+            times_of_evaluation (ArrayLike): _description_
+            weights_of_evaluation (Optional[ArrayLike], optional): _description_. Defaults to None.
+
+        Raises:
+            ValueError: If alpha, n_max, times_of_evaluation are invalid
+        """
+        # Handle inputs and check for errors in test attributes
+        try:
+            assert 0.0 < alpha and alpha < 1.0
+            assert n_max >= 1
+        except:
+            raise ValueError(
+                "Invalid inputs: alpha must be in (0., 1.)
and n_max must be >= 1"
+            )
+
+        # Assign test attributes
+        self.alpha = alpha
+        self.n_max = n_max
+        self.alternative = alternative
+
+        # Handle inputs and check for errors in evaluation protocol attributes
+        try:
+            assert len(times_of_evaluation) >= 1
+            evaluation_times_is_array = True
+        except:
+            try:
+                assert times_of_evaluation >= 1
+            except:
+                raise ValueError("Scalar times_of_evaluation must be a positive integer!")
+
+            evaluation_times_is_array = False
+
+        # Handle additional error cases and assign times_of_evaluation and n_evaluations
+        if evaluation_times_is_array:
+            times_of_evaluation = np.sort(times_of_evaluation)
+            try:
+                assert np.min(times_of_evaluation) >= 1
+                assert np.min(np.diff(times_of_evaluation)) >= 1
+            except:
+                raise ValueError(
+                    "If an array, times_of_evaluation must be positive and strictly increasing"
+                )
+
+            if np.max(times_of_evaluation) <= self.n_max:
+                # All values acceptable
+                self.times_of_evaluation = times_of_evaluation
+                self.n_evaluations = len(self.times_of_evaluation)
+            else:
+                # Can only accept up to self.n_max; drop any later evaluation times
+                self.times_of_evaluation = times_of_evaluation[
+                    times_of_evaluation <= self.n_max
+                ]
+                self.n_evaluations = len(self.times_of_evaluation)
+        else:
+            # Make scalar into an explicit array of times to sample
+            self.times_of_evaluation = np.arange(0, n_max, times_of_evaluation)[1:]
+            self.n_evaluations = len(self.times_of_evaluation)
+
+        # Given n_evaluations and times_of_evaluation, check the
+        # weighting of risk and handle errors
+        if weights_of_evaluation is None:
+            # Uniform budget: each evaluation spends alpha / n_evaluations,
+            # so the per-test levels sum to alpha
+            weights_of_evaluation = (
+                self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+            )
+        elif len(weights_of_evaluation) < self.n_evaluations:
+            warnings.warn(
+                "weights_of_evaluation is too short. Reverting to uniform risk budget."
+            )
+            weights_of_evaluation = (
+                self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+            )
+        elif len(weights_of_evaluation) == self.n_evaluations:
+            if np.min(weights_of_evaluation) > 0:
+                # Rescale so that the per-test levels sum to alpha
+                weights_of_evaluation = np.asarray(weights_of_evaluation, dtype=float)
+                weights_of_evaluation *= self.alpha / sum(weights_of_evaluation)
+            else:
+                warnings.warn(
+                    "Weights contain non-positive elements; unclear how to rectify. Reverting to uniform risk budget."
+                )
+                weights_of_evaluation = (
+                    self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+                )
+                # weights_of_evaluation += np.min(weights_of_evaluation)
+                # weights_of_evaluation *= self.alpha / sum(weights_of_evaluation)
+        else:
+            warnings.warn(
+                f"weights_of_evaluation is too long. Taking only the first n_evaluations (here, {self.n_evaluations}) components."
+            )
+            weights_of_evaluation = weights_of_evaluation[: self.n_evaluations]
+            assert len(weights_of_evaluation) == self.n_evaluations
+            if np.min(weights_of_evaluation) > 0:
+                # Rescale so that the per-test levels sum to alpha
+                weights_of_evaluation = np.asarray(weights_of_evaluation, dtype=float)
+                weights_of_evaluation *= self.alpha / sum(weights_of_evaluation)
+            else:
+                warnings.warn(
+                    "Weights contain non-positive elements; unclear how to rectify. Reverting to uniform risk budget."
+                )
+                weights_of_evaluation = (
+                    self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+                )
+
+        # Assign the risk weighting
+        self.weights_of_evaluation = weights_of_evaluation
+
+        # Assign state variables
+        self.t = None
+        self.sequence = None
+        self.reset()
+
+    def step(
+        self,
+        datum_0: Union[bool, int, float],
+        datum_1: Union[bool, int, float],
+        verbose: Optional[bool] = False,
+    ) -> TestResult:
+        """_summary_
+
+        Args:
+            datum_0 (_type_): _description_
+            datum_1 (_type_): _description_
+            verbose (Optional[bool], optional): _description_. Defaults to False.
+
+        Returns:
+            TestResult: _description_
+        """
+        is_bernoulli_0 = datum_0 in [0, 1]
+        is_bernoulli_1 = datum_1 in [0, 1]
+        if not (is_bernoulli_0 and is_bernoulli_1):
+            raise (ValueError("Input data are not interpretable as Bernoulli."))
+        if verbose:
+            print(
+                (
+                    "Update the Barnard sequential process given new "
+                    f"datum_0 == {datum_0} and datum_1 == {datum_1}."
+                )
+            )
+
+        # Update state (i.e., time)
+        self.sequence[self.t, 0] = datum_0
+        self.sequence[self.t, 1] = datum_1
+        self.t += 1
+
+        # If time matches an element of 'times_of_evaluation,' run a batch test
+        run_test = False
+        if np.min(np.abs(self.times_of_evaluation - self.t)) < 0.5:
+            run_test = True
+            idx = np.argmin(np.abs(self.times_of_evaluation - self.t))
+
+        if run_test:
+            batch_test = BarnardExactTest(
+                self.alternative, self.weights_of_evaluation[idx]
+            )
+            if self.t < self.n_max:
+                result = batch_test.run_on_sequence(
+                    self.sequence[: self.t, 0], self.sequence[: self.t, 1]
+                )
+            else:
+                result = batch_test.run_on_sequence(
+                    self.sequence[:, 0], self.sequence[:, 1]
+                )
+
+            # Append time of decision
+            result.info["Time"] = self.t
+
+            return result
+        else:
+            decision = Decision.FailToDecide
+            info = {"Time": self.t}
+
+            result = TestResult(decision, info)
+            return result
+
+    def reset(self, verbose: Optional[bool] = False) -> None:
+        """_summary_
+
+        Args:
+            verbose (Optional[bool], optional): _description_. Defaults to False.
+        """
+        self.t = int(0)
+        self.sequence = np.zeros((self.n_max, 2))
+
+
+class MirroredBarnardSequentialTest(MirroredTestMixin, SequentialTestBase):
+    """A pair of one-sided Barnard's exact tests with mirrored alternatives.
+
+    In our terminology, a mirrored test is one that runs two one-sided tests
+    simultaneously, with the null and the alternative flipped from each other. This is so
+    that it can yield either Decision.AcceptNull or Decision.AcceptAlternative depending
+    on the input data, unlike standard one-sided tests that can never 'accept' the null.
+    (Those standard tests will at most fail to reject the null, as represented by
+    Decision.FailToDecide.)
+
+    For example, if the alternative is Hypothesis.P0MoreThanP1 and the decision is
+    Decision.AcceptNull, it should be interpreted as accepting Hypothesis.P0LessThanP1.
+
+    The significance level alpha controls the following two errors simultaneously: (1)
+    probability of wrongly accepting the alternative when the null is true, and (2)
+    probability of wrongly accepting the null when the alternative is true. Note that
+    Bonferroni correction is not needed since the null hypothesis for one test is the
+    alternative for the other.
+
+    Attributes:
+        alternative: Specification of the alternative hypothesis.
+        alpha: Significance level of the test.
+    """
+
+    _base_class = BarnardSequentialTest
+
+    def run_on_sequence(
+        self, sequence_0: ArrayLike, sequence_1: ArrayLike
+    ) -> TestResult:
+        """Runs the test on a pair of Bernoulli sequences.
+
+        Args:
+            sequence_0: Sequence of Bernoulli data from the first source.
+            sequence_1: Sequence of Bernoulli data from the second source.
+
+        Returns:
+            TestResult: Result of the hypothesis test.
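+
+        Example:
+            A minimal usage sketch; it mirrors the debugging notebook included
+            in this change, and the constructor arguments are the ones used
+            there (the exact p-values come from the underlying batch test):
+
+                test = MirroredBarnardSequentialTest(
+                    alternative=Hypothesis.P0LessThanP1,
+                    alpha=0.05,
+                    n_max=500,
+                    times_of_evaluation=10,
+                )
+                result = test.run_on_sequence(np.zeros(31), np.ones(31))
+                # result.decision == Decision.AcceptAlternative, reached at the
+                # first batch evaluation (the constituent results stored in
+                # result.info carry info["Time"] == 10).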
+ """ + self._test_for_alternative.reset() + self._test_for_null.reset() + + if not (len(sequence_0) == len(sequence_1)): + raise (ValueError("The two input sequences must have the same size.")) + + # Solve the problem of reaching the end of the sequence by + # assigning result to FailToDecide by default + result = TestResult(decision=Decision.FailToDecide) + + # Run through sequence until a decision is reached or sequence is + # fully completed. + for idx in range(len(sequence_0)): + result_for_alternative, result_for_null = self.step( + sequence_0[idx], sequence_1[idx] + ) + + # result_for_alternative = self._test_for_alternative.step( + # sequence_0[idx], sequence_1[idx], *args, **kwargs + # ) + # result_for_null = self._test_for_null.step( + # sequence_0[idx], sequence_1[idx], *args, **kwargs + # ) + + # Store the info (constituent test results) + info = { + "result_for_alternative": result_for_alternative, + "result_for_null": result_for_null, + } + + # Assign decision variable + if (not result_for_alternative.decision == Decision.FailToDecide) and ( + result_for_null.decision == Decision.FailToDecide + ): + # If only result_for_alternative, then overall decision is Reject Null / Accept Alternative + decision = Decision.AcceptAlternative + elif (not result_for_null.decision == Decision.FailToDecide) and ( + result_for_alternative.decision == Decision.FailToDecide + ): + # If only result_for_null, then overall decision is Accept Null / Reject Alternative + decision = Decision.AcceptNull + else: + # Neither (or both) significant: decision is Decision.FailToDecide + decision = Decision.FailToDecide + + if not decision == Decision.FailToDecide: + break + + # Assign result + result = TestResult(decision, info) + return result + + def step( + self, datum_0, datum_1, verbose: Optional[bool] = False + ) -> Tuple[TestResult, TestResult]: + """_summary_ + + Args: + datum_0 (_type_): _description_ + datum_1 (_type_): _description_ + verbose (Optional[bool], optional): _description_. Defaults to False. + + Returns: + Tuple[TestResult, TestResult]: _description_ + """ + result_for_alternative = self._test_for_alternative.step( + datum_0, datum_1, verbose + ) + + result_for_null = self._test_for_null.step(datum_0, datum_1, verbose) + + return [result_for_alternative, result_for_null] diff --git a/tests/sequentialized_barnard_tests/test_barnard_sequential.py b/tests/sequentialized_barnard_tests/test_barnard_sequential.py new file mode 100644 index 0000000..8754fca --- /dev/null +++ b/tests/sequentialized_barnard_tests/test_barnard_sequential.py @@ -0,0 +1,166 @@ +"""Unit tests for the Barnard sequential (rectified) method. +""" + +import numpy as np +import pytest + +from sequentialized_barnard_tests import Decision, Hypothesis +from sequentialized_barnard_tests.barnard_sequential import ( + BarnardSequentialTest, + MirroredBarnardSequentialTest, +) + +##### Barnard Sequential Test ##### + + +@pytest.fixture(scope="module") +def barnard_sequential(request): + test = BarnardSequentialTest( + alternative=request.param, alpha=0.05, n_max=500, times_of_evaluation=10 + ) + return test + + +@pytest.mark.parametrize( + ("barnard_sequential"), + [(Hypothesis.P0LessThanP1), (Hypothesis.P0MoreThanP1)], + indirect=["barnard_sequential"], +) +def test_barnard_sequential_value_error(barnard_sequential): + # Should raise a ValueError if non-binary data is given. 
+ with pytest.raises(ValueError): + barnard_sequential.step(1.2, 1) + with pytest.raises(ValueError): + barnard_sequential.step(1, 1.2) + + # Should raise a ValueError if two sequences do not have the same length. + with pytest.raises(ValueError): + barnard_sequential.run_on_sequence([0, 0], [1, 1, 1]) + with pytest.raises(ValueError): + barnard_sequential.run_on_sequence([1, 1, 1], [0, 0]) + + +@pytest.mark.parametrize( + ("barnard_sequential", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.zeros(31), np.ones(31), Decision.AcceptAlternative), + (Hypothesis.P0MoreThanP1, np.zeros(31), np.ones(31), Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.ones(31), np.zeros(31), Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, np.ones(31), np.zeros(31), Decision.AcceptAlternative), + # fmt: on + ], + indirect=["barnard_sequential"], +) +def test_barnard_sequential(barnard_sequential, sequence_0, sequence_1, expected): + result = barnard_sequential.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected + + +##### Mirrored Barnard Sequential Test ##### + + +# def test_mirrored_savi_attribute_assignment(): +# test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0MoreThanP1, alpha=0.05) +# assert test.alpha == 0.05 +# assert test._test_for_alternative.alpha == test.alpha +# assert test._test_for_null.alpha == test.alpha +# assert test.alternative == Hypothesis.P0MoreThanP1 +# assert test._test_for_alternative.alternative == test.alternative +# assert test._test_for_null.alternative == Hypothesis.P0LessThanP1 + +# new_alpha = 0.2 +# test.alpha = new_alpha +# assert test._test_for_alternative.alpha == new_alpha +# assert test._test_for_null.alpha == new_alpha + +# new_alternative = Hypothesis.P0LessThanP1 +# test.alternative = new_alternative +# assert test._test_for_alternative.alternative == test.alternative +# assert test._test_for_null.alternative == Hypothesis.P0MoreThanP1 + + +@pytest.fixture(scope="module") +def mirrored_barnard_sequential(request): + test = MirroredBarnardSequentialTest( + alternative=request.param, alpha=0.05, n_max=500, times_of_evaluation=10 + ) + return test + + +@pytest.mark.parametrize( + ("mirrored_barnard_sequential", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.zeros(31), np.ones(31), Decision.AcceptAlternative), + (Hypothesis.P0MoreThanP1, np.zeros(31), np.ones(31), Decision.AcceptNull), + (Hypothesis.P0LessThanP1, np.ones(31), np.zeros(31), Decision.AcceptNull), + (Hypothesis.P0MoreThanP1, np.ones(31), np.zeros(31), Decision.AcceptAlternative), + # fmt: on + ], + indirect=["mirrored_barnard_sequential"], +) +def test_mirrored_barnard_sequential( + mirrored_barnard_sequential, sequence_0, sequence_1, expected +): + result = mirrored_barnard_sequential.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected + + +@pytest.mark.parametrize( + 
("mirrored_barnard_sequential", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 1, 0, 0, 1, 1, 0, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 0, 1, 1], Decision.FailToDecide), + # fmt: on + ], + indirect=["mirrored_barnard_sequential"], +) +def test_mirrored_barnard_sequential_special( + mirrored_barnard_sequential, sequence_0, sequence_1, expected +): + result = mirrored_barnard_sequential.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected + assert ( + result.info["result_for_alternative"].info["p_value"] + < mirrored_barnard_sequential.alpha + ) + + +@pytest.fixture(scope="module") +def mirrored_barnard_sequential_slow(request): + test = MirroredBarnardSequentialTest( + alternative=request.param, alpha=0.05, n_max=500, times_of_evaluation=50 + ) + return test + + +@pytest.mark.parametrize( + ("mirrored_barnard_sequential_slow", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.zeros(49), np.ones(49), Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, np.zeros(49), np.ones(49), Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.ones(49), np.zeros(49), Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, np.ones(49), np.zeros(49), Decision.FailToDecide), + # fmt: on + ], + indirect=["mirrored_barnard_sequential_slow"], +) +def test_mirrored_barnard_sequential_slow( + mirrored_barnard_sequential_slow, sequence_0, sequence_1, expected +): + result = mirrored_barnard_sequential_slow.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected From 49f337859e50ed1d5712e2e1eef2ead9f9755609 Mon Sep 17 00:00:00 2001 From: DavidSnyder-TRI Date: Thu, 27 Mar 2025 14:43:07 -0400 Subject: [PATCH 2/3] Store copy of debugging test file --- test_barnard_sequential.ipynb | 204 ++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 test_barnard_sequential.ipynb diff --git a/test_barnard_sequential.ipynb b/test_barnard_sequential.ipynb new file mode 100644 index 0000000..c26323d --- /dev/null +++ b/test_barnard_sequential.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np \n", + "from sequentialized_barnard_tests.barnard_sequential import BarnardSequentialTest, MirroredBarnardSequentialTest\n", + "from sequentialized_barnard_tests import Decision, Hypothesis" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "alpha = 0.05\n", + "n_max = 500\n", + "times_of_evaluation = 10 " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "base_test = BarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "base_mirrored_test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + 
"result_base = base_test.run_on_sequence(np.zeros(31), np.ones(31))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.AcceptAlternative\n", + "{'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10}\n" + ] + } + ], + "source": [ + "print(result_base.decision)\n", + "print(result_base.info)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "result_mirrored_base = base_mirrored_test.run_on_sequence(np.zeros(31), np.ones(31))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.AcceptAlternative\n", + "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10})\n", + "TestResult(decision=, info={'p_value': 1.0, 'statistic': -4.47213595499958, 'Time': 10})\n" + ] + } + ], + "source": [ + "print(result_mirrored_base.decision)\n", + "print(result_mirrored_base.info[\"result_for_alternative\"])\n", + "print(result_mirrored_base.info[\"result_for_null\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.FailToDecide\n", + "{'Time': 31}\n" + ] + } + ], + "source": [ + "result_base = base_test.run_on_sequence(np.ones(31), np.zeros(31))\n", + "print(result_base.decision)\n", + "print(result_base.info)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.AcceptNull\n", + "TestResult(decision=, info={'p_value': 1.0, 'statistic': 4.47213595499958, 'Time': 10})\n", + "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': 4.47213595499958, 'Time': 10})\n" + ] + } + ], + "source": [ + "result_mirrored_base = base_mirrored_test.run_on_sequence(np.ones(31), np.zeros(31))\n", + "print(result_mirrored_base.decision)\n", + "print(result_mirrored_base.info[\"result_for_alternative\"])\n", + "print(result_mirrored_base.info[\"result_for_null\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.FailToDecide\n", + "TestResult(decision=, info={'p_value': 0.031154807675887342, 'statistic': -1.9518001458970664, 'Time': 10})\n", + "TestResult(decision=, info={'p_value': 1.0, 'statistic': -1.9518001458970664, 'Time': 10})\n", + "0.05\n" + ] + } + ], + "source": [ + "# result_mirrored_base = base_mirrored_test.run_on_sequence(\n", + "# [0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", + "# [1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", + "# )\n", + "result_mirrored_base = base_mirrored_test.run_on_sequence(\n", + " [0, 1, 0, 0, 1, 1, 0, 1, 1, 0], # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", + " [1, 1, 1, 1, 1, 1, 1, 0, 1, 1], # , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", + " )\n", + "print(result_mirrored_base.decision)\n", + "print(result_mirrored_base.info[\"result_for_alternative\"])\n", + "print(result_mirrored_base.info[\"result_for_null\"])\n", + "print(base_mirrored_test.alpha)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
"seq_barnard", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 2b813d4d1223ffcd82c53e20a357f82c0f431216 Mon Sep 17 00:00:00 2001 From: DavidSnyder-TRI Date: Thu, 27 Mar 2025 15:07:28 -0400 Subject: [PATCH 3/3] Update documentation and remove the old testing notebook --- .../barnard_sequential.py | 78 ++++--- test_barnard_sequential.ipynb | 204 ------------------ .../test_barnard_sequential.py | 22 +- 3 files changed, 48 insertions(+), 256 deletions(-) delete mode 100644 test_barnard_sequential.ipynb diff --git a/sequentialized_barnard_tests/barnard_sequential.py b/sequentialized_barnard_tests/barnard_sequential.py index 60105ed..a0422ea 100644 --- a/sequentialized_barnard_tests/barnard_sequential.py +++ b/sequentialized_barnard_tests/barnard_sequential.py @@ -25,10 +25,12 @@ class BarnardSequentialTest(SequentialTestBase): - """_summary_ + """Naive sequential rectification of Barnard's Exact Test. This method + utilizes (weighted) Bonferroni error correction across the discrete times + chosen for batch evaluation. At each time of evaluation, a batch Barnard's + Exact Test is run at level alpha_prime, such that the sum of all alpha_prime + for all evaluation times is equal to alpha, the overall FPR limit of the test. - Args: - SequentialTestBase (_type_): _description_ """ def __init__( @@ -36,20 +38,26 @@ def __init__( alternative: Hypothesis, alpha: float, n_max: int, - times_of_evaluation: ArrayLike, + times_of_evaluation: Union[ArrayLike, int], weights_of_evaluation: Optional[ArrayLike] = None, ) -> None: - """_summary_ + """Initialize the BarnardSequentialTest object. Args: - alternative (Hypothesis): _description_ - alpha (float): _description_ - n_max (int): _description_ - times_of_evaluation (ArrayLike): _description_ - weights_of_evaluation (Optional[ArrayLike], optional): _description_. Defaults to None. + alternative (Hypothesis): Alternative hypothesis for the test. + alpha (float): Significance level of the test. Lies in (0., 1.) + n_max (int): Maximum number of trials of the test. Integer, must be > 0. + times_of_evaluation (ArrayLike, int): Set of times at which to run batch evaluation (ArrayLike), or + regular interval at which evaluation is to be done (int). Must be + positive. + weights_of_evaluation (Optional[ArrayLike]): Weights (fraction of risk budget) to spend at each evaluation. + If None, defaults to uniform: alpha_prime = alpha / n_evaluations. Defaults to None. 
         Raises:
-            ValueError: If alpha, n_max, times_of_evaluation are invalid
+            ValueError: If alpha, n_max are invalid
+            ValueError: If times_of_evaluation is anywhere non-positive
+            Warning: If weights_of_evaluation does not match times_of_evaluation in length, or contains non-positive entries
+
         """
         # Handle inputs and check for errors in test attributes
         try:
@@ -80,6 +88,8 @@ def __init__(
         # Handle additional error cases and assign times_of_evaluation and n_evaluations
         if evaluation_times_is_array:
             times_of_evaluation = np.sort(times_of_evaluation)
+
+            # TODO: Add in floor + redundancy handling (remove multiple instances of the same time)
             try:
                 assert np.min(times_of_evaluation) >= 1
                 assert np.min(np.diff(times_of_evaluation)) >= 1
             except:
                 raise ValueError(
                     "If an array, times_of_evaluation must be positive and strictly increasing"
                 )
@@ -150,15 +160,20 @@ def step(
         datum_1: Union[bool, int, float],
         verbose: Optional[bool] = False,
     ) -> TestResult:
-        """_summary_
+        """Step through a one-sided sequentialized Barnard exact test. The
+        procedure appends the new data to the stored self.sequence, increments
+        the time variable self.t, and then does one of two things:
+            (1) If self.t is NOT a time of evaluation, return decision =
+            Decision.FailToDecide
+            (2) If self.t is a time of evaluation, run a batch test at level
+            alpha_prime = self.weights_of_evaluation[idx], where the weights
+            are scaled at construction to sum to alpha.
 
         Args:
-            datum_0 (_type_): _description_
-            datum_1 (_type_): _description_
-            verbose (Optional[bool], optional): _description_. Defaults to False.
+            datum_0 (Union[bool, int, float]): New datum from sequence 0
+            datum_1 (Union[bool, int, float]): New datum from sequence 1
+            verbose (Optional[bool], optional): If True, print to stdout. Defaults to False.
 
         Returns:
-            TestResult: _description_
+            TestResult: Result of the test at time self.t.
         """
         is_bernoulli_0 = datum_0 in [0, 1]
         is_bernoulli_1 = datum_1 in [0, 1]
         if not (is_bernoulli_0 and is_bernoulli_1):
@@ -208,17 +223,19 @@ def step(
         return result
 
     def reset(self, verbose: Optional[bool] = False) -> None:
-        """_summary_
+        """Resets the state variables in order to allow for evaluating
+        a new trajectory.
 
         Args:
-            verbose (Optional[bool], optional): _description_. Defaults to False.
+            verbose (Optional[bool], optional): If True, print to stdout. Defaults to False.
         """
+        # Reset state variables
         self.t = int(0)
         self.sequence = np.zeros((self.n_max, 2))
 
 
 class MirroredBarnardSequentialTest(MirroredTestMixin, SequentialTestBase):
-    """A pair of one-sided Barnard's exact tests with mirrored alternatives.
+    """A pair of one-sided, sequential Barnard's exact tests with mirrored alternatives.
 
     In our terminology, a mirrored test is one that runs two one-sided tests
@@ -272,13 +289,6 @@ def run_on_sequence(
                 sequence_0[idx], sequence_1[idx]
             )
 
-            # result_for_alternative = self._test_for_alternative.step(
-            #     sequence_0[idx], sequence_1[idx], *args, **kwargs
-            # )
-            # result_for_null = self._test_for_null.step(
-            #     sequence_0[idx], sequence_1[idx], *args, **kwargs
-            # )
-
             # Store the info (constituent test results)
             info = {
@@ -308,17 +318,21 @@ def run_on_sequence(
         return result
 
     def step(
-        self, datum_0, datum_1, verbose: Optional[bool] = False
+        self,
+        datum_0: Union[bool, int, float],
+        datum_1: Union[bool, int, float],
+        verbose: Optional[bool] = False,
     ) -> Tuple[TestResult, TestResult]:
-        """_summary_
+        """Step through the mirrored test, which amounts to stepping through
+        each constituent test.
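+        Both constituent results are returned so that a caller (e.g.,
+        run_on_sequence above) can combine them with its own stopping rule,
+        for example (a hypothetical manual loop over incoming data):
+
+            result_alt, result_null = test.step(datum_0, datum_1)
+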
Args: - datum_0 (_type_): _description_ - datum_1 (_type_): _description_ - verbose (Optional[bool], optional): _description_. Defaults to False. + datum_0 (Union[bool, int, float]): new datum from sequence 0 + datum_1 (Union[bool, int, float]): new datum from sequence 1 + verbose (Optional[bool], optional): If True, print to stdout. Defaults to False. Returns: - Tuple[TestResult, TestResult]: _description_ + Tuple[TestResult, TestResult]: Pair of constituent TestResult objects """ result_for_alternative = self._test_for_alternative.step( datum_0, datum_1, verbose diff --git a/test_barnard_sequential.ipynb b/test_barnard_sequential.ipynb deleted file mode 100644 index c26323d..0000000 --- a/test_barnard_sequential.ipynb +++ /dev/null @@ -1,204 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np \n", - "from sequentialized_barnard_tests.barnard_sequential import BarnardSequentialTest, MirroredBarnardSequentialTest\n", - "from sequentialized_barnard_tests import Decision, Hypothesis" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "alpha = 0.05\n", - "n_max = 500\n", - "times_of_evaluation = 10 " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "base_test = BarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "base_mirrored_test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "result_base = base_test.run_on_sequence(np.zeros(31), np.ones(31))" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.AcceptAlternative\n", - "{'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10}\n" - ] - } - ], - "source": [ - "print(result_base.decision)\n", - "print(result_base.info)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "result_mirrored_base = base_mirrored_test.run_on_sequence(np.zeros(31), np.ones(31))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.AcceptAlternative\n", - "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10})\n", - "TestResult(decision=, info={'p_value': 1.0, 'statistic': -4.47213595499958, 'Time': 10})\n" - ] - } - ], - "source": [ - "print(result_mirrored_base.decision)\n", - "print(result_mirrored_base.info[\"result_for_alternative\"])\n", - "print(result_mirrored_base.info[\"result_for_null\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.FailToDecide\n", - "{'Time': 31}\n" - ] - } - ], - "source": [ - "result_base = base_test.run_on_sequence(np.ones(31), np.zeros(31))\n", - "print(result_base.decision)\n", - "print(result_base.info)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.AcceptNull\n", - "TestResult(decision=, info={'p_value': 1.0, 'statistic': 4.47213595499958, 'Time': 10})\n", - "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': 4.47213595499958, 'Time': 10})\n" - ] - } - ], - "source": [ - "result_mirrored_base = base_mirrored_test.run_on_sequence(np.ones(31), np.zeros(31))\n", - "print(result_mirrored_base.decision)\n", - "print(result_mirrored_base.info[\"result_for_alternative\"])\n", - "print(result_mirrored_base.info[\"result_for_null\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.FailToDecide\n", - "TestResult(decision=, info={'p_value': 0.031154807675887342, 'statistic': -1.9518001458970664, 'Time': 10})\n", - "TestResult(decision=, info={'p_value': 1.0, 'statistic': -1.9518001458970664, 'Time': 10})\n", - "0.05\n" - ] - } - ], - "source": [ - "# result_mirrored_base = base_mirrored_test.run_on_sequence(\n", - "# [0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", - "# [1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", - "# )\n", - "result_mirrored_base = base_mirrored_test.run_on_sequence(\n", - " [0, 1, 0, 0, 1, 1, 0, 1, 1, 0], # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", - " [1, 1, 1, 1, 1, 1, 1, 0, 1, 1], # , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", - " )\n", - "print(result_mirrored_base.decision)\n", - "print(result_mirrored_base.info[\"result_for_alternative\"])\n", - "print(result_mirrored_base.info[\"result_for_null\"])\n", - "print(base_mirrored_test.alpha)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "seq_barnard", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/sequentialized_barnard_tests/test_barnard_sequential.py b/tests/sequentialized_barnard_tests/test_barnard_sequential.py index 8754fca..9ebfdf0 100644 --- a/tests/sequentialized_barnard_tests/test_barnard_sequential.py +++ b/tests/sequentialized_barnard_tests/test_barnard_sequential.py @@ -1,4 +1,6 @@ """Unit tests for the Barnard sequential (rectified) method. 
+ +TODO: Add additional tests to verify unusual specification of the times of evaluation, etc """ import numpy as np @@ -64,26 +66,6 @@ def test_barnard_sequential(barnard_sequential, sequence_0, sequence_1, expected ##### Mirrored Barnard Sequential Test ##### -# def test_mirrored_savi_attribute_assignment(): -# test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0MoreThanP1, alpha=0.05) -# assert test.alpha == 0.05 -# assert test._test_for_alternative.alpha == test.alpha -# assert test._test_for_null.alpha == test.alpha -# assert test.alternative == Hypothesis.P0MoreThanP1 -# assert test._test_for_alternative.alternative == test.alternative -# assert test._test_for_null.alternative == Hypothesis.P0LessThanP1 - -# new_alpha = 0.2 -# test.alpha = new_alpha -# assert test._test_for_alternative.alpha == new_alpha -# assert test._test_for_null.alpha == new_alpha - -# new_alternative = Hypothesis.P0LessThanP1 -# test.alternative = new_alternative -# assert test._test_for_alternative.alternative == test.alternative -# assert test._test_for_null.alternative == Hypothesis.P0MoreThanP1 - - @pytest.fixture(scope="module") def mirrored_barnard_sequential(request): test = MirroredBarnardSequentialTest(