From 9c3d6bbfe257bd4b74c7f40f5fd99822b304bbf5 Mon Sep 17 00:00:00 2001
From: DavidSnyder-TRI
Date: Thu, 27 Mar 2025 14:42:20 -0400
Subject: [PATCH 1/3] Add sequentialized Barnard procedure

---
 .../barnard_sequential.py      | 329 ++++++++++++++++++
 .../test_barnard_sequential.py | 166 +++++++++
 2 files changed, 495 insertions(+)
 create mode 100644 sequentialized_barnard_tests/barnard_sequential.py
 create mode 100644 tests/sequentialized_barnard_tests/test_barnard_sequential.py

diff --git a/sequentialized_barnard_tests/barnard_sequential.py b/sequentialized_barnard_tests/barnard_sequential.py
new file mode 100644
index 0000000..60105ed
--- /dev/null
+++ b/sequentialized_barnard_tests/barnard_sequential.py
@@ -0,0 +1,329 @@
+"""Module implementing a rectified (valid) sequential Barnard procedure.
+
+This module incorporates the base (batch) Barnard procedure into a sequentialized
+method using the Bonferroni (union bound) correction.
+"""
+
+import warnings
+from typing import Any, Optional, Tuple, Type, Union
+
+import numpy as np
+from numpy.typing import ArrayLike
+from scipy.stats import barnard_exact
+
+from sequentialized_barnard_tests.base import (
+    Decision,
+    Hypothesis,
+    MirroredTestMixin,
+    SequentialTestBase,
+    TestResult,
+)
+from sequentialized_barnard_tests.batch import (
+    BarnardExactTest,
+    MirroredBarnardExactTest,
+)
+
+
+class BarnardSequentialTest(SequentialTestBase):
+    """_summary_
+
+    Args:
+        SequentialTestBase (_type_): _description_
+    """
+
+    def __init__(
+        self,
+        alternative: Hypothesis,
+        alpha: float,
+        n_max: int,
+        times_of_evaluation: ArrayLike,
+        weights_of_evaluation: Optional[ArrayLike] = None,
+    ) -> None:
+        """_summary_
+
+        Args:
+            alternative (Hypothesis): _description_
+            alpha (float): _description_
+            n_max (int): _description_
+            times_of_evaluation (ArrayLike): _description_
+            weights_of_evaluation (Optional[ArrayLike], optional): _description_. Defaults to None.
+
+        Raises:
+            ValueError: If alpha, n_max, times_of_evaluation are invalid
+        """
+        # Handle inputs and check for errors in test attributes
+        try:
+            assert 0.0 < alpha and alpha < 1.0
+            assert n_max >= 1
+        except:
+            raise ValueError(
+                "Invalid inputs: alpha must be in (0., 1.)
and n_max must be >= 1"
+            )
+
+        # Assign test attributes
+        self.alpha = alpha
+        self.n_max = n_max
+        self.alternative = alternative
+
+        # Handle inputs and check for errors in evaluation protocol attributes
+        try:
+            assert len(times_of_evaluation) >= 1
+            evaluation_times_is_array = True
+        except:
+            try:
+                assert times_of_evaluation >= 1
+            except:
+                raise ValueError("Scalar times_of_evaluation must be a positive integer!")
+
+            evaluation_times_is_array = False
+
+        # Handle additional error cases and assign times_of_evaluation and n_evaluations
+        if evaluation_times_is_array:
+            times_of_evaluation = np.sort(times_of_evaluation)
+            try:
+                assert np.min(times_of_evaluation) >= 1
+                assert np.min(np.diff(times_of_evaluation)) >= 1
+            except:
+                raise ValueError(
+                    "If an array, times_of_evaluation must be positive and strictly increasing"
+                )
+
+            if np.max(times_of_evaluation) <= self.n_max:
+                # All values acceptable
+                self.times_of_evaluation = times_of_evaluation
+                self.n_evaluations = len(self.times_of_evaluation)
+            else:
+                # Can only accept up to self.n_max; drop any later evaluation times
+                self.times_of_evaluation = times_of_evaluation[
+                    times_of_evaluation <= self.n_max
+                ]
+                self.n_evaluations = len(self.times_of_evaluation)
+        else:
+            # Make scalar into an explicit array of times to sample
+            self.times_of_evaluation = np.arange(0, n_max, times_of_evaluation)[1:]
+            self.n_evaluations = len(self.times_of_evaluation)
+
+        # Given n_evaluations and times_of_evaluation, check the
+        # weighting of risk and handle errors
+        if weights_of_evaluation is None:
+            # Uniform budget: each evaluation spends alpha / n_evaluations,
+            # so the per-test levels sum to alpha
+            weights_of_evaluation = (
+                self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+            )
+        elif len(weights_of_evaluation) < self.n_evaluations:
+            warnings.warn(
+                "weights_of_evaluation is too short. Reverting to uniform risk budget."
+            )
+            weights_of_evaluation = (
+                self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+            )
+        elif len(weights_of_evaluation) == self.n_evaluations:
+            if np.min(weights_of_evaluation) > 0:
+                # Rescale so that the per-test levels sum to alpha
+                weights_of_evaluation = np.asarray(weights_of_evaluation, dtype=float)
+                weights_of_evaluation *= self.alpha / sum(weights_of_evaluation)
+            else:
+                warnings.warn(
+                    "Weights contain non-positive elements; unclear how to rectify. Reverting to uniform risk budget."
+                )
+                weights_of_evaluation = (
+                    self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+                )
+                # weights_of_evaluation += np.min(weights_of_evaluation)
+                # weights_of_evaluation *= self.alpha / sum(weights_of_evaluation)
+        else:
+            warnings.warn(
+                f"weights_of_evaluation is too long. Taking only the first n_evaluations (here, {self.n_evaluations}) components."
+            )
+            weights_of_evaluation = weights_of_evaluation[: self.n_evaluations]
+            assert len(weights_of_evaluation) == self.n_evaluations
+            if np.min(weights_of_evaluation) > 0:
+                # Rescale so that the per-test levels sum to alpha
+                weights_of_evaluation = np.asarray(weights_of_evaluation, dtype=float)
+                weights_of_evaluation *= self.alpha / sum(weights_of_evaluation)
+            else:
+                warnings.warn(
+                    "Weights contain non-positive elements; unclear how to rectify. Reverting to uniform risk budget."
+                )
+                weights_of_evaluation = (
+                    self.alpha * np.ones(self.n_evaluations) / self.n_evaluations
+                )
+
+        # Assign the risk weighting
+        self.weights_of_evaluation = weights_of_evaluation
+
+        # Assign state variables
+        self.t = None
+        self.sequence = None
+        self.reset()
+
+    def step(
+        self,
+        datum_0: Union[bool, int, float],
+        datum_1: Union[bool, int, float],
+        verbose: Optional[bool] = False,
+    ) -> TestResult:
+        """_summary_
+
+        Args:
+            datum_0 (_type_): _description_
+            datum_1 (_type_): _description_
+            verbose (Optional[bool], optional): _description_. Defaults to False.
+
+        Returns:
+            TestResult: _description_
+        """
+        is_bernoulli_0 = datum_0 in [0, 1]
+        is_bernoulli_1 = datum_1 in [0, 1]
+        if not (is_bernoulli_0 and is_bernoulli_1):
+            raise (ValueError("Input data are not interpretable as Bernoulli."))
+        if verbose:
+            print(
+                (
+                    "Update the Barnard sequential process given new "
+                    f"datum_0 == {datum_0} and datum_1 == {datum_1}."
+                )
+            )
+
+        # Update state (i.e., time)
+        self.sequence[self.t, 0] = datum_0
+        self.sequence[self.t, 1] = datum_1
+        self.t += 1
+
+        # If time matches an element of 'times_of_evaluation,' run a batch test
+        run_test = False
+        if np.min(np.abs(self.times_of_evaluation - self.t)) < 0.5:
+            run_test = True
+            idx = np.argmin(np.abs(self.times_of_evaluation - self.t))
+
+        if run_test:
+            batch_test = BarnardExactTest(
+                self.alternative, self.weights_of_evaluation[idx]
+            )
+            if self.t < self.n_max:
+                result = batch_test.run_on_sequence(
+                    self.sequence[: self.t, 0], self.sequence[: self.t, 1]
+                )
+            else:
+                result = batch_test.run_on_sequence(
+                    self.sequence[:, 0], self.sequence[:, 1]
+                )
+
+            # Append time of decision
+            result.info["Time"] = self.t
+
+            return result
+        else:
+            decision = Decision.FailToDecide
+            info = {"Time": self.t}
+
+            result = TestResult(decision, info)
+            return result
+
+    def reset(self, verbose: Optional[bool] = False) -> None:
+        """_summary_
+
+        Args:
+            verbose (Optional[bool], optional): _description_. Defaults to False.
+        """
+        self.t = int(0)
+        self.sequence = np.zeros((self.n_max, 2))
+
+
+class MirroredBarnardSequentialTest(MirroredTestMixin, SequentialTestBase):
+    """A pair of one-sided Barnard's exact tests with mirrored alternatives.
+
+    In our terminology, a mirrored test is one that runs two one-sided tests
+    simultaneously, with the null and the alternative flipped from each other. This is so
+    that it can yield either Decision.AcceptNull or Decision.AcceptAlternative depending
+    on the input data, unlike standard one-sided tests that can never 'accept' the null.
+    (Those standard tests will at most fail to reject the null, as represented by
+    Decision.FailToDecide.)
+
+    For example, if the alternative is Hypothesis.P0MoreThanP1 and the decision is
+    Decision.AcceptNull, it should be interpreted as accepting Hypothesis.P0LessThanP1.
+
+    The significance level alpha controls the following two errors simultaneously: (1)
+    probability of wrongly accepting the alternative when the null is true, and (2)
+    probability of wrongly accepting the null when the alternative is true. Note that
+    Bonferroni correction is not needed since the null hypothesis for one test is the
+    alternative for the other.
+
+    Attributes:
+        alternative: Specification of the alternative hypothesis.
+        alpha: Significance level of the test.
+    """
+
+    _base_class = BarnardSequentialTest
+
+    def run_on_sequence(
+        self, sequence_0: ArrayLike, sequence_1: ArrayLike
+    ) -> TestResult:
+        """Runs the test on a pair of Bernoulli sequences.
+
+        Args:
+            sequence_0: Sequence of Bernoulli data from the first source.
+            sequence_1: Sequence of Bernoulli data from the second source.
+
+        Returns:
+            TestResult: Result of the hypothesis test.
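+
+        Example:
+            A minimal usage sketch; it mirrors the debugging notebook included
+            in this change, and the constructor arguments are the ones used
+            there (the exact p-values come from the underlying batch test):
+
+                test = MirroredBarnardSequentialTest(
+                    alternative=Hypothesis.P0LessThanP1,
+                    alpha=0.05,
+                    n_max=500,
+                    times_of_evaluation=10,
+                )
+                result = test.run_on_sequence(np.zeros(31), np.ones(31))
+                # result.decision == Decision.AcceptAlternative, reached at the
+                # first batch evaluation (the constituent results stored in
+                # result.info carry info["Time"] == 10).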
+ """ + self._test_for_alternative.reset() + self._test_for_null.reset() + + if not (len(sequence_0) == len(sequence_1)): + raise (ValueError("The two input sequences must have the same size.")) + + # Solve the problem of reaching the end of the sequence by + # assigning result to FailToDecide by default + result = TestResult(decision=Decision.FailToDecide) + + # Run through sequence until a decision is reached or sequence is + # fully completed. + for idx in range(len(sequence_0)): + result_for_alternative, result_for_null = self.step( + sequence_0[idx], sequence_1[idx] + ) + + # result_for_alternative = self._test_for_alternative.step( + # sequence_0[idx], sequence_1[idx], *args, **kwargs + # ) + # result_for_null = self._test_for_null.step( + # sequence_0[idx], sequence_1[idx], *args, **kwargs + # ) + + # Store the info (constituent test results) + info = { + "result_for_alternative": result_for_alternative, + "result_for_null": result_for_null, + } + + # Assign decision variable + if (not result_for_alternative.decision == Decision.FailToDecide) and ( + result_for_null.decision == Decision.FailToDecide + ): + # If only result_for_alternative, then overall decision is Reject Null / Accept Alternative + decision = Decision.AcceptAlternative + elif (not result_for_null.decision == Decision.FailToDecide) and ( + result_for_alternative.decision == Decision.FailToDecide + ): + # If only result_for_null, then overall decision is Accept Null / Reject Alternative + decision = Decision.AcceptNull + else: + # Neither (or both) significant: decision is Decision.FailToDecide + decision = Decision.FailToDecide + + if not decision == Decision.FailToDecide: + break + + # Assign result + result = TestResult(decision, info) + return result + + def step( + self, datum_0, datum_1, verbose: Optional[bool] = False + ) -> Tuple[TestResult, TestResult]: + """_summary_ + + Args: + datum_0 (_type_): _description_ + datum_1 (_type_): _description_ + verbose (Optional[bool], optional): _description_. Defaults to False. + + Returns: + Tuple[TestResult, TestResult]: _description_ + """ + result_for_alternative = self._test_for_alternative.step( + datum_0, datum_1, verbose + ) + + result_for_null = self._test_for_null.step(datum_0, datum_1, verbose) + + return [result_for_alternative, result_for_null] diff --git a/tests/sequentialized_barnard_tests/test_barnard_sequential.py b/tests/sequentialized_barnard_tests/test_barnard_sequential.py new file mode 100644 index 0000000..8754fca --- /dev/null +++ b/tests/sequentialized_barnard_tests/test_barnard_sequential.py @@ -0,0 +1,166 @@ +"""Unit tests for the Barnard sequential (rectified) method. +""" + +import numpy as np +import pytest + +from sequentialized_barnard_tests import Decision, Hypothesis +from sequentialized_barnard_tests.barnard_sequential import ( + BarnardSequentialTest, + MirroredBarnardSequentialTest, +) + +##### Barnard Sequential Test ##### + + +@pytest.fixture(scope="module") +def barnard_sequential(request): + test = BarnardSequentialTest( + alternative=request.param, alpha=0.05, n_max=500, times_of_evaluation=10 + ) + return test + + +@pytest.mark.parametrize( + ("barnard_sequential"), + [(Hypothesis.P0LessThanP1), (Hypothesis.P0MoreThanP1)], + indirect=["barnard_sequential"], +) +def test_barnard_sequential_value_error(barnard_sequential): + # Should raise a ValueError if non-binary data is given. 
+ with pytest.raises(ValueError): + barnard_sequential.step(1.2, 1) + with pytest.raises(ValueError): + barnard_sequential.step(1, 1.2) + + # Should raise a ValueError if two sequences do not have the same length. + with pytest.raises(ValueError): + barnard_sequential.run_on_sequence([0, 0], [1, 1, 1]) + with pytest.raises(ValueError): + barnard_sequential.run_on_sequence([1, 1, 1], [0, 0]) + + +@pytest.mark.parametrize( + ("barnard_sequential", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.zeros(31), np.ones(31), Decision.AcceptAlternative), + (Hypothesis.P0MoreThanP1, np.zeros(31), np.ones(31), Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.ones(31), np.zeros(31), Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, np.ones(31), np.zeros(31), Decision.AcceptAlternative), + # fmt: on + ], + indirect=["barnard_sequential"], +) +def test_barnard_sequential(barnard_sequential, sequence_0, sequence_1, expected): + result = barnard_sequential.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected + + +##### Mirrored Barnard Sequential Test ##### + + +# def test_mirrored_savi_attribute_assignment(): +# test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0MoreThanP1, alpha=0.05) +# assert test.alpha == 0.05 +# assert test._test_for_alternative.alpha == test.alpha +# assert test._test_for_null.alpha == test.alpha +# assert test.alternative == Hypothesis.P0MoreThanP1 +# assert test._test_for_alternative.alternative == test.alternative +# assert test._test_for_null.alternative == Hypothesis.P0LessThanP1 + +# new_alpha = 0.2 +# test.alpha = new_alpha +# assert test._test_for_alternative.alpha == new_alpha +# assert test._test_for_null.alpha == new_alpha + +# new_alternative = Hypothesis.P0LessThanP1 +# test.alternative = new_alternative +# assert test._test_for_alternative.alternative == test.alternative +# assert test._test_for_null.alternative == Hypothesis.P0MoreThanP1 + + +@pytest.fixture(scope="module") +def mirrored_barnard_sequential(request): + test = MirroredBarnardSequentialTest( + alternative=request.param, alpha=0.05, n_max=500, times_of_evaluation=10 + ) + return test + + +@pytest.mark.parametrize( + ("mirrored_barnard_sequential", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.zeros(31), np.ones(31), Decision.AcceptAlternative), + (Hypothesis.P0MoreThanP1, np.zeros(31), np.ones(31), Decision.AcceptNull), + (Hypothesis.P0LessThanP1, np.ones(31), np.zeros(31), Decision.AcceptNull), + (Hypothesis.P0MoreThanP1, np.ones(31), np.zeros(31), Decision.AcceptAlternative), + # fmt: on + ], + indirect=["mirrored_barnard_sequential"], +) +def test_mirrored_barnard_sequential( + mirrored_barnard_sequential, sequence_0, sequence_1, expected +): + result = mirrored_barnard_sequential.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected + + +@pytest.mark.parametrize( + 
("mirrored_barnard_sequential", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 1, 0, 0, 1, 1, 0, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 0, 1, 1], Decision.FailToDecide), + # fmt: on + ], + indirect=["mirrored_barnard_sequential"], +) +def test_mirrored_barnard_sequential_special( + mirrored_barnard_sequential, sequence_0, sequence_1, expected +): + result = mirrored_barnard_sequential.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected + assert ( + result.info["result_for_alternative"].info["p_value"] + < mirrored_barnard_sequential.alpha + ) + + +@pytest.fixture(scope="module") +def mirrored_barnard_sequential_slow(request): + test = MirroredBarnardSequentialTest( + alternative=request.param, alpha=0.05, n_max=500, times_of_evaluation=50 + ) + return test + + +@pytest.mark.parametrize( + ("mirrored_barnard_sequential_slow", "sequence_0", "sequence_1", "expected"), + [ + # fmt: off + (Hypothesis.P0LessThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [0, 0, 0], [1, 1, 1], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, [1, 1, 1], [0, 0, 0], Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.zeros(49), np.ones(49), Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, np.zeros(49), np.ones(49), Decision.FailToDecide), + (Hypothesis.P0LessThanP1, np.ones(49), np.zeros(49), Decision.FailToDecide), + (Hypothesis.P0MoreThanP1, np.ones(49), np.zeros(49), Decision.FailToDecide), + # fmt: on + ], + indirect=["mirrored_barnard_sequential_slow"], +) +def test_mirrored_barnard_sequential_slow( + mirrored_barnard_sequential_slow, sequence_0, sequence_1, expected +): + result = mirrored_barnard_sequential_slow.run_on_sequence(sequence_0, sequence_1) + assert result.decision == expected From 49f337859e50ed1d5712e2e1eef2ead9f9755609 Mon Sep 17 00:00:00 2001 From: DavidSnyder-TRI Date: Thu, 27 Mar 2025 14:43:07 -0400 Subject: [PATCH 2/3] Store copy of debugging test file --- test_barnard_sequential.ipynb | 204 ++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 test_barnard_sequential.ipynb diff --git a/test_barnard_sequential.ipynb b/test_barnard_sequential.ipynb new file mode 100644 index 0000000..c26323d --- /dev/null +++ b/test_barnard_sequential.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np \n", + "from sequentialized_barnard_tests.barnard_sequential import BarnardSequentialTest, MirroredBarnardSequentialTest\n", + "from sequentialized_barnard_tests import Decision, Hypothesis" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "alpha = 0.05\n", + "n_max = 500\n", + "times_of_evaluation = 10 " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "base_test = BarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "base_mirrored_test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + 
"result_base = base_test.run_on_sequence(np.zeros(31), np.ones(31))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.AcceptAlternative\n", + "{'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10}\n" + ] + } + ], + "source": [ + "print(result_base.decision)\n", + "print(result_base.info)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "result_mirrored_base = base_mirrored_test.run_on_sequence(np.zeros(31), np.ones(31))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.AcceptAlternative\n", + "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10})\n", + "TestResult(decision=, info={'p_value': 1.0, 'statistic': -4.47213595499958, 'Time': 10})\n" + ] + } + ], + "source": [ + "print(result_mirrored_base.decision)\n", + "print(result_mirrored_base.info[\"result_for_alternative\"])\n", + "print(result_mirrored_base.info[\"result_for_null\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.FailToDecide\n", + "{'Time': 31}\n" + ] + } + ], + "source": [ + "result_base = base_test.run_on_sequence(np.ones(31), np.zeros(31))\n", + "print(result_base.decision)\n", + "print(result_base.info)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.AcceptNull\n", + "TestResult(decision=, info={'p_value': 1.0, 'statistic': 4.47213595499958, 'Time': 10})\n", + "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': 4.47213595499958, 'Time': 10})\n" + ] + } + ], + "source": [ + "result_mirrored_base = base_mirrored_test.run_on_sequence(np.ones(31), np.zeros(31))\n", + "print(result_mirrored_base.decision)\n", + "print(result_mirrored_base.info[\"result_for_alternative\"])\n", + "print(result_mirrored_base.info[\"result_for_null\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Decision.FailToDecide\n", + "TestResult(decision=, info={'p_value': 0.031154807675887342, 'statistic': -1.9518001458970664, 'Time': 10})\n", + "TestResult(decision=, info={'p_value': 1.0, 'statistic': -1.9518001458970664, 'Time': 10})\n", + "0.05\n" + ] + } + ], + "source": [ + "# result_mirrored_base = base_mirrored_test.run_on_sequence(\n", + "# [0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", + "# [1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", + "# )\n", + "result_mirrored_base = base_mirrored_test.run_on_sequence(\n", + " [0, 1, 0, 0, 1, 1, 0, 1, 1, 0], # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", + " [1, 1, 1, 1, 1, 1, 1, 0, 1, 1], # , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", + " )\n", + "print(result_mirrored_base.decision)\n", + "print(result_mirrored_base.info[\"result_for_alternative\"])\n", + "print(result_mirrored_base.info[\"result_for_null\"])\n", + "print(base_mirrored_test.alpha)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
"seq_barnard", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 2b813d4d1223ffcd82c53e20a357f82c0f431216 Mon Sep 17 00:00:00 2001 From: DavidSnyder-TRI Date: Thu, 27 Mar 2025 15:07:28 -0400 Subject: [PATCH 3/3] Update documentation and remove the old testing notebook --- .../barnard_sequential.py | 78 ++++--- test_barnard_sequential.ipynb | 204 ------------------ .../test_barnard_sequential.py | 22 +- 3 files changed, 48 insertions(+), 256 deletions(-) delete mode 100644 test_barnard_sequential.ipynb diff --git a/sequentialized_barnard_tests/barnard_sequential.py b/sequentialized_barnard_tests/barnard_sequential.py index 60105ed..a0422ea 100644 --- a/sequentialized_barnard_tests/barnard_sequential.py +++ b/sequentialized_barnard_tests/barnard_sequential.py @@ -25,10 +25,12 @@ class BarnardSequentialTest(SequentialTestBase): - """_summary_ + """Naive sequential rectification of Barnard's Exact Test. This method + utilizes (weighted) Bonferroni error correction across the discrete times + chosen for batch evaluation. At each time of evaluation, a batch Barnard's + Exact Test is run at level alpha_prime, such that the sum of all alpha_prime + for all evaluation times is equal to alpha, the overall FPR limit of the test. - Args: - SequentialTestBase (_type_): _description_ """ def __init__( @@ -36,20 +38,26 @@ def __init__( alternative: Hypothesis, alpha: float, n_max: int, - times_of_evaluation: ArrayLike, + times_of_evaluation: Union[ArrayLike, int], weights_of_evaluation: Optional[ArrayLike] = None, ) -> None: - """_summary_ + """Initialize the BarnardSequentialTest object. Args: - alternative (Hypothesis): _description_ - alpha (float): _description_ - n_max (int): _description_ - times_of_evaluation (ArrayLike): _description_ - weights_of_evaluation (Optional[ArrayLike], optional): _description_. Defaults to None. + alternative (Hypothesis): Alternative hypothesis for the test. + alpha (float): Significance level of the test. Lies in (0., 1.) + n_max (int): Maximum number of trials of the test. Integer, must be > 0. + times_of_evaluation (ArrayLike, int): Set of times at which to run batch evaluation (ArrayLike), or + regular interval at which evaluation is to be done (int). Must be + positive. + weights_of_evaluation (Optional[ArrayLike]): Weights (fraction of risk budget) to spend at each evaluation. + If None, defaults to uniform: alpha_prime = alpha / n_evaluations. Defaults to None. 
         Raises:
-            ValueError: If alpha, n_max, times_of_evaluation are invalid
+            ValueError: If alpha, n_max are invalid
+            ValueError: If times_of_evaluation is anywhere non-positive
+            Warning: If weights_of_evaluation does not match times_of_evaluation in length, or contains non-positive entries
+
         """
         # Handle inputs and check for errors in test attributes
         try:
@@ -80,6 +88,8 @@ def __init__(
         # Handle additional error cases and assign times_of_evaluation and n_evaluations
         if evaluation_times_is_array:
             times_of_evaluation = np.sort(times_of_evaluation)
+
+            # TODO: Add in floor + redundancy handling (remove multiple instances of the same time)
             try:
                 assert np.min(times_of_evaluation) >= 1
                 assert np.min(np.diff(times_of_evaluation)) >= 1
             except:
                 raise ValueError(
                     "If an array, times_of_evaluation must be positive and strictly increasing"
                 )
@@ -150,15 +160,20 @@ def step(
         datum_1: Union[bool, int, float],
         verbose: Optional[bool] = False,
     ) -> TestResult:
-        """_summary_
+        """Step through a one-sided sequentialized Barnard exact test. The
+        procedure appends the new data to the stored self.sequence, increments
+        the time variable self.t, and then does one of two things:
+            (1) If self.t is NOT a time of evaluation, return decision =
+            Decision.FailToDecide
+            (2) If self.t is a time of evaluation, run a batch test at level
+            alpha_prime = self.weights_of_evaluation[idx], where the weights
+            are scaled at construction to sum to alpha.
 
         Args:
-            datum_0 (_type_): _description_
-            datum_1 (_type_): _description_
-            verbose (Optional[bool], optional): _description_. Defaults to False.
+            datum_0 (Union[bool, int, float]): New datum from sequence 0
+            datum_1 (Union[bool, int, float]): New datum from sequence 1
+            verbose (Optional[bool], optional): If True, print to stdout. Defaults to False.
 
         Returns:
-            TestResult: _description_
+            TestResult: Result of the test at time self.t.
         """
         is_bernoulli_0 = datum_0 in [0, 1]
         is_bernoulli_1 = datum_1 in [0, 1]
         if not (is_bernoulli_0 and is_bernoulli_1):
@@ -208,17 +223,19 @@ def step(
         return result
 
     def reset(self, verbose: Optional[bool] = False) -> None:
-        """_summary_
+        """Resets the state variables in order to allow for evaluating
+        a new trajectory.
 
         Args:
-            verbose (Optional[bool], optional): _description_. Defaults to False.
+            verbose (Optional[bool], optional): If True, print to stdout. Defaults to False.
         """
+        # Reset state variables
         self.t = int(0)
         self.sequence = np.zeros((self.n_max, 2))
 
 
 class MirroredBarnardSequentialTest(MirroredTestMixin, SequentialTestBase):
-    """A pair of one-sided Barnard's exact tests with mirrored alternatives.
+    """A pair of one-sided, sequential Barnard's exact tests with mirrored alternatives.
 
     In our terminology, a mirrored test is one that runs two one-sided tests
@@ -272,13 +289,6 @@ def run_on_sequence(
                 sequence_0[idx], sequence_1[idx]
             )
 
-            # result_for_alternative = self._test_for_alternative.step(
-            #     sequence_0[idx], sequence_1[idx], *args, **kwargs
-            # )
-            # result_for_null = self._test_for_null.step(
-            #     sequence_0[idx], sequence_1[idx], *args, **kwargs
-            # )
-
             # Store the info (constituent test results)
             info = {
@@ -308,17 +318,21 @@ def run_on_sequence(
         return result
 
     def step(
-        self, datum_0, datum_1, verbose: Optional[bool] = False
+        self,
+        datum_0: Union[bool, int, float],
+        datum_1: Union[bool, int, float],
+        verbose: Optional[bool] = False,
     ) -> Tuple[TestResult, TestResult]:
-        """_summary_
+        """Step through the mirrored test, which amounts to stepping through
+        each constituent test.
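+        Both constituent results are returned so that a caller (e.g.,
+        run_on_sequence above) can combine them with its own stopping rule,
+        for example (a hypothetical manual loop over incoming data):
+
+            result_alt, result_null = test.step(datum_0, datum_1)
+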
Args: - datum_0 (_type_): _description_ - datum_1 (_type_): _description_ - verbose (Optional[bool], optional): _description_. Defaults to False. + datum_0 (Union[bool, int, float]): new datum from sequence 0 + datum_1 (Union[bool, int, float]): new datum from sequence 1 + verbose (Optional[bool], optional): If True, print to stdout. Defaults to False. Returns: - Tuple[TestResult, TestResult]: _description_ + Tuple[TestResult, TestResult]: Pair of constituent TestResult objects """ result_for_alternative = self._test_for_alternative.step( datum_0, datum_1, verbose diff --git a/test_barnard_sequential.ipynb b/test_barnard_sequential.ipynb deleted file mode 100644 index c26323d..0000000 --- a/test_barnard_sequential.ipynb +++ /dev/null @@ -1,204 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np \n", - "from sequentialized_barnard_tests.barnard_sequential import BarnardSequentialTest, MirroredBarnardSequentialTest\n", - "from sequentialized_barnard_tests import Decision, Hypothesis" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "alpha = 0.05\n", - "n_max = 500\n", - "times_of_evaluation = 10 " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "base_test = BarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "base_mirrored_test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0LessThanP1, alpha=alpha, n_max=n_max, times_of_evaluation=times_of_evaluation)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "result_base = base_test.run_on_sequence(np.zeros(31), np.ones(31))" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.AcceptAlternative\n", - "{'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10}\n" - ] - } - ], - "source": [ - "print(result_base.decision)\n", - "print(result_base.info)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "result_mirrored_base = base_mirrored_test.run_on_sequence(np.zeros(31), np.ones(31))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.AcceptAlternative\n", - "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': -4.47213595499958, 'Time': 10})\n", - "TestResult(decision=, info={'p_value': 1.0, 'statistic': -4.47213595499958, 'Time': 10})\n" - ] - } - ], - "source": [ - "print(result_mirrored_base.decision)\n", - "print(result_mirrored_base.info[\"result_for_alternative\"])\n", - "print(result_mirrored_base.info[\"result_for_null\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.FailToDecide\n", - "{'Time': 31}\n" - ] - } - ], - "source": [ - "result_base = base_test.run_on_sequence(np.ones(31), np.zeros(31))\n", - "print(result_base.decision)\n", - "print(result_base.info)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.AcceptNull\n", - "TestResult(decision=, info={'p_value': 1.0, 'statistic': 4.47213595499958, 'Time': 10})\n", - "TestResult(decision=, info={'p_value': 9.5367431640625e-07, 'statistic': 4.47213595499958, 'Time': 10})\n" - ] - } - ], - "source": [ - "result_mirrored_base = base_mirrored_test.run_on_sequence(np.ones(31), np.zeros(31))\n", - "print(result_mirrored_base.decision)\n", - "print(result_mirrored_base.info[\"result_for_alternative\"])\n", - "print(result_mirrored_base.info[\"result_for_null\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decision.FailToDecide\n", - "TestResult(decision=, info={'p_value': 0.031154807675887342, 'statistic': -1.9518001458970664, 'Time': 10})\n", - "TestResult(decision=, info={'p_value': 1.0, 'statistic': -1.9518001458970664, 'Time': 10})\n", - "0.05\n" - ] - } - ], - "source": [ - "# result_mirrored_base = base_mirrored_test.run_on_sequence(\n", - "# [0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", - "# [1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", - "# )\n", - "result_mirrored_base = base_mirrored_test.run_on_sequence(\n", - " [0, 1, 0, 0, 1, 1, 0, 1, 1, 0], # , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], \n", - " [1, 1, 1, 1, 1, 1, 1, 0, 1, 1], # , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \n", - " )\n", - "print(result_mirrored_base.decision)\n", - "print(result_mirrored_base.info[\"result_for_alternative\"])\n", - "print(result_mirrored_base.info[\"result_for_null\"])\n", - "print(base_mirrored_test.alpha)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "seq_barnard", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/sequentialized_barnard_tests/test_barnard_sequential.py b/tests/sequentialized_barnard_tests/test_barnard_sequential.py index 8754fca..9ebfdf0 100644 --- a/tests/sequentialized_barnard_tests/test_barnard_sequential.py +++ b/tests/sequentialized_barnard_tests/test_barnard_sequential.py @@ -1,4 +1,6 @@ """Unit tests for the Barnard sequential (rectified) method. 
+ +TODO: Add additional tests to verify unusual specification of the times of evaluation, etc """ import numpy as np @@ -64,26 +66,6 @@ def test_barnard_sequential(barnard_sequential, sequence_0, sequence_1, expected ##### Mirrored Barnard Sequential Test ##### -# def test_mirrored_savi_attribute_assignment(): -# test = MirroredBarnardSequentialTest(alternative=Hypothesis.P0MoreThanP1, alpha=0.05) -# assert test.alpha == 0.05 -# assert test._test_for_alternative.alpha == test.alpha -# assert test._test_for_null.alpha == test.alpha -# assert test.alternative == Hypothesis.P0MoreThanP1 -# assert test._test_for_alternative.alternative == test.alternative -# assert test._test_for_null.alternative == Hypothesis.P0LessThanP1 - -# new_alpha = 0.2 -# test.alpha = new_alpha -# assert test._test_for_alternative.alpha == new_alpha -# assert test._test_for_null.alpha == new_alpha - -# new_alternative = Hypothesis.P0LessThanP1 -# test.alternative = new_alternative -# assert test._test_for_alternative.alternative == test.alternative -# assert test._test_for_null.alternative == Hypothesis.P0MoreThanP1 - - @pytest.fixture(scope="module") def mirrored_barnard_sequential(request): test = MirroredBarnardSequentialTest(