From 378a6783cc93e68c117e23a78dec7289f5afcec4 Mon Sep 17 00:00:00 2001 From: ricardocarvalhods Date: Fri, 24 Feb 2023 12:11:02 -0300 Subject: [PATCH 1/2] Added changes to scalar and tests pass --- pipeline_dp/budget_accounting.py | 9 +-- pipeline_dp/combiners.py | 10 ++- pipeline_dp/dp_computations.py | 112 ++++++++++++++++++++++++++----- tests/budget_accounting_test.py | 7 +- tests/dp_computations_test.py | 2 +- 5 files changed, 107 insertions(+), 33 deletions(-) diff --git a/pipeline_dp/budget_accounting.py b/pipeline_dp/budget_accounting.py index db0e97b5..d218cb5c 100644 --- a/pipeline_dp/budget_accounting.py +++ b/pipeline_dp/budget_accounting.py @@ -48,14 +48,7 @@ class MechanismSpec: @property def noise_standard_deviation(self): - """Noise value for the mechanism. - - Raises: - AssertionError: The noise value is not calculated yet. - """ - if self._noise_standard_deviation is None: - raise AssertionError( - "Noise standard deviation is not calculated yet.") + """Noise value for the mechanism. It can be None before budget is computed.""" return self._noise_standard_deviation @property diff --git a/pipeline_dp/combiners.py b/pipeline_dp/combiners.py index 5b82bc5f..a5b7b8ae 100644 --- a/pipeline_dp/combiners.py +++ b/pipeline_dp/combiners.py @@ -149,17 +149,23 @@ def eps(self): @property def delta(self): return self._mechanism_spec.delta + + @property + def noise_standard_deviation(self): + return self._mechanism_spec.noise_standard_deviation @property def scalar_noise_params(self): return dp_computations.ScalarNoiseParams( - self.eps, self.delta, self.aggregate_params.min_value, + self.eps, self.delta, + self.aggregate_params.min_value, self.aggregate_params.max_value, self.aggregate_params.min_sum_per_partition, self.aggregate_params.max_sum_per_partition, self.aggregate_params.max_partitions_contributed, self.aggregate_params.max_contributions_per_partition, - self.aggregate_params.noise_kind) + self.aggregate_params.noise_kind, + self.noise_standard_deviation) @property def additive_vector_noise_params( diff --git a/pipeline_dp/dp_computations.py b/pipeline_dp/dp_computations.py index f5a533c3..56e9b9ae 100644 --- a/pipeline_dp/dp_computations.py +++ b/pipeline_dp/dp_computations.py @@ -33,6 +33,7 @@ class ScalarNoiseParams: max_partitions_contributed: int max_contributions_per_partition: Optional[int] noise_kind: pipeline_dp.NoiseKind # Laplace or Gaussian + noise_standard_deviation: Optional[float] = None def __post_init__(self): assert (self.min_value is None) == ( @@ -103,43 +104,68 @@ def compute_sigma(eps: float, delta: float, l2_sensitivity: float): delta: The delta value. l2_sensitivity: The L2 sensitivity. """ - # TODO: use named arguments, when argument names are added in PyDP on PR - # https://github.com/OpenMined/PyDP/pull/398. - return dp_mechanisms.GaussianMechanism(eps, delta, l2_sensitivity).std + return dp_mechanisms.GaussianMechanism(epsilon=eps, + delta=delta, + sensitivity=l2_sensitivity).std -def apply_laplace_mechanism(value: float, eps: float, l1_sensitivity: float): +def apply_laplace_mechanism(value: float, + eps: float, + l1_sensitivity: float, + noise_standard_deviation: Optional[float] = None): """Applies the Laplace mechanism to the value. + If noise_standard_deviation is set, it is used and eps is ignored. Args: value: The initial value. eps: The epsilon value. l1_sensitivity: The L1 sensitivity. + noise_standard_deviation: The standard deviation for the noise. Returns: The value resulted after adding the noise. """ - mechanism = dp_mechanisms.LaplaceMechanism(epsilon=eps, - sensitivity=l1_sensitivity) + if noise_standard_deviation is not None: + mechanism = dp_mechanisms.LaplaceMechanism( + epsilon=1/noise_standard_deviation, + sensitivity=l1_sensitivity + ) + else: + mechanism = dp_mechanisms.LaplaceMechanism( + epsilon=eps, + sensitivity=l1_sensitivity + ) return mechanism.add_noise(1.0 * value) -def apply_gaussian_mechanism(value: float, eps: float, delta: float, - l2_sensitivity: float): +def apply_gaussian_mechanism(value: float, + eps: float, + delta: float, + l2_sensitivity: float, + noise_standard_deviation: Optional[float] = None): """Applies the Gaussian mechanism to the value. + If noise_standard_deviation is set, it is used and eps&delta are ignored. Args: value: The initial value. eps: The epsilon value. delta: The delta value. l2_sensitivity: The L2 sensitivity. + noise_standard_deviation: The standard deviation for the noise. Returns: The value resulted after adding the noise. """ - # TODO: use named arguments, when argument names are added in PyDP on PR - # https://github.com/OpenMined/PyDP/pull/398. - mechanism = dp_mechanisms.GaussianMechanism(eps, delta, l2_sensitivity) + if noise_standard_deviation is not None: + mechanism = dp_mechanisms.GaussianMechanism.create_from_standard_deviation( + std=l2_sensitivity * noise_standard_deviation + ) + else: + mechanism = dp_mechanisms.GaussianMechanism( + epsilon=eps, + delta=delta, + sensitivity=l2_sensitivity + ) return mechanism.add_noise(1.0 * value) @@ -147,6 +173,7 @@ def _add_random_noise( value: float, eps: float, delta: float, + noise_standard_deviation: float, l0_sensitivity: float, linf_sensitivity: float, noise_kind: pipeline_dp.NoiseKind, @@ -164,14 +191,24 @@ def _add_random_noise( Returns: The value resulted after adding the random noise. """ + if noise_kind == pipeline_dp.NoiseKind.LAPLACE: l1_sensitivity = compute_l1_sensitivity(l0_sensitivity, linf_sensitivity) - return apply_laplace_mechanism(value, eps, l1_sensitivity) + return apply_laplace_mechanism(value, + eps, + l1_sensitivity, + noise_standard_deviation) + if noise_kind == pipeline_dp.NoiseKind.GAUSSIAN: l2_sensitivity = compute_l2_sensitivity(l0_sensitivity, linf_sensitivity) - return apply_gaussian_mechanism(value, eps, delta, l2_sensitivity) + return apply_gaussian_mechanism(value, + eps, + delta, + l2_sensitivity, + noise_standard_deviation) + raise ValueError("Noise kind must be either Laplace or Gaussian.") @@ -213,6 +250,7 @@ def add_noise_vector(vec: np.ndarray, noise_params: AdditiveVectorNoiseParams): s, noise_params.eps_per_coordinate, noise_params.delta_per_coordinate, + None, # TODO: Add noise_standard_deviation for vector sum computation noise_params.l0_sensitivity, noise_params.linf_sensitivity, noise_params.noise_kind, @@ -269,6 +307,7 @@ def compute_dp_count(count: int, dp_params: ScalarNoiseParams): count, dp_params.eps, dp_params.delta, + dp_params.noise_standard_deviation, l0_sensitivity, linf_sensitivity, dp_params.noise_kind, @@ -301,6 +340,7 @@ def compute_dp_sum(sum: float, dp_params: ScalarNoiseParams): sum, dp_params.eps, dp_params.delta, + dp_params.noise_standard_deviation, l0_sensitivity, linf_sensitivity, dp_params.noise_kind, @@ -314,6 +354,7 @@ def _compute_mean_for_normalized_sum( max_value: float, eps: float, delta: float, + noise_standard_deviation: float, l0_sensitivity: float, max_contributions_per_partition: float, noise_kind: pipeline_dp.NoiseKind, @@ -325,6 +366,7 @@ def _compute_mean_for_normalized_sum( sum: Non-DP normalized sum. min_value, max_value: The lowest/highest contribution of the non-normalized values. eps, delta: The budget allocated. + noise_standard_deviation: The standard deviation for the noise. l0_sensitivity: The L0 sensitivity. max_contributions_per_partition: The maximum number of contributions per partition. @@ -341,8 +383,15 @@ def _compute_mean_for_normalized_sum( middle = compute_middle(min_value, max_value) linf_sensitivity = max_contributions_per_partition * abs(middle - min_value) - dp_normalized_sum = _add_random_noise(sum, eps, delta, l0_sensitivity, - linf_sensitivity, noise_kind) + dp_normalized_sum = _add_random_noise( + sum, + eps, + delta, + noise_standard_deviation, + l0_sensitivity, + linf_sensitivity, + noise_kind + ) # Clamps dp_count to 1.0. We know that actual count > 1 except when the # input set is empty, in which case it shouldn't matter much what the # denominator is. @@ -370,10 +419,18 @@ def compute_dp_mean(count: int, normalized_sum: float, dp_params.eps, dp_params.delta, 2) l0_sensitivity = dp_params.l0_sensitivity() + # Increases noise std.dev. equally due to multiple computations + count_noise_standard_deviation = \ + sum_noise_standard_deviation = \ + 2*dp_params.noise_standard_deviation \ + if dp_params.noise_standard_deviation is not None \ + else None + dp_count = _add_random_noise( count, count_eps, count_delta, + count_noise_standard_deviation, l0_sensitivity, dp_params.max_contributions_per_partition, dp_params.noise_kind, @@ -386,6 +443,7 @@ def compute_dp_mean(count: int, normalized_sum: float, dp_params.max_value, sum_eps, sum_delta, + sum_noise_standard_deviation, l0_sensitivity, dp_params.max_contributions_per_partition, dp_params.noise_kind, @@ -420,11 +478,20 @@ def compute_dp_var(count: int, normalized_sum: float, (sum_squares_eps, sum_squares_delta), ) = equally_split_budget(dp_params.eps, dp_params.delta, 3) l0_sensitivity = dp_params.l0_sensitivity() + + # Increases noise std.dev. equally due to multiple computations + count_noise_standard_deviation = \ + sum_noise_standard_deviation = \ + sum_squares_noise_standard_deviation = \ + 3*dp_params.noise_standard_deviation \ + if dp_params.noise_standard_deviation is not None \ + else None dp_count = _add_random_noise( count, count_eps, count_delta, + count_noise_standard_deviation, l0_sensitivity, dp_params.max_contributions_per_partition, dp_params.noise_kind, @@ -438,6 +505,7 @@ def compute_dp_var(count: int, normalized_sum: float, dp_params.max_value, sum_eps, sum_delta, + sum_noise_standard_deviation, l0_sensitivity, dp_params.max_contributions_per_partition, dp_params.noise_kind, @@ -448,9 +516,17 @@ def compute_dp_var(count: int, normalized_sum: float, # Computes and adds noise to the mean of squares. dp_mean_squares = _compute_mean_for_normalized_sum( - dp_count, normalized_sum_squares, squares_min_value, squares_max_value, - sum_squares_eps, sum_squares_delta, l0_sensitivity, - dp_params.max_contributions_per_partition, dp_params.noise_kind) + dp_count, + normalized_sum_squares, + squares_min_value, + squares_max_value, + sum_squares_eps, + sum_squares_delta, + sum_squares_noise_standard_deviation, + l0_sensitivity, + dp_params.max_contributions_per_partition, + dp_params.noise_kind + ) dp_var = dp_mean_squares - dp_mean**2 if dp_params.min_value != dp_params.max_value: diff --git a/tests/budget_accounting_test.py b/tests/budget_accounting_test.py index eb4151d6..09da5dbc 100644 --- a/tests/budget_accounting_test.py +++ b/tests/budget_accounting_test.py @@ -198,9 +198,8 @@ def test_not_enough_aggregations(self, use_num_aggregations): class PLDBudgetAccountantTest(unittest.TestCase): def test_noise_not_calculated(self): - with self.assertRaises(AssertionError): - mechanism = MechanismSpec(MechanismType.LAPLACE) - print(mechanism.noise_standard_deviation()) + mechanism = MechanismSpec(MechanismType.LAPLACE) + self.assertEqual(None, mechanism.noise_standard_deviation) def test_invalid_epsilon(self): with self.assertRaises(ValueError): @@ -257,7 +256,7 @@ class ComputeBudgetTestCase: epsilon: float delta: float expected_pipeline_noise_std: float - mechanisms: [] + mechanisms: list testcases = [ ComputeBudgetTestCase( diff --git a/tests/dp_computations_test.py b/tests/dp_computations_test.py index 8c38ab34..ab128087 100644 --- a/tests/dp_computations_test.py +++ b/tests/dp_computations_test.py @@ -221,7 +221,7 @@ def test_secure_gaussian_noise_is_used(self, gaussian_mechanism): value=20, eps=0.5, delta=1e-10, l2_sensitivity=3) # Assert - gaussian_mechanism.assert_called_with(0.5, 1e-10, 3) + gaussian_mechanism.assert_called_with(epsilon=0.5, delta=1e-10, sensitivity=3) mock_gaussian_mechanism.add_noise.assert_called_with(20) self.assertEqual("value_with_noise", anonymized_value) From 00331536773a265ce70c75a36d1ba84039482b79 Mon Sep 17 00:00:00 2001 From: Ricardo Carvalho Date: Fri, 24 Feb 2023 13:02:00 -0300 Subject: [PATCH 2/2] Formatting --- pipeline_dp/combiners.py | 8 ++-- pipeline_dp/dp_computations.py | 79 ++++++++++++---------------------- tests/dp_computations_test.py | 4 +- 3 files changed, 34 insertions(+), 57 deletions(-) diff --git a/pipeline_dp/combiners.py b/pipeline_dp/combiners.py index a5b7b8ae..5d8ce7c0 100644 --- a/pipeline_dp/combiners.py +++ b/pipeline_dp/combiners.py @@ -149,7 +149,7 @@ def eps(self): @property def delta(self): return self._mechanism_spec.delta - + @property def noise_standard_deviation(self): return self._mechanism_spec.noise_standard_deviation @@ -157,15 +157,13 @@ def noise_standard_deviation(self): @property def scalar_noise_params(self): return dp_computations.ScalarNoiseParams( - self.eps, self.delta, - self.aggregate_params.min_value, + self.eps, self.delta, self.aggregate_params.min_value, self.aggregate_params.max_value, self.aggregate_params.min_sum_per_partition, self.aggregate_params.max_sum_per_partition, self.aggregate_params.max_partitions_contributed, self.aggregate_params.max_contributions_per_partition, - self.aggregate_params.noise_kind, - self.noise_standard_deviation) + self.aggregate_params.noise_kind, self.noise_standard_deviation) @property def additive_vector_noise_params( diff --git a/pipeline_dp/dp_computations.py b/pipeline_dp/dp_computations.py index 56e9b9ae..d4ee0a46 100644 --- a/pipeline_dp/dp_computations.py +++ b/pipeline_dp/dp_computations.py @@ -104,13 +104,13 @@ def compute_sigma(eps: float, delta: float, l2_sensitivity: float): delta: The delta value. l2_sensitivity: The L2 sensitivity. """ - return dp_mechanisms.GaussianMechanism(epsilon=eps, + return dp_mechanisms.GaussianMechanism(epsilon=eps, delta=delta, sensitivity=l2_sensitivity).std -def apply_laplace_mechanism(value: float, - eps: float, +def apply_laplace_mechanism(value: float, + eps: float, l1_sensitivity: float, noise_standard_deviation: Optional[float] = None): """Applies the Laplace mechanism to the value. @@ -126,15 +126,12 @@ def apply_laplace_mechanism(value: float, The value resulted after adding the noise. """ if noise_standard_deviation is not None: - mechanism = dp_mechanisms.LaplaceMechanism( - epsilon=1/noise_standard_deviation, - sensitivity=l1_sensitivity - ) + mechanism = dp_mechanisms.LaplaceMechanism(epsilon=1 / + noise_standard_deviation, + sensitivity=l1_sensitivity) else: - mechanism = dp_mechanisms.LaplaceMechanism( - epsilon=eps, - sensitivity=l1_sensitivity - ) + mechanism = dp_mechanisms.LaplaceMechanism(epsilon=eps, + sensitivity=l1_sensitivity) return mechanism.add_noise(1.0 * value) @@ -158,14 +155,11 @@ def apply_gaussian_mechanism(value: float, """ if noise_standard_deviation is not None: mechanism = dp_mechanisms.GaussianMechanism.create_from_standard_deviation( - std=l2_sensitivity * noise_standard_deviation - ) + std=l2_sensitivity * noise_standard_deviation) else: - mechanism = dp_mechanisms.GaussianMechanism( - epsilon=eps, - delta=delta, - sensitivity=l2_sensitivity - ) + mechanism = dp_mechanisms.GaussianMechanism(epsilon=eps, + delta=delta, + sensitivity=l2_sensitivity) return mechanism.add_noise(1.0 * value) @@ -191,24 +185,19 @@ def _add_random_noise( Returns: The value resulted after adding the random noise. """ - + if noise_kind == pipeline_dp.NoiseKind.LAPLACE: l1_sensitivity = compute_l1_sensitivity(l0_sensitivity, linf_sensitivity) - return apply_laplace_mechanism(value, - eps, - l1_sensitivity, + return apply_laplace_mechanism(value, eps, l1_sensitivity, noise_standard_deviation) - + if noise_kind == pipeline_dp.NoiseKind.GAUSSIAN: l2_sensitivity = compute_l2_sensitivity(l0_sensitivity, linf_sensitivity) - return apply_gaussian_mechanism(value, - eps, - delta, - l2_sensitivity, + return apply_gaussian_mechanism(value, eps, delta, l2_sensitivity, noise_standard_deviation) - + raise ValueError("Noise kind must be either Laplace or Gaussian.") @@ -250,7 +239,7 @@ def add_noise_vector(vec: np.ndarray, noise_params: AdditiveVectorNoiseParams): s, noise_params.eps_per_coordinate, noise_params.delta_per_coordinate, - None, # TODO: Add noise_standard_deviation for vector sum computation + None, # TODO: Add noise_standard_deviation for vector sum computation noise_params.l0_sensitivity, noise_params.linf_sensitivity, noise_params.noise_kind, @@ -383,15 +372,10 @@ def _compute_mean_for_normalized_sum( middle = compute_middle(min_value, max_value) linf_sensitivity = max_contributions_per_partition * abs(middle - min_value) - dp_normalized_sum = _add_random_noise( - sum, - eps, - delta, - noise_standard_deviation, - l0_sensitivity, - linf_sensitivity, - noise_kind - ) + dp_normalized_sum = _add_random_noise(sum, eps, delta, + noise_standard_deviation, + l0_sensitivity, linf_sensitivity, + noise_kind) # Clamps dp_count to 1.0. We know that actual count > 1 except when the # input set is empty, in which case it shouldn't matter much what the # denominator is. @@ -425,7 +409,7 @@ def compute_dp_mean(count: int, normalized_sum: float, 2*dp_params.noise_standard_deviation \ if dp_params.noise_standard_deviation is not None \ else None - + dp_count = _add_random_noise( count, count_eps, @@ -478,7 +462,7 @@ def compute_dp_var(count: int, normalized_sum: float, (sum_squares_eps, sum_squares_delta), ) = equally_split_budget(dp_params.eps, dp_params.delta, 3) l0_sensitivity = dp_params.l0_sensitivity() - + # Increases noise std.dev. equally due to multiple computations count_noise_standard_deviation = \ sum_noise_standard_deviation = \ @@ -516,17 +500,10 @@ def compute_dp_var(count: int, normalized_sum: float, # Computes and adds noise to the mean of squares. dp_mean_squares = _compute_mean_for_normalized_sum( - dp_count, - normalized_sum_squares, - squares_min_value, - squares_max_value, - sum_squares_eps, - sum_squares_delta, - sum_squares_noise_standard_deviation, - l0_sensitivity, - dp_params.max_contributions_per_partition, - dp_params.noise_kind - ) + dp_count, normalized_sum_squares, squares_min_value, squares_max_value, + sum_squares_eps, sum_squares_delta, + sum_squares_noise_standard_deviation, l0_sensitivity, + dp_params.max_contributions_per_partition, dp_params.noise_kind) dp_var = dp_mean_squares - dp_mean**2 if dp_params.min_value != dp_params.max_value: diff --git a/tests/dp_computations_test.py b/tests/dp_computations_test.py index ab128087..f66f4407 100644 --- a/tests/dp_computations_test.py +++ b/tests/dp_computations_test.py @@ -221,7 +221,9 @@ def test_secure_gaussian_noise_is_used(self, gaussian_mechanism): value=20, eps=0.5, delta=1e-10, l2_sensitivity=3) # Assert - gaussian_mechanism.assert_called_with(epsilon=0.5, delta=1e-10, sensitivity=3) + gaussian_mechanism.assert_called_with(epsilon=0.5, + delta=1e-10, + sensitivity=3) mock_gaussian_mechanism.add_noise.assert_called_with(20) self.assertEqual("value_with_noise", anonymized_value)