From 9757e1bc870bc65283abb5a8bc6fc69546b46b67 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Mon, 1 Nov 2021 11:39:10 -0700 Subject: [PATCH] Update the tree aggregation RDP accounting for restarts. This prevents potentially inaccurate use of the previous implementation in the no-restart setting. PiperOrigin-RevId: 406878834 --- tensorflow_privacy/__init__.py | 2 +- .../privacy/analysis/rdp_accountant.py | 63 ++++---------- .../privacy/analysis/rdp_accountant_test.py | 87 +++++++------------ 3 files changed, 50 insertions(+), 102 deletions(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 5a90fe5..72cc746 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -45,7 +45,7 @@ else: from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree + from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent # DPQuery classes diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py index 500a16a..d50a7a8 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py @@ -398,34 +398,34 @@ def compute_rdp(q, noise_multiplier, steps, orders): return rdp * steps -def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha): +def _compute_rdp_tree_restart(sigma, steps_list, alpha): """Computes RDP of the Tree Aggregation Protocol at order alpha.""" if np.isinf(alpha): return np.inf tree_depths = [ - math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0 + 
math.floor(math.log2(float(steps))) + 1 + for steps in steps_list + if steps > 0 ] - record_occurence = [ - x * y for x, y in zip(max_participation_list, tree_depths) - ] - return alpha * sum(record_occurence) / (2 * sigma**2) + return alpha * sum(tree_depths) / (2 * sigma**2) -def compute_rdp_tree( - noise_multiplier: float, steps_list: Union[float, Collection[float]], - max_participation_list: Union[int, Collection[int]], - orders: Union[float, Collection[float]]) -> Collection[float]: +def compute_rdp_tree_restart( + noise_multiplier: float, steps_list: Union[int, Collection[int]], + orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism. + This function implements the accounting when the tree is restarted at every + epoch. See appendix of + "Practical and Private (Deep) Learning without Sampling or Shuffling" + https://arxiv.org/abs/2103.00039. + Args: noise_multiplier: A non-negative float representing the ratio of the standard deviation of the Gaussian noise to the l2-sensitivity of the function to which it is added. steps_list: A scalar or a list of non-negative intergers representing the - number of steps between tree restarts. - max_participation_list: A scalar or a list of positive integers representing - maximum number of times a sample may appear between tree restarts. The - type (scalar/list) of `max_participation_list` should match `steps_list`. + number of steps per epoch (between two restarts). orders: An array (or a scalar) of RDP orders. 
Returns: @@ -442,46 +442,19 @@ def compute_rdp_tree( "steps_list must be a non-empty list, or a non-zero scalar, got " f"{steps_list}.") - if not max_participation_list: - raise ValueError( - "max_participation_list must be a non-empty list, or a non-zero scalar," - f" got {max_participation_list}.") - - if np.isscalar(steps_list) and np.isscalar(max_participation_list): + if np.isscalar(steps_list): steps_list = [steps_list] - max_participation_list = [max_participation_list] - elif np.isscalar(steps_list): - raise ValueError( - "`steps_list` and `max_participation_list` must have the same type, got" - f"scalar of steps: {steps_list}, and list of max_participations with " - f"length {len(max_participation_list)}.") - elif np.isscalar(max_participation_list): - raise ValueError( - "`steps_list` and `max_participation_list` must have the same type, got" - f"scalar of max_participation: {max_participation_list}, and list of " - f"steps with length {len(steps_list)}.") - elif len(max_participation_list) != len(steps_list): - raise ValueError( - "`steps_list` and `max_participation_list` must have the same size, got" - f"steps length {len(steps_list)}, max_participations length " - f"{len(max_participation_list)}") - - for max_participation in max_participation_list: - if max_participation <= 0: - raise ValueError( - f"Max participation must be positive, got {max_participation}") for steps in steps_list: if steps < 0: raise ValueError(f"Steps must be non-negative, got {steps_list}") if np.isscalar(orders): - rdp = _compute_rdp_tree(noise_multiplier, steps_list, - max_participation_list, orders) + rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders) else: rdp = np.array([ - _compute_rdp_tree(noise_multiplier, steps_list, max_participation_list, - alpha) for alpha in orders + _compute_rdp_tree_restart(noise_multiplier, steps_list, alpha) + for alpha in orders ]) return rdp diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py 
b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py index 583fa3f..42e751c 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py @@ -272,81 +272,56 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): # This tests is based on the StackOverflow setting in "Practical and # Private (Deep) Learning without Sampling or Shuffling". The calculated # epsilon could be better as the method in this package keeps improving. - steps_list, target_delta, max_participation = 1600, 1e-6, 1 - rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation, orders) + steps_list, target_delta = 1600, 1e-6 + rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list, + orders) new_eps = rdp_accountant.get_privacy_spent( orders, rdp, target_delta=target_delta)[0] self.assertLess(new_eps, eps) @parameterized.named_parameters( - ('restart4_max2', [400] * 4, [2] * 4), - ('restart2_max1', [800] * 2, [1] * 2), - ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5), - ('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3])) - def test_compose_tree_rdp(self, steps_list, max_participation_list): + ('restart4', [400] * 4), + ('restart2', [800] * 2), + ('adaptive', [10, 400, 400, 400, 390]), + ) + def test_compose_tree_rdp(self, steps_list): noise_multiplier, orders = 0.1, 1 - if np.isscalar(max_participation_list): - rdp_list = [ - rdp_accountant.compute_rdp_tree(noise_multiplier, steps, - max_participation_list, orders) - for steps in steps_list - ] - else: - rdp_list = [ - rdp_accountant.compute_rdp_tree(noise_multiplier, steps, - max_participation, orders) for steps, - max_participation in zip(steps_list, max_participation_list) - ] - rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation_list, - orders) + rdp_list = [ + rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders) + for 
steps in steps_list + ] + rdp_composed = rdp_accountant.compute_rdp_tree_restart( + noise_multiplier, steps_list, orders) self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12) @parameterized.named_parameters( - ('restart4_max2', [400] * 4, [2] * 4), - ('restart2_max1', [800] * 2, [1] * 2), - ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5), + ('restart4', [400] * 4), + ('restart2', [800] * 2), + ('adaptive', [10, 400, 400, 400, 390]), ) - def test_compute_eps_tree_decreasing(self, steps_list, max_participation): + def test_compute_eps_tree_decreasing(self, steps_list): # Test privacy epsilon decreases with noise multiplier increasing when # keeping other parameters the same. orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) target_delta = 1e-6 - prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation, - orders) + prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders) for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]: - rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation, orders) + rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, + steps_list, orders) eps = rdp_accountant.get_privacy_spent( orders, rdp, target_delta=target_delta)[0] self.assertLess(eps, prev_eps) @parameterized.named_parameters( - ('negative_noise', -1, 3, 2, 1), - ('empty_steps', 1, [], 2, 1), - ('empty_part', 1, 1, [], 1), - ('negative_steps', 1, -3, 2, 1), - ('zero_participation', 1, 3, 0, 1), - ('negative_participation', 1, 3, -1, 1), + ('negative_noise', -1, 3, 1), + ('empty_steps', 1, [], 1), + ('negative_steps', 1, -3, 1), ) - def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list, - max_participation, orders): + def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list, + orders): with self.assertRaisesRegex(ValueError, 'must be'): - rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation, orders) - - 
@parameterized.named_parameters( - ('list_scalar', [2], 1), - ('scalar_list', 2, [1]), - ('list_length', [2, 3], [1]), - ('list_length2', [2, 3], [1, 2, 2]), - ) - def test_compute_rdp_tree_raise_input_type(self, steps_list, - max_participation): - with self.assertRaisesRegex(ValueError, 'must have the same'): - rdp_accountant.compute_rdp_tree( - 0.1, steps_list, max_participation, orders=1) + rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list, + orders) @parameterized.named_parameters( ('t100n0.1', 100, 0.1), @@ -354,9 +329,9 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): ) def test_no_tree_no_sampling(self, total_steps, noise_multiplier): orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) - tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, - [1] * total_steps, - [1] * total_steps, orders) + tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, + [1] * total_steps, + orders) rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders) self.assertAllClose(tree_rdp, rdp, rtol=1e-12)