From 99c25e3be5afef5efe6e31aa6513770f58c1dff2 Mon Sep 17 00:00:00 2001
From: Arun Ganesh
Date: Mon, 13 May 2024 15:13:02 -0700
Subject: [PATCH] Improves user/group-level accounting in
 compute_dp_sgd_privacy_lib

PiperOrigin-RevId: 633346332
---
 tensorflow_privacy/privacy/analysis/BUILD          |   1 +
 .../analysis/compute_dp_sgd_privacy_lib.py         | 308 ++++++++++++------
 .../analysis/compute_dp_sgd_privacy_test.py        |  14 +-
 3 files changed, 213 insertions(+), 110 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/BUILD b/tensorflow_privacy/privacy/analysis/BUILD
index 6ff02ae..9885adf 100644
--- a/tensorflow_privacy/privacy/analysis/BUILD
+++ b/tensorflow_privacy/privacy/analysis/BUILD
@@ -24,6 +24,7 @@ py_binary(
 py_test(
     name = "compute_dp_sgd_privacy_test",
     size = "small",
+    timeout = "moderate",
     srcs = ["compute_dp_sgd_privacy_test.py"],
     python_version = "PY3",
     srcs_version = "PY3",
diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
index e3782b9..5a067de 100644
--- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
+++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
@@ -24,6 +24,7 @@ from absl import app
 from absl import logging
 import dp_accounting
 from scipy import optimize
+from scipy import stats
 
 
 class UserLevelDPComputationError(Exception):
@@ -60,16 +61,10 @@ def _compute_dp_sgd_user_privacy(
 ) -> float:
   """Computes add-or-remove-one-user DP epsilon using group privacy.
 
-  This privacy guarantee uses add-or-remove-one-user adjacency, and protects
-  release of all model checkpoints in addition to the final model.
-
-  Uses Vadhan (2017) "The complexity of differential privacy" Lemma 2.2.
-
-  # TODO(b/271330804): Consider using RDP to compute group privacy.
-
-  We use a line search to identify an example-level delta which, when the lemma
-  is applied, yields the requested user-level delta, then use it to compute the
-  user-level epsilon.
+  When each user contributes at most one example, or when there is no Poisson
+  subsampling, the computation reduces to example-level DP accounting.
+  Otherwise, see the helper methods _compute_dp_sgd_group_privacy_using_rdp
+  and _compute_dp_sgd_group_privacy_using_pld for details of the accounting.
 
   Args:
     num_epochs: The number of passes over the data. May be fractional.
@@ -79,10 +74,7 @@ def _compute_dp_sgd_user_privacy(
     used_microbatching: If true, increases sensitivity by a factor of two.
     poisson_subsampling_probability: If not None, gives the probability that
       each record is chosen in a batch. If None, assumes no subsampling.
-    accountant_type: The privacy accountant for computing epsilon. While this
-      method supports both PLD and RDP accountants, the behavior for PLD
-      accountant can sometimes be overly pessimistic. This remains to be
-      investigated and fixed (b/271341062).
+    accountant_type: The privacy accountant for computing epsilon.
 
   Returns:
     The add-or-remove-one-user DP epsilon value using group privacy.
@@ -114,93 +106,40 @@ def _compute_dp_sgd_user_privacy(
         poisson_subsampling_probability,
         accountant_type,
     )
-
-  # The computation below to estimate user_eps works as follows.
-  # We have _compute_dp_sgd_example_privacy which maps
-  # F(example_delta) -> example_eps
-  # Vadhan (2017) "The complexity of differential privacy" Lemma 2.2 gives us
-  # G(example_eps, example_delta) -> user_delta
-  # H(example_eps) -> user_eps.
-  # We first identify an example_delta such that
-  # G(F(example_delta), example_delta) = user_delta
-  # Specifically, we use a line search in log space to solve for
-  # log(G(F(example_delta), example_delta)) - log(user_delta) = 0
-  # Then we can return user_eps = H(F(example_delta)).
-
-  target_user_log_delta = math.log(user_delta)
-
-  # Cache example privacy values, which can be expensive.
-  @functools.cache
-  def get_example_eps(example_log_delta):
+  elif poisson_subsampling_probability is None:
+    # Without subsampling, the worst case is when all max_examples_per_user
+    # examples participate in the same round (and, in the microbatching case,
+    # in different microbatches of that round), which effectively increases
+    # the sensitivity (i.e., decreases the noise_multiplier) by a factor of
+    # max_examples_per_user.
     return _compute_dp_sgd_example_privacy(
+        num_epochs,
+        noise_multiplier / max_examples_per_user,
+        user_delta,
+        used_microbatching,
+        poisson_subsampling_probability,
+        accountant_type,
+    )
+  elif accountant_type == AccountantType.RDP:
+    return _compute_dp_sgd_group_privacy_using_rdp(
         num_epochs,
         noise_multiplier,
-        math.exp(example_log_delta),
+        user_delta,
+        max_examples_per_user,
         used_microbatching,
         poisson_subsampling_probability,
     )
-
-  def user_log_delta_gap(example_log_delta):
-    example_eps = get_example_eps(example_log_delta)
-
-    # Estimate user_eps, user_log_delta using Vadhan Lemma 2.2, using a tighter
-    # bound seen in the penultimate line of the proof, given as
-    # user_delta = (example_delta * (exp(k * example_eps) - 1)
-    #               / (exp(example_eps) - 1))
-    user_eps = max_examples_per_user * example_eps
-    user_log_delta = (
-        example_log_delta + _logexpm1(user_eps) - _logexpm1(example_eps)
+  elif accountant_type == AccountantType.PLD:
+    return _compute_dp_sgd_group_privacy_using_pld(
+        num_epochs,
+        noise_multiplier,
+        user_delta,
+        max_examples_per_user,
+        poisson_subsampling_probability,
+        used_microbatching,
     )
-    return user_log_delta - target_user_log_delta
-
-  # We need bounds on the example-level delta. The supplied user-level delta
-  # is an upper bound. Search exponentially toward zero for lower bound.
-  example_log_delta_max = target_user_log_delta
-  example_log_delta_min = example_log_delta_max - math.log(10)
-  user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min)
-  while user_log_delta_gap_min > 0:
-    # Assuming that _compute_dp_sgd_example_privacy is decreasing in
-    # example_delta, it is not difficult to show that if user_delta_min
-    # corresponding to example_delta_min is too large, then we must reduce
-    # example_delta by at least a factor of (user_delta / user_delta_min).
-    # In other words, if example_log_delta_min is an upper bound, then so is
-    # example_log_delta_min - user_log_delta_gap_min.
-    example_log_delta_max = example_log_delta_min - user_log_delta_gap_min
-    example_log_delta_min = example_log_delta_max - math.log(10)
-    user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min)
-    if not math.isfinite(user_log_delta_gap_min):
-      # User-level (epsilon, delta) DP is not achievable. This can happen
-      # because as example_delta decreases, example_eps increases. So it is
-      # possible for user_delta (which increases in both example_delta and
-      # example_eps) to diverge to infinity as example_delta goes to zero.
-      logging.warning(
-          (
-              'No upper bound on user-level DP epsilon can be computed with %s '
-              'examples per user.'
- ), - max_examples_per_user, - ) - return math.inf - - # By the same logic, we can improve on the lower bound we just found, before - # even starting the line search. We actually could do a custom line search - # that makes use of this at each step, but brentq should be fast enough. - example_log_delta_min -= user_log_delta_gap_min - - example_log_delta, result = optimize.brentq( - user_log_delta_gap, - example_log_delta_min, - example_log_delta_max, - full_output=True, - ) - - if not result.converged: - raise UserLevelDPComputationError( - 'Optimization failed trying to compute user-level DP epsilon.' - ) - - # Vadhan (2017) "The complexity of differential privacy" Lemma 2.2. - return max_examples_per_user * get_example_eps(example_log_delta) + else: + raise ValueError(f'Unsupported accountant type: {accountant_type}') def _compute_dp_sgd_example_privacy( @@ -258,6 +197,177 @@ def _compute_dp_sgd_example_privacy( ) +def _compute_dp_sgd_group_privacy_using_rdp( + num_epochs: float, + noise_multiplier: float, + user_delta: float, + max_examples_per_user: int, + used_microbatching: bool = True, + poisson_subsampling_probability: Optional[float] = None, +): + """Computes add-or-remove-one-user DP epsilon using group privacy via RDP. + + This privacy guarantee uses add-or-remove-one-user adjacency, and protects + release of all model checkpoints in addition to the final model. + + Uses Vadhan (2017) "The complexity of differential privacy" Lemma 2.2. + + # TODO(b/271330804): Consider using RDP to compute group privacy. + + We use a line search to identify an example-level delta which, when the lemma + is applied, yields the requested user-level delta, then use it to compute the + user-level epsilon. + + Args: + num_epochs: The number of passes over the data. May be fractional. + noise_multiplier: The ratio of the noise stddev to the l2 sensitivity. + user_delta: The target user-level delta. + max_examples_per_user: Upper bound on the number of examples per user. + used_microbatching: If true, increases sensitivity by a factor of two. + poisson_subsampling_probability: If not None, gives the probability that + each record is chosen in a batch. If None, assumes no subsampling. + + Returns: + The add-or-remove-one-user DP epsilon value using group privacy. + + Raises: + UserLevelDPComputationError: If line search for example-level delta fails. + """ + # The computation below to estimate user_eps works as follows. + # We have _compute_dp_sgd_example_privacy which maps + # F(example_delta) -> example_eps + # Vadhan (2017) "The complexity of differential privacy" Lemma 2.2 gives us + # G(example_eps, example_delta) -> user_delta + # H(example_eps) -> user_eps. + # We first identify an example_delta such that + # G(F(example_delta), example_delta) = user_delta + # Specifically, we use a line search in log space to solve for + # log(G(F(example_delta), example_delta)) - log(user_delta) = 0 + # Then we can return user_eps = H(F(example_delta)). + + target_user_log_delta = math.log(user_delta) + + # Cache example privacy values, which can be expensive. 
+ @functools.cache + def get_example_eps(example_log_delta): + return _compute_dp_sgd_example_privacy( + num_epochs, + noise_multiplier, + math.exp(example_log_delta), + used_microbatching, + poisson_subsampling_probability, + ) + + def user_log_delta_gap(example_log_delta): + example_eps = get_example_eps(example_log_delta) + + # Estimate user_eps, user_log_delta using Vadhan Lemma 2.2, using a + # tighter bound seen in the penultimate line of the proof, given as + # user_delta = (example_delta * (exp(k * example_eps) - 1) + # / (exp(example_eps) - 1)) + user_eps = max_examples_per_user * example_eps + user_log_delta = ( + example_log_delta + _logexpm1(user_eps) - _logexpm1(example_eps) + ) + return user_log_delta - target_user_log_delta + + # We need bounds on the example-level delta. The supplied user-level delta + # is an upper bound. Search exponentially toward zero for lower bound. + example_log_delta_max = target_user_log_delta + example_log_delta_min = example_log_delta_max - math.log(10) + user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min) + while user_log_delta_gap_min > 0: + # Assuming that _compute_dp_sgd_example_privacy is decreasing in + # example_delta, it is not difficult to show that if user_delta_min + # corresponding to example_delta_min is too large, then we must reduce + # example_delta by at least a factor of (user_delta / user_delta_min). + # In other words, if example_log_delta_min is an upper bound, then so is + # example_log_delta_min - user_log_delta_gap_min. + example_log_delta_max = example_log_delta_min - user_log_delta_gap_min + example_log_delta_min = example_log_delta_max - math.log(10) + user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min) + if not math.isfinite(user_log_delta_gap_min): + # User-level (epsilon, delta) DP is not achievable. This can happen + # because as example_delta decreases, example_eps increases. So it is + # possible for user_delta (which increases in both example_delta and + # example_eps) to diverge to infinity as example_delta goes to zero. + logging.warning( + ( + 'No upper bound on user-level DP epsilon can be computed with' + ' %s examples per user.' + ), + max_examples_per_user, + ) + return math.inf + + # By the same logic, we can improve on the lower bound we just found, before + # even starting the line search. We actually could do a custom line search + # that makes use of this at each step, but brentq should be fast enough. + example_log_delta_min -= user_log_delta_gap_min + + example_log_delta, result = optimize.brentq( + user_log_delta_gap, + example_log_delta_min, + example_log_delta_max, + full_output=True, + ) + + if not result.converged: + raise UserLevelDPComputationError( + 'Optimization failed trying to compute user-level DP epsilon.' + ) + + # Vadhan (2017) "The complexity of differential privacy" Lemma 2.2. + return max_examples_per_user * get_example_eps(example_log_delta) + + +def _compute_dp_sgd_group_privacy_using_pld( + num_epochs: float, + noise_multiplier: float, + user_delta: float, + max_examples_per_user: int, + poisson_subsampling_probability: float, + used_microbatching: bool = True, +): + """Computes add-or-remove-one-user DP epsilon using group privacy via PLDs. + + This privacy guarantee uses add-or-remove-one-user adjacency, and protects + release of all model checkpoints in addition to the final model. 
+
+  Uses Ganesh (2024) "Tight Group-Level DP Guarantees for DP-SGD with Sampling
+  via Mixture of Gaussians Mechanisms" (https://arxiv.org/abs/2401.10294)
+  Theorem 3.1.
+
+  Args:
+    num_epochs: The number of passes over the data. May be fractional.
+    noise_multiplier: The ratio of the noise stddev to the l2 sensitivity.
+    user_delta: The target user-level delta.
+    max_examples_per_user: Upper bound on the number of examples per user.
+    poisson_subsampling_probability: Gives the probability that each record is
+      chosen in a batch.
+    used_microbatching: If true, increases sensitivity by a factor of two.
+
+  Returns:
+    The add-or-remove-one-user DP epsilon value using group privacy.
+  """
+  # With microbatching, a (loose) pessimistic assumption is that when a user's
+  # examples are sampled, they appear in different microbatches. This reduces
+  # to the non-microbatching analysis, but with the sensitivity doubled.
+  if used_microbatching:
+    noise_multiplier /= 2
+  sensitivities = range(max_examples_per_user + 1)
+  probs = stats.binom.pmf(
+      sensitivities, max_examples_per_user, poisson_subsampling_probability
+  )
+  single_round_event = dp_accounting.dp_event.MixtureOfGaussiansDpEvent(
+      noise_multiplier, sensitivities, probs
+  )
+  accountant = dp_accounting.pld.PLDAccountant()
+  count = int(math.ceil(num_epochs / poisson_subsampling_probability))
+  accountant.compose(single_round_event, count)
+  return accountant.get_epsilon(user_delta)
+
+
 def compute_dp_sgd_privacy_statement(
     number_of_examples: int,
     batch_size: int,
@@ -363,16 +473,6 @@ examples per user.""",
             width=80,
         )
     )
-  elif accountant_type == AccountantType.PLD:
-    # TODO(b/271341062): Add User level DP support for PLD.
-    paragraphs.append(
-        textwrap.fill(
-            """\
-User-level DP epsilon computation is not supported for PLD accounting at this \
-time. Use RDP accounting to obtain user-level DP guarantees.""",
-            width=80,
-        )
-    )
-  else:  # Case: max_examples_per_user is not None and accountant_type is RDP
+  else:  # Case: max_examples_per_user is not None
     user_eps_no_subsampling = _compute_dp_sgd_user_privacy(
         num_epochs,
diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py
index 571cc3b..2499272 100644
--- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py
+++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py
@@ -230,7 +230,7 @@ RDP accounting:
 
 User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
 RDP accounting and group privacy:
-    Epsilon with each example occurring once per epoch: 85.940
+    Epsilon with each example occurring once per epoch: 56.224
     Epsilon assuming Poisson sampling (*): 6.425
 
 (*) Poisson sampling is not usually done in training pipelines, but assuming
@@ -261,7 +261,7 @@ RDP accounting:
 
 User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
 RDP accounting and group privacy:
-    Epsilon with each example occurring once per epoch: inf (**)
+    Epsilon with each example occurring once per epoch: 366.374
     Epsilon assuming Poisson sampling (*): inf (**)
 
 (*) Poisson sampling is not usually done in training pipelines, but assuming
@@ -307,12 +307,12 @@ data order.
def test_dp_sgd_privacy_statement_user_dp_with_pld(self): statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement( **DP_SGD_STATEMENT_KWARGS, - max_examples_per_user=3, + max_examples_per_user=2, accountant_type=_PLD, ) expected_statement = """\ DP-SGD performed over 10000 examples with 64 examples per iteration, noise -multiplier 2.0 for 5.0 epochs with microbatching, and at most 3 examples per +multiplier 2.0 for 5.0 epochs with microbatching, and at most 2 examples per user. This privacy guarantee protects the release of all model checkpoints in addition @@ -323,8 +323,10 @@ PLD accounting: Epsilon with each example occurring once per epoch: 12.595 Epsilon assuming Poisson sampling (*): 1.199 -User-level DP epsilon computation is not supported for PLD accounting at this -time. Use RDP accounting to obtain user-level DP guarantees. +User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using +PLD accounting and group privacy: + Epsilon with each example occurring once per epoch: 30.579 + Epsilon assuming Poisson sampling (*): 2.532 (*) Poisson sampling is not usually done in training pipelines, but assuming that the data was randomly shuffled, it is believed that the actual epsilon
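
The group-privacy step in _compute_dp_sgd_group_privacy_using_rdp is Vadhan
(2017) Lemma 2.2 with the tightened bound quoted in its comments: with at most
k examples per user, an example-level (eps, delta) guarantee becomes
user_eps = k * eps and user_delta = delta * (exp(k * eps) - 1) / (exp(eps) - 1).
The standalone sketch below reproduces just that conversion; the function name
and the numbers in it are illustrative, and nothing beyond the Python standard
library is assumed.

import math


def _logexpm1(x: float) -> float:
  # Stable log(exp(x) - 1) = x + log1p(-exp(-x)), mirroring the _logexpm1
  # helper that the library code relies on.
  return x + math.log1p(-math.exp(-x))


def user_level_from_example_level(example_eps, example_delta, k):
  # Vadhan (2017) Lemma 2.2, in the tightened form quoted in the patch:
  #   user_eps   = k * example_eps
  #   user_delta = example_delta * (exp(k * eps) - 1) / (exp(eps) - 1)
  # evaluated in log space so that large k * eps does not overflow.
  user_eps = k * example_eps
  user_log_delta = (
      math.log(example_delta) + _logexpm1(user_eps) - _logexpm1(example_eps)
  )
  return user_eps, math.exp(user_log_delta)


# An example-level (1.0, 1e-8) guarantee with at most 3 examples per user
# becomes roughly a (3.0, 1.1e-7) user-level guarantee.
print(user_level_from_example_level(1.0, 1e-8, 3))

The helper itself inverts this map: it line-searches with optimize.brentq for
the example-level delta whose induced user-level delta equals the target, then
returns the corresponding user-level epsilon.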
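
The PLD path applies Theorem 3.1 of Ganesh (2024): under Poisson sampling with
probability q, one round of DP-SGD, viewed at the user level with at most k
examples per user, is dominated by a mixture-of-Gaussians mechanism whose
sensitivity is Binomial(k, q)-distributed. The sketch below mirrors the body of
_compute_dp_sgd_group_privacy_using_pld on the updated test's settings; the
sampling probability 64 / 10000 is an assumption inferred from the test's batch
size and example count, and the printed value should land near the 2.532 the
updated test expects.

import math

import dp_accounting
from scipy import stats

# Settings mirroring the updated PLD test: noise multiplier 2.0, 5 epochs,
# microbatching, at most 2 examples per user, user-level delta 1e-6, and an
# assumed Poisson sampling probability of 64 / 10000 = 0.0064.
noise_multiplier = 2.0
max_examples_per_user = 2
sampling_probability = 64 / 10000
num_epochs = 5.0
user_delta = 1e-6

# Microbatching: pessimistically assume the user's sampled examples fall in
# different microbatches, which doubles sensitivity, i.e. halves the
# effective noise multiplier.
noise_multiplier /= 2

# Theorem 3.1: one Poisson-sampled round is dominated by a mixture of
# Gaussians whose sensitivity is Binomial(max_examples_per_user, q).
sensitivities = list(range(max_examples_per_user + 1))
probs = stats.binom.pmf(
    sensitivities, max_examples_per_user, sampling_probability
)
event = dp_accounting.dp_event.MixtureOfGaussiansDpEvent(
    noise_multiplier, sensitivities, probs
)

# Compose over all rounds and read off the user-level epsilon.
accountant = dp_accounting.pld.PLDAccountant()
num_rounds = int(math.ceil(num_epochs / sampling_probability))
accountant.compose(event, num_rounds)
print(accountant.get_epsilon(user_delta))  # Expect a value near 2.532.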