Improves user/group-level accounting in compute_dp_sgd_privacy_lib

PiperOrigin-RevId: 633346332
This commit is contained in:
Arun Ganesh 2024-05-13 15:13:02 -07:00 committed by A. Unique TensorFlower
parent 3e42ce318f
commit 99c25e3be5
3 changed files with 213 additions and 110 deletions

View file

@ -24,6 +24,7 @@ py_binary(
py_test( py_test(
name = "compute_dp_sgd_privacy_test", name = "compute_dp_sgd_privacy_test",
size = "small", size = "small",
timeout = "moderate",
srcs = ["compute_dp_sgd_privacy_test.py"], srcs = ["compute_dp_sgd_privacy_test.py"],
python_version = "PY3", python_version = "PY3",
srcs_version = "PY3", srcs_version = "PY3",

View file

@ -24,6 +24,7 @@ from absl import app
from absl import logging from absl import logging
import dp_accounting import dp_accounting
from scipy import optimize from scipy import optimize
from scipy import stats
class UserLevelDPComputationError(Exception): class UserLevelDPComputationError(Exception):
@ -60,16 +61,10 @@ def _compute_dp_sgd_user_privacy(
) -> float: ) -> float:
"""Computes add-or-remove-one-user DP epsilon using group privacy. """Computes add-or-remove-one-user DP epsilon using group privacy.
This privacy guarantee uses add-or-remove-one-user adjacency, and protects Without sampling, the privacy accounting reduces to example-level DP
release of all model checkpoints in addition to the final model. accounting. Otherwise, see the helper methods
_compute_dp_sgd_group_privacy_using_rdp and
Uses Vadhan (2017) "The complexity of differential privacy" Lemma 2.2. _compute_dp_sgd_group_privacy_using_pld for details of privacy accounting.
# TODO(b/271330804): Consider using RDP to compute group privacy.
We use a line search to identify an example-level delta which, when the lemma
is applied, yields the requested user-level delta, then use it to compute the
user-level epsilon.
Args: Args:
num_epochs: The number of passes over the data. May be fractional. num_epochs: The number of passes over the data. May be fractional.
@ -79,10 +74,7 @@ def _compute_dp_sgd_user_privacy(
used_microbatching: If true, increases sensitivity by a factor of two. used_microbatching: If true, increases sensitivity by a factor of two.
poisson_subsampling_probability: If not None, gives the probability that poisson_subsampling_probability: If not None, gives the probability that
each record is chosen in a batch. If None, assumes no subsampling. each record is chosen in a batch. If None, assumes no subsampling.
accountant_type: The privacy accountant for computing epsilon. While this accountant_type: The privacy accountant for computing epsilon.
method supports both PLD and RDP accountants, the behavior for PLD
accountant can sometimes be overly pessimistic. This remains to be
investigated and fixed (b/271341062).
Returns: Returns:
The add-or-remove-one-user DP epsilon value using group privacy. The add-or-remove-one-user DP epsilon value using group privacy.
@ -114,93 +106,40 @@ def _compute_dp_sgd_user_privacy(
poisson_subsampling_probability, poisson_subsampling_probability,
accountant_type, accountant_type,
) )
elif poisson_subsampling_probability is None:
# The computation below to estimate user_eps works as follows. # Without subsampling, the worst-case is when all max_examples_per_user
# We have _compute_dp_sgd_example_privacy which maps # examples participate in the same round (and in the microbatching case,
# F(example_delta) -> example_eps # they participate in different microbatches in this round), which
# Vadhan (2017) "The complexity of differential privacy" Lemma 2.2 gives us # effectively increases the sensitivity, i.e. decreases the
# G(example_eps, example_delta) -> user_delta # noise_multiplier, by max_examples_per_user.
# H(example_eps) -> user_eps.
# We first identify an example_delta such that
# G(F(example_delta), example_delta) = user_delta
# Specifically, we use a line search in log space to solve for
# log(G(F(example_delta), example_delta)) - log(user_delta) = 0
# Then we can return user_eps = H(F(example_delta)).
target_user_log_delta = math.log(user_delta)
# Cache example privacy values, which can be expensive.
@functools.cache
def get_example_eps(example_log_delta):
return _compute_dp_sgd_example_privacy( return _compute_dp_sgd_example_privacy(
num_epochs,
noise_multiplier / max_examples_per_user,
user_delta,
used_microbatching,
poisson_subsampling_probability,
accountant_type,
)
elif accountant_type == AccountantType.RDP:
return _compute_dp_sgd_group_privacy_using_rdp(
num_epochs, num_epochs,
noise_multiplier, noise_multiplier,
math.exp(example_log_delta), user_delta,
max_examples_per_user,
used_microbatching, used_microbatching,
poisson_subsampling_probability, poisson_subsampling_probability,
) )
elif accountant_type == AccountantType.PLD:
def user_log_delta_gap(example_log_delta): return _compute_dp_sgd_group_privacy_using_pld(
example_eps = get_example_eps(example_log_delta) num_epochs,
noise_multiplier,
# Estimate user_eps, user_log_delta using Vadhan Lemma 2.2, using a tighter user_delta,
# bound seen in the penultimate line of the proof, given as
# user_delta = (example_delta * (exp(k * example_eps) - 1)
# / (exp(example_eps) - 1))
user_eps = max_examples_per_user * example_eps
user_log_delta = (
example_log_delta + _logexpm1(user_eps) - _logexpm1(example_eps)
)
return user_log_delta - target_user_log_delta
# We need bounds on the example-level delta. The supplied user-level delta
# is an upper bound. Search exponentially toward zero for lower bound.
example_log_delta_max = target_user_log_delta
example_log_delta_min = example_log_delta_max - math.log(10)
user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min)
while user_log_delta_gap_min > 0:
# Assuming that _compute_dp_sgd_example_privacy is decreasing in
# example_delta, it is not difficult to show that if user_delta_min
# corresponding to example_delta_min is too large, then we must reduce
# example_delta by at least a factor of (user_delta / user_delta_min).
# In other words, if example_log_delta_min is an upper bound, then so is
# example_log_delta_min - user_log_delta_gap_min.
example_log_delta_max = example_log_delta_min - user_log_delta_gap_min
example_log_delta_min = example_log_delta_max - math.log(10)
user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min)
if not math.isfinite(user_log_delta_gap_min):
# User-level (epsilon, delta) DP is not achievable. This can happen
# because as example_delta decreases, example_eps increases. So it is
# possible for user_delta (which increases in both example_delta and
# example_eps) to diverge to infinity as example_delta goes to zero.
logging.warning(
(
'No upper bound on user-level DP epsilon can be computed with %s '
'examples per user.'
),
max_examples_per_user, max_examples_per_user,
poisson_subsampling_probability,
used_microbatching,
) )
return math.inf else:
raise ValueError(f'Unsupported accountant type: {accountant_type}')
# By the same logic, we can improve on the lower bound we just found, before
# even starting the line search. We actually could do a custom line search
# that makes use of this at each step, but brentq should be fast enough.
example_log_delta_min -= user_log_delta_gap_min
example_log_delta, result = optimize.brentq(
user_log_delta_gap,
example_log_delta_min,
example_log_delta_max,
full_output=True,
)
if not result.converged:
raise UserLevelDPComputationError(
'Optimization failed trying to compute user-level DP epsilon.'
)
# Vadhan (2017) "The complexity of differential privacy" Lemma 2.2.
return max_examples_per_user * get_example_eps(example_log_delta)
def _compute_dp_sgd_example_privacy( def _compute_dp_sgd_example_privacy(
@ -258,6 +197,177 @@ def _compute_dp_sgd_example_privacy(
) )
def _compute_dp_sgd_group_privacy_using_rdp(
    num_epochs: float,
    noise_multiplier: float,
    user_delta: float,
    max_examples_per_user: int,
    used_microbatching: bool = True,
    poisson_subsampling_probability: Optional[float] = None,
) -> float:
  """Computes add-or-remove-one-user DP epsilon using group privacy via RDP.

  This privacy guarantee uses add-or-remove-one-user adjacency, and protects
  release of all model checkpoints in addition to the final model.

  Uses Vadhan (2017) "The complexity of differential privacy" Lemma 2.2.

  # TODO(b/271330804): Consider using RDP to compute group privacy.

  We use a line search to identify an example-level delta which, when the lemma
  is applied, yields the requested user-level delta, then use it to compute the
  user-level epsilon.

  Args:
    num_epochs: The number of passes over the data. May be fractional.
    noise_multiplier: The ratio of the noise stddev to the l2 sensitivity.
    user_delta: The target user-level delta.
    max_examples_per_user: Upper bound on the number of examples per user.
    used_microbatching: If true, increases sensitivity by a factor of two.
    poisson_subsampling_probability: If not None, gives the probability that
      each record is chosen in a batch. If None, assumes no subsampling.

  Returns:
    The add-or-remove-one-user DP epsilon value using group privacy, or
    math.inf (after logging a warning) if the search for an example-level
    delta produces a non-finite value, i.e. no finite bound can be computed.

  Raises:
    UserLevelDPComputationError: If line search for example-level delta fails.
  """
  # The computation below to estimate user_eps works as follows.
  # We have _compute_dp_sgd_example_privacy which maps
  #   F(example_delta) -> example_eps
  # Vadhan (2017) "The complexity of differential privacy" Lemma 2.2 gives us
  #   G(example_eps, example_delta) -> user_delta
  #   H(example_eps) -> user_eps.
  # We first identify an example_delta such that
  #   G(F(example_delta), example_delta) = user_delta
  # Specifically, we use a line search in log space to solve for
  #   log(G(F(example_delta), example_delta)) - log(user_delta) = 0
  # Then we can return user_eps = H(F(example_delta)).

  target_user_log_delta = math.log(user_delta)

  # Cache example privacy values, which can be expensive.
  @functools.cache
  def get_example_eps(example_log_delta):
    # NOTE(review): accountant_type is not passed here, so this relies on the
    # default of _compute_dp_sgd_example_privacy -- presumably RDP; confirm.
    return _compute_dp_sgd_example_privacy(
        num_epochs,
        noise_multiplier,
        math.exp(example_log_delta),
        used_microbatching,
        poisson_subsampling_probability,
    )

  def user_log_delta_gap(example_log_delta):
    example_eps = get_example_eps(example_log_delta)

    # Estimate user_eps, user_log_delta using Vadhan Lemma 2.2, using a
    # tighter bound seen in the penultimate line of the proof, given as
    # user_delta = (example_delta * (exp(k * example_eps) - 1)
    #               / (exp(example_eps) - 1))
    user_eps = max_examples_per_user * example_eps
    user_log_delta = (
        example_log_delta + _logexpm1(user_eps) - _logexpm1(example_eps)
    )
    return user_log_delta - target_user_log_delta

  # We need bounds on the example-level delta. The supplied user-level delta
  # is an upper bound. Search exponentially toward zero for lower bound.
  example_log_delta_max = target_user_log_delta
  example_log_delta_min = example_log_delta_max - math.log(10)

  user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min)
  # The finiteness test is part of the loop condition so that it also covers
  # the very first evaluation above: a NaN or infinite gap must never be used
  # to update the bracket or be handed to brentq.
  while math.isfinite(user_log_delta_gap_min) and user_log_delta_gap_min > 0:
    # Assuming that _compute_dp_sgd_example_privacy is decreasing in
    # example_delta, it is not difficult to show that if user_delta_min
    # corresponding to example_delta_min is too large, then we must reduce
    # example_delta by at least a factor of (user_delta / user_delta_min).
    # In other words, if example_log_delta_min is an upper bound, then so is
    # example_log_delta_min - user_log_delta_gap_min.
    example_log_delta_max = example_log_delta_min - user_log_delta_gap_min
    example_log_delta_min = example_log_delta_max - math.log(10)
    user_log_delta_gap_min = user_log_delta_gap(example_log_delta_min)

  if not math.isfinite(user_log_delta_gap_min):
    # User-level (epsilon, delta) DP is not achievable. This can happen
    # because as example_delta decreases, example_eps increases. So it is
    # possible for user_delta (which increases in both example_delta and
    # example_eps) to diverge to infinity as example_delta goes to zero.
    logging.warning(
        (
            'No upper bound on user-level DP epsilon can be computed with'
            ' %s examples per user.'
        ),
        max_examples_per_user,
    )
    return math.inf

  # By the same logic, we can improve on the lower bound we just found, before
  # even starting the line search. We actually could do a custom line search
  # that makes use of this at each step, but brentq should be fast enough.
  example_log_delta_min -= user_log_delta_gap_min

  example_log_delta, result = optimize.brentq(
      user_log_delta_gap,
      example_log_delta_min,
      example_log_delta_max,
      full_output=True,
  )

  if not result.converged:
    raise UserLevelDPComputationError(
        'Optimization failed trying to compute user-level DP epsilon.'
    )

  # Vadhan (2017) "The complexity of differential privacy" Lemma 2.2.
  return max_examples_per_user * get_example_eps(example_log_delta)
def _compute_dp_sgd_group_privacy_using_pld(
num_epochs: float,
noise_multiplier: float,
user_delta: float,
max_examples_per_user: int,
poisson_subsampling_probability: float,
used_microbatching: bool = True,
):
"""Computes add-or-remove-one-user DP epsilon using group privacy via PLDs.
This privacy guarantee uses add-or-remove-one-user adjacency, and protects
release of all model checkpoints in addition to the final model.
Uses Ganesh (2024) "Tight Group-Level DP Guarantees for DP-SGD with Sampling
via Mixture of Gaussians Mechanisms" (https://arxiv.org/abs/2401.10294)
Theorem 3.1.
Args:
num_epochs: The number of passes over the data. May be fractional.
noise_multiplier: The ratio of the noise stddev to the l2 sensitivity.
user_delta: The target user-level delta.
max_examples_per_user: Upper bound on the number of examples per user.
poisson_subsampling_probability: Gives the probability that each record is
chosen in a batch.
used_microbatching: If true, increases sensitivity by a factor of two.
Returns:
The add-or-remove-one-user DP epsilon value using group privacy.
"""
# With microbatching, a (loose) pessimistic assumption is that when a user's
# examples are sampled, they appear in different microbatches. This reduces
# to the non-microbatching analysis, but with the sensitivity doubled.
if used_microbatching:
noise_multiplier /= 2
sensitivities = range(max_examples_per_user + 1)
probs = stats.binom.pmf(
sensitivities, max_examples_per_user, poisson_subsampling_probability
)
single_round_event = dp_accounting.dp_event.MixtureOfGaussiansDpEvent(
noise_multiplier, sensitivities, probs
)
accountant = dp_accounting.pld.PLDAccountant()
count = int(math.ceil(num_epochs / poisson_subsampling_probability))
accountant.compose(single_round_event, count)
return accountant.get_epsilon(user_delta)
def compute_dp_sgd_privacy_statement( def compute_dp_sgd_privacy_statement(
number_of_examples: int, number_of_examples: int,
batch_size: int, batch_size: int,
@ -363,16 +473,6 @@ examples per user.""",
width=80, width=80,
) )
) )
elif accountant_type == AccountantType.PLD:
# TODO(b/271341062): Add User level DP support for PLD.
paragraphs.append(
textwrap.fill(
"""\
User-level DP epsilon computation is not supported for PLD accounting at this \
time. Use RDP accounting to obtain user-level DP guarantees.""",
width=80,
)
)
else: # Case: max_examples_per_user is not None and accountant_type is RDP else: # Case: max_examples_per_user is not None and accountant_type is RDP
user_eps_no_subsampling = _compute_dp_sgd_user_privacy( user_eps_no_subsampling = _compute_dp_sgd_user_privacy(
num_epochs, num_epochs,

View file

@ -230,7 +230,7 @@ RDP accounting:
User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
RDP accounting and group privacy: RDP accounting and group privacy:
Epsilon with each example occurring once per epoch: 85.940 Epsilon with each example occurring once per epoch: 56.224
Epsilon assuming Poisson sampling (*): 6.425 Epsilon assuming Poisson sampling (*): 6.425
(*) Poisson sampling is not usually done in training pipelines, but assuming (*) Poisson sampling is not usually done in training pipelines, but assuming
@ -261,7 +261,7 @@ RDP accounting:
User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
RDP accounting and group privacy: RDP accounting and group privacy:
Epsilon with each example occurring once per epoch: inf (**) Epsilon with each example occurring once per epoch: 366.374
Epsilon assuming Poisson sampling (*): inf (**) Epsilon assuming Poisson sampling (*): inf (**)
(*) Poisson sampling is not usually done in training pipelines, but assuming (*) Poisson sampling is not usually done in training pipelines, but assuming
@ -307,12 +307,12 @@ data order.
def test_dp_sgd_privacy_statement_user_dp_with_pld(self): def test_dp_sgd_privacy_statement_user_dp_with_pld(self):
statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement( statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
**DP_SGD_STATEMENT_KWARGS, **DP_SGD_STATEMENT_KWARGS,
max_examples_per_user=3, max_examples_per_user=2,
accountant_type=_PLD, accountant_type=_PLD,
) )
expected_statement = """\ expected_statement = """\
DP-SGD performed over 10000 examples with 64 examples per iteration, noise DP-SGD performed over 10000 examples with 64 examples per iteration, noise
multiplier 2.0 for 5.0 epochs with microbatching, and at most 3 examples per multiplier 2.0 for 5.0 epochs with microbatching, and at most 2 examples per
user. user.
This privacy guarantee protects the release of all model checkpoints in addition This privacy guarantee protects the release of all model checkpoints in addition
@ -323,8 +323,10 @@ PLD accounting:
Epsilon with each example occurring once per epoch: 12.595 Epsilon with each example occurring once per epoch: 12.595
Epsilon assuming Poisson sampling (*): 1.199 Epsilon assuming Poisson sampling (*): 1.199
User-level DP epsilon computation is not supported for PLD accounting at this User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
time. Use RDP accounting to obtain user-level DP guarantees. PLD accounting and group privacy:
Epsilon with each example occurring once per epoch: 30.579
Epsilon assuming Poisson sampling (*): 2.532
(*) Poisson sampling is not usually done in training pipelines, but assuming (*) Poisson sampling is not usually done in training pipelines, but assuming
that the data was randomly shuffled, it is believed that the actual epsilon that the data was randomly shuffled, it is believed that the actual epsilon