Use better group privacy bound in computing user level privacy [TF Privacy]
PiperOrigin-RevId: 526852999
This commit is contained in:
parent
60cb0dd2fb
commit
33bbc87ff2
2 changed files with 65 additions and 32 deletions
|
@ -28,6 +28,11 @@ class UserLevelDPComputationError(Exception):
|
||||||
"""Error raised if user-level epsilon computation fails."""
|
"""Error raised if user-level epsilon computation fails."""
|
||||||
|
|
||||||
|
|
||||||
|
def _logexpm1(x: float) -> float:
|
||||||
|
"""Returns log(exp(x) - 1)."""
|
||||||
|
return x + math.log(-math.expm1(-x))
|
||||||
|
|
||||||
|
|
||||||
def _compute_dp_sgd_user_privacy(
|
def _compute_dp_sgd_user_privacy(
|
||||||
num_epochs: float,
|
num_epochs: float,
|
||||||
noise_multiplier: float,
|
noise_multiplier: float,
|
||||||
|
@ -100,21 +105,32 @@ def _compute_dp_sgd_user_privacy(
|
||||||
# log(G(F(example_delta), example_delta)) - log(user_delta) = 0
|
# log(G(F(example_delta), example_delta)) - log(user_delta) = 0
|
||||||
# Then we can return user_eps = H(F(example_delta)).
|
# Then we can return user_eps = H(F(example_delta)).
|
||||||
|
|
||||||
log_k = math.log(max_examples_per_user)
|
|
||||||
target_user_log_delta = math.log(user_delta)
|
target_user_log_delta = math.log(user_delta)
|
||||||
|
|
||||||
def user_log_delta_gap(example_log_delta):
|
# We store all example_eps computed for any example_delta in the following
|
||||||
example_eps = _compute_dp_sgd_example_privacy(
|
# method. This is done so that we don't have to recompute values for the same
|
||||||
num_epochs,
|
# delta.
|
||||||
noise_multiplier,
|
epsilon_cache = dict()
|
||||||
math.exp(example_log_delta),
|
|
||||||
used_microbatching,
|
|
||||||
poisson_subsampling_probability,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Estimate user_eps, user_log_delta using Vadhan Lemma 2.2.
|
def user_log_delta_gap(example_log_delta):
|
||||||
|
if example_log_delta not in epsilon_cache:
|
||||||
|
epsilon_cache[example_log_delta] = _compute_dp_sgd_example_privacy(
|
||||||
|
num_epochs,
|
||||||
|
noise_multiplier,
|
||||||
|
math.exp(example_log_delta),
|
||||||
|
used_microbatching,
|
||||||
|
poisson_subsampling_probability,
|
||||||
|
)
|
||||||
|
example_eps = epsilon_cache[example_log_delta]
|
||||||
|
|
||||||
|
# Estimate user_eps, user_log_delta using Vadhan Lemma 2.2, using a tighter
|
||||||
|
# bound seen in the penultimate line of the proof, given as
|
||||||
|
# user_delta = (example_delta * (exp(k * example_eps) - 1)
|
||||||
|
# / (exp(example_eps) - 1))
|
||||||
user_eps = max_examples_per_user * example_eps
|
user_eps = max_examples_per_user * example_eps
|
||||||
user_log_delta = log_k + user_eps + example_log_delta
|
user_log_delta = (
|
||||||
|
example_log_delta + _logexpm1(user_eps) - _logexpm1(example_eps)
|
||||||
|
)
|
||||||
return user_log_delta - target_user_log_delta
|
return user_log_delta - target_user_log_delta
|
||||||
|
|
||||||
# We need bounds on the example-level delta. The supplied user-level delta
|
# We need bounds on the example-level delta. The supplied user-level delta
|
||||||
|
@ -164,9 +180,16 @@ def _compute_dp_sgd_user_privacy(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Vadhan (2017) "The complexity of differential privacy" Lemma 2.2.
|
# Vadhan (2017) "The complexity of differential privacy" Lemma 2.2.
|
||||||
# user_delta = k * exp(k * example_eps) * example_delta
|
if example_log_delta not in epsilon_cache:
|
||||||
# Given example_delta, we can solve for (k * example_eps) = user_eps.
|
epsilon_cache[example_log_delta] = _compute_dp_sgd_example_privacy(
|
||||||
return max(0, target_user_log_delta - log_k - example_log_delta)
|
num_epochs,
|
||||||
|
noise_multiplier,
|
||||||
|
math.exp(example_log_delta),
|
||||||
|
used_microbatching,
|
||||||
|
poisson_subsampling_probability,
|
||||||
|
)
|
||||||
|
example_eps = epsilon_cache[example_log_delta]
|
||||||
|
return max_examples_per_user * example_eps
|
||||||
|
|
||||||
|
|
||||||
def _compute_dp_sgd_example_privacy(
|
def _compute_dp_sgd_example_privacy(
|
||||||
|
@ -354,7 +377,7 @@ using RDP accounting and group privacy:""",
|
||||||
paragraphs.append(
|
paragraphs.append(
|
||||||
textwrap.fill(
|
textwrap.fill(
|
||||||
"""\
|
"""\
|
||||||
No user-level privacy guarantee is possible witout a bound on the number of \
|
No user-level privacy guarantee is possible without a bound on the number of \
|
||||||
examples per user.""",
|
examples per user.""",
|
||||||
width=80,
|
width=80,
|
||||||
)
|
)
|
||||||
|
|
|
@ -134,25 +134,35 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
|
||||||
self.assertEqual(user_eps, example_eps)
|
self.assertEqual(user_eps, example_eps)
|
||||||
|
|
||||||
@parameterized.parameters((0.9, 2), (1.1, 3), (2.3, 13))
|
@parameterized.parameters((0.9, 2), (1.1, 3), (2.3, 13))
|
||||||
def test_user_privacy_epsilon_delta_consistency(self, z, k):
|
def test_user_privacy_epsilon_delta_consistency(
|
||||||
|
self, noise_multiplier, max_examples_per_user
|
||||||
|
):
|
||||||
"""Tests example/user epsilons consistent with Vadhan (2017) Lemma 2.2."""
|
"""Tests example/user epsilons consistent with Vadhan (2017) Lemma 2.2."""
|
||||||
num_epochs = 5
|
num_epochs = 5
|
||||||
user_delta = 1e-6
|
example_delta = 1e-8
|
||||||
q = 2e-4
|
q = 2e-4
|
||||||
user_eps = _user_privacy(
|
|
||||||
num_epochs,
|
|
||||||
noise_multiplier=z,
|
|
||||||
user_delta=user_delta,
|
|
||||||
max_examples_per_user=k,
|
|
||||||
poisson_subsampling_probability=q,
|
|
||||||
)
|
|
||||||
example_eps = _example_privacy(
|
example_eps = _example_privacy(
|
||||||
num_epochs,
|
num_epochs,
|
||||||
noise_multiplier=z,
|
noise_multiplier=noise_multiplier,
|
||||||
example_delta=user_delta / (k * math.exp(user_eps)),
|
example_delta=example_delta,
|
||||||
poisson_subsampling_probability=q,
|
poisson_subsampling_probability=q,
|
||||||
)
|
)
|
||||||
self.assertAlmostEqual(user_eps, example_eps * k)
|
|
||||||
|
user_delta = math.exp(
|
||||||
|
math.log(example_delta)
|
||||||
|
+ compute_dp_sgd_privacy_lib._logexpm1(
|
||||||
|
max_examples_per_user * example_eps
|
||||||
|
)
|
||||||
|
- compute_dp_sgd_privacy_lib._logexpm1(example_eps)
|
||||||
|
)
|
||||||
|
user_eps = _user_privacy(
|
||||||
|
num_epochs,
|
||||||
|
noise_multiplier=noise_multiplier,
|
||||||
|
user_delta=user_delta,
|
||||||
|
max_examples_per_user=max_examples_per_user,
|
||||||
|
poisson_subsampling_probability=q,
|
||||||
|
)
|
||||||
|
self.assertAlmostEqual(user_eps, example_eps * max_examples_per_user)
|
||||||
|
|
||||||
def test_dp_sgd_privacy_statement_no_user_dp(self):
|
def test_dp_sgd_privacy_statement_no_user_dp(self):
|
||||||
statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
|
statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
|
||||||
|
@ -171,7 +181,7 @@ RDP accounting:
|
||||||
Epsilon with each example occurring once per epoch: 13.376
|
Epsilon with each example occurring once per epoch: 13.376
|
||||||
Epsilon assuming Poisson sampling (*): 1.616
|
Epsilon assuming Poisson sampling (*): 1.616
|
||||||
|
|
||||||
No user-level privacy guarantee is possible witout a bound on the number of
|
No user-level privacy guarantee is possible without a bound on the number of
|
||||||
examples per user.
|
examples per user.
|
||||||
|
|
||||||
(*) Poisson sampling is not usually done in training pipelines, but assuming
|
(*) Poisson sampling is not usually done in training pipelines, but assuming
|
||||||
|
@ -201,8 +211,8 @@ RDP accounting:
|
||||||
|
|
||||||
User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
|
User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
|
||||||
RDP accounting and group privacy:
|
RDP accounting and group privacy:
|
||||||
Epsilon with each example occurring once per epoch: 113.899
|
Epsilon with each example occurring once per epoch: 85.940
|
||||||
Epsilon assuming Poisson sampling (*): 8.129
|
Epsilon assuming Poisson sampling (*): 6.425
|
||||||
|
|
||||||
(*) Poisson sampling is not usually done in training pipelines, but assuming
|
(*) Poisson sampling is not usually done in training pipelines, but assuming
|
||||||
that the data was randomly shuffled, it is believed the actual epsilon should be
|
that the data was randomly shuffled, it is believed the actual epsilon should be
|
||||||
|
@ -214,11 +224,11 @@ order.
|
||||||
def test_dp_sgd_privacy_statement_user_dp_infinite(self):
|
def test_dp_sgd_privacy_statement_user_dp_infinite(self):
|
||||||
statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
|
statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
|
||||||
**DP_SGD_STATEMENT_KWARGS,
|
**DP_SGD_STATEMENT_KWARGS,
|
||||||
max_examples_per_user=9,
|
max_examples_per_user=10,
|
||||||
)
|
)
|
||||||
expected_statement = """\
|
expected_statement = """\
|
||||||
DP-SGD performed over 10000 examples with 64 examples per iteration, noise
|
DP-SGD performed over 10000 examples with 64 examples per iteration, noise
|
||||||
multiplier 2.0 for 5.0 epochs with microbatching, and at most 9 examples per
|
multiplier 2.0 for 5.0 epochs with microbatching, and at most 10 examples per
|
||||||
user.
|
user.
|
||||||
|
|
||||||
This privacy guarantee protects the release of all model checkpoints in addition
|
This privacy guarantee protects the release of all model checkpoints in addition
|
||||||
|
|
Loading…
Reference in a new issue