Adds compute_dp_sgd_privacy_statement for accurate privacy accounting report.

PiperOrigin-RevId: 518934979
Parent: 52806ba952
Commit: d5d60e2eac

2 changed files with 279 additions and 16 deletions
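For orientation, a minimal usage sketch of the new report function added by this commit (a sketch only: the import path is an assumption, and the argument values mirror DP_SGD_STATEMENT_KWARGS from the tests below):

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

# Prints a multi-paragraph privacy report; passing max_examples_per_user
# additionally yields a user-level DP guarantee via group privacy.
print(
    compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
        number_of_examples=10000,
        batch_size=64,
        num_epochs=5.0,
        noise_multiplier=2.0,
        delta=1e-6,
        max_examples_per_user=3,
    )
)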
@@ -15,6 +15,7 @@
 """Library for computing privacy values for DP-SGD."""
 
 import math
+import textwrap
 from typing import Optional
 
 from absl import app
@@ -224,6 +225,166 @@ def _compute_dp_sgd_example_privacy(
   return accountant.get_epsilon(example_delta)
 
 
+def compute_dp_sgd_privacy_statement(
+    number_of_examples: int,
+    batch_size: int,
+    num_epochs: float,
+    noise_multiplier: float,
+    delta: float,
+    used_microbatching: bool = True,
+    max_examples_per_user: Optional[int] = None,
+) -> str:
+  """Produces a privacy report summarizing the DP guarantee.
+
+  Args:
+    number_of_examples: Total number of examples in the dataset. For DP-SGD, an
+      "example" corresponds to one row in a minibatch. E.g., for sequence
+      models this would be a sequence of maximum length.
+    batch_size: The number of examples in a batch, *regardless of whether/how
+      they are grouped into microbatches*.
+    num_epochs: The number of epochs of training. May be fractional.
+    noise_multiplier: The ratio of the Gaussian noise to the clip norm at each
+      round. It is assumed that the noise_multiplier is constant, although the
+      clip norm may be variable if, for example, adaptive clipping is used.
+    delta: The target delta.
+    used_microbatching: Whether microbatching was used (with microbatch size
+      greater than one). Microbatching inflates sensitivity by a factor of two
+      in add-or-remove-one adjacency DP. (See "How to DP-fy ML: A Practical
+      Guide to Machine Learning with Differential Privacy",
+      https://arxiv.org/abs/2303.00654, Sec 5.6.)
+    max_examples_per_user: If the data set is constructed to cap the maximum
+      number of examples each user contributes, provide this argument to also
+      print a user-level DP guarantee.
+
+  Returns:
+    A str precisely articulating the privacy guarantee.
+  """
+
+  paragraph = f"""\
+DP-SGD performed over {number_of_examples} examples with {batch_size} \
+examples per iteration, noise multiplier {noise_multiplier} for {num_epochs} \
+epochs {'with' if used_microbatching else 'without'} microbatching"""
+
+  if max_examples_per_user is None:
+    paragraph += ', and no bound on number of examples per user.'
+  else:
+    paragraph += f', and at most {max_examples_per_user} examples per user.'
+
+  paragraphs = [textwrap.fill(paragraph, width=80)]
+
+  paragraphs.append(
+      textwrap.fill(
+          """\
+This privacy guarantee protects the release of all model checkpoints in \
+addition to the final model.""",
+          width=80,
+      )
+  )
+
+  paragraph = textwrap.fill(
+      f"""\
+Example-level DP with add-or-remove-one adjacency at delta = {delta} computed \
+with RDP accounting:""",
+      width=80,
+  )
+
+  example_eps_no_subsampling = _compute_dp_sgd_example_privacy(
+      num_epochs, noise_multiplier, delta, used_microbatching
+  )
+  example_eps_subsampling = _compute_dp_sgd_example_privacy(
+      num_epochs,
+      noise_multiplier,
+      delta,
+      used_microbatching,
+      poisson_subsampling_probability=batch_size / number_of_examples,
+  )
+
+  paragraph += f"""
+    Epsilon with each example occurring once per epoch: \
+{example_eps_no_subsampling:12.3f}
+    Epsilon assuming Poisson sampling (*):              \
+{example_eps_subsampling:12.3f}"""
+
+  paragraphs.append(paragraph)
+
+  inf_user_eps = False
+  if max_examples_per_user is not None:
+    user_eps_no_subsampling = _compute_dp_sgd_user_privacy(
+        num_epochs,
+        noise_multiplier,
+        delta,
+        max_examples_per_user,
+        used_microbatching,
+    )
+    user_eps_subsampling = _compute_dp_sgd_user_privacy(
+        num_epochs,
+        noise_multiplier,
+        delta,
+        max_examples_per_user,
+        used_microbatching,
+        poisson_subsampling_probability=batch_size / number_of_examples,
+    )
+    if math.isinf(user_eps_no_subsampling):
+      user_eps_no_subsampling_str = '    inf (**)'
+      inf_user_eps = True
+    else:
+      user_eps_no_subsampling_str = f'{user_eps_no_subsampling:12.3f}'
+    if math.isinf(user_eps_subsampling):
+      user_eps_subsampling_str = '    inf (**)'
+      inf_user_eps = True
+    else:
+      user_eps_subsampling_str = f'{user_eps_subsampling:12.3f}'
+
+    paragraph = textwrap.fill(
+        f"""\
+User-level DP with add-or-remove-one adjacency at delta = {delta} computed \
+using RDP accounting and group privacy:""",
+        width=80,
+    )
+    paragraph += f"""
+    Epsilon with each example occurring once per epoch: \
+{user_eps_no_subsampling_str}
+    Epsilon assuming Poisson sampling (*):              \
+{user_eps_subsampling_str}"""
+
+    paragraphs.append(paragraph)
+  else:
+    paragraphs.append(
+        textwrap.fill(
+            """\
+No user-level privacy guarantee is possible without a bound on the number of \
+examples per user.""",
+            width=80,
+        )
+    )
+
+  paragraphs.append(
+      textwrap.fill(
+          """\
+(*) Poisson sampling is not usually done in training pipelines, but assuming \
+that the data was randomly shuffled, it is believed the actual epsilon should \
+be closer to this value than the conservative assumption of an arbitrary data \
+order.""",
+          width=80,
+      )
+  )
+
+  if inf_user_eps:
+    paragraphs.append(
+        textwrap.fill(
+            """\
+(**) A finite example-level epsilon implies a finite user-level epsilon at any \
+`max_examples_per_user`, but because conversion from example-level to user-\
+level DP is not exact, it is possible for the upper bound on the user-level \
+epsilon to still be infinite.""",
+            width=80,
+        )
+    )
+
+  return '\n\n'.join(paragraphs) + '\n'
+
+
 def compute_dp_sgd_privacy(n, batch_size, noise_multiplier, epochs, delta):
   """Compute epsilon based on the given hyperparameters.
 
@@ -231,12 +392,11 @@ def compute_dp_sgd_privacy(n, batch_size, noise_multiplier, epochs, delta):
   with microbatching, and assumes Poisson subsampling, which is rarely used in
   practice. (See "How to DP-fy ML: A Practical Guide to Machine Learning with
   Differential Privacy", https://arxiv.org/abs/2303.00654, Sec 5.6.) Most users
-  should call `compute_dp_sgd_privacy_statement` (which will be added shortly),
-  which provides appropriate context for the guarantee (see the reporting
-  recommendations in "How to DP-fy ML", Sec 5.3). If you need a numeric epsilon
-  value under specific assumptions, it is recommended to use the `dp_accounting`
-  libraries directly to compute epsilon, with the precise and correct
-  assumptions of your application.
+  should call `compute_dp_sgd_privacy_statement`, which provides appropriate
+  context for the guarantee (see the reporting recommendations in "How to DP-fy
+  ML", Sec 5.3). If you need a numeric epsilon value under specific assumptions,
+  it is recommended to use the `dp_accounting` libraries directly to compute
+  epsilon, with the precise and correct assumptions of your application.
 
   Args:
     n: Number of examples in the training data.
@@ -248,20 +408,22 @@ def compute_dp_sgd_privacy(n, batch_size, noise_multiplier, epochs, delta):
   Returns:
     A 2-tuple containing the value of epsilon and the optimal RDP order.
   """
-  # TODO(b/265168958): Update this text for `compute_dp_sgd_privacy_statement`.
-  logging.warn(
-      '`compute_dp_sgd_privacy` is deprecated. It does not account '
-      'for doubling of sensitivity with microbatching, and assumes Poisson '
-      'subsampling, which is rarely used in practice. Please use the '
-      '`dp_accounting` libraries directly to compute epsilon, using the '
-      'precise and correct assumptions of your application.'
-  )
+  logging.warn("""\
+`compute_dp_sgd_privacy` is deprecated. It does not account for doubling of \
+sensitivity with microbatching, and assumes Poisson subsampling, which is \
+rarely used in practice. Please use `compute_dp_sgd_privacy_statement`, which \
+provides appropriate context for the guarantee. To compute epsilon under \
+different assumptions than those in `compute_dp_sgd_privacy_statement`, call \
+the `dp_accounting` libraries directly.""")
 
   q = batch_size / n  # q - the sampling ratio.
   if q > 1:
     raise app.UsageError('n must be larger than the batch size.')
-  orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
-            list(range(5, 64)) + [128, 256, 512])
+  orders = (
+      [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 3.0, 3.5, 4.0, 4.5]
+      + list(range(5, 64))
+      + [128, 256, 512]
+  )
   steps = int(math.ceil(epochs * n / batch_size))
   accountant = dp_accounting.rdp.RdpAccountant(orders)
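The deprecation warning above redirects users either to the new `compute_dp_sgd_privacy_statement` or to the `dp_accounting` libraries. A rough sketch (not part of this commit) of the direct `dp_accounting` computation that `compute_dp_sgd_privacy` performs, under the same Poisson-subsampling assumption; the event/accountant classes are from the open-source `dp_accounting` package:

import math

import dp_accounting

# Illustrative hyperparameters, mirroring the test values below.
n, batch_size, noise_multiplier, epochs, delta = 10000, 64, 2.0, 5.0, 1e-6

q = batch_size / n  # Poisson sampling probability.
steps = int(math.ceil(epochs * n / batch_size))
orders = (
    [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 3.0, 3.5, 4.0, 4.5]
    + list(range(5, 64))
    + [128, 256, 512]
)

# One DP-SGD step adds Gaussian noise to a Poisson-sampled batch; the full
# run is that event composed with itself over all steps.
event = dp_accounting.SelfComposedDpEvent(
    dp_accounting.PoissonSampledDpEvent(
        q, dp_accounting.GaussianDpEvent(noise_multiplier)
    ),
    steps,
)

accountant = dp_accounting.rdp.RdpAccountant(orders)
accountant.compose(event)
print(accountant.get_epsilon(delta))  # Example-level epsilon at this delta.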
@@ -25,6 +25,15 @@ _example_privacy = compute_dp_sgd_privacy_lib._compute_dp_sgd_example_privacy
 _user_privacy = compute_dp_sgd_privacy_lib._compute_dp_sgd_user_privacy
 
 
+DP_SGD_STATEMENT_KWARGS = dict(
+    number_of_examples=10000,
+    batch_size=64,
+    num_epochs=5.0,
+    noise_multiplier=2.0,
+    delta=1e-6,
+)
+
+
 class ComputeDpSgdPrivacyTest(parameterized.TestCase):
 
   @parameterized.named_parameters(
@@ -145,6 +154,98 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
     )
     self.assertAlmostEqual(user_eps, example_eps * k)
 
+  def test_dp_sgd_privacy_statement_no_user_dp(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and no bound on number of
+examples per user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+RDP accounting:
+    Epsilon with each example occurring once per epoch:       13.376
+    Epsilon assuming Poisson sampling (*):                     1.616
+
+No user-level privacy guarantee is possible without a bound on the number of
+examples per user.
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed the actual epsilon should be
+closer to this value than the conservative assumption of an arbitrary data
+order.
+"""
+    self.assertEqual(statement, expected_statement)
+
+  def test_dp_sgd_privacy_statement_user_dp(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+        max_examples_per_user=3,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and at most 3 examples per
+user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+RDP accounting:
+    Epsilon with each example occurring once per epoch:       13.376
+    Epsilon assuming Poisson sampling (*):                     1.616
+
+User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
+RDP accounting and group privacy:
+    Epsilon with each example occurring once per epoch:      113.899
+    Epsilon assuming Poisson sampling (*):                     8.129
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed the actual epsilon should be
+closer to this value than the conservative assumption of an arbitrary data
+order.
+"""
+    self.assertEqual(statement, expected_statement)
+
+  def test_dp_sgd_privacy_statement_user_dp_infinite(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+        max_examples_per_user=9,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and at most 9 examples per
+user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+RDP accounting:
+    Epsilon with each example occurring once per epoch:       13.376
+    Epsilon assuming Poisson sampling (*):                     1.616
+
+User-level DP with add-or-remove-one adjacency at delta = 1e-06 computed using
+RDP accounting and group privacy:
+    Epsilon with each example occurring once per epoch:     inf (**)
+    Epsilon assuming Poisson sampling (*):                  inf (**)
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed the actual epsilon should be
+closer to this value than the conservative assumption of an arbitrary data
+order.
+
+(**) A finite example-level epsilon implies a finite user-level epsilon at any
+`max_examples_per_user`, but because conversion from example-level to user-level
+DP is not exact, it is possible for the upper bound on the user-level epsilon to
+still be infinite.
+"""
+    self.assertEqual(statement, expected_statement)
+
+
 if __name__ == '__main__':
   absltest.main()
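Beyond the golden-string tests above, a quick sanity check one might run by hand (illustrative only: the import path is assumed, and these underscore-prefixed helpers are private, not supported API) is that bounding examples per user can only weaken the guarantee relative to example-level DP:

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib as lib

# Example-level epsilon for the tests' hyperparameters.
example_eps = lib._compute_dp_sgd_example_privacy(5.0, 2.0, 1e-6, True)
# User-level epsilon via group privacy, with at most 3 examples per user.
user_eps = lib._compute_dp_sgd_user_privacy(5.0, 2.0, 1e-6, 3, True)

# Group privacy over k >= 1 examples can only increase epsilon.
assert user_eps >= example_eps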