Add support for PLD Accountant in computing DP-SGD privacy statement [TF Privacy]
PiperOrigin-RevId: 587854134
parent f51b637dda
commit 93376c9d6a

3 changed files with 170 additions and 43 deletions
--- tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py
+++ tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py
@@ -18,20 +18,20 @@ The script applies the RDP accountant to estimate privacy budget of an iterated
 Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags.

 Example:
-  compute_dp_sgd_privacy
+  compute_dp_sgd_privacy \
     --N=60000 \
     --batch_size=256 \
     --noise_multiplier=1.12 \
     --epochs=60 \
-    --delta=1e-5
+    --delta=1e-5 \
+    --accountant_type=RDP

-The output states that DP-SGD with these parameters satisfies (2.92, 1e-5)-DP.
+Prints out the privacy statement corresponding to the above parameters.
 """

 from absl import app
 from absl import flags
-from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy_statement
+from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib


 _NUM_EXAMPLES = flags.DEFINE_integer(
@@ -70,6 +70,9 @@ _MAX_EXAMPLES_PER_USER = flags.DEFINE_integer(
         'user-level DP guarantee.'
     ),
 )
+_ACCOUNTANT_TYPE = flags.DEFINE_enum(
+    'accountant_type', 'RDP', ['RDP', 'PLD'], 'DP accountant to use.'
+)

 flags.mark_flags_as_required(['N', 'batch_size', 'noise_multiplier', 'epochs'])

@@ -77,7 +80,7 @@ flags.mark_flags_as_required(['N', 'batch_size', 'noise_multiplier', 'epochs'])
 def main(argv):
   del argv  # argv is not used.

-  statement = compute_dp_sgd_privacy_statement(
+  statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
       _NUM_EXAMPLES.value,
       _BATCH_SIZE.value,
       _NUM_EPOCHS.value,
@@ -85,6 +88,7 @@ def main(argv):
       _DELTA.value,
       _USED_MICROBATCHING.value,
       _MAX_EXAMPLES_PER_USER.value,
+      compute_dp_sgd_privacy_lib.AccountantType(_ACCOUNTANT_TYPE.value),
   )
   print(statement)
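A note on the wiring above: flags.DEFINE_enum rejects any value outside ['RDP', 'PLD'] at parse time, and AccountantType(_ACCOUNTANT_TYPE.value) looks the enum member up by its string value. A minimal standalone sketch of that path, using only names visible in this diff (the script body is illustrative, not the shipped file):

    # Sketch: flag string -> AccountantType enum, mirroring main() above.
    from absl import app
    from absl import flags
    from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

    _ACCOUNTANT_TYPE = flags.DEFINE_enum(
        'accountant_type', 'RDP', ['RDP', 'PLD'], 'DP accountant to use.'
    )

    def main(argv):
      del argv  # Unused.
      accountant_type = compute_dp_sgd_privacy_lib.AccountantType(
          _ACCOUNTANT_TYPE.value
      )
      print(accountant_type)  # AccountantType.RDP or AccountantType.PLD

    if __name__ == '__main__':
      app.run(main)  # Run with, e.g., --accountant_type=PLD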
--- tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
+++ tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py
@@ -14,6 +14,7 @@
 # ==============================================================================
 """Library for computing privacy values for DP-SGD."""

+import enum
 import functools
 import math
 import textwrap
@@ -34,6 +35,20 @@ def _logexpm1(x: float) -> float:
   return x + math.log(-math.expm1(-x))


+class AccountantType(enum.Enum):
+  """Accountant to use for privacy accounting."""
+
+  RDP = 'RDP'
+  PLD = 'PLD'
+
+  def get_accountant(self) -> dp_accounting.PrivacyAccountant:
+    if self == AccountantType.RDP:
+      return dp_accounting.rdp.RdpAccountant()
+    if self == AccountantType.PLD:
+      return dp_accounting.pld.PLDAccountant()
+    raise ValueError(f'Unsupported Accountant type {self.value}')
+
+
 def _compute_dp_sgd_user_privacy(
     num_epochs: float,
     noise_multiplier: float,
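Both branches of get_accountant() return objects implementing the same dp_accounting.PrivacyAccountant interface (compose, get_epsilon), which is what lets the call sites below stay accountant-agnostic. A quick illustrative sketch, not library code:

    # Sketch: the selector yields interchangeable accountant objects.
    from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

    AccountantType = compute_dp_sgd_privacy_lib.AccountantType
    for accountant_type in (AccountantType.RDP, AccountantType.PLD):
      accountant = accountant_type.get_accountant()
      print(accountant_type.value, type(accountant).__name__)
    # Expected output: "RDP RdpAccountant" then "PLD PLDAccountant"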
@@ -41,6 +56,7 @@ def _compute_dp_sgd_user_privacy(
     max_examples_per_user: int,
     used_microbatching: bool = True,
     poisson_subsampling_probability: Optional[float] = None,
+    accountant_type: AccountantType = AccountantType.RDP,
 ) -> float:
   """Computes add-or-remove-one-user DP epsilon using group privacy.

@@ -63,6 +79,10 @@ def _compute_dp_sgd_user_privacy(
     used_microbatching: If true, increases sensitivity by a factor of two.
     poisson_subsampling_probability: If not None, gives the probability that
       each record is chosen in a batch. If None, assumes no subsampling.
+    accountant_type: The privacy accountant for computing epsilon. While this
+      method supports both PLD and RDP accountants, the behavior for the PLD
+      accountant can sometimes be overly pessimistic. This remains to be
+      investigated and fixed (b/271341062).

   Returns:
     The add-or-remove-one-user DP epsilon value using group privacy.
@@ -92,6 +112,7 @@ def _compute_dp_sgd_user_privacy(
       user_delta,
       used_microbatching,
       poisson_subsampling_probability,
+      accountant_type,
   )

   # The computation below to estimate user_eps works as follows.
@@ -188,6 +209,7 @@ def _compute_dp_sgd_example_privacy(
     example_delta: float,
     used_microbatching: bool = True,
     poisson_subsampling_probability: Optional[float] = None,
+    accountant_type: AccountantType = AccountantType.RDP,
 ) -> float:
   """Computes add-or-remove-one-example DP epsilon.

@@ -201,6 +223,7 @@ def _compute_dp_sgd_example_privacy(
     used_microbatching: If true, increases sensitivity by a factor of two.
     poisson_subsampling_probability: If not None, gives the probability that
       each record is chosen in a batch. If None, assumes no subsampling.
+    accountant_type: The privacy accountant for computing epsilon.

   Returns:
     The epsilon value.
@@ -229,10 +252,10 @@ def _compute_dp_sgd_example_privacy(
     event_ = dp_accounting.SelfComposedDpEvent(count=count, event=event_)

   return (
-      dp_accounting.rdp.RdpAccountant()
+      accountant_type.get_accountant()
       .compose(event_)
       .get_epsilon(example_delta)
-  )  # TODO(b/271341062)
+  )


 def compute_dp_sgd_privacy_statement(
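The swap from a hard-coded dp_accounting.rdp.RdpAccountant() to accountant_type.get_accountant() is the entire mechanism: both accountants consume the same composed DpEvent and answer get_epsilon(delta). A rough sketch of that pattern against the dp_accounting library, with made-up event parameters (the event types and the compose/get_epsilon chaining are as used in the code above):

    # Sketch: RDP vs. PLD accounting on the same DP-SGD-style event.
    import dp_accounting

    event = dp_accounting.SelfComposedDpEvent(
        count=1000,  # hypothetical number of steps
        event=dp_accounting.PoissonSampledDpEvent(
            sampling_probability=0.01,  # hypothetical batch_size / N
            event=dp_accounting.GaussianDpEvent(noise_multiplier=1.1),
        ),
    )
    for accountant in (
        dp_accounting.rdp.RdpAccountant(),
        dp_accounting.pld.PLDAccountant(),
    ):
      # Same chaining as the return statement above.
      eps = accountant.compose(event).get_epsilon(1e-5)
      print(type(accountant).__name__, eps)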
@@ -243,6 +266,7 @@ def compute_dp_sgd_privacy_statement(
     delta: float,
     used_microbatching: bool = True,
     max_examples_per_user: Optional[int] = None,
+    accountant_type: AccountantType = AccountantType.RDP,
 ) -> str:
   """Produces a privacy report summarizing the DP guarantee.

@@ -267,6 +291,11 @@ def compute_dp_sgd_privacy_statement(
     max_examples_per_user: If the data set is constructed to cap the maximum
       number of examples each user contributes, provide this argument to also
       print a user-level DP guarantee.
+    accountant_type: The privacy accountant for computing epsilon. Since the
+      current approach for computing user-level privacy when using the PLD
+      accountant can sometimes be overly pessimistic, this method does not
+      provide a user-level privacy guarantee for the PLD accountant_type.
+      This remains to be investigated and fixed (b/271341062).

   Returns:
     A str precisely articulating the privacy guarantee.
@@ -296,12 +325,16 @@ addition to the final model.""",
   paragraph = textwrap.fill(
       f"""\
 Example-level DP with add-or-remove-one adjacency at delta = {delta} computed \
-with RDP accounting:""",
+with {accountant_type.value} accounting:""",
       width=80,
   )

   example_eps_no_subsampling = _compute_dp_sgd_example_privacy(
-      num_epochs, noise_multiplier, delta, used_microbatching
+      num_epochs,
+      noise_multiplier,
+      delta,
+      used_microbatching,
+      accountant_type=accountant_type,
   )
   example_eps_subsampling = _compute_dp_sgd_example_privacy(
       num_epochs,
@@ -309,6 +342,7 @@ with RDP accounting:""",
       delta,
       used_microbatching,
       poisson_subsampling_probability=batch_size / number_of_examples,
+      accountant_type=accountant_type,
   )

   paragraph += f"""
@@ -320,13 +354,33 @@ with RDP accounting:""",
   paragraphs.append(paragraph)

   inf_user_eps = False
-  if max_examples_per_user is not None:
+  if max_examples_per_user is None:
+    paragraphs.append(
+        textwrap.fill(
+            """\
+No user-level privacy guarantee is possible without a bound on the number of \
+examples per user.""",
+            width=80,
+        )
+    )
+  elif accountant_type == AccountantType.PLD:
+    # TODO(b/271341062): Add user-level DP support for PLD.
+    paragraphs.append(
+        textwrap.fill(
+            """\
+User-level DP epsilon computation is not supported for PLD accounting at this \
+time. Use RDP accounting to obtain user-level DP guarantees.""",
+            width=80,
+        )
+    )
+  else:  # Case: max_examples_per_user is not None and accountant_type is RDP.
     user_eps_no_subsampling = _compute_dp_sgd_user_privacy(
         num_epochs,
         noise_multiplier,
         delta,
         max_examples_per_user,
         used_microbatching,
+        accountant_type=accountant_type,
     )
     user_eps_subsampling = _compute_dp_sgd_user_privacy(
         num_epochs,
@@ -335,6 +389,7 @@ with RDP accounting:""",
         max_examples_per_user,
         used_microbatching,
         poisson_subsampling_probability=batch_size / number_of_examples,
+        accountant_type=accountant_type,
     )
     if math.isinf(user_eps_no_subsampling):
       user_eps_no_subsampling_str = ' inf (**)'
@@ -350,7 +405,7 @@ with RDP accounting:""",
     paragraph = textwrap.fill(
         f"""\
 User-level DP with add-or-remove-one adjacency at delta = {delta} computed \
-using RDP accounting and group privacy:""",
+using {accountant_type.value} accounting and group privacy:""",
         width=80,
     )
     paragraph += f"""
@@ -360,23 +415,14 @@ using RDP accounting and group privacy:""",
 {user_eps_subsampling_str}"""

     paragraphs.append(paragraph)
-  else:
-    paragraphs.append(
-        textwrap.fill(
-            """\
-No user-level privacy guarantee is possible without a bound on the number of \
-examples per user.""",
-            width=80,
-        )
-    )

   paragraphs.append(
       textwrap.fill(
           """\
 (*) Poisson sampling is not usually done in training pipelines, but assuming \
-that the data was randomly shuffled, it is believed the actual epsilon should \
-be closer to this value than the conservative assumption of an arbitrary data \
-order.""",
+that the data was randomly shuffled, it is believed that the actual epsilon \
+should be closer to this value than the conservative assumption of an \
+arbitrary data order.""",
           width=80,
       )
   )
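End to end, selecting PLD changes the example-level accounting and, per the branch above, replaces the user-level section of the report with the not-supported notice. A hedged usage sketch: the keyword names are inferred from identifiers appearing in the library code above (they are not confirmed by this diff, which calls the function positionally), and the values mirror the test kwargs below:

    # Sketch: request a privacy statement under PLD accounting.
    # Keyword names are assumptions inferred from the library code above.
    from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
        number_of_examples=10000,
        batch_size=64,
        num_epochs=5.0,
        noise_multiplier=2.0,
        delta=1e-6,
        used_microbatching=True,
        max_examples_per_user=3,
        accountant_type=compute_dp_sgd_privacy_lib.AccountantType.PLD,
    )
    print(statement)  # Ends the user-level section with the PLD notice.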
--- tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib_test.py
+++ tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib_test.py
@@ -23,6 +23,8 @@ from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

 _example_privacy = compute_dp_sgd_privacy_lib._compute_dp_sgd_example_privacy
 _user_privacy = compute_dp_sgd_privacy_lib._compute_dp_sgd_user_privacy
+_RDP = compute_dp_sgd_privacy_lib.AccountantType.RDP
+_PLD = compute_dp_sgd_privacy_lib.AccountantType.PLD


 DP_SGD_STATEMENT_KWARGS = dict(
@@ -81,13 +83,21 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
       _example_privacy(**args)

   @parameterized.named_parameters(
-      ('no_microbatching_no_subsampling', False, None, 10.8602036),
-      ('microbatching_no_subsampling', True, None, 26.2880374),
-      ('no_microbatching_with_subsampling', False, 1e-2, 3.2391922),
-      ('microbatching_with_subsampling', True, 1e-2, 22.5970358),
+      ('no_microbatching_no_subsampling_rdp', False, None, _RDP, 10.8602036),
+      ('microbatching_no_subsampling_rdp', True, None, _RDP, 26.2880374),
+      ('no_microbatching_with_subsampling_rdp', False, 1e-2, _RDP, 3.2391922),
+      ('microbatching_with_subsampling_rdp', True, 1e-2, _RDP, 22.5970358),
+      ('no_microbatching_no_subsampling_pld', False, None, _PLD, 10.1224946),
+      ('microbatching_no_subsampling_pld', True, None, _PLD, 24.7160779),
+      ('no_microbatching_with_subsampling_pld', False, 1e-2, _PLD, 2.4612381),
+      ('microbatching_with_subsampling_pld', True, 1e-2, _PLD, 18.6977407),
   )
   def test_compute_dp_sgd_example_privacy(
-      self, used_microbatching, poisson_subsampling_probability, expected_eps
+      self,
+      used_microbatching,
+      poisson_subsampling_probability,
+      accountant_type,
+      expected_eps,
   ):
     num_epochs = 1.2
     noise_multiplier = 0.7
@@ -98,6 +108,7 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
         example_delta,
         used_microbatching,
         poisson_subsampling_probability,
+        accountant_type,
     )
     self.assertAlmostEqual(eps, expected_eps)

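Each tuple above binds positionally to the test arguments; unrolled, one of the new PLD cases amounts to the direct call below. The example_delta constant is set between the hunks shown here, so the 1e-5 is an assumed stand-in:

    # Sketch: the 'no_microbatching_with_subsampling_pld' case, unrolled.
    from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

    eps = compute_dp_sgd_privacy_lib._compute_dp_sgd_example_privacy(
        1.2,    # num_epochs
        0.7,    # noise_multiplier
        1e-5,   # example_delta -- assumed; the actual constant is elided here
        False,  # used_microbatching
        1e-2,   # poisson_subsampling_probability
        compute_dp_sgd_privacy_lib.AccountantType.PLD,
    )
    # Expected per the parameterization above: eps ~= 2.4612381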
@@ -119,17 +130,21 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
     with self.assertRaises(ValueError):
       _user_privacy(**args)

-  def test_user_privacy_one_example_per_user(self):
+  @parameterized.named_parameters(('RDP', _RDP), ('PLD', _PLD))
+  def test_user_privacy_one_example_per_user(self, accountant_type):
     num_epochs = 1.2
     noise_multiplier = 0.7
     delta = 1e-5

-    example_eps = _example_privacy(num_epochs, noise_multiplier, delta)
+    example_eps = _example_privacy(
+        num_epochs, noise_multiplier, delta, accountant_type=accountant_type
+    )
     user_eps = _user_privacy(
         num_epochs,
         noise_multiplier,
         delta,
         max_examples_per_user=1,
+        accountant_type=accountant_type,
     )
     self.assertEqual(user_eps, example_eps)

@@ -146,6 +161,7 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
         noise_multiplier=noise_multiplier,
         example_delta=example_delta,
         poisson_subsampling_probability=q,
+        accountant_type=_RDP,
     )

     user_delta = math.exp(
@@ -161,12 +177,14 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
         user_delta=user_delta,
         max_examples_per_user=max_examples_per_user,
         poisson_subsampling_probability=q,
+        accountant_type=_RDP,
     )
     self.assertAlmostEqual(user_eps, example_eps * max_examples_per_user)

-  def test_dp_sgd_privacy_statement_no_user_dp(self):
+  def test_dp_sgd_privacy_statement_no_user_dp_with_rdp(self):
     statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
         **DP_SGD_STATEMENT_KWARGS,
+        accountant_type=_RDP,
     )
     expected_statement = """\
 DP-SGD performed over 10000 examples with 64 examples per iteration, noise
@@ -185,16 +203,17 @@ No user-level privacy guarantee is possible without a bound on the number of
 examples per user.

 (*) Poisson sampling is not usually done in training pipelines, but assuming
-that the data was randomly shuffled, it is believed the actual epsilon should be
-closer to this value than the conservative assumption of an arbitrary data
-order.
+that the data was randomly shuffled, it is believed that the actual epsilon
+should be closer to this value than the conservative assumption of an arbitrary
+data order.
 """
     self.assertEqual(statement, expected_statement)

-  def test_dp_sgd_privacy_statement_user_dp(self):
+  def test_dp_sgd_privacy_statement_user_dp_with_rdp(self):
     statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
         **DP_SGD_STATEMENT_KWARGS,
         max_examples_per_user=3,
+        accountant_type=_RDP,
     )
     expected_statement = """\
 DP-SGD performed over 10000 examples with 64 examples per iteration, noise
@@ -215,16 +234,17 @@ RDP accounting and group privacy:
 Epsilon assuming Poisson sampling (*): 6.425

 (*) Poisson sampling is not usually done in training pipelines, but assuming
-that the data was randomly shuffled, it is believed the actual epsilon should be
-closer to this value than the conservative assumption of an arbitrary data
-order.
+that the data was randomly shuffled, it is believed that the actual epsilon
+should be closer to this value than the conservative assumption of an arbitrary
+data order.
 """
     self.assertEqual(statement, expected_statement)

-  def test_dp_sgd_privacy_statement_user_dp_infinite(self):
+  def test_dp_sgd_privacy_statement_user_dp_infinite_with_rdp(self):
     statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
         **DP_SGD_STATEMENT_KWARGS,
         max_examples_per_user=10,
+        accountant_type=_RDP,
     )
     expected_statement = """\
 DP-SGD performed over 10000 examples with 64 examples per iteration, noise
@@ -245,14 +265,71 @@ RDP accounting and group privacy:
 Epsilon assuming Poisson sampling (*): inf (**)

 (*) Poisson sampling is not usually done in training pipelines, but assuming
-that the data was randomly shuffled, it is believed the actual epsilon should be
-closer to this value than the conservative assumption of an arbitrary data
-order.
+that the data was randomly shuffled, it is believed that the actual epsilon
+should be closer to this value than the conservative assumption of an arbitrary
+data order.

 (**) A finite example-level epsilon implies a finite user-level epsilon at any
 `max_examples_per_user`, but because conversion from example-level to user-level
 DP is not exact, it is possible for the upper bound on the user-level epsilon to
 still be infinite.
+"""
+    self.assertEqual(statement, expected_statement)
+
+  def test_dp_sgd_privacy_statement_no_user_dp_with_pld(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+        accountant_type=_PLD,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and no bound on number of
+examples per user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+PLD accounting:
+Epsilon with each example occurring once per epoch: 12.595
+Epsilon assuming Poisson sampling (*): 1.199
+
+No user-level privacy guarantee is possible without a bound on the number of
+examples per user.
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed that the actual epsilon
+should be closer to this value than the conservative assumption of an arbitrary
+data order.
+"""
+    self.assertEqual(statement, expected_statement)
+
+  def test_dp_sgd_privacy_statement_user_dp_with_pld(self):
+    statement = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy_statement(
+        **DP_SGD_STATEMENT_KWARGS,
+        max_examples_per_user=3,
+        accountant_type=_PLD,
+    )
+    expected_statement = """\
+DP-SGD performed over 10000 examples with 64 examples per iteration, noise
+multiplier 2.0 for 5.0 epochs with microbatching, and at most 3 examples per
+user.
+
+This privacy guarantee protects the release of all model checkpoints in addition
+to the final model.
+
+Example-level DP with add-or-remove-one adjacency at delta = 1e-06 computed with
+PLD accounting:
+Epsilon with each example occurring once per epoch: 12.595
+Epsilon assuming Poisson sampling (*): 1.199
+
+User-level DP epsilon computation is not supported for PLD accounting at this
+time. Use RDP accounting to obtain user-level DP guarantees.
+
+(*) Poisson sampling is not usually done in training pipelines, but assuming
+that the data was randomly shuffled, it is believed that the actual epsilon
+should be closer to this value than the conservative assumption of an arbitrary
+data order.
 """
     self.assertEqual(statement, expected_statement)