From d5e41e20ade450e390a01325fea4897a11a16067 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Fri, 7 Apr 2023 15:07:10 -0700 Subject: [PATCH] More detailed description of arguments in compute_dp_sgd_privacy. PiperOrigin-RevId: 522693217 --- .../analysis/compute_dp_sgd_privacy.py | 24 +++++++++++++++---- .../analysis/compute_dp_sgd_privacy_lib.py | 11 +++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py index d882887..572799b 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py @@ -34,10 +34,24 @@ from absl import flags from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy_statement -_NUM_EXAMPLES = flags.DEFINE_integer('N', None, 'Total number of examples.') -_BATCH_SIZE = flags.DEFINE_integer('batch_size', None, 'Batch size.') +_NUM_EXAMPLES = flags.DEFINE_integer( + 'N', None, 'Total number of examples in the training data.' +) +_BATCH_SIZE = flags.DEFINE_integer( + 'batch_size', + None, + ( + 'Number of examples in a batch *regardless of how/whether they are ' + 'grouped into microbatches*.' + ), +) _NOISE_MULTIPLIER = flags.DEFINE_float( - 'noise_multiplier', None, 'Noise multiplier for DP-SGD.' + 'noise_multiplier', + None, + ( + 'Noise multiplier for DP-SGD: ratio of Gaussian noise stddev to the ' + 'l2 clip norm at each round.' + ), ) _NUM_EPOCHS = flags.DEFINE_float( 'epochs', None, 'Number of epochs (may be fractional).' @@ -52,8 +66,8 @@ _MAX_EXAMPLES_PER_USER = flags.DEFINE_integer( 'max_examples_per_user', None, ( - 'Maximum number of examples per user, applicable. Used to compute a' - ' user-level DP guarantee.' + 'Maximum number of examples per user, if applicable. Used to compute a ' + 'user-level DP guarantee.' ), ) diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py index 7e3d567..f9b708e 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py @@ -51,7 +51,7 @@ def _compute_dp_sgd_user_privacy( Args: num_epochs: The number of passes over the data. May be fractional. - noise_multiplier: The ratio of the noise to the l2 sensitivity. + noise_multiplier: The ratio of the noise stddev to the l2 sensitivity. user_delta: The target user-level delta. max_examples_per_user: Upper bound on the number of examples per user. used_microbatching: If true, increases sensitivity by a factor of two. @@ -183,7 +183,7 @@ def _compute_dp_sgd_example_privacy( Args: num_epochs: The number of passes over the data. - noise_multiplier: The ratio of the noise to the l2 sensitivity. + noise_multiplier: The ratio of the noise stddev to the l2 sensitivity. example_delta: The target delta. used_microbatching: If true, increases sensitivity by a factor of two. poisson_subsampling_probability: If not None, gives the probability that @@ -244,9 +244,10 @@ def compute_dp_sgd_privacy_statement( examples in a batch, *regardless of whether/how they are grouped into microbatches*. num_epochs: The number of epochs of training. May be fractional. - noise_multiplier: The ratio of the Gaussian noise to the clip norm at each - round. It is assumed that the noise_multiplier is constant although the - clip norm may be variable if, for example, adaptive clipping is used. + noise_multiplier: The ratio of the Gaussian noise stddev to the l2 clip norm + at each round. It is assumed that the noise_multiplier is constant + although the clip norm may be variable if, for example, adaptive clipping + is used. delta: The target delta. used_microbatching: Whether microbatching was used (with microbatch size greater than one). Microbatching inflates sensitivity by a factor of two