diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py new file mode 100644 index 0000000..6ae991d --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py @@ -0,0 +1,67 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Command-line script for computing the noise multiplier for DP-SGD. + +The script uses the RDP accountant to find the noise multiplier at which an +iterated Sampled Gaussian Mechanism meets the privacy budget given by flags. + +Example: + compute_noise_from_budget \ + --N=60000 \ + --batch_size=256 \ + --epsilon=2.92 \ + --epochs=60 \ + --delta=1e-5 + +The output states that DP-SGD with these parameters should +use a noise multiplier of 1.12. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from absl import app +from absl import flags + +from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise + +# Opting out of loading all sibling packages and their dependencies.
sys.skip_tf_privacy_import = True

FLAGS = flags.FLAGS

flags.DEFINE_integer('N', None, 'Total number of examples')
flags.DEFINE_integer('batch_size', None, 'Batch size')
flags.DEFINE_float('epsilon', None, 'Target epsilon for DP-SGD')
flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)')
flags.DEFINE_float('delta', 1e-6, 'Target delta')

# Flags that have no usable default and must be supplied on the command line.
_REQUIRED_FLAGS = ('N', 'batch_size', 'epsilon', 'epochs')


def main(argv):
  """Checks the required flags and runs the noise-multiplier search.

  Args:
    argv: Positional command-line arguments; unused.

  Raises:
    app.UsageError: If any of the flags N, batch_size, epsilon or epochs was
      not supplied.
  """
  del argv  # argv is not used.

  # Explicit checks rather than `assert`: assertions are removed when Python
  # runs with -O, which would let a missing flag reach compute_noise as None.
  for name in _REQUIRED_FLAGS:
    if getattr(FLAGS, name) is None:
      raise app.UsageError('Flag {} is missing.'.format(name))

  # compute_noise prints its own report; the returned value is not used here.
  compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon,
                FLAGS.epochs, FLAGS.delta)


if __name__ == '__main__':
  app.run(main)
# Opting out of loading all sibling packages and their dependencies.
sys.skip_tf_privacy_import = True


def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
  """Computes the epsilon of a DP-SGD run via the RDP accountant.

  Args:
    q: Sampling ratio, forwarded to `compute_rdp`.
    sigma: Noise multiplier, forwarded to `compute_rdp`.
    steps: Number of steps, forwarded to `compute_rdp`.
    orders: RDP orders at which the accountant is evaluated.
    delta: Target delta, forwarded to `get_privacy_spent`.

  Returns:
    A tuple `(eps, opt_order)`: the first and third values returned by
    `get_privacy_spent`.
  """

  # compute_rdp requires that sigma be the ratio of the standard deviation of
  # the Gaussian noise to the l2-sensitivity of the function to which it is
  # added. Hence, sigma here corresponds to the `noise_multiplier` parameter
  # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer
  rdp = compute_rdp(q, sigma, steps, orders)

  eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

  return eps, opt_order


def compute_noise(n, batch_size, target_epsilon, epochs, delta,
                  noise_lbd=1e-5):
  """Searches for the noise multiplier that meets a target epsilon.

  Starting from `noise_lbd`, the candidate noise is doubled until the epsilon
  reported by `apply_dp_sgd_analysis` is no larger than `target_epsilon`; the
  resulting bracket is then refined with `scipy.optimize.bisect`. On success
  a summary line is printed.

  Args:
    n: Total number of examples; must be positive.
    batch_size: Batch size; must be positive and no larger than `n`.
    target_epsilon: Epsilon to reach; must be positive.
    epochs: Number of epochs (may be fractional).
    delta: Target delta.
    noise_lbd: Smallest noise multiplier considered, used as the start of the
      search; must be positive. Defaults to 1e-5.

  Returns:
    The noise multiplier found by bisection, or 0 (without printing) when the
    epsilon obtained at `noise_lbd` is already below `target_epsilon`.

  Raises:
    ValueError: If `n`, `batch_size`, `target_epsilon` or `noise_lbd` is not
      positive, or if the doubling search overflows before reaching
      `target_epsilon`.
    app.UsageError: If `batch_size` exceeds `n`.
  """
  if n <= 0 or batch_size <= 0:
    raise ValueError('n and batch_size must be positive.')
  if target_epsilon <= 0:
    raise ValueError('target_epsilon must be positive.')
  if noise_lbd <= 0:
    # A zero lower bound would never grow under doubling (0 * 2 == 0).
    raise ValueError('noise_lbd must be positive.')

  q = batch_size / n  # q - the sampling ratio.
  if q > 1:
    raise app.UsageError('n must be larger than the batch size.')
  orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
            list(range(5, 64)) + [128, 256, 512])
  steps = int(math.ceil(epochs * n / batch_size))

  init_noise = noise_lbd  # minimum possible noise
  init_epsilon, _ = apply_dp_sgd_analysis(q, init_noise, steps, orders, delta)

  if init_epsilon < target_epsilon:  # noise_lbd was an overestimate
    return 0

  cur_epsilon = init_epsilon
  max_noise, min_noise = init_noise, 0

  # doubling to find the right range
  while cur_epsilon > target_epsilon:  # until noise is large enough
    max_noise, min_noise = max_noise * 2, max_noise
    if math.isinf(max_noise):
      # Without this stop the loop could run forever once the candidate
      # overflows, e.g. when target_epsilon lies below every epsilon the
      # accountant reports for this delta.
      raise ValueError(
          'target_epsilon is not reachable with a finite noise multiplier.')
    cur_epsilon, _ = apply_dp_sgd_analysis(q, max_noise, steps, orders, delta)

  def epsilon_fn(noise):  # should return 0 if guess_epsilon==target_epsilon
    guess_epsilon = apply_dp_sgd_analysis(q, noise, steps, orders, delta)[0]
    return guess_epsilon - target_epsilon

  target_noise = bisect(epsilon_fn, min_noise, max_noise)
  print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
        ' over {} steps satisfies'.format(100 * q, target_noise, steps), end=' ')
  print('differential privacy with eps = {:.3g} and delta = {}.'.format(
      target_epsilon, delta))
  return target_noise
class ComputeNoiseFromBudgetTest(parameterized.TestCase):
  """Checks compute_noise against fixed budget / noise-multiplier pairs."""

  @parameterized.named_parameters(
      dict(testcase_name='Test0', n=60000, batch_size=150,
           target_epsilon=0.941870567, epochs=15, delta=1e-5,
           expected_noise=1.3),
      dict(testcase_name='Test1', n=100000, batch_size=100,
           target_epsilon=1.70928734, epochs=30, delta=1e-7,
           expected_noise=1.0),
      dict(testcase_name='Test2', n=100000000, batch_size=1024,
           target_epsilon=5907984.81339406, epochs=10, delta=1e-7,
           expected_noise=0.1),
  )
  def test_compute_noise(self, n, batch_size, target_epsilon, epochs,
                         delta, expected_noise):
    """The computed noise multiplier matches the expected one."""
    solve = compute_noise_from_budget_lib.compute_noise
    actual_noise = solve(n, batch_size, target_epsilon, epochs, delta)
    # Default assertAlmostEqual tolerance (7 decimal places).
    self.assertAlmostEqual(actual_noise, expected_noise)


if __name__ == '__main__':
  absltest.main()