From 943ef91ee932207f22e3840753e0d5b1dd07d503 Mon Sep 17 00:00:00 2001 From: Michael Reneer Date: Fri, 28 Jan 2022 11:56:55 -0800 Subject: [PATCH] Format TensorFlow Privacy files. PiperOrigin-RevId: 424923635 --- g3doc/build_docs.py | 1 - g3doc/guide/get_started.md | 1 - .../analysis/compute_dp_sgd_privacy_lib.py | 7 +- .../analysis/compute_dp_sgd_privacy_test.py | 7 +- .../analysis/compute_noise_from_budget.py | 4 +- .../compute_noise_from_budget_test.py | 5 +- .../privacy/analysis/dp_event.py | 1 - .../privacy/analysis/dp_event_builder_test.py | 4 +- .../privacy/analysis/tensor_buffer.py | 8 +- .../analysis/tensor_buffer_eager_test.py | 5 +- .../tree_aggregation_accountant_test.py | 1 + tensorflow_privacy/privacy/bolt_on/losses.py | 37 +- .../privacy/bolt_on/losses_test.py | 377 ++++++++++-------- tensorflow_privacy/privacy/bolt_on/models.py | 85 ++-- .../privacy/bolt_on/models_test.py | 337 ++++++++-------- .../privacy/bolt_on/optimizers.py | 75 ++-- .../privacy/bolt_on/optimizers_test.py | 290 ++++++++------ .../dp_query/quantile_estimator_query_test.py | 21 +- .../privacy/dp_query/tree_aggregation.py | 1 - .../dp_query/tree_aggregation_query_test.py | 9 +- .../privacy/estimators/dnn_test.py | 1 + .../privacy/estimators/test_utils.py | 11 +- .../privacy/estimators/v1/dnn_test.py | 1 + .../privacy/estimators/v1/head.py | 4 +- .../privacy/keras_models/dp_keras_model.py | 7 +- .../privacy/logistic_regression/datasets.py | 10 +- .../logistic_regression/datasets_test.py | 22 +- .../multinomial_logistic.py | 71 ++-- .../multinomial_logistic_test.py | 21 +- .../single_layer_softmax.py | 30 +- .../single_layer_softmax_test.py | 5 +- .../privacy/optimizers/dp_optimizer_keras.py | 46 +-- .../optimizers/dp_optimizer_keras_test.py | 11 +- .../dp_optimizer_keras_vectorized.py | 13 +- .../privacy/optimizers/dp_optimizer_test.py | 16 +- .../optimizers/dp_optimizer_vectorized.py | 12 +- .../dp_optimizer_vectorized_test.py | 40 +- .../codelabs/example.py | 1 + .../data_structures.py | 22 +- .../keras_evaluation.py | 50 ++- .../keras_evaluation_example.py | 29 +- .../keras_evaluation_test.py | 5 +- .../membership_inference_attack.py | 15 +- .../membership_inference_attack/models.py | 6 +- .../membership_inference_attack/plotting.py | 3 +- .../tf_estimator_evaluation.py | 56 +-- .../tf_estimator_evaluation_example.py | 33 +- .../tf_estimator_evaluation_test.py | 24 +- .../membership_inference_attack/utils.py | 1 - .../utils_tensorboard.py | 43 +- .../privacy_tests/secret_sharer/exposures.py | 30 +- .../secret_sharer/exposures_test.py | 12 +- .../secret_sharer/generate_secrets.py | 17 +- .../secret_sharer/generate_secrets_test.py | 71 ++-- tutorials/bolton_tutorial.py | 64 +-- tutorials/mnist_dpsgd_tutorial.py | 12 +- tutorials/mnist_dpsgd_tutorial_eager.py | 38 +- tutorials/mnist_dpsgd_tutorial_keras.py | 38 +- tutorials/mnist_dpsgd_tutorial_vectorized.py | 45 +-- tutorials/mnist_lr_tutorial.py | 8 +- tutorials/movielens_tutorial.py | 2 +- tutorials/walkthrough/mnist_scratch.py | 35 +- 62 files changed, 1170 insertions(+), 1087 deletions(-) diff --git a/g3doc/build_docs.py b/g3doc/build_docs.py index d038ad9..c7ce9cb 100644 --- a/g3doc/build_docs.py +++ b/g3doc/build_docs.py @@ -23,7 +23,6 @@ from tensorflow_docs.api_generator import generate_lib from tensorflow_docs.api_generator import public_api import tensorflow_privacy as tf_privacy - flags.DEFINE_string('output_dir', '/tmp/tf_privacy', 'Where to output the docs.') flags.DEFINE_string( diff --git a/g3doc/guide/get_started.md 
b/g3doc/guide/get_started.md index 8a01aee..cfa651d 100644 --- a/g3doc/guide/get_started.md +++ b/g3doc/guide/get_started.md @@ -1,6 +1,5 @@ # Get Started - This document assumes you are already familiar with differential privacy, and have determined that you would like to use TF Privacy to implement differential privacy guarantees in your model(s). If you’re not familiar with differential diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py index 7cc557a..3d8752e 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py @@ -17,7 +17,6 @@ import math from absl import app - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent @@ -33,8 +32,10 @@ def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta) - print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' - ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ') + print( + 'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' + ' over {} steps satisfies'.format(100 * q, sigma, steps), + end=' ') print('differential privacy with eps = {:.3g} and delta = {}.'.format( eps, delta)) print('The optimal RDP order is {}.'.format(opt_order)) diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py index c1431db..e11d254 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_test.py @@ -46,12 +46,13 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase): # the basis of this sanity check. This is confirmed in the above paper. q = batch_size / n steps = epochs * n / batch_size - sigma = noise_multiplier * math.sqrt(steps) /q + sigma = noise_multiplier * math.sqrt(steps) / q # We compute the optimal guarantee for Gaussian # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2). - low_delta = .5*math.erfc((eps*sigma-.5/sigma)/math.sqrt(2)) + low_delta = .5 * math.erfc((eps * sigma - .5 / sigma) / math.sqrt(2)) if eps < 100: # Skip this if it causes overflow; error is minor. - low_delta -= math.exp(eps)*.5*math.erfc((eps*sigma+.5/sigma)/math.sqrt(2)) + low_delta -= math.exp(eps) * .5 * math.erfc( + (eps * sigma + .5 / sigma) / math.sqrt(2)) self.assertLessEqual(low_delta, delta) diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py index 01e7368..1cf1289 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py @@ -52,8 +52,8 @@ def main(argv): assert FLAGS.batch_size is not None, 'Flag batch_size is missing.' assert FLAGS.epsilon is not None, 'Flag epsilon is missing.' assert FLAGS.epochs is not None, 'Flag epochs is missing.' 
- compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, - FLAGS.epochs, FLAGS.delta, FLAGS.min_noise) + compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, FLAGS.epochs, + FLAGS.delta, FLAGS.min_noise) if __name__ == '__main__': diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_test.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_test.py index 6f1ed79..9617503 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_test.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_test.py @@ -27,11 +27,12 @@ class ComputeNoiseFromBudgetTest(parameterized.TestCase): ('Test2', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1e-5, 0.1), ('Test3', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1, 0), ) - def test_compute_noise(self, n, batch_size, target_epsilon, epochs, - delta, min_noise, expected_noise): + def test_compute_noise(self, n, batch_size, target_epsilon, epochs, delta, + min_noise, expected_noise): target_noise = compute_noise_from_budget_lib.compute_noise( n, batch_size, target_epsilon, epochs, delta, min_noise) self.assertAlmostEqual(target_noise, expected_noise) + if __name__ == '__main__': absltest.main() diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py index 290970f..40a11a1 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event.py +++ b/tensorflow_privacy/privacy/analysis/dp_event.py @@ -176,4 +176,3 @@ class SampledWithoutReplacementDpEvent(DpEvent): source_dataset_size: int sample_size: int event: DpEvent - diff --git a/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py b/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py index dd8a5f2..882e4a6 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py +++ b/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests for DpEventBuilder.""" from absl.testing import absltest from tensorflow_privacy.privacy.analysis import dp_event @@ -68,7 +67,8 @@ class DpEventBuilderTest(absltest.TestCase): expected_event = dp_event.ComposedDpEvent([ _gaussian_event, dp_event.SelfComposedDpEvent(composed_event, 3), - dp_event.SelfComposedDpEvent(_poisson_event, 2)]) + dp_event.SelfComposedDpEvent(_poisson_event, 2) + ]) self.assertEqual(expected_event, builder.build()) diff --git a/tensorflow_privacy/privacy/analysis/tensor_buffer.py b/tensorflow_privacy/privacy/analysis/tensor_buffer.py index 5a5d272..9e8ab01 100644 --- a/tensorflow_privacy/privacy/analysis/tensor_buffer.py +++ b/tensorflow_privacy/privacy/analysis/tensor_buffer.py @@ -84,13 +84,13 @@ class TensorBuffer(object): dtype=self._dtype, initializer=new_buffer, trainable=False) - return self._buffer, tf.assign( - self._capacity, tf.multiply(self._capacity, 2)) + return self._buffer, tf.assign(self._capacity, + tf.multiply(self._capacity, 2)) else: return tf.assign( self._buffer, new_buffer, - validate_shape=False), tf.assign( - self._capacity, tf.multiply(self._capacity, 2)) + validate_shape=False), tf.assign(self._capacity, + tf.multiply(self._capacity, 2)) update_buffer, update_capacity = tf.cond( pred=tf.equal(self._current_size, self._capacity), diff --git a/tensorflow_privacy/privacy/analysis/tensor_buffer_eager_test.py b/tensorflow_privacy/privacy/analysis/tensor_buffer_eager_test.py index 22786a6..0ed5e70 100644 --- a/tensorflow_privacy/privacy/analysis/tensor_buffer_eager_test.py +++ b/tensorflow_privacy/privacy/analysis/tensor_buffer_eager_test.py @@ -44,9 +44,8 @@ class TensorBufferTest(tf.test.TestCase): my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') - with self.assertRaisesRegex( - tf.errors.InvalidArgumentError, - 'Appending value of inconsistent shape.'): + with self.assertRaisesRegex(tf.errors.InvalidArgumentError, + 'Appending value of inconsistent shape.'): my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32)) def test_resize(self): diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py index b60e06c..3cb2926 100644 --- a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py @@ -187,5 +187,6 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): tree_aggregation_accountant._compute_gaussian_zcdp( sigma, sum_sensitivity_square)) + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow_privacy/privacy/bolt_on/losses.py b/tensorflow_privacy/privacy/bolt_on/losses.py index 98ec2c4..fe02e2d 100644 --- a/tensorflow_privacy/privacy/bolt_on/losses.py +++ b/tensorflow_privacy/privacy/bolt_on/losses.py @@ -68,7 +68,6 @@ class StrongConvexMixin: Args: class_weight: class weights used - Returns: L """ raise NotImplementedError("lipchitz constant not implemented for " @@ -126,13 +125,10 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): if reg_lambda <= 0: raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) if radius_constant <= 0: - raise ValueError("radius_constant: {0}, should be >= 0".format( - radius_constant - )) + raise ValueError( + "radius_constant: {0}, should be >= 0".format(radius_constant)) if delta <= 0: - raise ValueError("delta: {0}, should be >= 0".format( - delta - )) + raise ValueError("delta: {0}, should be >= 0".format(delta)) self.C = c_arg # 
pylint: disable=invalid-name self.delta = delta self.radius_constant = radius_constant @@ -172,9 +168,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): def beta(self, class_weight): """See super class.""" max_class_weight = self.max_class_weight(class_weight, self.dtype) - delta = _ops.convert_to_tensor_v2(self.delta, - dtype=self.dtype - ) + delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype) return self.C * max_class_weight / (delta * tf.constant(2, dtype=self.dtype)) + \ self.reg_lambda @@ -197,13 +191,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): The L2 regularizer layer for this loss function, with regularizer constant set to half the 0.5 * reg_lambda. """ - return L1L2(l2=self.reg_lambda/2) + return L1L2(l2=self.reg_lambda / 2) -class StrongConvexBinaryCrossentropy( - losses.BinaryCrossentropy, - StrongConvexMixin -): +class StrongConvexBinaryCrossentropy(losses.BinaryCrossentropy, + StrongConvexMixin): """Strongly Convex BinaryCrossentropy loss using l2 weight regularization.""" def __init__(self, @@ -222,10 +214,10 @@ class StrongConvexBinaryCrossentropy( radius_constant: constant defining the length of the radius from_logits: True if the input are unscaled logits. False if they are already scaled. - label_smoothing: amount of smoothing to perform on labels - relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the - impact of this parameter's effect on privacy is not known and thus the - default should be used. + label_smoothing: amount of smoothing to perform on labels relaxation of + trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the impact of this + parameter's effect on privacy is not known and thus the default should + be used. reduction: reduction type to use. See super class dtype: tf datatype to use for tensor conversions. """ @@ -239,9 +231,8 @@ class StrongConvexBinaryCrossentropy( if c_arg <= 0: raise ValueError("c: {0}, should be >= 0".format(c_arg)) if radius_constant <= 0: - raise ValueError("radius_constant: {0}, should be >= 0".format( - radius_constant - )) + raise ValueError( + "radius_constant: {0}, should be >= 0".format(radius_constant)) self.dtype = dtype self.C = c_arg # pylint: disable=invalid-name self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) @@ -294,4 +285,4 @@ class StrongConvexBinaryCrossentropy( The L2 regularizer layer for this loss function, with regularizer constant set to half the 0.5 * reg_lambda. 
""" - return L1L2(l2=self.reg_lambda/2) + return L1L2(l2=self.reg_lambda / 2) diff --git a/tensorflow_privacy/privacy/bolt_on/losses_test.py b/tensorflow_privacy/privacy/bolt_on/losses_test.py index bf99245..5a94b83 100644 --- a/tensorflow_privacy/privacy/bolt_on/losses_test.py +++ b/tensorflow_privacy/privacy/bolt_on/losses_test.py @@ -40,21 +40,29 @@ def captured_output(): class StrongConvexMixinTests(keras_parameterized.TestCase): """Tests for the StrongConvexMixin.""" - @parameterized.named_parameters([ - {'testcase_name': 'beta not implemented', - 'fn': 'beta', - 'args': [1]}, - {'testcase_name': 'gamma not implemented', - 'fn': 'gamma', - 'args': []}, - {'testcase_name': 'lipchitz not implemented', - 'fn': 'lipchitz_constant', - 'args': [1]}, - {'testcase_name': 'radius not implemented', - 'fn': 'radius', - 'args': []}, - ]) + @parameterized.named_parameters([ + { + 'testcase_name': 'beta not implemented', + 'fn': 'beta', + 'args': [1] + }, + { + 'testcase_name': 'gamma not implemented', + 'fn': 'gamma', + 'args': [] + }, + { + 'testcase_name': 'lipchitz not implemented', + 'fn': 'lipchitz_constant', + 'args': [1] + }, + { + 'testcase_name': 'radius not implemented', + 'fn': 'radius', + 'args': [] + }, + ]) def test_not_implemented(self, fn, args): """Test that the given fn's are not implemented on the mixin. @@ -67,9 +75,11 @@ class StrongConvexMixinTests(keras_parameterized.TestCase): getattr(loss, fn, None)(*args) @parameterized.named_parameters([ - {'testcase_name': 'radius not implemented', - 'fn': 'kernel_regularizer', - 'args': []}, + { + 'testcase_name': 'radius not implemented', + 'fn': 'kernel_regularizer', + 'args': [] + }, ]) def test_return_none(self, fn, args): """Test that fn of Mixin returns None. @@ -87,10 +97,11 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): """tests for BinaryCrossesntropy StrongConvex loss.""" @parameterized.named_parameters([ - {'testcase_name': 'normal', - 'reg_lambda': 1, - 'C': 1, - 'radius_constant': 1 + { + 'testcase_name': 'normal', + 'reg_lambda': 1, + 'C': 1, + 'radius_constant': 1 }, # pylint: disable=invalid-name ]) def test_init_params(self, reg_lambda, C, radius_constant): @@ -106,24 +117,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): self.assertIsInstance(loss, StrongConvexBinaryCrossentropy) @parameterized.named_parameters([ - {'testcase_name': 'negative c', - 'reg_lambda': 1, - 'C': -1, - 'radius_constant': 1 + { + 'testcase_name': 'negative c', + 'reg_lambda': 1, + 'C': -1, + 'radius_constant': 1 }, - {'testcase_name': 'negative radius', - 'reg_lambda': 1, - 'C': 1, - 'radius_constant': -1 + { + 'testcase_name': 'negative radius', + 'reg_lambda': 1, + 'C': 1, + 'radius_constant': -1 }, - {'testcase_name': 'negative lambda', - 'reg_lambda': -1, - 'C': 1, - 'radius_constant': 1 + { + 'testcase_name': 'negative lambda', + 'reg_lambda': -1, + 'C': 1, + 'radius_constant': 1 }, # pylint: disable=invalid-name ]) def test_bad_init_params(self, reg_lambda, C, radius_constant): - """Test invalid domain for given params. Should return ValueError. + """Test invalid domain for given params. + + Should return ValueError. 
Args: reg_lambda: initialization value for reg_lambda arg @@ -137,25 +153,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ # [] for compatibility with tensorflow loss calculation - {'testcase_name': 'both positive', - 'logits': [10000], - 'y_true': [1], - 'result': 0, + { + 'testcase_name': 'both positive', + 'logits': [10000], + 'y_true': [1], + 'result': 0, }, - {'testcase_name': 'positive gradient negative logits', - 'logits': [-10000], - 'y_true': [1], - 'result': 10000, + { + 'testcase_name': 'positive gradient negative logits', + 'logits': [-10000], + 'y_true': [1], + 'result': 10000, }, - {'testcase_name': 'positivee gradient positive logits', - 'logits': [10000], - 'y_true': [0], - 'result': 10000, + { + 'testcase_name': 'positivee gradient positive logits', + 'logits': [10000], + 'y_true': [0], + 'result': 10000, }, - {'testcase_name': 'both negative', - 'logits': [-10000], - 'y_true': [0], - 'result': 0 + { + 'testcase_name': 'both negative', + 'logits': [-10000], + 'y_true': [0], + 'result': 0 }, ]) def test_calculation(self, logits, y_true, result): @@ -173,29 +193,33 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): self.assertEqual(loss.numpy(), result) @parameterized.named_parameters([ - {'testcase_name': 'beta', - 'init_args': [1, 1, 1], - 'fn': 'beta', - 'args': [1], - 'result': tf.constant(2, dtype=tf.float32) + { + 'testcase_name': 'beta', + 'init_args': [1, 1, 1], + 'fn': 'beta', + 'args': [1], + 'result': tf.constant(2, dtype=tf.float32) }, - {'testcase_name': 'gamma', - 'fn': 'gamma', - 'init_args': [1, 1, 1], - 'args': [], - 'result': tf.constant(1, dtype=tf.float32), + { + 'testcase_name': 'gamma', + 'fn': 'gamma', + 'init_args': [1, 1, 1], + 'args': [], + 'result': tf.constant(1, dtype=tf.float32), }, - {'testcase_name': 'lipchitz constant', - 'fn': 'lipchitz_constant', - 'init_args': [1, 1, 1], - 'args': [1], - 'result': tf.constant(2, dtype=tf.float32), + { + 'testcase_name': 'lipchitz constant', + 'fn': 'lipchitz_constant', + 'init_args': [1, 1, 1], + 'args': [1], + 'result': tf.constant(2, dtype=tf.float32), }, - {'testcase_name': 'kernel regularizer', - 'fn': 'kernel_regularizer', - 'init_args': [1, 1, 1], - 'args': [], - 'result': L1L2(l2=0.5), + { + 'testcase_name': 'kernel regularizer', + 'fn': 'kernel_regularizer', + 'init_args': [1, 1, 1], + 'args': [], + 'result': L1L2(l2=0.5), }, ]) def test_fns(self, init_args, fn, args, result): @@ -218,11 +242,12 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): self.assertEqual(expected, result) @parameterized.named_parameters([ - {'testcase_name': 'label_smoothing', - 'init_args': [1, 1, 1, True, 0.1], - 'fn': None, - 'args': None, - 'print_res': 'The impact of label smoothing on privacy is unknown.' + { + 'testcase_name': 'label_smoothing', + 'init_args': [1, 1, 1, True, 0.1], + 'fn': None, + 'args': None, + 'print_res': 'The impact of label smoothing on privacy is unknown.' 
}, ]) def test_prints(self, init_args, fn, args, print_res): @@ -245,11 +270,12 @@ class HuberTests(keras_parameterized.TestCase): """tests for BinaryCrossesntropy StrongConvex loss.""" @parameterized.named_parameters([ - {'testcase_name': 'normal', - 'reg_lambda': 1, - 'c': 1, - 'radius_constant': 1, - 'delta': 1, + { + 'testcase_name': 'normal', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': 1, + 'delta': 1, }, ]) def test_init_params(self, reg_lambda, c, radius_constant, delta): @@ -266,33 +292,39 @@ class HuberTests(keras_parameterized.TestCase): self.assertIsInstance(loss, StrongConvexHuber) @parameterized.named_parameters([ - {'testcase_name': 'negative c', - 'reg_lambda': 1, - 'c': -1, - 'radius_constant': 1, - 'delta': 1 + { + 'testcase_name': 'negative c', + 'reg_lambda': 1, + 'c': -1, + 'radius_constant': 1, + 'delta': 1 }, - {'testcase_name': 'negative radius', - 'reg_lambda': 1, - 'c': 1, - 'radius_constant': -1, - 'delta': 1 + { + 'testcase_name': 'negative radius', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': -1, + 'delta': 1 }, - {'testcase_name': 'negative lambda', - 'reg_lambda': -1, - 'c': 1, - 'radius_constant': 1, - 'delta': 1 + { + 'testcase_name': 'negative lambda', + 'reg_lambda': -1, + 'c': 1, + 'radius_constant': 1, + 'delta': 1 }, - {'testcase_name': 'negative delta', - 'reg_lambda': 1, - 'c': 1, - 'radius_constant': 1, - 'delta': -1 + { + 'testcase_name': 'negative delta', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': 1, + 'delta': -1 }, ]) def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): - """Test invalid domain for given params. Should return ValueError. + """Test invalid domain for given params. + + Should return ValueError. Args: reg_lambda: initialization value for reg_lambda arg @@ -307,59 +339,68 @@ class HuberTests(keras_parameterized.TestCase): # test the bounds and test varied delta's @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ - {'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary', - 'logits': 2.1, - 'y_true': 1, - 'delta': 1, - 'result': 0, + { + 'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary', + 'logits': 2.1, + 'y_true': 1, + 'delta': 1, + 'result': 0, }, - {'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary', - 'logits': 1.9, - 'y_true': 1, - 'delta': 1, - 'result': 0.01*0.25, + { + 'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary', + 'logits': 1.9, + 'y_true': 1, + 'delta': 1, + 'result': 0.01 * 0.25, }, - {'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary', - 'logits': 0.1, - 'y_true': 1, - 'delta': 1, - 'result': 1.9**2 * 0.25, + { + 'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary', + 'logits': 0.1, + 'y_true': 1, + 'delta': 1, + 'result': 1.9**2 * 0.25, }, - {'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary', - 'logits': -0.1, - 'y_true': 1, - 'delta': 1, - 'result': 1.1, + { + 'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary', + 'logits': -0.1, + 'y_true': 1, + 'delta': 1, + 'result': 1.1, }, - {'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary', - 'logits': 3.1, - 'y_true': 1, - 'delta': 2, - 'result': 0, + { + 'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary', + 'logits': 3.1, + 'y_true': 1, + 'delta': 2, + 'result': 0, }, - {'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary', - 'logits': 2.9, - 'y_true': 1, - 'delta': 2, - 'result': 0.01*0.125, + { + 'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary', + 'logits': 2.9, + 'y_true': 1, + 'delta': 2, + 
'result': 0.01 * 0.125, }, - {'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary', - 'logits': 1.1, - 'y_true': 1, - 'delta': 2, - 'result': 1.9**2 * 0.125, + { + 'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary', + 'logits': 1.1, + 'y_true': 1, + 'delta': 2, + 'result': 1.9**2 * 0.125, }, - {'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary', - 'logits': -1.1, - 'y_true': 1, - 'delta': 2, - 'result': 2.1, + { + 'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary', + 'logits': -1.1, + 'y_true': 1, + 'delta': 2, + 'result': 2.1, }, - {'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary', - 'logits': -2.1, - 'y_true': -1, - 'delta': 1, - 'result': 0, + { + 'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary', + 'logits': -2.1, + 'y_true': -1, + 'delta': 1, + 'result': 0, }, ]) def test_calculation(self, logits, y_true, delta, result): @@ -378,29 +419,33 @@ class HuberTests(keras_parameterized.TestCase): self.assertAllClose(loss.numpy(), result) @parameterized.named_parameters([ - {'testcase_name': 'beta', - 'init_args': [1, 1, 1, 1], - 'fn': 'beta', - 'args': [1], - 'result': tf.Variable(1.5, dtype=tf.float32) + { + 'testcase_name': 'beta', + 'init_args': [1, 1, 1, 1], + 'fn': 'beta', + 'args': [1], + 'result': tf.Variable(1.5, dtype=tf.float32) }, - {'testcase_name': 'gamma', - 'fn': 'gamma', - 'init_args': [1, 1, 1, 1], - 'args': [], - 'result': tf.Variable(1, dtype=tf.float32), + { + 'testcase_name': 'gamma', + 'fn': 'gamma', + 'init_args': [1, 1, 1, 1], + 'args': [], + 'result': tf.Variable(1, dtype=tf.float32), }, - {'testcase_name': 'lipchitz constant', - 'fn': 'lipchitz_constant', - 'init_args': [1, 1, 1, 1], - 'args': [1], - 'result': tf.Variable(2, dtype=tf.float32), + { + 'testcase_name': 'lipchitz constant', + 'fn': 'lipchitz_constant', + 'init_args': [1, 1, 1, 1], + 'args': [1], + 'result': tf.Variable(2, dtype=tf.float32), }, - {'testcase_name': 'kernel regularizer', - 'fn': 'kernel_regularizer', - 'init_args': [1, 1, 1, 1], - 'args': [], - 'result': L1L2(l2=0.5), + { + 'testcase_name': 'kernel regularizer', + 'fn': 'kernel_regularizer', + 'init_args': [1, 1, 1, 1], + 'args': [], + 'result': L1L2(l2=0.5), }, ]) def test_fns(self, init_args, fn, args, result): diff --git a/tensorflow_privacy/privacy/bolt_on/models.py b/tensorflow_privacy/privacy/bolt_on/models.py index 0d55897..49bf466 100644 --- a/tensorflow_privacy/privacy/bolt_on/models.py +++ b/tensorflow_privacy/privacy/bolt_on/models.py @@ -38,10 +38,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method Descent-based Analytics by Xi Wu et al. """ - def __init__(self, - n_outputs, - seed=1, - dtype=tf.float32): + def __init__(self, n_outputs, seed=1, dtype=tf.float32): """Private constructor. Args: @@ -51,9 +48,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method """ super(BoltOnModel, self).__init__(name='bolton', dynamic=False) if n_outputs <= 0: - raise ValueError('n_outputs = {0} is not valid. Must be > 0.'.format( - n_outputs - )) + raise ValueError( + 'n_outputs = {0} is not valid. Must be > 0.'.format(n_outputs)) self.n_outputs = n_outputs self.seed = seed self._layers_instantiated = False @@ -76,11 +72,13 @@ class BoltOnModel(Model): # pylint: disable=abstract-method loss, kernel_initializer=tf.initializers.GlorotUniform, **kwargs): # pylint: disable=arguments-differ - """See super class. Default optimizer used in BoltOn method is SGD. + """See super class. + + Default optimizer used in BoltOn method is SGD. 
Args: - optimizer: The optimizer to use. This will be automatically wrapped - with the BoltOn Optimizer. + optimizer: The optimizer to use. This will be automatically wrapped with + the BoltOn Optimizer. loss: The loss function to use. Must be a StrongConvex loss (extend the StrongConvexMixin). kernel_initializer: The kernel initializer to use for the single layer. @@ -128,10 +126,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method y: Labels to fit on, see super. batch_size: The batch size to use for training, see super. class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. + whose dim == n_classes. n_samples: the number of individual samples in x. epsilon: privacy parameter, which trades off between utility an privacy. - See the bolt-on paper for more description. + See the bolt-on paper for more description. noise_distribution: the distribution to pull noise from. steps_per_epoch: **kwargs: kwargs to keras Model.fit. See super. @@ -152,8 +150,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method else: data_size = None batch_size_ = self._validate_or_infer_batch_size(batch_size, - steps_per_epoch, - x) + steps_per_epoch, x) if batch_size_ is None: batch_size_ = 32 # inferring batch_size to be passed to optimizer. batch_size must remain its @@ -164,18 +161,15 @@ class BoltOnModel(Model): # pylint: disable=abstract-method if data_size is None: raise ValueError('Could not infer the number of samples. Please pass ' 'this in using n_samples.') - with self.optimizer(noise_distribution, - epsilon, - self.layers, - class_weight_, - data_size, - batch_size_) as _: - out = super(BoltOnModel, self).fit(x=x, - y=y, - batch_size=batch_size, - class_weight=class_weight, - steps_per_epoch=steps_per_epoch, - **kwargs) + with self.optimizer(noise_distribution, epsilon, self.layers, class_weight_, + data_size, batch_size_) as _: + out = super(BoltOnModel, self).fit( + x=x, + y=y, + batch_size=batch_size, + class_weight=class_weight, + steps_per_epoch=steps_per_epoch, + **kwargs) return out def fit_generator(self, @@ -194,10 +188,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method Args: generator: Inputs generator following Tensorflow guidelines, see super. class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. + whose dim == n_classes. noise_distribution: the distribution to get noise from. epsilon: privacy parameter, which trades off utility and privacy. See - BoltOn paper for more description. + BoltOn paper for more description. n_samples: number of individual samples in x steps_per_epoch: Number of steps per training epoch, see super. **kwargs: **kwargs @@ -222,12 +216,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method generator) if batch_size is None: batch_size = 32 - with self.optimizer(noise_distribution, - epsilon, - self.layers, - class_weight, - data_size, - batch_size) as _: + with self.optimizer(noise_distribution, epsilon, self.layers, class_weight, + data_size, batch_size) as _: out = super(BoltOnModel, self).fit_generator( generator, class_weight=class_weight, @@ -243,10 +233,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method Args: class_weights: str specifying type, array giving weights, or None. - class_counts: If class_weights is not None, then an array of - the number of samples for each class - num_classes: If class_weights is not None, then the number of - classes. 
+ class_counts: If class_weights is not None, then an array of the number of + samples for each class + num_classes: If class_weights is not None, then the number of classes. + Returns: class_weights as 1D tensor, to be passed to model's fit method. """ @@ -259,14 +249,12 @@ class BoltOnModel(Model): # pylint: disable=abstract-method raise ValueError('Detected string class_weights with ' 'value: {0}, which is not one of {1}.' 'Please select a valid class_weight type' - 'or pass an array'.format(class_weights, - class_keys)) + 'or pass an array'.format(class_weights, class_keys)) if class_counts is None: raise ValueError('Class counts must be provided if using ' 'class_weights=%s' % class_weights) - class_counts_shape = tf.Variable(class_counts, - trainable=False, - dtype=self._dtype).shape + class_counts_shape = tf.Variable( + class_counts, trainable=False, dtype=self._dtype).shape if len(class_counts_shape) != 1: raise ValueError('class counts must be a 1D array.' 'Detected: {0}'.format(class_counts_shape)) @@ -282,9 +270,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method class_weights = 1 elif is_string and class_weights == 'balanced': num_samples = sum(class_counts) - weighted_counts = tf.dtypes.cast(tf.math.multiply(num_classes, - class_counts), - self._dtype) + weighted_counts = tf.dtypes.cast( + tf.math.multiply(num_classes, class_counts), self._dtype) class_weights = tf.Variable(num_samples, dtype=self._dtype) / \ tf.Variable(weighted_counts, dtype=self._dtype) else: @@ -293,8 +280,6 @@ class BoltOnModel(Model): # pylint: disable=abstract-method raise ValueError('Detected class_weights shape: {0} instead of ' '1D array'.format(class_weights.shape)) if class_weights.shape[0] != num_classes: - raise ValueError( - 'Detected array length: {0} instead of: {1}'.format( - class_weights.shape[0], - num_classes)) + raise ValueError('Detected array length: {0} instead of: {1}'.format( + class_weights.shape[0], num_classes)) return class_weights diff --git a/tensorflow_privacy/privacy/bolt_on/models_test.py b/tensorflow_privacy/privacy/bolt_on/models_test.py index 54d3a08..566741c 100644 --- a/tensorflow_privacy/privacy/bolt_on/models_test.py +++ b/tensorflow_privacy/privacy/bolt_on/models_test.py @@ -73,9 +73,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): def call(self, y_true, y_pred): """Loss function that is minimized at the mean of the input points.""" return 0.5 * tf.reduce_sum( - tf.math.squared_difference(y_true, y_pred), - axis=1 - ) + tf.math.squared_difference(y_true, y_pred), axis=1) def max_class_weight(self, class_weight): """the maximum weighting in class weights (max value) as a scalar tensor. 
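The `'balanced'` branch of `calculate_class_weights` above reduces to weighting each class by `num_samples / (num_classes * class_count)`. A minimal NumPy sketch of that rule (illustrative only, not part of the patch):

```python
# Sketch of the 'balanced' weighting computed by BoltOnModel.calculate_class_weights:
# rarer classes receive proportionally larger weights.
import numpy as np

def balanced_class_weights(class_counts):
  class_counts = np.asarray(class_counts, dtype=np.float64)
  num_classes = class_counts.shape[0]
  num_samples = class_counts.sum()
  return num_samples / (num_classes * class_counts)

print(balanced_class_weights([10, 10]))  # [1. 1.]
print(balanced_class_weights([30, 10]))  # [0.6667 2.]
```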
@@ -125,11 +123,13 @@ class InitTests(keras_parameterized.TestCase): """Tests for keras model initialization.""" @parameterized.named_parameters([ - {'testcase_name': 'normal', - 'n_outputs': 1, + { + 'testcase_name': 'normal', + 'n_outputs': 1, }, - {'testcase_name': 'many outputs', - 'n_outputs': 100, + { + 'testcase_name': 'many outputs', + 'n_outputs': 100, }, ]) def test_init_params(self, n_outputs): @@ -143,8 +143,9 @@ class InitTests(keras_parameterized.TestCase): self.assertIsInstance(clf, models.BoltOnModel) @parameterized.named_parameters([ - {'testcase_name': 'invalid n_outputs', - 'n_outputs': -1, + { + 'testcase_name': 'invalid n_outputs', + 'n_outputs': -1, }, ]) def test_bad_init_params(self, n_outputs): @@ -158,15 +159,17 @@ class InitTests(keras_parameterized.TestCase): models.BoltOnModel(n_outputs) @parameterized.named_parameters([ - {'testcase_name': 'string compile', - 'n_outputs': 1, - 'loss': TestLoss(1, 1, 1), - 'optimizer': 'adam', + { + 'testcase_name': 'string compile', + 'n_outputs': 1, + 'loss': TestLoss(1, 1, 1), + 'optimizer': 'adam', }, - {'testcase_name': 'test compile', - 'n_outputs': 100, - 'loss': TestLoss(1, 1, 1), - 'optimizer': TestOptimizer(), + { + 'testcase_name': 'test compile', + 'n_outputs': 100, + 'loss': TestLoss(1, 1, 1), + 'optimizer': TestOptimizer(), }, ]) def test_compile(self, n_outputs, loss, optimizer): @@ -183,18 +186,17 @@ class InitTests(keras_parameterized.TestCase): clf.compile(optimizer, loss) self.assertEqual(clf.loss, loss) - @parameterized.named_parameters([ - {'testcase_name': 'Not strong loss', - 'n_outputs': 1, - 'loss': losses.BinaryCrossentropy(), - 'optimizer': 'adam', - }, - {'testcase_name': 'Not valid optimizer', - 'n_outputs': 1, - 'loss': TestLoss(1, 1, 1), - 'optimizer': 'ada', - } - ]) + @parameterized.named_parameters([{ + 'testcase_name': 'Not strong loss', + 'n_outputs': 1, + 'loss': losses.BinaryCrossentropy(), + 'optimizer': 'adam', + }, { + 'testcase_name': 'Not valid optimizer', + 'n_outputs': 1, + 'loss': TestLoss(1, 1, 1), + 'optimizer': 'ada', + }]) def test_bad_compile(self, n_outputs, loss, optimizer): """test bad compilations of BoltOnModel that should raise errors. @@ -231,17 +233,11 @@ def _cat_dataset(n_samples, input_dim, n_classes, batch_size, generator=False): x_stack = [] y_stack = [] for i_class in range(n_classes): - x_stack.append( - tf.constant(1*i_class, tf.float32, (n_samples, input_dim)) - ) - y_stack.append( - tf.constant(i_class, tf.float32, (n_samples, n_classes)) - ) + x_stack.append(tf.constant(1 * i_class, tf.float32, (n_samples, input_dim))) + y_stack.append(tf.constant(i_class, tf.float32, (n_samples, n_classes))) x_set, y_set = tf.stack(x_stack), tf.stack(y_stack) if generator: - dataset = tf.data.Dataset.from_tensor_slices( - (x_set, y_set) - ) + dataset = tf.data.Dataset.from_tensor_slices((x_set, y_set)) dataset = dataset.batch(batch_size=batch_size) return dataset return x_set, y_set @@ -266,8 +262,8 @@ def _do_fit(n_samples, epsilon: privacy parameter generator: True to create a generator, False to use an iterator batch_size: batch_size to use - reset_n_samples: True to set _samples to None prior to fitting. - False does nothing + reset_n_samples: True to set _samples to None prior to fitting. False does + nothing optimizer: instance of TestOptimizer loss: instance of TestLoss distribution: distribution to get noise from. 
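For reference, the fit path `_do_fit` drives below boils down to compiling a `BoltOnModel` with a strongly convex loss and passing the privacy arguments straight to `fit`. A hedged sketch using the classes this patch touches; the toy data is invented for illustration:

```python
# Sketch of the call pattern these tests exercise: compile a BoltOnModel with
# a StrongConvex loss, then pass noise_distribution, epsilon and n_samples
# directly to fit().
import tensorflow as tf
from tensorflow_privacy.privacy.bolt_on import losses
from tensorflow_privacy.privacy.bolt_on import models

x = tf.constant([[0.0], [0.0], [1.0], [1.0]])
y = tf.constant([[0.0], [0.0], [1.0], [1.0]])

clf = models.BoltOnModel(n_outputs=1)
# Positional args as in the tests' init_args: reg_lambda, c, radius_constant.
clf.compile(tf.optimizers.SGD(), losses.StrongConvexBinaryCrossentropy(1, 1, 1))
clf.fit(x, y, batch_size=2, n_samples=4,
        noise_distribution='laplace', epsilon=1)
```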
@@ -279,37 +275,30 @@ def _do_fit(n_samples, clf.compile(optimizer, loss) if generator: x = _cat_dataset( - n_samples, - input_dim, - n_outputs, - batch_size, - generator=generator - ) + n_samples, input_dim, n_outputs, batch_size, generator=generator) y = None # x = x.batch(batch_size) - x = x.shuffle(n_samples//2) + x = x.shuffle(n_samples // 2) batch_size = None if reset_n_samples: n_samples = None - clf.fit_generator(x, - n_samples=n_samples, - noise_distribution=distribution, - epsilon=epsilon) + clf.fit_generator( + x, + n_samples=n_samples, + noise_distribution=distribution, + epsilon=epsilon) else: x, y = _cat_dataset( - n_samples, - input_dim, - n_outputs, - batch_size, - generator=generator) + n_samples, input_dim, n_outputs, batch_size, generator=generator) if reset_n_samples: n_samples = None - clf.fit(x, - y, - batch_size=batch_size, - n_samples=n_samples, - noise_distribution=distribution, - epsilon=epsilon) + clf.fit( + x, + y, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=distribution, + epsilon=epsilon) return clf @@ -318,21 +307,25 @@ class FitTests(keras_parameterized.TestCase): # @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ - {'testcase_name': 'iterator fit', - 'generator': False, - 'reset_n_samples': True, + { + 'testcase_name': 'iterator fit', + 'generator': False, + 'reset_n_samples': True, }, - {'testcase_name': 'iterator fit no samples', - 'generator': False, - 'reset_n_samples': True, + { + 'testcase_name': 'iterator fit no samples', + 'generator': False, + 'reset_n_samples': True, }, - {'testcase_name': 'generator fit', - 'generator': True, - 'reset_n_samples': False, + { + 'testcase_name': 'generator fit', + 'generator': True, + 'reset_n_samples': False, }, - {'testcase_name': 'with callbacks', - 'generator': True, - 'reset_n_samples': False, + { + 'testcase_name': 'with callbacks', + 'generator': True, + 'reset_n_samples': False, }, ]) def test_fit(self, generator, reset_n_samples): @@ -363,8 +356,9 @@ class FitTests(keras_parameterized.TestCase): self.assertEqual(hasattr(clf, 'layers'), True) @parameterized.named_parameters([ - {'testcase_name': 'generator fit', - 'generator': True, + { + 'testcase_name': 'generator fit', + 'generator': True, }, ]) def test_fit_gen(self, generator): @@ -382,27 +376,24 @@ class FitTests(keras_parameterized.TestCase): clf = models.BoltOnModel(n_classes) clf.compile(optimizer, loss) x = _cat_dataset( - n_samples, - input_dim, - n_classes, - batch_size, - generator=generator - ) + n_samples, input_dim, n_classes, batch_size, generator=generator) x = x.batch(batch_size) x = x.shuffle(n_samples // 2) clf.fit_generator(x, n_samples=n_samples) self.assertEqual(hasattr(clf, 'layers'), True) @parameterized.named_parameters([ - {'testcase_name': 'iterator no n_samples', - 'generator': True, - 'reset_n_samples': True, - 'distribution': 'laplace' + { + 'testcase_name': 'iterator no n_samples', + 'generator': True, + 'reset_n_samples': True, + 'distribution': 'laplace' }, - {'testcase_name': 'invalid distribution', - 'generator': True, - 'reset_n_samples': True, - 'distribution': 'not_valid' + { + 'testcase_name': 'invalid distribution', + 'generator': True, + 'reset_n_samples': True, + 'distribution': 'not_valid' }, ]) def test_bad_fit(self, generator, reset_n_samples, distribution): @@ -422,40 +413,33 @@ class FitTests(keras_parameterized.TestCase): epsilon = 1 batch_size = 1 n_samples = 10 - _do_fit( - n_samples, - input_dim, - n_classes, - epsilon, - generator, - batch_size, - 
reset_n_samples, - optimizer, - loss, - distribution - ) + _do_fit(n_samples, input_dim, n_classes, epsilon, generator, batch_size, + reset_n_samples, optimizer, loss, distribution) @parameterized.named_parameters([ - {'testcase_name': 'None class_weights', - 'class_weights': None, - 'class_counts': None, - 'num_classes': None, - 'result': 1}, - {'testcase_name': 'class weights array', - 'class_weights': [1, 1], - 'class_counts': [1, 1], - 'num_classes': 2, - 'result': [1, 1]}, - {'testcase_name': 'class weights balanced', - 'class_weights': 'balanced', - 'class_counts': [1, 1], - 'num_classes': 2, - 'result': [1, 1]}, + { + 'testcase_name': 'None class_weights', + 'class_weights': None, + 'class_counts': None, + 'num_classes': None, + 'result': 1 + }, + { + 'testcase_name': 'class weights array', + 'class_weights': [1, 1], + 'class_counts': [1, 1], + 'num_classes': 2, + 'result': [1, 1] + }, + { + 'testcase_name': 'class weights balanced', + 'class_weights': 'balanced', + 'class_counts': [1, 1], + 'num_classes': 2, + 'result': [1, 1] + }, ]) - def test_class_calculate(self, - class_weights, - class_counts, - num_classes, + def test_class_calculate(self, class_weights, class_counts, num_classes, result): """Tests the BOltonModel calculate_class_weights method. @@ -466,61 +450,68 @@ class FitTests(keras_parameterized.TestCase): result: expected result """ clf = models.BoltOnModel(1, 1) - expected = clf.calculate_class_weights(class_weights, - class_counts, + expected = clf.calculate_class_weights(class_weights, class_counts, num_classes) if hasattr(expected, 'numpy'): expected = expected.numpy() - self.assertAllEqual( - expected, - result - ) - @parameterized.named_parameters([ - {'testcase_name': 'class weight not valid str', - 'class_weights': 'not_valid', - 'class_counts': 1, - 'num_classes': 1, - 'err_msg': 'Detected string class_weights with value: not_valid'}, - {'testcase_name': 'no class counts', - 'class_weights': 'balanced', - 'class_counts': None, - 'num_classes': 1, - 'err_msg': 'Class counts must be provided if ' - 'using class_weights=balanced'}, - {'testcase_name': 'no num classes', - 'class_weights': 'balanced', - 'class_counts': [1], - 'num_classes': None, - 'err_msg': 'num_classes must be provided if ' - 'using class_weights=balanced'}, - {'testcase_name': 'class counts not array', - 'class_weights': 'balanced', - 'class_counts': 1, - 'num_classes': None, - 'err_msg': 'class counts must be a 1D array.'}, - {'testcase_name': 'class counts array, no num classes', - 'class_weights': [1], - 'class_counts': None, - 'num_classes': None, - 'err_msg': 'You must pass a value for num_classes if ' - 'creating an array of class_weights'}, - {'testcase_name': 'class counts array, improper shape', - 'class_weights': [[1], [1]], - 'class_counts': None, - 'num_classes': 2, - 'err_msg': 'Detected class_weights shape'}, - {'testcase_name': 'class counts array, wrong number classes', - 'class_weights': [1, 1, 1], - 'class_counts': None, - 'num_classes': 2, - 'err_msg': 'Detected array length:'}, - ]) + self.assertAllEqual(expected, result) - def test_class_errors(self, - class_weights, - class_counts, - num_classes, + @parameterized.named_parameters([ + { + 'testcase_name': 'class weight not valid str', + 'class_weights': 'not_valid', + 'class_counts': 1, + 'num_classes': 1, + 'err_msg': 'Detected string class_weights with value: not_valid' + }, + { + 'testcase_name': 'no class counts', + 'class_weights': 'balanced', + 'class_counts': None, + 'num_classes': 1, + 'err_msg': 'Class counts must 
be provided if ' + 'using class_weights=balanced' + }, + { + 'testcase_name': 'no num classes', + 'class_weights': 'balanced', + 'class_counts': [1], + 'num_classes': None, + 'err_msg': 'num_classes must be provided if ' + 'using class_weights=balanced' + }, + { + 'testcase_name': 'class counts not array', + 'class_weights': 'balanced', + 'class_counts': 1, + 'num_classes': None, + 'err_msg': 'class counts must be a 1D array.' + }, + { + 'testcase_name': 'class counts array, no num classes', + 'class_weights': [1], + 'class_counts': None, + 'num_classes': None, + 'err_msg': 'You must pass a value for num_classes if ' + 'creating an array of class_weights' + }, + { + 'testcase_name': 'class counts array, improper shape', + 'class_weights': [[1], [1]], + 'class_counts': None, + 'num_classes': 2, + 'err_msg': 'Detected class_weights shape' + }, + { + 'testcase_name': 'class counts array, wrong number classes', + 'class_weights': [1, 1, 1], + 'class_counts': None, + 'num_classes': 2, + 'err_msg': 'Detected array length:' + }, + ]) + def test_class_errors(self, class_weights, class_counts, num_classes, err_msg): """Tests the BOltonModel calculate_class_weights method. @@ -534,9 +525,7 @@ class FitTests(keras_parameterized.TestCase): """ clf = models.BoltOnModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method - clf.calculate_class_weights(class_weights, - class_counts, - num_classes) + clf.calculate_class_weights(class_weights, class_counts, num_classes) if __name__ == '__main__': diff --git a/tensorflow_privacy/privacy/bolt_on/optimizers.py b/tensorflow_privacy/privacy/bolt_on/optimizers.py index 5029f73..87b621e 100644 --- a/tensorflow_privacy/privacy/bolt_on/optimizers.py +++ b/tensorflow_privacy/privacy/bolt_on/optimizers.py @@ -48,14 +48,12 @@ class GammaBetaDecreasingStep( 'This is performed automatically by using the ' '{1} as a context manager, ' 'as desired'.format(self.__class__.__name__, - BoltOn.__class__.__name__ - ) - ) + BoltOn.__class__.__name__)) dtype = self.beta.dtype one = tf.constant(1, dtype) - return tf.math.minimum(tf.math.reduce_min(one/self.beta), - one/(self.gamma*math_ops.cast(step, dtype)) - ) + return tf.math.minimum( + tf.math.reduce_min(one / self.beta), + one / (self.gamma * math_ops.cast(step, dtype))) def get_config(self): """Return config to setup the learning rate scheduler.""" @@ -108,16 +106,16 @@ class BoltOn(optimizer_v2.OptimizerV2): Descent-based Analytics by Xi Wu et. al. """ - def __init__(self, # pylint: disable=super-init-not-called - optimizer, - loss, - dtype=tf.float32, - ): + def __init__( + self, # pylint: disable=super-init-not-called + optimizer, + loss, + dtype=tf.float32, + ): """Constructor. Args: - optimizer: Optimizer_v2 or subclass to be used as the optimizer - (wrapped). + optimizer: Optimizer_v2 or subclass to be used as the optimizer (wrapped). loss: StrongConvexLoss function that the model is being compiled with. dtype: dtype """ @@ -155,8 +153,8 @@ class BoltOn(optimizer_v2.OptimizerV2): """Normalize the weights to the R-ball. Args: - force: True to normalize regardless of previous weight values. - False to check if weights > R-ball and only normalize then. + force: True to normalize regardless of previous weight values. False to + check if weights > R-ball and only normalize then. Raises: Exception: If not called from inside this optimizer context. 
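The learning-rate schedule `GammaBetaDecreasingStep` computes above is `min(1/beta, 1/(gamma * t))`: constant for small step counts, then O(1/t) decay. A plain-Python sketch of the rule:

```python
# BoltOn learning-rate rule from GammaBetaDecreasingStep:
# lr(t) = min(1/beta, 1/(gamma * t)).
def bolton_learning_rate(step, beta, gamma):
  return min(1.0 / beta, 1.0 / (gamma * step))

for t in (1, 2, 5, 10):
  print(t, bolton_learning_rate(t, beta=2.0, gamma=1.0))
# 1 -> 0.5, 2 -> 0.5, 5 -> 0.2, 10 -> 0.1
```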
@@ -199,14 +197,14 @@ class BoltOn(optimizer_v2.OptimizerV2): l2_sensitivity = (2 * loss.lipchitz_constant(self.class_weights)) / \ (loss.gamma() * self.n_samples * self.batch_size) - unit_vector = tf.random.normal(shape=(input_dim, output_dim), - mean=0, - seed=1, - stddev=1.0, - dtype=self.dtype) + unit_vector = tf.random.normal( + shape=(input_dim, output_dim), + mean=0, + seed=1, + stddev=1.0, + dtype=self.dtype) unit_vector = unit_vector / tf.math.sqrt( - tf.reduce_sum(tf.math.square(unit_vector), axis=0) - ) + tf.reduce_sum(tf.math.square(unit_vector), axis=0)) beta = l2_sensitivity / per_class_epsilon alpha = input_dim # input_dim @@ -214,8 +212,7 @@ class BoltOn(optimizer_v2.OptimizerV2): alpha, beta=1 / beta, seed=1, - dtype=self.dtype - ) + dtype=self.dtype) return unit_vector * gamma raise NotImplementedError('Noise distribution: {0} is not ' 'a valid distribution'.format(distribution)) @@ -245,10 +242,8 @@ class BoltOn(optimizer_v2.OptimizerV2): except AttributeError: raise AttributeError( "Neither '{0}' nor '{1}' object has attribute '{2}'" - "".format(self.__class__.__name__, - self._internal_optimizer.__class__.__name__, - name) - ) + ''.format(self.__class__.__name__, + self._internal_optimizer.__class__.__name__, name)) def __setattr__(self, key, value): """Set attribute to self instance if its the internal optimizer. @@ -309,20 +304,15 @@ class BoltOn(optimizer_v2.OptimizerV2): self._is_init = True return self - def __call__(self, - noise_distribution, - epsilon, - layers, - class_weights, - n_samples, - batch_size): + def __call__(self, noise_distribution, epsilon, layers, class_weights, + n_samples, batch_size): """Accepts required values for bolton method from context entry point. Stores them on the optimizer for use throughout fitting. Args: - noise_distribution: the noise distribution to pick. - see _accepted_distributions and get_noise for possible values. + noise_distribution: the noise distribution to pick. see + _accepted_distributions and get_noise for possible values. epsilon: privacy parameter. Lower gives more privacy but less utility. layers: list of Keras/Tensorflow layers. 
Can be found as model.layers class_weights: class_weights used, which may either be a scalar or 1D @@ -341,8 +331,8 @@ class BoltOn(optimizer_v2.OptimizerV2): 'distributions'.format(noise_distribution, _accepted_distributions)) self.noise_distribution = noise_distribution - self.learning_rate.initialize(self.loss.beta(class_weights), - self.loss.gamma()) + self.learning_rate.initialize( + self.loss.beta(class_weights), self.loss.gamma()) self.epsilon = tf.constant(epsilon, dtype=self.dtype) self.class_weights = tf.constant(class_weights, dtype=self.dtype) self.n_samples = tf.constant(n_samples, dtype=self.dtype) @@ -369,9 +359,10 @@ class BoltOn(optimizer_v2.OptimizerV2): for layer in self.layers: input_dim = layer.kernel.shape[0] output_dim = layer.units - noise = self.get_noise(input_dim, - output_dim, - ) + noise = self.get_noise( + input_dim, + output_dim, + ) layer.kernel = tf.math.add(layer.kernel, noise) self.noise_distribution = None self.learning_rate.de_initialize() diff --git a/tensorflow_privacy/privacy/bolt_on/optimizers_test.py b/tensorflow_privacy/privacy/bolt_on/optimizers_test.py index 19d2755..8591fc3 100644 --- a/tensorflow_privacy/privacy/bolt_on/optimizers_test.py +++ b/tensorflow_privacy/privacy/bolt_on/optimizers_test.py @@ -111,9 +111,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): def call(self, y_true, y_pred): """Loss function that is minimized at the mean of the input points.""" return 0.5 * tf.reduce_sum( - tf.math.squared_difference(y_true, y_pred), - axis=1 - ) + tf.math.squared_difference(y_true, y_pred), axis=1) def max_class_weight(self, class_weight, dtype=tf.float32): """the maximum weighting in class weights (max value) as a scalar tensor. @@ -183,20 +181,24 @@ class TestOptimizer(OptimizerV2): class BoltonOptimizerTest(keras_parameterized.TestCase): """BoltOn Optimizer tests.""" + @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ - {'testcase_name': 'getattr', - 'fn': '__getattr__', - 'args': ['dtype'], - 'result': tf.float32, - 'test_attr': None}, - {'testcase_name': 'project_weights_to_r', - 'fn': 'project_weights_to_r', - 'args': ['dtype'], - 'result': None, - 'test_attr': ''}, + { + 'testcase_name': 'getattr', + 'fn': '__getattr__', + 'args': ['dtype'], + 'result': tf.float32, + 'test_attr': None + }, + { + 'testcase_name': 'project_weights_to_r', + 'fn': 'project_weights_to_r', + 'args': ['dtype'], + 'result': None, + 'test_attr': '' + }, ]) - def test_fn(self, fn, args, result, test_attr): """test that a fn of BoltOn optimizer is working as expected. @@ -204,9 +206,8 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): fn: method of Optimizer to test args: args to optimizer fn result: the expected result - test_attr: None if the fn returns the test result. Otherwise, this is - the attribute of BoltOn to check against result with. - + test_attr: None if the fn returns the test result. Otherwise, this is the + attribute of BoltOn to check against result with. 
""" tf.random.set_seed(1) loss = TestLoss(1, 1, 1) @@ -231,30 +232,38 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ - {'testcase_name': '1 value project to r=1', - 'r': 1, - 'init_value': 2, - 'shape': (1,), - 'n_out': 1, - 'result': [[1]]}, - {'testcase_name': '2 value project to r=1', - 'r': 1, - 'init_value': 2, - 'shape': (2,), - 'n_out': 1, - 'result': [[0.707107], [0.707107]]}, - {'testcase_name': '1 value project to r=2', - 'r': 2, - 'init_value': 3, - 'shape': (1,), - 'n_out': 1, - 'result': [[2]]}, - {'testcase_name': 'no project', - 'r': 2, - 'init_value': 1, - 'shape': (1,), - 'n_out': 1, - 'result': [[1]]}, + { + 'testcase_name': '1 value project to r=1', + 'r': 1, + 'init_value': 2, + 'shape': (1,), + 'n_out': 1, + 'result': [[1]] + }, + { + 'testcase_name': '2 value project to r=1', + 'r': 1, + 'init_value': 2, + 'shape': (2,), + 'n_out': 1, + 'result': [[0.707107], [0.707107]] + }, + { + 'testcase_name': '1 value project to r=2', + 'r': 2, + 'init_value': 3, + 'shape': (1,), + 'n_out': 1, + 'result': [[2]] + }, + { + 'testcase_name': 'no project', + 'r': 2, + 'init_value': 1, + 'shape': (1,), + 'n_out': 1, + 'result': [[1]] + }, ]) def test_project(self, r, shape, n_out, init_value, result): """test that a fn of BoltOn optimizer is working as expected. @@ -267,6 +276,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): result: the expected output after projection. """ tf.random.set_seed(1) + def project_fn(r): loss = TestLoss(1, 1, r) bolton = opt.BoltOn(TestOptimizer(), loss) @@ -283,15 +293,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): bolton.n_samples = 1 bolton.project_weights_to_r() return _ops.convert_to_tensor_v2(bolton.layers[0].kernel, tf.float32) + res = project_fn(r) self.assertAllClose(res, result) @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ - {'testcase_name': 'normal values', - 'epsilon': 2, - 'noise': 'laplace', - 'class_weights': 1}, + { + 'testcase_name': 'normal values', + 'epsilon': 2, + 'noise': 'laplace', + 'class_weights': 1 + }, ]) def test_context_manager(self, noise, epsilon, class_weights): """Tests the context manager functionality of the optimizer. @@ -301,6 +314,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): epsilon: epsilon privacy parameter to use class_weights: class_weights to use """ + @tf.function def test_run(): loss = TestLoss(1, 1, 1) @@ -313,18 +327,23 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): with bolton(noise, epsilon, model.layers, class_weights, 1, 1) as _: pass return _ops.convert_to_tensor_v2(bolton.epsilon, dtype=tf.float32) + epsilon = test_run() self.assertEqual(epsilon.numpy(), -1) @parameterized.named_parameters([ - {'testcase_name': 'invalid noise', - 'epsilon': 1, - 'noise': 'not_valid', - 'err_msg': 'Detected noise distribution: not_valid not one of:'}, - {'testcase_name': 'invalid epsilon', - 'epsilon': -1, - 'noise': 'laplace', - 'err_msg': 'Detected epsilon: -1. 
Valid range is 0 < epsilon 125).astype(int) else: + def label_fn(x): if x < 110.0: return 0 @@ -42,10 +43,11 @@ def make_input_data(size, classes): else: return 2 - labels = list(map( - label_fn, - np.power(feature_a, 3) + np.power(feature_b, 2) + - np.power(feature_c, 1) + noise)) + labels = list( + map( + label_fn, + np.power(feature_a, 3) + np.power(feature_b, 2) + + np.power(feature_c, 1) + noise)) return features, labels @@ -87,6 +89,7 @@ def make_input_fn(features, labels, training, batch_size=16): dataset = dataset.shuffle(1000) return dataset.batch(batch_size) + return input_fn diff --git a/tensorflow_privacy/privacy/estimators/v1/dnn_test.py b/tensorflow_privacy/privacy/estimators/v1/dnn_test.py index 2a83bb8..5ba0433 100644 --- a/tensorflow_privacy/privacy/estimators/v1/dnn_test.py +++ b/tensorflow_privacy/privacy/estimators/v1/dnn_test.py @@ -64,5 +64,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase): input_fn=test_utils.make_input_fn(predict_features, predict_labels, False)) + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow_privacy/privacy/estimators/v1/head.py b/tensorflow_privacy/privacy/estimators/v1/head.py index cf223eb..1e555d8 100644 --- a/tensorflow_privacy/privacy/estimators/v1/head.py +++ b/tensorflow_privacy/privacy/estimators/v1/head.py @@ -434,8 +434,8 @@ def _binary_logistic_or_multi_class_head(n_classes, weight_column, encoded as integer or float within [0, 1] for `n_classes=2` and encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there will be errors if vocabulary is not provided and labels are string. - loss_reduction: Describes how to reduce training loss over batch. - Defaults to `SUM`. + loss_reduction: Describes how to reduce training loss over batch. Defaults + to `SUM`. Returns: `head._Head` instance. diff --git a/tensorflow_privacy/privacy/keras_models/dp_keras_model.py b/tensorflow_privacy/privacy/keras_models/dp_keras_model.py index 5c87868..9c81f78 100644 --- a/tensorflow_privacy/privacy/keras_models/dp_keras_model.py +++ b/tensorflow_privacy/privacy/keras_models/dp_keras_model.py @@ -53,9 +53,10 @@ def make_dp_model_class(cls): model.fit(train_data, train_labels, epochs=1, batch_size=32) ``` - """).format(base_model='tf.keras.' + cls.__name__, - short_base_model=cls.__name__, - dp_model_class='DP' + cls.__name__) + """).format( + base_model='tf.keras.' + cls.__name__, + short_base_model=cls.__name__, + dp_model_class='DP' + cls.__name__) def __init__( self, diff --git a/tensorflow_privacy/privacy/logistic_regression/datasets.py b/tensorflow_privacy/privacy/logistic_regression/datasets.py index 92ea4a3..2b54a26 100644 --- a/tensorflow_privacy/privacy/logistic_regression/datasets.py +++ b/tensorflow_privacy/privacy/logistic_regression/datasets.py @@ -40,8 +40,8 @@ class RegressionDataset: """Class for storing labeled examples for a regression dataset. Attributes: - points: array of shape (num_examples, dimension) containing the points to - be classified. + points: array of shape (num_examples, dimension) containing the points to be + classified. labels: array of shape (num_examples,) containing the corresponding labels, each belonging to the set {0,1,...,num_classes-1}, where num_classes is the number of classes. 
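The invariant the datasets tests verify below is that each example's label maximizes the inner product with the separator columns. A small NumPy sketch of data satisfying it (uniform points are an illustrative choice, not necessarily what `datasets.py` samples):

```python
# Sketch of the labeling rule for linearly separable data: label each point
# with the class whose separator column maximizes the inner product.
import numpy as np

def linearly_separable(num_examples, weights, seed=0):
  rng = np.random.default_rng(seed)
  dimension, _ = weights.shape
  points = rng.uniform(size=(num_examples, dimension))
  labels = np.argmax(points @ weights, axis=1)
  return points, labels

points, labels = linearly_separable(5, np.array([[-1.0, 1.0], [1.0, -1.0]]))
# product[i, labels[i]] >= product[i, j] for every j, as the test asserts.
```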
@@ -51,7 +51,7 @@ class RegressionDataset: def linearly_separable_labeled_examples( - num_examples: int, weights: np.ndarray)-> RegressionDataset: + num_examples: int, weights: np.ndarray) -> RegressionDataset: """Generates num_examples labeled examples using separator given by weights. Args: @@ -75,7 +75,7 @@ def linearly_separable_labeled_examples( def synthetic_linearly_separable_data( num_train: int, num_test: int, dimension: int, - num_classes: int)-> Tuple[RegressionDataset, RegressionDataset]: + num_classes: int) -> Tuple[RegressionDataset, RegressionDataset]: """Generates synthetic train and test data for logistic regression. Args: @@ -103,7 +103,7 @@ def synthetic_linearly_separable_data( return (train_dataset, test_dataset) -def mnist_dataset()-> Tuple[RegressionDataset, RegressionDataset]: +def mnist_dataset() -> Tuple[RegressionDataset, RegressionDataset]: """Generates (normalized) train and test data for MNIST. Returns: diff --git a/tensorflow_privacy/privacy/logistic_regression/datasets_test.py b/tensorflow_privacy/privacy/logistic_regression/datasets_test.py index 7d9b27f..a18e619 100644 --- a/tensorflow_privacy/privacy/logistic_regression/datasets_test.py +++ b/tensorflow_privacy/privacy/logistic_regression/datasets_test.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for tensorflow_privacy.privacy.logistic_regression.datasets.""" import unittest + from absl.testing import parameterized import numpy as np from tensorflow_privacy.privacy.logistic_regression import datasets @@ -22,14 +22,16 @@ from tensorflow_privacy.privacy.logistic_regression import datasets class DatasetsTest(parameterized.TestCase): @parameterized.parameters( - (1, np.array([[1],])), - (2, np.array([[1],])), - (5, np.array([[-1, 1], [1, -1]])), + (1, np.array([ + [1], + ])), (2, np.array([ + [1], + ])), (5, np.array([[-1, 1], [1, -1]])), (15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]]))) def test_linearly_separable_labeled_examples(self, num_examples, weights): dimension, num_classes = weights.shape - dataset = datasets.linearly_separable_labeled_examples(num_examples, - weights) + dataset = datasets.linearly_separable_labeled_examples( + num_examples, weights) self.assertEqual(dataset.points.shape, (num_examples, dimension)) self.assertEqual(dataset.labels.shape, (num_examples,)) product = np.matmul(dataset.points, weights) @@ -37,11 +39,8 @@ class DatasetsTest(parameterized.TestCase): for j in range(num_classes): self.assertGreaterEqual(product[i, dataset.labels[i]], product[i, j]) - @parameterized.parameters( - (1, 1, 1, 2), - (20, 5, 1, 2), - (20, 5, 2, 2), - (1000, 10, 15, 10)) + @parameterized.parameters((1, 1, 1, 2), (20, 5, 1, 2), (20, 5, 2, 2), + (1000, 10, 15, 10)) def test_synthetic(self, num_train, num_test, dimension, num_classes): (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( num_train, num_test, dimension, num_classes) @@ -73,5 +72,6 @@ class DatasetsTest(parameterized.TestCase): self.assertTrue(np.all(np.isin(train_dataset.labels, range(10)))) self.assertTrue(np.all(np.isin(test_dataset.labels, range(10)))) + if __name__ == '__main__': unittest.main() diff --git a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py index fd80485..8a2200e 100644 --- 
a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py +++ b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py @@ -52,21 +52,26 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer): (self._l2_regularizer, variance) = self.logistic_objective_perturbation_parameters( num_train, epsilon, delta, num_classes, input_clipping_norm) - self._b = tf.random.normal(shape=[dimension, num_classes], mean=0.0, - stddev=math.sqrt(variance), - dtype=tf.dtypes.float32) + self._b = tf.random.normal( + shape=[dimension, num_classes], + mean=0.0, + stddev=math.sqrt(variance), + dtype=tf.dtypes.float32) def __call__(self, x): - return (tf.reduce_sum(self._l2_regularizer*tf.square(x)) + - (1/self._num_train)*tf.reduce_sum(tf.multiply(x, self._b))) + return (tf.reduce_sum(self._l2_regularizer * tf.square(x)) + + (1 / self._num_train) * tf.reduce_sum(tf.multiply(x, self._b))) def get_config(self): - return {'l2_regularizer': self._l2_regularizer, - 'num_train': self._num_train, 'b': self._b} + return { + 'l2_regularizer': self._l2_regularizer, + 'num_train': self._num_train, + 'b': self._b + } def logistic_objective_perturbation_parameters( self, num_train: int, epsilon: float, delta: float, num_classes: int, - input_clipping_norm: float)-> Tuple[float, float]: + input_clipping_norm: float) -> Tuple[float, float]: """Computes l2-regularization coefficient and Gaussian noise variance. The setting is based on Algorithm 1 of Kifer et al. @@ -85,19 +90,21 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer): # zeta is an upper bound on the l2-norm of the loss function gradient. zeta = input_clipping_norm # variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6): - variance = zeta*zeta*(8*np.log(2/delta)+4*epsilon)/(epsilon*epsilon) + variance = zeta * zeta * (8 * np.log(2 / delta) + 4 * epsilon) / ( + epsilon * epsilon) # lambda_coefficient is an upper bound on the spectral norm of the Hessian # of the loss function. - lambda_coefficient = math.sqrt(2*num_classes)*(input_clipping_norm**2)/4 - l2_regularizer = lambda_coefficient/(epsilon*num_train) + lambda_coefficient = math.sqrt(2 * num_classes) * (input_clipping_norm** + 2) / 4 + l2_regularizer = lambda_coefficient / (epsilon * num_train) return (l2_regularizer, variance) def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset, test_dataset: datasets.RegressionDataset, - epsilon: float, delta: float, - epochs: int, num_classes: int, - input_clipping_norm: float)-> List[float]: + epsilon: float, delta: float, epochs: int, + num_classes: int, + input_clipping_norm: float) -> List[float]: """Trains and validates differentially private logistic regression model. The training is based on the Algorithm 1 of Kifer et al. 
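As a sanity check on the objective-perturbation parameters in the hunk above, the same arithmetic can be reproduced standalone (an illustrative re-derivation with made-up inputs, not code from the patch; np.log above and math.log here are both natural logarithms):

import math

num_train, epsilon, delta = 10000, 1.0, 1e-5
num_classes, input_clipping_norm = 2, 1.0

# Mirrors logistic_objective_perturbation_parameters above.
zeta = input_clipping_norm
variance = zeta * zeta * (8 * math.log(2 / delta) + 4 * epsilon) / (
    epsilon * epsilon)
lambda_coefficient = math.sqrt(2 * num_classes) * input_clipping_norm**2 / 4
l2_regularizer = lambda_coefficient / (epsilon * num_train)

print('noise variance ~= %.2f' % variance)        # ~101.65
print('l2 regularizer ~= %.1e' % l2_regularizer)  # 5.0e-05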
@@ -127,13 +134,21 @@ def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset, kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta, num_classes, input_clipping_norm) return single_layer_softmax.single_layer_softmax_classifier( - train_dataset, test_dataset, epochs, num_classes, optimizer, loss, + train_dataset, + test_dataset, + epochs, + num_classes, + optimizer, + loss, kernel_regularizer=kernel_regularizer) -def compute_dpsgd_noise_multiplier( - num_train: int, epsilon: float, delta: float, epochs: int, - batch_size: int, tolerance: float = 1e-2) -> Optional[float]: +def compute_dpsgd_noise_multiplier(num_train: int, + epsilon: float, + delta: float, + epochs: int, + batch_size: int, + tolerance: float = 1e-2) -> Optional[float]: """Computes the noise multiplier for DP-SGD given privacy parameters. The algorithm performs binary search on the values of epsilon. @@ -153,20 +168,17 @@ the given tolerance) for which using DPKerasAdamOptimizer will result in an (epsilon, delta)-differentially private trained model. """ - search_parameters = common.BinarySearchParameters(lower_bound=0, - upper_bound=math.inf, - initial_guess=1, - tolerance=tolerance) + search_parameters = common.BinarySearchParameters( + lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance) return common.inverse_monotone_function( lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0], epsilon, search_parameters) def logistic_dpsgd(train_dataset: datasets.RegressionDataset, - test_dataset: datasets.RegressionDataset, - epsilon: float, delta: float, epochs: int, num_classes: int, - batch_size: int, num_microbatches: int, - clipping_norm: float)-> List[float]: + test_dataset: datasets.RegressionDataset, epsilon: float, + delta: float, epochs: int, num_classes: int, batch_size: int, + num_microbatches: int, clipping_norm: float) -> List[float]: """Trains and validates private logistic regression model via DP-SGD. The training is based on the differentially private stochastic gradient @@ -183,8 +195,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset, num_classes: number of classes. batch_size: the number of examples in each batch of gradient descent. num_microbatches: the number of microbatches in gradient descent. - clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer - to have l2-norm at most clipping_norm. + clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer to + have l2-norm at most clipping_norm.
Returns: List of test accuracies (one for each epoch) on test_dataset of model @@ -199,7 +211,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset, noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta, epochs, batch_size) optimizer = dp_optimizer_keras.DPKerasAdamOptimizer( - l2_norm_clip=clipping_norm, noise_multiplier=noise_multiplier, + l2_norm_clip=clipping_norm, + noise_multiplier=noise_multiplier, num_microbatches=num_microbatches) loss = tf.keras.losses.CategoricalCrossentropy( reduction=tf.losses.Reduction.NONE) diff --git a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py index 0bfb565..3f34763 100644 --- a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py +++ b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py @@ -27,7 +27,7 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase): (5000, 500, 4, 1, 1e-5, 40, 2, 0.05), (10000, 1000, 3, 1, 1e-5, 40, 4, 0.1), (10000, 1000, 4, 1, 1e-5, 40, 4, 0.1), - ) + ) def test_logistic_objective_perturbation(self, num_train, num_test, dimension, epsilon, delta, epochs, num_classes, tolerance): @@ -44,7 +44,7 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase): (1, 1, 1e-5, 40, 1, 1e-2), (500, 0.1, 1e-5, 40, 50, 1e-2), (5000, 10, 1e-5, 40, 10, 1e-3), - ) + ) def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta, epochs, batch_size, tolerance): noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier( @@ -61,19 +61,22 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase): (5000, 500, 4, 1, 1e-5, 40, 2, 0.05, 10, 10, 1), (5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1), (5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1), - ) - def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, - delta, epochs, num_classes, tolerance, - batch_size, num_microbatches, clipping_norm): + ) + def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, delta, + epochs, num_classes, tolerance, batch_size, + num_microbatches, clipping_norm): (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( num_train, num_test, dimension, num_classes) - accuracy = multinomial_logistic.logistic_dpsgd( - train_dataset, test_dataset, epsilon, delta, epochs, num_classes, - batch_size, num_microbatches, clipping_norm) + accuracy = multinomial_logistic.logistic_dpsgd(train_dataset, test_dataset, + epsilon, delta, epochs, + num_classes, batch_size, + num_microbatches, + clipping_norm) # Since the synthetic data is linearly separable, we expect the test # accuracy to come arbitrarily close to 1 as the number of training examples # grows. self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) + if __name__ == '__main__': unittest.main() diff --git a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py index a8dcac3..80a3596 100644 --- a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py +++ b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Implementation of a single-layer softmax classifier. 
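A usage sketch for `compute_dpsgd_noise_multiplier` as patched above (the import path is from this repository; the concrete numbers are only an example):

from tensorflow_privacy.privacy.logistic_regression import multinomial_logistic

# Smallest noise multiplier (within the default 1e-2 tolerance) such that
# 40 epochs of DP-SGD on 5000 examples is (2.0, 1e-5)-differentially private.
noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier(
    num_train=5000, epsilon=2.0, delta=1e-5, epochs=40, batch_size=50)
if noise_multiplier is None:
  print('search did not succeed for these parameters')  # Optional[float]
else:
  print('noise multiplier: %.3f' % noise_multiplier)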
-""" +"""Implementation of a single-layer softmax classifier.""" from typing import List import tensorflow as tf @@ -22,10 +21,13 @@ from tensorflow_privacy.privacy.logistic_regression import datasets def single_layer_softmax_classifier( train_dataset: datasets.RegressionDataset, test_dataset: datasets.RegressionDataset, - epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer, + epochs: int, + num_classes: int, + optimizer: tf.keras.optimizers.Optimizer, loss: tf.keras.losses.Loss = 'categorical_crossentropy', batch_size: int = 32, - kernel_regularizer: tf.keras.regularizers.Regularizer = None)-> List[float]: + kernel_regularizer: tf.keras.regularizers.Regularizer = None +) -> List[float]: """Trains a single layer neural network classifier with softmax activation. Args: @@ -47,13 +49,17 @@ def single_layer_softmax_classifier( one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes) one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes) model = tf.keras.Sequential() - model.add(tf.keras.layers.Dense(units=num_classes, - activation='softmax', - kernel_regularizer=kernel_regularizer)) + model.add( + tf.keras.layers.Dense( + units=num_classes, + activation='softmax', + kernel_regularizer=kernel_regularizer)) model.compile(optimizer, loss=loss, metrics=['accuracy']) - history = model.fit(train_dataset.points, one_hot_train_labels, - batch_size=batch_size, epochs=epochs, - validation_data=(test_dataset.points, - one_hot_test_labels), - verbose=0) + history = model.fit( + train_dataset.points, + one_hot_train_labels, + batch_size=batch_size, + epochs=epochs, + validation_data=(test_dataset.points, one_hot_test_labels), + verbose=0) return history.history['val_accuracy'] diff --git a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py index 7c0af27..443897e 100644 --- a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py +++ b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests for tensorflow_privacy.privacy.logistic_regression.single_layer_softmax.""" import unittest + from absl.testing import parameterized from tensorflow_privacy.privacy.logistic_regression import datasets from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax @@ -26,7 +26,7 @@ class SingleLayerSoftmaxTest(parameterized.TestCase): (5000, 500, 4, 40, 2, 0.05), (10000, 1000, 3, 40, 4, 0.1), (10000, 1000, 4, 40, 4, 0.1), - ) + ) def test_single_layer_softmax(self, num_train, num_test, dimension, epochs, num_classes, tolerance): (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( @@ -35,5 +35,6 @@ class SingleLayerSoftmaxTest(parameterized.TestCase): train_dataset, test_dataset, epochs, num_classes, 'sgd') self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) + if __name__ == '__main__': unittest.main() diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py index 0401041..35990f6 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py @@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls): l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). noise_multiplier: Ratio of the standard deviation to the clipping norm. num_microbatches: Number of microbatches into which each minibatch is - split. Default is `None` which means that number of microbatches - is equal to batch size (i.e. each microbatch contains exactly one + split. Default is `None` which means that number of microbatches is + equal to batch size (i.e. each microbatch contains exactly one example). If `gradient_accumulation_steps` is greater than 1 and `num_microbatches` is not `None` then the effective number of - microbatches is equal to - `num_microbatches * gradient_accumulation_steps`. + microbatches is equal to `num_microbatches * + gradient_accumulation_steps`. gradient_accumulation_steps: If greater than 1 then optimizer will be accumulating gradients for this number of optimizer steps before applying them to update model weights. If this argument is set to 1 @@ -172,39 +172,39 @@ def make_keras_optimizer_class(cls): if self.gradient_accumulation_steps > 1: apply_update = tf.math.equal( tf.math.floormod(self.iterations + 1, - self.gradient_accumulation_steps), - 0) + self.gradient_accumulation_steps), 0) grad_scaler = tf.cast(1. 
/ self.gradient_accumulation_steps, var_dtype) - apply_state[(var_device, var_dtype)].update( - { - 'apply_update': apply_update, - 'grad_scaler': grad_scaler - }) + apply_state[(var_device, var_dtype)].update({ + 'apply_update': apply_update, + 'grad_scaler': grad_scaler + }) def _resource_apply_dense(self, grad, var, apply_state=None): if self.gradient_accumulation_steps > 1: var_device, var_dtype = var.device, var.dtype.base_dtype - coefficients = ((apply_state or {}).get((var_device, var_dtype)) - or self._fallback_apply_state(var_device, var_dtype)) + coefficients = ((apply_state or {}).get((var_device, var_dtype)) or + self._fallback_apply_state(var_device, var_dtype)) grad_acc = self.get_slot(var, 'grad_acc') def _update_grad(): apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense( grad_acc + grad * coefficients['grad_scaler'], var, apply_state) with tf.control_dependencies([apply_grad_op]): - return grad_acc.assign(tf.zeros_like(grad_acc), - use_locking=self._use_locking, - read_value=False) + return grad_acc.assign( + tf.zeros_like(grad_acc), + use_locking=self._use_locking, + read_value=False) def _accumulate(): - return grad_acc.assign_add(grad * coefficients['grad_scaler'], - use_locking=self._use_locking, - read_value=False) + return grad_acc.assign_add( + grad * coefficients['grad_scaler'], + use_locking=self._use_locking, + read_value=False) return tf.cond(coefficients['apply_update'], _update_grad, _accumulate) else: - return super(DPOptimizerClass, self)._resource_apply_dense( - grad, var, apply_state) + return super(DPOptimizerClass, + self)._resource_apply_dense(grad, var, apply_state) def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs): if self.gradient_accumulation_steps > 1: @@ -220,8 +220,8 @@ def make_keras_optimizer_class(cls): raise NotImplementedError( 'Sparse gradients are not supported with large batch emulation.') else: - return super(DPOptimizerClass, self)._resource_apply_sparse( - *args, **kwargs) + return super(DPOptimizerClass, + self)._resource_apply_sparse(*args, **kwargs) def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): """DP-SGD version of base class method.""" diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py index 293d6e9..590d89f 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py @@ -15,7 +15,6 @@ from absl.testing import parameterized import numpy as np import tensorflow as tf - from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras_vectorized @@ -108,8 +107,7 @@ class DPOptimizerComputeGradientsTest(tf.test.TestCase, parameterized.TestCase): with tape: loss = self._loss(data0, var0) + self._loss(data1, var1) - grads_and_vars = opt._compute_gradients( - loss, [var0, var1], tape=tape) + grads_and_vars = opt._compute_gradients(loss, [var0, var1], tape=tape) self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0]) self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0]) @@ -442,10 +440,9 @@ class DPOptimizerGetGradientsTest(tf.test.TestCase, parameterized.TestCase): ('DPKerasSGDOptimizer 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1), ('DPKerasSGDOptimizer 2', dp_optimizer_keras.DPKerasSGDOptimizer, 2), ('DPKerasSGDOptimizer 4', dp_optimizer_keras.DPKerasSGDOptimizer, 4), - 
('DPKerasAdamOptimizer 2', - dp_optimizer_keras.DPKerasAdamOptimizer, 1), - ('DPKerasAdagradOptimizer 2', - dp_optimizer_keras.DPKerasAdagradOptimizer, 2), + ('DPKerasAdamOptimizer 2', dp_optimizer_keras.DPKerasAdamOptimizer, 1), + ('DPKerasAdagradOptimizer 2', dp_optimizer_keras.DPKerasAdagradOptimizer, + 2), ) def testLargeBatchEmulation(self, cls, gradient_accumulation_steps): # Tests various optimizers with large batch emulation. diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_vectorized.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_vectorized.py index b429fbd..3481e6d 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_vectorized.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_vectorized.py @@ -95,10 +95,11 @@ def make_vectorized_keras_optimizer_class(cls): model.fit(...) ``` - """.format(base_class='tf.keras.optimizers.' + cls.__name__, - dp_keras_class='DPKeras' + cls.__name__, - short_base_class=cls.__name__, - dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__) + """.format( + base_class='tf.keras.optimizers.' + cls.__name__, + dp_keras_class='DPKeras' + cls.__name__, + short_base_class=cls.__name__, + dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__) def __init__( self, @@ -112,8 +113,8 @@ def make_vectorized_keras_optimizer_class(cls): Args: l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). noise_multiplier: Ratio of the standard deviation to the clipping norm. - num_microbatches: Number of microbatches into which each minibatch - is split. + num_microbatches: Number of microbatches into which each minibatch is + split. *args: These will be passed on to the base class `__init__` method. **kwargs: These will be passed on to the base class `__init__` method. 
""" diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py index 4c854d1..a47c2f7 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py @@ -18,7 +18,6 @@ import unittest from absl.testing import parameterized import numpy as np import tensorflow.compat.v1 as tf - from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.optimizers import dp_optimizer @@ -30,13 +29,14 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): return 0.5 * tf.reduce_sum( input_tensor=tf.math.squared_difference(val0, val1), axis=1) - def _compute_expected_gradients(self, per_example_gradients, - l2_norm_clip, num_microbatches): + def _compute_expected_gradients(self, per_example_gradients, l2_norm_clip, + num_microbatches): batch_size, num_vars = per_example_gradients.shape microbatch_gradients = np.mean( - np.reshape(per_example_gradients, - [num_microbatches, - np.int(batch_size / num_microbatches), num_vars]), + np.reshape( + per_example_gradients, + [num_microbatches, + np.int(batch_size / num_microbatches), num_vars]), axis=1) microbatch_gradients_norms = np.linalg.norm(microbatch_gradients, axis=1) @@ -124,8 +124,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): l2_norm_clip = 1.0 dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, 0.0) - opt = cls(dp_sum_query, num_microbatches=num_microbatches, - learning_rate=2.0) + opt = cls( + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py index 1144129..7025a96 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py @@ -134,19 +134,15 @@ def make_vectorized_optimizer_class(cls): if var_list is None: var_list = ( - tf.trainable_variables() + tf.get_collection( - tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) + tf.trainable_variables() + + tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) def process_microbatch(microbatch_loss): """Compute clipped grads for one microbatch.""" microbatch_loss = tf.reduce_mean(input_tensor=microbatch_loss) grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients( - microbatch_loss, - var_list, - gate_gradients, - aggregation_method, - colocate_gradients_with_ops, - grad_loss)) + microbatch_loss, var_list, gate_gradients, aggregation_method, + colocate_gradients_with_ops, grad_loss)) grads_list = [ g if g is not None else tf.zeros_like(v) for (g, v) in zip(list(grads), var_list) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py index 977d3bd..77b2696 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py @@ -17,7 +17,6 @@ import unittest from absl.testing import parameterized import numpy as np import tensorflow.compat.v1 as tf - from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad from 
tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam @@ -63,19 +62,19 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): grads_and_vars = sess.run(gradient_op) self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0]) - @parameterized.named_parameters( - ('DPGradientDescent', VectorizedDPSGD), - ('DPAdagrad', VectorizedDPAdagrad), - ('DPAdam', VectorizedDPAdam)) + @parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD), + ('DPAdagrad', VectorizedDPAdagrad), + ('DPAdam', VectorizedDPAdam)) def testClippingNorm(self, cls): with self.cached_session() as sess: var0 = tf.Variable([0.0, 0.0]) data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) - opt = cls(l2_norm_clip=1.0, - noise_multiplier=0., - num_microbatches=1, - learning_rate=2.0) + opt = cls( + l2_norm_clip=1.0, + noise_multiplier=0., + num_microbatches=1, + learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -86,19 +85,19 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): grads_and_vars = sess.run(gradient_op) self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) - @parameterized.named_parameters( - ('DPGradientDescent', VectorizedDPSGD), - ('DPAdagrad', VectorizedDPAdagrad), - ('DPAdam', VectorizedDPAdam)) + @parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD), + ('DPAdagrad', VectorizedDPAdagrad), + ('DPAdam', VectorizedDPAdam)) def testNoiseMultiplier(self, cls): with self.cached_session() as sess: var0 = tf.Variable([0.0]) data0 = tf.Variable([[0.0]]) - opt = cls(l2_norm_clip=4.0, - noise_multiplier=8.0, - num_microbatches=1, - learning_rate=2.0) + opt = cls( + l2_norm_clip=4.0, + noise_multiplier=8.0, + num_microbatches=1, + learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -168,10 +167,9 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): true_weights, atol=1.0) - @parameterized.named_parameters( - ('DPGradientDescent', VectorizedDPSGD), - ('DPAdagrad', VectorizedDPAdagrad), - ('DPAdam', VectorizedDPAdam)) + @parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD), + ('DPAdagrad', VectorizedDPAdagrad), + ('DPAdam', VectorizedDPAdam)) def testDPGaussianOptimizerClass(self, cls): with self.cached_session() as sess: var0 = tf.Variable([0.0]) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py index ef15191..064af6f 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py @@ -217,5 +217,6 @@ def main(unused_argv): # For saving a figure into a file: # plotting.save_plot(figure, ) + if __name__ == "__main__": app.run(main) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py index 180a890..56faa99 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py @@ -482,8 +482,8 @@ class SingleAttackResult: return '\n'.join([ 'SingleAttackResult(', ' SliceSpec: %s' % str(self.slice_spec), - ' DataSize: (ntrain=%d, 
ntest=%d)' % (self.data_size.ntrain, - self.data_size.ntest), + ' DataSize: (ntrain=%d, ntest=%d)' % + (self.data_size.ntrain, self.data_size.ntest), ' AttackType: %s' % str(self.attack_type), ' AUC: %.2f' % self.get_auc(), ' Attacker advantage: %.2f' % self.get_attacker_advantage(), ')' @@ -684,10 +684,8 @@ class AttackResults: summary.append('Best-performing attacks over all slices') summary.append( ' %s (with %d training and %d test examples) achieved an AUC of %.2f on slice %s' - % (max_auc_result_all.attack_type, - max_auc_result_all.data_size.ntrain, - max_auc_result_all.data_size.ntest, - max_auc_result_all.get_auc(), + % (max_auc_result_all.attack_type, max_auc_result_all.data_size.ntrain, + max_auc_result_all.data_size.ntest, max_auc_result_all.get_auc(), max_auc_result_all.slice_spec)) max_advantage_result_all = self.get_result_with_max_attacker_advantage() @@ -709,10 +707,8 @@ class AttackResults: max_auc_result = results.get_result_with_max_auc() summary.append( ' %s (with %d training and %d test examples) achieved an AUC of %.2f' - % (max_auc_result.attack_type, - max_auc_result.data_size.ntrain, - max_auc_result.data_size.ntest, - max_auc_result.get_auc())) + % (max_auc_result.attack_type, max_auc_result.data_size.ntrain, + max_auc_result.data_size.ntest, max_auc_result.get_auc())) max_advantage_result = results.get_result_with_max_attacker_advantage() summary.append( ' %s (with %d training and %d test examples) achieved an advantage of %.2f' @@ -816,6 +812,8 @@ def get_flattened_attack_metrics(results: AttackResults): types += [str(attack_result.attack_type)] * 2 slices += [str(attack_result.slice_spec)] * 2 attack_metrics += ['adv', 'auc'] - values += [float(attack_result.get_attacker_advantage()), - float(attack_result.get_auc())] + values += [ + float(attack_result.get_attacker_advantage()), + float(attack_result.get_auc()) + ] return types, slices, attack_metrics, values diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py index 5205baf..fa0875c 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py @@ -54,7 +54,8 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback): def __init__( self, - in_train, out_train, + in_train, + out_train, slicing_spec: SlicingSpec = None, attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,), tensorboard_dir=None, @@ -70,7 +71,7 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback): attack_types: a list of attacks, each of type AttackType tensorboard_dir: directory for tensorboard summary tensorboard_merge_classifiers: if true, plot different classifiers with - the same slicing_spec and metric in the same figure + the same slicing_spec and metric in the same figure is_logit: whether the result of model.predict is logit or probability batch_size: the batch size for model.predict """ @@ -96,19 +97,18 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback): def on_epoch_end(self, epoch, logs=None): results = run_attack_on_keras_model( - self.model, - (self._in_train_data, self._in_train_labels), - (self._out_train_data, self._out_train_labels), - self._slicing_spec, - self._attack_types, - self._is_logit, self._batch_size) + self.model, (self._in_train_data, self._in_train_labels), + (self._out_train_data, 
self._out_train_labels), self._slicing_spec, + self._attack_types, self._is_logit, self._batch_size) logging.info(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( results) print('Attack result:') - print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in - zip(att_types, att_slices, att_metrics, att_values)])) + print('\n'.join([ + ' %s: %.4f' % (', '.join([s, t, m]), v) + for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values) + ])) # Write to tensorboard if tensorboard_dir is specified if self._writers is not None: @@ -117,7 +117,9 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback): def run_attack_on_keras_model( - model, in_train, out_train, + model, + in_train, + out_train, slicing_spec: SlicingSpec = None, attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,), is_logit: bool = False, @@ -132,6 +134,7 @@ def run_attack_on_keras_model( attack_types: a list of attacks, each of type AttackType is_logit: whether the result of model.predict is logit or probability batch_size: the batch size for model.predict + Returns: Results of the attack """ @@ -139,16 +142,19 @@ def run_attack_on_keras_model( out_train_data, out_train_labels = out_train # Compute predictions and losses - in_train_pred, in_train_loss = calculate_losses( - model, in_train_data, in_train_labels, is_logit, batch_size) - out_train_pred, out_train_loss = calculate_losses( - model, out_train_data, out_train_labels, is_logit, batch_size) + in_train_pred, in_train_loss = calculate_losses(model, in_train_data, + in_train_labels, is_logit, + batch_size) + out_train_pred, out_train_loss = calculate_losses(model, out_train_data, + out_train_labels, is_logit, + batch_size) attack_input = AttackInputData( - logits_train=in_train_pred, logits_test=out_train_pred, - labels_train=in_train_labels, labels_test=out_train_labels, - loss_train=in_train_loss, loss_test=out_train_loss - ) - results = mia.run_attacks(attack_input, - slicing_spec=slicing_spec, - attack_types=attack_types) + logits_train=in_train_pred, + logits_test=out_train_pred, + labels_train=in_train_labels, + labels_test=out_train_labels, + loss_train=in_train_loss, + loss_test=out_train_loss) + results = mia.run_attacks( + attack_input, slicing_spec=slicing_spec, attack_types=attack_types) return results diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py index 15e56f3..bddb903 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """An example for using keras_evaluation.""" from absl import app @@ -25,15 +24,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import MembershipInferenceCallback from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import run_attack_on_keras_model - FLAGS = flags.FLAGS flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training') flags.DEFINE_integer('batch_size', 250, 'Batch size') flags.DEFINE_integer('epochs', 100, 'Number of epochs') flags.DEFINE_string('model_dir', None, 'Model directory.') -flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot ' - 'different classifiers with the same slicing_spec and metric ' - 'in the same figure.') +flags.DEFINE_bool( + 'tensorboard_merge_classifiers', False, 'If true, plot ' + 'different classifiers with the same slicing_spec and metric ' + 'in the same figure.') def small_cnn(): @@ -76,14 +75,15 @@ def main(unused_argv): # Get callback for membership inference attack. mia_callback = MembershipInferenceCallback( - (x_train, y_train), - (x_test, y_test), + (x_train, y_train), (x_test, y_test), slicing_spec=SlicingSpec(entire_dataset=True, by_class=True), - attack_types=[AttackType.THRESHOLD_ATTACK, - AttackType.K_NEAREST_NEIGHBORS], + attack_types=[ + AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS + ], tensorboard_dir=FLAGS.model_dir, tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers, - is_logit=True, batch_size=2048) + is_logit=True, + batch_size=2048) # Train model with Keras model.fit( @@ -102,11 +102,14 @@ def main(unused_argv): attack_types=[ AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS ], - is_logit=True, batch_size=2048) + is_logit=True, + batch_size=2048) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( attack_results) - print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in - zip(att_types, att_slices, att_metrics, att_values)])) + print('\n'.join([ + ' %s: %.4f' % (', '.join([s, t, m]), v) + for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values) + ])) if __name__ == '__main__': diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py index 53a3098..69fdcfd 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py @@ -13,10 +13,8 @@ # limitations under the License. 
from absl.testing import absltest - import numpy as np import tensorflow.compat.v1 as tf - from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import keras_evaluation from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType @@ -59,8 +57,7 @@ class UtilsTest(absltest.TestCase): def test_run_attack_on_keras_model(self): """Test the attack.""" results = keras_evaluation.run_attack_on_keras_model( - self.model, - (self.train_data, self.train_labels), + self.model, (self.train_data, self.train_labels), (self.test_data, self.test_labels), attack_types=[AttackType.THRESHOLD_ATTACK]) self.assertIsInstance(results, AttackResults) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py index 80c951f..db5ee3a 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py @@ -140,9 +140,9 @@ def _run_attack(attack_input: AttackInputData, attack_input: input data for running an attack attack_type: the attack to run balance_attacker_training: Whether the training and test sets for the - membership inference attacker should have a balanced (roughly equal) - number of samples from the training and test sets used to develop - the model under attack. + membership inference attacker should have a balanced (roughly equal) + number of samples from the training and test sets used to develop the + model under attack. min_num_samples: minimum number of examples in either training or test data. Returns: @@ -179,9 +179,9 @@ def run_attacks(attack_input: AttackInputData, attack_types: attacks to run privacy_report_metadata: the metadata of the model under attack. balance_attacker_training: Whether the training and test sets for the - membership inference attacker should have a balanced (roughly equal) - number of samples from the training and test sets used to develop - the model under attack. + membership inference attacker should have a balanced (roughly equal) + number of samples from the training and test sets used to develop the + model under attack. min_num_samples: minimum number of examples in either training or test data. Returns: @@ -200,8 +200,7 @@ def run_attacks(attack_input: AttackInputData, attack_input_slice = get_slice(attack_input, single_slice_spec) for attack_type in attack_types: attack_result = _run_attack(attack_input_slice, attack_type, - balance_attacker_training, - min_num_samples) + balance_attacker_training, min_num_samples) if attack_result is not None: attack_results.append(attack_result) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py index ab15d12..9d162c5 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py @@ -55,9 +55,8 @@ def create_attacker_data(attack_input_data: AttackInputData, attack_input_data: Original AttackInputData test_fraction: Fraction of the dataset to include in the test split. 
balance: Whether the training and test sets for the membership inference - attacker should have a balanced (roughly equal) number of samples - from the training and test sets used to develop the model - under attack. + attacker should have a balanced (roughly equal) number of samples from the + training and test sets used to develop the model under attack. Returns: AttackerData. @@ -134,6 +133,7 @@ class TrainedAttacker: Args: input_features : A vector of features with the same semantics as x_train passed to train_model. + Returns: An array of probabilities denoting whether the example belongs to test. """ diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py index 5f6bb95..275de02 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py @@ -81,5 +81,4 @@ def plot_histograms(train: Iterable[float], def plot_roc_curve(roc_curve, plot_func=plot_curve_with_area) -> plt.Figure: """Plot the ROC curve and the area under the curve.""" - return plot_func( - roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR') + return plot_func(roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR') diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py index afb80f0..ef15734 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """A hook and a function in tf estimator for membership inference attack.""" import os @@ -58,7 +57,8 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook): def __init__( self, estimator, - in_train, out_train, + in_train, + out_train, input_fn_constructor, slicing_spec: SlicingSpec = None, attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,), @@ -76,7 +76,7 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook): attack_types: a list of attacks, each of type AttackType tensorboard_dir: directory for tensorboard summary tensorboard_merge_classifiers: if true, plot different classifiers with - the same slicing_spec and metric in the same figure + the same slicing_spec and metric in the same figure """ in_train_data, self._in_train_labels = in_train out_train_data, self._out_train_labels = out_train @@ -106,19 +106,19 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook): self._writers = None def end(self, session): - results = run_attack_helper(self._estimator, - self._in_train_input_fn, - self._out_train_input_fn, - self._in_train_labels, self._out_train_labels, - self._slicing_spec, + results = run_attack_helper(self._estimator, self._in_train_input_fn, + self._out_train_input_fn, self._in_train_labels, + self._out_train_labels, self._slicing_spec, self._attack_types) logging.info(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( results) print('Attack result:') - print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in - zip(att_types, att_slices, att_metrics, att_values)])) + print('\n'.join([ + ' %s: %.4f' % (', '.join([s, t, m]), v) + for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values) + ])) # Write to tensorboard if tensorboard_dir is specified global_step = self._estimator.get_variable_value('global_step') @@ -128,7 +128,9 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook): def run_attack_on_tf_estimator_model( - estimator, in_train, out_train, + estimator, + in_train, + out_train, input_fn_constructor, slicing_spec: SlicingSpec = None, attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)): @@ -142,6 +144,7 @@ def run_attack_on_tf_estimator_model( the input_fn for model prediction slicing_spec: slicing specification of the attack attack_types: a list of attacks, each of type AttackType + Returns: Results of the attack """ @@ -153,10 +156,8 @@ def run_attack_on_tf_estimator_model( out_train_input_fn = input_fn_constructor(out_train_data, out_train_labels) # Call the helper to run the attack. - results = run_attack_helper(estimator, - in_train_input_fn, out_train_input_fn, - in_train_labels, out_train_labels, - slicing_spec, + results = run_attack_helper(estimator, in_train_input_fn, out_train_input_fn, + in_train_labels, out_train_labels, slicing_spec, attack_types) logging.info('End of training attack:') logging.info(results) @@ -165,8 +166,10 @@ def run_attack_on_tf_estimator_model( def run_attack_helper( estimator, - in_train_input_fn, out_train_input_fn, - in_train_labels, out_train_labels, + in_train_input_fn, + out_train_input_fn, + in_train_labels, + out_train_labels, slicing_spec: SlicingSpec = None, attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)): """A helper function to perform attack. 
@@ -179,22 +182,23 @@ def run_attack_helper( out_train_labels: out of training labels slicing_spec: slicing specification of the attack attack_types: a list of attacks, each of type AttackType + Returns: Results of the attack """ # Compute predictions and losses - in_train_pred, in_train_loss = calculate_losses(estimator, - in_train_input_fn, + in_train_pred, in_train_loss = calculate_losses(estimator, in_train_input_fn, in_train_labels) out_train_pred, out_train_loss = calculate_losses(estimator, out_train_input_fn, out_train_labels) attack_input = AttackInputData( - logits_train=in_train_pred, logits_test=out_train_pred, - labels_train=in_train_labels, labels_test=out_train_labels, - loss_train=in_train_loss, loss_test=out_train_loss - ) - results = mia.run_attacks(attack_input, - slicing_spec=slicing_spec, - attack_types=attack_types) + logits_train=in_train_pred, + logits_test=out_train_pred, + labels_train=in_train_labels, + labels_test=out_train_labels, + loss_train=in_train_loss, + loss_test=out_train_loss) + results = mia.run_attacks( + attack_input, slicing_spec=slicing_spec, attack_types=attack_types) return results diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py index 943f3c1..1323600 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """An example for using tf_estimator_evaluation.""" from absl import app @@ -26,15 +25,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model - FLAGS = flags.FLAGS flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training') flags.DEFINE_integer('batch_size', 250, 'Batch size') flags.DEFINE_integer('epochs', 100, 'Number of epochs') flags.DEFINE_string('model_dir', None, 'Model directory.') -flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot ' - 'different classifiers with the same slicing_spec and metric ' - 'in the same figure.') +flags.DEFINE_bool( + 'tensorboard_merge_classifiers', False, 'If true, plot ' + 'different classifiers with the same slicing_spec and metric ' + 'in the same figure.') def small_cnn_fn(features, labels, mode): @@ -55,8 +54,8 @@ def small_cnn_fn(features, labels, mode): # Configure the training op (for TRAIN mode). if mode == tf.estimator.ModeKeys.TRAIN: - optimizer = tf.train.MomentumOptimizer(learning_rate=FLAGS.learning_rate, - momentum=0.9) + optimizer = tf.train.MomentumOptimizer( + learning_rate=FLAGS.learning_rate, momentum=0.9) global_step = tf.train.get_global_step() train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step) return tf.estimator.EstimatorSpec( @@ -111,13 +110,12 @@ def main(unused_argv): # Get hook for membership inference attack. 
mia_hook = MembershipInferenceTrainingHook( - classifier, - (x_train, y_train), - (x_test, y_test), + classifier, (x_train, y_train), (x_test, y_test), input_fn_constructor, slicing_spec=SlicingSpec(entire_dataset=True, by_class=True), - attack_types=[AttackType.THRESHOLD_ATTACK, - AttackType.K_NEAREST_NEIGHBORS], + attack_types=[ + AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS + ], tensorboard_dir=FLAGS.model_dir, tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers) @@ -148,12 +146,15 @@ def main(unused_argv): classifier, (x_train, y_train), (x_test, y_test), input_fn_constructor, slicing_spec=SlicingSpec(entire_dataset=True, by_class=True), - attack_types=[AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS] - ) + attack_types=[ + AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS + ]) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( attack_results) - print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in - zip(att_types, att_slices, att_metrics, att_values)])) + print('\n'.join([ + ' %s: %.4f' % (', '.join([s, t, m]), v) + for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values) + ])) if __name__ == '__main__': diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py index 4d40f87..8c143d1 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py @@ -13,10 +13,8 @@ # limitations under the License. from absl.testing import absltest - import numpy as np import tensorflow.compat.v1 as tf - from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import tf_estimator_evaluation from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType @@ -55,23 +53,25 @@ class UtilsTest(absltest.TestCase): # Define the classifier, input_fn for training and test data self.classifier = tf.estimator.Estimator(model_fn=model_fn) self.input_fn_train = tf.estimator.inputs.numpy_input_fn( - x={'x': self.train_data}, y=self.train_labels, num_epochs=1, + x={'x': self.train_data}, + y=self.train_labels, + num_epochs=1, shuffle=False) self.input_fn_test = tf.estimator.inputs.numpy_input_fn( - x={'x': self.test_data}, y=self.test_labels, num_epochs=1, + x={'x': self.test_data}, + y=self.test_labels, + num_epochs=1, shuffle=False) def test_calculate_losses(self): """Test calculating the loss.""" - pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier, - self.input_fn_train, - self.train_labels) + pred, loss = tf_estimator_evaluation.calculate_losses( + self.classifier, self.input_fn_train, self.train_labels) self.assertEqual(pred.shape, (self.ntrain, self.nclass)) self.assertEqual(loss.shape, (self.ntrain,)) - pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier, - self.input_fn_test, - self.test_labels) + pred, loss = tf_estimator_evaluation.calculate_losses( + self.classifier, self.input_fn_test, self.test_labels) self.assertEqual(pred.shape, (self.ntest, self.nclass)) self.assertEqual(loss.shape, (self.ntest,)) @@ -94,12 +94,12 @@ class UtilsTest(absltest.TestCase): def 
test_run_attack_on_tf_estimator_model(self): """Test the attack on the final models.""" + def input_fn_constructor(x, y): return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False) results = tf_estimator_evaluation.run_attack_on_tf_estimator_model( - self.classifier, - (self.train_data, self.train_labels), + self.classifier, (self.train_data, self.train_labels), (self.test_data, self.test_labels), input_fn_constructor, attack_types=[AttackType.THRESHOLD_ATTACK]) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py index ceaefe4..3610c70 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Utility functions for membership inference attacks.""" import numpy as np diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py index 401b890..afaf596 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py @@ -27,7 +27,7 @@ def write_to_tensorboard(writers, tags, values, step): Args: writers: a list of tensorboard writers or one writer to be used for metrics. - If it's a list, it should be of the same length as tags + If it's a list, it should be of the same length as tags tags: a list of tags of metrics values: a list of values of metrics with the same length as tags step: step for the tensorboard summary @@ -54,7 +54,7 @@ def write_to_tensorboard_tf2(writers, tags, values, step): Args: writers: a list of tensorboard writers or one writer to be used for metrics. - If it's a list, it should be of the same length as tags + If it's a list, it should be of the same length as tags tags: a list of tags of metrics values: a list of values of metrics with the same length as tags step: step for the tensorboard summary @@ -77,11 +77,10 @@ def write_to_tensorboard_tf2(writers, tags, values, step): writer.flush() -def write_results_to_tensorboard( - attack_results: AttackResults, - writers: Union[tf1.summary.FileWriter, List[tf1.summary.FileWriter]], - step: int, - merge_classifiers: bool): +def write_results_to_tensorboard(attack_results: AttackResults, + writers: Union[tf1.summary.FileWriter, + List[tf1.summary.FileWriter]], + step: int, merge_classifiers: bool): """Write attack results to tensorboard. 
Args: @@ -97,21 +96,21 @@ def write_results_to_tensorboard( att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( attack_results) if merge_classifiers: - att_tags = ['attack/' + f'{s}_{m}' for s, m in - zip(att_slices, att_metrics)] - write_to_tensorboard([writers[t] for t in att_types], - att_tags, att_values, step) + att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)] + write_to_tensorboard([writers[t] for t in att_types], att_tags, att_values, + step) else: - att_tags = ['attack/' + f'{s}_{t}_{m}' for t, s, m in - zip(att_types, att_slices, att_metrics)] + att_tags = [ + 'attack/' + f'{s}_{t}_{m}' + for t, s, m in zip(att_types, att_slices, att_metrics) + ] write_to_tensorboard(writers, att_tags, att_values, step) def write_results_to_tensorboard_tf2( attack_results: AttackResults, writers: Union[tf2.summary.SummaryWriter, List[tf2.summary.SummaryWriter]], - step: int, - merge_classifiers: bool): + step: int, merge_classifiers: bool): """Write attack results to tensorboard. Args: @@ -127,12 +126,12 @@ def write_results_to_tensorboard_tf2( att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( attack_results) if merge_classifiers: - att_tags = ['attack/' + f'{s}_{m}' for s, m in - zip(att_slices, att_metrics)] - write_to_tensorboard_tf2([writers[t] for t in att_types], - att_tags, att_values, step) + att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)] + write_to_tensorboard_tf2([writers[t] for t in att_types], att_tags, + att_values, step) else: - att_tags = ['attack/' + f'{s}_{t}_{m}' for t, s, m in - zip(att_types, att_slices, att_metrics)] + att_tags = [ + 'attack/' + f'{s}_{t}_{m}' + for t, s, m in zip(att_types, att_slices, att_metrics) + ] write_to_tensorboard_tf2(writers, att_tags, att_values, step) - diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py index 8ec45e5..82ec230 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py @@ -25,22 +25,23 @@ def compute_exposure_interpolation( """Get exposure using interpolation. 
Args: - perplexities: a dictionary, key is number of secret repetitions, - value is a list of perplexities + perplexities: a dictionary, key is number of secret repetitions, value is a + list of perplexities perplexities_reference: a list, perplexities of the random sequences that - did not appear in the training data + did not appear in the training data + Returns: The exposure of every secret measured using interpolation (not necessarily in the same order as the input) """ repetitions = list(perplexities.keys()) # Concatenate all perplexities, including those for references - perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] - + [perplexities_reference]) + perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] + + [perplexities_reference]) # Concatenate the number of repetitions for each secret - repetitions_concat = np.concatenate( - [[r] * len(perplexities[r]) for r in repetitions] - + [[0] * len(perplexities_reference)]) + repetitions_concat = np.concatenate([[r] * len(perplexities[r]) + for r in repetitions] + + [[0] * len(perplexities_reference)]) # Sort the repetition list according to the corresponding perplexity idx = np.argsort(perplexities_concat) @@ -53,8 +54,10 @@ def compute_exposure_interpolation( # (repetitions_concat == 0). cum_sum = np.cumsum(repetitions_concat == 0) ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions} - exposures = {r: np.log2(len(perplexities_reference)) - np.log2(ranks[r]) - for r in repetitions} + exposures = { + r: np.log2(len(perplexities_reference)) - np.log2(ranks[r]) + for r in repetitions + } return exposures @@ -64,10 +67,11 @@ def compute_exposure_extrapolation( """Get exposure using extrapolation. Args: - perplexities: a dictionary, key is number of secret repetitions, - value is a list of perplexities + perplexities: a dictionary, key is number of secret repetitions, value is a + list of perplexities perplexities_reference: a list, perplexities of the random sequences that - did not appear in the training data + did not appear in the training data + Returns: The exposure of every secret measured using extrapolation """ diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py index bc44a8a..bc95e51 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py @@ -15,7 +15,6 @@ from absl.testing import absltest import numpy as np from scipy import stats - from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation @@ -28,9 +27,11 @@ class UtilsTest(absltest.TestCase): def test_exposure_interpolation(self): """Test exposure by interpolation.""" - perplexities = {1: [0, 0.1], # smallest perplexities - 2: [20.0], # largest perplexities - 5: [3.5]} # rank = 4 + perplexities = { + 1: [0, 0.1], # smallest perplexities + 2: [20.0], # largest perplexities + 5: [3.5] + } # rank = 4 perplexities_reference = [float(x) for x in range(1, 17)] exposures = compute_exposure_interpolation(perplexities, perplexities_reference) @@ -41,7 +42,8 @@ class UtilsTest(absltest.TestCase): expected_exposures = { 1: np.array([exposure_largest] * 2), 2: np.array([exposure_smallest]), - 5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])} + 
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)]) + } self.assertEqual(exposures.keys(), expected_exposures.keys()) for r in exposures.keys(): diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets.py index eb0906d..f33b718 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Generate random sequences.""" import itertools @@ -21,7 +20,9 @@ from dataclasses import dataclass import numpy as np -def generate_random_sequences(vocab: List[str], pattern: str, n: int, +def generate_random_sequences(vocab: List[str], + pattern: str, + n: int, seed: int = 1) -> List[str]: """Generate random sequences. @@ -35,6 +36,7 @@ def generate_random_sequences(vocab: List[str], pattern: str, n: int, Returns: A list of different random sequences from the given vocabulary """ + def count_placeholder(pattern): return sum([x[1] is not None for x in string.Formatter().parse(pattern)]) @@ -103,7 +105,8 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets: Args: secret_config: configuration of secret. seqs: a list of random sequences that will be used for secrets and - references. + references. + Returns: a secret instance. """ @@ -116,9 +119,10 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets: secret_config.num_repetitions, secret_config.num_secrets_for_repetitions): secrets[num_repetition] = seqs[i:i + num_secrets] i += num_secrets - return Secrets(config=secret_config, - secrets=secrets, - references=seqs[-secret_config.num_references:]) + return Secrets( + config=secret_config, + secrets=secrets, + references=seqs[-secret_config.num_references:]) def generate_secrets_and_references(secret_configs: List[SecretConfig], @@ -128,6 +132,7 @@ def generate_secrets_and_references(secret_configs: List[SecretConfig], Args: secret_configs: a list of secret configurations. seed: random seed. + Returns: A list of secret instances. """ diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets_test.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets_test.py index 1d6f652..77fd5a8 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/generate_secrets_test.py @@ -12,10 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets.""" - from absl.testing import absltest - from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import construct_secret from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_random_sequences from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_secrets_and_references @@ -32,27 +29,34 @@ class UtilsTest(absltest.TestCase): """Test generate_random_sequences.""" # Test when n is larger than total number of possible sequences. 
seqs = generate_random_sequences(['A', 'b', 'c'], '{}+{}', 10, seed=27) - expected_seqs = ['A+c', 'c+c', 'b+b', 'A+b', 'b+c', - 'c+A', 'c+b', 'A+A', 'b+A'] + expected_seqs = [ + 'A+c', 'c+c', 'b+b', 'A+b', 'b+c', 'c+A', 'c+b', 'A+A', 'b+A' + ] self.assertEqual(seqs, expected_seqs) # Test when n is smaller than total number of possible sequences. seqs = generate_random_sequences(list('01234'), 'prefix {}{}{}?', 8, seed=9) - expected_seqs = ['prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?', - 'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?'] + expected_seqs = [ + 'prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?', + 'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?' + ] self.assertEqual(seqs, expected_seqs) def test_construct_secret(self): - secret_config = SecretConfig(vocab=None, pattern='', - num_repetitions=[1, 2, 8], - num_secrets_for_repetitions=[2, 3, 1], - num_references=3) + secret_config = SecretConfig( + vocab=None, + pattern='', + num_repetitions=[1, 2, 8], + num_secrets_for_repetitions=[2, 3, 1], + num_references=3) seqs = list('0123456789') secrets = construct_secret(secret_config, seqs) self.assertEqual(secrets.config, secret_config) - self.assertDictEqual(secrets.secrets, {1: ['0', '1'], - 2: ['2', '3', '4'], - 8: ['5']}) + self.assertDictEqual(secrets.secrets, { + 1: ['0', '1'], + 2: ['2', '3', '4'], + 8: ['5'] + }) self.assertEqual(secrets.references, ['7', '8', '9']) # Test when the number of elements in seqs is not enough. @@ -61,29 +65,36 @@ class UtilsTest(absltest.TestCase): def test_generate_secrets_and_references(self): secret_configs = [ - SecretConfig(vocab=['w1', 'w2', 'w3'], pattern='{} {} suf', - num_repetitions=[1, 12], - num_secrets_for_repetitions=[2, 1], - num_references=3), - SecretConfig(vocab=['W 1', 'W 2', 'W 3'], pattern='{}-{}', - num_repetitions=[1, 2, 8], - num_secrets_for_repetitions=[2, 3, 1], - num_references=3) + SecretConfig( + vocab=['w1', 'w2', 'w3'], + pattern='{} {} suf', + num_repetitions=[1, 12], + num_secrets_for_repetitions=[2, 1], + num_references=3), + SecretConfig( + vocab=['W 1', 'W 2', 'W 3'], + pattern='{}-{}', + num_repetitions=[1, 2, 8], + num_secrets_for_repetitions=[2, 3, 1], + num_references=3) ] secrets = generate_secrets_and_references(secret_configs, seed=27) self.assertEqual(secrets[0].config, secret_configs[0]) - self.assertDictEqual(secrets[0].secrets, {1: ['w3 w2 suf', 'w2 w1 suf'], - 12: ['w1 w1 suf']}) + self.assertDictEqual(secrets[0].secrets, { + 1: ['w3 w2 suf', 'w2 w1 suf'], + 12: ['w1 w1 suf'] + }) self.assertEqual(secrets[0].references, ['w2 w3 suf', 'w2 w2 suf', 'w3 w1 suf']) self.assertEqual(secrets[1].config, secret_configs[1]) - self.assertDictEqual(secrets[1].secrets, - {1: ['W 3-W 2', 'W 1-W 3'], - 2: ['W 3-W 1', 'W 2-W 1', 'W 1-W 1'], - 8: ['W 2-W 2']}) - self.assertEqual(secrets[1].references, - ['W 2-W 3', 'W 3-W 3', 'W 1-W 2']) + self.assertDictEqual( + secrets[1].secrets, { + 1: ['W 3-W 2', 'W 1-W 3'], + 2: ['W 3-W 1', 'W 2-W 1', 'W 1-W 1'], + 8: ['W 2-W 2'] + }) + self.assertEqual(secrets[1].references, ['W 2-W 3', 'W 3-W 3', 'W 1-W 2']) if __name__ == '__main__': diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 753bc66..c0f9eea 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -27,10 +27,14 @@ n_samples = 10 input_dim = 2 n_outputs = 1 # Create binary classification dataset: -x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)), - tf.constant(1, tf.float32, (n_samples, input_dim))] -y_stack = 
[tf.constant(0, tf.float32, (n_samples, 1)), - tf.constant(1, tf.float32, (n_samples, 1))] +x_stack = [ + tf.constant(-1, tf.float32, (n_samples, input_dim)), + tf.constant(1, tf.float32, (n_samples, input_dim)) +] +y_stack = [ + tf.constant(0, tf.float32, (n_samples, 1)), + tf.constant(1, tf.float32, (n_samples, 1)) +] x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0) print(x.shape, y.shape) generator = tf.data.Dataset.from_tensor_slices((x, y)) @@ -86,14 +90,15 @@ n_samples = None # default, if it cannot be inferred, specify this epsilon = 2 noise_distribution = 'laplace' -bolt.fit(x, - y, - epsilon=epsilon, - class_weight=class_weight, - batch_size=batch_size, - n_samples=n_samples, - noise_distribution=noise_distribution, - epochs=2) +bolt.fit( + x, + y, + epsilon=epsilon, + class_weight=class_weight, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=noise_distribution, + epochs=2) # ------- # We may also train a generator object, or try different optimizers and loss # functions. Below, we will see that we must pass the number of samples as the @@ -109,25 +114,27 @@ n_samples = None # default, if it cannot be inferred, specify this epsilon = 2 noise_distribution = 'laplace' try: - bolt.fit(generator, - epsilon=epsilon, - class_weight=class_weight, - batch_size=batch_size, - n_samples=n_samples, - noise_distribution=noise_distribution, - verbose=0) + bolt.fit( + generator, + epsilon=epsilon, + class_weight=class_weight, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0) except ValueError as e: print(e) # ------- # And now, re-running with the parameter set. # ------- n_samples = 20 -bolt.fit_generator(generator, - epsilon=epsilon, - class_weight=class_weight, - n_samples=n_samples, - noise_distribution=noise_distribution, - verbose=0) +bolt.fit_generator( + generator, + epsilon=epsilon, + class_weight=class_weight, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0) # ------- # You don't have to use the BoltOn model to use the BoltOn method. 
# There are only a few requirements: @@ -145,8 +152,8 @@ class TestModel(tf.keras.Model): # pylint: disable=abstract-method def __init__(self, reg_layer, number_of_outputs=1): super().__init__(name='test') - self.output_layer = tf.keras.layers.Dense(number_of_outputs, - kernel_regularizer=reg_layer) + self.output_layer = tf.keras.layers.Dense( + number_of_outputs, kernel_regularizer=reg_layer) def call(self, inputs): # pylint: disable=arguments-differ return self.output_layer(inputs) @@ -180,6 +187,5 @@ with optimizer( layers=test_model.layers, class_weights=class_weights, n_samples=n_samples, - batch_size=batch_size -) as _: + batch_size=batch_size) as _: test_model.fit(x, y, batch_size=batch_size, epochs=2) diff --git a/tutorials/mnist_dpsgd_tutorial.py b/tutorials/mnist_dpsgd_tutorial.py index ebbbd22..bb7fd59 100644 --- a/tutorials/mnist_dpsgd_tutorial.py +++ b/tutorials/mnist_dpsgd_tutorial.py @@ -86,12 +86,10 @@ def cnn_model_fn(features, labels, mode, params): # pylint: disable=unused-argu eval_metric_ops = { 'accuracy': tf.metrics.accuracy( - labels=labels, - predictions=tf.argmax(input=logits, axis=1)) + labels=labels, predictions=tf.argmax(input=logits, axis=1)) } - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - eval_metric_ops=eval_metric_ops) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops) def main(unused_argv): @@ -100,8 +98,8 @@ def main(unused_argv): raise ValueError('Number of microbatches should evenly divide batch_size') # Instantiate the tf.Estimator. - mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, - model_dir=FLAGS.model_dir) + mnist_classifier = tf.estimator.Estimator( + model_fn=cnn_model_fn, model_dir=FLAGS.model_dir) # Training loop. steps_per_epoch = 60000 // FLAGS.batch_size diff --git a/tutorials/mnist_dpsgd_tutorial_eager.py b/tutorials/mnist_dpsgd_tutorial_eager.py index 57fd702..3819e7b 100644 --- a/tutorials/mnist_dpsgd_tutorial_eager.py +++ b/tutorials/mnist_dpsgd_tutorial_eager.py @@ -25,16 +25,18 @@ from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescent GradientDescentOptimizer = tf.train.GradientDescentOptimizer tf.enable_eager_execution() -flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, ' - 'train with vanilla SGD.') +flags.DEFINE_boolean( + 'dpsgd', True, 'If True, train with DP-SGD. If False, ' + 'train with vanilla SGD.') flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') flags.DEFINE_float('noise_multiplier', 1.1, 'Ratio of the standard deviation to the clipping norm') flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') flags.DEFINE_integer('batch_size', 250, 'Batch size') flags.DEFINE_integer('epochs', 60, 'Number of epochs') -flags.DEFINE_integer('microbatches', 250, 'Number of microbatches ' - '(must evenly divide batch_size)') +flags.DEFINE_integer( + 'microbatches', 250, 'Number of microbatches ' + '(must evenly divide batch_size)') FLAGS = flags.FLAGS @@ -45,10 +47,11 @@ def compute_epsilon(steps): return float('inf') orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) sampling_probability = FLAGS.batch_size / 60000 - rdp = compute_rdp(q=sampling_probability, - noise_multiplier=FLAGS.noise_multiplier, - steps=steps, - orders=orders) + rdp = compute_rdp( + q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) # Delta is set to 1e-5 because MNIST has 60000 training points. 
return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] @@ -64,22 +67,20 @@ def main(_): # Create a dataset object and batch for the training data dataset = tf.data.Dataset.from_tensor_slices( - (tf.cast(train_images[..., tf.newaxis]/255, tf.float32), - tf.cast(train_labels, tf.int64))) + (tf.cast(train_images[..., tf.newaxis] / 255, + tf.float32), tf.cast(train_labels, tf.int64))) dataset = dataset.shuffle(1000).batch(FLAGS.batch_size) # Create a dataset object and batch for the test data eval_dataset = tf.data.Dataset.from_tensor_slices( - (tf.cast(test_images[..., tf.newaxis]/255, tf.float32), - tf.cast(test_labels, tf.int64))) + (tf.cast(test_images[..., tf.newaxis] / 255, + tf.float32), tf.cast(test_labels, tf.int64))) eval_dataset = eval_dataset.batch(10000) # Define the model using tf.keras.layers mnist_model = tf.keras.Sequential([ - tf.keras.layers.Conv2D(16, 8, - strides=2, - padding='same', - activation='relu'), + tf.keras.layers.Conv2D( + 16, 8, strides=2, padding='same', activation='relu'), tf.keras.layers.MaxPool2D(2, 1), tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'), tf.keras.layers.MaxPool2D(2, 1), @@ -119,8 +120,8 @@ def main(_): return loss if FLAGS.dpsgd: - grads_and_vars = opt.compute_gradients(loss_fn, var_list, - gradient_tape=gradient_tape) + grads_and_vars = opt.compute_gradients( + loss_fn, var_list, gradient_tape=gradient_tape) else: grads_and_vars = opt.compute_gradients(loss_fn, var_list) @@ -140,5 +141,6 @@ def main(_): else: print('Trained with vanilla non-private SGD optimizer') + if __name__ == '__main__': app.run(main) diff --git a/tutorials/mnist_dpsgd_tutorial_keras.py b/tutorials/mnist_dpsgd_tutorial_keras.py index 54ad377..8c63c82 100644 --- a/tutorials/mnist_dpsgd_tutorial_keras.py +++ b/tutorials/mnist_dpsgd_tutorial_keras.py @@ -47,10 +47,11 @@ def compute_epsilon(steps): return float('inf') orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) sampling_probability = FLAGS.batch_size / 60000 - rdp = compute_rdp(q=sampling_probability, - noise_multiplier=FLAGS.noise_multiplier, - steps=steps, - orders=orders) + rdp = compute_rdp( + q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) # Delta is set to 1e-5 because MNIST has 60000 training points. return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] @@ -91,16 +92,16 @@ def main(unused_argv): # Define a sequential Keras model model = tf.keras.Sequential([ - tf.keras.layers.Conv2D(16, 8, - strides=2, - padding='same', - activation='relu', - input_shape=(28, 28, 1)), + tf.keras.layers.Conv2D( + 16, + 8, + strides=2, + padding='same', + activation='relu', + input_shape=(28, 28, 1)), tf.keras.layers.MaxPool2D(2, 1), - tf.keras.layers.Conv2D(32, 4, - strides=2, - padding='valid', - activation='relu'), + tf.keras.layers.Conv2D( + 32, 4, strides=2, padding='valid', activation='relu'), tf.keras.layers.MaxPool2D(2, 1), tf.keras.layers.Flatten(), tf.keras.layers.Dense(32, activation='relu'), @@ -124,10 +125,12 @@ def main(unused_argv): model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) # Train model with Keras - model.fit(train_data, train_labels, - epochs=FLAGS.epochs, - validation_data=(test_data, test_labels), - batch_size=FLAGS.batch_size) + model.fit( + train_data, + train_labels, + epochs=FLAGS.epochs, + validation_data=(test_data, test_labels), + batch_size=FLAGS.batch_size) # Compute the privacy budget expended. 
if FLAGS.dpsgd: @@ -136,5 +139,6 @@ def main(unused_argv): else: print('Trained with vanilla non-private SGD optimizer') + if __name__ == '__main__': app.run(main) diff --git a/tutorials/mnist_dpsgd_tutorial_vectorized.py b/tutorials/mnist_dpsgd_tutorial_vectorized.py index 2c7b2d7..f8f8ce8 100644 --- a/tutorials/mnist_dpsgd_tutorial_vectorized.py +++ b/tutorials/mnist_dpsgd_tutorial_vectorized.py @@ -22,7 +22,6 @@ from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized - flags.DEFINE_boolean( 'dpsgd', True, 'If True, train with DP-SGD. If False, ' 'train with vanilla SGD.') @@ -50,10 +49,11 @@ def compute_epsilon(steps): return float('inf') orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES - rdp = compute_rdp(q=sampling_probability, - noise_multiplier=FLAGS.noise_multiplier, - steps=steps, - orders=orders) + rdp = compute_rdp( + q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) # Delta is set to approximate 1 / (number of training points). return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] @@ -63,15 +63,11 @@ def cnn_model_fn(features, labels, mode): # Define CNN architecture using tf.keras.layers. input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) - y = tf.keras.layers.Conv2D(16, 8, - strides=2, - padding='same', - activation='relu').apply(input_layer) + y = tf.keras.layers.Conv2D( + 16, 8, strides=2, padding='same', activation='relu').apply(input_layer) y = tf.keras.layers.MaxPool2D(2, 1).apply(y) - y = tf.keras.layers.Conv2D(32, 4, - strides=2, - padding='valid', - activation='relu').apply(y) + y = tf.keras.layers.Conv2D( + 32, 4, strides=2, padding='valid', activation='relu').apply(y) y = tf.keras.layers.MaxPool2D(2, 1).apply(y) y = tf.keras.layers.Flatten().apply(y) y = tf.keras.layers.Dense(32, activation='relu').apply(y) @@ -106,22 +102,19 @@ def cnn_model_fn(features, labels, mode): # the vector_loss because tf.estimator requires a scalar loss. This is only # used for evaluation and debugging by tf.estimator. The actual loss being # minimized is opt_loss defined above and passed to optimizer.minimize(). - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - train_op=train_op) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, train_op=train_op) # Add evaluation metrics (for EVAL mode). elif mode == tf.estimator.ModeKeys.EVAL: eval_metric_ops = { 'accuracy': tf.metrics.accuracy( - labels=labels, - predictions=tf.argmax(input=logits, axis=1)) + labels=labels, predictions=tf.argmax(input=logits, axis=1)) } - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - eval_metric_ops=eval_metric_ops) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops) def load_mnist(): @@ -155,8 +148,8 @@ def main(unused_argv): train_data, train_labels, test_data, test_labels = load_mnist() # Instantiate the tf.Estimator. - mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, - model_dir=FLAGS.model_dir) + mnist_classifier = tf.estimator.Estimator( + model_fn=cnn_model_fn, model_dir=FLAGS.model_dir) # Create tf.Estimator input functions for the training and test data. 
train_input_fn = tf.estimator.inputs.numpy_input_fn( @@ -166,10 +159,7 @@ def main(unused_argv): num_epochs=FLAGS.epochs, shuffle=True) eval_input_fn = tf.estimator.inputs.numpy_input_fn( - x={'x': test_data}, - y=test_labels, - num_epochs=1, - shuffle=False) + x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False) # Training loop. steps_per_epoch = NUM_TRAIN_EXAMPLES // FLAGS.batch_size @@ -189,5 +179,6 @@ def main(unused_argv): else: print('Trained with vanilla non-private SGD optimizer') + if __name__ == '__main__': app.run(main) diff --git a/tutorials/mnist_lr_tutorial.py b/tutorials/mnist_lr_tutorial.py index aae4825..ba9b009 100644 --- a/tutorials/mnist_lr_tutorial.py +++ b/tutorials/mnist_lr_tutorial.py @@ -56,8 +56,8 @@ def lr_model_fn(features, labels, mode, nclasses, dim): logits = tf.keras.layers.Dense( units=nclasses, kernel_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer), - bias_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer)).apply( - input_layer) + bias_regularizer=tf.keras.regularizers.L2( + l2=FLAGS.regularizer)).apply(input_layer) # Calculate loss as a vector (to support microbatches in DP-SGD). vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( @@ -166,8 +166,7 @@ def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier): # an option. rdp = [order * coef for order in orders] eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta) - print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format( - p * 100, eps, delta)) + print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(p * 100, eps, delta)) # Compute privacy guarantees for the Sampled Gaussian Mechanism. rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier, @@ -234,5 +233,6 @@ def main(unused_argv): noise_multiplier=FLAGS.noise_multiplier, ) + if __name__ == '__main__': app.run(main) diff --git a/tutorials/movielens_tutorial.py b/tutorials/movielens_tutorial.py index 268cf00..d28050e 100644 --- a/tutorials/movielens_tutorial.py +++ b/tutorials/movielens_tutorial.py @@ -114,7 +114,7 @@ def nn_model_fn(features, labels, mode): return tf.estimator.EstimatorSpec( mode=mode, loss=scalar_loss, train_op=train_op) -# Add evaluation metrics (for EVAL mode). + # Add evaluation metrics (for EVAL mode). if mode == tf.estimator.ModeKeys.EVAL: eval_metric_ops = { 'rmse': diff --git a/tutorials/walkthrough/mnist_scratch.py b/tutorials/walkthrough/mnist_scratch.py index b1622de..f003b6d 100644 --- a/tutorials/walkthrough/mnist_scratch.py +++ b/tutorials/walkthrough/mnist_scratch.py @@ -28,23 +28,19 @@ def cnn_model_fn(features, labels, mode): # Define CNN architecture using tf.keras.layers. input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) - y = tf.keras.layers.Conv2D(16, 8, - strides=2, - padding='same', - activation='relu').apply(input_layer) + y = tf.keras.layers.Conv2D( + 16, 8, strides=2, padding='same', activation='relu').apply(input_layer) y = tf.keras.layers.MaxPool2D(2, 1).apply(y) - y = tf.keras.layers.Conv2D(32, 4, - strides=2, - padding='valid', - activation='relu').apply(y) + y = tf.keras.layers.Conv2D( + 32, 4, strides=2, padding='valid', activation='relu').apply(y) y = tf.keras.layers.MaxPool2D(2, 1).apply(y) y = tf.keras.layers.Flatten().apply(y) y = tf.keras.layers.Dense(32, activation='relu').apply(y) logits = tf.keras.layers.Dense(10).apply(y) # Calculate loss as a vector and as its average across minibatch. 
- vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, - logits=logits) + vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) scalar_loss = tf.reduce_mean(vector_loss) # Configure the training op (for TRAIN mode). @@ -53,21 +49,18 @@ def cnn_model_fn(features, labels, mode): opt_loss = scalar_loss global_step = tf.train.get_global_step() train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - train_op=train_op) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, train_op=train_op) # Add evaluation metrics (for EVAL mode). elif mode == tf.estimator.ModeKeys.EVAL: eval_metric_ops = { 'accuracy': tf.metrics.accuracy( - labels=labels, - predictions=tf.argmax(input=logits, axis=1)) + labels=labels, predictions=tf.argmax(input=logits, axis=1)) } - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - eval_metric_ops=eval_metric_ops) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops) def load_mnist(): @@ -109,10 +102,7 @@ def main(unused_argv): num_epochs=FLAGS.epochs, shuffle=True) eval_input_fn = tf.estimator.inputs.numpy_input_fn( - x={'x': test_data}, - y=test_labels, - num_epochs=1, - shuffle=False) + x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False) # Training loop. steps_per_epoch = 60000 // FLAGS.batch_size @@ -125,5 +115,6 @@ def main(unused_argv): test_accuracy = eval_results['accuracy'] print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) + if __name__ == '__main__': tf.app.run()
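Note on the reformatted tutorials: mnist_dpsgd_tutorial_eager.py, mnist_dpsgd_tutorial_keras.py, and mnist_dpsgd_tutorial_vectorized.py all share one accounting pattern: accumulate Renyi differential privacy (RDP) for the sampled Gaussian mechanism with compute_rdp, then convert it to an (epsilon, delta) guarantee with get_privacy_spent. A minimal, self-contained sketch of that pattern, using only the rdp_accountant functions these files already import (the default values below are illustrative, not prescribed by this patch):

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent


def compute_epsilon(steps, batch_size=250, noise_multiplier=1.1,
                    num_examples=60000, target_delta=1e-5):
  """Computes epsilon after `steps` steps of DP-SGD (illustrative defaults)."""
  if noise_multiplier == 0.0:
    return float('inf')  # Without noise there is no formal guarantee.
  # Track RDP at a grid of orders; get_privacy_spent picks the tightest one.
  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  sampling_probability = batch_size / num_examples
  rdp = compute_rdp(
      q=sampling_probability,
      noise_multiplier=noise_multiplier,
      steps=steps,
      orders=orders)
  # Convert the accumulated RDP curve to (epsilon, delta)-DP at target_delta.
  return get_privacy_spent(orders, rdp, target_delta=target_delta)[0]

As in the tutorials, target_delta is chosen near 1 / (number of training examples), e.g. 1e-5 for the 60000-point MNIST training set.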