From 7d885640eca754720941705d870b46d6fc19d797 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 21 Aug 2019 14:11:17 -0400 Subject: [PATCH 1/3] Refactoring bolton to bolt_on from package refactor for comments + strings. --- privacy/bolt_on/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/privacy/bolt_on/README.md b/privacy/bolt_on/README.md index 3d55977..8ac2b77 100644 --- a/privacy/bolt_on/README.md +++ b/privacy/bolt_on/README.md @@ -42,6 +42,14 @@ delta-epsilon privacy in machine learning, some of which can be explored here: https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e https://arxiv.org/pdf/1811.04911.pdf +## Stability + +As we are pegged on tensorflow2.0.0, this package may encounter stability +issues in the ongoing development of this package. + +We are aware of issues in model fitting using the BoltOnModel and are actively +working towards solving these issues. + ## Contacts In addition to the maintainers of tensorflow/privacy listed in the root From 18ce9c2335ebc9398e5a1de9379f7d0a251fa4e0 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 21 Aug 2019 22:57:35 -0400 Subject: [PATCH 2/3] Stable version for tf2.0a0, b0. --- privacy/bolt_on/README.md | 9 +++++---- privacy/bolt_on/models.py | 5 ++++- privacy/bolt_on/models_test.py | 25 +++++++++++++++---------- privacy/bolt_on/optimizers.py | 13 +++++-------- privacy/bolt_on/optimizers_test.py | 9 ++++----- tutorials/bolton_tutorial.py | 13 ++++++------- 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/privacy/bolt_on/README.md b/privacy/bolt_on/README.md index 8ac2b77..4e6d1f5 100644 --- a/privacy/bolt_on/README.md +++ b/privacy/bolt_on/README.md @@ -44,11 +44,12 @@ https://arxiv.org/pdf/1811.04911.pdf ## Stability -As we are pegged on tensorflow2.0.0, this package may encounter stability -issues in the ongoing development of this package. 
+As we are pegged on tensorflow2.0, this package may encounter stability +issues in the ongoing development of tensorflow2.0. -We are aware of issues in model fitting using the BoltOnModel and are actively -working towards solving these issues. +This sub-package is currently stable for 2.0.0a0 and 2.0.0b0. We are aware of +issues in model fitting using the BoltOnModel in beta1, the latest release, +and are actively working towards solving these issues. ## Contacts diff --git a/privacy/bolt_on/models.py b/privacy/bolt_on/models.py index c504103..27be2f2 100644 --- a/privacy/bolt_on/models.py +++ b/privacy/bolt_on/models.py @@ -217,7 +217,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method elif hasattr(generator, '__len__'): data_size = len(generator) else: - data_size = None + raise ValueError("The number of samples could not be determined. " + "Please make sure that if you are using a generator" + "to call this method directly with n_samples kwarg " + "passed.") batch_size = self._validate_or_infer_batch_size(None, steps_per_epoch, generator) diff --git a/privacy/bolt_on/models_test.py b/privacy/bolt_on/models_test.py index 580255a..c18122f 100644 --- a/privacy/bolt_on/models_test.py +++ b/privacy/bolt_on/models_test.py @@ -227,8 +227,8 @@ def _cat_dataset(n_samples, input_dim, n_classes, batch_size, generator=False): n_samples: number of rows input_dim: input dimensionality n_classes: output dimensionality + batch_size: The desired batch_size generator: False for array, True for generator - batch_size: The desired batch_size. 
Returns: X as (n_samples, input_dim), Y as (n_samples, n_outputs) @@ -294,6 +294,12 @@ def _do_fit(n_samples, # x = x.batch(batch_size) x = x.shuffle(n_samples//2) batch_size = None + if reset_n_samples: + n_samples = None + clf.fit_generator(x, + n_samples=n_samples, + noise_distribution=distribution, + epsilon=epsilon) else: x, y = _cat_dataset( n_samples, @@ -301,15 +307,14 @@ def _do_fit(n_samples, n_outputs, batch_size, generator=generator) - if reset_n_samples: - n_samples = None - - clf.fit(x, - y, - batch_size=batch_size, - n_samples=n_samples, - noise_distribution=distribution, - epsilon=epsilon) + if reset_n_samples: + n_samples = None + clf.fit(x, + y, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=distribution, + epsilon=epsilon) return clf diff --git a/privacy/bolt_on/optimizers.py b/privacy/bolt_on/optimizers.py index 3536450..b7f77df 100644 --- a/privacy/bolt_on/optimizers.py +++ b/privacy/bolt_on/optimizers.py @@ -139,8 +139,8 @@ class BoltOn(optimizer_v2.OptimizerV2): 'n_samples', 'layers', 'batch_size', - '_is_init' - ] + '_is_init', + ] self._internal_optimizer = optimizer self.learning_rate = GammaBetaDecreasingStep() # use the BoltOn Learning # rate scheduler, as required for privacy guarantees. This will still need @@ -250,8 +250,7 @@ class BoltOn(optimizer_v2.OptimizerV2): "Neither '{0}' nor '{1}' object has attribute '{2}'" "".format(self.__class__.__name__, self._internal_optimizer.__class__.__name__, - name - ) + name) ) def __setattr__(self, key, value): @@ -319,8 +318,7 @@ class BoltOn(optimizer_v2.OptimizerV2): layers, class_weights, n_samples, - batch_size - ): + batch_size): """Accepts required values for bolton method from context entry point. Stores them on the optimizer for use throughout fitting. 
@@ -347,8 +345,7 @@ class BoltOn(optimizer_v2.OptimizerV2): _accepted_distributions)) self.noise_distribution = noise_distribution self.learning_rate.initialize(self.loss.beta(class_weights), - self.loss.gamma() - ) + self.loss.gamma()) self.epsilon = tf.constant(epsilon, dtype=self.dtype) self.class_weights = tf.constant(class_weights, dtype=self.dtype) self.n_samples = tf.constant(n_samples, dtype=self.dtype) diff --git a/privacy/bolt_on/optimizers_test.py b/privacy/bolt_on/optimizers_test.py index 731d97d..c6c2f1a 100644 --- a/privacy/bolt_on/optimizers_test.py +++ b/privacy/bolt_on/optimizers_test.py @@ -199,7 +199,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'result': None, 'test_attr': ''}, ]) - def test_fn(self, fn, args, result, test_attr): """test that a fn of BoltOn optimizer is working as expected. @@ -270,7 +269,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): result: the expected output after projection. """ tf.random.set_seed(1) - @tf.function def project_fn(r): loss = TestLoss(1, 1, r) bolton = opt.BoltOn(TestOptimizer(), loss) @@ -358,7 +356,8 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): {'testcase_name': 'fn: get_noise', 'fn': 'get_noise', 'args': [1, 1], - 'err_msg': 'ust be called from within the optimizer\'s context'}, + 'err_msg': 'This method must be called from within the ' + 'optimizer\'s context'}, ]) def test_not_in_context(self, fn, args, err_msg): """Tests that the expected functions raise errors when not in context. 
@@ -368,7 +367,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): args: the arguments for said function err_msg: expected error message """ - @tf.function def test_run(fn, args): loss = TestLoss(1, 1, 1) bolton = opt.BoltOn(TestOptimizer(), loss) @@ -462,7 +460,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): fn: fn to test args: arguments to that fn """ - @tf.function def test_run(fn, args): loss = TestLoss(1, 1, 1) bolton = opt.BoltOn(TestOptimizer(), loss) @@ -577,3 +574,5 @@ class SchedulerTest(keras_parameterized.TestCase): if __name__ == '__main__': test.main() + import unittest + unittest.main() diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index fdfe338..13181a6 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -124,13 +124,12 @@ except ValueError as e: # And now, re running with the parameter set. # ------- n_samples = 20 -bolt.fit(generator, - epsilon=epsilon, - class_weight=class_weight, - batch_size=batch_size, - n_samples=n_samples, - noise_distribution=noise_distribution, - verbose=0) +bolt.fit_generator(generator, + epsilon=epsilon, + class_weight=class_weight, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0) # ------- # You don't have to use the BoltOn model to use the BoltOn method. # There are only a few requirements: From 2e829f5a9a354a162923d658268e770e8e8f37ec Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Thu, 22 Aug 2019 13:54:11 -0400 Subject: [PATCH 3/3] tf2.0b1 readme update. --- privacy/bolt_on/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/privacy/bolt_on/README.md b/privacy/bolt_on/README.md index 4e6d1f5..e1f9bac 100644 --- a/privacy/bolt_on/README.md +++ b/privacy/bolt_on/README.md @@ -47,9 +47,10 @@ https://arxiv.org/pdf/1811.04911.pdf As we are pegged on tensorflow2.0, this package may encounter stability issues in the ongoing development of tensorflow2.0. 
-This sub-package is currently stable for 2.0.0a0 and 2.0.0b0. We are aware of -issues in model fitting using the BoltOnModel in beta1, the latest release, -and are actively working towards solving these issues. +This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0b1. +If you would like to use this sub-package, please use one of these versions as +we cannot guarantee it will work for all latest releases. If you do find issues, +feel free to raise an issue to the contributors listed below. ## Contacts