From f41be2c598636f949b97244941ca5cd6ad27d31c Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 19 Jun 2019 10:46:30 -0400 Subject: [PATCH] Bolton implementation and unit tests. Has two pre-implemented loss functions. --- privacy/__init__.py | 6 + privacy/bolton/__init__.py | 7 +- privacy/bolton/{loss.py => losses.py} | 287 +----------------- .../bolton/{loss_test.py => losses_test.py} | 10 +- privacy/bolton/{model.py => models.py} | 13 +- .../bolton/{model_test.py => models_test.py} | 26 +- .../bolton/{optimizer.py => optimizers.py} | 2 +- .../{optimizer_test.py => optimizers_test.py} | 6 +- 8 files changed, 49 insertions(+), 308 deletions(-) rename privacy/bolton/{loss.py => losses.py} (51%) rename privacy/bolton/{loss_test.py => losses_test.py} (98%) rename privacy/bolton/{model.py => models.py} (96%) rename privacy/bolton/{model_test.py => models_test.py} (96%) rename privacy/bolton/{optimizer.py => optimizers.py} (99%) rename privacy/bolton/{optimizer_test.py => optimizers_test.py} (99%) diff --git a/privacy/__init__.py b/privacy/__init__.py index 59bfe20..e494c62 100644 --- a/privacy/__init__.py +++ b/privacy/__init__.py @@ -41,3 +41,9 @@ else: from privacy.optimizers.dp_optimizer import DPAdamOptimizer from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer + + from privacy.bolton.models import BoltonModel + from privacy.bolton.optimizers import Bolton + from privacy.bolton.losses import StrongConvexMixin + from privacy.bolton.losses import StrongConvexBinaryCrossentropy + from privacy.bolton.losses import StrongConvexHuber diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 67b6148..971b804 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -9,6 +9,7 @@ if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: - from privacy.bolton.model import Bolton - from privacy.bolton.loss import StrongConvexHuber - from privacy.bolton.loss import StrongConvexBinaryCrossentropy \ No newline at end of file + from privacy.bolton.models import BoltonModel + from privacy.bolton.optimizers import Bolton + from privacy.bolton.losses import StrongConvexHuber + from privacy.bolton.losses import StrongConvexBinaryCrossentropy \ No newline at end of file diff --git a/privacy/bolton/loss.py b/privacy/bolton/losses.py similarity index 51% rename from privacy/bolton/loss.py rename to privacy/bolton/losses.py index 4ed0479..a326946 100644 --- a/privacy/bolton/loss.py +++ b/privacy/bolton/losses.py @@ -21,6 +21,7 @@ from tensorflow.python.keras import losses from tensorflow.python.keras.utils import losses_utils from tensorflow.python.framework import ops as _ops from tensorflow.python.keras.regularizers import L1L2 +from tensorflow.python.platform import tf_logging as logging class StrongConvexMixin: @@ -147,7 +148,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): self.dtype = dtype self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexHuber, self).__init__( - name='huber', + name='strongconvexhuber', reduction=reduction, ) @@ -245,6 +246,11 @@ class StrongConvexBinaryCrossentropy( name: Name of the loss instance dtype: tf datatype to use for tensor conversions. """ + if label_smoothing != 0: + logging.warning('The impact of label smoothing on privacy is unknown. ' + 'Use label smoothing at your own risk as it may not ' + 'guarantee privacy.') + if reg_lambda <= 0: raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) if C <= 0: @@ -258,7 +264,7 @@ class StrongConvexBinaryCrossentropy( self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexBinaryCrossentropy, self).__init__( reduction=reduction, - name='binarycrossentropy', + name='strongconvexbinarycrossentropy', from_logits=from_logits, label_smoothing=label_smoothing, ) @@ -313,280 +319,3 @@ class StrongConvexBinaryCrossentropy( return L1L2(l2=self.reg_lambda/2) -# class StrongConvexSparseCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( -# reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# class StrongConvexSparseCategoricalCrossentropy( -# losses.SparseCategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# -# class StrongConvexCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) diff --git a/privacy/bolton/loss_test.py b/privacy/bolton/losses_test.py similarity index 98% rename from privacy/bolton/loss_test.py rename to privacy/bolton/losses_test.py index 488710f..d2c9f80 100644 --- a/privacy/bolton/loss_test.py +++ b/privacy/bolton/losses_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Unit testing for loss.py""" +"""Unit testing for losses.py""" from __future__ import absolute_import from __future__ import division @@ -22,9 +22,9 @@ from tensorflow.python.keras import keras_parameterized from tensorflow.python.framework import test_util from tensorflow.python.keras.regularizers import L1L2 from absl.testing import parameterized -from privacy.bolton.loss import StrongConvexBinaryCrossentropy -from privacy.bolton.loss import StrongConvexHuber -from privacy.bolton.loss import StrongConvexMixin +from privacy.bolton.losses import StrongConvexBinaryCrossentropy +from privacy.bolton.losses import StrongConvexHuber +from privacy.bolton.losses import StrongConvexMixin class StrongConvexMixinTests(keras_parameterized.TestCase): @@ -355,7 +355,7 @@ class HuberTests(keras_parameterized.TestCase): 'fn': 'kernel_regularizer', 'init_args': [1, 1, 1, 1], 'args': [], - 'result': L1L2(l2=1), + 'result': L1L2(l2=0.5), }, ]) def test_fns(self, init_args, fn, args, result): diff --git a/privacy/bolton/model.py b/privacy/bolton/models.py similarity index 96% rename from privacy/bolton/model.py rename to privacy/bolton/models.py index 6f3f48e..0a2efc0 100644 --- a/privacy/bolton/model.py +++ b/privacy/bolton/models.py @@ -20,8 +20,8 @@ import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras import optimizers from tensorflow.python.framework import ops as _ops -from privacy.bolton.loss import StrongConvexMixin -from privacy.bolton.optimizer import Bolton +from privacy.bolton.losses import StrongConvexMixin +from privacy.bolton.optimizers import Bolton class BoltonModel(Model): @@ -142,7 +142,9 @@ class BoltonModel(Model): """ if class_weight is None: - class_weight = self.calculate_class_weights(class_weight) + class_weight_ = self.calculate_class_weights(class_weight) + else: + class_weight_ = class_weight if n_samples is not None: data_size = n_samples elif hasattr(x, 'shape'): @@ -160,10 +162,13 @@ class BoltonModel(Model): if batch_size_ is None: raise ValueError('batch_size: {0} is an ' 'invalid value'.format(batch_size_)) + if data_size is None: + raise ValueError('Could not infer the number of samples. Please pass ' + 'this in using n_samples.') with self.optimizer(noise_distribution, epsilon, self.layers, - class_weight, + class_weight_, data_size, self.n_outputs, batch_size_, diff --git a/privacy/bolton/model_test.py b/privacy/bolton/models_test.py similarity index 96% rename from privacy/bolton/model_test.py rename to privacy/bolton/models_test.py index 4316a1e..05119d3 100644 --- a/privacy/bolton/model_test.py +++ b/privacy/bolton/models_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Unit testing for model.py""" +"""Unit testing for models.py""" from __future__ import absolute_import from __future__ import division @@ -25,9 +25,9 @@ from tensorflow.python.keras import losses from tensorflow.python.framework import ops as _ops from tensorflow.python.keras.regularizers import L1L2 from absl.testing import parameterized -from privacy.bolton import model -from privacy.bolton.optimizer import Bolton -from privacy.bolton.loss import StrongConvexMixin +from privacy.bolton import models +from privacy.bolton.optimizers import Bolton +from privacy.bolton.losses import StrongConvexMixin class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model""" @@ -130,8 +130,8 @@ class InitTests(keras_parameterized.TestCase): n_outputs: number of output neurons """ # test valid domains for each variable - clf = model.BoltonModel(n_outputs) - self.assertIsInstance(clf, model.BoltonModel) + clf = models.BoltonModel(n_outputs) + self.assertIsInstance(clf, models.BoltonModel) @parameterized.named_parameters([ {'testcase_name': 'invalid n_outputs', @@ -146,7 +146,7 @@ class InitTests(keras_parameterized.TestCase): """ # test invalid domains for each variable, especially noise with self.assertRaises(ValueError): - model.BoltonModel(n_outputs) + models.BoltonModel(n_outputs) @parameterized.named_parameters([ {'testcase_name': 'string compile', @@ -170,7 +170,7 @@ class InitTests(keras_parameterized.TestCase): """ # test compilation of valid tf.optimizer and tf.loss with self.cached_session(): - clf = model.BoltonModel(n_outputs) + clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) self.assertEqual(clf.loss, loss) @@ -197,7 +197,7 @@ class InitTests(keras_parameterized.TestCase): # test compilaton of invalid tf.optimizer and non instantiated loss. with self.cached_session(): with self.assertRaises((ValueError, AttributeError)): - clf = model.BoltonModel(n_outputs) + clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) @@ -261,7 +261,7 @@ def _do_fit(n_samples, Returns: BoltonModel instsance """ - clf = model.BoltonModel(n_outputs) + clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) if generator: x = _cat_dataset( @@ -355,7 +355,7 @@ class FitTests(keras_parameterized.TestCase): input_dim = 5 batch_size = 1 n_samples = 10 - clf = model.BoltonModel(n_classes) + clf = models.BoltonModel(n_classes) clf.compile(optimizer, loss) x = _cat_dataset( n_samples, @@ -441,7 +441,7 @@ class FitTests(keras_parameterized.TestCase): num_classes: number of outputs neurons result: expected result """ - clf = model.BoltonModel(1, 1) + clf = models.BoltonModel(1, 1) expected = clf.calculate_class_weights(class_weights, class_counts, num_classes @@ -508,7 +508,7 @@ class FitTests(keras_parameterized.TestCase): num_classes: number of outputs neurons result: expected result """ - clf = model.BoltonModel(1, 1) + clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method clf.calculate_class_weights(class_weights, class_counts, diff --git a/privacy/bolton/optimizer.py b/privacy/bolton/optimizers.py similarity index 99% rename from privacy/bolton/optimizer.py rename to privacy/bolton/optimizers.py index cfd0b98..726ec4f 100644 --- a/privacy/bolton/optimizer.py +++ b/privacy/bolton/optimizers.py @@ -21,7 +21,7 @@ import tensorflow as tf from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import math_ops from tensorflow.python import ops as _ops -from privacy.bolton.loss import StrongConvexMixin +from privacy.bolton.losses import StrongConvexMixin _accepted_distributions = ['laplace'] # implemented distributions for noising diff --git a/privacy/bolton/optimizer_test.py b/privacy/bolton/optimizers_test.py similarity index 99% rename from privacy/bolton/optimizer_test.py rename to privacy/bolton/optimizers_test.py index 2060031..0a9f9cc 100644 --- a/privacy/bolton/optimizer_test.py +++ b/privacy/bolton/optimizers_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Unit testing for optimizer.py""" +"""Unit testing for optimizers.py""" from __future__ import absolute_import from __future__ import division @@ -29,8 +29,8 @@ from tensorflow.python.framework import ops as _ops from tensorflow.python.framework import test_util from tensorflow.python import ops as _ops from absl.testing import parameterized -from privacy.bolton.loss import StrongConvexMixin -from privacy.bolton import optimizer as opt +from privacy.bolton.losses import StrongConvexMixin +from privacy.bolton import optimizers as opt