diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 96d60b2..70c39dc 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -17,9 +17,9 @@ from distutils.version import LooseVersion import tensorflow as tf if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): - raise ImportError("Please upgrade your version of tensorflow from: {0} " - "to at least 2.0.0 to use privacy/bolton".format( - LooseVersion(tf.__version__))) + raise ImportError("Please upgrade your version " + "of tensorflow from: {0} to at least 2.0.0 to " + "use privacy/bolton".format(LooseVersion(tf.__version__))) if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 6af4e00..7d9ad32 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -160,11 +160,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): one = tf.constant(1, dtype=self.dtype) four = tf.constant(4, dtype=self.dtype) - if z > one + h: + if z > one + h: # pylint: disable=no-else-return return _ops.convert_to_tensor_v2(0, dtype=self.dtype) elif tf.math.abs(one - z) <= h: return one / (four * h) * tf.math.pow(one + h - z, 2) - return one - z # elif: z < one - h + return one - z def radius(self): """See super class.""" @@ -300,281 +300,3 @@ class StrongConvexBinaryCrossentropy( set to half the 0.5 * reg_lambda. """ return L1L2(l2=self.reg_lambda/2) - -# class StrongConvexSparseCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. 
-# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( -# reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. 
-# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# class StrongConvexSparseCategoricalCrossentropy( -# losses.SparseCategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. 
-# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# -# class StrongConvexCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. 
-# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. 
-# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) \ No newline at end of file diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 79c8115..2c5c08a 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -24,7 +24,7 @@ from privacy.bolton.losses import StrongConvexMixin from privacy.bolton.optimizers import Bolton -class BoltonModel(Model): +class BoltonModel(Model): # pylint: disable=abstract-method """Bolton episilon-delta differential privacy model. The privacy guarantees are dependent on the noise that is sampled. Please diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 6cbf7ee..f95f209 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -32,7 +32,7 @@ from privacy.bolton.losses import StrongConvexMixin from privacy.bolton import optimizers as opt -class TestModel(Model): +class TestModel(Model): # pylint: disable=abstract-method """Bolton episilon-delta model. Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 6a0c010..8f806a1 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -1,13 +1,29 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tutorial for bolton module, the model and the optimizer.""" import sys sys.path.append('..') -import tensorflow as tf -from privacy.bolton import losses -from privacy.bolton import models - -"""First, we will create a binary classification dataset with a single output -dimension. The samples for each label are repeated data points at different -points in space.""" +import tensorflow as tf # pylint: disable=wrong-import-position +from privacy.bolton import losses # pylint: disable=wrong-import-position +from privacy.bolton import models # pylint: disable=wrong-import-position +# ------- +# First, we will create a binary classification dataset with a single output +# dimension. The samples for each label are repeated data points at different +# points in space. +# ------- # Parameters for dataset n_samples = 10 input_dim = 2 @@ -22,42 +38,50 @@ print(x.shape, y.shape) generator = tf.data.Dataset.from_tensor_slices((x, y)) generator = generator.batch(10) generator = generator.shuffle(10) -"""First, we will explore using the pre - built BoltonModel, which is a thin -wrapper around a Keras Model using a single - layer neural network. -It automatically uses the Bolton Optimizer which encompasses all the logic -required for the Bolton Differential Privacy method.""" +# ------- +# First, we will explore using the pre - built BoltonModel, which is a thin +# wrapper around a Keras Model using a single - layer neural network. +# It automatically uses the Bolton Optimizer which encompasses all the logic +# required for the Bolton Differential Privacy method. 
+# ------- bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have. -"""Now, we will pick our optimizer and Strongly Convex Loss function. The loss -must extend from StrongConvexMixin and implement the associated methods.Some -existing loss functions are pre - implemented in bolton.loss""" +# ------- +# Now, we will pick our optimizer and Strongly Convex Loss function. The loss +# must extend from StrongConvexMixin and implement the associated methods. Some +# existing loss functions are pre-implemented in bolton.losses +# ------- optimizer = tf.optimizers.SGD() reg_lambda = 1 C = 1 radius_constant = 1 loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) -"""For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy -to be 1; these are all tunable and their impact can be read in losses. -StrongConvexBinaryCrossentropy.We then compile the model with the chosen -optimizer and loss, which will automatically wrap the chosen optimizer with the -Bolton Optimizer, ensuring the required components function as required for -privacy guarantees.""" +# ------- +# For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy +# to be 1; these are all tunable and their impact can be read in losses. +# StrongConvexBinaryCrossentropy. We then compile the model with the chosen +# optimizer and loss, which will automatically wrap the chosen optimizer with the +# Bolton Optimizer, ensuring the required components function as required for +# privacy guarantees. +# ------- bolt.compile(optimizer, loss) -"""To fit the model, the optimizer will require additional information about -the dataset and model.These parameters are: -1. the class_weights used -2. the number of samples in the dataset -3. the batch size which the model will try to infer, if possible. If not, you -will be required to pass these explicitly to the fit method. - -As well, there are two privacy parameters than can be altered: -1. 
epsilon, a float -2. noise_distribution, a valid string indicating the distriution to use (must be -implemented) - -The BoltonModel offers a helper method,.calculate_class_weight to aid in -class_weight calculation.""" +# ------- +# To fit the model, the optimizer will require additional information about +# the dataset and model. These parameters are: +# 1. the class_weights used +# 2. the number of samples in the dataset +# 3. the batch size which the model will try to infer, if possible. If not, you +# will be required to pass these explicitly to the fit method. +# +# As well, there are two privacy parameters that can be altered: +# 1. epsilon, a float +# 2. noise_distribution, a valid string indicating the distribution to use (must be +# implemented) +# +# The BoltonModel offers a helper method, .calculate_class_weight, to aid in +# class_weight calculation. # required parameters -class_weight = None # default, use .calculate_class_weight to specify other values +# ------- +class_weight = None # default, use .calculate_class_weight for other values batch_size = None # default, if it cannot be inferred, specify this n_samples = None # default, if it cannot be iferred, specify this # privacy parameters @@ -72,13 +96,15 @@ bolt.fit(x, n_samples=n_samples, noise_distribution=noise_distribution, epochs=2) -"""We may also train a generator object, or try different optimizers and loss -functions. Below, we will see that we must pass the number of samples as the fit -method is unable to infer it for a generator.""" +# ------- +# We may also train a generator object, or try different optimizers and loss +# functions. Below, we will see that we must pass the number of samples as the +# fit method is unable to infer it for a generator. 
+# ------- optimizer2 = tf.optimizers.Adam() bolt.compile(optimizer2, loss) # required parameters -class_weight = None # default, use .calculate_class_weight to specify other values +class_weight = None # default, use .calculate_class_weight for other values batch_size = None # default, if it cannot be inferred, specify this n_samples = None # default, if it cannot be iferred, specify this # privacy parameters @@ -95,7 +121,9 @@ try: ) except ValueError as e: print(e) -"""And now, re running with the parameter set.""" +# ------- +# And now, re-running with the parameter set. +# ------- n_samples = 20 bolt.fit(generator, epsilon=epsilon, @@ -105,51 +133,56 @@ bolt.fit(generator, noise_distribution=noise_distribution, verbose=0 ) -"""You don't have to use the bolton model to use the Bolton method. -There are only a few requirements: -1. make sure any requirements from the loss are implemented in the model. -2. instantiate the optimizer and use it as a context around your fit operation. -""" - -from privacy.bolton.optimizers import Bolton - -"""Here, we create our own model and setup the Bolton optimizer.""" - -class TestModel(tf.keras.Model): - def __init__(self, reg_layer, n_outputs=1): +# ------- +# You don't have to use the bolton model to use the Bolton method. +# There are only a few requirements: +# 1. make sure any requirements from the loss are implemented in the model. +# 2. instantiate the optimizer and use it as a context around the fit operation. +# ------- +# -------------------- Part 2, using the Optimizer +from privacy.bolton.optimizers import Bolton # pylint: disable=wrong-import-position +# ------- +# Here, we create our own model and set up the Bolton optimizer. 
+# ------- +class TestModel(tf.keras.Model): # pylint: disable=abstract-method + def __init__(self, reg_layer, number_of_outputs=1): super(TestModel, self).__init__(name='test') - self.output_layer = tf.keras.layers.Dense(n_outputs, + self.output_layer = tf.keras.layers.Dense(number_of_outputs, kernel_regularizer=reg_layer ) - def call(self, inputs): + def call(self, inputs): # pylint: disable=arguments-differ return self.output_layer(inputs) optimizer = tf.optimizers.SGD() loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) optimizer = Bolton(optimizer, loss) -"""Now, we instantiate our model and check for 1. Since our loss requires L2 -regularization over the kernel, we will pass it to the model.""" +# ------- +# Now, we instantiate our model and check for 1. Since our loss requires L2 +# regularization over the kernel, we will pass it to the model. +# ------- n_outputs = 1 # parameter for model and optimizer context. test_model = TestModel(loss.kernel_regularizer(), n_outputs) test_model.compile(optimizer, loss) -"""We comply with 2., and use the Bolton Optimizer as a context around the fit -method.""" +# ------- +# We comply with 2., and use the Bolton Optimizer as a context around the fit +# method. +# ------- # parameters for context noise_distribution = 'laplace' epsilon = 2 class_weights = 1 # Previously, the fit method auto-detected the class_weights. -# Here, we need to pass the class_weights explicitly. 1 is the equivalent of None. +# Here, we need to pass the class_weights explicitly. 1 is the same as None. 
n_samples = 20 batch_size = 5 with optimizer( - noise_distribution=noise_distribution, - epsilon=epsilon, - layers=test_model.layers, - class_weights=class_weights, - n_samples=n_samples, - batch_size=batch_size + noise_distribution=noise_distribution, + epsilon=epsilon, + layers=test_model.layers, + class_weights=class_weights, + n_samples=n_samples, + batch_size=batch_size ) as _: test_model.fit(x, y, batch_size=batch_size, epochs=2)