Merge pull request #4 from georgianpartners/bolton

Ensuring pylint is 10/10
This commit is contained in:
Christopher Choquette Choo 2019-07-18 15:07:52 -04:00 committed by GitHub
commit 5857e838ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 105 additions and 350 deletions


@@ -17,9 +17,9 @@ from distutils.version import LooseVersion
import tensorflow as tf
if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
raise ImportError("Please upgrade your version of tensorflow from: {0} "
"to at least 2.0.0 to use privacy/bolton".format(
LooseVersion(tf.__version__)))
raise ImportError("Please upgrade your version "
"of tensorflow from: {0} to at least 2.0.0 to "
"use privacy/bolton".format(LooseVersion(tf.__version__)))
if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts.
pass
else:
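# A hedged aside (illustration only; not part of this commit): the hasattr()
# check above only tests that the attribute exists, so a standalone script can
# opt out of the (elided) else-branch imports like this:
#
#   import sys
#   sys.skip_tf_privacy_import = True  # any value works; existence is enough
#   import privacy.bolton              # the else-branch imports are skipped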


@@ -160,11 +160,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
one = tf.constant(1, dtype=self.dtype)
four = tf.constant(4, dtype=self.dtype)
if z > one + h:
if z > one + h: # pylint: disable=no-else-return
return _ops.convert_to_tensor_v2(0, dtype=self.dtype)
elif tf.math.abs(one - z) <= h:
return one / (four * h) * tf.math.pow(one + h - z, 2)
return one - z # elif: z < one - h
return one - z
def radius(self):
"""See super class."""
@@ -300,281 +300,3 @@ class StrongConvexBinaryCrossentropy(
set to 0.5 * reg_lambda.
"""
return L1L2(l2=self.reg_lambda/2)
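
# Aside (illustration only; not part of this commit): Keras' L1L2 penalty is
# l2 * sum(w ** 2), so l2=reg_lambda/2 gives the (reg_lambda / 2) * ||w||^2
# term required for reg_lambda-strong convexity. A quick check, reg_lambda=1:
#
#   w = tf.constant([3.0, 4.0])   # ||w||^2 = 25
#   float(L1L2(l2=0.5)(w))        # -> 12.5 == 0.5 * 25
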
# class StrongConvexSparseCategoricalCrossentropy(
# losses.CategoricalCrossentropy,
# StrongConvexMixin
# ):
# """
# Strong Convex version of CategoricalCrossentropy loss using l2 weight
# regularization.
# """
#
# def __init__(self,
# reg_lambda: float,
# C: float,
# radius_constant: float,
# from_logits: bool = True,
# label_smoothing: float = 0,
# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
# name: str = 'binarycrossentropy',
# dtype=tf.float32):
# """
# Args:
# reg_lambda: Weight regularization constant
# C: Penalty parameter C of the loss term
# radius_constant: constant defining the length of the radius
# reduction: reduction type to use. See super class
# label_smoothing: amount of smoothing to perform on labels
# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
# name: Name of the loss instance
# dtype: tf datatype to use for tensor conversions.
# """
# if reg_lambda <= 0:
# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
# if C <= 0:
# raise ValueError('c: {0}, should be >= 0'.format(C))
# if radius_constant <= 0:
# raise ValueError('radius_constant: {0}, should be >= 0'.format(
# radius_constant
# ))
#
# self.C = C
# self.dtype = dtype
# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
# super(StrongConvexSparseCategoricalCrossentropy, self).__init__(
# reduction=reduction,
# name=name,
# from_logits=from_logits,
# label_smoothing=label_smoothing,
# )
# self.radius_constant = radius_constant
#
# def call(self, y_true, y_pred):
# """Compute loss
#
# Args:
# y_true: Ground truth values.
# y_pred: The predicted values.
#
# Returns:
# Loss values per sample.
# """
# loss = super()
# loss = loss * self.C
# return loss
#
# def radius(self):
# """See super class.
# """
# return self.radius_constant / self.reg_lambda
#
# def gamma(self):
# """See super class.
# """
# return self.reg_lambda
#
# def beta(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda
#
# def lipchitz_constant(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda * self.radius()
#
# def kernel_regularizer(self):
# """
# l2 loss using reg_lambda as the l2 term (as desired). Required for
# this loss function to be strongly convex.
# :return:
# """
# return L1L2(l2=self.reg_lambda)
#
# class StrongConvexSparseCategoricalCrossentropy(
# losses.SparseCategoricalCrossentropy,
# StrongConvexMixin
# ):
# """
# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight
# regularization.
# """
#
# def __init__(self,
# reg_lambda: float,
# C: float,
# radius_constant: float,
# from_logits: bool = True,
# label_smoothing: float = 0,
# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
# name: str = 'binarycrossentropy',
# dtype=tf.float32):
# """
# Args:
# reg_lambda: Weight regularization constant
# C: Penalty parameter C of the loss term
# radius_constant: constant defining the length of the radius
# reduction: reduction type to use. See super class
# label_smoothing: amount of smoothing to perform on labels
# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
# name: Name of the loss instance
# dtype: tf datatype to use for tensor conversions.
# """
# if reg_lambda <= 0:
# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
# if C <= 0:
# raise ValueError('c: {0}, should be >= 0'.format(C))
# if radius_constant <= 0:
# raise ValueError('radius_constant: {0}, should be >= 0'.format(
# radius_constant
# ))
#
# self.C = C
# self.dtype = dtype
# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
# super(StrongConvexHuber, self).__init__(reduction=reduction,
# name=name,
# from_logits=from_logits,
# label_smoothing=label_smoothing,
# )
# self.radius_constant = radius_constant
#
# def call(self, y_true, y_pred):
# """Compute loss
#
# Args:
# y_true: Ground truth values.
# y_pred: The predicted values.
#
# Returns:
# Loss values per sample.
# """
# loss = super()
# loss = loss * self.C
# return loss
#
# def radius(self):
# """See super class.
# """
# return self.radius_constant / self.reg_lambda
#
# def gamma(self):
# """See super class.
# """
# return self.reg_lambda
#
# def beta(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda
#
# def lipchitz_constant(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda * self.radius()
#
# def kernel_regularizer(self):
# """
# l2 loss using reg_lambda as the l2 term (as desired). Required for
# this loss function to be strongly convex.
# :return:
# """
# return L1L2(l2=self.reg_lambda)
#
#
# class StrongConvexCategoricalCrossentropy(
# losses.CategoricalCrossentropy,
# StrongConvexMixin
# ):
# """
# Strong Convex version of CategoricalCrossentropy loss using l2 weight
# regularization.
# """
#
# def __init__(self,
# reg_lambda: float,
# C: float,
# radius_constant: float,
# from_logits: bool = True,
# label_smoothing: float = 0,
# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
# name: str = 'binarycrossentropy',
# dtype=tf.float32):
# """
# Args:
# reg_lambda: Weight regularization constant
# C: Penalty parameter C of the loss term
# radius_constant: constant defining the length of the radius
# reduction: reduction type to use. See super class
# label_smoothing: amount of smoothing to perform on labels
# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
# name: Name of the loss instance
# dtype: tf datatype to use for tensor conversions.
# """
# if reg_lambda <= 0:
# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
# if C <= 0:
# raise ValueError('c: {0}, should be >= 0'.format(C))
# if radius_constant <= 0:
# raise ValueError('radius_constant: {0}, should be >= 0'.format(
# radius_constant
# ))
#
# self.C = C
# self.dtype = dtype
# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
# super(StrongConvexHuber, self).__init__(reduction=reduction,
# name=name,
# from_logits=from_logits,
# label_smoothing=label_smoothing,
# )
# self.radius_constant = radius_constant
#
# def call(self, y_true, y_pred):
# """Compute loss
#
# Args:
# y_true: Ground truth values.
# y_pred: The predicted values.
#
# Returns:
# Loss values per sample.
# """
# loss = super()
# loss = loss * self.C
# return loss
#
# def radius(self):
# """See super class.
# """
# return self.radius_constant / self.reg_lambda
#
# def gamma(self):
# """See super class.
# """
# return self.reg_lambda
#
# def beta(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda
#
# def lipchitz_constant(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda * self.radius()
#
# def kernel_regularizer(self):
# """
# l2 loss using reg_lambda as the l2 term (as desired). Required for
# this loss function to be strongly convex.
# :return:
# """
# return L1L2(l2=self.reg_lambda)


@@ -24,7 +24,7 @@ from privacy.bolton.losses import StrongConvexMixin
from privacy.bolton.optimizers import Bolton
class BoltonModel(Model):
class BoltonModel(Model): # pylint: disable=abstract-method
"""Bolton episilon-delta differential privacy model.
The privacy guarantees are dependent on the noise that is sampled. Please


@@ -32,7 +32,7 @@ from privacy.bolton.losses import StrongConvexMixin
from privacy.bolton import optimizers as opt
class TestModel(Model):
class TestModel(Model): # pylint: disable=abstract-method
"""Bolton episilon-delta model.
Uses 4 key steps to achieve privacy guarantees:
1. Adds noise to weights after training (output perturbation).


@@ -1,13 +1,29 @@
# Copyright 2019, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tutorial for bolton module, the model and the optimizer."""
import sys
sys.path.append('..')
import tensorflow as tf
from privacy.bolton import losses
from privacy.bolton import models
"""First, we will create a binary classification dataset with a single output
dimension. The samples for each label are repeated data points at different
points in space."""
import tensorflow as tf # pylint: disable=wrong-import-position
from privacy.bolton import losses # pylint: disable=wrong-import-position
from privacy.bolton import models # pylint: disable=wrong-import-position
# -------
# First, we will create a binary classification dataset with a single output
# dimension. The samples for each label are repeated data points at different
# points in space.
# -------
# Parameters for dataset
n_samples = 10
input_dim = 2
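# -------
# The construction of x and y sits between this diff's hunks. Purely for
# orientation, one assumed way to build a dataset matching the description
# above (not the tutorial's actual code): one point per label, repeated.
# -------
x_example = tf.concat([tf.constant(1.0, shape=(n_samples, input_dim)),
                       tf.constant(-1.0, shape=(n_samples, input_dim))],
                      axis=0)
y_example = tf.concat([tf.ones((n_samples, 1)),
                       tf.zeros((n_samples, 1))], axis=0)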
@@ -22,42 +38,50 @@ print(x.shape, y.shape)
generator = tf.data.Dataset.from_tensor_slices((x, y))
generator = generator.batch(10)
generator = generator.shuffle(10)
"""First, we will explore using the pre - built BoltonModel, which is a thin
wrapper around a Keras Model using a single - layer neural network.
It automatically uses the Bolton Optimizer which encompasses all the logic
required for the Bolton Differential Privacy method."""
# -------
# Next, we will explore using the pre-built BoltonModel, which is a thin
# wrapper around a Keras Model using a single-layer neural network.
# It automatically uses the Bolton Optimizer, which encompasses all the logic
# required for the Bolton Differential Privacy method.
# -------
bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have.
"""Now, we will pick our optimizer and Strongly Convex Loss function. The loss
must extend from StrongConvexMixin and implement the associated methods.Some
existing loss functions are pre - implemented in bolton.loss"""
# -------
# Now, we will pick our optimizer and Strongly Convex Loss function. The loss
# must extend from StrongConvexMixin and implement the associated methods.
# Some existing loss functions are pre-implemented in bolton.losses.
# -------
optimizer = tf.optimizers.SGD()
reg_lambda = 1
C = 1
radius_constant = 1
loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
"""For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy
to be 1; these are all tunable and their impact can be read in losses.
StrongConvexBinaryCrossentropy.We then compile the model with the chosen
optimizer and loss, which will automatically wrap the chosen optimizer with the
Bolton Optimizer, ensuring the required components function as required for
privacy guarantees."""
# -------
# For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy
# to be 1; these are all tunable, and their impact is documented in
# losses.StrongConvexBinaryCrossentropy. We then compile the model with the
# chosen optimizer and loss, which will automatically wrap the chosen
# optimizer with the Bolton Optimizer, ensuring the components behave as
# required for the privacy guarantees.
# -------
bolt.compile(optimizer, loss)
"""To fit the model, the optimizer will require additional information about
the dataset and model.These parameters are:
1. the class_weights used
2. the number of samples in the dataset
3. the batch size which the model will try to infer, if possible. If not, you
will be required to pass these explicitly to the fit method.
As well, there are two privacy parameters than can be altered:
1. epsilon, a float
2. noise_distribution, a valid string indicating the distriution to use (must be
implemented)
The BoltonModel offers a helper method,.calculate_class_weight to aid in
class_weight calculation."""
# -------
# To fit the model, the optimizer will require additional information about
# the dataset and model. These parameters are:
# 1. the class_weights used
# 2. the number of samples in the dataset
# 3. the batch size
# The model will try to infer the batch size and number of samples if
# possible; if not, you will be required to pass them explicitly to the fit
# method.
#
# As well, there are two privacy parameters that can be altered:
# 1. epsilon, a float
# 2. noise_distribution, a valid string indicating the distribution to use
# (must be implemented)
#
# The BoltonModel offers a helper method, .calculate_class_weight, to aid in
# class_weight calculation.
# required parameters
class_weight = None # default, use .calculate_class_weight to specify other values
# -------
class_weight = None # default, use .calculate_class_weight for other values
batch_size = None # default, if it cannot be inferred, specify this
n_samples = None # default, if it cannot be inferred, specify this
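# -------
# Hedged sketch: deriving explicit weights with the helper named above. The
# signature and the 'balanced' option are assumptions for illustration; this
# commit does not show them.
# -------
# example_class_weight = bolt.calculate_class_weight(
#     class_weights='balanced',   # assumed option name
#     class_counts=[10, 10],      # one count per label in the toy dataset
#     num_classes=2)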
# privacy parameters
@@ -72,13 +96,15 @@ bolt.fit(x,
n_samples=n_samples,
noise_distribution=noise_distribution,
epochs=2)
"""We may also train a generator object, or try different optimizers and loss
functions. Below, we will see that we must pass the number of samples as the fit
method is unable to infer it for a generator."""
# -------
# We may also train on a generator object, or try different optimizers and
# loss functions. Below, we will see that we must pass the number of samples,
# as the fit method is unable to infer it for a generator.
# -------
optimizer2 = tf.optimizers.Adam()
bolt.compile(optimizer2, loss)
# required parameters
class_weight = None # default, use .calculate_class_weight to specify other values
class_weight = None # default, use .calculate_class_weight for other values
batch_size = None # default, if it cannot be inferred, specify this
n_samples = None # default, if it cannot be inferred, specify this
# privacy parameters
@@ -95,7 +121,9 @@ try:
)
except ValueError as e:
print(e)
"""And now, re running with the parameter set."""
# -------
# And now, re-running with n_samples set.
# -------
n_samples = 20
bolt.fit(generator,
epsilon=epsilon,
@@ -105,42 +133,47 @@ bolt.fit(generator,
noise_distribution=noise_distribution,
verbose=0
)
"""You don't have to use the bolton model to use the Bolton method.
There are only a few requirements:
1. make sure any requirements from the loss are implemented in the model.
2. instantiate the optimizer and use it as a context around your fit operation.
"""
from privacy.bolton.optimizers import Bolton
"""Here, we create our own model and setup the Bolton optimizer."""
class TestModel(tf.keras.Model):
def __init__(self, reg_layer, n_outputs=1):
# -------
# You don't have to use the BoltonModel to use the Bolton method.
# There are only a few requirements:
# 1. make sure any requirements from the loss are implemented in the model.
# 2. instantiate the optimizer and use it as a context around the fit operation.
# -------
# -------------------- Part 2, using the Optimizer
from privacy.bolton.optimizers import Bolton # pylint: disable=wrong-import-position
# -------
# Here, we create our own model and set up the Bolton optimizer.
# -------
class TestModel(tf.keras.Model): # pylint: disable=abstract-method
def __init__(self, reg_layer, number_of_outputs=1):
super(TestModel, self).__init__(name='test')
self.output_layer = tf.keras.layers.Dense(n_outputs,
self.output_layer = tf.keras.layers.Dense(number_of_outputs,
kernel_regularizer=reg_layer
)
def call(self, inputs):
def call(self, inputs): # pylint: disable=arguments-differ
return self.output_layer(inputs)
optimizer = tf.optimizers.SGD()
loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
optimizer = Bolton(optimizer, loss)
"""Now, we instantiate our model and check for 1. Since our loss requires L2
regularization over the kernel, we will pass it to the model."""
# -------
# Now, we instantiate our model and satisfy requirement 1: since our loss
# requires L2 regularization over the kernel, we pass the loss's kernel
# regularizer to the model.
# -------
n_outputs = 1 # parameter for model and optimizer context.
test_model = TestModel(loss.kernel_regularizer(), n_outputs)
test_model.compile(optimizer, loss)
"""We comply with 2., and use the Bolton Optimizer as a context around the fit
method."""
# -------
# We comply with requirement 2 and use the Bolton Optimizer as a context
# around the fit method.
# -------
# parameters for context
noise_distribution = 'laplace'
epsilon = 2
class_weights = 1 # Previously, the fit method auto-detected the class_weights.
# Here, we need to pass the class_weights explicitly. 1 is the equivalent of None.
# Here, we need to pass the class_weights explicitly. 1 is the same as None.
n_samples = 20
batch_size = 5
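# -------
# Hedged sketch of the final step: the comments above say to use the Bolton
# optimizer as a context around fitting. The argument order here is an
# assumption for illustration; this commit does not show the call.
# -------
with optimizer(noise_distribution,
               epsilon,
               test_model.layers,
               class_weights,
               n_samples,
               batch_size) as _:
  test_model.fit(x, y, batch_size=batch_size, epochs=2)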