From 5f46927747ed9a26b03cb4ed63216dc6757a104f Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 5 Jun 2019 17:06:02 -0400 Subject: [PATCH 01/39] Working bolton model without unit tests. --- privacy/bolton/__init__.py | 14 ++ privacy/bolton/loss.py | 280 +++++++++++++++++++++ privacy/bolton/loss_test.py | 3 + privacy/bolton/model.py | 402 +++++++++++++++++++++++++++++++ privacy/bolton/model_test.py | 3 + privacy/bolton/optimizer.py | 173 +++++++++++++ privacy/bolton/optimizer_test.py | 9 + 7 files changed, 884 insertions(+) create mode 100644 privacy/bolton/__init__.py create mode 100644 privacy/bolton/loss.py create mode 100644 privacy/bolton/loss_test.py create mode 100644 privacy/bolton/model.py create mode 100644 privacy/bolton/model_test.py create mode 100644 privacy/bolton/optimizer.py create mode 100644 privacy/bolton/optimizer_test.py diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py new file mode 100644 index 0000000..46bd079 --- /dev/null +++ b/privacy/bolton/__init__.py @@ -0,0 +1,14 @@ +import sys +from distutils.version import LooseVersion +import tensorflow as tf + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + raise ImportError("Please upgrade your version of tensorflow from: {0} " + "to at least 2.0.0 to use privacy/bolton".format( + LooseVersion(tf.__version__))) +if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. + pass +else: + from privacy.bolton.model import Bolton + from privacy.bolton.loss import Huber + from privacy.bolton.loss import BinaryCrossentropy \ No newline at end of file diff --git a/privacy/bolton/loss.py b/privacy/bolton/loss.py new file mode 100644 index 0000000..dd5d580 --- /dev/null +++ b/privacy/bolton/loss.py @@ -0,0 +1,280 @@ +# Copyright 2018, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Loss functions for bolton method""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tensorflow as tf +from tensorflow.python.keras import losses +from tensorflow.python.keras.utils import losses_utils +from tensorflow.python.framework import ops as _ops + + +class StrongConvexLoss(losses.Loss): + """ + Strong Convex Loss base class for any loss function that will be used with + Bolton model. Subclasses must be strongly convex and implement the + associated constants. They must also conform to the requirements of tf losses + (see super class) + """ + def __init__(self, + reg_lambda: float, + c: float, + radius_constant: float = 1, + reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + name: str = None, + dtype=tf.float32, + **kwargs): + """ + Args: + reg_lambda: Weight regularization constant + c: Additional constant for strongly convex convergence. Acts + as a global weight. + radius_constant: constant defining the length of the radius + reduction: reduction type to use. See super class + name: Name of the loss instance + dtype: tf datatype to use for tensor conversions. 
+ """ + super(StrongConvexLoss, self).__init__(reduction=reduction, + name=name, + **kwargs) + self._sample_weight = tf.Variable(initial_value=c, + trainable=False, + dtype=tf.float32) + self._reg_lambda = reg_lambda + self.radius_constant = tf.Variable(initial_value=radius_constant, + trainable=False, + dtype=tf.float32) + self.dtype = dtype + + def radius(self): + """Radius of R-Ball (value to normalize weights to after each batch) + + Returns: radius + + """ + raise NotImplementedError("Radius not implemented for StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def gamma(self): + """ Gamma strongly convex + + Returns: gamma + + """ + raise NotImplementedError("Gamma not implemented for StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def beta(self, class_weight): + """Beta smoothess + + Args: + class_weight: the class weights used. + + Returns: Beta + + """ + raise NotImplementedError("Beta not implemented for StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def lipchitz_constant(self, class_weight): + """ L lipchitz continuous + + Args: + class_weight: class weights used + + Returns: L + + """ + raise NotImplementedError("lipchitz constant not implemented for " + "StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def reg_lambda(self, convert_to_tensor: bool = False): + """ returns the lambda weight regularization constant, as a tensor if + desired + + Args: + convert_to_tensor: True to convert to tensor, False to leave as + python numeric. + + Returns: reg_lambda + + """ + if convert_to_tensor: + return _ops.convert_to_tensor_v2(self._reg_lambda, dtype=self.dtype) + return self._reg_lambda + + def max_class_weight(self, class_weight): + class_weight = _ops.convert_to_tensor_v2(class_weight, dtype=self.dtype) + return tf.math.reduce_max(class_weight) + + +class Huber(StrongConvexLoss, losses.Huber): + """Strong Convex version of huber loss using l2 weight regularization. + """ + def __init__(self, + reg_lambda: float, + c: float, + radius_constant: float, + delta: float, + reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + name: str = 'huber', + dtype=tf.float32): + """Constructor. Passes arguments to StrongConvexLoss and Huber Loss. + + Args: + reg_lambda: Weight regularization constant + c: Additional constant for strongly convex convergence. Acts + as a global weight. + radius_constant: constant defining the length of the radius + delta: delta value in huber loss. When to switch from quadratic to + absolute deviation. + reduction: reduction type to use. See super class + name: Name of the loss instance + dtype: tf datatype to use for tensor conversions. + + Returns: + Loss values per sample. + """ + # self.delta = tf.Variable(initial_value=delta, trainable=False) + super(Huber, self).__init__( + reg_lambda, + c, + radius_constant, + delta=delta, + name=name, + reduction=reduction, + dtype=dtype + ) + + def call(self, y_true, y_pred): + """Compute loss + + Args: + y_true: Ground truth values. + y_pred: The predicted values. + + Returns: + Loss values per sample. + """ + return super(Huber, self).call(y_true, y_pred, **self._fn_kwargs) * \ + self._sample_weight + + def radius(self): + """See super class. + """ + return self.radius_constant / self.reg_lambda(True) + + def gamma(self): + """See super class. + """ + return self.reg_lambda(True) + + def beta(self, class_weight): + """See super class. 
+ """ + max_class_weight = self.max_class_weight(class_weight) + return self._sample_weight * max_class_weight / \ + (self.delta * tf.Variable(initial_value=2, trainable=False)) + \ + self.reg_lambda(True) + + def lipchitz_constant(self, class_weight): + """See super class. + """ + # if class_weight is provided, + # it should be a vector of the same size of number of classes + max_class_weight = self.max_class_weight(class_weight) + lc = self._sample_weight * max_class_weight + \ + self.reg_lambda(True) * self.radius() + return lc + + +class BinaryCrossentropy(StrongConvexLoss, losses.BinaryCrossentropy): + """ + Strong Convex version of BinaryCrossentropy loss using l2 weight + regularization. + """ + def __init__(self, + reg_lambda: float, + c: float, + radius_constant: float, + from_logits: bool = True, + label_smoothing: float = 0, + reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + name: str = 'binarycrossentropy', + dtype=tf.float32): + """ + Args: + reg_lambda: Weight regularization constant + c: Additional constant for strongly convex convergence. Acts + as a global weight. + radius_constant: constant defining the length of the radius + reduction: reduction type to use. See super class + label_smoothing: amount of smoothing to perform on labels + relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) + name: Name of the loss instance + dtype: tf datatype to use for tensor conversions. + """ + super(BinaryCrossentropy, self).__init__(reg_lambda, + c, + radius_constant, + reduction=reduction, + name=name, + from_logits=from_logits, + label_smoothing=label_smoothing, + dtype=dtype + ) + self.radius_constant = radius_constant + + def call(self, y_true, y_pred): + """Compute loss + + Args: + y_true: Ground truth values. + y_pred: The predicted values. + + Returns: + Loss values per sample. + """ + loss = tf.nn.sigmoid_cross_entropy_with_logits( + labels=y_true, + logits=y_pred + ) + loss = loss * self._sample_weight + return loss + + def radius(self): + """See super class. + """ + return self.radius_constant / self.reg_lambda(True) + + def gamma(self): + """See super class. + """ + return self.reg_lambda(True) + + def beta(self, class_weight): + """See super class. + """ + max_class_weight = self.max_class_weight(class_weight) + return self._sample_weight * max_class_weight + self.reg_lambda(True) + + def lipchitz_constant(self, class_weight): + """See super class. + """ + max_class_weight = self.max_class_weight(class_weight) + return self._sample_weight * max_class_weight + \ + self.reg_lambda(True) * self.radius() diff --git a/privacy/bolton/loss_test.py b/privacy/bolton/loss_test.py new file mode 100644 index 0000000..87669fd --- /dev/null +++ b/privacy/bolton/loss_test.py @@ -0,0 +1,3 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function \ No newline at end of file diff --git a/privacy/bolton/model.py b/privacy/bolton/model.py new file mode 100644 index 0000000..a600374 --- /dev/null +++ b/privacy/bolton/model.py @@ -0,0 +1,402 @@ +# Copyright 2018, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Bolton model for bolton method of differentially private ML""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tensorflow as tf +from tensorflow.python.keras.models import Model +from tensorflow.python.keras import optimizers +from tensorflow.python.training.tracking import base as trackable +from tensorflow.python.framework import ops as _ops +from privacy.bolton.loss import StrongConvexLoss +from privacy.bolton.optimizer import Private + + +class Bolton(Model): + """ + Bolton episilon-delta model + Uses 4 key steps to achieve privacy guarantees: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R after each batch + 3. Limits learning rate + 4. Use a strongly convex loss function (see compile) + """ + def __init__(self, + n_classes, + epsilon, + noise_distribution='laplace', + weights_initializer=tf.initializers.GlorotUniform(), + seed=1, + dtype=tf.float32 + ): + """ private constructor. + + Args: + n_classes: number of output classes to predict. + epsilon: level of privacy guarantee + noise_distribution: distribution to pull weight perturbations from + weights_initializer: initializer for weights + seed: random seed to use + dtype: data type to use for tensors + """ + + class MyCustomCallback(tf.keras.callbacks.Callback): + """Custom callback for bolton training requirements. + Implements steps (see Bolton class): + 2. Projects weights to R after each batch + 3. Limits learning rate + """ + def on_train_batch_end(self, batch, logs=None): + loss = self.model.loss + self.model.optimizer.limit_learning_rate( + self.model.run_eagerly, + loss.beta(self.model.class_weight), + loss.gamma() + ) + self.model._project_weights_to_r(loss.radius(), False) + + def on_train_end(self, logs=None): + loss = self.model.loss + self.model._project_weights_to_r(loss.radius(), True) + + super(Bolton, self).__init__(name='bolton', dynamic=False) + self.n_classes = n_classes + self.output_layer = tf.keras.layers.Dense( + self.n_classes, + kernel_regularizer=tf.keras.regularizers.l2(), + kernel_initializer=weights_initializer, + ) + # if we do regularization here, we require the user to re-instantiate + # the model each time they want to + # change lambda, unless we standardize modifying it later at .compile + self.force = False + self.noise_distribution = noise_distribution + self.epsilon = epsilon + self.seed = seed + self.__in_fit = False + self._callback = MyCustomCallback() + self._dtype = dtype + + def call(self, inputs): + """Forward pass of network + + Args: + inputs: inputs to neural network + + Returns: + + """ + return self.output_layer(inputs) + + def compile(self, + optimizer='SGD', + loss=None, + metrics=None, + loss_weights=None, + sample_weight_mode=None, + weighted_metrics=None, + target_tensors=None, + distribute=None, + **kwargs): + """See super class. Default optimizer used in Bolton method is SGD. 
+ + """ + if not isinstance(loss, StrongConvexLoss): + raise ValueError("Loss must be subclassed from StrongConvexLoss") + self.output_layer.kernel_regularizer.l2 = loss.reg_lambda() + if not isinstance(optimizer, Private): + optimizer = optimizers.get(optimizer) + if isinstance(self.optimizer, trackable.Trackable): + self._track_trackable( + self.optimizer, name='optimizer', overwrite=True + ) + optimizer = Private(optimizer) + + super(Bolton, self).compile(optimizer, + loss=loss, + metrics=metrics, + loss_weights=loss_weights, + sample_weight_mode=sample_weight_mode, + weighted_metrics=weighted_metrics, + target_tensors=target_tensors, + distribute=distribute, + **kwargs + ) + + def _post_fit(self, x, n_samples): + """Implements 1-time weight changes needed for Bolton method. + In this case, specifically implements the noise addition + assuming a strongly convex function. + + Args: + x: inputs + n_samples: number of samples in the inputs. In case the number + cannot be readily determined by inspecting x. + + Returns: + + """ + if n_samples is not None: + data_size = n_samples + elif hasattr(x, 'shape'): + data_size = x.shape[0] + elif hasattr(x, "__len__"): + data_size = len(x) + else: + if n_samples is None: + raise ValueError("Unable to detect the number of training " + "samples and n_smaples was None. " + "either pass a dataset with a .shape or " + "__len__ attribute or explicitly pass the " + "number of samples as n_smaples.") + data_size = n_samples + + for layer in self._layers: + layer.kernel = layer.kernel + self._get_noise( + self.noise_distribution, + data_size + ) + + def fit(self, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_freq=1, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + n_samples=None, + **kwargs): + """Reroutes to super fit with additional Bolton delta-epsilon privacy + requirements implemented. Note, inputs must be normalized s.t. ||x|| < 1 + Requirements are as follows: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R after each batch + 3. Limits learning rate + 4. Use a strongly convex loss function (see compile) + See super implementation for more details. + + Args: + n_samples: the number of individual samples in x. 
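          A minimal usage sketch of the API added in this patch (illustrative
          only; x_train and y_train are assumed to be in-memory arrays with
          binary labels and inputs pre-normalized so that ||x|| <= 1):

              import tensorflow as tf
              from privacy.bolton.model import Bolton
              from privacy.bolton.loss import BinaryCrossentropy

              # Strongly convex loss with l2 regularization constant reg_lambda,
              # global weight c, and radius constant for the R-ball projection.
              loss = BinaryCrossentropy(reg_lambda=1.0, c=1.0, radius_constant=1.0)
              model = Bolton(n_classes=1, epsilon=1.0, noise_distribution='laplace')
              model.compile(optimizer='SGD', loss=loss)
              # n_samples lets _post_fit size the output-perturbation noise when
              # the sample count cannot be inferred from x.
              model.fit(x_train, y_train, batch_size=32, epochs=2,
                        n_samples=len(x_train))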
+ + Returns: + + """ + self.__in_fit = True + cb = [self._callback] + if callbacks is not None: + cb.extend(callbacks) + callbacks = cb + if class_weight is None: + class_weight = self.calculate_class_weights(class_weight) + self.class_weight = class_weight + out = super(Bolton, self).fit(x=x, + y=y, + batch_size=batch_size, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_split=validation_split, + validation_data=validation_data, + shuffle=shuffle, + class_weight=class_weight, + sample_weight=sample_weight, + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps, + validation_freq=validation_freq, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + **kwargs + ) + self._post_fit(x, n_samples) + self.__in_fit = False + return out + + def fit_generator(self, + generator, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + validation_freq=1, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=True, + initial_epoch=0, + n_samples=None + ): + """ + This method is the same as fit except for when the passed dataset + is a generator. See super method and fit for more details. + Args: + n_samples: number of individual samples in x + + """ + if class_weight is None: + class_weight = self.calculate_class_weights(class_weight) + self.class_weight = class_weight + out = super(Bolton, self).fit_generator( + generator, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + validation_freq=validation_freq, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch + ) + if not self.__in_fit: + self._post_fit(generator, n_samples) + return out + + def calculate_class_weights(self, + class_weights=None, + class_counts=None, + num_classes=None + ): + """ + Calculates class weighting to be used in training. Can be on + Args: + class_weights: str specifying type, array giving weights, or None. + class_counts: If class_weights is not None, then the number of + samples for each class + num_classes: If class_weights is not None, then the number of + classes. + Returns: class_weights as 1D tensor, to be passed to model's fit method. + + """ + # Value checking + class_keys = ['balanced'] + is_string = False + if isinstance(class_weights, str): + is_string = True + if class_weights not in class_keys: + raise ValueError("Detected string class_weights with " + "value: {0}, which is not one of {1}." 
+ "Please select a valid class_weight type" + "or pass an array".format(class_weights, + class_keys)) + if class_counts is None: + raise ValueError("Class counts must be provided if using" + "class_weights=%s" % class_weights) + if num_classes is None: + raise ValueError("Class counts must be provided if using" + "class_weights=%s" % class_weights) + elif class_weights is not None: + if num_classes is None: + raise ValueError("You must pass a value for num_classes if" + "creating an array of class_weights") + # performing class weight calculation + if class_weights is None: + class_weights = 1 + elif is_string and class_weights == 'balanced': + num_samples = sum(class_counts) + class_weights = tf.Variable( + num_samples / (num_classes * class_counts), + dtype=self._dtype + ) + else: + class_weights = _ops.convert_to_tensor_v2(class_weights) + if len(class_weights.shape) != 1: + raise ValueError("Detected class_weights shape: {0} instead of " + "1D array".format(class_weights.shape)) + if class_weights.shape[0] != num_classes: + raise ValueError( + "Detected array length: {0} instead of: {1}".format( + class_weights.shape[0], + num_classes + ) + ) + return class_weights + + def _project_weights_to_r(self, r, force=False): + """helper method to normalize the weights to the R-ball. + + Args: + r: radius of "R-Ball". Scalar to normalize to. + force: True to normalize regardless of previous weight values. + False to check if weights > R-ball and only normalize then. + + Returns: + + """ + for layer in self._layers: + weight_norm = tf.norm(layer.kernel, axis=0) + if force: + layer.kernel = layer.kernel / (weight_norm / r) + elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self._dtype)) > 0: + layer.kernel = layer.kernel / (weight_norm / r) + + def _get_noise(self, distribution, data_size): + """Sample noise to be added to weights for privacy guarantee + + Args: + distribution: the distribution type to pull noise from + data_size: the number of samples + + Returns: noise in shape of layer's weights to be added to the weights. + + """ + distribution = distribution.lower() + input_dim = self._layers[0].kernel.numpy().shape[0] + loss = self.loss + if distribution == 'laplace': + per_class_epsilon = self.epsilon / (self.n_classes) + l2_sensitivity = (2 * + loss.lipchitz_constant(self.class_weight)) / \ + (loss.gamma() * data_size) + unit_vector = tf.random.normal(shape=(input_dim, self.n_classes), + mean=0, + seed=1, + stddev=1.0, + dtype=self._dtype) + unit_vector = unit_vector / tf.math.sqrt( + tf.reduce_sum(tf.math.square(unit_vector), axis=0) + ) + + beta = l2_sensitivity / per_class_epsilon + alpha = input_dim # input_dim + gamma = tf.random.gamma([self.n_classes], + alpha, + beta=1 / beta, + seed=1, + dtype=self._dtype) + return unit_vector * gamma + raise NotImplementedError("distribution: {0} is not " + "currently supported".format(distribution)) diff --git a/privacy/bolton/model_test.py b/privacy/bolton/model_test.py new file mode 100644 index 0000000..87669fd --- /dev/null +++ b/privacy/bolton/model_test.py @@ -0,0 +1,3 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function \ No newline at end of file diff --git a/privacy/bolton/optimizer.py b/privacy/bolton/optimizer.py new file mode 100644 index 0000000..f8af390 --- /dev/null +++ b/privacy/bolton/optimizer.py @@ -0,0 +1,173 @@ +# Copyright 2018, The TensorFlow Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Private Optimizer for bolton method""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.optimizer_v2 import optimizer_v2 + +_private_attributes = ['_internal_optimizer', 'dtype'] + + +class Private(optimizer_v2.OptimizerV2): + """ + Private optimizer wraps another tf optimizer to be used + as the visible optimizer to the tf model. No matter the optimizer + passed, "Private" enables the bolton model to control the learning rate + based on the strongly convex loss. + """ + def __init__(self, + optimizer: optimizer_v2.OptimizerV2, + dtype=tf.float32 + ): + """Constructor. + + Args: + optimizer: Optimizer_v2 or subclass to be used as the optimizer + (wrapped). + """ + self._internal_optimizer = optimizer + self.dtype = dtype + + def get_config(self): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer.get_config() + + def limit_learning_rate(self, is_eager, beta, gamma): + """Implements learning rate limitation that is required by the bolton + method for sensitivity bounding of the strongly convex function. + Sets the learning rate to the min(1/beta, 1/(gamma*t)) + + Args: + is_eager: Whether the model is running in eager mode + beta: loss function beta-smoothness + gamma: loss function gamma-strongly convex + + Returns: None + + """ + numerator = tf.Variable(initial_value=1, dtype=self.dtype) + t = tf.cast(self._iterations, self.dtype) + # will exist on the internal optimizer + pred = numerator / beta < numerator / (gamma * t) + if is_eager: # check eagerly + if pred: + self.learning_rate = numerator / beta + else: + self.learning_rate = numerator / (gamma * t) + else: + if pred: + self.learning_rate = numerator / beta + else: + self.learning_rate = numerator / (gamma * t) + + def from_config(self, config, custom_objects=None): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer.from_config( + config, + custom_objects=custom_objects + ) + + def __getattr__(self, name): + """return _internal_optimizer off self instance, and everything else + from the _internal_optimizer instance. + + Args: + name: + + Returns: attribute from Private if specified to come from self, else + from _internal_optimizer. + + """ + if name in _private_attributes: + return getattr(self, name) + optim = object.__getattribute__(self, '_internal_optimizer') + return object.__getattribute__(optim, name) + + def __setattr__(self, key, value): + """ Set attribute to self instance if its the internal optimizer. + Reroute everything else to the _internal_optimizer. 
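        A short illustrative sketch of the wrapper's behavior (SGD here stands
        in for any OptimizerV2 subclass being wrapped):

            import tensorflow as tf
            from privacy.bolton.optimizer import Private

            opt = Private(tf.keras.optimizers.SGD(learning_rate=0.5))
            opt.get_config()          # rerouted to the wrapped SGD's get_config
            opt.learning_rate = 0.01  # __setattr__ forwards this to the wrapped SGD
            # During training the Bolton callback calls limit_learning_rate, which
            # caps the rate at min(1/beta, 1/(gamma * t)) using the strongly convex
            # loss's beta-smoothness and gamma constants.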
+ + Args: + key: attribute name + value: attribute value + + Returns: + + """ + if key in _private_attributes: + object.__setattr__(self, key, value) + else: + setattr(self._internal_optimizer, key, value) + + def _resource_apply_dense(self, grad, handle): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer._resource_apply_dense(grad, handle) + + def _resource_apply_sparse(self, grad, handle, indices): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer._resource_apply_sparse( + grad, + handle, + indices + ) + + def get_updates(self, loss, params): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer.get_updates(loss, params) + + def apply_gradients(self, grads_and_vars, name: str = None): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer.apply_gradients( + grads_and_vars, + name=name + ) + + def minimize(self, + loss, + var_list, + grad_loss: bool = None, + name: str = None + ): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer.minimize( + loss, + var_list, + grad_loss, + name + ) + + def _compute_gradients(self, loss, var_list, grad_loss=None): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer._compute_gradients( + loss, + var_list, + grad_loss=grad_loss + ) + + def get_gradients(self, loss, params): + """Reroutes to _internal_optimizer. See super/_internal_optimizer. + """ + return self._internal_optimizer.get_gradients(loss, params) diff --git a/privacy/bolton/optimizer_test.py b/privacy/bolton/optimizer_test.py new file mode 100644 index 0000000..ec8de48 --- /dev/null +++ b/privacy/bolton/optimizer_test.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.platform import test +from tensorflow.python.keras import keras_parameterized +from privacy.bolton import model + From 751eaead545d45bcc47bff7d82656b08c474b434 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 10 Jun 2019 16:11:47 -0400 Subject: [PATCH 02/39] Working bolton model without unit tests. -- update to include pull request changes changes include: parameter renaming, changing to mixin, moving model to compile, additional tests, fixing huber loss --- privacy/bolton/__init__.py | 4 +- privacy/bolton/loss.py | 502 +++++++++++++++++++++++++------ privacy/bolton/loss_test.py | 324 +++++++++++++++++++- privacy/bolton/model.py | 89 ++++-- privacy/bolton/model_test.py | 493 +++++++++++++++++++++++++++++- privacy/bolton/optimizer.py | 56 ++-- privacy/bolton/optimizer_test.py | 173 +++++++++++ 7 files changed, 1472 insertions(+), 169 deletions(-) diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 46bd079..67b6148 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -10,5 +10,5 @@ if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. 
pass else: from privacy.bolton.model import Bolton - from privacy.bolton.loss import Huber - from privacy.bolton.loss import BinaryCrossentropy \ No newline at end of file + from privacy.bolton.loss import StrongConvexHuber + from privacy.bolton.loss import StrongConvexBinaryCrossentropy \ No newline at end of file diff --git a/privacy/bolton/loss.py b/privacy/bolton/loss.py index dd5d580..5cc029a 100644 --- a/privacy/bolton/loss.py +++ b/privacy/bolton/loss.py @@ -20,56 +20,33 @@ import tensorflow as tf from tensorflow.python.keras import losses from tensorflow.python.keras.utils import losses_utils from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras.regularizers import L1L2 -class StrongConvexLoss(losses.Loss): +class StrongConvexMixin: """ - Strong Convex Loss base class for any loss function that will be used with + Strong Convex Mixin base class for any loss function that will be used with Bolton model. Subclasses must be strongly convex and implement the associated constants. They must also conform to the requirements of tf losses - (see super class) + (see super class). + + For more details on the strong convexity requirements, see: + Bolt-on Differential Privacy for Scalable Stochastic Gradient + Descent-based Analytics by Xi Wu et. al. """ - def __init__(self, - reg_lambda: float, - c: float, - radius_constant: float = 1, - reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, - name: str = None, - dtype=tf.float32, - **kwargs): - """ - Args: - reg_lambda: Weight regularization constant - c: Additional constant for strongly convex convergence. Acts - as a global weight. - radius_constant: constant defining the length of the radius - reduction: reduction type to use. See super class - name: Name of the loss instance - dtype: tf datatype to use for tensor conversions. - """ - super(StrongConvexLoss, self).__init__(reduction=reduction, - name=name, - **kwargs) - self._sample_weight = tf.Variable(initial_value=c, - trainable=False, - dtype=tf.float32) - self._reg_lambda = reg_lambda - self.radius_constant = tf.Variable(initial_value=radius_constant, - trainable=False, - dtype=tf.float32) - self.dtype = dtype def radius(self): - """Radius of R-Ball (value to normalize weights to after each batch) + """Radius, R, of the hypothesis space W. + W is a convex set that forms the hypothesis space. - Returns: radius + Returns: R """ raise NotImplementedError("Radius not implemented for StrongConvex Loss" "function: %s" % str(self.__class__.__name__)) def gamma(self): - """ Gamma strongly convex + """ Strongly convexity, gamma Returns: gamma @@ -78,7 +55,7 @@ class StrongConvexLoss(losses.Loss): "function: %s" % str(self.__class__.__name__)) def beta(self, class_weight): - """Beta smoothess + """Smoothness, beta Args: class_weight: the class weights used. @@ -90,7 +67,7 @@ class StrongConvexLoss(losses.Loss): "function: %s" % str(self.__class__.__name__)) def lipchitz_constant(self, class_weight): - """ L lipchitz continuous + """Lipchitz constant, L Args: class_weight: class weights used @@ -102,43 +79,46 @@ class StrongConvexLoss(losses.Loss): "StrongConvex Loss" "function: %s" % str(self.__class__.__name__)) - def reg_lambda(self, convert_to_tensor: bool = False): - """ returns the lambda weight regularization constant, as a tensor if - desired + def kernel_regularizer(self): + """returns the kernel_regularizer to be used. 
Any subclass should override + this method if they want a kernel_regularizer (if required for + the loss function to be StronglyConvex + + :return: None or kernel_regularizer layer + """ + return None + + def max_class_weight(self, class_weight, dtype): + """the maximum weighting in class weights (max value) as a scalar tensor Args: - convert_to_tensor: True to convert to tensor, False to leave as - python numeric. + class_weight: class weights used + dtype: the data type for tensor conversions. - Returns: reg_lambda + Returns: maximum class weighting as tensor scalar """ - if convert_to_tensor: - return _ops.convert_to_tensor_v2(self._reg_lambda, dtype=self.dtype) - return self._reg_lambda - - def max_class_weight(self, class_weight): - class_weight = _ops.convert_to_tensor_v2(class_weight, dtype=self.dtype) + class_weight = _ops.convert_to_tensor_v2(class_weight, dtype) return tf.math.reduce_max(class_weight) -class Huber(StrongConvexLoss, losses.Huber): - """Strong Convex version of huber loss using l2 weight regularization. +class StrongConvexHuber(losses.Huber, StrongConvexMixin): + """Strong Convex version of Huber loss using l2 weight regularization. """ + def __init__(self, reg_lambda: float, - c: float, + C: float, radius_constant: float, delta: float, reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, name: str = 'huber', dtype=tf.float32): - """Constructor. Passes arguments to StrongConvexLoss and Huber Loss. + """Constructor. Args: reg_lambda: Weight regularization constant - c: Additional constant for strongly convex convergence. Acts - as a global weight. + C: Penalty parameter C of the loss term radius_constant: constant defining the length of the radius delta: delta value in huber loss. When to switch from quadratic to absolute deviation. @@ -149,15 +129,22 @@ class Huber(StrongConvexLoss, losses.Huber): Returns: Loss values per sample. """ - # self.delta = tf.Variable(initial_value=delta, trainable=False) - super(Huber, self).__init__( - reg_lambda, - c, - radius_constant, + if C <= 0: + raise ValueError('c: {0}, should be >= 0'.format(C)) + if reg_lambda <= 0: + raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) + if radius_constant <= 0: + raise ValueError('radius_constant: {0}, should be >= 0'.format( + radius_constant + )) + self.C = C + self.radius_constant = radius_constant + self.dtype = dtype + self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) + super(StrongConvexHuber, self).__init__( delta=delta, name=name, reduction=reduction, - dtype=dtype ) def call(self, y_true, y_pred): @@ -170,46 +157,73 @@ class Huber(StrongConvexLoss, losses.Huber): Returns: Loss values per sample. """ - return super(Huber, self).call(y_true, y_pred, **self._fn_kwargs) * \ - self._sample_weight + # return super(StrongConvexHuber, self).call(y_true, y_pred) * self._sample_weight + h = self._fn_kwargs['delta'] + z = y_pred * y_true + one = tf.constant(1, dtype=self.dtype) + four = tf.constant(4, dtype=self.dtype) + + if z > one + h: + return z - z + elif tf.math.abs(one - z) <= h: + return one / (four * h) * tf.math.pow(one + h - z, 2) + elif z < one - h: + return one - z + else: + raise ValueError('') def radius(self): """See super class. """ - return self.radius_constant / self.reg_lambda(True) + return self.radius_constant / self.reg_lambda def gamma(self): """See super class. """ - return self.reg_lambda(True) + return self.reg_lambda def beta(self, class_weight): """See super class. 
""" - max_class_weight = self.max_class_weight(class_weight) - return self._sample_weight * max_class_weight / \ - (self.delta * tf.Variable(initial_value=2, trainable=False)) + \ - self.reg_lambda(True) + max_class_weight = self.max_class_weight(class_weight, self.dtype) + delta = _ops.convert_to_tensor_v2(self._fn_kwargs['delta'], + dtype=self.dtype + ) + return self.C * max_class_weight / (delta * + tf.constant(2, dtype=self.dtype)) + \ + self.reg_lambda def lipchitz_constant(self, class_weight): """See super class. """ # if class_weight is provided, # it should be a vector of the same size of number of classes - max_class_weight = self.max_class_weight(class_weight) - lc = self._sample_weight * max_class_weight + \ - self.reg_lambda(True) * self.radius() + max_class_weight = self.max_class_weight(class_weight, self.dtype) + lc = self.C * max_class_weight + \ + self.reg_lambda * self.radius() return lc + def kernel_regularizer(self): + """ + l2 loss using reg_lambda as the l2 term (as desired). Required for + this loss function to be strongly convex. + :return: + """ + return L1L2(l2=self.reg_lambda) -class BinaryCrossentropy(StrongConvexLoss, losses.BinaryCrossentropy): + +class StrongConvexBinaryCrossentropy( + losses.BinaryCrossentropy, + StrongConvexMixin +): """ Strong Convex version of BinaryCrossentropy loss using l2 weight regularization. """ + def __init__(self, reg_lambda: float, - c: float, + C: float, radius_constant: float, from_logits: bool = True, label_smoothing: float = 0, @@ -219,8 +233,7 @@ class BinaryCrossentropy(StrongConvexLoss, losses.BinaryCrossentropy): """ Args: reg_lambda: Weight regularization constant - c: Additional constant for strongly convex convergence. Acts - as a global weight. + C: Penalty parameter C of the loss term radius_constant: constant defining the length of the radius reduction: reduction type to use. See super class label_smoothing: amount of smoothing to perform on labels @@ -228,15 +241,23 @@ class BinaryCrossentropy(StrongConvexLoss, losses.BinaryCrossentropy): name: Name of the loss instance dtype: tf datatype to use for tensor conversions. """ - super(BinaryCrossentropy, self).__init__(reg_lambda, - c, - radius_constant, - reduction=reduction, - name=name, - from_logits=from_logits, - label_smoothing=label_smoothing, - dtype=dtype - ) + if reg_lambda <= 0: + raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) + if C <= 0: + raise ValueError('c: {0}, should be >= 0'.format(C)) + if radius_constant <= 0: + raise ValueError('radius_constant: {0}, should be >= 0'.format( + radius_constant + )) + self.dtype = dtype + self.C = C + self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) + super(StrongConvexBinaryCrossentropy, self).__init__( + reduction=reduction, + name=name, + from_logits=from_logits, + label_smoothing=label_smoothing, + ) self.radius_constant = radius_constant def call(self, y_true, y_pred): @@ -249,32 +270,319 @@ class BinaryCrossentropy(StrongConvexLoss, losses.BinaryCrossentropy): Returns: Loss values per sample. """ - loss = tf.nn.sigmoid_cross_entropy_with_logits( - labels=y_true, - logits=y_pred - ) - loss = loss * self._sample_weight + # loss = tf.nn.sigmoid_cross_entropy_with_logits( + # labels=y_true, + # logits=y_pred + # ) + loss = super(StrongConvexBinaryCrossentropy, self).call(y_true, y_pred) + loss = loss * self.C return loss def radius(self): """See super class. 
""" - return self.radius_constant / self.reg_lambda(True) + return self.radius_constant / self.reg_lambda def gamma(self): """See super class. """ - return self.reg_lambda(True) + return self.reg_lambda def beta(self, class_weight): """See super class. """ - max_class_weight = self.max_class_weight(class_weight) - return self._sample_weight * max_class_weight + self.reg_lambda(True) + max_class_weight = self.max_class_weight(class_weight, self.dtype) + return self.C * max_class_weight + self.reg_lambda def lipchitz_constant(self, class_weight): """See super class. """ - max_class_weight = self.max_class_weight(class_weight) - return self._sample_weight * max_class_weight + \ - self.reg_lambda(True) * self.radius() + max_class_weight = self.max_class_weight(class_weight, self.dtype) + return self.C * max_class_weight + self.reg_lambda * self.radius() + + def kernel_regularizer(self): + """ + l2 loss using reg_lambda as the l2 term (as desired). Required for + this loss function to be strongly convex. + :return: + """ + return L1L2(l2=self.reg_lambda) + + +# class StrongConvexSparseCategoricalCrossentropy( +# losses.CategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of CategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( +# reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). 
Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) +# +# class StrongConvexSparseCategoricalCrossentropy( +# losses.SparseCategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexHuber, self).__init__(reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) +# +# +# class StrongConvexCategoricalCrossentropy( +# losses.CategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of CategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. 
(1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexHuber, self).__init__(reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) diff --git a/privacy/bolton/loss_test.py b/privacy/bolton/loss_test.py index 87669fd..bb7dc53 100644 --- a/privacy/bolton/loss_test.py +++ b/privacy/bolton/loss_test.py @@ -1,3 +1,325 @@ +# Copyright 2018, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Unit testing for loss.py""" + from __future__ import absolute_import from __future__ import division -from __future__ import print_function \ No newline at end of file +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.platform import test +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras.optimizer_v2 import adam +from tensorflow.python.keras.optimizer_v2 import adagrad +from tensorflow.python.keras.optimizer_v2 import gradient_descent +from tensorflow.python.keras import losses +from tensorflow.python.framework import test_util +from privacy.bolton import model +from privacy.bolton.loss import StrongConvexBinaryCrossentropy +from privacy.bolton.loss import StrongConvexHuber +from privacy.bolton.loss import StrongConvexMixin +from absl.testing import parameterized +from absl.testing import absltest +from tensorflow.python.keras.regularizers import L1L2 + + +class StrongConvexTests(keras_parameterized.TestCase): + @parameterized.named_parameters([ + {'testcase_name': 'beta not implemented', + 'fn': 'beta', + 'args': [1]}, + {'testcase_name': 'gamma not implemented', + 'fn': 'gamma', + 'args': []}, + {'testcase_name': 'lipchitz not implemented', + 'fn': 'lipchitz_constant', + 'args': [1]}, + {'testcase_name': 'radius not implemented', + 'fn': 'radius', + 'args': []}, + ]) + def test_not_implemented(self, fn, args): + with self.assertRaises(NotImplementedError): + loss = StrongConvexMixin() + getattr(loss, fn, None)(*args) + + @parameterized.named_parameters([ + {'testcase_name': 'radius not implemented', + 'fn': 'kernel_regularizer', + 'args': []}, + ]) + def test_return_none(self, fn, args): + loss = StrongConvexMixin() + ret = getattr(loss, fn, None)(*args) + self.assertEqual(ret, None) + + +class BinaryCrossesntropyTests(keras_parameterized.TestCase): + """tests for BinaryCrossesntropy StrongConvex loss""" + + @parameterized.named_parameters([ + {'testcase_name': 'normal', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': 1 + }, + ]) + def test_init_params(self, reg_lambda, c, radius_constant): + # test valid domains for each variable + loss = StrongConvexBinaryCrossentropy(reg_lambda, c, radius_constant) + self.assertIsInstance(loss, StrongConvexBinaryCrossentropy) + + @parameterized.named_parameters([ + {'testcase_name': 'negative c', + 'reg_lambda': 1, + 'c': -1, + 'radius_constant': 1 + }, + {'testcase_name': 'negative radius', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': -1 + }, + {'testcase_name': 'negative lambda', + 'reg_lambda': -1, + 'c': 1, + 'radius_constant': 1 + }, + ]) + def test_bad_init_params(self, reg_lambda, c, radius_constant): + # test valid domains for each variable + with self.assertRaises(ValueError): + loss = StrongConvexBinaryCrossentropy(reg_lambda, c, radius_constant) + + @test_util.run_all_in_graph_and_eager_modes + @parameterized.named_parameters([ + # [] for compatibility with tensorflow loss calculation + {'testcase_name': 'both positive', + 'logits': [10000], + 'y_true': [1], + 'result': 0, + }, + {'testcase_name': 'positive gradient negative logits', + 'logits': [-10000], + 'y_true': [1], + 'result': 10000, + }, + {'testcase_name': 'positivee gradient positive logits', + 'logits': [10000], + 'y_true': [0], + 'result': 10000, + }, + {'testcase_name': 'both negative', + 'logits': [-10000], + 'y_true': [0], + 'result': 0 + }, + ]) + def test_calculation(self, logits, y_true, result): + logits = tf.Variable(logits, False, dtype=tf.float32) + y_true = tf.Variable(y_true, 
False, dtype=tf.float32) + loss = StrongConvexBinaryCrossentropy(0.00001, 1, 1) + loss = loss(y_true, logits) + self.assertEqual(loss.numpy(), result) + + @parameterized.named_parameters([ + {'testcase_name': 'beta', + 'init_args': [1, 1, 1], + 'fn': 'beta', + 'args': [1], + 'result': tf.constant(2, dtype=tf.float32) + }, + {'testcase_name': 'gamma', + 'fn': 'gamma', + 'init_args': [1, 1, 1], + 'args': [], + 'result': tf.constant(1, dtype=tf.float32), + }, + {'testcase_name': 'lipchitz constant', + 'fn': 'lipchitz_constant', + 'init_args': [1, 1, 1], + 'args': [1], + 'result': tf.constant(2, dtype=tf.float32), + }, + {'testcase_name': 'kernel regularizer', + 'fn': 'kernel_regularizer', + 'init_args': [1, 1, 1], + 'args': [], + 'result': L1L2(l2=1), + }, + ]) + def test_fns(self, init_args, fn, args, result): + loss = StrongConvexBinaryCrossentropy(*init_args) + expected = getattr(loss, fn, lambda: 'fn not found')(*args) + if hasattr(expected, 'numpy') and hasattr(result, 'numpy'): # both tensor + expected = expected.numpy() + result = result.numpy() + if hasattr(expected, 'l2') and hasattr(result, 'l2'): # both l2 regularizer + expected = expected.l2 + result = result.l2 + self.assertEqual(expected, result) + + +class HuberTests(keras_parameterized.TestCase): + """tests for BinaryCrossesntropy StrongConvex loss""" + + @parameterized.named_parameters([ + {'testcase_name': 'normal', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': 1, + 'delta': 1, + }, + ]) + def test_init_params(self, reg_lambda, c, radius_constant, delta): + # test valid domains for each variable + loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta) + self.assertIsInstance(loss, StrongConvexHuber) + + @parameterized.named_parameters([ + {'testcase_name': 'negative c', + 'reg_lambda': 1, + 'c': -1, + 'radius_constant': 1, + 'delta': 1 + }, + {'testcase_name': 'negative radius', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': -1, + 'delta': 1 + }, + {'testcase_name': 'negative lambda', + 'reg_lambda': -1, + 'c': 1, + 'radius_constant': 1, + 'delta': 1 + }, + {'testcase_name': 'negative delta', + 'reg_lambda': -1, + 'c': 1, + 'radius_constant': 1, + 'delta': -1 + }, + ]) + def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): + # test valid domains for each variable + with self.assertRaises(ValueError): + loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta) + + # test the bounds and test varied delta's + @test_util.run_all_in_graph_and_eager_modes + @parameterized.named_parameters([ + {'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary', + 'logits': 2.1, + 'y_true': 1, + 'delta': 1, + 'result': 0, + }, + {'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary', + 'logits': 1.9, + 'y_true': 1, + 'delta': 1, + 'result': 0.01*0.25, + }, + {'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary', + 'logits': 0.1, + 'y_true': 1, + 'delta': 1, + 'result': 1.9**2 * 0.25, + }, + {'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary', + 'logits': -0.1, + 'y_true': 1, + 'delta': 1, + 'result': 1.1, + }, + {'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary', + 'logits': 3.1, + 'y_true': 1, + 'delta': 2, + 'result': 0, + }, + {'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary', + 'logits': 2.9, + 'y_true': 1, + 'delta': 2, + 'result': 0.01*0.125, + }, + {'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary', + 'logits': 1.1, + 'y_true': 1, + 'delta': 2, + 'result': 1.9**2 * 0.125, + }, + {'testcase_name': 'delta=2,y_true=1 z < 1-h decision 
boundary', + 'logits': -1.1, + 'y_true': 1, + 'delta': 2, + 'result': 2.1, + }, + {'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary', + 'logits': -2.1, + 'y_true': -1, + 'delta': 1, + 'result': 0, + }, + ]) + def test_calculation(self, logits, y_true, delta, result): + logits = tf.Variable(logits, False, dtype=tf.float32) + y_true = tf.Variable(y_true, False, dtype=tf.float32) + loss = StrongConvexHuber(0.00001, 1, 1, delta) + loss = loss(y_true, logits) + self.assertAllClose(loss.numpy(), result) + + @parameterized.named_parameters([ + {'testcase_name': 'beta', + 'init_args': [1, 1, 1, 1], + 'fn': 'beta', + 'args': [1], + 'result': tf.Variable(1.5, dtype=tf.float32) + }, + {'testcase_name': 'gamma', + 'fn': 'gamma', + 'init_args': [1, 1, 1, 1], + 'args': [], + 'result': tf.Variable(1, dtype=tf.float32), + }, + {'testcase_name': 'lipchitz constant', + 'fn': 'lipchitz_constant', + 'init_args': [1, 1, 1, 1], + 'args': [1], + 'result': tf.Variable(2, dtype=tf.float32), + }, + {'testcase_name': 'kernel regularizer', + 'fn': 'kernel_regularizer', + 'init_args': [1, 1, 1, 1], + 'args': [], + 'result': L1L2(l2=1), + }, + ]) + def test_fns(self, init_args, fn, args, result): + loss = StrongConvexHuber(*init_args) + expected = getattr(loss, fn, lambda: 'fn not found')(*args) + if hasattr(expected, 'numpy') and hasattr(result, 'numpy'): # both tensor + expected = expected.numpy() + result = result.numpy() + if hasattr(expected, 'l2') and hasattr(result, 'l2'): # both l2 regularizer + expected = expected.l2 + result = result.l2 + self.assertEqual(expected, result) + + +if __name__ == '__main__': + tf.test.main() \ No newline at end of file diff --git a/privacy/bolton/model.py b/privacy/bolton/model.py index a600374..78ceb7c 100644 --- a/privacy/bolton/model.py +++ b/privacy/bolton/model.py @@ -19,11 +19,12 @@ from __future__ import print_function import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras import optimizers -from tensorflow.python.training.tracking import base as trackable from tensorflow.python.framework import ops as _ops -from privacy.bolton.loss import StrongConvexLoss +from privacy.bolton.loss import StrongConvexMixin from privacy.bolton.optimizer import Private +_accepted_distributions = ['laplace'] + class Bolton(Model): """ @@ -33,12 +34,16 @@ class Bolton(Model): 2. Projects weights to R after each batch 3. Limits learning rate 4. Use a strongly convex loss function (see compile) + + For more details on the strong convexity requirements, see: + Bolt-on Differential Privacy for Scalable Stochastic Gradient + Descent-based Analytics by Xi Wu et. al. """ + def __init__(self, n_classes, epsilon, noise_distribution='laplace', - weights_initializer=tf.initializers.GlorotUniform(), seed=1, dtype=tf.float32 ): @@ -59,6 +64,7 @@ class Bolton(Model): 2. Projects weights to R after each batch 3. Limits learning rate """ + def on_train_batch_end(self, batch, logs=None): loss = self.model.loss self.model.optimizer.limit_learning_rate( @@ -72,13 +78,17 @@ class Bolton(Model): loss = self.model.loss self.model._project_weights_to_r(loss.radius(), True) + if epsilon <= 0: + raise ValueError('Detected epsilon: {0}. ' + 'Valid range is 0 < epsilon Date: Thu, 13 Jun 2019 01:01:31 -0400 Subject: [PATCH 03/39] Working bolton model without unit tests. -- moving to Bolton Optimizer Model is now just a convenient wrapper and example for users. Optimizer holds ALL Bolton privacy requirements. 
Optimizer is used as a context manager, and must be passed the model's layers. Unit tests incomplete, committing for visibility into the design. --- privacy/bolton/loss_test.py | 110 ++++++-- privacy/bolton/model.py | 448 ++++++++++++++++++++----------- privacy/bolton/model_test.py | 190 +++++++------ privacy/bolton/optimizer.py | 254 ++++++++++++++++-- privacy/bolton/optimizer_test.py | 256 ++++++++++++++---- 5 files changed, 913 insertions(+), 345 deletions(-) diff --git a/privacy/bolton/loss_test.py b/privacy/bolton/loss_test.py index bb7dc53..ddb4861 100644 --- a/privacy/bolton/loss_test.py +++ b/privacy/bolton/loss_test.py @@ -18,23 +18,17 @@ from __future__ import division from __future__ import print_function import tensorflow as tf -from tensorflow.python.platform import test from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras.optimizer_v2 import adam -from tensorflow.python.keras.optimizer_v2 import adagrad -from tensorflow.python.keras.optimizer_v2 import gradient_descent -from tensorflow.python.keras import losses from tensorflow.python.framework import test_util -from privacy.bolton import model +from tensorflow.python.keras.regularizers import L1L2 +from absl.testing import parameterized from privacy.bolton.loss import StrongConvexBinaryCrossentropy from privacy.bolton.loss import StrongConvexHuber from privacy.bolton.loss import StrongConvexMixin -from absl.testing import parameterized -from absl.testing import absltest -from tensorflow.python.keras.regularizers import L1L2 -class StrongConvexTests(keras_parameterized.TestCase): +class StrongConvexMixinTests(keras_parameterized.TestCase): + """Tests for the StrongConvexMixin""" @parameterized.named_parameters([ {'testcase_name': 'beta not implemented', 'fn': 'beta', @@ -50,6 +44,12 @@ class StrongConvexTests(keras_parameterized.TestCase): 'args': []}, ]) def test_not_implemented(self, fn, args): + """Test that the given fn's are not implemented on the mixin. 
+ + Args: + fn: fn on Mixin to test + args: arguments to fn of Mixin + """ with self.assertRaises(NotImplementedError): loss = StrongConvexMixin() getattr(loss, fn, None)(*args) @@ -60,6 +60,12 @@ class StrongConvexTests(keras_parameterized.TestCase): 'args': []}, ]) def test_return_none(self, fn, args): + """Test that fn of Mixin returns None + + Args: + fn: fn of Mixin to test + args: arguments to fn of Mixin + """ loss = StrongConvexMixin() ret = getattr(loss, fn, None)(*args) self.assertEqual(ret, None) @@ -71,44 +77,56 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): @parameterized.named_parameters([ {'testcase_name': 'normal', 'reg_lambda': 1, - 'c': 1, + 'C': 1, 'radius_constant': 1 }, ]) - def test_init_params(self, reg_lambda, c, radius_constant): + def test_init_params(self, reg_lambda, C, radius_constant): + """Test initialization for given arguments + Args: + reg_lambda: initialization value for reg_lambda arg + C: initialization value for C arg + radius_constant: initialization value for radius_constant arg + """ # test valid domains for each variable - loss = StrongConvexBinaryCrossentropy(reg_lambda, c, radius_constant) + loss = StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) self.assertIsInstance(loss, StrongConvexBinaryCrossentropy) @parameterized.named_parameters([ {'testcase_name': 'negative c', 'reg_lambda': 1, - 'c': -1, + 'C': -1, 'radius_constant': 1 }, {'testcase_name': 'negative radius', 'reg_lambda': 1, - 'c': 1, + 'C': 1, 'radius_constant': -1 }, {'testcase_name': 'negative lambda', 'reg_lambda': -1, - 'c': 1, + 'C': 1, 'radius_constant': 1 }, ]) - def test_bad_init_params(self, reg_lambda, c, radius_constant): + def test_bad_init_params(self, reg_lambda, C, radius_constant): + """Test invalid domain for given params. 
Should return ValueError + Args: + reg_lambda: initialization value for reg_lambda arg + C: initialization value for C arg + radius_constant: initialization value for radius_constant arg + """ # test valid domains for each variable with self.assertRaises(ValueError): - loss = StrongConvexBinaryCrossentropy(reg_lambda, c, radius_constant) + StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ # [] for compatibility with tensorflow loss calculation {'testcase_name': 'both positive', - 'logits': [10000], - 'y_true': [1], - 'result': 0, + 'logits': [10000], + 'y_true': [1], + 'result': 0, }, {'testcase_name': 'positive gradient negative logits', 'logits': [-10000], @@ -127,6 +145,12 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): }, ]) def test_calculation(self, logits, y_true, result): + """Test the call method to ensure it returns the correct value + Args: + logits: unscaled output of model + y_true: label + result: correct loss calculation value + """ logits = tf.Variable(logits, False, dtype=tf.float32) y_true = tf.Variable(y_true, False, dtype=tf.float32) loss = StrongConvexBinaryCrossentropy(0.00001, 1, 1) @@ -160,6 +184,13 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): }, ]) def test_fns(self, init_args, fn, args, result): + """Test that fn of BinaryCrossentropy loss returns the correct result + Args: + init_args: init values for loss instance + fn: the fn to test + args: the arguments to above function + result: the correct result from the fn + """ loss = StrongConvexBinaryCrossentropy(*init_args) expected = getattr(loss, fn, lambda: 'fn not found')(*args) if hasattr(expected, 'numpy') and hasattr(result, 'numpy'): # both tensor @@ -183,6 +214,12 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_init_params(self, reg_lambda, c, radius_constant, delta): + """Test initialization for given arguments + Args: + reg_lambda: initialization value for reg_lambda arg + C: initialization value for C arg + radius_constant: initialization value for radius_constant arg + """ # test valid domains for each variable loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta) self.assertIsInstance(loss, StrongConvexHuber) @@ -214,18 +251,24 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): + """Test invalid domain for given params. 
Should return ValueError + Args: + reg_lambda: initialization value for reg_lambda arg + C: initialization value for C arg + radius_constant: initialization value for radius_constant arg + """ # test valid domains for each variable with self.assertRaises(ValueError): - loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta) + StrongConvexHuber(reg_lambda, c, radius_constant, delta) # test the bounds and test varied delta's @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ {'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary', - 'logits': 2.1, - 'y_true': 1, - 'delta': 1, - 'result': 0, + 'logits': 2.1, + 'y_true': 1, + 'delta': 1, + 'result': 0, }, {'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary', 'logits': 1.9, @@ -277,6 +320,12 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_calculation(self, logits, y_true, delta, result): + """Test the call method to ensure it returns the correct value + Args: + logits: unscaled output of model + y_true: label + result: correct loss calculation value + """ logits = tf.Variable(logits, False, dtype=tf.float32) y_true = tf.Variable(y_true, False, dtype=tf.float32) loss = StrongConvexHuber(0.00001, 1, 1, delta) @@ -310,6 +359,13 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_fns(self, init_args, fn, args, result): + """Test that fn of BinaryCrossentropy loss returns the correct result + Args: + init_args: init values for loss instance + fn: the fn to test + args: the arguments to above function + result: the correct result from the fn + """ loss = StrongConvexHuber(*init_args) expected = getattr(loss, fn, lambda: 'fn not found')(*args) if hasattr(expected, 'numpy') and hasattr(result, 'numpy'): # both tensor @@ -322,4 +378,4 @@ class HuberTests(keras_parameterized.TestCase): if __name__ == '__main__': - tf.test.main() \ No newline at end of file + tf.test.main() diff --git a/privacy/bolton/model.py b/privacy/bolton/model.py index 78ceb7c..a6731fe 100644 --- a/privacy/bolton/model.py +++ b/privacy/bolton/model.py @@ -21,12 +21,12 @@ from tensorflow.python.keras.models import Model from tensorflow.python.keras import optimizers from tensorflow.python.framework import ops as _ops from privacy.bolton.loss import StrongConvexMixin -from privacy.bolton.optimizer import Private +from privacy.bolton.optimizer import Bolton _accepted_distributions = ['laplace'] -class Bolton(Model): +class BoltonModel(Model): """ Bolton episilon-delta model Uses 4 key steps to achieve privacy guarantees: @@ -42,8 +42,7 @@ class Bolton(Model): def __init__(self, n_classes, - epsilon, - noise_distribution='laplace', + # noise_distribution='laplace', seed=1, dtype=tf.float32 ): @@ -58,47 +57,22 @@ class Bolton(Model): dtype: data type to use for tensors """ - class MyCustomCallback(tf.keras.callbacks.Callback): - """Custom callback for bolton training requirements. - Implements steps (see Bolton class): - 2. Projects weights to R after each batch - 3. Limits learning rate - """ - - def on_train_batch_end(self, batch, logs=None): - loss = self.model.loss - self.model.optimizer.limit_learning_rate( - self.model.run_eagerly, - loss.beta(self.model.class_weight), - loss.gamma() - ) - self.model._project_weights_to_r(loss.radius(), False) - - def on_train_end(self, logs=None): - loss = self.model.loss - self.model._project_weights_to_r(loss.radius(), True) - - if epsilon <= 0: - raise ValueError('Detected epsilon: {0}. ' - 'Valid range is 0 < epsilon R-ball and only normalize then. 
+ # + # Returns: + # + # """ + # for layer in self.layers: + # weight_norm = tf.norm(layer.kernel, axis=0) + # if force: + # layer.kernel = layer.kernel / (weight_norm / r) + # elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self._dtype)) > 0: + # layer.kernel = layer.kernel / (weight_norm / r) - Args: - r: radius of "R-Ball". Scalar to normalize to. - force: True to normalize regardless of previous weight values. - False to check if weights > R-ball and only normalize then. + # def _get_noise(self, distribution, data_size): + # """Sample noise to be added to weights for privacy guarantee + # + # Args: + # distribution: the distribution type to pull noise from + # data_size: the number of samples + # + # Returns: noise in shape of layer's weights to be added to the weights. + # + # """ + # distribution = distribution.lower() + # input_dim = self.layers[0].kernel.numpy().shape[0] + # loss = self.loss + # if distribution == _accepted_distributions[0]: # laplace + # per_class_epsilon = self.epsilon / (self.n_classes) + # l2_sensitivity = (2 * + # loss.lipchitz_constant(self.class_weight)) / \ + # (loss.gamma() * data_size) + # unit_vector = tf.random.normal(shape=(input_dim, self.n_classes), + # mean=0, + # seed=1, + # stddev=1.0, + # dtype=self._dtype) + # unit_vector = unit_vector / tf.math.sqrt( + # tf.reduce_sum(tf.math.square(unit_vector), axis=0) + # ) + # + # beta = l2_sensitivity / per_class_epsilon + # alpha = input_dim # input_dim + # gamma = tf.random.gamma([self.n_classes], + # alpha, + # beta=1 / beta, + # seed=1, + # dtype=self._dtype + # ) + # return unit_vector * gamma + # raise NotImplementedError('Noise distribution: {0} is not ' + # 'a valid distribution'.format(distribution)) - Returns: - """ - for layer in self._layers: - weight_norm = tf.norm(layer.kernel, axis=0) - if force: - layer.kernel = layer.kernel / (weight_norm / r) - elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self._dtype)) > 0: - layer.kernel = layer.kernel / (weight_norm / r) +if __name__ == '__main__': + import tensorflow as tf - def _get_noise(self, distribution, data_size): - """Sample noise to be added to weights for privacy guarantee + import os + import time + import matplotlib.pyplot as plt - Args: - distribution: the distribution type to pull noise from - data_size: the number of samples + _URL = 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/facades.tar.gz' - Returns: noise in shape of layer's weights to be added to the weights. 
+ path_to_zip = tf.keras.utils.get_file('facades.tar.gz', + origin=_URL, + extract=True) - """ - distribution = distribution.lower() - input_dim = self._layers[0].kernel.numpy().shape[0] - loss = self.loss - if distribution == _accepted_distributions[0]: # laplace - per_class_epsilon = self.epsilon / (self.n_classes) - l2_sensitivity = (2 * - loss.lipchitz_constant(self.class_weight)) / \ - (loss.gamma() * data_size) - unit_vector = tf.random.normal(shape=(input_dim, self.n_classes), - mean=0, - seed=1, - stddev=1.0, - dtype=self._dtype) - unit_vector = unit_vector / tf.math.sqrt( - tf.reduce_sum(tf.math.square(unit_vector), axis=0) - ) + PATH = os.path.join(os.path.dirname(path_to_zip), 'facades/') + BUFFER_SIZE = 400 + BATCH_SIZE = 1 + IMG_WIDTH = 256 + IMG_HEIGHT = 256 - beta = l2_sensitivity / per_class_epsilon - alpha = input_dim # input_dim - gamma = tf.random.gamma([self.n_classes], - alpha, - beta=1 / beta, - seed=1, - dtype=self._dtype - ) - return unit_vector * gamma - raise NotImplementedError('Noise distribution: {0} is not ' - 'a valid distribution'.format(distribution)) + + def load(image_file): + image = tf.io.read_file(image_file) + image = tf.image.decode_jpeg(image) + + w = tf.shape(image)[1] + + w = w // 2 + real_image = image[:, :w, :] + input_image = image[:, w:, :] + + input_image = tf.cast(input_image, tf.float32) + real_image = tf.cast(real_image, tf.float32) + + return input_image, real_image + + + inp, re = load(PATH + 'train/100.jpg') + # casting to int for matplotlib to show the image + plt.figure() + plt.imshow(inp / 255.0) + plt.figure() + plt.imshow(re / 255.0) + + + def resize(input_image, real_image, height, width): + input_image = tf.image.resize(input_image, [height, width], + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + real_image = tf.image.resize(real_image, [height, width], + method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + + return input_image, real_image + + + def random_crop(input_image, real_image): + stacked_image = tf.stack([input_image, real_image], axis=0) + cropped_image = tf.image.random_crop( + stacked_image, size=[2, IMG_HEIGHT, IMG_WIDTH, 3]) + + return cropped_image[0], cropped_image[1] + + + def normalize(input_image, real_image): + input_image = (input_image / 127.5) - 1 + real_image = (real_image / 127.5) - 1 + + return input_image, real_image + + + @tf.function() + def random_jitter(input_image, real_image): + # resizing to 286 x 286 x 3 + input_image, real_image = resize(input_image, real_image, 286, 286) + + # randomly cropping to 256 x 256 x 3 + input_image, real_image = random_crop(input_image, real_image) + + if tf.random.uniform(()) > 0.5: + # random mirroring + input_image = tf.image.flip_left_right(input_image) + real_image = tf.image.flip_left_right(real_image) + + return input_image, real_image + + + def load_image_train(image_file): + input_image, real_image = load(image_file) + input_image, real_image = random_jitter(input_image, real_image) + input_image, real_image = normalize(input_image, real_image) + + return input_image, real_image + + + def load_image_test(image_file): + input_image, real_image = load(image_file) + input_image, real_image = resize(input_image, real_image, + IMG_HEIGHT, IMG_WIDTH) + input_image, real_image = normalize(input_image, real_image) + + return input_image, real_image + + + train_dataset = tf.data.Dataset.list_files(PATH + 'train/*.jpg') + train_dataset = train_dataset.shuffle(BUFFER_SIZE) + train_dataset = train_dataset.map(load_image_train, + 
num_parallel_calls=tf.data.experimental.AUTOTUNE) + train_dataset = train_dataset.batch(1) + # steps_per_epoch = training_utils.infer_steps_for_dataset( + # train_dataset, None, epochs=1, steps_name='steps') + + # for batch in train_dataset: + # print(batch[1].shape) + test_dataset = tf.data.Dataset.list_files(PATH + 'test/*.jpg') + # shuffling so that for every epoch a different image is generated + # to predict and display the progress of our model. + train_dataset = train_dataset.shuffle(BUFFER_SIZE) + test_dataset = test_dataset.map(load_image_test) + test_dataset = test_dataset.batch(1) + + be = BoltonModel(3, 2) + from tensorflow.python.keras.optimizer_v2 import adam + from privacy.bolton import loss + + test = adam.Adam() + l = loss.StrongConvexBinaryCrossentropy(1, 2, 1) + be.compile(test, l) + print("Eager exeuction: {0}".format(tf.executing_eagerly())) + be.fit(train_dataset, verbose=0, steps_per_epoch=1, n_samples=1) diff --git a/privacy/bolton/model_test.py b/privacy/bolton/model_test.py index c3ca109..53c4c45 100644 --- a/privacy/bolton/model_test.py +++ b/privacy/bolton/model_test.py @@ -32,7 +32,7 @@ from absl.testing import absltest from tensorflow.python.keras.regularizers import L1L2 -class TestLoss(losses.Loss): +class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model""" def __init__(self, reg_lambda, C, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) @@ -145,21 +145,25 @@ class InitTests(keras_parameterized.TestCase): self.assertIsInstance(clf, model.Bolton) @parameterized.named_parameters([ - {'testcase_name': 'invalid noise', - 'n_classes': 1, - 'epsilon': 1, - 'noise_distribution': 'not_valid', - 'weights_initializer': tf.initializers.GlorotUniform(), - }, - {'testcase_name': 'invalid epsilon', - 'n_classes': 1, - 'epsilon': -1, - 'noise_distribution': 'laplace', - 'weights_initializer': tf.initializers.GlorotUniform(), - }, + {'testcase_name': 'invalid noise', + 'n_classes': 1, + 'epsilon': 1, + 'noise_distribution': 'not_valid', + 'weights_initializer': tf.initializers.GlorotUniform(), + }, + {'testcase_name': 'invalid epsilon', + 'n_classes': 1, + 'epsilon': -1, + 'noise_distribution': 'laplace', + 'weights_initializer': tf.initializers.GlorotUniform(), + }, ]) def test_bad_init_params( - self, n_classes, epsilon, noise_distribution, weights_initializer): + self, + n_classes, + epsilon, + noise_distribution, + weights_initializer): # test invalid domains for each variable, especially noise seed = 1 with self.assertRaises(ValueError): @@ -204,16 +208,16 @@ class InitTests(keras_parameterized.TestCase): self.assertEqual(clf.loss, loss) @parameterized.named_parameters([ - {'testcase_name': 'Not strong loss', - 'n_classes': 1, - 'loss': losses.BinaryCrossentropy(), - 'optimizer': 'adam', - }, - {'testcase_name': 'Not valid optimizer', - 'n_classes': 1, - 'loss': TestLoss(1, 1, 1), - 'optimizer': 'ada', - } + {'testcase_name': 'Not strong loss', + 'n_classes': 1, + 'loss': losses.BinaryCrossentropy(), + 'optimizer': 'adam', + }, + {'testcase_name': 'Not valid optimizer', + 'n_classes': 1, + 'loss': TestLoss(1, 1, 1), + 'optimizer': 'ada', + } ]) def test_bad_compile(self, n_classes, loss, optimizer): # test compilaton of invalid tf.optimizer and non instantiated loss. 
@@ -250,7 +254,7 @@ def _cat_dataset(n_samples, input_dim, n_classes, t='train', generator=False): y_stack = [] for i_class in range(n_classes): x_stack.append( - tf.constant(1*i_class, tf.float32, (n_samples, input_dim)) + tf.constant(1*i_class, tf.float32, (n_samples, input_dim)) ) y_stack.append( tf.constant(i_class, tf.float32, (n_samples, n_classes)) @@ -258,7 +262,7 @@ def _cat_dataset(n_samples, input_dim, n_classes, t='train', generator=False): x_set, y_set = tf.stack(x_stack), tf.stack(y_stack) if generator: dataset = tf.data.Dataset.from_tensor_slices( - (x_set, y_set) + (x_set, y_set) ) return dataset return x_set, y_set @@ -281,10 +285,10 @@ def _do_fit(n_samples, clf.compile(optimizer, loss) if generator: x = _cat_dataset( - n_samples, - input_dim, - n_classes, - generator=generator + n_samples, + input_dim, + n_classes, + generator=generator ) y = None # x = x.batch(batch_size) @@ -315,26 +319,26 @@ class FitTests(keras_parameterized.TestCase): # @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ - {'testcase_name': 'iterator fit', - 'generator': False, - 'reset_n_samples': True, - 'callbacks': None - }, - {'testcase_name': 'iterator fit no samples', - 'generator': False, - 'reset_n_samples': True, - 'callbacks': None - }, - {'testcase_name': 'generator fit', - 'generator': True, - 'reset_n_samples': False, - 'callbacks': None - }, - {'testcase_name': 'with callbacks', - 'generator': True, - 'reset_n_samples': False, - 'callbacks': TestCallback() - }, + {'testcase_name': 'iterator fit', + 'generator': False, + 'reset_n_samples': True, + 'callbacks': None + }, + {'testcase_name': 'iterator fit no samples', + 'generator': False, + 'reset_n_samples': True, + 'callbacks': None + }, + {'testcase_name': 'generator fit', + 'generator': True, + 'reset_n_samples': False, + 'callbacks': None + }, + {'testcase_name': 'with callbacks', + 'generator': True, + 'reset_n_samples': False, + 'callbacks': TestCallback() + }, ]) def test_fit(self, generator, reset_n_samples, callbacks): loss = TestLoss(1, 1, 1) @@ -344,9 +348,19 @@ class FitTests(keras_parameterized.TestCase): epsilon = 1 batch_size = 1 n_samples = 10 - clf = _do_fit(n_samples, input_dim, n_classes, epsilon, generator, batch_size, - reset_n_samples, optimizer, loss, callbacks) - self.assertEqual(hasattr(clf, '_layers'), True) + clf = _do_fit( + n_samples, + input_dim, + n_classes, + epsilon, + generator, + batch_size, + reset_n_samples, + optimizer, + loss, + callbacks + ) + self.assertEqual(hasattr(clf, 'layers'), True) @parameterized.named_parameters([ {'testcase_name': 'generator fit', @@ -368,15 +382,15 @@ class FitTests(keras_parameterized.TestCase): ) clf.compile(optimizer, loss) x = _cat_dataset( - n_samples, - input_dim, - n_classes, - generator=generator + n_samples, + input_dim, + n_classes, + generator=generator ) x = x.batch(batch_size) x = x.shuffle(n_samples // 2) clf.fit_generator(x, n_samples=n_samples) - self.assertEqual(hasattr(clf, '_layers'), True) + self.assertEqual(hasattr(clf, 'layers'), True) @parameterized.named_parameters([ {'testcase_name': 'iterator no n_samples', @@ -399,32 +413,43 @@ class FitTests(keras_parameterized.TestCase): epsilon = 1 batch_size = 1 n_samples = 10 - _do_fit(n_samples, input_dim, n_classes, epsilon, generator, batch_size, - reset_n_samples, optimizer, loss, None, distribution) + _do_fit( + n_samples, + input_dim, + n_classes, + epsilon, + generator, + batch_size, + reset_n_samples, + optimizer, + loss, + None, + distribution + ) 
@parameterized.named_parameters([ - {'testcase_name': 'None class_weights', - 'class_weights': None, - 'class_counts': None, - 'num_classes': None, - 'result': 1}, - {'testcase_name': 'class weights array', - 'class_weights': [1, 1], - 'class_counts': [1, 1], - 'num_classes': 2, - 'result': [1, 1]}, - {'testcase_name': 'class weights balanced', - 'class_weights': 'balanced', - 'class_counts': [1, 1], - 'num_classes': 2, - 'result': [1, 1]}, + {'testcase_name': 'None class_weights', + 'class_weights': None, + 'class_counts': None, + 'num_classes': None, + 'result': 1}, + {'testcase_name': 'class weights array', + 'class_weights': [1, 1], + 'class_counts': [1, 1], + 'num_classes': 2, + 'result': [1, 1]}, + {'testcase_name': 'class weights balanced', + 'class_weights': 'balanced', + 'class_counts': [1, 1], + 'num_classes': 2, + 'result': [1, 1]}, ]) def test_class_calculate(self, class_weights, class_counts, num_classes, result - ): + ): clf = model.Bolton(1, 1) expected = clf.calculate_class_weights(class_weights, class_counts, @@ -447,14 +472,14 @@ class FitTests(keras_parameterized.TestCase): 'class_weights': 'balanced', 'class_counts': None, 'num_classes': 1, - 'err_msg': - "Class counts must be provided if using class_weights=balanced"}, + 'err_msg': "Class counts must be provided if " + "using class_weights=balanced"}, {'testcase_name': 'no num classes', 'class_weights': 'balanced', 'class_counts': [1], 'num_classes': None, - 'err_msg': - 'num_classes must be provided if using class_weights=balanced'}, + 'err_msg': 'num_classes must be provided if ' + 'using class_weights=balanced'}, {'testcase_name': 'class counts not array', 'class_weights': 'balanced', 'class_counts': 1, @@ -464,7 +489,7 @@ class FitTests(keras_parameterized.TestCase): 'class_weights': [1], 'class_counts': None, 'num_classes': None, - 'err_msg': "You must pass a value for num_classes if" + 'err_msg': "You must pass a value for num_classes if " "creating an array of class_weights"}, {'testcase_name': 'class counts array, improper shape', 'class_weights': [[1], [1]], @@ -481,7 +506,8 @@ class FitTests(keras_parameterized.TestCase): class_weights, class_counts, num_classes, - err_msg): + err_msg + ): clf = model.Bolton(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): expected = clf.calculate_class_weights(class_weights, diff --git a/privacy/bolton/optimizer.py b/privacy/bolton/optimizer.py index 3b836ee..1ec25b9 100644 --- a/privacy/bolton/optimizer.py +++ b/privacy/bolton/optimizer.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Private Optimizer for bolton method""" +"""Bolton Optimizer for bolton method""" from __future__ import absolute_import from __future__ import division @@ -19,15 +19,16 @@ from __future__ import print_function import tensorflow as tf from tensorflow.python.keras.optimizer_v2 import optimizer_v2 +from privacy.bolton.loss import StrongConvexMixin -_private_attributes = ['_internal_optimizer', 'dtype'] +_accepted_distributions = ['laplace'] -class Private(optimizer_v2.OptimizerV2): +class Bolton(optimizer_v2.OptimizerV2): """ - Private optimizer wraps another tf optimizer to be used + Bolton optimizer wraps another tf optimizer to be used as the visible optimizer to the tf model. 
No matter the optimizer - passed, "Private" enables the bolton model to control the learning rate + passed, "Bolton" enables the bolton model to control the learning rate based on the strongly convex loss. For more details on the strong convexity requirements, see: @@ -36,7 +37,8 @@ class Private(optimizer_v2.OptimizerV2): """ def __init__(self, optimizer: optimizer_v2.OptimizerV2, - dtype=tf.float32 + loss: StrongConvexMixin, + dtype=tf.float32, ): """Constructor. @@ -44,15 +46,100 @@ class Private(optimizer_v2.OptimizerV2): optimizer: Optimizer_v2 or subclass to be used as the optimizer (wrapped). """ + + if not isinstance(loss, StrongConvexMixin): + raise ValueError("loss function must be a Strongly Convex and therfore" + "extend the StrongConvexMixin.") + self._private_attributes = ['_internal_optimizer', + 'dtype', + 'noise_distribution', + 'epsilon', + 'loss', + 'class_weights', + 'input_dim', + 'n_samples', + 'n_classes', + 'layers', + '_model' + ] self._internal_optimizer = optimizer self.dtype = dtype + self.loss = loss def get_config(self): """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ return self._internal_optimizer.get_config() - def limit_learning_rate(self, is_eager, beta, gamma): + def project_weights_to_r(self, force=False): + """helper method to normalize the weights to the R-ball. + + Args: + r: radius of "R-Ball". Scalar to normalize to. + force: True to normalize regardless of previous weight values. + False to check if weights > R-ball and only normalize then. + + Returns: + + """ + r = self.loss.radius() + for layer in self.layers: + if tf.executing_eagerly(): + weight_norm = tf.norm(layer.kernel, axis=0) + if force: + layer.kernel = layer.kernel / (weight_norm / r) + elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self.dtype)) > 0: + layer.kernel = layer.kernel / (weight_norm / r) + else: + weight_norm = tf.norm(layer.kernel, axis=0) + if force: + layer.kernel = layer.kernel / (weight_norm / r) + else: + layer.kernel = tf.cond( + tf.reduce_sum(tf.cast(weight_norm > r, dtype=self.dtype)) > 0, + lambda: layer.kernel / (weight_norm / r), + lambda: layer.kernel + ) + + def get_noise(self, data_size, input_dim, output_dim, class_weight): + """Sample noise to be added to weights for privacy guarantee + + Args: + distribution: the distribution type to pull noise from + data_size: the number of samples + + Returns: noise in shape of layer's weights to be added to the weights. + + """ + loss = self.loss + distribution = self.noise_distribution.lower() + if distribution == _accepted_distributions[0]: # laplace + per_class_epsilon = self.epsilon / (output_dim) + l2_sensitivity = (2 * + loss.lipchitz_constant(class_weight)) / \ + (loss.gamma() * data_size) + unit_vector = tf.random.normal(shape=(input_dim, output_dim), + mean=0, + seed=1, + stddev=1.0, + dtype=self.dtype) + unit_vector = unit_vector / tf.math.sqrt( + tf.reduce_sum(tf.math.square(unit_vector), axis=0) + ) + + beta = l2_sensitivity / per_class_epsilon + alpha = input_dim # input_dim + gamma = tf.random.gamma([output_dim], + alpha, + beta=1 / beta, + seed=1, + dtype=self.dtype + ) + return unit_vector * gamma + raise NotImplementedError('Noise distribution: {0} is not ' + 'a valid distribution'.format(distribution)) + + def limit_learning_rate(self, beta, gamma): """Implements learning rate limitation that is required by the bolton method for sensitivity bounding of the strongly convex function. 
Sets the learning rate to the min(1/beta, 1/(gamma*t)) @@ -65,20 +152,13 @@ class Private(optimizer_v2.OptimizerV2): Returns: None """ - numerator = tf.Variable(initial_value=1, dtype=self.dtype) + numerator = tf.constant(1, dtype=self.dtype) t = tf.cast(self._iterations, self.dtype) # will exist on the internal optimizer - pred = numerator / beta < numerator / (gamma * t) - if is_eager: # check eagerly - if pred: - self.learning_rate = numerator / beta - else: - self.learning_rate = numerator / (gamma * t) + if numerator / beta < numerator / (gamma * t): + self.learning_rate = numerator / beta else: - if pred: - self.learning_rate = numerator / beta - else: - self.learning_rate = numerator / (gamma * t) + self.learning_rate = numerator / (gamma * t) def from_config(self, *args, **kwargs): """Reroutes to _internal_optimizer. See super/_internal_optimizer. @@ -92,14 +172,25 @@ class Private(optimizer_v2.OptimizerV2): Args: name: - Returns: attribute from Private if specified to come from self, else + Returns: attribute from Bolton if specified to come from self, else from _internal_optimizer. """ - if name in _private_attributes: + if name == '_private_attributes': + return getattr(self, name) + elif name in self._private_attributes: return getattr(self, name) optim = object.__getattribute__(self, '_internal_optimizer') - return object.__getattribute__(optim, name) + try: + return object.__getattribute__(optim, name) + except AttributeError: + raise AttributeError("Neither '{0}' nor '{1}' object has attribute '{2}'" + "".format( + self.__class__.__name__, + self._internal_optimizer.__class__.__name__, + name + ) + ) def __setattr__(self, key, value): """ Set attribute to self instance if it's the internal optimizer. @@ -112,7 +203,9 @@ class Private(optimizer_v2.OptimizerV2): Returns: """ - if key in _private_attributes: + if key == '_private_attributes': + object.__setattr__(self, key, value) + elif key in self._private_attributes: object.__setattr__(self, key, value) else: setattr(self._internal_optimizer, key, value) @@ -130,24 +223,135 @@ class Private(optimizer_v2.OptimizerV2): def get_updates(self, loss, params): """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - return self._internal_optimizer.get_updates(loss, params) + # self.layers = params + out = self._internal_optimizer.get_updates(loss, params) + self.limit_learning_rate(self.loss.beta(self.class_weights), + self.loss.gamma() + ) + self.project_weights_to_r() + return out def apply_gradients(self, *args, **kwargs): """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - return self._internal_optimizer.apply_gradients(*args, **kwargs) + # grads_and_vars = kwargs.get('grads_and_vars', None) + # grads_and_vars = optimizer_v2._filter_grads(grads_and_vars) + # var_list = [v for (_, v) in grads_and_vars] + # self.layers = var_list + out = self._internal_optimizer.apply_gradients(*args, **kwargs) + self.limit_learning_rate(self.loss.beta(self.class_weights), + self.loss.gamma() + ) + self.project_weights_to_r() + return out def minimize(self, *args, **kwargs): """Reroutes to _internal_optimizer. See super/_internal_optimizer. 
""" - return self._internal_optimizer.minimize(*args, **kwargs) + # self.layers = kwargs.get('var_list', None) + out = self._internal_optimizer.minimize(*args, **kwargs) + self.limit_learning_rate(self.loss.beta(self.class_weights), + self.loss.gamma() + ) + self.project_weights_to_r() + return out def _compute_gradients(self, *args, **kwargs): """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ + # self.layers = kwargs.get('var_list', None) return self._internal_optimizer._compute_gradients(*args, **kwargs) def get_gradients(self, *args, **kwargs): """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ + # self.layers = kwargs.get('params', None) return self._internal_optimizer.get_gradients(*args, **kwargs) + + def __enter__(self): + noise_distribution = self.noise_distribution + epsilon = self.epsilon + class_weights = self.class_weights + n_samples = self.n_samples + if noise_distribution not in _accepted_distributions: + raise ValueError('Detected noise distribution: {0} not one of: {1} valid' + 'distributions'.format(noise_distribution, + _accepted_distributions)) + self.noise_distribution = noise_distribution + self.epsilon = epsilon + self.class_weights = class_weights + self.n_samples = n_samples + return self + + def __call__(self, + noise_distribution, + epsilon, + layers, + class_weights, + n_samples, + n_classes, + ): + """ + + Args: + noise_distribution: the noise distribution to pick. + see _accepted_distributions and get_noise for + possible values. + epsilon: privacy parameter. Lower gives more privacy but less utility. + class_weights: class_weights used + n_samples number of rows/individual samples in the training set + n_classes: number of output classes + layers: list of Keras/Tensorflow layers. + """ + if epsilon <= 0: + raise ValueError('Detected epsilon: {0}. ' + 'Valid range is 0 < epsilon Date: Mon, 17 Jun 2019 13:25:30 -0400 Subject: [PATCH 04/39] Bolton created as optimizer with context manager usage. Unit tests included. Additional loss functions TBD. --- privacy/bolton/loss.py | 28 +- privacy/bolton/loss_test.py | 6 +- privacy/bolton/model.py | 432 +++++++------------------------ privacy/bolton/model_test.py | 252 +++++++++--------- privacy/bolton/optimizer.py | 297 ++++++++++++--------- privacy/bolton/optimizer_test.py | 331 +++++++++++++++++++---- 6 files changed, 685 insertions(+), 661 deletions(-) diff --git a/privacy/bolton/loss.py b/privacy/bolton/loss.py index 5cc029a..de49607 100644 --- a/privacy/bolton/loss.py +++ b/privacy/bolton/loss.py @@ -102,7 +102,7 @@ class StrongConvexMixin: return tf.math.reduce_max(class_weight) -class StrongConvexHuber(losses.Huber, StrongConvexMixin): +class StrongConvexHuber(losses.Loss, StrongConvexMixin): """Strong Convex version of Huber loss using l2 weight regularization. """ @@ -112,7 +112,6 @@ class StrongConvexHuber(losses.Huber, StrongConvexMixin): radius_constant: float, delta: float, reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, - name: str = 'huber', dtype=tf.float32): """Constructor. 
@@ -137,13 +136,17 @@ class StrongConvexHuber(losses.Huber, StrongConvexMixin): raise ValueError('radius_constant: {0}, should be >= 0'.format( radius_constant )) - self.C = C + if delta <= 0: + raise ValueError('delta: {0}, should be >= 0'.format( + delta + )) + self.C = C # pylint: disable=invalid-name + self.delta = delta self.radius_constant = radius_constant self.dtype = dtype self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexHuber, self).__init__( - delta=delta, - name=name, + name='huber', reduction=reduction, ) @@ -151,26 +154,25 @@ class StrongConvexHuber(losses.Huber, StrongConvexMixin): """Compute loss Args: - y_true: Ground truth values. + y_true: Ground truth values. One y_pred: The predicted values. Returns: Loss values per sample. """ # return super(StrongConvexHuber, self).call(y_true, y_pred) * self._sample_weight - h = self._fn_kwargs['delta'] + h = self.delta z = y_pred * y_true one = tf.constant(1, dtype=self.dtype) four = tf.constant(4, dtype=self.dtype) if z > one + h: - return z - z + return _ops.convert_to_tensor_v2(0, dtype=self.dtype) elif tf.math.abs(one - z) <= h: return one / (four * h) * tf.math.pow(one + h - z, 2) elif z < one - h: return one - z - else: - raise ValueError('') + raise ValueError('') # shouldn't be possible to get here. def radius(self): """See super class. @@ -186,7 +188,7 @@ class StrongConvexHuber(losses.Huber, StrongConvexMixin): """See super class. """ max_class_weight = self.max_class_weight(class_weight, self.dtype) - delta = _ops.convert_to_tensor_v2(self._fn_kwargs['delta'], + delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype ) return self.C * max_class_weight / (delta * @@ -250,7 +252,7 @@ class StrongConvexBinaryCrossentropy( radius_constant )) self.dtype = dtype - self.C = C + self.C = C # pylint: disable=invalid-name self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexBinaryCrossentropy, self).__init__( reduction=reduction, @@ -306,7 +308,7 @@ class StrongConvexBinaryCrossentropy( this loss function to be strongly convex. :return: """ - return L1L2(l2=self.reg_lambda) + return L1L2(l2=self.reg_lambda/2) # class StrongConvexSparseCategoricalCrossentropy( diff --git a/privacy/bolton/loss_test.py b/privacy/bolton/loss_test.py index ddb4861..488710f 100644 --- a/privacy/bolton/loss_test.py +++ b/privacy/bolton/loss_test.py @@ -79,7 +79,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'reg_lambda': 1, 'C': 1, 'radius_constant': 1 - }, + }, # pylint: disable=invalid-name ]) def test_init_params(self, reg_lambda, C, radius_constant): """Test initialization for given arguments @@ -107,7 +107,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'reg_lambda': -1, 'C': 1, 'radius_constant': 1 - }, + }, # pylint: disable=invalid-name ]) def test_bad_init_params(self, reg_lambda, C, radius_constant): """Test invalid domain for given params. 
Should return ValueError @@ -180,7 +180,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'fn': 'kernel_regularizer', 'init_args': [1, 1, 1], 'args': [], - 'result': L1L2(l2=1), + 'result': L1L2(l2=0.5), }, ]) def test_fns(self, init_args, fn, args, result): diff --git a/privacy/bolton/model.py b/privacy/bolton/model.py index a6731fe..6f3f48e 100644 --- a/privacy/bolton/model.py +++ b/privacy/bolton/model.py @@ -23,8 +23,6 @@ from tensorflow.python.framework import ops as _ops from privacy.bolton.loss import StrongConvexMixin from privacy.bolton.optimizer import Bolton -_accepted_distributions = ['laplace'] - class BoltonModel(Model): """ @@ -41,41 +39,28 @@ class BoltonModel(Model): """ def __init__(self, - n_classes, - # noise_distribution='laplace', + n_outputs, seed=1, dtype=tf.float32 ): """ private constructor. Args: - n_classes: number of output classes to predict. - epsilon: level of privacy guarantee - noise_distribution: distribution to pull weight perturbations from - weights_initializer: initializer for weights + n_outputs: number of output classes to predict. seed: random seed to use dtype: data type to use for tensors """ - - # if noise_distribution not in _accepted_distributions: - # raise ValueError('Detected noise distribution: {0} not one of: {1} valid' - # 'distributions'.format(noise_distribution, - # _accepted_distributions)) - # if epsilon <= 0: - # raise ValueError('Detected epsilon: {0}. ' - # 'Valid range is 0 < epsilon 0.'.format( + n_outputs + )) + self.n_outputs = n_outputs self.seed = seed - self.__in_fit = False self._layers_instantiated = False - # self._callback = MyCustomCallback() self._dtype = dtype - def call(self, inputs): + def call(self, inputs, training=False): # pylint: disable=arguments-differ """Forward pass of network Args: @@ -87,37 +72,30 @@ class BoltonModel(Model): return self.output_layer(inputs) def compile(self, - optimizer='SGD', - loss=None, + optimizer, + loss, metrics=None, loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None, distribute=None, - **kwargs): + kernel_initializer=tf.initializers.GlorotUniform, + **kwargs): # pylint: disable=arguments-differ """See super class. Default optimizer used in Bolton method is SGD. """ - for key, val in StrongConvexMixin.__dict__.items(): - if callable(val) and getattr(loss, key, None) is None: - raise ValueError("Please ensure you are passing a valid StrongConvex " - "loss that has all the required methods " - "implemented. " - "Required method: {0} not found".format(key)) + if not isinstance(loss, StrongConvexMixin): + raise ValueError("loss function must be a Strongly Convex and therefore " + "extend the StrongConvexMixin.") if not self._layers_instantiated: # compile may be called multiple times - kernel_intiializer = kwargs.get('kernel_initializer', - tf.initializers.GlorotUniform) + # for instance, if the input/outputs are not defined until fit. 
self.output_layer = tf.keras.layers.Dense( - self.n_classes, + self.n_outputs, kernel_regularizer=loss.kernel_regularizer(), - kernel_initializer=kernel_intiializer(), + kernel_initializer=kernel_initializer(), ) - # if we don't do regularization here, we require the user to - # re-instantiate the model each time they want to change the penalty - # weighting self._layers_instantiated = True - self.output_layer.kernel_regularizer.l2 = loss.reg_lambda if not isinstance(optimizer, Bolton): optimizer = optimizers.get(optimizer) optimizer = Bolton(optimizer, loss) @@ -133,69 +111,16 @@ class BoltonModel(Model): **kwargs ) - # def _post_fit(self, x, n_samples): - # """Implements 1-time weight changes needed for Bolton method. - # In this case, specifically implements the noise addition - # assuming a strongly convex function. - # - # Args: - # x: inputs - # n_samples: number of samples in the inputs. In case the number - # cannot be readily determined by inspecting x. - # - # Returns: - # - # """ - # data_size = None - # if n_samples is not None: - # data_size = n_samples - # elif hasattr(x, 'shape'): - # data_size = x.shape[0] - # elif hasattr(x, "__len__"): - # data_size = len(x) - # elif data_size is None: - # if n_samples is None: - # raise ValueError("Unable to detect the number of training " - # "samples and n_smaples was None. " - # "either pass a dataset with a .shape or " - # "__len__ attribute or explicitly pass the " - # "number of samples as n_smaples.") - # for layer in self.layers: - # # layer.kernel = layer.kernel + self._get_noise( - # # data_size - # # ) - # input_dim = layer.kernel.numpy().shape[0] - # layer.kernel = layer.kernel + self.optimizer.get_noise( - # self.loss, - # data_size, - # input_dim, - # self.n_classes, - # self.class_weight - # ) - def fit(self, x=None, y=None, batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0.0, - validation_data=None, - shuffle=True, class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - validation_freq=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False, n_samples=None, epsilon=2, noise_distribution='laplace', - **kwargs): + steps_per_epoch=None, + **kwargs): # pylint: disable=arguments-differ """Reroutes to super fit with additional Bolton delta-epsilon privacy requirements implemented. Note, inputs must be normalized s.t. ||x|| < 1 Requirements are as follows: @@ -207,92 +132,101 @@ class BoltonModel(Model): Args: n_samples: the number of individual samples in x. + epsilon: privacy parameter, which trades off between utility an privacy. + See the bolton paper for more description. + noise_distribution: the distribution to pull noise from. + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. - Returns: + See the super method for descriptions on the rest of the arguments. """ - self.__in_fit = True - # cb = [self.optimizer.callbacks] - # if callbacks is not None: - # cb.extend(callbacks) - # callbacks = cb if class_weight is None: class_weight = self.calculate_class_weights(class_weight) - # self.class_weight = class_weight + if n_samples is not None: + data_size = n_samples + elif hasattr(x, 'shape'): + data_size = x.shape[0] + elif hasattr(x, "__len__"): + data_size = len(x) + else: + data_size = None + batch_size_ = self._validate_or_infer_batch_size(batch_size, + steps_per_epoch, + x + ) + # inferring batch_size to be passed to optimizer. 
batch_size must remain its + # initial value when passed to super().fit() + if batch_size_ is None: + raise ValueError('batch_size: {0} is an ' + 'invalid value'.format(batch_size_)) with self.optimizer(noise_distribution, epsilon, self.layers, class_weight, - n_samples, - self.n_classes, - ) as optim: + data_size, + self.n_outputs, + batch_size_, + ) as _: out = super(BoltonModel, self).fit(x=x, y=y, batch_size=batch_size, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_split=validation_split, - validation_data=validation_data, - shuffle=shuffle, class_weight=class_weight, - sample_weight=sample_weight, - initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, - validation_steps=validation_steps, - validation_freq=validation_freq, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, **kwargs ) return out def fit_generator(self, generator, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - validation_freq=1, class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=True, - initial_epoch=0, - n_samples=None - ): + noise_distribution='laplace', + epsilon=2, + n_samples=None, + steps_per_epoch=None, + **kwargs + ): # pylint: disable=arguments-differ """ This method is the same as fit except for when the passed dataset is a generator. See super method and fit for more details. Args: n_samples: number of individual samples in x + noise_distribution: the distribution to get noise from. + epsilon: privacy parameter, which trades off utility and privacy. See + Bolton paper for more description. + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + See the super method for descriptions on the rest of the arguments. 
""" if class_weight is None: class_weight = self.calculate_class_weights(class_weight) - self.class_weight = class_weight - out = super(BoltonModel, self).fit_generator( - generator, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch - ) - if not self.__in_fit: - self._post_fit(generator, n_samples) + if n_samples is not None: + data_size = n_samples + elif hasattr(generator, 'shape'): + data_size = generator.shape[0] + elif hasattr(generator, "__len__"): + data_size = len(generator) + else: + data_size = None + batch_size = self._validate_or_infer_batch_size(None, + steps_per_epoch, + generator + ) + with self.optimizer(noise_distribution, + epsilon, + self.layers, + class_weight, + data_size, + self.n_outputs, + batch_size + ) as _: + out = super(BoltonModel, self).fit_generator( + generator, + class_weight=class_weight, + steps_per_epoch=steps_per_epoch, + **kwargs + ) return out def calculate_class_weights(self, @@ -336,7 +270,7 @@ class BoltonModel(Model): "class_weights=%s" % class_weights) elif class_weights is not None: if num_classes is None: - raise ValueError("You must pass a value for num_classes if" + raise ValueError("You must pass a value for num_classes if " "creating an array of class_weights") # performing class weight calculation if class_weights is None: @@ -357,195 +291,9 @@ class BoltonModel(Model): "1D array".format(class_weights.shape)) if class_weights.shape[0] != num_classes: raise ValueError( - "Detected array length: {0} instead of: {1}".format( - class_weights.shape[0], - num_classes - ) + "Detected array length: {0} instead of: {1}".format( + class_weights.shape[0], + num_classes + ) ) return class_weights - - # def _project_weights_to_r(self, r, force=False): - # """helper method to normalize the weights to the R-ball. - # - # Args: - # r: radius of "R-Ball". Scalar to normalize to. - # force: True to normalize regardless of previous weight values. - # False to check if weights > R-ball and only normalize then. - # - # Returns: - # - # """ - # for layer in self.layers: - # weight_norm = tf.norm(layer.kernel, axis=0) - # if force: - # layer.kernel = layer.kernel / (weight_norm / r) - # elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self._dtype)) > 0: - # layer.kernel = layer.kernel / (weight_norm / r) - - # def _get_noise(self, distribution, data_size): - # """Sample noise to be added to weights for privacy guarantee - # - # Args: - # distribution: the distribution type to pull noise from - # data_size: the number of samples - # - # Returns: noise in shape of layer's weights to be added to the weights. 
- # - # """ - # distribution = distribution.lower() - # input_dim = self.layers[0].kernel.numpy().shape[0] - # loss = self.loss - # if distribution == _accepted_distributions[0]: # laplace - # per_class_epsilon = self.epsilon / (self.n_classes) - # l2_sensitivity = (2 * - # loss.lipchitz_constant(self.class_weight)) / \ - # (loss.gamma() * data_size) - # unit_vector = tf.random.normal(shape=(input_dim, self.n_classes), - # mean=0, - # seed=1, - # stddev=1.0, - # dtype=self._dtype) - # unit_vector = unit_vector / tf.math.sqrt( - # tf.reduce_sum(tf.math.square(unit_vector), axis=0) - # ) - # - # beta = l2_sensitivity / per_class_epsilon - # alpha = input_dim # input_dim - # gamma = tf.random.gamma([self.n_classes], - # alpha, - # beta=1 / beta, - # seed=1, - # dtype=self._dtype - # ) - # return unit_vector * gamma - # raise NotImplementedError('Noise distribution: {0} is not ' - # 'a valid distribution'.format(distribution)) - - -if __name__ == '__main__': - import tensorflow as tf - - import os - import time - import matplotlib.pyplot as plt - - _URL = 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/facades.tar.gz' - - path_to_zip = tf.keras.utils.get_file('facades.tar.gz', - origin=_URL, - extract=True) - - PATH = os.path.join(os.path.dirname(path_to_zip), 'facades/') - BUFFER_SIZE = 400 - BATCH_SIZE = 1 - IMG_WIDTH = 256 - IMG_HEIGHT = 256 - - - def load(image_file): - image = tf.io.read_file(image_file) - image = tf.image.decode_jpeg(image) - - w = tf.shape(image)[1] - - w = w // 2 - real_image = image[:, :w, :] - input_image = image[:, w:, :] - - input_image = tf.cast(input_image, tf.float32) - real_image = tf.cast(real_image, tf.float32) - - return input_image, real_image - - - inp, re = load(PATH + 'train/100.jpg') - # casting to int for matplotlib to show the image - plt.figure() - plt.imshow(inp / 255.0) - plt.figure() - plt.imshow(re / 255.0) - - - def resize(input_image, real_image, height, width): - input_image = tf.image.resize(input_image, [height, width], - method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) - real_image = tf.image.resize(real_image, [height, width], - method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) - - return input_image, real_image - - - def random_crop(input_image, real_image): - stacked_image = tf.stack([input_image, real_image], axis=0) - cropped_image = tf.image.random_crop( - stacked_image, size=[2, IMG_HEIGHT, IMG_WIDTH, 3]) - - return cropped_image[0], cropped_image[1] - - - def normalize(input_image, real_image): - input_image = (input_image / 127.5) - 1 - real_image = (real_image / 127.5) - 1 - - return input_image, real_image - - - @tf.function() - def random_jitter(input_image, real_image): - # resizing to 286 x 286 x 3 - input_image, real_image = resize(input_image, real_image, 286, 286) - - # randomly cropping to 256 x 256 x 3 - input_image, real_image = random_crop(input_image, real_image) - - if tf.random.uniform(()) > 0.5: - # random mirroring - input_image = tf.image.flip_left_right(input_image) - real_image = tf.image.flip_left_right(real_image) - - return input_image, real_image - - - def load_image_train(image_file): - input_image, real_image = load(image_file) - input_image, real_image = random_jitter(input_image, real_image) - input_image, real_image = normalize(input_image, real_image) - - return input_image, real_image - - - def load_image_test(image_file): - input_image, real_image = load(image_file) - input_image, real_image = resize(input_image, real_image, - IMG_HEIGHT, IMG_WIDTH) - input_image, real_image = 
normalize(input_image, real_image) - - return input_image, real_image - - - train_dataset = tf.data.Dataset.list_files(PATH + 'train/*.jpg') - train_dataset = train_dataset.shuffle(BUFFER_SIZE) - train_dataset = train_dataset.map(load_image_train, - num_parallel_calls=tf.data.experimental.AUTOTUNE) - train_dataset = train_dataset.batch(1) - # steps_per_epoch = training_utils.infer_steps_for_dataset( - # train_dataset, None, epochs=1, steps_name='steps') - - # for batch in train_dataset: - # print(batch[1].shape) - test_dataset = tf.data.Dataset.list_files(PATH + 'test/*.jpg') - # shuffling so that for every epoch a different image is generated - # to predict and display the progress of our model. - train_dataset = train_dataset.shuffle(BUFFER_SIZE) - test_dataset = test_dataset.map(load_image_test) - test_dataset = test_dataset.batch(1) - - be = BoltonModel(3, 2) - from tensorflow.python.keras.optimizer_v2 import adam - from privacy.bolton import loss - - test = adam.Adam() - l = loss.StrongConvexBinaryCrossentropy(1, 2, 1) - be.compile(test, l) - print("Eager exeuction: {0}".format(tf.executing_eagerly())) - be.fit(train_dataset, verbose=0, steps_per_epoch=1, n_samples=1) diff --git a/privacy/bolton/model_test.py b/privacy/bolton/model_test.py index 53c4c45..4316a1e 100644 --- a/privacy/bolton/model_test.py +++ b/privacy/bolton/model_test.py @@ -19,25 +19,22 @@ from __future__ import print_function import tensorflow as tf -from tensorflow.python.platform import test from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 from tensorflow.python.keras import losses from tensorflow.python.framework import ops as _ops -from tensorflow.python.framework import test_util -from privacy.bolton import model -from privacy.bolton.loss import StrongConvexMixin -from absl.testing import parameterized -from absl.testing import absltest from tensorflow.python.keras.regularizers import L1L2 - +from absl.testing import parameterized +from privacy.bolton import model +from privacy.bolton.optimizer import Bolton +from privacy.bolton.loss import StrongConvexMixin class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model""" def __init__(self, reg_lambda, C, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda - self.C = C + self.C = C # pylint: disable=invalid-name self.radius_constant = radius_constant def radius(self): @@ -78,13 +75,17 @@ class TestLoss(losses.Loss, StrongConvexMixin): """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) - def call(self, val0, val1): + def call(self, y_true, y_pred): """Loss function that is minimized at the mean of the input points.""" - return 0.5 * tf.reduce_sum(tf.math.squared_difference(val0, val1), axis=1) + return 0.5 * tf.reduce_sum( + tf.math.squared_difference(y_true, y_pred), + axis=1 + ) def max_class_weight(self, class_weight): if class_weight is None: return 1 + raise ValueError('') def kernel_regularizer(self): return L1L2(l2=self.reg_lambda) @@ -116,125 +117,91 @@ class InitTests(keras_parameterized.TestCase): @parameterized.named_parameters([ {'testcase_name': 'normal', - 'n_classes': 1, - 'epsilon': 1, - 'noise_distribution': 'laplace', - 'seed': 1 + 'n_outputs': 1, }, - {'testcase_name': 'extreme range', - 'n_classes': 5, - 'epsilon': 0.1, - 'noise_distribution': 'laplace', - 'seed': 10 - }, - {'testcase_name': 'extreme range2', - 'n_classes': 50, - 'epsilon': 10, - 'noise_distribution': 
'laplace', - 'seed': 100 + {'testcase_name': 'many outputs', + 'n_outputs': 100, }, ]) - def test_init_params( - self, n_classes, epsilon, noise_distribution, seed): + def test_init_params(self, n_outputs): + """test initialization of BoltonModel + + Args: + n_outputs: number of output neurons + """ # test valid domains for each variable - clf = model.Bolton(n_classes, - epsilon, - noise_distribution, - seed - ) - self.assertIsInstance(clf, model.Bolton) + clf = model.BoltonModel(n_outputs) + self.assertIsInstance(clf, model.BoltonModel) @parameterized.named_parameters([ - {'testcase_name': 'invalid noise', - 'n_classes': 1, - 'epsilon': 1, - 'noise_distribution': 'not_valid', - 'weights_initializer': tf.initializers.GlorotUniform(), - }, - {'testcase_name': 'invalid epsilon', - 'n_classes': 1, - 'epsilon': -1, - 'noise_distribution': 'laplace', - 'weights_initializer': tf.initializers.GlorotUniform(), + {'testcase_name': 'invalid n_outputs', + 'n_outputs': -1, }, ]) - def test_bad_init_params( - self, - n_classes, - epsilon, - noise_distribution, - weights_initializer): + def test_bad_init_params(self, n_outputs): + """test bad initializations of BoltonModel that should raise errors + + Args: + n_outputs: number of output neurons + """ # test invalid domains for each variable, especially noise - seed = 1 with self.assertRaises(ValueError): - clf = model.Bolton(n_classes, - epsilon, - noise_distribution, - weights_initializer, - seed - ) + model.BoltonModel(n_outputs) @parameterized.named_parameters([ {'testcase_name': 'string compile', - 'n_classes': 1, + 'n_outputs': 1, 'loss': TestLoss(1, 1, 1), 'optimizer': 'adam', - 'weights_initializer': tf.initializers.GlorotUniform(), }, {'testcase_name': 'test compile', - 'n_classes': 100, + 'n_outputs': 100, 'loss': TestLoss(1, 1, 1), 'optimizer': TestOptimizer(), - 'weights_initializer': tf.initializers.GlorotUniform(), - }, - {'testcase_name': 'invalid weights initializer', - 'n_classes': 1, - 'loss': TestLoss(1, 1, 1), - 'optimizer': TestOptimizer(), - 'weights_initializer': 'not_valid', }, ]) - def test_compile(self, n_classes, loss, optimizer, weights_initializer): + def test_compile(self, n_outputs, loss, optimizer): + """test compilation of BoltonModel + + Args: + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instanced TestOptimizer instance + """ # test compilation of valid tf.optimizer and tf.loss - epsilon = 1 - noise_distribution = 'laplace' with self.cached_session(): - clf = model.Bolton(n_classes, - epsilon, - noise_distribution, - weights_initializer - ) + clf = model.BoltonModel(n_outputs) clf.compile(optimizer, loss) self.assertEqual(clf.loss, loss) @parameterized.named_parameters([ {'testcase_name': 'Not strong loss', - 'n_classes': 1, + 'n_outputs': 1, 'loss': losses.BinaryCrossentropy(), 'optimizer': 'adam', }, {'testcase_name': 'Not valid optimizer', - 'n_classes': 1, + 'n_outputs': 1, 'loss': TestLoss(1, 1, 1), 'optimizer': 'ada', } ]) - def test_bad_compile(self, n_classes, loss, optimizer): + def test_bad_compile(self, n_outputs, loss, optimizer): + """test bad compilations of BoltonModel that should raise errors + + Args: + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instanced TestOptimizer instance + """ # test compilaton of invalid tf.optimizer and non instantiated loss. 
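# Editor's sketch (not part of this diff): the happy path that the compile
# tests above exercise, assuming the module layout at this revision
# (privacy.bolton.model and privacy.bolton.loss). Values are placeholders;
# a string optimizer such as 'adam' is also accepted by compile.
import tensorflow as tf
from privacy.bolton.model import BoltonModel
from privacy.bolton.loss import StrongConvexBinaryCrossentropy

clf = BoltonModel(1)  # a single output neuron
loss = StrongConvexBinaryCrossentropy(1, 1, 1)  # reg_lambda, C, radius_constant
clf.compile(tf.optimizers.SGD(), loss)  # the loss must extend StrongConvexMixin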
- epsilon = 1 - noise_distribution = 'laplace' - weights_initializer = tf.initializers.GlorotUniform() with self.cached_session(): with self.assertRaises((ValueError, AttributeError)): - clf = model.Bolton(n_classes, - epsilon, - noise_distribution, - weights_initializer - ) + clf = model.BoltonModel(n_outputs) clf.compile(optimizer, loss) -def _cat_dataset(n_samples, input_dim, n_classes, t='train', generator=False): +def _cat_dataset(n_samples, input_dim, n_classes, generator=False): """ Creates a categorically encoded dataset (y is categorical). returns the specified dataset either as a static array or as a generator. @@ -245,10 +212,9 @@ def _cat_dataset(n_samples, input_dim, n_classes, t='train', generator=False): n_samples: number of rows input_dim: input dimensionality n_classes: output dimensionality - t: one of 'train', 'val', 'test' generator: False for array, True for generator Returns: - X as (n_samples, input_dim), Y as (n_samples, n_classes) + X as (n_samples, input_dim), Y as (n_samples, n_outputs) """ x_stack = [] y_stack = [] @@ -269,25 +235,39 @@ def _cat_dataset(n_samples, input_dim, n_classes, t='train', generator=False): def _do_fit(n_samples, input_dim, - n_classes, + n_outputs, epsilon, generator, batch_size, reset_n_samples, optimizer, loss, - callbacks, distribution='laplace'): - clf = model.Bolton(n_classes, - epsilon, - distribution - ) + """Helper to instantiate necessary components for fitting and perform a model + fit. + + Args: + n_samples: number of samples in dataset + input_dim: the sample dimensionality + n_outputs: number of output neurons + epsilon: privacy parameter + generator: True to create a generator, False to use an iterator + batch_size: batch_size to use + reset_n_samples: True to set _samples to None prior to fitting. + False does nothing + optimizer: instance of TestOptimizer + loss: instance of TestLoss + distribution: distribution to get noise from. + + Returns: BoltonModel instsance + """ + clf = model.BoltonModel(n_outputs) clf.compile(optimizer, loss) if generator: x = _cat_dataset( n_samples, input_dim, - n_classes, + n_outputs, generator=generator ) y = None @@ -295,25 +275,20 @@ def _do_fit(n_samples, x = x.shuffle(n_samples//2) batch_size = None else: - x, y = _cat_dataset(n_samples, input_dim, n_classes, generator=generator) + x, y = _cat_dataset(n_samples, input_dim, n_outputs, generator=generator) if reset_n_samples: n_samples = None - if callbacks is not None: - callbacks = [callbacks] clf.fit(x, y, batch_size=batch_size, n_samples=n_samples, - callbacks=callbacks + noise_distribution=distribution, + epsilon=epsilon ) return clf -class TestCallback(tf.keras.callbacks.Callback): - pass - - class FitTests(keras_parameterized.TestCase): """Test cases for keras model fitting""" @@ -322,27 +297,29 @@ class FitTests(keras_parameterized.TestCase): {'testcase_name': 'iterator fit', 'generator': False, 'reset_n_samples': True, - 'callbacks': None }, {'testcase_name': 'iterator fit no samples', 'generator': False, 'reset_n_samples': True, - 'callbacks': None }, {'testcase_name': 'generator fit', 'generator': True, 'reset_n_samples': False, - 'callbacks': None }, {'testcase_name': 'with callbacks', 'generator': True, 'reset_n_samples': False, - 'callbacks': TestCallback() }, ]) - def test_fit(self, generator, reset_n_samples, callbacks): + def test_fit(self, generator, reset_n_samples): + """Tests fitting of BoltonModel + + Args: + generator: True for generator test, False for iterator test. 
+ reset_n_samples: True to reset the n_samples to None, False does nothing + """ loss = TestLoss(1, 1, 1) - optimizer = TestOptimizer() + optimizer = Bolton(TestOptimizer(), loss) n_classes = 2 input_dim = 5 epsilon = 1 @@ -358,28 +335,27 @@ class FitTests(keras_parameterized.TestCase): reset_n_samples, optimizer, loss, - callbacks ) self.assertEqual(hasattr(clf, 'layers'), True) @parameterized.named_parameters([ {'testcase_name': 'generator fit', 'generator': True, - 'reset_n_samples': False, - 'callbacks': None }, ]) - def test_fit_gen(self, generator, reset_n_samples, callbacks): + def test_fit_gen(self, generator): + """Tests the fit_generator method of BoltonModel + + Args: + generator: True to test with a generator dataset + """ loss = TestLoss(1, 1, 1) optimizer = TestOptimizer() n_classes = 2 input_dim = 5 - epsilon = 1 batch_size = 1 n_samples = 10 - clf = model.Bolton(n_classes, - epsilon - ) + clf = model.BoltonModel(n_classes) clf.compile(optimizer, loss) x = _cat_dataset( n_samples, @@ -405,6 +381,14 @@ class FitTests(keras_parameterized.TestCase): }, ]) def test_bad_fit(self, generator, reset_n_samples, distribution): + """Tests fitting with invalid parameters, which should raise an error + + Args: + generator: True to test with generator, False is iterator + reset_n_samples: True to reset the n_samples param to None prior to + passing it to fit + distribution: distribution to get noise from. + """ with self.assertRaises(ValueError): loss = TestLoss(1, 1, 1) optimizer = TestOptimizer() @@ -423,7 +407,6 @@ class FitTests(keras_parameterized.TestCase): reset_n_samples, optimizer, loss, - None, distribution ) @@ -450,7 +433,15 @@ class FitTests(keras_parameterized.TestCase): num_classes, result ): - clf = model.Bolton(1, 1) + """Tests the BOltonModel calculate_class_weights method + + Args: + class_weights: the class_weights to use + class_counts: count of number of samples for each class + num_classes: number of outputs neurons + result: expected result + """ + clf = model.BoltonModel(1, 1) expected = clf.calculate_class_weights(class_weights, class_counts, num_classes @@ -508,12 +499,21 @@ class FitTests(keras_parameterized.TestCase): num_classes, err_msg ): - clf = model.Bolton(1, 1) - with self.assertRaisesRegexp(ValueError, err_msg): - expected = clf.calculate_class_weights(class_weights, - class_counts, - num_classes - ) + """Tests the BOltonModel calculate_class_weights method with invalid params + which should raise the expected errors. 
+ + Args: + class_weights: the class_weights to use + class_counts: count of number of samples for each class + num_classes: number of outputs neurons + result: expected result + """ + clf = model.BoltonModel(1, 1) + with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method + clf.calculate_class_weights(class_weights, + class_counts, + num_classes + ) if __name__ == '__main__': diff --git a/privacy/bolton/optimizer.py b/privacy/bolton/optimizer.py index 1ec25b9..cfd0b98 100644 --- a/privacy/bolton/optimizer.py +++ b/privacy/bolton/optimizer.py @@ -19,9 +19,74 @@ from __future__ import print_function import tensorflow as tf from tensorflow.python.keras.optimizer_v2 import optimizer_v2 +from tensorflow.python.ops import math_ops +from tensorflow.python import ops as _ops from privacy.bolton.loss import StrongConvexMixin -_accepted_distributions = ['laplace'] +_accepted_distributions = ['laplace'] # implemented distributions for noising + + +class GammaBetaDecreasingStep( + optimizer_v2.learning_rate_schedule.LearningRateSchedule +): + """ + Learning Rate Scheduler using the minimum of 1/beta and 1/(gamma * step) + at each step. A required step for privacy guarantees. + """ + def __init__(self): + self.is_init = False + self.beta = None + self.gamma = None + + def __call__(self, step): + """ + returns the learning rate + Args: + step: the current iteration number + Returns: + decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per + the Bolton privacy requirements. + """ + if not self.is_init: + raise AttributeError('Please initialize the {0} Learning Rate Scheduler.' + 'This is performed automatically by using the ' + '{1} as a context manager, ' + 'as desired'.format(self.__class__.__name__, + Bolton.__class__.__name__ + ) + ) + dtype = self.beta.dtype + one = tf.constant(1, dtype) + return tf.math.minimum(tf.math.reduce_min(one/self.beta), + one/(self.gamma*math_ops.cast(step, dtype)) + ) + + def get_config(self): + """ + config to setup the learning rate scheduler. + """ + return {'beta': self.beta, 'gamma': self.gamma} + + def initialize(self, beta, gamma): + """setup the learning rate scheduler with the beta and gamma values provided + by the loss function. Meant to be used with .fit as the loss params may + depend on values passed to fit. + + Args: + beta: Smoothness value. See StrongConvexMixin + gamma: Strong Convexity parameter. See StrongConvexMixin. + """ + self.is_init = True + self.beta = beta + self.gamma = gamma + + def de_initialize(self): + """De initialize the scheduler after fitting, in case another fit call has + different loss parameters. + """ + self.is_init = False + self.beta = None + self.gamma = None class Bolton(optimizer_v2.OptimizerV2): @@ -31,11 +96,24 @@ class Bolton(optimizer_v2.OptimizerV2): passed, "Bolton" enables the bolton model to control the learning rate based on the strongly convex loss. + To use the Bolton method, you must: + 1. instantiate it with an instantiated tf optimizer and StrongConvexLoss. + 2. use it as a context manager around your .fit method internals. + + This can be accomplished by the following: + optimizer = tf.optimizers.SGD() + loss = privacy.bolton.losses.StrongConvexBinaryCrossentropy() + bolton = Bolton(optimizer, loss) + with bolton(*args) as _: + model.fit() + The args required for the context manager can be found in the __call__ + method. 
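# Editor's sketch (not part of this diff): a slightly fuller version of the
# context-manager pattern described in the docstring above. keras_model, x and
# y stand in for an already-built model and its data, and all numeric values
# are placeholders; in normal use BoltonModel.fit builds this call for you.
# The __call__ arguments at this revision are (noise_distribution, epsilon,
# layers, class_weights, n_samples, n_outputs, batch_size).
import tensorflow as tf
from privacy.bolton.loss import StrongConvexBinaryCrossentropy
from privacy.bolton.optimizer import Bolton

loss = StrongConvexBinaryCrossentropy(1, 1, 1)
bolton = Bolton(tf.optimizers.SGD(), loss)
with bolton('laplace', 2, keras_model.layers, 1, 20, 1, 20) as _:
  keras_model.fit(x, y)  # weights are projected to the R-ball each update;
                         # noise from get_noise is then added to the trained
                         # weights for the privacy guarantee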
+ For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient Descent-based Analytics by Xi Wu et. al. """ - def __init__(self, + def __init__(self, # pylint: disable=super-init-not-called optimizer: optimizer_v2.OptimizerV2, loss: StrongConvexMixin, dtype=tf.float32, @@ -45,10 +123,11 @@ class Bolton(optimizer_v2.OptimizerV2): Args: optimizer: Optimizer_v2 or subclass to be used as the optimizer (wrapped). + loss: StrongConvexLoss function that the model is being compiled with. """ if not isinstance(loss, StrongConvexMixin): - raise ValueError("loss function must be a Strongly Convex and therfore" + raise ValueError("loss function must be a Strongly Convex and therefore " "extend the StrongConvexMixin.") self._private_attributes = ['_internal_optimizer', 'dtype', @@ -58,13 +137,19 @@ class Bolton(optimizer_v2.OptimizerV2): 'class_weights', 'input_dim', 'n_samples', - 'n_classes', + 'n_outputs', 'layers', - '_model' + 'batch_size', + '_is_init' ] self._internal_optimizer = optimizer + self.learning_rate = GammaBetaDecreasingStep() # use the Bolton Learning + # rate scheduler, as required for privacy guarantees. This will still need + # to get values from the loss function near the time that .fit is called + # on the model (when this optimizer will be called as a context manager) self.dtype = dtype self.loss = loss + self._is_init = False def get_config(self): """Reroutes to _internal_optimizer. See super/_internal_optimizer. @@ -75,49 +160,44 @@ class Bolton(optimizer_v2.OptimizerV2): """helper method to normalize the weights to the R-ball. Args: - r: radius of "R-Ball". Scalar to normalize to. force: True to normalize regardless of previous weight values. False to check if weights > R-ball and only normalize then. Returns: """ - r = self.loss.radius() + radius = self.loss.radius() for layer in self.layers: - if tf.executing_eagerly(): - weight_norm = tf.norm(layer.kernel, axis=0) - if force: - layer.kernel = layer.kernel / (weight_norm / r) - elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self.dtype)) > 0: - layer.kernel = layer.kernel / (weight_norm / r) + weight_norm = tf.norm(layer.kernel, axis=0) + if force: + layer.kernel = layer.kernel / (weight_norm / radius) else: - weight_norm = tf.norm(layer.kernel, axis=0) - if force: - layer.kernel = layer.kernel / (weight_norm / r) - else: - layer.kernel = tf.cond( - tf.reduce_sum(tf.cast(weight_norm > r, dtype=self.dtype)) > 0, - lambda: layer.kernel / (weight_norm / r), - lambda: layer.kernel - ) + layer.kernel = tf.cond( + tf.reduce_sum(tf.cast(weight_norm > radius, dtype=self.dtype)) > 0, + lambda k=layer.kernel, w=weight_norm, r=radius: k / (w / r), # pylint: disable=cell-var-from-loop + lambda k=layer.kernel: k # pylint: disable=cell-var-from-loop + ) - def get_noise(self, data_size, input_dim, output_dim, class_weight): + def get_noise(self, input_dim, output_dim): """Sample noise to be added to weights for privacy guarantee Args: - distribution: the distribution type to pull noise from - data_size: the number of samples + input_dim: the input dimensionality for the weights + output_dim the output dimensionality for the weights Returns: noise in shape of layer's weights to be added to the weights. 
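# Editor's sketch (not part of this diff): a toy evaluation of the
# GammaBetaDecreasingStep schedule defined above. The learning rate is
# min(1/beta, 1/(gamma * t)), so it stays flat at 1/beta for early steps and
# then decays like 1/(gamma * t). The beta and gamma values are made up for
# illustration only.
import tensorflow as tf

beta = tf.constant(2.0)   # smoothness of the loss
gamma = tf.constant(0.5)  # strong convexity of the loss

def decayed_lr(step):
  one = tf.constant(1.0)
  return tf.math.minimum(one / beta, one / (gamma * tf.cast(step, tf.float32)))

print([float(decayed_lr(t)) for t in range(1, 6)])
# [0.5, 0.5, 0.5, 0.5, 0.4] -- decay kicks in once 1/(gamma*t) < 1/beta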
""" + if not self._is_init: + raise Exception('This method must be called from within the optimizer\'s ' + 'context.') loss = self.loss distribution = self.noise_distribution.lower() if distribution == _accepted_distributions[0]: # laplace per_class_epsilon = self.epsilon / (output_dim) l2_sensitivity = (2 * - loss.lipchitz_constant(class_weight)) / \ - (loss.gamma() * data_size) + loss.lipchitz_constant(self.class_weights)) / \ + (loss.gamma() * self.n_samples * self.batch_size) unit_vector = tf.random.normal(shape=(input_dim, output_dim), mean=0, seed=1, @@ -139,28 +219,7 @@ class Bolton(optimizer_v2.OptimizerV2): raise NotImplementedError('Noise distribution: {0} is not ' 'a valid distribution'.format(distribution)) - def limit_learning_rate(self, beta, gamma): - """Implements learning rate limitation that is required by the bolton - method for sensitivity bounding of the strongly convex function. - Sets the learning rate to the min(1/beta, 1/(gamma*t)) - - Args: - is_eager: Whether the model is running in eager mode - beta: loss function beta-smoothness - gamma: loss function gamma-strongly convex - - Returns: None - - """ - numerator = tf.constant(1, dtype=self.dtype) - t = tf.cast(self._iterations, self.dtype) - # will exist on the internal optimizer - if numerator / beta < numerator / (gamma * t): - self.learning_rate = numerator / beta - else: - self.learning_rate = numerator / (gamma * t) - - def from_config(self, *args, **kwargs): + def from_config(self, *args, **kwargs): # pylint: disable=arguments-differ """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ return self._internal_optimizer.from_config(*args, **kwargs) @@ -176,21 +235,19 @@ class Bolton(optimizer_v2.OptimizerV2): from _internal_optimizer. """ - if name == '_private_attributes': - return getattr(self, name) - elif name in self._private_attributes: + if name == '_private_attributes' or name in self._private_attributes: return getattr(self, name) optim = object.__getattribute__(self, '_internal_optimizer') try: return object.__getattribute__(optim, name) except AttributeError: - raise AttributeError("Neither '{0}' nor '{1}' object has attribute '{2}'" - "".format( - self.__class__.__name__, - self._internal_optimizer.__class__.__name__, - name - ) - ) + raise AttributeError( + "Neither '{0}' nor '{1}' object has attribute '{2}'" + "".format(self.__class__.__name__, + self._internal_optimizer.__class__.__name__, + name + ) + ) def __setattr__(self, key, value): """ Set attribute to self instance if its the internal optimizer. @@ -205,113 +262,110 @@ class Bolton(optimizer_v2.OptimizerV2): """ if key == '_private_attributes': object.__setattr__(self, key, value) - elif key in key in self._private_attributes: + elif key in self._private_attributes: object.__setattr__(self, key, value) else: setattr(self._internal_optimizer, key, value) - def _resource_apply_dense(self, *args, **kwargs): + def _resource_apply_dense(self, *args, **kwargs): # pylint: disable=arguments-differ """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - return self._internal_optimizer._resource_apply_dense(*args, **kwargs) + return self._internal_optimizer._resource_apply_dense(*args, **kwargs) # pylint: disable=protected-access - def _resource_apply_sparse(self, *args, **kwargs): + def _resource_apply_sparse(self, *args, **kwargs): # pylint: disable=arguments-differ """Reroutes to _internal_optimizer. See super/_internal_optimizer. 
""" - return self._internal_optimizer._resource_apply_sparse(*args, **kwargs) + return self._internal_optimizer._resource_apply_sparse(*args, **kwargs) # pylint: disable=protected-access def get_updates(self, loss, params): """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - # self.layers = params out = self._internal_optimizer.get_updates(loss, params) - self.limit_learning_rate(self.loss.beta(self.class_weights), - self.loss.gamma() - ) self.project_weights_to_r() return out - def apply_gradients(self, *args, **kwargs): + def apply_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - # grads_and_vars = kwargs.get('grads_and_vars', None) - # grads_and_vars = optimizer_v2._filter_grads(grads_and_vars) - # var_list = [v for (_, v) in grads_and_vars] - # self.layers = var_list out = self._internal_optimizer.apply_gradients(*args, **kwargs) - self.limit_learning_rate(self.loss.beta(self.class_weights), - self.loss.gamma() - ) self.project_weights_to_r() return out - def minimize(self, *args, **kwargs): + def minimize(self, *args, **kwargs): # pylint: disable=arguments-differ """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - # self.layers = kwargs.get('var_list', None) out = self._internal_optimizer.minimize(*args, **kwargs) - self.limit_learning_rate(self.loss.beta(self.class_weights), - self.loss.gamma() - ) self.project_weights_to_r() return out - def _compute_gradients(self, *args, **kwargs): + def _compute_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ,protected-access """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - # self.layers = kwargs.get('var_list', None) - return self._internal_optimizer._compute_gradients(*args, **kwargs) + return self._internal_optimizer._compute_gradients(*args, **kwargs) # pylint: disable=protected-access - def get_gradients(self, *args, **kwargs): + def get_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ """Reroutes to _internal_optimizer. See super/_internal_optimizer. """ - # self.layers = kwargs.get('params', None) return self._internal_optimizer.get_gradients(*args, **kwargs) def __enter__(self): - noise_distribution = self.noise_distribution - epsilon = self.epsilon - class_weights = self.class_weights - n_samples = self.n_samples - if noise_distribution not in _accepted_distributions: - raise ValueError('Detected noise distribution: {0} not one of: {1} valid' - 'distributions'.format(noise_distribution, - _accepted_distributions)) - self.noise_distribution = noise_distribution - self.epsilon = epsilon - self.class_weights = class_weights - self.n_samples = n_samples + """Context manager call at the beginning of with statement. + + Returns: + self, to be used in context manager + """ + self._is_init = True return self def __call__(self, - noise_distribution, - epsilon, - layers, + noise_distribution: str, + epsilon: float, + layers: list, class_weights, n_samples, - n_classes, + n_outputs, + batch_size ): - """ + """Entry point from context. Accepts required values for bolton method and + stores them on the optimizer for use throughout fitting. Args: noise_distribution: the noise distribution to pick. see _accepted_distributions and get_noise for possible values. epsilon: privacy parameter. Lower gives more privacy but less utility. - class_weights: class_weights used + layers: list of Keras/Tensorflow layers. 
Can be found as model.layers + class_weights: class_weights used, which may either be a scalar or 1D + tensor with dim == n_classes. n_samples number of rows/individual samples in the training set - n_classes: number of output classes - layers: list of Keras/Tensorflow layers. + n_outputs: number of output classes + batch_size: batch size used. """ if epsilon <= 0: raise ValueError('Detected epsilon: {0}. ' 'Valid range is 0 < epsilon Date: Mon, 17 Jun 2019 14:46:04 -0400 Subject: [PATCH 05/39] Update Huber loss regularization term and some small changes across loss parameters. --- privacy/bolton/loss.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/privacy/bolton/loss.py b/privacy/bolton/loss.py index de49607..4ed0479 100644 --- a/privacy/bolton/loss.py +++ b/privacy/bolton/loss.py @@ -58,7 +58,8 @@ class StrongConvexMixin: """Smoothness, beta Args: - class_weight: the class weights used. + class_weight: the class weights as scalar or 1d tensor, where its + dimensionality is equal to the number of outputs. Returns: Beta @@ -154,7 +155,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): """Compute loss Args: - y_true: Ground truth values. One + y_true: Ground truth values. One hot encoded using -1 and 1. y_pred: The predicted values. Returns: @@ -211,7 +212,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): this loss function to be strongly convex. :return: """ - return L1L2(l2=self.reg_lambda) + return L1L2(l2=self.reg_lambda/2) class StrongConvexBinaryCrossentropy( @@ -230,7 +231,6 @@ class StrongConvexBinaryCrossentropy( from_logits: bool = True, label_smoothing: float = 0, reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, - name: str = 'binarycrossentropy', dtype=tf.float32): """ Args: @@ -239,7 +239,9 @@ class StrongConvexBinaryCrossentropy( radius_constant: constant defining the length of the radius reduction: reduction type to use. See super class label_smoothing: amount of smoothing to perform on labels - relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) + relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). + Note, the impact of this parameter's effect on privacy + is not known and thus the default should be used. name: Name of the loss instance dtype: tf datatype to use for tensor conversions. """ @@ -256,7 +258,7 @@ class StrongConvexBinaryCrossentropy( self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexBinaryCrossentropy, self).__init__( reduction=reduction, - name=name, + name='binarycrossentropy', from_logits=from_logits, label_smoothing=label_smoothing, ) From f41be2c598636f949b97244941ca5cd6ad27d31c Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 19 Jun 2019 10:46:30 -0400 Subject: [PATCH 06/39] Bolton implementation and unit tests. Has two pre-implemented loss functions. 
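Editor's note: the two pre-implemented losses referred to above are StrongConvexHuber and StrongConvexBinaryCrossentropy from the renamed privacy.bolton.losses module. A rough construction sketch with placeholder constants follows; note that both losses expose kernel_regularizer() as L1L2(l2=reg_lambda/2), i.e. a (reg_lambda/2)*||w||^2 penalty whose curvature matches gamma() = reg_lambda.

from privacy.bolton.losses import StrongConvexHuber
from privacy.bolton.losses import StrongConvexBinaryCrossentropy

huber = StrongConvexHuber(1, 1, 1, 1)  # reg_lambda, C, radius_constant, delta
bce = StrongConvexBinaryCrossentropy(1, 1, 1)  # reg_lambda, C, radius_constant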
--- privacy/__init__.py | 6 + privacy/bolton/__init__.py | 7 +- privacy/bolton/{loss.py => losses.py} | 287 +----------------- .../bolton/{loss_test.py => losses_test.py} | 10 +- privacy/bolton/{model.py => models.py} | 13 +- .../bolton/{model_test.py => models_test.py} | 26 +- .../bolton/{optimizer.py => optimizers.py} | 2 +- .../{optimizer_test.py => optimizers_test.py} | 6 +- 8 files changed, 49 insertions(+), 308 deletions(-) rename privacy/bolton/{loss.py => losses.py} (51%) rename privacy/bolton/{loss_test.py => losses_test.py} (98%) rename privacy/bolton/{model.py => models.py} (96%) rename privacy/bolton/{model_test.py => models_test.py} (96%) rename privacy/bolton/{optimizer.py => optimizers.py} (99%) rename privacy/bolton/{optimizer_test.py => optimizers_test.py} (99%) diff --git a/privacy/__init__.py b/privacy/__init__.py index 59bfe20..e494c62 100644 --- a/privacy/__init__.py +++ b/privacy/__init__.py @@ -41,3 +41,9 @@ else: from privacy.optimizers.dp_optimizer import DPAdamOptimizer from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer + + from privacy.bolton.models import BoltonModel + from privacy.bolton.optimizers import Bolton + from privacy.bolton.losses import StrongConvexMixin + from privacy.bolton.losses import StrongConvexBinaryCrossentropy + from privacy.bolton.losses import StrongConvexHuber diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 67b6148..971b804 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -9,6 +9,7 @@ if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: - from privacy.bolton.model import Bolton - from privacy.bolton.loss import StrongConvexHuber - from privacy.bolton.loss import StrongConvexBinaryCrossentropy \ No newline at end of file + from privacy.bolton.models import BoltonModel + from privacy.bolton.optimizers import Bolton + from privacy.bolton.losses import StrongConvexHuber + from privacy.bolton.losses import StrongConvexBinaryCrossentropy \ No newline at end of file diff --git a/privacy/bolton/loss.py b/privacy/bolton/losses.py similarity index 51% rename from privacy/bolton/loss.py rename to privacy/bolton/losses.py index 4ed0479..a326946 100644 --- a/privacy/bolton/loss.py +++ b/privacy/bolton/losses.py @@ -21,6 +21,7 @@ from tensorflow.python.keras import losses from tensorflow.python.keras.utils import losses_utils from tensorflow.python.framework import ops as _ops from tensorflow.python.keras.regularizers import L1L2 +from tensorflow.python.platform import tf_logging as logging class StrongConvexMixin: @@ -147,7 +148,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): self.dtype = dtype self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexHuber, self).__init__( - name='huber', + name='strongconvexhuber', reduction=reduction, ) @@ -245,6 +246,11 @@ class StrongConvexBinaryCrossentropy( name: Name of the loss instance dtype: tf datatype to use for tensor conversions. """ + if label_smoothing != 0: + logging.warning('The impact of label smoothing on privacy is unknown. 
' + 'Use label smoothing at your own risk as it may not ' + 'guarantee privacy.') + if reg_lambda <= 0: raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) if C <= 0: @@ -258,7 +264,7 @@ class StrongConvexBinaryCrossentropy( self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexBinaryCrossentropy, self).__init__( reduction=reduction, - name='binarycrossentropy', + name='strongconvexbinarycrossentropy', from_logits=from_logits, label_smoothing=label_smoothing, ) @@ -313,280 +319,3 @@ class StrongConvexBinaryCrossentropy( return L1L2(l2=self.reg_lambda/2) -# class StrongConvexSparseCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( -# reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# class StrongConvexSparseCategoricalCrossentropy( -# losses.SparseCategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight -# regularization. 
-# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# -# class StrongConvexCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. 
-# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) diff --git a/privacy/bolton/loss_test.py b/privacy/bolton/losses_test.py similarity index 98% rename from privacy/bolton/loss_test.py rename to privacy/bolton/losses_test.py index 488710f..d2c9f80 100644 --- a/privacy/bolton/loss_test.py +++ b/privacy/bolton/losses_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Unit testing for loss.py""" +"""Unit testing for losses.py""" from __future__ import absolute_import from __future__ import division @@ -22,9 +22,9 @@ from tensorflow.python.keras import keras_parameterized from tensorflow.python.framework import test_util from tensorflow.python.keras.regularizers import L1L2 from absl.testing import parameterized -from privacy.bolton.loss import StrongConvexBinaryCrossentropy -from privacy.bolton.loss import StrongConvexHuber -from privacy.bolton.loss import StrongConvexMixin +from privacy.bolton.losses import StrongConvexBinaryCrossentropy +from privacy.bolton.losses import StrongConvexHuber +from privacy.bolton.losses import StrongConvexMixin class StrongConvexMixinTests(keras_parameterized.TestCase): @@ -355,7 +355,7 @@ class HuberTests(keras_parameterized.TestCase): 'fn': 'kernel_regularizer', 'init_args': [1, 1, 1, 1], 'args': [], - 'result': L1L2(l2=1), + 'result': L1L2(l2=0.5), }, ]) def test_fns(self, init_args, fn, args, result): diff --git a/privacy/bolton/model.py b/privacy/bolton/models.py similarity index 96% rename from privacy/bolton/model.py rename to privacy/bolton/models.py index 6f3f48e..0a2efc0 100644 --- a/privacy/bolton/model.py +++ b/privacy/bolton/models.py @@ -20,8 +20,8 @@ import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras import optimizers from tensorflow.python.framework import ops as _ops -from privacy.bolton.loss import StrongConvexMixin -from privacy.bolton.optimizer import Bolton +from privacy.bolton.losses import StrongConvexMixin +from privacy.bolton.optimizers import Bolton class BoltonModel(Model): @@ -142,7 +142,9 @@ class BoltonModel(Model): """ if class_weight is None: - class_weight = self.calculate_class_weights(class_weight) + class_weight_ = self.calculate_class_weights(class_weight) + else: + class_weight_ = class_weight if n_samples is not None: data_size = n_samples elif hasattr(x, 'shape'): @@ -160,10 +162,13 @@ class BoltonModel(Model): if batch_size_ is None: raise ValueError('batch_size: {0} is an ' 'invalid value'.format(batch_size_)) + if data_size is None: + raise ValueError('Could not infer the number of samples. Please pass ' + 'this in using n_samples.') with self.optimizer(noise_distribution, epsilon, self.layers, - class_weight, + class_weight_, data_size, self.n_outputs, batch_size_, diff --git a/privacy/bolton/model_test.py b/privacy/bolton/models_test.py similarity index 96% rename from privacy/bolton/model_test.py rename to privacy/bolton/models_test.py index 4316a1e..05119d3 100644 --- a/privacy/bolton/model_test.py +++ b/privacy/bolton/models_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Unit testing for model.py""" +"""Unit testing for models.py""" from __future__ import absolute_import from __future__ import division @@ -25,9 +25,9 @@ from tensorflow.python.keras import losses from tensorflow.python.framework import ops as _ops from tensorflow.python.keras.regularizers import L1L2 from absl.testing import parameterized -from privacy.bolton import model -from privacy.bolton.optimizer import Bolton -from privacy.bolton.loss import StrongConvexMixin +from privacy.bolton import models +from privacy.bolton.optimizers import Bolton +from privacy.bolton.losses import StrongConvexMixin class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model""" @@ -130,8 +130,8 @@ class InitTests(keras_parameterized.TestCase): n_outputs: number of output neurons """ # test valid domains for each variable - clf = model.BoltonModel(n_outputs) - self.assertIsInstance(clf, model.BoltonModel) + clf = models.BoltonModel(n_outputs) + self.assertIsInstance(clf, models.BoltonModel) @parameterized.named_parameters([ {'testcase_name': 'invalid n_outputs', @@ -146,7 +146,7 @@ class InitTests(keras_parameterized.TestCase): """ # test invalid domains for each variable, especially noise with self.assertRaises(ValueError): - model.BoltonModel(n_outputs) + models.BoltonModel(n_outputs) @parameterized.named_parameters([ {'testcase_name': 'string compile', @@ -170,7 +170,7 @@ class InitTests(keras_parameterized.TestCase): """ # test compilation of valid tf.optimizer and tf.loss with self.cached_session(): - clf = model.BoltonModel(n_outputs) + clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) self.assertEqual(clf.loss, loss) @@ -197,7 +197,7 @@ class InitTests(keras_parameterized.TestCase): # test compilaton of invalid tf.optimizer and non instantiated loss. 
with self.cached_session(): with self.assertRaises((ValueError, AttributeError)): - clf = model.BoltonModel(n_outputs) + clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) @@ -261,7 +261,7 @@ def _do_fit(n_samples, Returns: BoltonModel instsance """ - clf = model.BoltonModel(n_outputs) + clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) if generator: x = _cat_dataset( @@ -355,7 +355,7 @@ class FitTests(keras_parameterized.TestCase): input_dim = 5 batch_size = 1 n_samples = 10 - clf = model.BoltonModel(n_classes) + clf = models.BoltonModel(n_classes) clf.compile(optimizer, loss) x = _cat_dataset( n_samples, @@ -441,7 +441,7 @@ class FitTests(keras_parameterized.TestCase): num_classes: number of outputs neurons result: expected result """ - clf = model.BoltonModel(1, 1) + clf = models.BoltonModel(1, 1) expected = clf.calculate_class_weights(class_weights, class_counts, num_classes @@ -508,7 +508,7 @@ class FitTests(keras_parameterized.TestCase): num_classes: number of outputs neurons result: expected result """ - clf = model.BoltonModel(1, 1) + clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method clf.calculate_class_weights(class_weights, class_counts, diff --git a/privacy/bolton/optimizer.py b/privacy/bolton/optimizers.py similarity index 99% rename from privacy/bolton/optimizer.py rename to privacy/bolton/optimizers.py index cfd0b98..726ec4f 100644 --- a/privacy/bolton/optimizer.py +++ b/privacy/bolton/optimizers.py @@ -21,7 +21,7 @@ import tensorflow as tf from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import math_ops from tensorflow.python import ops as _ops -from privacy.bolton.loss import StrongConvexMixin +from privacy.bolton.losses import StrongConvexMixin _accepted_distributions = ['laplace'] # implemented distributions for noising diff --git a/privacy/bolton/optimizer_test.py b/privacy/bolton/optimizers_test.py similarity index 99% rename from privacy/bolton/optimizer_test.py rename to privacy/bolton/optimizers_test.py index 2060031..0a9f9cc 100644 --- a/privacy/bolton/optimizer_test.py +++ b/privacy/bolton/optimizers_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Unit testing for optimizer.py""" +"""Unit testing for optimizers.py""" from __future__ import absolute_import from __future__ import division @@ -29,8 +29,8 @@ from tensorflow.python.framework import ops as _ops from tensorflow.python.framework import test_util from tensorflow.python import ops as _ops from absl.testing import parameterized -from privacy.bolton.loss import StrongConvexMixin -from privacy.bolton import optimizer as opt +from privacy.bolton.losses import StrongConvexMixin +from privacy.bolton import optimizers as opt From 56e16f0a15a531b0e9435cfd6c9d32f1d5be1d69 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 19 Jun 2019 11:04:18 -0400 Subject: [PATCH 07/39] Minor changes + tutorial --- privacy/bolton/losses.py | 278 +++++++++++++++++++ privacy/bolton/models.py | 2 - privacy/bolton/optimizers.py | 24 +- privacy/bolton/optimizers_test.py | 4 +- tutorials/bolton_tutorial.ipynb | 432 ++++++++++++++++++++++++++++++ 5 files changed, 719 insertions(+), 21 deletions(-) create mode 100644 tutorials/bolton_tutorial.ipynb diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index a326946..6a54576 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -319,3 +319,281 @@ class StrongConvexBinaryCrossentropy( return L1L2(l2=self.reg_lambda/2) +# class StrongConvexSparseCategoricalCrossentropy( +# losses.CategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of CategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( +# reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. 
+# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) +# +# class StrongConvexSparseCategoricalCrossentropy( +# losses.SparseCategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexHuber, self).__init__(reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) +# +# +# class StrongConvexCategoricalCrossentropy( +# losses.CategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of CategoricalCrossentropy loss using l2 weight +# regularization. 
+# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexHuber, self).__init__(reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. 
+# :return: +# """ +# return L1L2(l2=self.reg_lambda) + diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 0a2efc0..06d1c4b 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -170,7 +170,6 @@ class BoltonModel(Model): self.layers, class_weight_, data_size, - self.n_outputs, batch_size_, ) as _: out = super(BoltonModel, self).fit(x=x, @@ -223,7 +222,6 @@ class BoltonModel(Model): self.layers, class_weight, data_size, - self.n_outputs, batch_size ) as _: out = super(BoltonModel, self).fit_generator( diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 726ec4f..28c1735 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -137,7 +137,6 @@ class Bolton(optimizer_v2.OptimizerV2): 'class_weights', 'input_dim', 'n_samples', - 'n_outputs', 'layers', 'batch_size', '_is_init' @@ -166,6 +165,9 @@ class Bolton(optimizer_v2.OptimizerV2): Returns: """ + if not self._is_init: + raise Exception('This method must be called from within the optimizer\'s ' + 'context.') radius = self.loss.radius() for layer in self.layers: weight_norm = tf.norm(layer.kernel, axis=0) @@ -323,7 +325,6 @@ class Bolton(optimizer_v2.OptimizerV2): layers: list, class_weights, n_samples, - n_outputs, batch_size ): """Entry point from context. Accepts required values for bolton method and @@ -338,7 +339,6 @@ class Bolton(optimizer_v2.OptimizerV2): class_weights: class_weights used, which may either be a scalar or 1D tensor with dim == n_classes. n_samples number of rows/individual samples in the training set - n_outputs: number of output classes batch_size: batch size used. """ if epsilon <= 0: @@ -352,20 +352,11 @@ class Bolton(optimizer_v2.OptimizerV2): self.learning_rate.initialize(self.loss.beta(class_weights), self.loss.gamma() ) - self.epsilon = _ops.convert_to_tensor_v2(epsilon, dtype=self.dtype) - self.class_weights = _ops.convert_to_tensor_v2(class_weights, - dtype=self.dtype - ) - self.n_samples = _ops.convert_to_tensor_v2(n_samples, - dtype=self.dtype - ) - self.n_outputs = _ops.convert_to_tensor_v2(n_outputs, - dtype=self.dtype - ) + self.epsilon = tf.constant(epsilon, dtype=self.dtype) + self.class_weights = tf.constant(class_weights, dtype=self.dtype) + self.n_samples = tf.constant(n_samples, dtype=self.dtype) self.layers = layers - self.batch_size = _ops.convert_to_tensor_v2(batch_size, - dtype=self.dtype - ) + self.batch_size = tf.constant(batch_size, dtype=self.dtype) return self def __exit__(self, *args): @@ -397,6 +388,5 @@ class Bolton(optimizer_v2.OptimizerV2): self.class_weights = None self.n_samples = None self.input_dim = None - self.n_outputs = None self.layers = None self._is_init = False diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 0a9f9cc..1d0fbfb 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -314,7 +314,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], model.n_outputs)) - with bolton(noise, epsilon, model.layers, class_weights, 1, 1, 1) as _: + with bolton(noise, epsilon, model.layers, class_weights, 1, 1) as _: pass return _ops.convert_to_tensor_v2(bolton.epsilon, dtype=tf.float32) epsilon = test_run() @@ -349,7 +349,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], model.n_outputs)) - with bolton(noise, epsilon, 
model.layers, 1, 1, 1, 1) as _: + with bolton(noise, epsilon, model.layers, 1, 1, 1) as _: pass with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method test_run(noise, epsilon) diff --git a/tutorials/bolton_tutorial.ipynb b/tutorials/bolton_tutorial.ipynb new file mode 100644 index 0000000..b60e612 --- /dev/null +++ b/tutorials/bolton_tutorial.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('..')\n", + "import tensorflow as tf\n", + "from privacy.bolton import losses\n", + "from privacy.bolton import models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will create a binary classification dataset with a single output dimension.\n", + "The samples for each label are repeated datapoints at different points in space." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false, + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(20, 2) (20, 1)\n" + ] + } + ], + "source": [ + "# Parameters for dataset\n", + "n_samples = 10\n", + "input_dim = 2\n", + "n_outputs = 1\n", + "# Create binary classification dataset:\n", + "x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)), \n", + " tf.constant(1, tf.float32, (n_samples, input_dim))]\n", + "y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),\n", + " tf.constant(1, tf.float32, (n_samples, 1))]\n", + "x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)\n", + "print(x.shape, y.shape)\n", + "generator = tf.data.Dataset.from_tensor_slices((x, y))\n", + "generator = generator.batch(10)\n", + "generator = generator.shuffle(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will explore using the pre-built BoltonModel, which is a thin wrapper around a Keras Model using a single-layer neural network. It automatically uses the Bolton Optimizer which encompasses all the logic required for the Bolton Differential Privacy method.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will pick our optimizer and Strongly Convex Loss function. The loss must extend from StrongConvexMixin and implement the associated methods. Some existing loss functions are pre-implemented in bolton.loss" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "optimizer = tf.optimizers.SGD()\n", + "reg_lambda = 1\n", + "C = 1\n", + "radius_constant = 1\n", + "loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy to be 1; these are all tunable and their impact can be read in losses.StrongConvexBinaryCrossentropy. 
We then compile the model with the chosen optimizer and loss, which will automatically wrap the chosen optimizer with the Bolton Optimizer, ensuring the required components function as required for privacy guarantees." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "bolt.compile(optimizer, loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To fit the model, the optimizer will require additional information about the dataset and model. These parameters are:\n", + "1. the class_weights used\n", + "2. the number of samples in the dataset\n", + "3. the batch size\n", + "which the model will try to infer, if possible. If not, you will be required to pass these explicitly to the fit method.\n", + "As well, there are two privacy parameters than can be altered: \n", + "1. epsilon, a float\n", + "2. noise_distribution, a valid string indicating the distriution to use (must be implemented)\n", + "\n", + "The BoltonModel offers a helper method, .calculate_class_weight to aid in class_weight calculation." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Logging before flag parsing goes to stderr.\n", + "W0619 11:00:32.392859 4467058112 deprecation.py:323] From /Users/christopherchoo/PycharmProjects/privacy/venv/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py:182: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Use tf.where in 2.0, which has the same broadcast rule as np.where\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 20 samples\n", + "Epoch 1/2\n", + "20/20 [==============================] - 0s 4ms/sample - loss: 0.8146\n", + "Epoch 2/2\n", + "20/20 [==============================] - 0s 94us/sample - loss: 0.5699\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# required parameters\n", + "class_weight = None # default, use .calculate_class_weight to specify other values\n", + "batch_size = None # default, if it cannot be inferred, specify this\n", + "n_samples = None # default, if it cannot be iferred, specify this\n", + "# privacy parameters\n", + "epsilon = 2\n", + "noise_distribution = 'laplace'\n", + "\n", + "bolt.fit(x, \n", + " y, \n", + " epsilon=epsilon, \n", + " class_weight=class_weight, \n", + " batch_size=batch_size, \n", + " n_samples=n_samples,\n", + " noise_distribution=noise_distribution,\n", + " epochs=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We may also train a generator object, or try different optimizers and loss functions. Below, we will see that we must pass the number of samples as the fit method is unable to infer it for a generator." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "optimizer2 = tf.optimizers.Adam()\n", + "bolt.compile(optimizer2, loss)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Could not infer the number of samples. 
Please pass this in using n_samples.\n" + ] + } + ], + "source": [ + "# required parameters\n", + "class_weight = None # default, use .calculate_class_weight to specify other values\n", + "batch_size = None # default, if it cannot be inferred, specify this\n", + "n_samples = None # default, if it cannot be iferred, specify this\n", + "# privacy parameters\n", + "epsilon = 2\n", + "noise_distribution = 'laplace'\n", + "try:\n", + " bolt.fit(generator,\n", + " epsilon=epsilon, \n", + " class_weight=class_weight, \n", + " batch_size=batch_size, \n", + " n_samples=n_samples,\n", + " noise_distribution=noise_distribution,\n", + " verbose=0\n", + " )\n", + "except ValueError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And now, re running with the parameter set." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n_samples = 20\n", + "bolt.fit(generator,\n", + " epsilon=epsilon, \n", + " class_weight=class_weight, \n", + " batch_size=batch_size, \n", + " n_samples=n_samples,\n", + " noise_distribution=noise_distribution,\n", + " verbose=0\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You don't have to use the bolton model to use the Bolton method. There are only a few requirements:\n", + "1. make sure any requirements from the loss are implemented in the model.\n", + "2. instantiate the optimizer and use it as a context around your fit operation." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from privacy.bolton.optimizers import Bolton" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we create our own model and setup the Bolton optimizer." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class TestModel(tf.keras.Model):\n", + " def __init__(self, reg_layer, n_outputs=1):\n", + " super(TestModel, self).__init__(name='test')\n", + " self.output_layer = tf.keras.layers.Dense(n_outputs,\n", + " kernel_regularizer=reg_layer\n", + " )\n", + " \n", + " def call(self, inputs, training=False):\n", + " return self.output_layer(inputs)\n", + "\n", + "optimizer = tf.optimizers.SGD()\n", + "loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)\n", + "optimizer = Bolton(optimizer, loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we instantiate our model and check for 1. Since our loss requires L2 regularization over the kernel, we will pass it to the model." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "n_outputs = 1 # parameter for model and optimizer context.\n", + "test_model = TestModel(loss.kernel_regularizer(), n_outputs)\n", + "test_model.compile(optimizer, loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We comply with 2., and use the Bolton Optimizer as a context around the fit method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 20 samples\n", + "Epoch 1/2\n", + "20/20 [==============================] - 0s 3ms/sample - loss: 0.9096\n", + "Epoch 2/2\n", + "20/20 [==============================] - 0s 430us/sample - loss: 0.5275\n" + ] + } + ], + "source": [ + "# parameters for context\n", + "noise_distribution = 'laplace'\n", + "epsilon = 2\n", + "class_weights = 1 # Previosuly, the fit method auto-detected the class_weights.\n", + "# Here, we need to pass the class_weights explicitly. 1 is the equivalent of None.\n", + "n_samples = 20\n", + "batch_size = 5\n", + "\n", + "with optimizer(\n", + " noise_distribution=noise_distribution,\n", + " epsilon=epsilon,\n", + " layers=test_model.layers,\n", + " class_weights=class_weights, \n", + " n_samples=n_samples,\n", + " batch_size=batch_size\n", + ") as _:\n", + " test_model.fit(x, y, batch_size=batch_size, epochs=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From b120d9c5d84fe03d88ca78ce297d748f6ef38cf7 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 19 Jun 2019 11:14:02 -0400 Subject: [PATCH 08/39] Changes for pylint. --- privacy/bolton/losses.py | 280 ------------------------------ privacy/bolton/models_test.py | 4 +- privacy/bolton/optimizers.py | 1 - privacy/bolton/optimizers_test.py | 26 +-- 4 files changed, 8 insertions(+), 303 deletions(-) diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 6a54576..a99187b 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -317,283 +317,3 @@ class StrongConvexBinaryCrossentropy( :return: """ return L1L2(l2=self.reg_lambda/2) - - -# class StrongConvexSparseCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. 
-# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( -# reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# class StrongConvexSparseCategoricalCrossentropy( -# losses.SparseCategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. 
-# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# -# class StrongConvexCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. 
-# :return: -# """ -# return L1L2(l2=self.reg_lambda) - diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 05119d3..63954cc 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -53,7 +53,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) - def beta(self, class_weight): + def beta(self, class_weight): # pylint: disable=unused-argument """Beta smoothess Args: @@ -64,7 +64,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) - def lipchitz_constant(self, class_weight): + def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument """ L lipchitz continuous Args: diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 28c1735..ec7a7e5 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -20,7 +20,6 @@ from __future__ import print_function import tensorflow as tf from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import math_ops -from tensorflow.python import ops as _ops from privacy.bolton.losses import StrongConvexMixin _accepted_distributions = ['laplace'] # implemented distributions for noising diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 1d0fbfb..6a499fc 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -25,7 +25,6 @@ from tensorflow.python.keras.regularizers import L1L2 from tensorflow.python.keras.initializers import constant from tensorflow.python.keras import losses from tensorflow.python.keras.models import Model -from tensorflow.python.framework import ops as _ops from tensorflow.python.framework import test_util from tensorflow.python import ops as _ops from absl.testing import parameterized @@ -33,7 +32,6 @@ from privacy.bolton.losses import StrongConvexMixin from privacy.bolton import optimizers as opt - class TestModel(Model): """ Bolton episilon-delta model @@ -69,18 +67,6 @@ class TestModel(Model): ) - # def call(self, inputs): - # """Forward pass of network - # - # Args: - # inputs: inputs to neural network - # - # Returns: - # - # """ - # return self.output_layer(inputs) - - class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model""" def __init__(self, reg_lambda, C, radius_constant, name='test'): @@ -105,7 +91,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) - def beta(self, class_weight): + def beta(self, class_weight): # pylint: disable=unused-argument """Beta smoothess Args: @@ -116,7 +102,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) - def lipchitz_constant(self, class_weight): + def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument """ L lipchitz continuous Args: @@ -217,7 +203,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], model.n_outputs)) - bolton._is_init = True + bolton._is_init = True # pylint: disable=protected-access bolton.layers = model.layers bolton.epsilon = 2 bolton.noise_distribution = 'laplace' @@ -279,7 +265,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], model.n_outputs)) - bolton._is_init = True + 
bolton._is_init = True # pylint: disable=protected-access bolton.layers = model.layers bolton.epsilon = 2 bolton.noise_distribution = 'laplace' @@ -431,7 +417,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], model.n_outputs)) - bolton._is_init = True + bolton._is_init = True # pylint: disable=protected-access bolton.layers = model.layers bolton.epsilon = 2 bolton.noise_distribution = 'laplace' @@ -467,7 +453,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], model.n_outputs)) - bolton._is_init = True + bolton._is_init = True # pylint: disable=protected-access bolton.noise_distribution = 'laplace' bolton.epsilon = 1 bolton.layers = model.layers From 3080b654b570bbad6487653fa2b02b108a867d83 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 19 Jun 2019 11:18:42 -0400 Subject: [PATCH 09/39] Minor changes to function arguments --- privacy/bolton/models.py | 2 +- tutorials/bolton_tutorial.ipynb | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 06d1c4b..7503157 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -60,7 +60,7 @@ class BoltonModel(Model): self._layers_instantiated = False self._dtype = dtype - def call(self, inputs, training=False): # pylint: disable=arguments-differ + def call(self, inputs): # pylint: disable=arguments-differ """Forward pass of network Args: diff --git a/tutorials/bolton_tutorial.ipynb b/tutorials/bolton_tutorial.ipynb index b60e612..f682592 100644 --- a/tutorials/bolton_tutorial.ipynb +++ b/tutorials/bolton_tutorial.ipynb @@ -321,7 +321,7 @@ " kernel_regularizer=reg_layer\n", " )\n", " \n", - " def call(self, inputs, training=False):\n", + " def call(self, inputs):\n", " return self.output_layer(inputs)\n", "\n", "optimizer = tf.optimizers.SGD()\n", @@ -420,13 +420,13 @@ "pycharm": { "stem_cell": { "cell_type": "raw", + "source": [], "metadata": { "collapsed": false - }, - "source": [] + } } } }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file From b03eb6914b8093459614e2aa36ce4ed74c51d416 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 16 Jul 2019 10:33:57 -0400 Subject: [PATCH 10/39] Code review changes: Fixed doc string spacing, copyrighting, and changed the jupyter file to a python script. --- privacy/bolton/__init__.py | 16 +- privacy/bolton/losses.py | 395 ++++++++++++++++++++++----- privacy/bolton/losses_test.py | 55 +++- privacy/bolton/models.py | 20 +- privacy/bolton/models_test.py | 45 ++-- privacy/bolton/optimizers.py | 145 +++++----- privacy/bolton/optimizers_test.py | 56 ++-- tutorials/bolton_tutorial.ipynb | 432 ------------------------------ tutorials/bolton_tutorial.py | 155 +++++++++++ 9 files changed, 681 insertions(+), 638 deletions(-) delete mode 100644 tutorials/bolton_tutorial.ipynb create mode 100644 tutorials/bolton_tutorial.py diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 971b804..96d60b2 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2019, The TensorFlow Privacy Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Bolton Method for privacy.""" import sys from distutils.version import LooseVersion import tensorflow as tf @@ -12,4 +26,4 @@ else: from privacy.bolton.models import BoltonModel from privacy.bolton.optimizers import Bolton from privacy.bolton.losses import StrongConvexHuber - from privacy.bolton.losses import StrongConvexBinaryCrossentropy \ No newline at end of file + from privacy.bolton.losses import StrongConvexBinaryCrossentropy diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index a99187b..6af4e00 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -1,4 +1,4 @@ -# Copyright 2018, The TensorFlow Authors. +# Copyright 2019, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -40,53 +40,47 @@ class StrongConvexMixin: """Radius, R, of the hypothesis space W. W is a convex set that forms the hypothesis space. - Returns: R - + Returns: + R """ raise NotImplementedError("Radius not implemented for StrongConvex Loss" "function: %s" % str(self.__class__.__name__)) def gamma(self): - """ Strongly convexity, gamma - - Returns: gamma - - """ + """Returns strongly convex parameter, gamma.""" raise NotImplementedError("Gamma not implemented for StrongConvex Loss" "function: %s" % str(self.__class__.__name__)) def beta(self, class_weight): - """Smoothness, beta + """Smoothness, beta. Args: class_weight: the class weights as scalar or 1d tensor, where its dimensionality is equal to the number of outputs. - Returns: Beta - + Returns: + Beta """ raise NotImplementedError("Beta not implemented for StrongConvex Loss" "function: %s" % str(self.__class__.__name__)) def lipchitz_constant(self, class_weight): - """Lipchitz constant, L + """Lipchitz constant, L. Args: class_weight: class weights used Returns: L - """ raise NotImplementedError("lipchitz constant not implemented for " "StrongConvex Loss" "function: %s" % str(self.__class__.__name__)) def kernel_regularizer(self): - """returns the kernel_regularizer to be used. Any subclass should override - this method if they want a kernel_regularizer (if required for - the loss function to be StronglyConvex + """Returns the kernel_regularizer to be used. - :return: None or kernel_regularizer layer + Any subclass should override this method if they want a kernel_regularizer + (if required for the loss function to be StronglyConvex. """ return None @@ -97,16 +91,15 @@ class StrongConvexMixin: class_weight: class weights used dtype: the data type for tensor conversions. - Returns: maximum class weighting as tensor scalar - + Returns: + maximum class weighting as tensor scalar """ class_weight = _ops.convert_to_tensor_v2(class_weight, dtype) return tf.math.reduce_max(class_weight) class StrongConvexHuber(losses.Loss, StrongConvexMixin): - """Strong Convex version of Huber loss using l2 weight regularization. 
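For intuition, the piecewise value computed in StrongConvexHuber.call below can be sketched in a few lines of plain Python. This is only an illustrative reading of that formula, assuming labels in {-1, 1}, a margin z = y_true * y_pred, and smoothing width h (the delta argument); the helper name huber_hinge is hypothetical and not part of the library.

def huber_hinge(z, h):
  # Illustrative per-example value of the smoothed hinge ("Huber") loss.
  if z > 1 + h:  # confidently correct prediction: no loss
    return 0.0
  if abs(1 - z) <= h:  # quadratic smoothing region around the hinge at z = 1
    return (1 + h - z) ** 2 / (4.0 * h)
  return 1 - z  # linear region for badly misclassified points

# For example, with h = 1: huber_hinge(2.5, 1) == 0.0,
# huber_hinge(1.0, 1) == 0.25, and huber_hinge(-1.0, 1) == 2.0.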
- """ + """Strong Convex version of Huber loss using l2 weight regularization.""" def __init__(self, reg_lambda: float, @@ -153,7 +146,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): ) def call(self, y_true, y_pred): - """Compute loss + """Computes loss Args: y_true: Ground truth values. One hot encoded using -1 and 1. @@ -162,7 +155,6 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): Returns: Loss values per sample. """ - # return super(StrongConvexHuber, self).call(y_true, y_pred) * self._sample_weight h = self.delta z = y_pred * y_true one = tf.constant(1, dtype=self.dtype) @@ -172,23 +164,18 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): return _ops.convert_to_tensor_v2(0, dtype=self.dtype) elif tf.math.abs(one - z) <= h: return one / (four * h) * tf.math.pow(one + h - z, 2) - elif z < one - h: - return one - z - raise ValueError('') # shouldn't be possible to get here. + return one - z # elif: z < one - h def radius(self): - """See super class. - """ + """See super class.""" return self.radius_constant / self.reg_lambda def gamma(self): - """See super class. - """ + """See super class.""" return self.reg_lambda def beta(self, class_weight): - """See super class. - """ + """See super class.""" max_class_weight = self.max_class_weight(class_weight, self.dtype) delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype @@ -198,8 +185,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): self.reg_lambda def lipchitz_constant(self, class_weight): - """See super class. - """ + """See super class.""" # if class_weight is provided, # it should be a vector of the same size of number of classes max_class_weight = self.max_class_weight(class_weight, self.dtype) @@ -208,10 +194,13 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): return lc def kernel_regularizer(self): - """ - l2 loss using reg_lambda as the l2 term (as desired). Required for - this loss function to be strongly convex. - :return: + """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired). + + L2 regularization is required for this loss function to be strongly convex. + + Returns: + The L2 regularizer layer for this loss function, with regularizer constant + set to half the 0.5 * reg_lambda. """ return L1L2(l2=self.reg_lambda/2) @@ -220,10 +209,7 @@ class StrongConvexBinaryCrossentropy( losses.BinaryCrossentropy, StrongConvexMixin ): - """ - Strong Convex version of BinaryCrossentropy loss using l2 weight - regularization. - """ + """Strongly Convex BinaryCrossentropy loss using l2 weight regularization.""" def __init__(self, reg_lambda: float, @@ -239,10 +225,12 @@ class StrongConvexBinaryCrossentropy( C: Penalty parameter C of the loss term radius_constant: constant defining the length of the radius reduction: reduction type to use. See super class + from_logits: True if the input are unscaled logits. False if they are + already scaled. label_smoothing: amount of smoothing to perform on labels - relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). - Note, the impact of this parameter's effect on privacy - is not known and thus the default should be used. + relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the + impact of this parameter's effect on privacy is not known and thus the + default should be used. name: Name of the loss instance dtype: tf datatype to use for tensor conversions. 
""" @@ -271,49 +259,322 @@ class StrongConvexBinaryCrossentropy( self.radius_constant = radius_constant def call(self, y_true, y_pred): - """Compute loss + """Computes loss - Args: - y_true: Ground truth values. - y_pred: The predicted values. + Args: + y_true: Ground truth values. + y_pred: The predicted values. - Returns: - Loss values per sample. + Returns: + Loss values per sample. """ - # loss = tf.nn.sigmoid_cross_entropy_with_logits( - # labels=y_true, - # logits=y_pred - # ) loss = super(StrongConvexBinaryCrossentropy, self).call(y_true, y_pred) loss = loss * self.C return loss def radius(self): - """See super class. - """ + """See super class.""" return self.radius_constant / self.reg_lambda def gamma(self): - """See super class. - """ + """See super class.""" return self.reg_lambda def beta(self, class_weight): - """See super class. - """ + """See super class.""" max_class_weight = self.max_class_weight(class_weight, self.dtype) return self.C * max_class_weight + self.reg_lambda def lipchitz_constant(self, class_weight): - """See super class. - """ + """See super class.""" max_class_weight = self.max_class_weight(class_weight, self.dtype) return self.C * max_class_weight + self.reg_lambda * self.radius() def kernel_regularizer(self): - """ - l2 loss using reg_lambda as the l2 term (as desired). Required for - this loss function to be strongly convex. - :return: + """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired). + + L2 regularization is required for this loss function to be strongly convex. + + Returns: + The L2 regularizer layer for this loss function, with regularizer constant + set to half the 0.5 * reg_lambda. """ return L1L2(l2=self.reg_lambda/2) + +# class StrongConvexSparseCategoricalCrossentropy( +# losses.CategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of CategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( +# reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. 
+# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) +# +# class StrongConvexSparseCategoricalCrossentropy( +# losses.SparseCategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight +# regularization. +# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexHuber, self).__init__(reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) +# +# +# class StrongConvexCategoricalCrossentropy( +# losses.CategoricalCrossentropy, +# StrongConvexMixin +# ): +# """ +# Strong Convex version of CategoricalCrossentropy loss using l2 weight +# regularization. 
+# """ +# +# def __init__(self, +# reg_lambda: float, +# C: float, +# radius_constant: float, +# from_logits: bool = True, +# label_smoothing: float = 0, +# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, +# name: str = 'binarycrossentropy', +# dtype=tf.float32): +# """ +# Args: +# reg_lambda: Weight regularization constant +# C: Penalty parameter C of the loss term +# radius_constant: constant defining the length of the radius +# reduction: reduction type to use. See super class +# label_smoothing: amount of smoothing to perform on labels +# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) +# name: Name of the loss instance +# dtype: tf datatype to use for tensor conversions. +# """ +# if reg_lambda <= 0: +# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) +# if C <= 0: +# raise ValueError('c: {0}, should be >= 0'.format(C)) +# if radius_constant <= 0: +# raise ValueError('radius_constant: {0}, should be >= 0'.format( +# radius_constant +# )) +# +# self.C = C +# self.dtype = dtype +# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) +# super(StrongConvexHuber, self).__init__(reduction=reduction, +# name=name, +# from_logits=from_logits, +# label_smoothing=label_smoothing, +# ) +# self.radius_constant = radius_constant +# +# def call(self, y_true, y_pred): +# """Compute loss +# +# Args: +# y_true: Ground truth values. +# y_pred: The predicted values. +# +# Returns: +# Loss values per sample. +# """ +# loss = super() +# loss = loss * self.C +# return loss +# +# def radius(self): +# """See super class. +# """ +# return self.radius_constant / self.reg_lambda +# +# def gamma(self): +# """See super class. +# """ +# return self.reg_lambda +# +# def beta(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda +# +# def lipchitz_constant(self, class_weight): +# """See super class. +# """ +# max_class_weight = self.max_class_weight(class_weight, self.dtype) +# return self.C * max_class_weight + self.reg_lambda * self.radius() +# +# def kernel_regularizer(self): +# """ +# l2 loss using reg_lambda as the l2 term (as desired). Required for +# this loss function to be strongly convex. +# :return: +# """ +# return L1L2(l2=self.reg_lambda) \ No newline at end of file diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index d2c9f80..49f3144 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -1,4 +1,4 @@ -# Copyright 2018, The TensorFlow Authors. +# Copyright 2019, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,6 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from contextlib import contextmanager +from io import StringIO +import sys import tensorflow as tf from tensorflow.python.keras import keras_parameterized from tensorflow.python.framework import test_util @@ -27,6 +30,18 @@ from privacy.bolton.losses import StrongConvexHuber from privacy.bolton.losses import StrongConvexMixin +@contextmanager +def captured_output(): + """Capture std_out and std_err within context.""" + new_out, new_err = StringIO(), StringIO() + old_out, old_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = new_out, new_err + yield sys.stdout, sys.stderr + finally: + sys.stdout, sys.stderr = old_out, old_err + + class StrongConvexMixinTests(keras_parameterized.TestCase): """Tests for the StrongConvexMixin""" @parameterized.named_parameters([ @@ -72,7 +87,7 @@ class StrongConvexMixinTests(keras_parameterized.TestCase): class BinaryCrossesntropyTests(keras_parameterized.TestCase): - """tests for BinaryCrossesntropy StrongConvex loss""" + """tests for BinaryCrossesntropy StrongConvex loss.""" @parameterized.named_parameters([ {'testcase_name': 'normal', @@ -82,7 +97,8 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): }, # pylint: disable=invalid-name ]) def test_init_params(self, reg_lambda, C, radius_constant): - """Test initialization for given arguments + """Test initialization for given arguments. + Args: reg_lambda: initialization value for reg_lambda arg C: initialization value for C arg @@ -111,6 +127,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): ]) def test_bad_init_params(self, reg_lambda, C, radius_constant): """Test invalid domain for given params. Should return ValueError + Args: reg_lambda: initialization value for reg_lambda arg C: initialization value for C arg @@ -146,6 +163,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): ]) def test_calculation(self, logits, y_true, result): """Test the call method to ensure it returns the correct value + Args: logits: unscaled output of model y_true: label @@ -185,6 +203,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): ]) def test_fns(self, init_args, fn, args, result): """Test that fn of BinaryCrossentropy loss returns the correct result + Args: init_args: init values for loss instance fn: the fn to test @@ -201,6 +220,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): result = result.l2 self.assertEqual(expected, result) + @parameterized.named_parameters([ + {'testcase_name': 'label_smoothing', + 'init_args': [1, 1, 1, True, 0.1], + 'fn': None, + 'args': None, + 'print_res': 'The impact of label smoothing on privacy is unknown.' + }, + ]) + def test_prints(self, init_args, fn, args, print_res): + """Test logger warning from StrongConvexBinaryCrossentropy. + + Args: + init_args: arguments to init the object with. + fn: function to test + args: arguments to above function + print_res: print result that should have been printed. 
+ """ + with captured_output() as (out, err): # pylint: disable=unused-variable + loss = StrongConvexBinaryCrossentropy(*init_args) + if fn is not None: + getattr(loss, fn, lambda *arguments: print('error'))(*args) + self.assertRegexMatch(err.getvalue().strip(), [print_res]) + class HuberTests(keras_parameterized.TestCase): """tests for BinaryCrossesntropy StrongConvex loss""" @@ -215,6 +257,7 @@ class HuberTests(keras_parameterized.TestCase): ]) def test_init_params(self, reg_lambda, c, radius_constant, delta): """Test initialization for given arguments + Args: reg_lambda: initialization value for reg_lambda arg C: initialization value for C arg @@ -244,7 +287,7 @@ class HuberTests(keras_parameterized.TestCase): 'delta': 1 }, {'testcase_name': 'negative delta', - 'reg_lambda': -1, + 'reg_lambda': 1, 'c': 1, 'radius_constant': 1, 'delta': -1 @@ -252,10 +295,12 @@ class HuberTests(keras_parameterized.TestCase): ]) def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): """Test invalid domain for given params. Should return ValueError + Args: reg_lambda: initialization value for reg_lambda arg C: initialization value for C arg radius_constant: initialization value for radius_constant arg + delta: the delta parameter for the huber loss """ # test valid domains for each variable with self.assertRaises(ValueError): @@ -321,6 +366,7 @@ class HuberTests(keras_parameterized.TestCase): ]) def test_calculation(self, logits, y_true, delta, result): """Test the call method to ensure it returns the correct value + Args: logits: unscaled output of model y_true: label @@ -360,6 +406,7 @@ class HuberTests(keras_parameterized.TestCase): ]) def test_fns(self, init_args, fn, args, result): """Test that fn of BinaryCrossentropy loss returns the correct result + Args: init_args: init values for loss instance fn: the fn to test diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 7503157..79c8115 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -1,4 +1,4 @@ -# Copyright 2018, The TensorFlow Authors. +# Copyright 2019, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,8 +25,11 @@ from privacy.bolton.optimizers import Bolton class BoltonModel(Model): - """ - Bolton episilon-delta model + """Bolton episilon-delta differential privacy model. + + The privacy guarantees are dependent on the noise that is sampled. Please + see the paper linked below for more details. + Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). 2. Projects weights to R after each batch @@ -121,8 +124,9 @@ class BoltonModel(Model): noise_distribution='laplace', steps_per_epoch=None, **kwargs): # pylint: disable=arguments-differ - """Reroutes to super fit with additional Bolton delta-epsilon privacy - requirements implemented. Note, inputs must be normalized s.t. ||x|| < 1 + """Reroutes to super fit with Bolton delta-epsilon privacy requirements. + + Note, inputs must be normalized s.t. ||x|| < 1. Requirements are as follows: 1. Adds noise to weights after training (output perturbation). 2. Projects weights to R after each batch @@ -139,7 +143,6 @@ class BoltonModel(Model): whose dim == n_classes. See the super method for descriptions on the rest of the arguments. 
- """ if class_weight is None: class_weight_ = self.calculate_class_weights(class_weight) @@ -237,8 +240,8 @@ class BoltonModel(Model): class_counts=None, num_classes=None ): - """ - Calculates class weighting to be used in training. Can be on + """Calculates class weighting to be used in training. + Args: class_weights: str specifying type, array giving weights, or None. class_counts: If class_weights is not None, then an array of @@ -246,7 +249,6 @@ class BoltonModel(Model): num_classes: If class_weights is not None, then the number of classes. Returns: class_weights as 1D tensor, to be passed to model's fit method. - """ # Value checking class_keys = ['balanced'] diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 63954cc..a89a490 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -1,4 +1,4 @@ -# Copyright 2018, The TensorFlow Authors. +# Copyright 2019, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -38,40 +38,36 @@ class TestLoss(losses.Loss, StrongConvexMixin): self.radius_constant = radius_constant def radius(self): - """Radius of R-Ball (value to normalize weights to after each batch) + """Radius, R, of the hypothesis space W. + W is a convex set that forms the hypothesis space. Returns: radius - """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) def gamma(self): - """ Gamma strongly convex - - Returns: gamma - - """ + """Returns strongly convex parameter, gamma.""" return _ops.convert_to_tensor_v2(1, dtype=tf.float32) def beta(self, class_weight): # pylint: disable=unused-argument - """Beta smoothess + """Smoothness, beta. Args: - class_weight: the class weights used. - - Returns: Beta + class_weight: the class weights as scalar or 1d tensor, where its + dimensionality is equal to the number of outputs. + Returns: + Beta """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument - """ L lipchitz continuous + """Lipchitz constant, L. Args: class_weight: class weights used Returns: L - """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -83,11 +79,25 @@ class TestLoss(losses.Loss, StrongConvexMixin): ) def max_class_weight(self, class_weight): + """the maximum weighting in class weights (max value) as a scalar tensor + + Args: + class_weight: class weights used + dtype: the data type for tensor conversions. + + Returns: + maximum class weighting as tensor scalar + """ if class_weight is None: return 1 raise ValueError('') def kernel_regularizer(self): + """Returns the kernel_regularizer to be used. + + Any subclass should override this method if they want a kernel_regularizer + (if required for the loss function to be StronglyConvex. + """ return L1L2(l2=self.reg_lambda) @@ -113,7 +123,7 @@ class TestOptimizer(OptimizerV2): class InitTests(keras_parameterized.TestCase): - """tests for keras model initialization""" + """Tests for keras model initialization.""" @parameterized.named_parameters([ {'testcase_name': 'normal', @@ -124,7 +134,7 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_init_params(self, n_outputs): - """test initialization of BoltonModel + """Test initialization of BoltonModel. Args: n_outputs: number of output neurons @@ -243,8 +253,7 @@ def _do_fit(n_samples, optimizer, loss, distribution='laplace'): - """Helper to instantiate necessary components for fitting and perform a model - fit. 
+ """Instantiate necessary components for fitting and perform a model fit. Args: n_samples: number of samples in dataset diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index ec7a7e5..c2c9349 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -1,4 +1,4 @@ -# Copyright 2018, The TensorFlow Authors. +# Copyright 2019, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,11 +26,9 @@ _accepted_distributions = ['laplace'] # implemented distributions for noising class GammaBetaDecreasingStep( - optimizer_v2.learning_rate_schedule.LearningRateSchedule -): - """ - Learning Rate Scheduler using the minimum of 1/beta and 1/(gamma * step) - at each step. A required step for privacy guarantees. + optimizer_v2.learning_rate_schedule.LearningRateSchedule): + """Computes LR as minimum of 1/beta and 1/(gamma * step) at each step. + A required step for privacy guarantees. """ def __init__(self): self.is_init = False @@ -38,8 +36,7 @@ class GammaBetaDecreasingStep( self.gamma = None def __call__(self, step): - """ - returns the learning rate + """Computes and returns the learning rate. Args: step: the current iteration number Returns: @@ -61,15 +58,14 @@ class GammaBetaDecreasingStep( ) def get_config(self): - """ - config to setup the learning rate scheduler. - """ + """Return config to setup the learning rate scheduler.""" return {'beta': self.beta, 'gamma': self.gamma} def initialize(self, beta, gamma): - """setup the learning rate scheduler with the beta and gamma values provided - by the loss function. Meant to be used with .fit as the loss params may - depend on values passed to fit. + """Setups scheduler with beta and gamma values from the loss function. + + Meant to be used with .fit as the loss params may depend on values passed to + fit. Args: beta: Smoothness value. See StrongConvexMixin @@ -80,37 +76,36 @@ class GammaBetaDecreasingStep( self.gamma = gamma def de_initialize(self): - """De initialize the scheduler after fitting, in case another fit call has - different loss parameters. - """ + """De initialize post fit, as another fit call may use other parameters.""" self.is_init = False self.beta = None self.gamma = None class Bolton(optimizer_v2.OptimizerV2): - """ - Bolton optimizer wraps another tf optimizer to be used - as the visible optimizer to the tf model. No matter the optimizer - passed, "Bolton" enables the bolton model to control the learning rate - based on the strongly convex loss. + """Wrap another tf optimizer with Bolton privacy protocol. - To use the Bolton method, you must: - 1. instantiate it with an instantiated tf optimizer and StrongConvexLoss. - 2. use it as a context manager around your .fit method internals. + Bolton optimizer wraps another tf optimizer to be used + as the visible optimizer to the tf model. No matter the optimizer + passed, "Bolton" enables the bolton model to control the learning rate + based on the strongly convex loss. - This can be accomplished by the following: - optimizer = tf.optimizers.SGD() - loss = privacy.bolton.losses.StrongConvexBinaryCrossentropy() - bolton = Bolton(optimizer, loss) - with bolton(*args) as _: - model.fit() - The args required for the context manager can be found in the __call__ - method. + To use the Bolton method, you must: + 1. instantiate it with an instantiated tf optimizer and StrongConvexLoss. + 2. 
use it as a context manager around your .fit method internals. - For more details on the strong convexity requirements, see: - Bolt-on Differential Privacy for Scalable Stochastic Gradient - Descent-based Analytics by Xi Wu et. al. + This can be accomplished by the following: + optimizer = tf.optimizers.SGD() + loss = privacy.bolton.losses.StrongConvexBinaryCrossentropy() + bolton = Bolton(optimizer, loss) + with bolton(*args) as _: + model.fit() + The args required for the context manager can be found in the __call__ + method. + + For more details on the strong convexity requirements, see: + Bolt-on Differential Privacy for Scalable Stochastic Gradient + Descent-based Analytics by Xi Wu et. al. """ def __init__(self, # pylint: disable=super-init-not-called optimizer: optimizer_v2.OptimizerV2, @@ -120,9 +115,9 @@ class Bolton(optimizer_v2.OptimizerV2): """Constructor. Args: - optimizer: Optimizer_v2 or subclass to be used as the optimizer - (wrapped). - loss: StrongConvexLoss function that the model is being compiled with. + optimizer: Optimizer_v2 or subclass to be used as the optimizer + (wrapped). + loss: StrongConvexLoss function that the model is being compiled with. """ if not isinstance(loss, StrongConvexMixin): @@ -150,19 +145,15 @@ class Bolton(optimizer_v2.OptimizerV2): self._is_init = False def get_config(self): - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" return self._internal_optimizer.get_config() def project_weights_to_r(self, force=False): - """helper method to normalize the weights to the R-ball. + """Normalize the weights to the R-ball. Args: - force: True to normalize regardless of previous weight values. - False to check if weights > R-ball and only normalize then. - - Returns: - + force: True to normalize regardless of previous weight values. + False to check if weights > R-ball and only normalize then. """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -186,8 +177,8 @@ class Bolton(optimizer_v2.OptimizerV2): input_dim: the input dimensionality for the weights output_dim the output dimensionality for the weights - Returns: noise in shape of layer's weights to be added to the weights. - + Returns: + Noise in shape of layer's weights to be added to the weights. """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -221,8 +212,7 @@ class Bolton(optimizer_v2.OptimizerV2): 'a valid distribution'.format(distribution)) def from_config(self, *args, **kwargs): # pylint: disable=arguments-differ - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" return self._internal_optimizer.from_config(*args, **kwargs) def __getattr__(self, name): @@ -230,11 +220,10 @@ class Bolton(optimizer_v2.OptimizerV2): from the _internal_optimizer instance. Args: - name: + name: Returns: attribute from Bolton if specified to come from self, else - from _internal_optimizer. - + from _internal_optimizer. """ if name == '_private_attributes' or name in self._private_attributes: return getattr(self, name) @@ -255,11 +244,8 @@ class Bolton(optimizer_v2.OptimizerV2): Reroute everything else to the _internal_optimizer. 
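Two behaviours described above, the decreasing learning rate from GammaBetaDecreasingStep and the R-ball projection from project_weights_to_r, can be illustrated with short stand-ins. These are sketches only, not the implementations in this patch; guarding the step count at 1, using the Frobenius norm, and assuming eager execution on a kernel Variable are choices made purely for the illustration:

import tensorflow as tf

def decreasing_learning_rate(step, beta, gamma):
  # min(1 / beta, 1 / (gamma * step)), the schedule required for the privacy
  # guarantees; beta and gamma come from the strongly convex loss.
  step = tf.cast(tf.maximum(step, 1), tf.float32)
  return tf.minimum(1. / beta, 1. / (gamma * step))

def project_to_r_ball(kernel, radius, force=False):
  # Rescale the weights onto the R-ball whenever their norm exceeds the
  # radius supplied by the loss (loss.radius()); force=True rescales
  # unconditionally.
  norm = tf.norm(kernel)
  if force or norm > radius:
    kernel.assign(kernel * (radius / norm))

In the optimizer itself the projection runs after each update (see get_updates, apply_gradients and minimize below) and once more, unconditionally, when the fitting context exits.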
Args: - key: attribute name - value: attribute value - - Returns: - + key: attribute name + value: attribute value """ if key == '_private_attributes': object.__setattr__(self, key, value) @@ -269,44 +255,37 @@ class Bolton(optimizer_v2.OptimizerV2): setattr(self._internal_optimizer, key, value) def _resource_apply_dense(self, *args, **kwargs): # pylint: disable=arguments-differ - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" return self._internal_optimizer._resource_apply_dense(*args, **kwargs) # pylint: disable=protected-access def _resource_apply_sparse(self, *args, **kwargs): # pylint: disable=arguments-differ - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" return self._internal_optimizer._resource_apply_sparse(*args, **kwargs) # pylint: disable=protected-access def get_updates(self, loss, params): - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" out = self._internal_optimizer.get_updates(loss, params) self.project_weights_to_r() return out def apply_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" out = self._internal_optimizer.apply_gradients(*args, **kwargs) self.project_weights_to_r() return out def minimize(self, *args, **kwargs): # pylint: disable=arguments-differ - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" out = self._internal_optimizer.minimize(*args, **kwargs) self.project_weights_to_r() return out def _compute_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ,protected-access - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" return self._internal_optimizer._compute_gradients(*args, **kwargs) # pylint: disable=protected-access def get_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ - """Reroutes to _internal_optimizer. See super/_internal_optimizer. - """ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" return self._internal_optimizer.get_gradients(*args, **kwargs) def __enter__(self): @@ -326,8 +305,8 @@ class Bolton(optimizer_v2.OptimizerV2): n_samples, batch_size ): - """Entry point from context. Accepts required values for bolton method and - stores them on the optimizer for use throughout fitting. + """Accepts required values for bolton method from context entry point. + Stores them on the optimizer for use throughout fitting. Args: noise_distribution: the noise distribution to pick. @@ -360,17 +339,15 @@ class Bolton(optimizer_v2.OptimizerV2): def __exit__(self, *args): """Exit call from with statement. - used to - - 1.reset the model and fit parameters passed to the optimizer - to enable the Bolton Privacy guarantees. These are reset to ensure - that any future calls to fit with the same instance of the optimizer - will properly error out. - - 2.call post-fit methods normalizing/projecting the model weights and - adding noise to the weights. + used to + 1.reset the model and fit parameters passed to the optimizer + to enable the Bolton Privacy guarantees. 
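A usage sketch for the context entry point above, filling in the __call__ parameters with the values the tutorial later in this patch series uses; the plain Sequential model here is a stand-in for any Keras model that satisfies the loss's requirements:

import tensorflow as tf
from privacy.bolton import losses
from privacy.bolton.optimizers import Bolton

x = tf.concat([tf.constant(-1., shape=(10, 2)), tf.constant(1., shape=(10, 2))], 0)
y = tf.concat([tf.zeros((10, 1)), tf.ones((10, 1))], 0)

loss = losses.StrongConvexBinaryCrossentropy(1, 1, 1)  # reg_lambda, C, radius_constant
optimizer = Bolton(tf.optimizers.SGD(), loss)
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(1, kernel_regularizer=loss.kernel_regularizer())])
model.compile(optimizer, loss)

with optimizer(noise_distribution='laplace',
               epsilon=2,
               layers=model.layers,
               class_weights=1,  # 1 behaves like None here
               n_samples=20,
               batch_size=5) as _:
  model.fit(x, y, batch_size=5, epochs=2)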
These are reset to ensure + that any future calls to fit with the same instance of the optimizer + will properly error out. + 2.call post-fit methods normalizing/projecting the model weights and + adding noise to the weights. """ self.project_weights_to_r(True) for layer in self.layers: diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 6a499fc..6cbf7ee 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -1,4 +1,4 @@ -# Copyright 2018, The TensorFlow Authors. +# Copyright 2019, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,8 +33,7 @@ from privacy.bolton import optimizers as opt class TestModel(Model): - """ - Bolton episilon-delta model + """Bolton episilon-delta model. Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). 2. Projects weights to R after each batch @@ -47,14 +46,15 @@ class TestModel(Model): """ def __init__(self, n_outputs=2, input_shape=(16,), init_value=2): - """ + """Constructor. + Args: - n_outputs: number of output neurons - epsilon: level of privacy guarantee - noise_distribution: distribution to pull weight perturbations from - weights_initializer: initializer for weights - seed: random seed to use - dtype: data type to use for tensors + n_outputs: number of output neurons + epsilon: level of privacy guarantee + noise_distribution: distribution to pull weight perturbations from + weights_initializer: initializer for weights + seed: random seed to use + dtype: data type to use for tensors """ super(TestModel, self).__init__(name='bolton', dynamic=False) self.n_outputs = n_outputs @@ -76,40 +76,36 @@ class TestLoss(losses.Loss, StrongConvexMixin): self.radius_constant = radius_constant def radius(self): - """Radius of R-Ball (value to normalize weights to after each batch) + """Radius, R, of the hypothesis space W. + W is a convex set that forms the hypothesis space. Returns: radius - """ return _ops.convert_to_tensor_v2(self.radius_constant, dtype=tf.float32) def gamma(self): - """ Gamma strongly convex - - Returns: gamma - - """ + """Returns strongly convex parameter, gamma.""" return _ops.convert_to_tensor_v2(1, dtype=tf.float32) def beta(self, class_weight): # pylint: disable=unused-argument - """Beta smoothess + """Smoothness, beta. Args: - class_weight: the class weights used. - - Returns: Beta + class_weight: the class weights as scalar or 1d tensor, where its + dimensionality is equal to the number of outputs. + Returns: + Beta """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument - """ L lipchitz continuous + """Lipchitz constant, L. Args: class_weight: class weights used Returns: L - """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -121,11 +117,25 @@ class TestLoss(losses.Loss, StrongConvexMixin): ) def max_class_weight(self, class_weight, dtype=tf.float32): + """the maximum weighting in class weights (max value) as a scalar tensor + + Args: + class_weight: class weights used + dtype: the data type for tensor conversions. + + Returns: + maximum class weighting as tensor scalar + """ if class_weight is None: return 1 raise NotImplementedError('') def kernel_regularizer(self): + """Returns the kernel_regularizer to be used. 
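The post-fit noise step described above (output perturbation with the 'laplace' distribution) can be sketched as follows. This is not the optimizer's get_noise: how the real scale is derived from epsilon, the loss constants, n_samples and batch_size is not reproduced here, so scale is left as a free parameter, and layer.kernel is an assumed attribute name:

import tensorflow as tf

def laplace_noise(shape, scale, dtype=tf.float32):
  # Inverse-CDF sampling of Laplace(0, scale) noise with the given shape.
  u = tf.random.uniform(shape, minval=-0.5, maxval=0.5, dtype=dtype)
  return -scale * tf.sign(u) * tf.math.log(1. - 2. * tf.abs(u))

def perturb_layer_weights(layer, scale):
  # Add noise in the shape of the layer's weights, as the exit step above
  # does once training has converged.
  layer.kernel.assign_add(laplace_noise(tf.shape(layer.kernel), scale))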
+ + Any subclass should override this method if they want a kernel_regularizer + (if required for the loss function to be StronglyConvex. + """ return L1L2(l2=self.reg_lambda) diff --git a/tutorials/bolton_tutorial.ipynb b/tutorials/bolton_tutorial.ipynb deleted file mode 100644 index f682592..0000000 --- a/tutorials/bolton_tutorial.ipynb +++ /dev/null @@ -1,432 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "pycharm": { - "is_executing": false - }, - "scrolled": true - }, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.append('..')\n", - "import tensorflow as tf\n", - "from privacy.bolton import losses\n", - "from privacy.bolton import models" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we will create a binary classification dataset with a single output dimension.\n", - "The samples for each label are repeated datapoints at different points in space." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "pycharm": { - "is_executing": false, - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(20, 2) (20, 1)\n" - ] - } - ], - "source": [ - "# Parameters for dataset\n", - "n_samples = 10\n", - "input_dim = 2\n", - "n_outputs = 1\n", - "# Create binary classification dataset:\n", - "x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)), \n", - " tf.constant(1, tf.float32, (n_samples, input_dim))]\n", - "y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),\n", - " tf.constant(1, tf.float32, (n_samples, 1))]\n", - "x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)\n", - "print(x.shape, y.shape)\n", - "generator = tf.data.Dataset.from_tensor_slices((x, y))\n", - "generator = generator.batch(10)\n", - "generator = generator.shuffle(10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we will explore using the pre-built BoltonModel, which is a thin wrapper around a Keras Model using a single-layer neural network. It automatically uses the Bolton Optimizer which encompasses all the logic required for the Bolton Differential Privacy method.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [], - "source": [ - "bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we will pick our optimizer and Strongly Convex Loss function. The loss must extend from StrongConvexMixin and implement the associated methods. Some existing loss functions are pre-implemented in bolton.loss" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [], - "source": [ - "optimizer = tf.optimizers.SGD()\n", - "reg_lambda = 1\n", - "C = 1\n", - "radius_constant = 1\n", - "loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy to be 1; these are all tunable and their impact can be read in losses.StrongConvexBinaryCrossentropy. We then compile the model with the chosen optimizer and loss, which will automatically wrap the chosen optimizer with the Bolton Optimizer, ensuring the required components function as required for privacy guarantees." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [], - "source": [ - "bolt.compile(optimizer, loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To fit the model, the optimizer will require additional information about the dataset and model. These parameters are:\n", - "1. the class_weights used\n", - "2. the number of samples in the dataset\n", - "3. the batch size\n", - "which the model will try to infer, if possible. If not, you will be required to pass these explicitly to the fit method.\n", - "As well, there are two privacy parameters than can be altered: \n", - "1. epsilon, a float\n", - "2. noise_distribution, a valid string indicating the distriution to use (must be implemented)\n", - "\n", - "The BoltonModel offers a helper method, .calculate_class_weight to aid in class_weight calculation." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W0619 11:00:32.392859 4467058112 deprecation.py:323] From /Users/christopherchoo/PycharmProjects/privacy/venv/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py:182: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "Use tf.where in 2.0, which has the same broadcast rule as np.where\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 20 samples\n", - "Epoch 1/2\n", - "20/20 [==============================] - 0s 4ms/sample - loss: 0.8146\n", - "Epoch 2/2\n", - "20/20 [==============================] - 0s 94us/sample - loss: 0.5699\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# required parameters\n", - "class_weight = None # default, use .calculate_class_weight to specify other values\n", - "batch_size = None # default, if it cannot be inferred, specify this\n", - "n_samples = None # default, if it cannot be iferred, specify this\n", - "# privacy parameters\n", - "epsilon = 2\n", - "noise_distribution = 'laplace'\n", - "\n", - "bolt.fit(x, \n", - " y, \n", - " epsilon=epsilon, \n", - " class_weight=class_weight, \n", - " batch_size=batch_size, \n", - " n_samples=n_samples,\n", - " noise_distribution=noise_distribution,\n", - " epochs=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We may also train a generator object, or try different optimizers and loss functions. Below, we will see that we must pass the number of samples as the fit method is unable to infer it for a generator." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "optimizer2 = tf.optimizers.Adam()\n", - "bolt.compile(optimizer2, loss)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Could not infer the number of samples. 
Please pass this in using n_samples.\n" - ] - } - ], - "source": [ - "# required parameters\n", - "class_weight = None # default, use .calculate_class_weight to specify other values\n", - "batch_size = None # default, if it cannot be inferred, specify this\n", - "n_samples = None # default, if it cannot be iferred, specify this\n", - "# privacy parameters\n", - "epsilon = 2\n", - "noise_distribution = 'laplace'\n", - "try:\n", - " bolt.fit(generator,\n", - " epsilon=epsilon, \n", - " class_weight=class_weight, \n", - " batch_size=batch_size, \n", - " n_samples=n_samples,\n", - " noise_distribution=noise_distribution,\n", - " verbose=0\n", - " )\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And now, re running with the parameter set." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "n_samples = 20\n", - "bolt.fit(generator,\n", - " epsilon=epsilon, \n", - " class_weight=class_weight, \n", - " batch_size=batch_size, \n", - " n_samples=n_samples,\n", - " noise_distribution=noise_distribution,\n", - " verbose=0\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You don't have to use the bolton model to use the Bolton method. There are only a few requirements:\n", - "1. make sure any requirements from the loss are implemented in the model.\n", - "2. instantiate the optimizer and use it as a context around your fit operation." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from privacy.bolton.optimizers import Bolton" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we create our own model and setup the Bolton optimizer." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "class TestModel(tf.keras.Model):\n", - " def __init__(self, reg_layer, n_outputs=1):\n", - " super(TestModel, self).__init__(name='test')\n", - " self.output_layer = tf.keras.layers.Dense(n_outputs,\n", - " kernel_regularizer=reg_layer\n", - " )\n", - " \n", - " def call(self, inputs):\n", - " return self.output_layer(inputs)\n", - "\n", - "optimizer = tf.optimizers.SGD()\n", - "loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)\n", - "optimizer = Bolton(optimizer, loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we instantiate our model and check for 1. Since our loss requires L2 regularization over the kernel, we will pass it to the model." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "n_outputs = 1 # parameter for model and optimizer context.\n", - "test_model = TestModel(loss.kernel_regularizer(), n_outputs)\n", - "test_model.compile(optimizer, loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We comply with 2., and use the Bolton Optimizer as a context around the fit method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 20 samples\n", - "Epoch 1/2\n", - "20/20 [==============================] - 0s 3ms/sample - loss: 0.9096\n", - "Epoch 2/2\n", - "20/20 [==============================] - 0s 430us/sample - loss: 0.5275\n" - ] - } - ], - "source": [ - "# parameters for context\n", - "noise_distribution = 'laplace'\n", - "epsilon = 2\n", - "class_weights = 1 # Previosuly, the fit method auto-detected the class_weights.\n", - "# Here, we need to pass the class_weights explicitly. 1 is the equivalent of None.\n", - "n_samples = 20\n", - "batch_size = 5\n", - "\n", - "with optimizer(\n", - " noise_distribution=noise_distribution,\n", - " epsilon=epsilon,\n", - " layers=test_model.layers,\n", - " class_weights=class_weights, \n", - " n_samples=n_samples,\n", - " batch_size=batch_size\n", - ") as _:\n", - " test_model.fit(x, y, batch_size=batch_size, epochs=2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "source": [], - "metadata": { - "collapsed": false - } - } - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} \ No newline at end of file diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py new file mode 100644 index 0000000..6a0c010 --- /dev/null +++ b/tutorials/bolton_tutorial.py @@ -0,0 +1,155 @@ +import sys + +sys.path.append('..') +import tensorflow as tf +from privacy.bolton import losses +from privacy.bolton import models + +"""First, we will create a binary classification dataset with a single output +dimension. The samples for each label are repeated data points at different +points in space.""" +# Parameters for dataset +n_samples = 10 +input_dim = 2 +n_outputs = 1 +# Create binary classification dataset: +x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)), + tf.constant(1, tf.float32, (n_samples, input_dim))] +y_stack = [tf.constant(0, tf.float32, (n_samples, 1)), + tf.constant(1, tf.float32, (n_samples, 1))] +x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0) +print(x.shape, y.shape) +generator = tf.data.Dataset.from_tensor_slices((x, y)) +generator = generator.batch(10) +generator = generator.shuffle(10) +"""First, we will explore using the pre - built BoltonModel, which is a thin +wrapper around a Keras Model using a single - layer neural network. +It automatically uses the Bolton Optimizer which encompasses all the logic +required for the Bolton Differential Privacy method.""" +bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have. +"""Now, we will pick our optimizer and Strongly Convex Loss function. 
The loss +must extend from StrongConvexMixin and implement the associated methods.Some +existing loss functions are pre - implemented in bolton.loss""" +optimizer = tf.optimizers.SGD() +reg_lambda = 1 +C = 1 +radius_constant = 1 +loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) +"""For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy +to be 1; these are all tunable and their impact can be read in losses. +StrongConvexBinaryCrossentropy.We then compile the model with the chosen +optimizer and loss, which will automatically wrap the chosen optimizer with the +Bolton Optimizer, ensuring the required components function as required for +privacy guarantees.""" +bolt.compile(optimizer, loss) +"""To fit the model, the optimizer will require additional information about +the dataset and model.These parameters are: +1. the class_weights used +2. the number of samples in the dataset +3. the batch size which the model will try to infer, if possible. If not, you +will be required to pass these explicitly to the fit method. + +As well, there are two privacy parameters than can be altered: +1. epsilon, a float +2. noise_distribution, a valid string indicating the distriution to use (must be +implemented) + +The BoltonModel offers a helper method,.calculate_class_weight to aid in +class_weight calculation.""" +# required parameters +class_weight = None # default, use .calculate_class_weight to specify other values +batch_size = None # default, if it cannot be inferred, specify this +n_samples = None # default, if it cannot be iferred, specify this +# privacy parameters +epsilon = 2 +noise_distribution = 'laplace' + +bolt.fit(x, + y, + epsilon=epsilon, + class_weight=class_weight, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=noise_distribution, + epochs=2) +"""We may also train a generator object, or try different optimizers and loss +functions. Below, we will see that we must pass the number of samples as the fit +method is unable to infer it for a generator.""" +optimizer2 = tf.optimizers.Adam() +bolt.compile(optimizer2, loss) +# required parameters +class_weight = None # default, use .calculate_class_weight to specify other values +batch_size = None # default, if it cannot be inferred, specify this +n_samples = None # default, if it cannot be iferred, specify this +# privacy parameters +epsilon = 2 +noise_distribution = 'laplace' +try: + bolt.fit(generator, + epsilon=epsilon, + class_weight=class_weight, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0 + ) +except ValueError as e: + print(e) +"""And now, re running with the parameter set.""" +n_samples = 20 +bolt.fit(generator, + epsilon=epsilon, + class_weight=class_weight, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0 + ) +"""You don't have to use the bolton model to use the Bolton method. +There are only a few requirements: +1. make sure any requirements from the loss are implemented in the model. +2. instantiate the optimizer and use it as a context around your fit operation. 
+""" + +from privacy.bolton.optimizers import Bolton + +"""Here, we create our own model and setup the Bolton optimizer.""" + +class TestModel(tf.keras.Model): + def __init__(self, reg_layer, n_outputs=1): + super(TestModel, self).__init__(name='test') + self.output_layer = tf.keras.layers.Dense(n_outputs, + kernel_regularizer=reg_layer + ) + + def call(self, inputs): + return self.output_layer(inputs) + + +optimizer = tf.optimizers.SGD() +loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) +optimizer = Bolton(optimizer, loss) +"""Now, we instantiate our model and check for 1. Since our loss requires L2 +regularization over the kernel, we will pass it to the model.""" +n_outputs = 1 # parameter for model and optimizer context. +test_model = TestModel(loss.kernel_regularizer(), n_outputs) +test_model.compile(optimizer, loss) +"""We comply with 2., and use the Bolton Optimizer as a context around the fit +method.""" +# parameters for context +noise_distribution = 'laplace' +epsilon = 2 +class_weights = 1 # Previously, the fit method auto-detected the class_weights. +# Here, we need to pass the class_weights explicitly. 1 is the equivalent of None. +n_samples = 20 +batch_size = 5 + +with optimizer( + noise_distribution=noise_distribution, + epsilon=epsilon, + layers=test_model.layers, + class_weights=class_weights, + n_samples=n_samples, + batch_size=batch_size +) as _: + test_model.fit(x, y, batch_size=batch_size, epochs=2) From c05c2aa0d407ae4edc48f085a883e80ca91e06d5 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Thu, 18 Jul 2019 15:04:35 -0400 Subject: [PATCH 11/39] Ensuring pylint is 10/10 --- privacy/bolton/__init__.py | 6 +- privacy/bolton/losses.py | 282 +----------------------------- privacy/bolton/models.py | 2 +- privacy/bolton/optimizers_test.py | 2 +- tutorials/bolton_tutorial.py | 163 ++++++++++------- 5 files changed, 105 insertions(+), 350 deletions(-) diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 96d60b2..70c39dc 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -17,9 +17,9 @@ from distutils.version import LooseVersion import tensorflow as tf if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): - raise ImportError("Please upgrade your version of tensorflow from: {0} " - "to at least 2.0.0 to use privacy/bolton".format( - LooseVersion(tf.__version__))) + raise ImportError("Please upgrade your version " + "of tensorflow from: {0} to at least 2.0.0 to " + "use privacy/bolton".format(LooseVersion(tf.__version__))) if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 6af4e00..7d9ad32 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -160,11 +160,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): one = tf.constant(1, dtype=self.dtype) four = tf.constant(4, dtype=self.dtype) - if z > one + h: + if z > one + h: # pylint: disable=no-else-return return _ops.convert_to_tensor_v2(0, dtype=self.dtype) elif tf.math.abs(one - z) <= h: return one / (four * h) * tf.math.pow(one + h - z, 2) - return one - z # elif: z < one - h + return one - z def radius(self): """See super class.""" @@ -300,281 +300,3 @@ class StrongConvexBinaryCrossentropy( set to half the 0.5 * reg_lambda. 
""" return L1L2(l2=self.reg_lambda/2) - -# class StrongConvexSparseCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexSparseCategoricalCrossentropy, self).__init__( -# reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# class StrongConvexSparseCategoricalCrossentropy( -# losses.SparseCategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. 
(1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. -# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) -# -# -# class StrongConvexCategoricalCrossentropy( -# losses.CategoricalCrossentropy, -# StrongConvexMixin -# ): -# """ -# Strong Convex version of CategoricalCrossentropy loss using l2 weight -# regularization. -# """ -# -# def __init__(self, -# reg_lambda: float, -# C: float, -# radius_constant: float, -# from_logits: bool = True, -# label_smoothing: float = 0, -# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, -# name: str = 'binarycrossentropy', -# dtype=tf.float32): -# """ -# Args: -# reg_lambda: Weight regularization constant -# C: Penalty parameter C of the loss term -# radius_constant: constant defining the length of the radius -# reduction: reduction type to use. See super class -# label_smoothing: amount of smoothing to perform on labels -# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x) -# name: Name of the loss instance -# dtype: tf datatype to use for tensor conversions. -# """ -# if reg_lambda <= 0: -# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) -# if C <= 0: -# raise ValueError('c: {0}, should be >= 0'.format(C)) -# if radius_constant <= 0: -# raise ValueError('radius_constant: {0}, should be >= 0'.format( -# radius_constant -# )) -# -# self.C = C -# self.dtype = dtype -# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) -# super(StrongConvexHuber, self).__init__(reduction=reduction, -# name=name, -# from_logits=from_logits, -# label_smoothing=label_smoothing, -# ) -# self.radius_constant = radius_constant -# -# def call(self, y_true, y_pred): -# """Compute loss -# -# Args: -# y_true: Ground truth values. -# y_pred: The predicted values. -# -# Returns: -# Loss values per sample. -# """ -# loss = super() -# loss = loss * self.C -# return loss -# -# def radius(self): -# """See super class. 
-# """ -# return self.radius_constant / self.reg_lambda -# -# def gamma(self): -# """See super class. -# """ -# return self.reg_lambda -# -# def beta(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda -# -# def lipchitz_constant(self, class_weight): -# """See super class. -# """ -# max_class_weight = self.max_class_weight(class_weight, self.dtype) -# return self.C * max_class_weight + self.reg_lambda * self.radius() -# -# def kernel_regularizer(self): -# """ -# l2 loss using reg_lambda as the l2 term (as desired). Required for -# this loss function to be strongly convex. -# :return: -# """ -# return L1L2(l2=self.reg_lambda) \ No newline at end of file diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 79c8115..2c5c08a 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -24,7 +24,7 @@ from privacy.bolton.losses import StrongConvexMixin from privacy.bolton.optimizers import Bolton -class BoltonModel(Model): +class BoltonModel(Model): # pylint: disable=abstract-method """Bolton episilon-delta differential privacy model. The privacy guarantees are dependent on the noise that is sampled. Please diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 6cbf7ee..f95f209 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -32,7 +32,7 @@ from privacy.bolton.losses import StrongConvexMixin from privacy.bolton import optimizers as opt -class TestModel(Model): +class TestModel(Model): # pylint: disable=abstract-method """Bolton episilon-delta model. Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 6a0c010..8f806a1 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -1,13 +1,29 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tutorial for bolton module, the model and the optimizer.""" import sys sys.path.append('..') -import tensorflow as tf -from privacy.bolton import losses -from privacy.bolton import models - -"""First, we will create a binary classification dataset with a single output -dimension. The samples for each label are repeated data points at different -points in space.""" +import tensorflow as tf # pylint: disable=wrong-import-position +from privacy.bolton import losses # pylint: disable=wrong-import-position +from privacy.bolton import models # pylint: disable=wrong-import-position +# ------- +# First, we will create a binary classification dataset with a single output +# dimension. The samples for each label are repeated data points at different +# points in space. 
+# ------- # Parameters for dataset n_samples = 10 input_dim = 2 @@ -22,42 +38,50 @@ print(x.shape, y.shape) generator = tf.data.Dataset.from_tensor_slices((x, y)) generator = generator.batch(10) generator = generator.shuffle(10) -"""First, we will explore using the pre - built BoltonModel, which is a thin -wrapper around a Keras Model using a single - layer neural network. -It automatically uses the Bolton Optimizer which encompasses all the logic -required for the Bolton Differential Privacy method.""" +# ------- +# First, we will explore using the pre - built BoltonModel, which is a thin +# wrapper around a Keras Model using a single - layer neural network. +# It automatically uses the Bolton Optimizer which encompasses all the logic +# required for the Bolton Differential Privacy method. +# ------- bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have. -"""Now, we will pick our optimizer and Strongly Convex Loss function. The loss -must extend from StrongConvexMixin and implement the associated methods.Some -existing loss functions are pre - implemented in bolton.loss""" +# ------- +# Now, we will pick our optimizer and Strongly Convex Loss function. The loss +# must extend from StrongConvexMixin and implement the associated methods.Some +# existing loss functions are pre - implemented in bolton.loss +# ------- optimizer = tf.optimizers.SGD() reg_lambda = 1 C = 1 radius_constant = 1 loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) -"""For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy -to be 1; these are all tunable and their impact can be read in losses. -StrongConvexBinaryCrossentropy.We then compile the model with the chosen -optimizer and loss, which will automatically wrap the chosen optimizer with the -Bolton Optimizer, ensuring the required components function as required for -privacy guarantees.""" +# ------- +# For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy +# to be 1; these are all tunable and their impact can be read in losses. +# StrongConvexBinaryCrossentropy.We then compile the model with the chosen +# optimizer and loss, which will automatically wrap the chosen optimizer with the +# Bolton Optimizer, ensuring the required components function as required for +# privacy guarantees. +# ------- bolt.compile(optimizer, loss) -"""To fit the model, the optimizer will require additional information about -the dataset and model.These parameters are: -1. the class_weights used -2. the number of samples in the dataset -3. the batch size which the model will try to infer, if possible. If not, you -will be required to pass these explicitly to the fit method. - -As well, there are two privacy parameters than can be altered: -1. epsilon, a float -2. noise_distribution, a valid string indicating the distriution to use (must be -implemented) - -The BoltonModel offers a helper method,.calculate_class_weight to aid in -class_weight calculation.""" +# ------- +# To fit the model, the optimizer will require additional information about +# the dataset and model.These parameters are: +# 1. the class_weights used +# 2. the number of samples in the dataset +# 3. the batch size which the model will try to infer, if possible. If not, you +# will be required to pass these explicitly to the fit method. +# +# As well, there are two privacy parameters than can be altered: +# 1. epsilon, a float +# 2. 
noise_distribution, a valid string indicating the distriution to use (must be +# implemented) +# +# The BoltonModel offers a helper method,.calculate_class_weight to aid in +# class_weight calculation. # required parameters -class_weight = None # default, use .calculate_class_weight to specify other values +# ------- +class_weight = None # default, use .calculate_class_weight for other values batch_size = None # default, if it cannot be inferred, specify this n_samples = None # default, if it cannot be iferred, specify this # privacy parameters @@ -72,13 +96,15 @@ bolt.fit(x, n_samples=n_samples, noise_distribution=noise_distribution, epochs=2) -"""We may also train a generator object, or try different optimizers and loss -functions. Below, we will see that we must pass the number of samples as the fit -method is unable to infer it for a generator.""" +# ------- +# We may also train a generator object, or try different optimizers and loss +# functions. Below, we will see that we must pass the number of samples as the +# fit method is unable to infer it for a generator. +# ------- optimizer2 = tf.optimizers.Adam() bolt.compile(optimizer2, loss) # required parameters -class_weight = None # default, use .calculate_class_weight to specify other values +class_weight = None # default, use .calculate_class_weight for other values batch_size = None # default, if it cannot be inferred, specify this n_samples = None # default, if it cannot be iferred, specify this # privacy parameters @@ -95,7 +121,9 @@ try: ) except ValueError as e: print(e) -"""And now, re running with the parameter set.""" +# ------- +# And now, re running with the parameter set. +# ------- n_samples = 20 bolt.fit(generator, epsilon=epsilon, @@ -105,51 +133,56 @@ bolt.fit(generator, noise_distribution=noise_distribution, verbose=0 ) -"""You don't have to use the bolton model to use the Bolton method. -There are only a few requirements: -1. make sure any requirements from the loss are implemented in the model. -2. instantiate the optimizer and use it as a context around your fit operation. -""" - -from privacy.bolton.optimizers import Bolton - -"""Here, we create our own model and setup the Bolton optimizer.""" - -class TestModel(tf.keras.Model): - def __init__(self, reg_layer, n_outputs=1): +# ------- +# You don't have to use the bolton model to use the Bolton method. +# There are only a few requirements: +# 1. make sure any requirements from the loss are implemented in the model. +# 2. instantiate the optimizer and use it as a context around the fit operation. +# ------- +# -------------------- Part 2, using the Optimizer +from privacy.bolton.optimizers import Bolton # pylint: disable=wrong-import-position +# ------- +# Here, we create our own model and setup the Bolton optimizer. +# ------- +class TestModel(tf.keras.Model): # pylint: disable=abstract-method + def __init__(self, reg_layer, number_of_outputs=1): super(TestModel, self).__init__(name='test') - self.output_layer = tf.keras.layers.Dense(n_outputs, + self.output_layer = tf.keras.layers.Dense(number_of_outputs, kernel_regularizer=reg_layer ) - def call(self, inputs): + def call(self, inputs): # pylint: disable=arguments-differ return self.output_layer(inputs) optimizer = tf.optimizers.SGD() loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) optimizer = Bolton(optimizer, loss) -"""Now, we instantiate our model and check for 1. 
Since our loss requires L2 -regularization over the kernel, we will pass it to the model.""" +# ------- +# Now, we instantiate our model and check for 1. Since our loss requires L2 +# regularization over the kernel, we will pass it to the model. +# ------- n_outputs = 1 # parameter for model and optimizer context. test_model = TestModel(loss.kernel_regularizer(), n_outputs) test_model.compile(optimizer, loss) -"""We comply with 2., and use the Bolton Optimizer as a context around the fit -method.""" +# ------- +# We comply with 2., and use the Bolton Optimizer as a context around the fit +# method. +# ------- # parameters for context noise_distribution = 'laplace' epsilon = 2 class_weights = 1 # Previously, the fit method auto-detected the class_weights. -# Here, we need to pass the class_weights explicitly. 1 is the equivalent of None. +# Here, we need to pass the class_weights explicitly. 1 is the same as None. n_samples = 20 batch_size = 5 with optimizer( - noise_distribution=noise_distribution, - epsilon=epsilon, - layers=test_model.layers, - class_weights=class_weights, - n_samples=n_samples, - batch_size=batch_size + noise_distribution=noise_distribution, + epsilon=epsilon, + layers=test_model.layers, + class_weights=class_weights, + n_samples=n_samples, + batch_size=batch_size ) as _: test_model.fit(x, y, batch_size=batch_size, epochs=2) From 0082c9ba7682ff2bb389dcfe800e973652642afd Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 22 Jul 2019 11:27:53 -0400 Subject: [PATCH 12/39] Minor doc change and adding README file explaining Bolton Method. --- privacy/bolton/README.md | 56 ++++++++++++++++++++++++++++++++++++++++ privacy/bolton/models.py | 2 +- 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 privacy/bolton/README.md diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md new file mode 100644 index 0000000..ad3c141 --- /dev/null +++ b/privacy/bolton/README.md @@ -0,0 +1,56 @@ +# Bolton Module + +This module contains source code for the Bolton method. This method is a subset +of methods used in the ensuring privacy in machine learning that leverages +additional assumptions to provide a new way of approaching the privacy +guarantees. + +## Bolton Description + +This method uses 4 key steps to achieve privacy guarantees: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R after each batch + 3. Limits learning rate + 4. Use a strongly convex loss function (see compile) + +For more details on the strong convexity requirements, see: +Bolt-on Differential Privacy for Scalable Stochastic Gradient +Descent-based Analytics by Xi Wu et al. + +### Why Bolton? + +The major difference for the Bolton method is that it injects noise post model +convergence, rather than noising gradients or weights during training. This +approach requires some additional constraints listed in the Description. +Should the use-case and model satisfy these constraints, this is another +approach that can be trained to maximize utility while maintaining the privacy. +The paper describes in detail the advantages and disadvantages of this approach +and its results compared to some other methods, namely noising at each iteration +and no noising. + +## Tutorials + +This module has a tutorial that can be found in the root tutorials directory, +under boton_tutorial.py. + +## Contribution + +This module was initially contributed by Georgian Partners with the hope of +growing the tensorflow/privacy library. 
There are several rich use cases for +delta-epsilon privacy in machine learning, some of which can be explored here: +https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e +https://arxiv.org/pdf/1811.04911.pdf + +## Contacts + +In addition to the maintainers of tensorflow/privacy listed in the root +README.md, please feel free to contact members of Georgian Partners. In +particular, + +* Georgian Partners (@georgianpartners) +* Ji Chao Zhang (@Jichaogp) +* Christopher Choquette (@cchoquette) + +## Copyright + +Copyright 2019 - Google LLC diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 2c5c08a..634f94c 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -38,7 +38,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient - Descent-based Analytics by Xi Wu et. al. + Descent-based Analytics by Xi Wu et al. """ def __init__(self, From 7785436cf3550821c4fab60e0c9517ee6678a90d Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 22 Jul 2019 11:30:29 -0400 Subject: [PATCH 13/39] Minor changes to README --- privacy/bolton/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md index ad3c141..aa140e5 100644 --- a/privacy/bolton/README.md +++ b/privacy/bolton/README.md @@ -1,6 +1,6 @@ -# Bolton Module +# Bolton SubPpckage -This module contains source code for the Bolton method. This method is a subset +This package contains source code for the Bolton method. This method is a subset of methods used in the ensuring privacy in machine learning that leverages additional assumptions to provide a new way of approaching the privacy guarantees. @@ -30,12 +30,12 @@ and no noising. ## Tutorials -This module has a tutorial that can be found in the root tutorials directory, +This package has a tutorial that can be found in the root tutorials directory, under boton_tutorial.py. ## Contribution -This module was initially contributed by Georgian Partners with the hope of +This package was initially contributed by Georgian Partners with the hope of growing the tensorflow/privacy library. There are several rich use cases for delta-epsilon privacy in machine learning, some of which can be explored here: https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e From 968ea70060bc459cfe9426e97d9b69d0719b722c Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Mon, 22 Jul 2019 11:30:55 -0400 Subject: [PATCH 14/39] Fixing typos. --- privacy/bolton/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md index aa140e5..95d6b68 100644 --- a/privacy/bolton/README.md +++ b/privacy/bolton/README.md @@ -1,4 +1,4 @@ -# Bolton SubPpckage +# Bolton Subpackage This package contains source code for the Bolton method. 
This method is a subset of methods used in the ensuring privacy in machine learning that leverages From d0ef1b380c84c1f2a4e9877be687bcba64e540dd Mon Sep 17 00:00:00 2001 From: npapernot Date: Thu, 25 Jul 2019 14:38:37 +0000 Subject: [PATCH 15/39] readme fixes --- privacy/bolton/README.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md index 95d6b68..a423b65 100644 --- a/privacy/bolton/README.md +++ b/privacy/bolton/README.md @@ -2,10 +2,10 @@ This package contains source code for the Bolton method. This method is a subset of methods used in the ensuring privacy in machine learning that leverages -additional assumptions to provide a new way of approaching the privacy +additional assumptions to provide a new way of approaching the privacy guarantees. -## Bolton Description +# Bolton Description This method uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). @@ -17,40 +17,40 @@ For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient Descent-based Analytics by Xi Wu et al. -### Why Bolton? +# Why Bolton? The major difference for the Bolton method is that it injects noise post model -convergence, rather than noising gradients or weights during training. This -approach requires some additional constraints listed in the Description. +convergence, rather than noising gradients or weights during training. This +approach requires some additional constraints listed in the Description. Should the use-case and model satisfy these constraints, this is another approach that can be trained to maximize utility while maintaining the privacy. -The paper describes in detail the advantages and disadvantages of this approach +The paper describes in detail the advantages and disadvantages of this approach and its results compared to some other methods, namely noising at each iteration and no noising. -## Tutorials +# Tutorials -This package has a tutorial that can be found in the root tutorials directory, -under boton_tutorial.py. +This package has a tutorial that can be found in the root tutorials directory, +under `bolton_tutorial.py`. -## Contribution +# Contribution -This package was initially contributed by Georgian Partners with the hope of -growing the tensorflow/privacy library. There are several rich use cases for +This package was initially contributed by Georgian Partners with the hope of +growing the tensorflow/privacy library. There are several rich use cases for delta-epsilon privacy in machine learning, some of which can be explored here: https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e https://arxiv.org/pdf/1811.04911.pdf -## Contacts +# Contacts -In addition to the maintainers of tensorflow/privacy listed in the root -README.md, please feel free to contact members of Georgian Partners. In +In addition to the maintainers of tensorflow/privacy listed in the root +README.md, please feel free to contact members of Georgian Partners. 
In particular, -* Georgian Partners (@georgianpartners) -* Ji Chao Zhang (@Jichaogp) -* Christopher Choquette (@cchoquette) +* Georgian Partners(@georgianpartners) +* Ji Chao Zhang(@Jichaogp) +* Christopher Choquette(@cchoquette) -## Copyright +# Copyright Copyright 2019 - Google LLC From fe90e3c59674b1a22c332f02105307ecce127010 Mon Sep 17 00:00:00 2001 From: npapernot Date: Thu, 25 Jul 2019 14:44:21 +0000 Subject: [PATCH 16/39] readme fixes - more --- privacy/bolton/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md index a423b65..4aef36f 100644 --- a/privacy/bolton/README.md +++ b/privacy/bolton/README.md @@ -5,7 +5,7 @@ of methods used in the ensuring privacy in machine learning that leverages additional assumptions to provide a new way of approaching the privacy guarantees. -# Bolton Description +## Bolton Description This method uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). @@ -17,7 +17,7 @@ For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient Descent-based Analytics by Xi Wu et al. -# Why Bolton? +## Why Bolton? The major difference for the Bolton method is that it injects noise post model convergence, rather than noising gradients or weights during training. This @@ -28,12 +28,12 @@ The paper describes in detail the advantages and disadvantages of this approach and its results compared to some other methods, namely noising at each iteration and no noising. -# Tutorials +## Tutorials This package has a tutorial that can be found in the root tutorials directory, under `bolton_tutorial.py`. -# Contribution +## Contribution This package was initially contributed by Georgian Partners with the hope of growing the tensorflow/privacy library. There are several rich use cases for @@ -41,7 +41,7 @@ delta-epsilon privacy in machine learning, some of which can be explored here: https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e https://arxiv.org/pdf/1811.04911.pdf -# Contacts +## Contacts In addition to the maintainers of tensorflow/privacy listed in the root README.md, please feel free to contact members of Georgian Partners. In @@ -51,6 +51,6 @@ particular, * Ji Chao Zhang(@Jichaogp) * Christopher Choquette(@cchoquette) -# Copyright +## Copyright Copyright 2019 - Google LLC From 8e6bcf9b4a6f7cddbf50f4cbc3fe71d9b98c3e8b Mon Sep 17 00:00:00 2001 From: npapernot Date: Thu, 25 Jul 2019 15:37:54 +0000 Subject: [PATCH 17/39] many fixes --- privacy/bolton/__init__.py | 2 +- privacy/bolton/losses_test.py | 83 ++++++++++++++++--------------- privacy/bolton/models.py | 50 +++++++------------ privacy/bolton/models_test.py | 59 +++++++++++----------- privacy/bolton/optimizers_test.py | 52 +++++++++++-------- tutorials/bolton_tutorial.py | 33 ++++++------ 6 files changed, 135 insertions(+), 144 deletions(-) diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 70c39dc..9798177 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -20,7 +20,7 @@ if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): raise ImportError("Please upgrade your version " "of tensorflow from: {0} to at least 2.0.0 to " "use privacy/bolton".format(LooseVersion(tf.__version__))) -if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. 
+if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. pass else: from privacy.bolton.models import BoltonModel diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index 49f3144..0d069f5 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Unit testing for losses.py""" +"""Unit testing for losses.""" from __future__ import absolute_import from __future__ import division @@ -20,11 +20,11 @@ from __future__ import print_function from contextlib import contextmanager from io import StringIO import sys -import tensorflow as tf -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.framework import test_util -from tensorflow.python.keras.regularizers import L1L2 from absl.testing import parameterized +import tensorflow as tf +from tensorflow.python.framework import test_util +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras.regularizers import L1L2 from privacy.bolton.losses import StrongConvexBinaryCrossentropy from privacy.bolton.losses import StrongConvexHuber from privacy.bolton.losses import StrongConvexMixin @@ -43,7 +43,7 @@ def captured_output(): class StrongConvexMixinTests(keras_parameterized.TestCase): - """Tests for the StrongConvexMixin""" + """Tests for the StrongConvexMixin.""" @parameterized.named_parameters([ {'testcase_name': 'beta not implemented', 'fn': 'beta', @@ -58,6 +58,7 @@ class StrongConvexMixinTests(keras_parameterized.TestCase): 'fn': 'radius', 'args': []}, ]) + def test_not_implemented(self, fn, args): """Test that the given fn's are not implemented on the mixin. @@ -75,7 +76,7 @@ class StrongConvexMixinTests(keras_parameterized.TestCase): 'args': []}, ]) def test_return_none(self, fn, args): - """Test that fn of Mixin returns None + """Test that fn of Mixin returns None. Args: fn: fn of Mixin to test @@ -94,7 +95,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'reg_lambda': 1, 'C': 1, 'radius_constant': 1 - }, # pylint: disable=invalid-name + }, # pylint: disable=invalid-name ]) def test_init_params(self, reg_lambda, C, radius_constant): """Test initialization for given arguments. @@ -113,20 +114,20 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'reg_lambda': 1, 'C': -1, 'radius_constant': 1 - }, + }, {'testcase_name': 'negative radius', 'reg_lambda': 1, 'C': 1, 'radius_constant': -1 - }, + }, {'testcase_name': 'negative lambda', 'reg_lambda': -1, 'C': 1, 'radius_constant': 1 - }, # pylint: disable=invalid-name + }, # pylint: disable=invalid-name ]) def test_bad_init_params(self, reg_lambda, C, radius_constant): - """Test invalid domain for given params. Should return ValueError + """Test invalid domain for given params. Should return ValueError. 
Args: reg_lambda: initialization value for reg_lambda arg @@ -149,20 +150,20 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'logits': [-10000], 'y_true': [1], 'result': 10000, - }, + }, {'testcase_name': 'positivee gradient positive logits', 'logits': [10000], 'y_true': [0], 'result': 10000, - }, + }, {'testcase_name': 'both negative', 'logits': [-10000], 'y_true': [0], 'result': 0 - }, + }, ]) def test_calculation(self, logits, y_true, result): - """Test the call method to ensure it returns the correct value + """Test the call method to ensure it returns the correct value. Args: logits: unscaled output of model @@ -181,28 +182,28 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'fn': 'beta', 'args': [1], 'result': tf.constant(2, dtype=tf.float32) - }, + }, {'testcase_name': 'gamma', 'fn': 'gamma', 'init_args': [1, 1, 1], 'args': [], 'result': tf.constant(1, dtype=tf.float32), - }, + }, {'testcase_name': 'lipchitz constant', 'fn': 'lipchitz_constant', 'init_args': [1, 1, 1], 'args': [1], 'result': tf.constant(2, dtype=tf.float32), - }, + }, {'testcase_name': 'kernel regularizer', 'fn': 'kernel_regularizer', 'init_args': [1, 1, 1], 'args': [], 'result': L1L2(l2=0.5), - }, + }, ]) def test_fns(self, init_args, fn, args, result): - """Test that fn of BinaryCrossentropy loss returns the correct result + """Test that fn of BinaryCrossentropy loss returns the correct result. Args: init_args: init values for loss instance @@ -226,7 +227,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): 'fn': None, 'args': None, 'print_res': 'The impact of label smoothing on privacy is unknown.' - }, + }, ]) def test_prints(self, init_args, fn, args, print_res): """Test logger warning from StrongConvexBinaryCrossentropy. @@ -245,7 +246,7 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase): class HuberTests(keras_parameterized.TestCase): - """tests for BinaryCrossesntropy StrongConvex loss""" + """tests for BinaryCrossesntropy StrongConvex loss.""" @parameterized.named_parameters([ {'testcase_name': 'normal', @@ -253,10 +254,10 @@ class HuberTests(keras_parameterized.TestCase): 'c': 1, 'radius_constant': 1, 'delta': 1, - }, + }, ]) def test_init_params(self, reg_lambda, c, radius_constant, delta): - """Test initialization for given arguments + """Test initialization for given arguments. Args: reg_lambda: initialization value for reg_lambda arg @@ -273,25 +274,25 @@ class HuberTests(keras_parameterized.TestCase): 'c': -1, 'radius_constant': 1, 'delta': 1 - }, + }, {'testcase_name': 'negative radius', 'reg_lambda': 1, 'c': 1, 'radius_constant': -1, 'delta': 1 - }, + }, {'testcase_name': 'negative lambda', 'reg_lambda': -1, 'c': 1, 'radius_constant': 1, 'delta': 1 - }, + }, {'testcase_name': 'negative delta', 'reg_lambda': 1, 'c': 1, 'radius_constant': 1, 'delta': -1 - }, + }, ]) def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): """Test invalid domain for given params. 
Should return ValueError @@ -320,49 +321,49 @@ class HuberTests(keras_parameterized.TestCase): 'y_true': 1, 'delta': 1, 'result': 0.01*0.25, - }, + }, {'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary', 'logits': 0.1, 'y_true': 1, 'delta': 1, 'result': 1.9**2 * 0.25, - }, + }, {'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary', 'logits': -0.1, 'y_true': 1, 'delta': 1, 'result': 1.1, - }, + }, {'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary', 'logits': 3.1, 'y_true': 1, 'delta': 2, 'result': 0, - }, + }, {'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary', 'logits': 2.9, 'y_true': 1, 'delta': 2, 'result': 0.01*0.125, - }, + }, {'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary', 'logits': 1.1, 'y_true': 1, 'delta': 2, 'result': 1.9**2 * 0.125, - }, + }, {'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary', 'logits': -1.1, 'y_true': 1, 'delta': 2, 'result': 2.1, - }, + }, {'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary', 'logits': -2.1, 'y_true': -1, 'delta': 1, 'result': 0, - }, + }, ]) def test_calculation(self, logits, y_true, delta, result): """Test the call method to ensure it returns the correct value @@ -384,25 +385,25 @@ class HuberTests(keras_parameterized.TestCase): 'fn': 'beta', 'args': [1], 'result': tf.Variable(1.5, dtype=tf.float32) - }, + }, {'testcase_name': 'gamma', 'fn': 'gamma', 'init_args': [1, 1, 1, 1], 'args': [], 'result': tf.Variable(1, dtype=tf.float32), - }, + }, {'testcase_name': 'lipchitz constant', 'fn': 'lipchitz_constant', 'init_args': [1, 1, 1, 1], 'args': [1], 'result': tf.Variable(2, dtype=tf.float32), - }, + }, {'testcase_name': 'kernel regularizer', 'fn': 'kernel_regularizer', 'init_args': [1, 1, 1, 1], 'args': [], 'result': L1L2(l2=0.5), - }, + }, ]) def test_fns(self, init_args, fn, args, result): """Test that fn of BinaryCrossentropy loss returns the correct result diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 634f94c..00c873c 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -11,15 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Bolton model for bolton method of differentially private ML""" +"""Bolton model for bolton method of differentially private ML.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf -from tensorflow.python.keras.models import Model -from tensorflow.python.keras import optimizers from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras import optimizers +from tensorflow.python.keras.models import Model from privacy.bolton.losses import StrongConvexMixin from privacy.bolton.optimizers import Bolton @@ -44,9 +44,8 @@ class BoltonModel(Model): # pylint: disable=abstract-method def __init__(self, n_outputs, seed=1, - dtype=tf.float32 - ): - """ private constructor. + dtype=tf.float32): + """Private constructor. Args: n_outputs: number of output classes to predict. @@ -64,7 +63,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method self._dtype = dtype def call(self, inputs): # pylint: disable=arguments-differ - """Forward pass of network + """Forward pass of network. 
Args: inputs: inputs to neural network @@ -111,8 +110,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method weighted_metrics=weighted_metrics, target_tensors=target_tensors, distribute=distribute, - **kwargs - ) + **kwargs) def fit(self, x=None, @@ -158,8 +156,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method data_size = None batch_size_ = self._validate_or_infer_batch_size(batch_size, steps_per_epoch, - x - ) + x) # inferring batch_size to be passed to optimizer. batch_size must remain its # initial value when passed to super().fit() if batch_size_ is None: @@ -173,15 +170,13 @@ class BoltonModel(Model): # pylint: disable=abstract-method self.layers, class_weight_, data_size, - batch_size_, - ) as _: + batch_size_) as _: out = super(BoltonModel, self).fit(x=x, y=y, batch_size=batch_size, class_weight=class_weight, steps_per_epoch=steps_per_epoch, - **kwargs - ) + **kwargs) return out def fit_generator(self, @@ -191,8 +186,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method epsilon=2, n_samples=None, steps_per_epoch=None, - **kwargs - ): # pylint: disable=arguments-differ + **kwargs): # pylint: disable=arguments-differ """ This method is the same as fit except for when the passed dataset is a generator. See super method and fit for more details. @@ -218,28 +212,24 @@ class BoltonModel(Model): # pylint: disable=abstract-method data_size = None batch_size = self._validate_or_infer_batch_size(None, steps_per_epoch, - generator - ) + generator) with self.optimizer(noise_distribution, epsilon, self.layers, class_weight, data_size, - batch_size - ) as _: + batch_size) as _: out = super(BoltonModel, self).fit_generator( generator, class_weight=class_weight, steps_per_epoch=steps_per_epoch, - **kwargs - ) + **kwargs) return out def calculate_class_weights(self, class_weights=None, class_counts=None, - num_classes=None - ): + num_classes=None): """Calculates class weighting to be used in training. Args: @@ -283,10 +273,8 @@ class BoltonModel(Model): # pylint: disable=abstract-method elif is_string and class_weights == 'balanced': num_samples = sum(class_counts) weighted_counts = tf.dtypes.cast(tf.math.multiply(num_classes, - class_counts, - ), - self._dtype - ) + class_counts), + self._dtype) class_weights = tf.Variable(num_samples, dtype=self._dtype) / \ tf.Variable(weighted_counts, dtype=self._dtype) else: @@ -298,7 +286,5 @@ class BoltonModel(Model): # pylint: disable=abstract-method raise ValueError( "Detected array length: {0} instead of: {1}".format( class_weights.shape[0], - num_classes - ) - ) + num_classes)) return class_weights diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index a89a490..4433479 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Unit testing for models.py""" +"""Unit testing for models.""" from __future__ import absolute_import from __future__ import division @@ -29,8 +29,10 @@ from privacy.bolton import models from privacy.bolton.optimizers import Bolton from privacy.bolton.losses import StrongConvexMixin + class TestLoss(losses.Loss, StrongConvexMixin): - """Test loss function for testing Bolton model""" + """Test loss function for testing Bolton model.""" + def __init__(self, reg_lambda, C, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda @@ -103,6 +105,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class TestOptimizer(OptimizerV2): """Test optimizer used for testing Bolton model""" + def __init__(self): super(TestOptimizer, self).__init__('test') @@ -128,10 +131,10 @@ class InitTests(keras_parameterized.TestCase): @parameterized.named_parameters([ {'testcase_name': 'normal', 'n_outputs': 1, - }, + }, {'testcase_name': 'many outputs', 'n_outputs': 100, - }, + }, ]) def test_init_params(self, n_outputs): """Test initialization of BoltonModel. @@ -146,7 +149,7 @@ class InitTests(keras_parameterized.TestCase): @parameterized.named_parameters([ {'testcase_name': 'invalid n_outputs', 'n_outputs': -1, - }, + }, ]) def test_bad_init_params(self, n_outputs): """test bad initializations of BoltonModel that should raise errors @@ -163,12 +166,12 @@ class InitTests(keras_parameterized.TestCase): 'n_outputs': 1, 'loss': TestLoss(1, 1, 1), 'optimizer': 'adam', - }, + }, {'testcase_name': 'test compile', 'n_outputs': 100, 'loss': TestLoss(1, 1, 1), 'optimizer': TestOptimizer(), - }, + }, ]) def test_compile(self, n_outputs, loss, optimizer): """test compilation of BoltonModel @@ -189,12 +192,12 @@ class InitTests(keras_parameterized.TestCase): 'n_outputs': 1, 'loss': losses.BinaryCrossentropy(), 'optimizer': 'adam', - }, + }, {'testcase_name': 'Not valid optimizer', 'n_outputs': 1, 'loss': TestLoss(1, 1, 1), 'optimizer': 'ada', - } + } ]) def test_bad_compile(self, n_outputs, loss, optimizer): """test bad compilations of BoltonModel that should raise errors @@ -293,8 +296,7 @@ def _do_fit(n_samples, batch_size=batch_size, n_samples=n_samples, noise_distribution=distribution, - epsilon=epsilon - ) + epsilon=epsilon) return clf @@ -306,19 +308,19 @@ class FitTests(keras_parameterized.TestCase): {'testcase_name': 'iterator fit', 'generator': False, 'reset_n_samples': True, - }, + }, {'testcase_name': 'iterator fit no samples', 'generator': False, 'reset_n_samples': True, - }, + }, {'testcase_name': 'generator fit', 'generator': True, 'reset_n_samples': False, - }, + }, {'testcase_name': 'with callbacks', 'generator': True, 'reset_n_samples': False, - }, + }, ]) def test_fit(self, generator, reset_n_samples): """Tests fitting of BoltonModel @@ -350,7 +352,7 @@ class FitTests(keras_parameterized.TestCase): @parameterized.named_parameters([ {'testcase_name': 'generator fit', 'generator': True, - }, + }, ]) def test_fit_gen(self, generator): """Tests the fit_generator method of BoltonModel @@ -382,12 +384,12 @@ class FitTests(keras_parameterized.TestCase): 'generator': True, 'reset_n_samples': True, 'distribution': 'laplace' - }, + }, {'testcase_name': 'invalid distribution', 'generator': True, 'reset_n_samples': True, 'distribution': 'not_valid' - }, + }, ]) def test_bad_fit(self, generator, reset_n_samples, distribution): """Tests fitting with invalid parameters, which should raise an error @@ -453,8 +455,7 @@ class FitTests(keras_parameterized.TestCase): clf = 
models.BoltonModel(1, 1) expected = clf.calculate_class_weights(class_weights, class_counts, - num_classes - ) + num_classes) if hasattr(expected, 'numpy'): expected = expected.numpy() @@ -467,13 +468,13 @@ class FitTests(keras_parameterized.TestCase): 'class_weights': 'not_valid', 'class_counts': 1, 'num_classes': 1, - 'err_msg': "Detected string class_weights with value: not_valid"}, + 'err_msg': 'Detected string class_weights with value: not_valid'}, {'testcase_name': 'no class counts', 'class_weights': 'balanced', 'class_counts': None, 'num_classes': 1, - 'err_msg': "Class counts must be provided if " - "using class_weights=balanced"}, + 'err_msg': 'Class counts must be provided if ' + 'using class_weights=balanced'}, {'testcase_name': 'no num classes', 'class_weights': 'balanced', 'class_counts': [1], @@ -489,8 +490,8 @@ class FitTests(keras_parameterized.TestCase): 'class_weights': [1], 'class_counts': None, 'num_classes': None, - 'err_msg': "You must pass a value for num_classes if " - "creating an array of class_weights"}, + 'err_msg': 'You must pass a value for num_classes if ' + 'creating an array of class_weights'}, {'testcase_name': 'class counts array, improper shape', 'class_weights': [[1], [1]], 'class_counts': None, @@ -500,14 +501,13 @@ class FitTests(keras_parameterized.TestCase): 'class_weights': [1, 1, 1], 'class_counts': None, 'num_classes': 2, - 'err_msg': "Detected array length:"}, + 'err_msg': 'Detected array length:'}, ]) def test_class_errors(self, class_weights, class_counts, num_classes, - err_msg - ): + err_msg): """Tests the BOltonModel calculate_class_weights method with invalid params which should raise the expected errors. @@ -521,8 +521,7 @@ class FitTests(keras_parameterized.TestCase): with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method clf.calculate_class_weights(class_weights, class_counts, - num_classes - ) + num_classes) if __name__ == '__main__': diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index f95f209..9062e04 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -11,29 +11,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
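
A note on the class weighting exercised by the models_test cases above: with the 'balanced' option, BoltonModel.calculate_class_weights computes weight_i = num_samples / (num_classes * count_i), as the models.py hunks elsewhere in this series show. A small worked sketch of that arithmetic, using made-up per-class counts:

import tensorflow as tf

class_counts = tf.constant([30.0, 10.0])   # made-up sample counts for two classes
num_classes = 2
num_samples = tf.reduce_sum(class_counts)  # 40 samples in total

# 'balanced' weighting: the rarer class receives the proportionally larger weight.
class_weights = num_samples / (num_classes * class_counts)
print(class_weights.numpy())  # approximately [0.67, 2.0]
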
-"""Unit testing for optimizers.py""" +"""Unit testing for optimizers.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import tensorflow as tf -from tensorflow.python.platform import test -from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras.regularizers import L1L2 -from tensorflow.python.keras.initializers import constant -from tensorflow.python.keras import losses -from tensorflow.python.keras.models import Model -from tensorflow.python.framework import test_util -from tensorflow.python import ops as _ops from absl.testing import parameterized -from privacy.bolton.losses import StrongConvexMixin +import tensorflow as tf +from tensorflow.python import ops as _ops +from tensorflow.python.framework import test_util +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import losses +from tensorflow.python.keras.initializers import constant +from tensorflow.python.keras.models import Model +from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 +from tensorflow.python.keras.regularizers import L1L2 +from tensorflow.python.platform import test from privacy.bolton import optimizers as opt +from privacy.bolton.losses import StrongConvexMixin class TestModel(Model): # pylint: disable=abstract-method """Bolton episilon-delta model. + Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). 2. Projects weights to R after each batch @@ -68,7 +69,8 @@ class TestModel(Model): # pylint: disable=abstract-method class TestLoss(losses.Loss, StrongConvexMixin): - """Test loss function for testing Bolton model""" + """Test loss function for testing Bolton model.""" + def __init__(self, reg_lambda, C, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda @@ -77,6 +79,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): def radius(self): """Radius, R, of the hypothesis space W. + W is a convex set that forms the hypothesis space. Returns: radius @@ -117,7 +120,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): ) def max_class_weight(self, class_weight, dtype=tf.float32): - """the maximum weighting in class weights (max value) as a scalar tensor + """the maximum weighting in class weights (max value) as a scalar tensor. Args: class_weight: class weights used @@ -141,6 +144,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class TestOptimizer(OptimizerV2): """Optimizer used for testing the Bolton optimizer""" + def __init__(self): super(TestOptimizer, self).__init__('test') self.not_private = 'test' @@ -180,8 +184,9 @@ class TestOptimizer(OptimizerV2): def limit_learning_rate(self): return 'test' + class BoltonOptimizerTest(keras_parameterized.TestCase): - """Bolton Optimizer tests""" + """Bolton Optimizer tests.""" @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ {'testcase_name': 'getattr', @@ -195,6 +200,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'result': None, 'test_attr': ''}, ]) + def test_fn(self, fn, args, result, test_attr): """test that a fn of Bolton optimizer is working as expected. 
@@ -294,7 +300,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'class_weights': 1}, ]) def test_context_manager(self, noise, epsilon, class_weights): - """Tests the context manager functionality of the optimizer + """Tests the context manager functionality of the optimizer. Args: noise: noise distribution to pick @@ -327,7 +333,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon Date: Thu, 25 Jul 2019 16:13:32 +0000 Subject: [PATCH 18/39] more fixes --- privacy/bolton/__init__.py | 2 +- privacy/bolton/losses.py | 10 ++-- privacy/bolton/losses_test.py | 9 ++-- privacy/bolton/models.py | 51 ++++++++++-------- privacy/bolton/models_test.py | 88 ++++++++++++++++--------------- privacy/bolton/optimizers.py | 4 +- privacy/bolton/optimizers_test.py | 55 +++++++++---------- tutorials/bolton_tutorial.py | 2 +- 8 files changed, 114 insertions(+), 107 deletions(-) diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 9798177..d3c8209 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -16,7 +16,7 @@ import sys from distutils.version import LooseVersion import tensorflow as tf -if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): +if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): raise ImportError("Please upgrade your version " "of tensorflow from: {0} to at least 2.0.0 to " "use privacy/bolton".format(LooseVersion(tf.__version__))) diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 7d9ad32..d4c8795 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -102,11 +102,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): """Strong Convex version of Huber loss using l2 weight regularization.""" def __init__(self, - reg_lambda: float, - C: float, - radius_constant: float, - delta: float, - reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + reg_lambda, + C, + radius_constant, + delta, + reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, dtype=tf.float32): """Constructor. diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index 0d069f5..ead3adb 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -261,8 +261,9 @@ class HuberTests(keras_parameterized.TestCase): Args: reg_lambda: initialization value for reg_lambda arg - C: initialization value for C arg + c: initialization value for C arg radius_constant: initialization value for radius_constant arg + delta: the delta parameter for the huber loss """ # test valid domains for each variable loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta) @@ -295,11 +296,11 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): - """Test invalid domain for given params. Should return ValueError + """Test invalid domain for given params. Should return ValueError. Args: reg_lambda: initialization value for reg_lambda arg - C: initialization value for C arg + c: initialization value for C arg radius_constant: initialization value for radius_constant arg delta: the delta parameter for the huber loss """ @@ -406,7 +407,7 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_fns(self, init_args, fn, args, result): - """Test that fn of BinaryCrossentropy loss returns the correct result + """Test that fn of BinaryCrossentropy loss returns the correct result. 
Args: init_args: init values for loss instance diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 00c873c..fd718d3 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -86,10 +86,12 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs): # pylint: disable=arguments-differ """See super class. Default optimizer used in Bolton method is SGD. + Missing args. + """ if not isinstance(loss, StrongConvexMixin): - raise ValueError("loss function must be a Strongly Convex and therefore " - "extend the StrongConvexMixin.") + raise ValueError('loss function must be a Strongly Convex and therefore ' + 'extend the StrongConvexMixin.') if not self._layers_instantiated: # compile may be called multiple times # for instance, if the input/outputs are not defined until fit. self.output_layer = tf.keras.layers.Dense( @@ -150,7 +152,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method data_size = n_samples elif hasattr(x, 'shape'): data_size = x.shape[0] - elif hasattr(x, "__len__"): + elif hasattr(x, '__len__'): data_size = len(x) else: data_size = None @@ -187,10 +189,12 @@ class BoltonModel(Model): # pylint: disable=abstract-method n_samples=None, steps_per_epoch=None, **kwargs): # pylint: disable=arguments-differ - """ - This method is the same as fit except for when the passed dataset - is a generator. See super method and fit for more details. - Args: + """Fit with a generator.. + + This method is the same as fit except for when the passed dataset + is a generator. See super method and fit for more details. + + Args: n_samples: number of individual samples in x noise_distribution: the distribution to get noise from. epsilon: privacy parameter, which trades off utility and privacy. See @@ -206,7 +210,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method data_size = n_samples elif hasattr(generator, 'shape'): data_size = generator.shape[0] - elif hasattr(generator, "__len__"): + elif hasattr(generator, '__len__'): data_size = len(generator) else: data_size = None @@ -232,13 +236,14 @@ class BoltonModel(Model): # pylint: disable=abstract-method num_classes=None): """Calculates class weighting to be used in training. - Args: + Args: class_weights: str specifying type, array giving weights, or None. class_counts: If class_weights is not None, then an array of the number of samples for each class num_classes: If class_weights is not None, then the number of classes. - Returns: class_weights as 1D tensor, to be passed to model's fit method. + Returns: + class_weights as 1D tensor, to be passed to model's fit method. """ # Value checking class_keys = ['balanced'] @@ -246,14 +251,14 @@ class BoltonModel(Model): # pylint: disable=abstract-method if isinstance(class_weights, str): is_string = True if class_weights not in class_keys: - raise ValueError("Detected string class_weights with " - "value: {0}, which is not one of {1}." - "Please select a valid class_weight type" - "or pass an array".format(class_weights, + raise ValueError('Detected string class_weights with ' + 'value: {0}, which is not one of {1}.' 
+ 'Please select a valid class_weight type' + 'or pass an array'.format(class_weights, class_keys)) if class_counts is None: - raise ValueError("Class counts must be provided if using " - "class_weights=%s" % class_weights) + raise ValueError('Class counts must be provided if using ' + 'class_weights=%s' % class_weights) class_counts_shape = tf.Variable(class_counts, trainable=False, dtype=self._dtype).shape @@ -261,12 +266,12 @@ class BoltonModel(Model): # pylint: disable=abstract-method raise ValueError('class counts must be a 1D array.' 'Detected: {0}'.format(class_counts_shape)) if num_classes is None: - raise ValueError("num_classes must be provided if using " - "class_weights=%s" % class_weights) + raise ValueError('num_classes must be provided if using ' + 'class_weights=%s' % class_weights) elif class_weights is not None: if num_classes is None: - raise ValueError("You must pass a value for num_classes if " - "creating an array of class_weights") + raise ValueError('You must pass a value for num_classes if ' + 'creating an array of class_weights') # performing class weight calculation if class_weights is None: class_weights = 1 @@ -280,11 +285,11 @@ class BoltonModel(Model): # pylint: disable=abstract-method else: class_weights = _ops.convert_to_tensor_v2(class_weights) if len(class_weights.shape) != 1: - raise ValueError("Detected class_weights shape: {0} instead of " - "1D array".format(class_weights.shape)) + raise ValueError('Detected class_weights shape: {0} instead of ' + '1D array'.format(class_weights.shape)) if class_weights.shape[0] != num_classes: raise ValueError( - "Detected array length: {0} instead of: {1}".format( + 'Detected array length: {0} instead of: {1}'.format( class_weights.shape[0], num_classes)) return class_weights diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 4433479..88602cf 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -17,17 +17,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 -from tensorflow.python.keras import losses -from tensorflow.python.framework import ops as _ops -from tensorflow.python.keras.regularizers import L1L2 from absl.testing import parameterized +import tensorflow as tf +from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import losses +from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 +from tensorflow.python.keras.regularizers import L1L2 from privacy.bolton import models -from privacy.bolton.optimizers import Bolton from privacy.bolton.losses import StrongConvexMixin +from privacy.bolton.optimizers import Bolton class TestLoss(losses.Loss, StrongConvexMixin): @@ -41,9 +40,11 @@ class TestLoss(losses.Loss, StrongConvexMixin): def radius(self): """Radius, R, of the hypothesis space W. + W is a convex set that forms the hypothesis space. 
- Returns: radius + Returns: + radius """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -69,7 +70,8 @@ class TestLoss(losses.Loss, StrongConvexMixin): Args: class_weight: class weights used - Returns: L + Returns: + L """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -81,11 +83,10 @@ class TestLoss(losses.Loss, StrongConvexMixin): ) def max_class_weight(self, class_weight): - """the maximum weighting in class weights (max value) as a scalar tensor + """the maximum weighting in class weights (max value) as a scalar tensor. Args: class_weight: class weights used - dtype: the data type for tensor conversions. Returns: maximum class weighting as tensor scalar @@ -104,7 +105,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class TestOptimizer(OptimizerV2): - """Test optimizer used for testing Bolton model""" + """Test optimizer used for testing Bolton model.""" def __init__(self): super(TestOptimizer, self).__init__('test') @@ -152,7 +153,7 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_bad_init_params(self, n_outputs): - """test bad initializations of BoltonModel that should raise errors + """test bad initializations of BoltonModel that should raise errors. Args: n_outputs: number of output neurons @@ -174,12 +175,12 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_compile(self, n_outputs, loss, optimizer): - """test compilation of BoltonModel + """test compilation of BoltonModel. Args: - n_outputs: number of output neurons - loss: instantiated TestLoss instance - optimizer: instanced TestOptimizer instance + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instanced TestOptimizer instance """ # test compilation of valid tf.optimizer and tf.loss with self.cached_session(): @@ -200,12 +201,12 @@ class InitTests(keras_parameterized.TestCase): } ]) def test_bad_compile(self, n_outputs, loss, optimizer): - """test bad compilations of BoltonModel that should raise errors + """test bad compilations of BoltonModel that should raise errors. Args: - n_outputs: number of output neurons - loss: instantiated TestLoss instance - optimizer: instanced TestOptimizer instance + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instanced TestOptimizer instance """ # test compilaton of invalid tf.optimizer and non instantiated loss. with self.cached_session(): @@ -215,17 +216,18 @@ class InitTests(keras_parameterized.TestCase): def _cat_dataset(n_samples, input_dim, n_classes, generator=False): - """ - Creates a categorically encoded dataset (y is categorical). - returns the specified dataset either as a static array or as a generator. - Will have evenly split samples across each output class. - Each output class will be a different point in the input space. + """Creates a categorically encoded dataset. + + Creates a categorically encoded dataset (y is categorical). + returns the specified dataset either as a static array or as a generator. + Will have evenly split samples across each output class. + Each output class will be a different point in the input space. 
Args: - n_samples: number of rows - input_dim: input dimensionality - n_classes: output dimensionality - generator: False for array, True for generator + n_samples: number of rows + input_dim: input dimensionality + n_classes: output dimensionality + generator: False for array, True for generator Returns: X as (n_samples, input_dim), Y as (n_samples, n_outputs) """ @@ -246,6 +248,7 @@ def _cat_dataset(n_samples, input_dim, n_classes, generator=False): return dataset return x_set, y_set + def _do_fit(n_samples, input_dim, n_outputs, @@ -301,7 +304,7 @@ def _do_fit(n_samples, class FitTests(keras_parameterized.TestCase): - """Test cases for keras model fitting""" + """Test cases for keras model fitting.""" # @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ @@ -323,7 +326,7 @@ class FitTests(keras_parameterized.TestCase): }, ]) def test_fit(self, generator, reset_n_samples): - """Tests fitting of BoltonModel + """Tests fitting of BoltonModel. Args: generator: True for generator test, False for iterator test. @@ -355,7 +358,7 @@ class FitTests(keras_parameterized.TestCase): }, ]) def test_fit_gen(self, generator): - """Tests the fit_generator method of BoltonModel + """Tests the fit_generator method of BoltonModel. Args: generator: True to test with a generator dataset @@ -392,7 +395,7 @@ class FitTests(keras_parameterized.TestCase): }, ]) def test_bad_fit(self, generator, reset_n_samples, distribution): - """Tests fitting with invalid parameters, which should raise an error + """Tests fitting with invalid parameters, which should raise an error. Args: generator: True to test with generator, False is iterator @@ -442,9 +445,8 @@ class FitTests(keras_parameterized.TestCase): class_weights, class_counts, num_classes, - result - ): - """Tests the BOltonModel calculate_class_weights method + result): + """Tests the BOltonModel calculate_class_weights method. Args: class_weights: the class_weights to use @@ -496,26 +498,28 @@ class FitTests(keras_parameterized.TestCase): 'class_weights': [[1], [1]], 'class_counts': None, 'num_classes': 2, - 'err_msg': "Detected class_weights shape"}, + 'err_msg': 'Detected class_weights shape'}, {'testcase_name': 'class counts array, wrong number classes', 'class_weights': [1, 1, 1], 'class_counts': None, 'num_classes': 2, 'err_msg': 'Detected array length:'}, ]) + def test_class_errors(self, class_weights, class_counts, num_classes, err_msg): - """Tests the BOltonModel calculate_class_weights method with invalid params - which should raise the expected errors. + """Tests the BOltonModel calculate_class_weights method. + + This test passes invalid params which should raise the expected errors. Args: class_weights: the class_weights to use class_counts: count of number of samples for each class num_classes: number of outputs neurons - result: expected result + err_msg: """ clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index c2c9349..214757b 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -108,8 +108,8 @@ class Bolton(optimizer_v2.OptimizerV2): Descent-based Analytics by Xi Wu et. al. """ def __init__(self, # pylint: disable=super-init-not-called - optimizer: optimizer_v2.OptimizerV2, - loss: StrongConvexMixin, + optimizer, + loss, dtype=tf.float32, ): """Constructor. 
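
The optimizers.py hunk above shows the Bolton optimizer taking an ordinary Keras optimizer plus a strongly convex loss, and the models.py changes in this series show BoltonModel.compile doing that wrapping automatically. A minimal end-to-end sketch of the resulting workflow, assuming only the API visible in these diffs; the toy data and the reg_lambda, C, radius_constant, and epsilon values are illustrative, not recommendations:

import tensorflow as tf
from privacy.bolton.losses import StrongConvexBinaryCrossentropy
from privacy.bolton.models import BoltonModel

# Toy binary-classification data; shapes are illustrative.
x = tf.random.normal((100, 4))
y = tf.cast(tf.random.uniform((100, 1)) > 0.5, tf.float32)

# Strongly convex loss with illustrative constants.
loss = StrongConvexBinaryCrossentropy(reg_lambda=0.001, C=1.0, radius_constant=1.0)

model = BoltonModel(n_outputs=1)
model.compile(optimizer='sgd', loss=loss)  # compile wraps 'sgd' in the Bolton optimizer

# fit runs inside the Bolton context manager: weights are projected to the
# R-ball each batch and noised after training, with epsilon and the noise
# distribution controlling the privacy/utility trade-off.
model.fit(x, y,
          epsilon=2,
          noise_distribution='laplace',
          batch_size=20,
          n_samples=100,
          epochs=2)
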
diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 9062e04..621dbf6 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -263,12 +263,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_project(self, r, shape, n_out, init_value, result): """test that a fn of Bolton optimizer is working as expected. - Args: - fn: method of Optimizer to test - args: args to optimizer fn - result: the expected result - test_attr: None if the fn returns the test result. Otherwise, this is - the attribute of Bolton to check against result with. + Missing args: """ tf.random.set_seed(1) @@ -455,12 +450,14 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'args': [1, 1]}, ]) def test_not_reroute_fn(self, fn, args): - """Test that a fn that should not be rerouted to the internal optimizer is - in face not rerouted. + """Test function is not rerouted. + + Test that a fn that should not be rerouted to the internal optimizer is + in fact not rerouted. - Args: - fn: fn to test - args: arguments to that fn + Args: + fn: fn to test + args: arguments to that fn """ @tf.function def test_run(fn, args): @@ -492,12 +489,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'attr': '_iterations'} ]) def test_reroute_attr(self, attr): - """ test that attribute of internal optimizer is correctly rerouted to - the internal optimizer + """Test a function is rerouted. + + Test that attribute of internal optimizer is correctly rerouted to the + internal optimizer. - Args: - attr: attribute to test - result: result after checking attribute + Args: + attr: attribute to test """ loss = TestLoss(1, 1, 1) internal_optimizer = TestOptimizer() @@ -510,12 +508,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'attr': '_not_valid'} ]) def test_attribute_error(self, attr): - """Test that attribute of internal optimizer is correctly rerouted to - the internal optimizer + """Test rerouting of attributes. + + Test that attribute of internal optimizer is correctly rerouted to the + internal optimizer - Args: - attr: attribute to test - result: result after checking attribute + Args: + attr: attribute to test """ loss = TestLoss(1, 1, 1) internal_optimizer = TestOptimizer() @@ -537,9 +536,7 @@ class SchedulerTest(keras_parameterized.TestCase): """ test that attribute of internal optimizer is correctly rerouted to the internal optimizer - Args: - attr: attribute to test - result: result after checking attribute + Missing args """ scheduler = opt.GammaBetaDecreasingStep() with self.assertRaisesRegexp(Exception, err_msg): # pylint: disable=deprecated-method @@ -557,12 +554,12 @@ class SchedulerTest(keras_parameterized.TestCase): 'res': 0.333333333}, ]) def test_call(self, step, res): - """ test that attribute of internal optimizer is correctly rerouted to - the internal optimizer + """Test call. 
+ + Test that attribute of internal optimizer is correctly rerouted to the + internal optimizer - Args: - attr: attribute to test - result: result after checking attribute + Missing Args: """ beta = _ops.convert_to_tensor_v2(2, dtype=tf.float32) gamma = _ops.convert_to_tensor_v2(1, dtype=tf.float32) diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 5357229..25b4bdd 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -116,7 +116,7 @@ try: noise_distribution=noise_distribution, verbose=0) except ValueError as e: - print(e) + print e # ------- # And now, re running with the parameter set. # ------- From 0317ce80776b75f584ae87d08fe62b59d8f589f3 Mon Sep 17 00:00:00 2001 From: npapernot Date: Thu, 25 Jul 2019 16:17:56 +0000 Subject: [PATCH 19/39] print --- tutorials/bolton_tutorial.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 25b4bdd..8cd0aa9 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Tutorial for bolton module, the model and the optimizer.""" +from __future__ import print_function import tensorflow as tf # pylint: disable=wrong-import-position from privacy.bolton import losses # pylint: disable=wrong-import-position from privacy.bolton import models # pylint: disable=wrong-import-position @@ -116,7 +117,7 @@ try: noise_distribution=noise_distribution, verbose=0) except ValueError as e: - print e + print(e) # ------- # And now, re running with the parameter set. # ------- From 92f97ae32ca5c5b4576742f4fd7b827d0cd22153 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Sat, 27 Jul 2019 13:54:19 -0400 Subject: [PATCH 20/39] Fixing missing args. --- privacy/bolton/models.py | 39 ++++++++++++------------------- privacy/bolton/optimizers_test.py | 14 ++++++++--- tutorials/bolton_tutorial.py | 2 ++ 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index fd718d3..221b2d2 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -76,18 +76,17 @@ class BoltonModel(Model): # pylint: disable=abstract-method def compile(self, optimizer, loss, - metrics=None, - loss_weights=None, - sample_weight_mode=None, - weighted_metrics=None, - target_tensors=None, - distribute=None, kernel_initializer=tf.initializers.GlorotUniform, **kwargs): # pylint: disable=arguments-differ """See super class. Default optimizer used in Bolton method is SGD. - Missing args. - + Args: + optimizer: The optimizer to use. This will be automatically wrapped + with the Bolton Optimizer. + loss: The loss function to use. Must be a StrongConvex loss (extend the + StrongConvexMixin). + kernel_initializer: The kernel initializer to use for the single layer. + kwargs: kwargs to keras Model.compile. See super. 
""" if not isinstance(loss, StrongConvexMixin): raise ValueError('loss function must be a Strongly Convex and therefore ' @@ -104,15 +103,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method optimizer = optimizers.get(optimizer) optimizer = Bolton(optimizer, loss) - super(BoltonModel, self).compile(optimizer, - loss=loss, - metrics=metrics, - loss_weights=loss_weights, - sample_weight_mode=sample_weight_mode, - weighted_metrics=weighted_metrics, - target_tensors=target_tensors, - distribute=distribute, - **kwargs) + super(BoltonModel, self).compile(optimizer, loss=loss, **kwargs) def fit(self, x=None, @@ -135,14 +126,14 @@ class BoltonModel(Model): # pylint: disable=abstract-method See super implementation for more details. Args: - n_samples: the number of individual samples in x. - epsilon: privacy parameter, which trades off between utility an privacy. - See the bolton paper for more description. - noise_distribution: the distribution to pull noise from. - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. + n_samples: the number of individual samples in x. + epsilon: privacy parameter, which trades off between utility an privacy. + See the bolton paper for more description. + noise_distribution: the distribution to pull noise from. + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. - See the super method for descriptions on the rest of the arguments. + See the super method for descriptions on the rest of the arguments. """ if class_weight is None: class_weight_ = self.calculate_class_weights(class_weight) diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 621dbf6..df67dd1 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -263,7 +263,12 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_project(self, r, shape, n_out, init_value, result): """test that a fn of Bolton optimizer is working as expected. - Missing args: + Args: + r: Radius value for StrongConvex loss function. + shape: input_dimensionality + n_out: output dimensionality + init_value: the initial value for 'constant' kernel initializer + result: the expected output after projection.fFF """ tf.random.set_seed(1) @@ -536,7 +541,8 @@ class SchedulerTest(keras_parameterized.TestCase): """ test that attribute of internal optimizer is correctly rerouted to the internal optimizer - Missing args + Args: + err_msg: The expected error message from the scheduler bad call. """ scheduler = opt.GammaBetaDecreasingStep() with self.assertRaisesRegexp(Exception, err_msg): # pylint: disable=deprecated-method @@ -559,7 +565,9 @@ class SchedulerTest(keras_parameterized.TestCase): Test that attribute of internal optimizer is correctly rerouted to the internal optimizer - Missing Args: + Args: + step: step number to 'GammaBetaDecreasingStep' 'Scheduler'. + res: expected result from call to 'GammaBetaDecreasingStep' 'Scheduler'. """ beta = _ops.convert_to_tensor_v2(2, dtype=tf.float32) gamma = _ops.convert_to_tensor_v2(1, dtype=tf.float32) diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 8cd0aa9..ae9707e 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Tutorial for bolton module, the model and the optimizer.""" +from __future__ import absolute_import +from __future__ import division from __future__ import print_function import tensorflow as tf # pylint: disable=wrong-import-position from privacy.bolton import losses # pylint: disable=wrong-import-position From 71c4a11eb9ad66a78fb13428987366887ea20beb Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Sat, 27 Jul 2019 14:14:05 -0400 Subject: [PATCH 21/39] Fixing new pylint errors. --- privacy/bolton/losses.py | 4 +-- privacy/bolton/models.py | 38 ++++++++++----------- privacy/bolton/models_test.py | 55 +++++++++++++++---------------- privacy/bolton/optimizers.py | 5 ++- privacy/bolton/optimizers_test.py | 22 ++++++------- 5 files changed, 61 insertions(+), 63 deletions(-) diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index d4c8795..65a7710 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -56,7 +56,7 @@ class StrongConvexMixin: Args: class_weight: the class weights as scalar or 1d tensor, where its - dimensionality is equal to the number of outputs. + dimensionality is equal to the number of outputs. Returns: Beta @@ -115,7 +115,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): C: Penalty parameter C of the loss term radius_constant: constant defining the length of the radius delta: delta value in huber loss. When to switch from quadratic to - absolute deviation. + absolute deviation. reduction: reduction type to use. See super class name: Name of the loss instance dtype: tf datatype to use for tensor conversions. diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 221b2d2..5f827a1 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -180,20 +180,20 @@ class BoltonModel(Model): # pylint: disable=abstract-method n_samples=None, steps_per_epoch=None, **kwargs): # pylint: disable=arguments-differ - """Fit with a generator.. - + """Fit with a generator. + This method is the same as fit except for when the passed dataset is a generator. See super method and fit for more details. - - Args: - n_samples: number of individual samples in x - noise_distribution: the distribution to get noise from. - epsilon: privacy parameter, which trades off utility and privacy. See - Bolton paper for more description. - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. - See the super method for descriptions on the rest of the arguments. + Args: + n_samples: number of individual samples in x + noise_distribution: the distribution to get noise from. + epsilon: privacy parameter, which trades off utility and privacy. See + Bolton paper for more description. + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + + See the super method for descriptions on the rest of the arguments. """ if class_weight is None: class_weight = self.calculate_class_weights(class_weight) @@ -227,14 +227,14 @@ class BoltonModel(Model): # pylint: disable=abstract-method num_classes=None): """Calculates class weighting to be used in training. - Args: - class_weights: str specifying type, array giving weights, or None. - class_counts: If class_weights is not None, then an array of - the number of samples for each class - num_classes: If class_weights is not None, then the number of - classes. - Returns: - class_weights as 1D tensor, to be passed to model's fit method. 
+ Args: + class_weights: str specifying type, array giving weights, or None. + class_counts: If class_weights is not None, then an array of + the number of samples for each class + num_classes: If class_weights is not None, then the number of + classes. + Returns: + class_weights as 1D tensor, to be passed to model's fit method. """ # Value checking class_keys = ['balanced'] diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 88602cf..213f0d6 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -43,7 +43,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): W is a convex set that forms the hypothesis space. - Returns: + Returns: radius """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -70,7 +70,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): Args: class_weight: class weights used - Returns: + Returns: L """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -175,12 +175,12 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_compile(self, n_outputs, loss, optimizer): - """test compilation of BoltonModel. + """Test compilation of BoltonModel. Args: n_outputs: number of output neurons loss: instantiated TestLoss instance - optimizer: instanced TestOptimizer instance + optimizer: instantiated TestOptimizer instance """ # test compilation of valid tf.optimizer and tf.loss with self.cached_session(): @@ -206,7 +206,7 @@ class InitTests(keras_parameterized.TestCase): Args: n_outputs: number of output neurons loss: instantiated TestLoss instance - optimizer: instanced TestOptimizer instance + optimizer: instantiated TestOptimizer instance """ # test compilaton of invalid tf.optimizer and non instantiated loss. with self.cached_session(): @@ -262,17 +262,17 @@ def _do_fit(n_samples, """Instantiate necessary components for fitting and perform a model fit. Args: - n_samples: number of samples in dataset - input_dim: the sample dimensionality - n_outputs: number of output neurons - epsilon: privacy parameter - generator: True to create a generator, False to use an iterator - batch_size: batch_size to use - reset_n_samples: True to set _samples to None prior to fitting. - False does nothing - optimizer: instance of TestOptimizer - loss: instance of TestLoss - distribution: distribution to get noise from. + n_samples: number of samples in dataset + input_dim: the sample dimensionality + n_outputs: number of output neurons + epsilon: privacy parameter + generator: True to create a generator, False to use an iterator + batch_size: batch_size to use + reset_n_samples: True to set _samples to None prior to fitting. + False does nothing + optimizer: instance of TestOptimizer + loss: instance of TestLoss + distribution: distribution to get noise from. Returns: BoltonModel instsance """ @@ -329,8 +329,8 @@ class FitTests(keras_parameterized.TestCase): """Tests fitting of BoltonModel. Args: - generator: True for generator test, False for iterator test. - reset_n_samples: True to reset the n_samples to None, False does nothing + generator: True for generator test, False for iterator test. + reset_n_samples: True to reset the n_samples to None, False does nothing """ loss = TestLoss(1, 1, 1) optimizer = Bolton(TestOptimizer(), loss) @@ -398,10 +398,10 @@ class FitTests(keras_parameterized.TestCase): """Tests fitting with invalid parameters, which should raise an error. 
Args: - generator: True to test with generator, False is iterator - reset_n_samples: True to reset the n_samples param to None prior to - passing it to fit - distribution: distribution to get noise from. + generator: True to test with generator, False is iterator + reset_n_samples: True to reset the n_samples param to None prior to + passing it to fit + distribution: distribution to get noise from. """ with self.assertRaises(ValueError): loss = TestLoss(1, 1, 1) @@ -505,21 +505,20 @@ class FitTests(keras_parameterized.TestCase): 'num_classes': 2, 'err_msg': 'Detected array length:'}, ]) - def test_class_errors(self, class_weights, class_counts, num_classes, err_msg): """Tests the BOltonModel calculate_class_weights method. - + This test passes invalid params which should raise the expected errors. Args: - class_weights: the class_weights to use - class_counts: count of number of samples for each class - num_classes: number of outputs neurons - err_msg: + class_weights: the class_weights to use. + class_counts: count of number of samples for each class. + num_classes: number of outputs neurons. + err_msg: The expected error message. """ clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 214757b..7c34ebe 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -310,12 +310,11 @@ class Bolton(optimizer_v2.OptimizerV2): Args: noise_distribution: the noise distribution to pick. - see _accepted_distributions and get_noise for - possible values. + see _accepted_distributions and get_noise for possible values. epsilon: privacy parameter. Lower gives more privacy but less utility. layers: list of Keras/Tensorflow layers. Can be found as model.layers class_weights: class_weights used, which may either be a scalar or 1D - tensor with dim == n_classes. + tensor with dim == n_classes. n_samples number of rows/individual samples in the training set batch_size: batch size used. """ diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index df67dd1..3d7882a 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -209,7 +209,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): args: args to optimizer fn result: the expected result test_attr: None if the fn returns the test result. Otherwise, this is - the attribute of Bolton to check against result with. + the attribute of Bolton to check against result with. """ tf.random.set_seed(1) @@ -303,9 +303,9 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): """Tests the context manager functionality of the optimizer. Args: - noise: noise distribution to pick - epsilon: epsilon privacy parameter to use - class_weights: class_weights to use + noise: noise distribution to pick + epsilon: epsilon privacy parameter to use + class_weights: class_weights to use """ @tf.function def test_run(): @@ -336,9 +336,9 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): """Tests the context domains. Args: - noise: noise distribution to pick - epsilon: epsilon privacy parameter to use - err_msg: the expected error message + noise: noise distribution to pick + epsilon: epsilon privacy parameter to use + err_msg: the expected error message """ @@ -456,7 +456,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_not_reroute_fn(self, fn, args): """Test function is not rerouted. 
- + Test that a fn that should not be rerouted to the internal optimizer is in fact not rerouted. @@ -495,7 +495,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_reroute_attr(self, attr): """Test a function is rerouted. - + Test that attribute of internal optimizer is correctly rerouted to the internal optimizer. @@ -514,7 +514,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_attribute_error(self, attr): """Test rerouting of attributes. - + Test that attribute of internal optimizer is correctly rerouted to the internal optimizer @@ -561,7 +561,7 @@ class SchedulerTest(keras_parameterized.TestCase): ]) def test_call(self, step, res): """Test call. - + Test that attribute of internal optimizer is correctly rerouted to the internal optimizer From 19ce36777d38d9f4e10283388743842a61b4e1fc Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 21:20:40 +0000 Subject: [PATCH 22/39] format fixes --- privacy/bolton/losses.py | 12 +++---- privacy/bolton/losses_test.py | 11 ++++--- privacy/bolton/models.py | 38 +++++++++++++--------- privacy/bolton/models_test.py | 17 +++++----- privacy/bolton/optimizers.py | 6 ++-- privacy/bolton/optimizers_test.py | 53 +++++++++++++++++-------------- 6 files changed, 76 insertions(+), 61 deletions(-) diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index d4c8795..08ce089 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -212,12 +212,12 @@ class StrongConvexBinaryCrossentropy( """Strongly Convex BinaryCrossentropy loss using l2 weight regularization.""" def __init__(self, - reg_lambda: float, - C: float, - radius_constant: float, - from_logits: bool = True, - label_smoothing: float = 0, - reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + reg_lambda, + C, + radius_constant, + from_logits=True, + label_smoothing=0, + reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, dtype=tf.float32): """ Args: diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index ead3adb..44a8d11 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -367,12 +367,13 @@ class HuberTests(keras_parameterized.TestCase): }, ]) def test_calculation(self, logits, y_true, delta, result): - """Test the call method to ensure it returns the correct value + """Test the call method to ensure it returns the correct value. - Args: - logits: unscaled output of model - y_true: label - result: correct loss calculation value + Args: + logits: unscaled output of model + y_true: label + delta: + result: correct loss calculation value """ logits = tf.Variable(logits, False, dtype=tf.float32) y_true = tf.Variable(y_true, False, dtype=tf.float32) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index fd718d3..6bd313b 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -86,8 +86,16 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs): # pylint: disable=arguments-differ """See super class. Default optimizer used in Bolton method is SGD. - Missing args. 
- + Args: + optimizer: + loss: + metrics: + loss_weights: + sample_weight_mode: + weighted_metrics: + target_tensors: + distribute: + kernel_initializer: """ if not isinstance(loss, StrongConvexMixin): raise ValueError('loss function must be a Strongly Convex and therefore ' @@ -126,15 +134,15 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs): # pylint: disable=arguments-differ """Reroutes to super fit with Bolton delta-epsilon privacy requirements. - Note, inputs must be normalized s.t. ||x|| < 1. - Requirements are as follows: + Note, inputs must be normalized s.t. ||x|| < 1. + Requirements are as follows: 1. Adds noise to weights after training (output perturbation). 2. Projects weights to R after each batch 3. Limits learning rate 4. Use a strongly convex loss function (see compile) - See super implementation for more details. + See super implementation for more details. - Args: + Args: n_samples: the number of individual samples in x. epsilon: privacy parameter, which trades off between utility an privacy. See the bolton paper for more description. @@ -189,20 +197,20 @@ class BoltonModel(Model): # pylint: disable=abstract-method n_samples=None, steps_per_epoch=None, **kwargs): # pylint: disable=arguments-differ - """Fit with a generator.. - + """Fit with a generator. + This method is the same as fit except for when the passed dataset is a generator. See super method and fit for more details. - + Args: - n_samples: number of individual samples in x + generator: + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. noise_distribution: the distribution to get noise from. epsilon: privacy parameter, which trades off utility and privacy. See Bolton paper for more description. - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. - - See the super method for descriptions on the rest of the arguments. + n_samples: number of individual samples in x + steps_per_epoch: """ if class_weight is None: class_weight = self.calculate_class_weights(class_weight) @@ -242,7 +250,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method the number of samples for each class num_classes: If class_weights is not None, then the number of classes. - Returns: + Returns: class_weights as 1D tensor, to be passed to model's fit method. """ # Value checking diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 88602cf..ca87eae 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -32,10 +32,10 @@ from privacy.bolton.optimizers import Bolton class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model.""" - def __init__(self, reg_lambda, C, radius_constant, name='test'): + def __init__(self, reg_lambda, C_arg, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda - self.C = C # pylint: disable=invalid-name + self.C = C_arg # pylint: disable=invalid-name self.radius_constant = radius_constant def radius(self): @@ -43,7 +43,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): W is a convex set that forms the hypothesis space. 
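The radius R documented above is the quantity the Bolton optimizer projects the trained weights back onto after each batch. A rough sketch of that projection under assumed names (this is not the library's project_weights_to_r itself):

import tensorflow as tf

def project_to_r_ball(kernel, radius):
  # Columns whose L2 norm exceeds R are rescaled back onto the R-ball;
  # columns already inside the ball are left unchanged.
  norms = tf.norm(kernel, axis=0)
  scale = tf.minimum(1.0, radius / norms)
  return kernel * scale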
- Returns: + Returns: radius """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -70,7 +70,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): Args: class_weight: class weights used - Returns: + Returns: L """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -207,7 +207,7 @@ class InitTests(keras_parameterized.TestCase): n_outputs: number of output neurons loss: instantiated TestLoss instance optimizer: instanced TestOptimizer instance - """ + """ # test compilaton of invalid tf.optimizer and non instantiated loss. with self.cached_session(): with self.assertRaises((ValueError, AttributeError)): @@ -228,9 +228,10 @@ def _cat_dataset(n_samples, input_dim, n_classes, generator=False): input_dim: input dimensionality n_classes: output dimensionality generator: False for array, True for generator + Returns: X as (n_samples, input_dim), Y as (n_samples, n_outputs) - """ + """ x_stack = [] y_stack = [] for i_class in range(n_classes): @@ -512,7 +513,7 @@ class FitTests(keras_parameterized.TestCase): num_classes, err_msg): """Tests the BOltonModel calculate_class_weights method. - + This test passes invalid params which should raise the expected errors. Args: @@ -520,7 +521,7 @@ class FitTests(keras_parameterized.TestCase): class_counts: count of number of samples for each class num_classes: number of outputs neurons err_msg: - """ + """ clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method clf.calculate_class_weights(class_weights, diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 214757b..e7a1431 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -298,9 +298,9 @@ class Bolton(optimizer_v2.OptimizerV2): return self def __call__(self, - noise_distribution: str, - epsilon: float, - layers: list, + noise_distribution, + epsilon, + layers, class_weights, n_samples, batch_size diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 621dbf6..c9c1f88 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -51,11 +51,8 @@ class TestModel(Model): # pylint: disable=abstract-method Args: n_outputs: number of output neurons - epsilon: level of privacy guarantee - noise_distribution: distribution to pull weight perturbations from - weights_initializer: initializer for weights - seed: random seed to use - dtype: data type to use for tensors + input_shape: + init_value: """ super(TestModel, self).__init__(name='bolton', dynamic=False) self.n_outputs = n_outputs @@ -71,18 +68,19 @@ class TestModel(Model): # pylint: disable=abstract-method class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model.""" - def __init__(self, reg_lambda, C, radius_constant, name='test'): + def __init__(self, reg_lambda, C_arg, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda - self.C = C # pylint: disable=invalid-name + self.C = C_arg # pylint: disable=invalid-name self.radius_constant = radius_constant def radius(self): """Radius, R, of the hypothesis space W. - W is a convex set that forms the hypothesis space. + W is a convex set that forms the hypothesis space. 
- Returns: radius + Returns: + radius """ return _ops.convert_to_tensor_v2(self.radius_constant, dtype=tf.float32) @@ -105,10 +103,11 @@ class TestLoss(losses.Loss, StrongConvexMixin): def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument """Lipchitz constant, L. - Args: - class_weight: class weights used + Args: + class_weight: class weights used - Returns: L + Returns: + L """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -143,7 +142,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class TestOptimizer(OptimizerV2): - """Optimizer used for testing the Bolton optimizer""" + """Optimizer used for testing the Bolton optimizer.""" def __init__(self): super(TestOptimizer, self).__init__('test') @@ -263,8 +262,12 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_project(self, r, shape, n_out, init_value, result): """test that a fn of Bolton optimizer is working as expected. - Missing args: - + Args: + r: + shape: + n_out: + init_value: + result: """ tf.random.set_seed(1) @tf.function @@ -451,7 +454,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_not_reroute_fn(self, fn, args): """Test function is not rerouted. - + Test that a fn that should not be rerouted to the internal optimizer is in fact not rerouted. @@ -490,7 +493,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_reroute_attr(self, attr): """Test a function is rerouted. - + Test that attribute of internal optimizer is correctly rerouted to the internal optimizer. @@ -509,7 +512,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_attribute_error(self, attr): """Test rerouting of attributes. - + Test that attribute of internal optimizer is correctly rerouted to the internal optimizer @@ -524,7 +527,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): class SchedulerTest(keras_parameterized.TestCase): - """GammaBeta Scheduler tests""" + """GammaBeta Scheduler tests.""" @parameterized.named_parameters([ {'testcase_name': 'not in context', @@ -533,10 +536,10 @@ class SchedulerTest(keras_parameterized.TestCase): } ]) def test_bad_call(self, err_msg): - """ test that attribute of internal optimizer is correctly rerouted to - the internal optimizer + """Test attribute of internal opt correctly rerouted to the internal opt. - Missing args + Args: + err_msg: """ scheduler = opt.GammaBetaDecreasingStep() with self.assertRaisesRegexp(Exception, err_msg): # pylint: disable=deprecated-method @@ -555,11 +558,13 @@ class SchedulerTest(keras_parameterized.TestCase): ]) def test_call(self, step, res): """Test call. 
- + Test that attribute of internal optimizer is correctly rerouted to the internal optimizer - Missing Args: + Args: + step: + res: """ beta = _ops.convert_to_tensor_v2(2, dtype=tf.float32) gamma = _ops.convert_to_tensor_v2(1, dtype=tf.float32) From 034ae8fea4160bc2e14cd87aa832d4a7f7fd14c3 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 21:29:03 +0000 Subject: [PATCH 23/39] conflicts in models test --- privacy/bolton/models_test.py | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 3e3f2db..4f1b3ab 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -206,13 +206,8 @@ class InitTests(keras_parameterized.TestCase): Args: n_outputs: number of output neurons loss: instantiated TestLoss instance -<<<<<<< HEAD - optimizer: instanced TestOptimizer instance - """ -======= optimizer: instantiated TestOptimizer instance - """ ->>>>>>> 71c4a11eb9ad66a78fb13428987366887ea20beb + """ # test compilaton of invalid tf.optimizer and non instantiated loss. with self.cached_session(): with self.assertRaises((ValueError, AttributeError)): @@ -517,17 +512,6 @@ class FitTests(keras_parameterized.TestCase): num_classes, err_msg): """Tests the BOltonModel calculate_class_weights method. -<<<<<<< HEAD - - This test passes invalid params which should raise the expected errors. - - Args: - class_weights: the class_weights to use - class_counts: count of number of samples for each class - num_classes: number of outputs neurons - err_msg: - """ -======= This test passes invalid params which should raise the expected errors. @@ -536,8 +520,7 @@ class FitTests(keras_parameterized.TestCase): class_counts: count of number of samples for each class. num_classes: number of outputs neurons. err_msg: The expected error message. - """ ->>>>>>> 71c4a11eb9ad66a78fb13428987366887ea20beb + """ clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method clf.calculate_class_weights(class_weights, From 33c3f058ac4943980c027292f8d8c5b24ff6c0f9 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 21:34:02 +0000 Subject: [PATCH 24/39] conflicts in models --- privacy/bolton/models.py | 58 +++++----------------------------------- 1 file changed, 7 insertions(+), 51 deletions(-) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 80b9d14..8883d34 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -81,24 +81,12 @@ class BoltonModel(Model): # pylint: disable=abstract-method """See super class. Default optimizer used in Bolton method is SGD. Args: -<<<<<<< HEAD - optimizer: - loss: - metrics: - loss_weights: - sample_weight_mode: - weighted_metrics: - target_tensors: - distribute: - kernel_initializer: -======= optimizer: The optimizer to use. This will be automatically wrapped with the Bolton Optimizer. loss: The loss function to use. Must be a StrongConvex loss (extend the StrongConvexMixin). kernel_initializer: The kernel initializer to use for the single layer. kwargs: kwargs to keras Model.compile. See super. ->>>>>>> 71c4a11eb9ad66a78fb13428987366887ea20beb """ if not isinstance(loss, StrongConvexMixin): raise ValueError('loss function must be a Strongly Convex and therefore ' @@ -137,25 +125,17 @@ class BoltonModel(Model): # pylint: disable=abstract-method 4. Use a strongly convex loss function (see compile) See super implementation for more details. 
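The four requirements above are what the Bolton-wrapped fit enforces. A minimal usage sketch against the API as it stands in this patch series; the data, hyperparameter values, and choice of SGD below are illustrative assumptions rather than a recommended configuration:

import tensorflow as tf
from privacy.bolton.models import BoltonModel
from privacy.bolton.losses import StrongConvexBinaryCrossentropy

x = tf.random.uniform((100, 4), maxval=0.4)  # toy inputs kept inside the unit ball (||x|| < 1)
y = tf.cast(tf.random.uniform((100, 1)) > 0.5, tf.float32)

model = BoltonModel(1)  # single output neuron
loss = StrongConvexBinaryCrossentropy(1.0, 1.0, 1.0)  # reg_lambda, C, radius_constant
model.compile(optimizer=tf.keras.optimizers.SGD(), loss=loss)  # compile wraps the optimizer with Bolton
model.fit(x, y,
          epsilon=2,                      # privacy parameter
          noise_distribution='laplace',   # noise added to weights after training
          n_samples=100,
          batch_size=20,
          epochs=2)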
-<<<<<<< HEAD - Args: - n_samples: the number of individual samples in x. - epsilon: privacy parameter, which trades off between utility an privacy. - See the bolton paper for more description. - noise_distribution: the distribution to pull noise from. - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. -======= Args: + x: + y: + batch_size: + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. n_samples: the number of individual samples in x. epsilon: privacy parameter, which trades off between utility an privacy. See the bolton paper for more description. noise_distribution: the distribution to pull noise from. - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. ->>>>>>> 71c4a11eb9ad66a78fb13428987366887ea20beb - - See the super method for descriptions on the rest of the arguments. + steps_per_epoch: """ if class_weight is None: class_weight_ = self.calculate_class_weights(class_weight) @@ -206,8 +186,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method This method is the same as fit except for when the passed dataset is a generator. See super method and fit for more details. -<<<<<<< HEAD - + Args: generator: class_weight: the class weights to be used. Can be a scalar or 1D tensor @@ -217,18 +196,6 @@ class BoltonModel(Model): # pylint: disable=abstract-method Bolton paper for more description. n_samples: number of individual samples in x steps_per_epoch: -======= - - Args: - n_samples: number of individual samples in x - noise_distribution: the distribution to get noise from. - epsilon: privacy parameter, which trades off utility and privacy. See - Bolton paper for more description. - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. - - See the super method for descriptions on the rest of the arguments. ->>>>>>> 71c4a11eb9ad66a78fb13428987366887ea20beb """ if class_weight is None: class_weight = self.calculate_class_weights(class_weight) @@ -262,7 +229,6 @@ class BoltonModel(Model): # pylint: disable=abstract-method num_classes=None): """Calculates class weighting to be used in training. -<<<<<<< HEAD Args: class_weights: str specifying type, array giving weights, or None. class_counts: If class_weights is not None, then an array of @@ -271,16 +237,6 @@ class BoltonModel(Model): # pylint: disable=abstract-method classes. Returns: class_weights as 1D tensor, to be passed to model's fit method. -======= - Args: - class_weights: str specifying type, array giving weights, or None. - class_counts: If class_weights is not None, then an array of - the number of samples for each class - num_classes: If class_weights is not None, then the number of - classes. - Returns: - class_weights as 1D tensor, to be passed to model's fit method. ->>>>>>> 71c4a11eb9ad66a78fb13428987366887ea20beb """ # Value checking class_keys = ['balanced'] From f06443d50eca6ff1570682fafdac9df67ffec4ac Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 21:43:19 +0000 Subject: [PATCH 25/39] more lint --- privacy/bolton/optimizers.py | 70 +++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 080ec48..75148da 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""Bolton Optimizer for bolton method""" +"""Bolton Optimizer for bolton method.""" from __future__ import absolute_import from __future__ import division @@ -28,8 +28,10 @@ _accepted_distributions = ['laplace'] # implemented distributions for noising class GammaBetaDecreasingStep( optimizer_v2.learning_rate_schedule.LearningRateSchedule): """Computes LR as minimum of 1/beta and 1/(gamma * step) at each step. - A required step for privacy guarantees. + + This is a required step for privacy guarantees. """ + def __init__(self): self.is_init = False self.beta = None @@ -37,11 +39,13 @@ class GammaBetaDecreasingStep( def __call__(self, step): """Computes and returns the learning rate. - Args: - step: the current iteration number - Returns: - decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per - the Bolton privacy requirements. + + Args: + step: the current iteration number + + Returns: + decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per + the Bolton privacy requirements. """ if not self.is_init: raise AttributeError('Please initialize the {0} Learning Rate Scheduler.' @@ -49,13 +53,13 @@ class GammaBetaDecreasingStep( '{1} as a context manager, ' 'as desired'.format(self.__class__.__name__, Bolton.__class__.__name__ - ) - ) + ) + ) dtype = self.beta.dtype one = tf.constant(1, dtype) return tf.math.minimum(tf.math.reduce_min(one/self.beta), one/(self.gamma*math_ops.cast(step, dtype)) - ) + ) def get_config(self): """Return config to setup the learning rate scheduler.""" @@ -107,22 +111,24 @@ class Bolton(optimizer_v2.OptimizerV2): Bolt-on Differential Privacy for Scalable Stochastic Gradient Descent-based Analytics by Xi Wu et. al. """ + def __init__(self, # pylint: disable=super-init-not-called optimizer, loss, dtype=tf.float32, - ): + ): """Constructor. - Args: - optimizer: Optimizer_v2 or subclass to be used as the optimizer - (wrapped). - loss: StrongConvexLoss function that the model is being compiled with. + Args: + optimizer: Optimizer_v2 or subclass to be used as the optimizer + (wrapped). + loss: StrongConvexLoss function that the model is being compiled with. + dtype: dtype """ if not isinstance(loss, StrongConvexMixin): - raise ValueError("loss function must be a Strongly Convex and therefore " - "extend the StrongConvexMixin.") + raise ValueError('loss function must be a Strongly Convex and therefore ' + 'extend the StrongConvexMixin.') self._private_attributes = ['_internal_optimizer', 'dtype', 'noise_distribution', @@ -134,7 +140,7 @@ class Bolton(optimizer_v2.OptimizerV2): 'layers', 'batch_size', '_is_init' - ] + ] self._internal_optimizer = optimizer self.learning_rate = GammaBetaDecreasingStep() # use the Bolton Learning # rate scheduler, as required for privacy guarantees. This will still need @@ -154,6 +160,9 @@ class Bolton(optimizer_v2.OptimizerV2): Args: force: True to normalize regardless of previous weight values. False to check if weights > R-ball and only normalize then. + + Raises: + Exception: """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -171,14 +180,17 @@ class Bolton(optimizer_v2.OptimizerV2): ) def get_noise(self, input_dim, output_dim): - """Sample noise to be added to weights for privacy guarantee + """Sample noise to be added to weights for privacy guarantee. 
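A rough standalone sketch of that sampling step, drawing a uniformly random direction and giving it a Gamma-distributed magnitude; the helper name and the scale argument are assumptions, since the real implementation derives the scale from the loss constants, epsilon, and the number of samples:

import tensorflow as tf

def sample_weight_noise(input_dim, output_dim, scale):
  # Random direction with unit-norm columns.
  direction = tf.random.normal((input_dim, output_dim))
  direction = direction / tf.norm(direction, axis=0)
  # Gamma-distributed magnitude for each output dimension.
  magnitude = tf.random.gamma([output_dim], alpha=input_dim, beta=1.0 / scale)
  return direction * magnitude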
- Args: - input_dim: the input dimensionality for the weights - output_dim the output dimensionality for the weights + Args: + input_dim: the input dimensionality for the weights + output_dim the output dimensionality for the weights - Returns: - Noise in shape of layer's weights to be added to the weights. + Returns: + Noise in shape of layer's weights to be added to the weights. + + Raises: + Exception: """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -206,7 +218,7 @@ class Bolton(optimizer_v2.OptimizerV2): beta=1 / beta, seed=1, dtype=self.dtype - ) + ) return unit_vector * gamma raise NotImplementedError('Noise distribution: {0} is not ' 'a valid distribution'.format(distribution)) @@ -236,7 +248,7 @@ class Bolton(optimizer_v2.OptimizerV2): "".format(self.__class__.__name__, self._internal_optimizer.__class__.__name__, name - ) + ) ) def __setattr__(self, key, value): @@ -304,7 +316,7 @@ class Bolton(optimizer_v2.OptimizerV2): class_weights, n_samples, batch_size - ): + ): """Accepts required values for bolton method from context entry point. Stores them on the optimizer for use throughout fitting. @@ -328,7 +340,7 @@ class Bolton(optimizer_v2.OptimizerV2): self.noise_distribution = noise_distribution self.learning_rate.initialize(self.loss.beta(class_weights), self.loss.gamma() - ) + ) self.epsilon = tf.constant(epsilon, dtype=self.dtype) self.class_weights = tf.constant(class_weights, dtype=self.dtype) self.n_samples = tf.constant(n_samples, dtype=self.dtype) @@ -354,7 +366,7 @@ class Bolton(optimizer_v2.OptimizerV2): output_dim = layer.units noise = self.get_noise(input_dim, output_dim, - ) + ) layer.kernel = tf.math.add(layer.kernel, noise) self.noise_distribution = None self.learning_rate.de_initialize() From ddf17c90914fa97bc71bf2b1d8668f48b09e4899 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 21:55:14 +0000 Subject: [PATCH 26/39] more lint --- privacy/bolton/losses.py | 57 ++++++++++++++++--------------- privacy/bolton/models.py | 26 ++++++++------ privacy/bolton/models_test.py | 5 +-- privacy/bolton/optimizers_test.py | 8 ++--- 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 6aa270f..9bd15e8 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -11,21 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Loss functions for bolton method""" +"""Loss functions for bolton method.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function + import tensorflow as tf -from tensorflow.python.keras import losses -from tensorflow.python.keras.utils import losses_utils from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras import losses from tensorflow.python.keras.regularizers import L1L2 +from tensorflow.python.keras.utils import losses_utils from tensorflow.python.platform import tf_logging as logging class StrongConvexMixin: - """ + """Strong Convex Mixin base class. + Strong Convex Mixin base class for any loss function that will be used with Bolton model. Subclasses must be strongly convex and implement the associated constants. 
They must also conform to the requirements of tf losses @@ -85,7 +87,7 @@ class StrongConvexMixin: return None def max_class_weight(self, class_weight, dtype): - """the maximum weighting in class weights (max value) as a scalar tensor + """The maximum weighting in class weights (max value) as a scalar tensor. Args: class_weight: class weights used @@ -103,7 +105,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): def __init__(self, reg_lambda, - C, + c_arg, radius_constant, delta, reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, @@ -117,31 +119,30 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): delta: delta value in huber loss. When to switch from quadratic to absolute deviation. reduction: reduction type to use. See super class - name: Name of the loss instance dtype: tf datatype to use for tensor conversions. Returns: Loss values per sample. """ - if C <= 0: - raise ValueError('c: {0}, should be >= 0'.format(C)) + if c_arg <= 0: + raise ValueError("c: {0}, should be >= 0".format(c_arg)) if reg_lambda <= 0: raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) if radius_constant <= 0: - raise ValueError('radius_constant: {0}, should be >= 0'.format( + raise ValueError("radius_constant: {0}, should be >= 0".format( radius_constant )) if delta <= 0: - raise ValueError('delta: {0}, should be >= 0'.format( + raise ValueError("delta: {0}, should be >= 0".format( delta )) - self.C = C # pylint: disable=invalid-name + self.C = c_arg # pylint: disable=invalid-name self.delta = delta self.radius_constant = radius_constant self.dtype = dtype self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexHuber, self).__init__( - name='strongconvexhuber', + name="strongconvexhuber", reduction=reduction, ) @@ -179,7 +180,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): max_class_weight = self.max_class_weight(class_weight, self.dtype) delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype - ) + ) return self.C * max_class_weight / (delta * tf.constant(2, dtype=self.dtype)) + \ self.reg_lambda @@ -213,53 +214,53 @@ class StrongConvexBinaryCrossentropy( def __init__(self, reg_lambda, - C, + c_arg, radius_constant, from_logits=True, label_smoothing=0, reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, dtype=tf.float32): - """ + """StrongConvexBinaryCrossentropy class. + Args: reg_lambda: Weight regularization constant - C: Penalty parameter C of the loss term + c_arg: Penalty parameter C of the loss term radius_constant: constant defining the length of the radius - reduction: reduction type to use. See super class from_logits: True if the input are unscaled logits. False if they are already scaled. label_smoothing: amount of smoothing to perform on labels relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the impact of this parameter's effect on privacy is not known and thus the default should be used. - name: Name of the loss instance + reduction: reduction type to use. See super class dtype: tf datatype to use for tensor conversions. """ if label_smoothing != 0: - logging.warning('The impact of label smoothing on privacy is unknown. ' - 'Use label smoothing at your own risk as it may not ' - 'guarantee privacy.') + logging.warning("The impact of label smoothing on privacy is unknown. 
" + "Use label smoothing at your own risk as it may not " + "guarantee privacy.") if reg_lambda <= 0: raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) - if C <= 0: - raise ValueError('c: {0}, should be >= 0'.format(C)) + if c_arg <= 0: + raise ValueError("c: {0}, should be >= 0".format(c_arg)) if radius_constant <= 0: - raise ValueError('radius_constant: {0}, should be >= 0'.format( + raise ValueError("radius_constant: {0}, should be >= 0".format( radius_constant )) self.dtype = dtype - self.C = C # pylint: disable=invalid-name + self.C = c_arg # pylint: disable=invalid-name self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) super(StrongConvexBinaryCrossentropy, self).__init__( reduction=reduction, - name='strongconvexbinarycrossentropy', + name="strongconvexbinarycrossentropy", from_logits=from_logits, label_smoothing=label_smoothing, ) self.radius_constant = radius_constant def call(self, y_true, y_pred): - """Computes loss + """Computes loss. Args: y_true: Ground truth values. diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 8883d34..b1aa367 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -125,17 +125,21 @@ class BoltonModel(Model): # pylint: disable=abstract-method 4. Use a strongly convex loss function (see compile) See super implementation for more details. - Args: - x: - y: - batch_size: - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. - n_samples: the number of individual samples in x. - epsilon: privacy parameter, which trades off between utility an privacy. - See the bolton paper for more description. - noise_distribution: the distribution to pull noise from. - steps_per_epoch: + Args: + x: + y: + batch_size: + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + n_samples: the number of individual samples in x. + epsilon: privacy parameter, which trades off between utility an privacy. + See the bolton paper for more description. + noise_distribution: the distribution to pull noise from. + steps_per_epoch: + kwargs: kwargs to keras Model.fit. See super. 
+ + Returns: + output """ if class_weight is None: class_weight_ = self.calculate_class_weights(class_weight) diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index 4f1b3ab..ead15eb 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -32,10 +32,10 @@ from privacy.bolton.optimizers import Bolton class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model.""" - def __init__(self, reg_lambda, C_arg, radius_constant, name='test'): + def __init__(self, reg_lambda, c_arg, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda - self.C = C_arg # pylint: disable=invalid-name + self.C = c_arg # pylint: disable=invalid-name self.radius_constant = radius_constant def radius(self): @@ -506,6 +506,7 @@ class FitTests(keras_parameterized.TestCase): 'num_classes': 2, 'err_msg': 'Detected array length:'}, ]) + def test_class_errors(self, class_weights, class_counts, diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 32a9f63..d5adbe1 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -68,10 +68,10 @@ class TestModel(Model): # pylint: disable=abstract-method class TestLoss(losses.Loss, StrongConvexMixin): """Test loss function for testing Bolton model.""" - def __init__(self, reg_lambda, C_arg, radius_constant, name='test'): + def __init__(self, reg_lambda, c_arg, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) self.reg_lambda = reg_lambda - self.C = C_arg # pylint: disable=invalid-name + self.C = c_arg # pylint: disable=invalid-name self.radius_constant = radius_constant def radius(self): @@ -80,7 +80,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): W is a convex set that forms the hypothesis space. Returns: - radius + a tensor """ return _ops.convert_to_tensor_v2(self.radius_constant, dtype=tf.float32) @@ -107,7 +107,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class_weight: class weights used Returns: - L + constant L """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) From 32c76e588af36561e37b5c5878c3c5730a776ecf Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:00:53 +0000 Subject: [PATCH 27/39] more lint --- privacy/bolton/optimizers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 75148da..088b97b 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -40,12 +40,12 @@ class GammaBetaDecreasingStep( def __call__(self, step): """Computes and returns the learning rate. - Args: - step: the current iteration number + Args: + step: the current iteration number - Returns: - decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per - the Bolton privacy requirements. + Returns: + decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per + the Bolton privacy requirements. """ if not self.is_init: raise AttributeError('Please initialize the {0} Learning Rate Scheduler.' 
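The GammaBetaDecreasingStep schedule documented above caps the learning rate at 1/beta and then decays it as 1/(gamma * step). A small standalone sketch of that rule with an assumed function name:

import tensorflow as tf

def bolton_learning_rate(step, beta, gamma):
  # lr = min(1/beta, 1/(gamma * step)); beta and gamma come from the strongly convex loss.
  one = tf.constant(1.0, tf.float32)
  step = tf.cast(step, tf.float32)
  return tf.math.minimum(one / beta, one / (gamma * step))

For example, with beta = 2 and gamma = 1 the rate is 0.5 for the first two steps and then decays roughly as 1/step.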
From ed93cf6f44916065e7a28a19f9eba38d6ec5ea24 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:09:21 +0000 Subject: [PATCH 28/39] more lint --- privacy/bolton/models.py | 80 +++++++++++++++---------------- privacy/bolton/models_test.py | 42 ++++++++-------- privacy/bolton/optimizers.py | 32 +++++++------ privacy/bolton/optimizers_test.py | 67 +++++++++++++------------- 4 files changed, 113 insertions(+), 108 deletions(-) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index b1aa367..7efd59d 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -117,29 +117,29 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs): # pylint: disable=arguments-differ """Reroutes to super fit with Bolton delta-epsilon privacy requirements. - Note, inputs must be normalized s.t. ||x|| < 1. - Requirements are as follows: - 1. Adds noise to weights after training (output perturbation). - 2. Projects weights to R after each batch - 3. Limits learning rate - 4. Use a strongly convex loss function (see compile) - See super implementation for more details. + Note, inputs must be normalized s.t. ||x|| < 1. + Requirements are as follows: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R after each batch + 3. Limits learning rate + 4. Use a strongly convex loss function (see compile) + See super implementation for more details. - Args: - x: - y: - batch_size: - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. - n_samples: the number of individual samples in x. - epsilon: privacy parameter, which trades off between utility an privacy. - See the bolton paper for more description. - noise_distribution: the distribution to pull noise from. - steps_per_epoch: - kwargs: kwargs to keras Model.fit. See super. + Args: + x: + y: + batch_size: + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + n_samples: the number of individual samples in x. + epsilon: privacy parameter, which trades off between utility an privacy. + See the bolton paper for more description. + noise_distribution: the distribution to pull noise from. + steps_per_epoch: + kwargs: kwargs to keras Model.fit. See super. - Returns: - output + Returns: + output """ if class_weight is None: class_weight_ = self.calculate_class_weights(class_weight) @@ -188,18 +188,18 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs): # pylint: disable=arguments-differ """Fit with a generator. - This method is the same as fit except for when the passed dataset - is a generator. See super method and fit for more details. + This method is the same as fit except for when the passed dataset + is a generator. See super method and fit for more details. - Args: - generator: - class_weight: the class weights to be used. Can be a scalar or 1D tensor - whose dim == n_classes. - noise_distribution: the distribution to get noise from. - epsilon: privacy parameter, which trades off utility and privacy. See - Bolton paper for more description. - n_samples: number of individual samples in x - steps_per_epoch: + Args: + generator: + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + noise_distribution: the distribution to get noise from. + epsilon: privacy parameter, which trades off utility and privacy. See + Bolton paper for more description. 
+ n_samples: number of individual samples in x + steps_per_epoch: """ if class_weight is None: class_weight = self.calculate_class_weights(class_weight) @@ -233,14 +233,14 @@ class BoltonModel(Model): # pylint: disable=abstract-method num_classes=None): """Calculates class weighting to be used in training. - Args: - class_weights: str specifying type, array giving weights, or None. - class_counts: If class_weights is not None, then an array of - the number of samples for each class - num_classes: If class_weights is not None, then the number of - classes. - Returns: - class_weights as 1D tensor, to be passed to model's fit method. + Args: + class_weights: str specifying type, array giving weights, or None. + class_counts: If class_weights is not None, then an array of + the number of samples for each class + num_classes: If class_weights is not None, then the number of + classes. + Returns: + class_weights as 1D tensor, to be passed to model's fit method. """ # Value checking class_keys = ['balanced'] diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index ead15eb..daeba1a 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -203,10 +203,10 @@ class InitTests(keras_parameterized.TestCase): def test_bad_compile(self, n_outputs, loss, optimizer): """test bad compilations of BoltonModel that should raise errors. - Args: - n_outputs: number of output neurons - loss: instantiated TestLoss instance - optimizer: instantiated TestOptimizer instance + Args: + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instantiated TestOptimizer instance """ # test compilaton of invalid tf.optimizer and non instantiated loss. with self.cached_session(): @@ -218,19 +218,19 @@ class InitTests(keras_parameterized.TestCase): def _cat_dataset(n_samples, input_dim, n_classes, generator=False): """Creates a categorically encoded dataset. - Creates a categorically encoded dataset (y is categorical). - returns the specified dataset either as a static array or as a generator. - Will have evenly split samples across each output class. - Each output class will be a different point in the input space. + Creates a categorically encoded dataset (y is categorical). + returns the specified dataset either as a static array or as a generator. + Will have evenly split samples across each output class. + Each output class will be a different point in the input space. - Args: - n_samples: number of rows - input_dim: input dimensionality - n_classes: output dimensionality - generator: False for array, True for generator + Args: + n_samples: number of rows + input_dim: input dimensionality + n_classes: output dimensionality + generator: False for array, True for generator - Returns: - X as (n_samples, input_dim), Y as (n_samples, n_outputs) + Returns: + X as (n_samples, input_dim), Y as (n_samples, n_outputs) """ x_stack = [] y_stack = [] @@ -514,13 +514,13 @@ class FitTests(keras_parameterized.TestCase): err_msg): """Tests the BOltonModel calculate_class_weights method. - This test passes invalid params which should raise the expected errors. + This test passes invalid params which should raise the expected errors. - Args: - class_weights: the class_weights to use. - class_counts: count of number of samples for each class. - num_classes: number of outputs neurons. - err_msg: The expected error message. + Args: + class_weights: the class_weights to use. + class_counts: count of number of samples for each class. 
+ num_classes: number of outputs neurons. + err_msg: The expected error message. """ clf = models.BoltonModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 088b97b..8910889 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -119,11 +119,11 @@ class Bolton(optimizer_v2.OptimizerV2): ): """Constructor. - Args: - optimizer: Optimizer_v2 or subclass to be used as the optimizer - (wrapped). - loss: StrongConvexLoss function that the model is being compiled with. - dtype: dtype + Args: + optimizer: Optimizer_v2 or subclass to be used as the optimizer + (wrapped). + loss: StrongConvexLoss function that the model is being compiled with. + dtype: dtype """ if not isinstance(loss, StrongConvexMixin): @@ -182,15 +182,15 @@ class Bolton(optimizer_v2.OptimizerV2): def get_noise(self, input_dim, output_dim): """Sample noise to be added to weights for privacy guarantee. - Args: - input_dim: the input dimensionality for the weights - output_dim the output dimensionality for the weights + Args: + input_dim: the input dimensionality for the weights + output_dim the output dimensionality for the weights - Returns: - Noise in shape of layer's weights to be added to the weights. + Returns: + Noise in shape of layer's weights to be added to the weights. - Raises: - Exception: + Raises: + Exception: """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -228,7 +228,9 @@ class Bolton(optimizer_v2.OptimizerV2): return self._internal_optimizer.from_config(*args, **kwargs) def __getattr__(self, name): - """return _internal_optimizer off self instance, and everything else + """Get attr. + + return _internal_optimizer off self instance, and everything else from the _internal_optimizer instance. Args: @@ -253,6 +255,7 @@ class Bolton(optimizer_v2.OptimizerV2): def __setattr__(self, key, value): """ Set attribute to self instance if its the internal optimizer. + Reroute everything else to the _internal_optimizer. Args: @@ -318,6 +321,7 @@ class Bolton(optimizer_v2.OptimizerV2): batch_size ): """Accepts required values for bolton method from context entry point. + Stores them on the optimizer for use throughout fitting. Args: @@ -327,7 +331,7 @@ class Bolton(optimizer_v2.OptimizerV2): layers: list of Keras/Tensorflow layers. Can be found as model.layers class_weights: class_weights used, which may either be a scalar or 1D tensor with dim == n_classes. - n_samples number of rows/individual samples in the training set + n_samples: number of rows/individual samples in the training set batch_size: batch size used. """ if epsilon <= 0: diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index d5adbe1..4b08d66 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -77,10 +77,10 @@ class TestLoss(losses.Loss, StrongConvexMixin): def radius(self): """Radius, R, of the hypothesis space W. - W is a convex set that forms the hypothesis space. + W is a convex set that forms the hypothesis space. - Returns: - a tensor + Returns: + a tensor """ return _ops.convert_to_tensor_v2(self.radius_constant, dtype=tf.float32) @@ -103,11 +103,11 @@ class TestLoss(losses.Loss, StrongConvexMixin): def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument """Lipchitz constant, L. 
- Args: - class_weight: class weights used + Args: + class_weight: class weights used - Returns: - constant L + Returns: + constant L """ return _ops.convert_to_tensor_v2(1, dtype=tf.float32) @@ -262,12 +262,12 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_project(self, r, shape, n_out, init_value, result): """test that a fn of Bolton optimizer is working as expected. - Args: - r: Radius value for StrongConvex loss function. - shape: input_dimensionality - n_out: output dimensionality - init_value: the initial value for 'constant' kernel initializer - result: the expected output after projection. + Args: + r: Radius value for StrongConvex loss function. + shape: input_dimensionality + n_out: output dimensionality + init_value: the initial value for 'constant' kernel initializer + result: the expected output after projection. """ tf.random.set_seed(1) @tf.function @@ -455,12 +455,12 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_not_reroute_fn(self, fn, args): """Test function is not rerouted. - Test that a fn that should not be rerouted to the internal optimizer is - in fact not rerouted. + Test that a fn that should not be rerouted to the internal optimizer is + in fact not rerouted. - Args: - fn: fn to test - args: arguments to that fn + Args: + fn: fn to test + args: arguments to that fn """ @tf.function def test_run(fn, args): @@ -494,11 +494,11 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_reroute_attr(self, attr): """Test a function is rerouted. - Test that attribute of internal optimizer is correctly rerouted to the - internal optimizer. + Test that attribute of internal optimizer is correctly rerouted to the + internal optimizer. - Args: - attr: attribute to test + Args: + attr: attribute to test """ loss = TestLoss(1, 1, 1) internal_optimizer = TestOptimizer() @@ -513,11 +513,11 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): def test_attribute_error(self, attr): """Test rerouting of attributes. - Test that attribute of internal optimizer is correctly rerouted to the - internal optimizer + Test that attribute of internal optimizer is correctly rerouted to the + internal optimizer - Args: - attr: attribute to test + Args: + attr: attribute to test """ loss = TestLoss(1, 1, 1) internal_optimizer = TestOptimizer() @@ -538,8 +538,8 @@ class SchedulerTest(keras_parameterized.TestCase): def test_bad_call(self, err_msg): """Test attribute of internal opt correctly rerouted to the internal opt. - Args: - err_msg: The expected error message from the scheduler bad call. + Args: + err_msg: The expected error message from the scheduler bad call. """ scheduler = opt.GammaBetaDecreasingStep() with self.assertRaisesRegexp(Exception, err_msg): # pylint: disable=deprecated-method @@ -558,12 +558,13 @@ class SchedulerTest(keras_parameterized.TestCase): ]) def test_call(self, step, res): """Test call. - Test that attribute of internal optimizer is correctly rerouted to the - internal optimizer - Args: - step: step number to 'GammaBetaDecreasingStep' 'Scheduler'. - res: expected result from call to 'GammaBetaDecreasingStep' 'Scheduler'. + Test that attribute of internal optimizer is correctly rerouted to the + internal optimizer + + Args: + step: step number to 'GammaBetaDecreasingStep' 'Scheduler'. + res: expected result from call to 'GammaBetaDecreasingStep' 'Scheduler'. 
""" beta = _ops.convert_to_tensor_v2(2, dtype=tf.float32) gamma = _ops.convert_to_tensor_v2(1, dtype=tf.float32) From b1f1c47caea666e0abc7bcb4d8e26eceb26f09bf Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:15:52 +0000 Subject: [PATCH 29/39] more lint --- privacy/bolton/losses.py | 17 +++++++++-------- privacy/bolton/losses_test.py | 10 +++++----- privacy/bolton/models.py | 15 ++++++++------- privacy/bolton/models_test.py | 3 ++- privacy/bolton/optimizers.py | 15 +++++++++++---- 5 files changed, 35 insertions(+), 25 deletions(-) diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 9bd15e8..d38c7cd 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -40,6 +40,7 @@ class StrongConvexMixin: def radius(self): """Radius, R, of the hypothesis space W. + W is a convex set that forms the hypothesis space. Returns: @@ -114,7 +115,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): Args: reg_lambda: Weight regularization constant - C: Penalty parameter C of the loss term + c_arg: Penalty parameter C of the loss term radius_constant: constant defining the length of the radius delta: delta value in huber loss. When to switch from quadratic to absolute deviation. @@ -147,7 +148,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin): ) def call(self, y_true, y_pred): - """Computes loss + """Computes loss. Args: y_true: Ground truth values. One hot encoded using -1 and 1. @@ -262,13 +263,13 @@ class StrongConvexBinaryCrossentropy( def call(self, y_true, y_pred): """Computes loss. - Args: - y_true: Ground truth values. - y_pred: The predicted values. + Args: + y_true: Ground truth values. + y_pred: The predicted values. - Returns: - Loss values per sample. - """ + Returns: + Loss values per sample. + """ loss = super(StrongConvexBinaryCrossentropy, self).call(y_true, y_pred) loss = loss * self.C return loss diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index 44a8d11..713bd7c 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -369,11 +369,11 @@ class HuberTests(keras_parameterized.TestCase): def test_calculation(self, logits, y_true, delta, result): """Test the call method to ensure it returns the correct value. - Args: - logits: unscaled output of model - y_true: label - delta: - result: correct loss calculation value + Args: + logits: unscaled output of model + y_true: label + delta: + result: correct loss calculation value """ logits = tf.Variable(logits, False, dtype=tf.float32) y_true = tf.Variable(y_true, False, dtype=tf.float32) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 7efd59d..a5d6159 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -80,13 +80,13 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs): # pylint: disable=arguments-differ """See super class. Default optimizer used in Bolton method is SGD. - Args: - optimizer: The optimizer to use. This will be automatically wrapped - with the Bolton Optimizer. - loss: The loss function to use. Must be a StrongConvex loss (extend the - StrongConvexMixin). - kernel_initializer: The kernel initializer to use for the single layer. - kwargs: kwargs to keras Model.compile. See super. + Args: + optimizer: The optimizer to use. This will be automatically wrapped + with the Bolton Optimizer. + loss: The loss function to use. Must be a StrongConvex loss (extend the + StrongConvexMixin). + kernel_initializer: The kernel initializer to use for the single layer. 
+ kwargs: kwargs to keras Model.compile. See super. """ if not isinstance(loss, StrongConvexMixin): raise ValueError('loss function must be a Strongly Convex and therefore ' @@ -200,6 +200,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method Bolton paper for more description. n_samples: number of individual samples in x steps_per_epoch: + **kwargs: **kwargs """ if class_weight is None: class_weight = self.calculate_class_weights(class_weight) diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index daeba1a..f5365fe 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -275,7 +275,8 @@ def _do_fit(n_samples, loss: instance of TestLoss distribution: distribution to get noise from. - Returns: BoltonModel instsance + Returns: + BoltonModel instsance """ clf = models.BoltonModel(n_outputs) clf.compile(optimizer, loss) diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index 8910889..a18c636 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -184,7 +184,7 @@ class Bolton(optimizer_v2.OptimizerV2): Args: input_dim: the input dimensionality for the weights - output_dim the output dimensionality for the weights + output_dim: the output dimensionality for the weights Returns: Noise in shape of layer's weights to be added to the weights. @@ -236,7 +236,8 @@ class Bolton(optimizer_v2.OptimizerV2): Args: name: - Returns: attribute from Bolton if specified to come from self, else + Returns: + attribute from Bolton if specified to come from self, else from _internal_optimizer. """ if name == '_private_attributes' or name in self._private_attributes: @@ -254,7 +255,7 @@ class Bolton(optimizer_v2.OptimizerV2): ) def __setattr__(self, key, value): - """ Set attribute to self instance if its the internal optimizer. + """Set attribute to self instance if its the internal optimizer. Reroute everything else to the _internal_optimizer. @@ -333,6 +334,9 @@ class Bolton(optimizer_v2.OptimizerV2): tensor with dim == n_classes. n_samples: number of rows/individual samples in the training set batch_size: batch size used. + + Returns: + self """ if epsilon <= 0: raise ValueError('Detected epsilon: {0}. ' @@ -354,8 +358,8 @@ class Bolton(optimizer_v2.OptimizerV2): def __exit__(self, *args): """Exit call from with statement. - used to + Used to: 1.reset the model and fit parameters passed to the optimizer to enable the Bolton Privacy guarantees. These are reset to ensure that any future calls to fit with the same instance of the optimizer @@ -363,6 +367,9 @@ class Bolton(optimizer_v2.OptimizerV2): 2.call post-fit methods normalizing/projecting the model weights and adding noise to the weights. + + Args: + *args: *args """ self.project_weights_to_r(True) for layer in self.layers: From 801e6eef1c2fa5e0d27c8dddc55ff9fb5f9a1bf5 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:22:52 +0000 Subject: [PATCH 30/39] more lint --- privacy/bolton/models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index a5d6159..10c19b7 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -86,7 +86,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method loss: The loss function to use. Must be a StrongConvex loss (extend the StrongConvexMixin). kernel_initializer: The kernel initializer to use for the single layer. - kwargs: kwargs to keras Model.compile. See super. + **kwargs: kwargs to keras Model.compile. 
See super. """ if not isinstance(loss, StrongConvexMixin): raise ValueError('loss function must be a Strongly Convex and therefore ' @@ -136,7 +136,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method See the bolton paper for more description. noise_distribution: the distribution to pull noise from. steps_per_epoch: - kwargs: kwargs to keras Model.fit. See super. + **kwargs: kwargs to keras Model.fit. See super. Returns: output @@ -201,6 +201,9 @@ class BoltonModel(Model): # pylint: disable=abstract-method n_samples: number of individual samples in x steps_per_epoch: **kwargs: **kwargs + + Returns: + output """ if class_weight is None: class_weight = self.calculate_class_weights(class_weight) From f470ae4a7cd73536c409e37d631ec2a9908dab5f Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:30:45 +0000 Subject: [PATCH 31/39] more lint --- privacy/bolton/__init__.py | 8 ++++---- privacy/bolton/losses_test.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index d3c8209..1660208 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -23,7 +23,7 @@ if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. pass else: - from privacy.bolton.models import BoltonModel - from privacy.bolton.optimizers import Bolton - from privacy.bolton.losses import StrongConvexHuber - from privacy.bolton.losses import StrongConvexBinaryCrossentropy + from privacy.bolton.models import BoltonModel # pylint: disable=import-not-at-top + from privacy.bolton.optimizers import Bolton # pylint: disable=import-not-at-top + from privacy.bolton.losses import StrongConvexHuber # pylint: disable=import-not-at-top + from privacy.bolton.losses import StrongConvexBinaryCrossentropy # pylint: disable=import-not-at-top diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index 713bd7c..3996ae9 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -17,8 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from contextlib import contextmanager -from io import StringIO +from contextlib import contextmanager # pylint: disable=importing-member +from io import StringIO # pylint: disable=importing-member import sys from absl.testing import parameterized import tensorflow as tf From d29f31ec447c5f541ec3585dd17a69d44b78f224 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:33:30 +0000 Subject: [PATCH 32/39] more lint --- privacy/bolton/__init__.py | 8 ++++---- privacy/bolton/losses.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 1660208..5dc3940 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -23,7 +23,7 @@ if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. 
pass else: - from privacy.bolton.models import BoltonModel # pylint: disable=import-not-at-top - from privacy.bolton.optimizers import Bolton # pylint: disable=import-not-at-top - from privacy.bolton.losses import StrongConvexHuber # pylint: disable=import-not-at-top - from privacy.bolton.losses import StrongConvexBinaryCrossentropy # pylint: disable=import-not-at-top + from privacy.bolton.models import BoltonModel # pylint: disable=g-import-not-at-top + from privacy.bolton.optimizers import Bolton # pylint: disable=g-import-not-at-top + from privacy.bolton.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top + from privacy.bolton.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index d38c7cd..880b8c5 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -25,7 +25,7 @@ from tensorflow.python.keras.utils import losses_utils from tensorflow.python.platform import tf_logging as logging -class StrongConvexMixin: +class StrongConvexMixin: # pylint: disable=old-style-class """Strong Convex Mixin base class. Strong Convex Mixin base class for any loss function that will be used with From fb12ee047fbf6863d45900ad7da872b99a51bf50 Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 29 Jul 2019 22:35:13 +0000 Subject: [PATCH 33/39] more lint --- privacy/bolton/losses_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index 3996ae9..6c60c35 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -17,8 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from contextlib import contextmanager # pylint: disable=importing-member -from io import StringIO # pylint: disable=importing-member +from contextlib import contextmanager # pylint: disable=g-importing-member +from io import StringIO # pylint: disable=g-importing-member import sys from absl.testing import parameterized import tensorflow as tf From 2065f2b16a42c5c604afb12e3502ac3729418883 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Tue, 30 Jul 2019 15:12:22 -0400 Subject: [PATCH 34/39] Code style and documentation changes. 
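The skip_tf_privacy_import guard seen in __init__.py above lets standalone scripts opt out of these imports entirely: only hasattr(sys, ...) is checked, so setting any value on sys before the first import is enough. A minimal sketch, assuming a script that imports the top-level privacy package:

# -------
# Hypothetical standalone script: skip the guarded bolt-on imports shown above.
# Only hasattr(sys, 'skip_tf_privacy_import') is checked, so any value works.
# -------
import sys
sys.skip_tf_privacy_import = True

import privacy  # the guarded from-imports are now skipped
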
--- privacy/__init__.py | 4 +-- privacy/bolton/README.md | 19 +++++++------- privacy/bolton/__init__.py | 6 ++--- privacy/bolton/losses.py | 2 +- privacy/bolton/losses_test.py | 2 +- privacy/bolton/models.py | 43 ++++++++++++++++--------------- privacy/bolton/models_test.py | 40 ++++++++++++++-------------- privacy/bolton/optimizers.py | 36 +++++++++++++------------- privacy/bolton/optimizers_test.py | 34 ++++++++++++------------ tutorials/bolton_tutorial.py | 22 ++++++++-------- 10 files changed, 105 insertions(+), 103 deletions(-) diff --git a/privacy/__init__.py b/privacy/__init__.py index e494c62..94add1e 100644 --- a/privacy/__init__.py +++ b/privacy/__init__.py @@ -42,8 +42,8 @@ else: from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer - from privacy.bolton.models import BoltonModel - from privacy.bolton.optimizers import Bolton + from privacy.bolton.models import BoltOnModel + from privacy.bolton.optimizers import BoltOn from privacy.bolton.losses import StrongConvexMixin from privacy.bolton.losses import StrongConvexBinaryCrossentropy from privacy.bolton.losses import StrongConvexHuber diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md index 4aef36f..54eb91a 100644 --- a/privacy/bolton/README.md +++ b/privacy/bolton/README.md @@ -1,25 +1,26 @@ -# Bolton Subpackage +# BoltOn Subpackage -This package contains source code for the Bolton method. This method is a subset -of methods used in the ensuring privacy in machine learning that leverages -additional assumptions to provide a new way of approaching the privacy +This package contains source code for the BoltOn method, a particular +differential-privacy (DP) technique that uses output perturbations and leverages +additional assumptions to provide a new way of approaching the privacy guarantees. -## Bolton Description +## BoltOn Description This method uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). - 2. Projects weights to R after each batch + 2. Projects weights to R, the radius of the hypothesis space, + after each batch. This value is configurable by the user. 3. Limits learning rate 4. Use a strongly convex loss function (see compile) For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient -Descent-based Analytics by Xi Wu et al. +Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf -## Why Bolton? +## Why BoltOn? -The major difference for the Bolton method is that it injects noise post model +The major difference for the BoltOn method is that it injects noise post model convergence, rather than noising gradients or weights during training. This approach requires some additional constraints listed in the Description. Should the use-case and model satisfy these constraints, this is another diff --git a/privacy/bolton/__init__.py b/privacy/bolton/__init__.py index 5dc3940..bc7a027 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolton/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Bolton Method for privacy.""" +"""BoltOn Method for privacy.""" import sys from distutils.version import LooseVersion import tensorflow as tf @@ -23,7 +23,7 @@ if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. pass else: - from privacy.bolton.models import BoltonModel # pylint: disable=g-import-not-at-top - from privacy.bolton.optimizers import Bolton # pylint: disable=g-import-not-at-top + from privacy.bolton.models import BoltOnModel # pylint: disable=g-import-not-at-top + from privacy.bolton.optimizers import BoltOn # pylint: disable=g-import-not-at-top from privacy.bolton.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top from privacy.bolton.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top diff --git a/privacy/bolton/losses.py b/privacy/bolton/losses.py index 880b8c5..c742326 100644 --- a/privacy/bolton/losses.py +++ b/privacy/bolton/losses.py @@ -29,7 +29,7 @@ class StrongConvexMixin: # pylint: disable=old-style-class """Strong Convex Mixin base class. Strong Convex Mixin base class for any loss function that will be used with - Bolton model. Subclasses must be strongly convex and implement the + BoltOn model. Subclasses must be strongly convex and implement the associated constants. They must also conform to the requirements of tf losses (see super class). diff --git a/privacy/bolton/losses_test.py b/privacy/bolton/losses_test.py index 6c60c35..ff8137c 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolton/losses_test.py @@ -372,7 +372,7 @@ class HuberTests(keras_parameterized.TestCase): Args: logits: unscaled output of model y_true: label - delta: + delta: delta value for StrongConvexHuber loss. result: correct loss calculation value """ logits = tf.Variable(logits, False, dtype=tf.float32) diff --git a/privacy/bolton/models.py b/privacy/bolton/models.py index 10c19b7..ad0f59c 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolton/models.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Bolton model for bolton method of differentially private ML.""" +"""BoltOn model for bolton method of differentially private ML.""" from __future__ import absolute_import from __future__ import division @@ -21,11 +21,11 @@ from tensorflow.python.framework import ops as _ops from tensorflow.python.keras import optimizers from tensorflow.python.keras.models import Model from privacy.bolton.losses import StrongConvexMixin -from privacy.bolton.optimizers import Bolton +from privacy.bolton.optimizers import BoltOn -class BoltonModel(Model): # pylint: disable=abstract-method - """Bolton episilon-delta differential privacy model. +class BoltOnModel(Model): # pylint: disable=abstract-method + """BoltOn episilon-delta differential privacy model. The privacy guarantees are dependent on the noise that is sampled. Please see the paper linked below for more details. @@ -52,7 +52,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method seed: random seed to use dtype: data type to use for tensors """ - super(BoltonModel, self).__init__(name='bolton', dynamic=False) + super(BoltOnModel, self).__init__(name='bolton', dynamic=False) if n_outputs <= 0: raise ValueError('n_outputs = {0} is not valid. 
Must be > 0.'.format( n_outputs @@ -69,6 +69,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method inputs: inputs to neural network Returns: + Output logits for the given inputs. """ return self.output_layer(inputs) @@ -78,11 +79,11 @@ class BoltonModel(Model): # pylint: disable=abstract-method loss, kernel_initializer=tf.initializers.GlorotUniform, **kwargs): # pylint: disable=arguments-differ - """See super class. Default optimizer used in Bolton method is SGD. + """See super class. Default optimizer used in BoltOn method is SGD. Args: optimizer: The optimizer to use. This will be automatically wrapped - with the Bolton Optimizer. + with the BoltOn Optimizer. loss: The loss function to use. Must be a StrongConvex loss (extend the StrongConvexMixin). kernel_initializer: The kernel initializer to use for the single layer. @@ -99,11 +100,11 @@ class BoltonModel(Model): # pylint: disable=abstract-method kernel_initializer=kernel_initializer(), ) self._layers_instantiated = True - if not isinstance(optimizer, Bolton): + if not isinstance(optimizer, BoltOn): optimizer = optimizers.get(optimizer) - optimizer = Bolton(optimizer, loss) + optimizer = BoltOn(optimizer, loss) - super(BoltonModel, self).compile(optimizer, loss=loss, **kwargs) + super(BoltOnModel, self).compile(optimizer, loss=loss, **kwargs) def fit(self, x=None, @@ -115,7 +116,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method noise_distribution='laplace', steps_per_epoch=None, **kwargs): # pylint: disable=arguments-differ - """Reroutes to super fit with Bolton delta-epsilon privacy requirements. + """Reroutes to super fit with BoltOn delta-epsilon privacy requirements. Note, inputs must be normalized s.t. ||x|| < 1. Requirements are as follows: @@ -126,9 +127,9 @@ class BoltonModel(Model): # pylint: disable=abstract-method See super implementation for more details. Args: - x: - y: - batch_size: + x: Inputs to fit on, see super. + y: Labels to fit on, see super. + batch_size: The batch size to use for training, see super. class_weight: the class weights to be used. Can be a scalar or 1D tensor whose dim == n_classes. n_samples: the number of individual samples in x. @@ -139,7 +140,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method **kwargs: kwargs to keras Model.fit. See super. Returns: - output + Output from super fit method. """ if class_weight is None: class_weight_ = self.calculate_class_weights(class_weight) @@ -170,7 +171,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method class_weight_, data_size, batch_size_) as _: - out = super(BoltonModel, self).fit(x=x, + out = super(BoltOnModel, self).fit(x=x, y=y, batch_size=batch_size, class_weight=class_weight, @@ -192,18 +193,18 @@ class BoltonModel(Model): # pylint: disable=abstract-method is a generator. See super method and fit for more details. Args: - generator: + generator: Inputs generator following Tensorflow guidelines, see super. class_weight: the class weights to be used. Can be a scalar or 1D tensor whose dim == n_classes. noise_distribution: the distribution to get noise from. epsilon: privacy parameter, which trades off utility and privacy. See - Bolton paper for more description. + BoltOn paper for more description. n_samples: number of individual samples in x - steps_per_epoch: + steps_per_epoch: Number of steps per training epoch, see super. **kwargs: **kwargs Returns: - output + Output from super fit_generator method. 
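Putting the compile and fit pieces above together, a typical BoltOnModel workflow looks roughly like the sketch below. The data and hyperparameter values are placeholders, the loss arguments are passed positionally as in the tutorial, and the import path reflects the package layout at this point in the series (it is renamed to privacy.bolt_on in a later patch):

# -------
# Rough usage sketch of BoltOnModel.compile/fit as documented above.
# Placeholder data and hyperparameters; inputs are kept small so ||x|| < 1.
# -------
import numpy as np
import tensorflow as tf
from privacy.bolton import losses, models

x = np.concatenate([0.1 * np.ones((50, 2)), -0.1 * np.ones((50, 2))]).astype('float32')
y = np.concatenate([np.ones((50, 1)), np.zeros((50, 1))]).astype('float32')

reg_lambda, c, radius_constant = 1.0, 1.0, 1.0
clf = models.BoltOnModel(n_outputs=1)
# compile() checks that the loss is strongly convex and wraps the optimizer with BoltOn.
clf.compile(tf.optimizers.SGD(),
            losses.StrongConvexBinaryCrossentropy(reg_lambda, c, radius_constant))
# fit() enters the BoltOn context, trains, then projects the weights and adds noise.
clf.fit(x, y,
        batch_size=10,
        n_samples=x.shape[0],
        epsilon=2,
        noise_distribution='laplace')
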
""" if class_weight is None: class_weight = self.calculate_class_weights(class_weight) @@ -224,7 +225,7 @@ class BoltonModel(Model): # pylint: disable=abstract-method class_weight, data_size, batch_size) as _: - out = super(BoltonModel, self).fit_generator( + out = super(BoltOnModel, self).fit_generator( generator, class_weight=class_weight, steps_per_epoch=steps_per_epoch, diff --git a/privacy/bolton/models_test.py b/privacy/bolton/models_test.py index f5365fe..b252312 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolton/models_test.py @@ -26,11 +26,11 @@ from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 from tensorflow.python.keras.regularizers import L1L2 from privacy.bolton import models from privacy.bolton.losses import StrongConvexMixin -from privacy.bolton.optimizers import Bolton +from privacy.bolton.optimizers import BoltOn class TestLoss(losses.Loss, StrongConvexMixin): - """Test loss function for testing Bolton model.""" + """Test loss function for testing BoltOn model.""" def __init__(self, reg_lambda, c_arg, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) @@ -105,7 +105,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class TestOptimizer(OptimizerV2): - """Test optimizer used for testing Bolton model.""" + """Test optimizer used for testing BoltOn model.""" def __init__(self): super(TestOptimizer, self).__init__('test') @@ -138,14 +138,14 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_init_params(self, n_outputs): - """Test initialization of BoltonModel. + """Test initialization of BoltOnModel. Args: n_outputs: number of output neurons """ # test valid domains for each variable - clf = models.BoltonModel(n_outputs) - self.assertIsInstance(clf, models.BoltonModel) + clf = models.BoltOnModel(n_outputs) + self.assertIsInstance(clf, models.BoltOnModel) @parameterized.named_parameters([ {'testcase_name': 'invalid n_outputs', @@ -153,14 +153,14 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_bad_init_params(self, n_outputs): - """test bad initializations of BoltonModel that should raise errors. + """test bad initializations of BoltOnModel that should raise errors. Args: n_outputs: number of output neurons """ # test invalid domains for each variable, especially noise with self.assertRaises(ValueError): - models.BoltonModel(n_outputs) + models.BoltOnModel(n_outputs) @parameterized.named_parameters([ {'testcase_name': 'string compile', @@ -175,7 +175,7 @@ class InitTests(keras_parameterized.TestCase): }, ]) def test_compile(self, n_outputs, loss, optimizer): - """Test compilation of BoltonModel. + """Test compilation of BoltOnModel. Args: n_outputs: number of output neurons @@ -184,7 +184,7 @@ class InitTests(keras_parameterized.TestCase): """ # test compilation of valid tf.optimizer and tf.loss with self.cached_session(): - clf = models.BoltonModel(n_outputs) + clf = models.BoltOnModel(n_outputs) clf.compile(optimizer, loss) self.assertEqual(clf.loss, loss) @@ -201,7 +201,7 @@ class InitTests(keras_parameterized.TestCase): } ]) def test_bad_compile(self, n_outputs, loss, optimizer): - """test bad compilations of BoltonModel that should raise errors. + """test bad compilations of BoltOnModel that should raise errors. Args: n_outputs: number of output neurons @@ -211,7 +211,7 @@ class InitTests(keras_parameterized.TestCase): # test compilaton of invalid tf.optimizer and non instantiated loss. 
with self.cached_session(): with self.assertRaises((ValueError, AttributeError)): - clf = models.BoltonModel(n_outputs) + clf = models.BoltOnModel(n_outputs) clf.compile(optimizer, loss) @@ -276,9 +276,9 @@ def _do_fit(n_samples, distribution: distribution to get noise from. Returns: - BoltonModel instsance + BoltOnModel instsance """ - clf = models.BoltonModel(n_outputs) + clf = models.BoltOnModel(n_outputs) clf.compile(optimizer, loss) if generator: x = _cat_dataset( @@ -328,14 +328,14 @@ class FitTests(keras_parameterized.TestCase): }, ]) def test_fit(self, generator, reset_n_samples): - """Tests fitting of BoltonModel. + """Tests fitting of BoltOnModel. Args: generator: True for generator test, False for iterator test. reset_n_samples: True to reset the n_samples to None, False does nothing """ loss = TestLoss(1, 1, 1) - optimizer = Bolton(TestOptimizer(), loss) + optimizer = BoltOn(TestOptimizer(), loss) n_classes = 2 input_dim = 5 epsilon = 1 @@ -360,7 +360,7 @@ class FitTests(keras_parameterized.TestCase): }, ]) def test_fit_gen(self, generator): - """Tests the fit_generator method of BoltonModel. + """Tests the fit_generator method of BoltOnModel. Args: generator: True to test with a generator dataset @@ -371,7 +371,7 @@ class FitTests(keras_parameterized.TestCase): input_dim = 5 batch_size = 1 n_samples = 10 - clf = models.BoltonModel(n_classes) + clf = models.BoltOnModel(n_classes) clf.compile(optimizer, loss) x = _cat_dataset( n_samples, @@ -456,7 +456,7 @@ class FitTests(keras_parameterized.TestCase): num_classes: number of outputs neurons result: expected result """ - clf = models.BoltonModel(1, 1) + clf = models.BoltOnModel(1, 1) expected = clf.calculate_class_weights(class_weights, class_counts, num_classes) @@ -523,7 +523,7 @@ class FitTests(keras_parameterized.TestCase): num_classes: number of outputs neurons. err_msg: The expected error message. """ - clf = models.BoltonModel(1, 1) + clf = models.BoltOnModel(1, 1) with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method clf.calculate_class_weights(class_weights, class_counts, diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index a18c636..d647bbb 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Bolton Optimizer for bolton method.""" +"""BoltOn Optimizer for bolton method.""" from __future__ import absolute_import from __future__ import division @@ -45,15 +45,15 @@ class GammaBetaDecreasingStep( Returns: decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per - the Bolton privacy requirements. + the BoltOn privacy requirements. """ if not self.is_init: raise AttributeError('Please initialize the {0} Learning Rate Scheduler.' 'This is performed automatically by using the ' '{1} as a context manager, ' 'as desired'.format(self.__class__.__name__, - Bolton.__class__.__name__ - ) + BoltOn.__class__.__name__ + ) ) dtype = self.beta.dtype one = tf.constant(1, dtype) @@ -86,22 +86,22 @@ class GammaBetaDecreasingStep( self.gamma = None -class Bolton(optimizer_v2.OptimizerV2): - """Wrap another tf optimizer with Bolton privacy protocol. +class BoltOn(optimizer_v2.OptimizerV2): + """Wrap another tf optimizer with BoltOn privacy protocol. 
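In other words, the GammaBetaDecreasingStep schedule described just above picks, at every step, the smaller of 1/beta and 1/(gamma * step). A condensed sketch of that rule (the real scheduler reads beta and gamma from the strongly convex loss once the optimizer context is entered):

# -------
# Condensed sketch of the BoltOn learning rate decay; step counts from 1.
# -------
def bolton_learning_rate(step, gamma, beta):
  return min(1.0 / beta, 1.0 / (gamma * step))
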
- Bolton optimizer wraps another tf optimizer to be used + BoltOn optimizer wraps another tf optimizer to be used as the visible optimizer to the tf model. No matter the optimizer - passed, "Bolton" enables the bolton model to control the learning rate + passed, "BoltOn" enables the bolton model to control the learning rate based on the strongly convex loss. - To use the Bolton method, you must: + To use the BoltOn method, you must: 1. instantiate it with an instantiated tf optimizer and StrongConvexLoss. 2. use it as a context manager around your .fit method internals. This can be accomplished by the following: optimizer = tf.optimizers.SGD() loss = privacy.bolton.losses.StrongConvexBinaryCrossentropy() - bolton = Bolton(optimizer, loss) + bolton = BoltOn(optimizer, loss) with bolton(*args) as _: model.fit() The args required for the context manager can be found in the __call__ @@ -142,7 +142,7 @@ class Bolton(optimizer_v2.OptimizerV2): '_is_init' ] self._internal_optimizer = optimizer - self.learning_rate = GammaBetaDecreasingStep() # use the Bolton Learning + self.learning_rate = GammaBetaDecreasingStep() # use the BoltOn Learning # rate scheduler, as required for privacy guarantees. This will still need # to get values from the loss function near the time that .fit is called # on the model (when this optimizer will be called as a context manager) @@ -162,7 +162,7 @@ class Bolton(optimizer_v2.OptimizerV2): False to check if weights > R-ball and only normalize then. Raises: - Exception: + Exception: If not called from inside this optimizer context. """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -190,7 +190,7 @@ class Bolton(optimizer_v2.OptimizerV2): Noise in shape of layer's weights to be added to the weights. Raises: - Exception: + Exception: If not called from inside this optimizer's context. """ if not self._is_init: raise Exception('This method must be called from within the optimizer\'s ' @@ -234,10 +234,10 @@ class Bolton(optimizer_v2.OptimizerV2): from the _internal_optimizer instance. Args: - name: + name: Name of attribute to get from this or aggregate optimizer. Returns: - attribute from Bolton if specified to come from self, else + attribute from BoltOn if specified to come from self, else from _internal_optimizer. """ if name == '_private_attributes' or name in self._private_attributes: @@ -336,7 +336,7 @@ class Bolton(optimizer_v2.OptimizerV2): batch_size: batch size used. Returns: - self + self, to be used by the __enter__ method for context. """ if epsilon <= 0: raise ValueError('Detected epsilon: {0}. ' @@ -361,7 +361,7 @@ class Bolton(optimizer_v2.OptimizerV2): Used to: 1.reset the model and fit parameters passed to the optimizer - to enable the Bolton Privacy guarantees. These are reset to ensure + to enable the BoltOn Privacy guarantees. These are reset to ensure that any future calls to fit with the same instance of the optimizer will properly error out. @@ -369,7 +369,7 @@ class Bolton(optimizer_v2.OptimizerV2): adding noise to the weights. Args: - *args: *args + *args: encompasses the type, value, and traceback values which are unused. 
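The weight projection that __exit__ performs (and that can be forced after every batch) amounts to rescaling a kernel back onto the R-ball defined by the loss; with force=True the rescaling happens unconditionally. A simplified eager-mode sketch for a single kernel, not the method itself, which walks the wrapped model's layers and takes the radius from the loss:

# -------
# Simplified sketch of the R-ball projection described in the
# project_weights_to_r docstring above.
# -------
import tensorflow as tf

def project_to_r_ball(kernel, radius, force=False):
  norm = tf.norm(kernel)
  if force or norm > radius:  # force=True rescales unconditionally (used on exit)
    kernel = kernel * (radius / norm)
  return kernel
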
""" self.project_weights_to_r(True) for layer in self.layers: diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolton/optimizers_test.py index 4b08d66..abfffdd 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolton/optimizers_test.py @@ -33,7 +33,7 @@ from privacy.bolton.losses import StrongConvexMixin class TestModel(Model): # pylint: disable=abstract-method - """Bolton episilon-delta model. + """BoltOn episilon-delta model. Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). @@ -66,7 +66,7 @@ class TestModel(Model): # pylint: disable=abstract-method class TestLoss(losses.Loss, StrongConvexMixin): - """Test loss function for testing Bolton model.""" + """Test loss function for testing BoltOn model.""" def __init__(self, reg_lambda, c_arg, radius_constant, name='test'): super(TestLoss, self).__init__(name=name) @@ -142,7 +142,7 @@ class TestLoss(losses.Loss, StrongConvexMixin): class TestOptimizer(OptimizerV2): - """Optimizer used for testing the Bolton optimizer.""" + """Optimizer used for testing the BoltOn optimizer.""" def __init__(self): super(TestOptimizer, self).__init__('test') @@ -185,7 +185,7 @@ class TestOptimizer(OptimizerV2): class BoltonOptimizerTest(keras_parameterized.TestCase): - """Bolton Optimizer tests.""" + """BoltOn Optimizer tests.""" @test_util.run_all_in_graph_and_eager_modes @parameterized.named_parameters([ {'testcase_name': 'getattr', @@ -201,19 +201,19 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): ]) def test_fn(self, fn, args, result, test_attr): - """test that a fn of Bolton optimizer is working as expected. + """test that a fn of BoltOn optimizer is working as expected. Args: fn: method of Optimizer to test args: args to optimizer fn result: the expected result test_attr: None if the fn returns the test result. Otherwise, this is - the attribute of Bolton to check against result with. + the attribute of BoltOn to check against result with. """ tf.random.set_seed(1) loss = TestLoss(1, 1, 1) - bolton = opt.Bolton(TestOptimizer(), loss) + bolton = opt.BoltOn(TestOptimizer(), loss) model = TestModel(1) model.layers[0].kernel = \ model.layers[0].kernel_initializer((model.layer_input_shape[0], @@ -260,7 +260,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): 'result': [[1]]}, ]) def test_project(self, r, shape, n_out, init_value, result): - """test that a fn of Bolton optimizer is working as expected. + """test that a fn of BoltOn optimizer is working as expected. Args: r: Radius value for StrongConvex loss function. 
@@ -273,7 +273,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): @tf.function def project_fn(r): loss = TestLoss(1, 1, r) - bolton = opt.Bolton(TestOptimizer(), loss) + bolton = opt.BoltOn(TestOptimizer(), loss) model = TestModel(n_out, shape, init_value) model.compile(bolton, loss) model.layers[0].kernel = \ @@ -308,7 +308,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): @tf.function def test_run(): loss = TestLoss(1, 1, 1) - bolton = opt.Bolton(TestOptimizer(), loss) + bolton = opt.BoltOn(TestOptimizer(), loss) model = TestModel(1, (1,), 1) model.compile(bolton, loss) model.layers[0].kernel = \ @@ -343,7 +343,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): @tf.function def test_run(noise, epsilon): loss = TestLoss(1, 1, 1) - bolton = opt.Bolton(TestOptimizer(), loss) + bolton = opt.BoltOn(TestOptimizer(), loss) model = TestModel(1, (1,), 1) model.compile(bolton, loss) model.layers[0].kernel = \ @@ -371,7 +371,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): @tf.function def test_run(fn, args): loss = TestLoss(1, 1, 1) - bolton = opt.Bolton(TestOptimizer(), loss) + bolton = opt.BoltOn(TestOptimizer(), loss) model = TestModel(1, (1,), 1) model.compile(bolton, loss) model.layers[0].kernel = \ @@ -415,7 +415,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): """Tests rerouted function. Tests that a method of the internal optimizer is correctly routed from - the Bolton instance to the internal optimizer instance (TestOptimizer, + the BoltOn instance to the internal optimizer instance (TestOptimizer, here). Args: @@ -424,7 +424,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): """ loss = TestLoss(1, 1, 1) optimizer = TestOptimizer() - bolton = opt.Bolton(optimizer, loss) + bolton = opt.BoltOn(optimizer, loss) model = TestModel(3) model.compile(optimizer, loss) model.layers[0].kernel = \ @@ -465,7 +465,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): @tf.function def test_run(fn, args): loss = TestLoss(1, 1, 1) - bolton = opt.Bolton(TestOptimizer(), loss) + bolton = opt.BoltOn(TestOptimizer(), loss) model = TestModel(1, (1,), 1) model.compile(bolton, loss) model.layers[0].kernel = \ @@ -502,7 +502,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): """ loss = TestLoss(1, 1, 1) internal_optimizer = TestOptimizer() - optimizer = opt.Bolton(internal_optimizer, loss) + optimizer = opt.BoltOn(internal_optimizer, loss) self.assertEqual(getattr(optimizer, attr), getattr(internal_optimizer, attr)) @@ -521,7 +521,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase): """ loss = TestLoss(1, 1, 1) internal_optimizer = TestOptimizer() - optimizer = opt.Bolton(internal_optimizer, loss) + optimizer = opt.BoltOn(internal_optimizer, loss) with self.assertRaises(AttributeError): getattr(optimizer, attr) diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index ae9707e..c56f9bf 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -18,7 +18,7 @@ from __future__ import print_function import tensorflow as tf # pylint: disable=wrong-import-position from privacy.bolton import losses # pylint: disable=wrong-import-position from privacy.bolton import models # pylint: disable=wrong-import-position -from privacy.bolton.optimizers import Bolton # pylint: disable=wrong-import-position +from privacy.bolton.optimizers import BoltOn # pylint: disable=wrong-import-position # ------- # First, we will create a binary classification dataset with a single output # dimension. 
The samples for each label are repeated data points at different @@ -39,12 +39,12 @@ generator = tf.data.Dataset.from_tensor_slices((x, y)) generator = generator.batch(10) generator = generator.shuffle(10) # ------- -# First, we will explore using the pre - built BoltonModel, which is a thin +# First, we will explore using the pre - built BoltOnModel, which is a thin # wrapper around a Keras Model using a single - layer neural network. -# It automatically uses the Bolton Optimizer which encompasses all the logic -# required for the Bolton Differential Privacy method. +# It automatically uses the BoltOn Optimizer which encompasses all the logic +# required for the BoltOn Differential Privacy method. # ------- -bolt = models.BoltonModel(n_outputs) # tell the model how many outputs we have. +bolt = models.BoltOnModel(n_outputs) # tell the model how many outputs we have. # ------- # Now, we will pick our optimizer and Strongly Convex Loss function. The loss # must extend from StrongConvexMixin and implement the associated methods.Some @@ -60,7 +60,7 @@ loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) # to be 1; these are all tunable and their impact can be read in losses. # StrongConvexBinaryCrossentropy.We then compile the model with the chosen # optimizer and loss, which will automatically wrap the chosen optimizer with -# the Bolton Optimizer, ensuring the required components function as required +# the BoltOn Optimizer, ensuring the required components function as required # for privacy guarantees. # ------- bolt.compile(optimizer, loss) @@ -77,7 +77,7 @@ bolt.compile(optimizer, loss) # 2. noise_distribution, a valid string indicating the distriution to use (must # be implemented) # -# The BoltonModel offers a helper method,.calculate_class_weight to aid in +# The BoltOnModel offers a helper method,.calculate_class_weight to aid in # class_weight calculation. # required parameters # ------- @@ -132,7 +132,7 @@ bolt.fit(generator, noise_distribution=noise_distribution, verbose=0) # ------- -# You don't have to use the bolton model to use the Bolton method. +# You don't have to use the bolton model to use the BoltOn method. # There are only a few requirements: # 1. make sure any requirements from the loss are implemented in the model. # 2. instantiate the optimizer and use it as a context around the fit operation. @@ -140,7 +140,7 @@ bolt.fit(generator, # -------------------- Part 2, using the Optimizer # ------- -# Here, we create our own model and setup the Bolton optimizer. +# Here, we create our own model and setup the BoltOn optimizer. # ------- @@ -157,7 +157,7 @@ class TestModel(tf.keras.Model): # pylint: disable=abstract-method optimizer = tf.optimizers.SGD() loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) -optimizer = Bolton(optimizer, loss) +optimizer = BoltOn(optimizer, loss) # ------- # Now, we instantiate our model and check for 1. Since our loss requires L2 # regularization over the kernel, we will pass it to the model. @@ -166,7 +166,7 @@ n_outputs = 1 # parameter for model and optimizer context. test_model = TestModel(loss.kernel_regularizer(), n_outputs) test_model.compile(optimizer, loss) # ------- -# We comply with 2., and use the Bolton Optimizer as a context around the fit +# We comply with 2., and use the BoltOn Optimizer as a context around the fit # method. 
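A sketch of what that context looks like around the custom model's fit call follows. The positional argument order (noise distribution, epsilon, the model's layers, class weights, number of samples, batch size) is assumed from the arguments listed in the BoltOn.__call__ docstring; x, y, test_model and optimizer are the tutorial's variables from above.

# -------
# Sketch only: the argument order for the optimizer context is assumed and may
# need adjusting against the actual __call__ signature.
# -------
noise_distribution = 'laplace'  # the implemented noise distribution
epsilon = 2                     # privacy parameter
class_weights = 1               # scalar is fine for a single output class
batch_size = 5

with optimizer(noise_distribution, epsilon, test_model.layers,
               class_weights, x.shape[0], batch_size) as _:
  test_model.fit(x, y, batch_size=batch_size, epochs=2)
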
# ------- # parameters for context From cd0ff6d299caa594202dae8194dc5011eee9d9b0 Mon Sep 17 00:00:00 2001 From: npapernot Date: Tue, 30 Jul 2019 21:53:53 +0000 Subject: [PATCH 35/39] indent --- privacy/bolton/optimizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/privacy/bolton/optimizers.py b/privacy/bolton/optimizers.py index d647bbb..f5aa23e 100644 --- a/privacy/bolton/optimizers.py +++ b/privacy/bolton/optimizers.py @@ -53,7 +53,7 @@ class GammaBetaDecreasingStep( '{1} as a context manager, ' 'as desired'.format(self.__class__.__name__, BoltOn.__class__.__name__ - ) + ) ) dtype = self.beta.dtype one = tf.constant(1, dtype) From 223f2cc640c3a64cd06d93eb15f467ee01d3fcf3 Mon Sep 17 00:00:00 2001 From: npapernot Date: Tue, 30 Jul 2019 22:00:16 +0000 Subject: [PATCH 36/39] trailing spaces --- privacy/bolton/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/privacy/bolton/README.md b/privacy/bolton/README.md index 54eb91a..77d79d1 100644 --- a/privacy/bolton/README.md +++ b/privacy/bolton/README.md @@ -1,15 +1,15 @@ # BoltOn Subpackage -This package contains source code for the BoltOn method, a particular -differential-privacy (DP) technique that uses output perturbations and leverages -additional assumptions to provide a new way of approaching the privacy -guarantees. +This package contains source code for the BoltOn method, a particular +differential-privacy (DP) technique that uses output perturbations and +leverages additional assumptions to provide a new way of approaching the +privacy guarantees. ## BoltOn Description This method uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). - 2. Projects weights to R, the radius of the hypothesis space, + 2. Projects weights to R, the radius of the hypothesis space, after each batch. This value is configurable by the user. 3. Limits learning rate 4. Use a strongly convex loss function (see compile) From c0bd19365bc143f0703e0fb270730fe546988f23 Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 31 Jul 2019 10:52:41 -0400 Subject: [PATCH 37/39] Refactoring bolton package to bolt_on only in code usages. 
--- privacy/__init__.py | 10 +++++----- privacy/{bolton => bolt_on}/README.md | 0 privacy/{bolton => bolt_on}/__init__.py | 8 ++++---- privacy/{bolton => bolt_on}/losses.py | 0 privacy/{bolton => bolt_on}/losses_test.py | 6 +++--- privacy/{bolton => bolt_on}/models.py | 4 ++-- privacy/{bolton => bolt_on}/models_test.py | 6 +++--- privacy/{bolton => bolt_on}/optimizers.py | 2 +- privacy/{bolton => bolt_on}/optimizers_test.py | 4 ++-- tutorials/bolton_tutorial.py | 6 +++--- 10 files changed, 23 insertions(+), 23 deletions(-) rename privacy/{bolton => bolt_on}/README.md (100%) rename privacy/{bolton => bolt_on}/__init__.py (75%) rename privacy/{bolton => bolt_on}/losses.py (100%) rename privacy/{bolton => bolt_on}/losses_test.py (98%) rename privacy/{bolton => bolt_on}/models.py (99%) rename privacy/{bolton => bolt_on}/models_test.py (99%) rename privacy/{bolton => bolt_on}/optimizers.py (99%) rename privacy/{bolton => bolt_on}/optimizers_test.py (99%) diff --git a/privacy/__init__.py b/privacy/__init__.py index 94add1e..aab6e94 100644 --- a/privacy/__init__.py +++ b/privacy/__init__.py @@ -42,8 +42,8 @@ else: from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer - from privacy.bolton.models import BoltOnModel - from privacy.bolton.optimizers import BoltOn - from privacy.bolton.losses import StrongConvexMixin - from privacy.bolton.losses import StrongConvexBinaryCrossentropy - from privacy.bolton.losses import StrongConvexHuber + from privacy.bolt_on.models import BoltOnModel + from privacy.bolt_on.optimizers import BoltOn + from privacy.bolt_on.losses import StrongConvexMixin + from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy + from privacy.bolt_on.losses import StrongConvexHuber diff --git a/privacy/bolton/README.md b/privacy/bolt_on/README.md similarity index 100% rename from privacy/bolton/README.md rename to privacy/bolt_on/README.md diff --git a/privacy/bolton/__init__.py b/privacy/bolt_on/__init__.py similarity index 75% rename from privacy/bolton/__init__.py rename to privacy/bolt_on/__init__.py index bc7a027..52b1b29 100644 --- a/privacy/bolton/__init__.py +++ b/privacy/bolt_on/__init__.py @@ -23,7 +23,7 @@ if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. 
pass else: - from privacy.bolton.models import BoltOnModel # pylint: disable=g-import-not-at-top - from privacy.bolton.optimizers import BoltOn # pylint: disable=g-import-not-at-top - from privacy.bolton.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top - from privacy.bolton.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top + from privacy.bolt_on.models import BoltOnModel # pylint: disable=g-import-not-at-top + from privacy.bolt_on.optimizers import BoltOn # pylint: disable=g-import-not-at-top + from privacy.bolt_on.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top + from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top diff --git a/privacy/bolton/losses.py b/privacy/bolt_on/losses.py similarity index 100% rename from privacy/bolton/losses.py rename to privacy/bolt_on/losses.py diff --git a/privacy/bolton/losses_test.py b/privacy/bolt_on/losses_test.py similarity index 98% rename from privacy/bolton/losses_test.py rename to privacy/bolt_on/losses_test.py index ff8137c..3d88190 100644 --- a/privacy/bolton/losses_test.py +++ b/privacy/bolt_on/losses_test.py @@ -25,9 +25,9 @@ import tensorflow as tf from tensorflow.python.framework import test_util from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.regularizers import L1L2 -from privacy.bolton.losses import StrongConvexBinaryCrossentropy -from privacy.bolton.losses import StrongConvexHuber -from privacy.bolton.losses import StrongConvexMixin +from privacy.bolt_on.losses import StrongConvexBinaryCrossentropy +from privacy.bolt_on.losses import StrongConvexHuber +from privacy.bolt_on.losses import StrongConvexMixin @contextmanager diff --git a/privacy/bolton/models.py b/privacy/bolt_on/models.py similarity index 99% rename from privacy/bolton/models.py rename to privacy/bolt_on/models.py index ad0f59c..98f2167 100644 --- a/privacy/bolton/models.py +++ b/privacy/bolt_on/models.py @@ -20,8 +20,8 @@ import tensorflow as tf from tensorflow.python.framework import ops as _ops from tensorflow.python.keras import optimizers from tensorflow.python.keras.models import Model -from privacy.bolton.losses import StrongConvexMixin -from privacy.bolton.optimizers import BoltOn +from privacy.bolt_on.losses import StrongConvexMixin +from privacy.bolt_on.optimizers import BoltOn class BoltOnModel(Model): # pylint: disable=abstract-method diff --git a/privacy/bolton/models_test.py b/privacy/bolt_on/models_test.py similarity index 99% rename from privacy/bolton/models_test.py rename to privacy/bolt_on/models_test.py index b252312..522f686 100644 --- a/privacy/bolton/models_test.py +++ b/privacy/bolt_on/models_test.py @@ -24,9 +24,9 @@ from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import losses from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 from tensorflow.python.keras.regularizers import L1L2 -from privacy.bolton import models -from privacy.bolton.losses import StrongConvexMixin -from privacy.bolton.optimizers import BoltOn +from privacy.bolt_on import models +from privacy.bolt_on.losses import StrongConvexMixin +from privacy.bolt_on.optimizers import BoltOn class TestLoss(losses.Loss, StrongConvexMixin): diff --git a/privacy/bolton/optimizers.py b/privacy/bolt_on/optimizers.py similarity index 99% rename from privacy/bolton/optimizers.py rename to privacy/bolt_on/optimizers.py index f5aa23e..97d1aba 100644 --- a/privacy/bolton/optimizers.py +++ 
b/privacy/bolt_on/optimizers.py @@ -20,7 +20,7 @@ from __future__ import print_function import tensorflow as tf from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import math_ops -from privacy.bolton.losses import StrongConvexMixin +from privacy.bolt_on.losses import StrongConvexMixin _accepted_distributions = ['laplace'] # implemented distributions for noising diff --git a/privacy/bolton/optimizers_test.py b/privacy/bolt_on/optimizers_test.py similarity index 99% rename from privacy/bolton/optimizers_test.py rename to privacy/bolt_on/optimizers_test.py index abfffdd..731d97d 100644 --- a/privacy/bolton/optimizers_test.py +++ b/privacy/bolt_on/optimizers_test.py @@ -28,8 +28,8 @@ from tensorflow.python.keras.models import Model from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 from tensorflow.python.keras.regularizers import L1L2 from tensorflow.python.platform import test -from privacy.bolton import optimizers as opt -from privacy.bolton.losses import StrongConvexMixin +from privacy.bolt_on import optimizers as opt +from privacy.bolt_on.losses import StrongConvexMixin class TestModel(Model): # pylint: disable=abstract-method diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index c56f9bf..5a3b748 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -16,9 +16,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf # pylint: disable=wrong-import-position -from privacy.bolton import losses # pylint: disable=wrong-import-position -from privacy.bolton import models # pylint: disable=wrong-import-position -from privacy.bolton.optimizers import BoltOn # pylint: disable=wrong-import-position +from privacy.bolt_on import losses # pylint: disable=wrong-import-position +from privacy.bolt_on import models # pylint: disable=wrong-import-position +from privacy.bolt_on.optimizers import BoltOn # pylint: disable=wrong-import-position # ------- # First, we will create a binary classification dataset with a single output # dimension. The samples for each label are repeated data points at different From 4bd0ad482a2a5ae1cfd68bc38ae5d7c2276857be Mon Sep 17 00:00:00 2001 From: Christopher Choquette Choo Date: Wed, 31 Jul 2019 10:55:25 -0400 Subject: [PATCH 38/39] Refactoring bolton to bolt_on from package refactor for comments + strings. --- privacy/bolt_on/__init__.py | 2 +- privacy/bolt_on/losses.py | 2 +- privacy/bolt_on/models.py | 4 ++-- privacy/bolt_on/optimizers.py | 6 +++--- tutorials/bolton_tutorial.py | 6 +++--- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/privacy/bolt_on/__init__.py b/privacy/bolt_on/__init__.py index 52b1b29..075edf9 100644 --- a/privacy/bolt_on/__init__.py +++ b/privacy/bolt_on/__init__.py @@ -19,7 +19,7 @@ import tensorflow as tf if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): raise ImportError("Please upgrade your version " "of tensorflow from: {0} to at least 2.0.0 to " - "use privacy/bolton".format(LooseVersion(tf.__version__))) + "use privacy/bolt_on".format(LooseVersion(tf.__version__))) if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. pass else: diff --git a/privacy/bolt_on/losses.py b/privacy/bolt_on/losses.py index c742326..81bd0c3 100644 --- a/privacy/bolt_on/losses.py +++ b/privacy/bolt_on/losses.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""Loss functions for bolton method.""" +"""Loss functions for BoltOn method.""" from __future__ import absolute_import from __future__ import division diff --git a/privacy/bolt_on/models.py b/privacy/bolt_on/models.py index 98f2167..7cdcccd 100644 --- a/privacy/bolt_on/models.py +++ b/privacy/bolt_on/models.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""BoltOn model for bolton method of differentially private ML.""" +"""BoltOn model for Bolt-on method of differentially private ML.""" from __future__ import absolute_import from __future__ import division @@ -134,7 +134,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method whose dim == n_classes. n_samples: the number of individual samples in x. epsilon: privacy parameter, which trades off between utility an privacy. - See the bolton paper for more description. + See the bolt-on paper for more description. noise_distribution: the distribution to pull noise from. steps_per_epoch: **kwargs: kwargs to keras Model.fit. See super. diff --git a/privacy/bolt_on/optimizers.py b/privacy/bolt_on/optimizers.py index 97d1aba..3536450 100644 --- a/privacy/bolt_on/optimizers.py +++ b/privacy/bolt_on/optimizers.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""BoltOn Optimizer for bolton method.""" +"""BoltOn Optimizer for Bolt-on method.""" from __future__ import absolute_import from __future__ import division @@ -91,7 +91,7 @@ class BoltOn(optimizer_v2.OptimizerV2): BoltOn optimizer wraps another tf optimizer to be used as the visible optimizer to the tf model. No matter the optimizer - passed, "BoltOn" enables the bolton model to control the learning rate + passed, "BoltOn" enables the bolt-on model to control the learning rate based on the strongly convex loss. To use the BoltOn method, you must: @@ -100,7 +100,7 @@ class BoltOn(optimizer_v2.OptimizerV2): This can be accomplished by the following: optimizer = tf.optimizers.SGD() - loss = privacy.bolton.losses.StrongConvexBinaryCrossentropy() + loss = privacy.bolt_on.losses.StrongConvexBinaryCrossentropy() bolton = BoltOn(optimizer, loss) with bolton(*args) as _: model.fit() diff --git a/tutorials/bolton_tutorial.py b/tutorials/bolton_tutorial.py index 5a3b748..fdfe338 100644 --- a/tutorials/bolton_tutorial.py +++ b/tutorials/bolton_tutorial.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tutorial for bolton module, the model and the optimizer.""" +"""Tutorial for bolt_on module, the model and the optimizer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -48,7 +48,7 @@ bolt = models.BoltOnModel(n_outputs) # tell the model how many outputs we have. # ------- # Now, we will pick our optimizer and Strongly Convex Loss function. 
The loss # must extend from StrongConvexMixin and implement the associated methods.Some -# existing loss functions are pre - implemented in bolton.loss +# existing loss functions are pre - implemented in bolt_on.loss # ------- optimizer = tf.optimizers.SGD() reg_lambda = 1 @@ -132,7 +132,7 @@ bolt.fit(generator, noise_distribution=noise_distribution, verbose=0) # ------- -# You don't have to use the bolton model to use the BoltOn method. +# You don't have to use the BoltOn model to use the BoltOn method. # There are only a few requirements: # 1. make sure any requirements from the loss are implemented in the model. # 2. instantiate the optimizer and use it as a context around the fit operation. From 12dc0b9497899f11cf9fe466d6c468ce46147d5d Mon Sep 17 00:00:00 2001 From: npapernot Date: Wed, 31 Jul 2019 20:40:30 +0000 Subject: [PATCH 39/39] last typo --- privacy/bolt_on/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/privacy/bolt_on/README.md b/privacy/bolt_on/README.md index 77d79d1..3d55977 100644 --- a/privacy/bolt_on/README.md +++ b/privacy/bolt_on/README.md @@ -12,7 +12,7 @@ This method uses 4 key steps to achieve privacy guarantees: 2. Projects weights to R, the radius of the hypothesis space, after each batch. This value is configurable by the user. 3. Limits learning rate - 4. Use a strongly convex loss function (see compile) + 4. Uses a strongly convex loss function (see compile) For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient