Bolton implementation and unit tests. Has two pre-implemented loss functions.

This commit is contained in:
Christopher Choquette Choo 2019-06-19 10:46:30 -04:00
parent ed7fa73a99
commit f41be2c598
8 changed files with 49 additions and 308 deletions

View file

@ -41,3 +41,9 @@ else:
from privacy.optimizers.dp_optimizer import DPAdamOptimizer
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
from privacy.bolton.models import BoltonModel
from privacy.bolton.optimizers import Bolton
from privacy.bolton.losses import StrongConvexMixin
from privacy.bolton.losses import StrongConvexBinaryCrossentropy
from privacy.bolton.losses import StrongConvexHuber

View file

@ -9,6 +9,7 @@ if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts.
pass
else:
from privacy.bolton.model import Bolton
from privacy.bolton.loss import StrongConvexHuber
from privacy.bolton.loss import StrongConvexBinaryCrossentropy
from privacy.bolton.models import BoltonModel
from privacy.bolton.optimizers import Bolton
from privacy.bolton.losses import StrongConvexHuber
from privacy.bolton.losses import StrongConvexBinaryCrossentropy

View file

@ -21,6 +21,7 @@ from tensorflow.python.keras import losses
from tensorflow.python.keras.utils import losses_utils
from tensorflow.python.framework import ops as _ops
from tensorflow.python.keras.regularizers import L1L2
from tensorflow.python.platform import tf_logging as logging
class StrongConvexMixin:
@ -147,7 +148,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
self.dtype = dtype
self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
super(StrongConvexHuber, self).__init__(
name='huber',
name='strongconvexhuber',
reduction=reduction,
)
@ -245,6 +246,11 @@ class StrongConvexBinaryCrossentropy(
name: Name of the loss instance
dtype: tf datatype to use for tensor conversions.
"""
if label_smoothing != 0:
logging.warning('The impact of label smoothing on privacy is unknown. '
'Use label smoothing at your own risk as it may not '
'guarantee privacy.')
if reg_lambda <= 0:
raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
if C <= 0:
@ -258,7 +264,7 @@ class StrongConvexBinaryCrossentropy(
self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
super(StrongConvexBinaryCrossentropy, self).__init__(
reduction=reduction,
name='binarycrossentropy',
name='strongconvexbinarycrossentropy',
from_logits=from_logits,
label_smoothing=label_smoothing,
)
@ -313,280 +319,3 @@ class StrongConvexBinaryCrossentropy(
return L1L2(l2=self.reg_lambda/2)
# class StrongConvexSparseCategoricalCrossentropy(
# losses.CategoricalCrossentropy,
# StrongConvexMixin
# ):
# """
# Strong Convex version of CategoricalCrossentropy loss using l2 weight
# regularization.
# """
#
# def __init__(self,
# reg_lambda: float,
# C: float,
# radius_constant: float,
# from_logits: bool = True,
# label_smoothing: float = 0,
# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
# name: str = 'binarycrossentropy',
# dtype=tf.float32):
# """
# Args:
# reg_lambda: Weight regularization constant
# C: Penalty parameter C of the loss term
# radius_constant: constant defining the length of the radius
# reduction: reduction type to use. See super class
# label_smoothing: amount of smoothing to perform on labels
# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
# name: Name of the loss instance
# dtype: tf datatype to use for tensor conversions.
# """
# if reg_lambda <= 0:
# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
# if C <= 0:
# raise ValueError('c: {0}, should be >= 0'.format(C))
# if radius_constant <= 0:
# raise ValueError('radius_constant: {0}, should be >= 0'.format(
# radius_constant
# ))
#
# self.C = C
# self.dtype = dtype
# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
# super(StrongConvexSparseCategoricalCrossentropy, self).__init__(
# reduction=reduction,
# name=name,
# from_logits=from_logits,
# label_smoothing=label_smoothing,
# )
# self.radius_constant = radius_constant
#
# def call(self, y_true, y_pred):
# """Compute loss
#
# Args:
# y_true: Ground truth values.
# y_pred: The predicted values.
#
# Returns:
# Loss values per sample.
# """
# loss = super()
# loss = loss * self.C
# return loss
#
# def radius(self):
# """See super class.
# """
# return self.radius_constant / self.reg_lambda
#
# def gamma(self):
# """See super class.
# """
# return self.reg_lambda
#
# def beta(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda
#
# def lipchitz_constant(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda * self.radius()
#
# def kernel_regularizer(self):
# """
# l2 loss using reg_lambda as the l2 term (as desired). Required for
# this loss function to be strongly convex.
# :return:
# """
# return L1L2(l2=self.reg_lambda)
#
# class StrongConvexSparseCategoricalCrossentropy(
# losses.SparseCategoricalCrossentropy,
# StrongConvexMixin
# ):
# """
# Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight
# regularization.
# """
#
# def __init__(self,
# reg_lambda: float,
# C: float,
# radius_constant: float,
# from_logits: bool = True,
# label_smoothing: float = 0,
# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
# name: str = 'binarycrossentropy',
# dtype=tf.float32):
# """
# Args:
# reg_lambda: Weight regularization constant
# C: Penalty parameter C of the loss term
# radius_constant: constant defining the length of the radius
# reduction: reduction type to use. See super class
# label_smoothing: amount of smoothing to perform on labels
# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
# name: Name of the loss instance
# dtype: tf datatype to use for tensor conversions.
# """
# if reg_lambda <= 0:
# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
# if C <= 0:
# raise ValueError('c: {0}, should be >= 0'.format(C))
# if radius_constant <= 0:
# raise ValueError('radius_constant: {0}, should be >= 0'.format(
# radius_constant
# ))
#
# self.C = C
# self.dtype = dtype
# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
# super(StrongConvexHuber, self).__init__(reduction=reduction,
# name=name,
# from_logits=from_logits,
# label_smoothing=label_smoothing,
# )
# self.radius_constant = radius_constant
#
# def call(self, y_true, y_pred):
# """Compute loss
#
# Args:
# y_true: Ground truth values.
# y_pred: The predicted values.
#
# Returns:
# Loss values per sample.
# """
# loss = super()
# loss = loss * self.C
# return loss
#
# def radius(self):
# """See super class.
# """
# return self.radius_constant / self.reg_lambda
#
# def gamma(self):
# """See super class.
# """
# return self.reg_lambda
#
# def beta(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda
#
# def lipchitz_constant(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda * self.radius()
#
# def kernel_regularizer(self):
# """
# l2 loss using reg_lambda as the l2 term (as desired). Required for
# this loss function to be strongly convex.
# :return:
# """
# return L1L2(l2=self.reg_lambda)
#
#
# class StrongConvexCategoricalCrossentropy(
# losses.CategoricalCrossentropy,
# StrongConvexMixin
# ):
# """
# Strong Convex version of CategoricalCrossentropy loss using l2 weight
# regularization.
# """
#
# def __init__(self,
# reg_lambda: float,
# C: float,
# radius_constant: float,
# from_logits: bool = True,
# label_smoothing: float = 0,
# reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
# name: str = 'binarycrossentropy',
# dtype=tf.float32):
# """
# Args:
# reg_lambda: Weight regularization constant
# C: Penalty parameter C of the loss term
# radius_constant: constant defining the length of the radius
# reduction: reduction type to use. See super class
# label_smoothing: amount of smoothing to perform on labels
# relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
# name: Name of the loss instance
# dtype: tf datatype to use for tensor conversions.
# """
# if reg_lambda <= 0:
# raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
# if C <= 0:
# raise ValueError('c: {0}, should be >= 0'.format(C))
# if radius_constant <= 0:
# raise ValueError('radius_constant: {0}, should be >= 0'.format(
# radius_constant
# ))
#
# self.C = C
# self.dtype = dtype
# self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
# super(StrongConvexHuber, self).__init__(reduction=reduction,
# name=name,
# from_logits=from_logits,
# label_smoothing=label_smoothing,
# )
# self.radius_constant = radius_constant
#
# def call(self, y_true, y_pred):
# """Compute loss
#
# Args:
# y_true: Ground truth values.
# y_pred: The predicted values.
#
# Returns:
# Loss values per sample.
# """
# loss = super()
# loss = loss * self.C
# return loss
#
# def radius(self):
# """See super class.
# """
# return self.radius_constant / self.reg_lambda
#
# def gamma(self):
# """See super class.
# """
# return self.reg_lambda
#
# def beta(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda
#
# def lipchitz_constant(self, class_weight):
# """See super class.
# """
# max_class_weight = self.max_class_weight(class_weight, self.dtype)
# return self.C * max_class_weight + self.reg_lambda * self.radius()
#
# def kernel_regularizer(self):
# """
# l2 loss using reg_lambda as the l2 term (as desired). Required for
# this loss function to be strongly convex.
# :return:
# """
# return L1L2(l2=self.reg_lambda)

View file

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit testing for loss.py"""
"""Unit testing for losses.py"""
from __future__ import absolute_import
from __future__ import division
@ -22,9 +22,9 @@ from tensorflow.python.keras import keras_parameterized
from tensorflow.python.framework import test_util
from tensorflow.python.keras.regularizers import L1L2
from absl.testing import parameterized
from privacy.bolton.loss import StrongConvexBinaryCrossentropy
from privacy.bolton.loss import StrongConvexHuber
from privacy.bolton.loss import StrongConvexMixin
from privacy.bolton.losses import StrongConvexBinaryCrossentropy
from privacy.bolton.losses import StrongConvexHuber
from privacy.bolton.losses import StrongConvexMixin
class StrongConvexMixinTests(keras_parameterized.TestCase):
@ -355,7 +355,7 @@ class HuberTests(keras_parameterized.TestCase):
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1, 1],
'args': [],
'result': L1L2(l2=1),
'result': L1L2(l2=0.5),
},
])
def test_fns(self, init_args, fn, args, result):

View file

@ -20,8 +20,8 @@ import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras import optimizers
from tensorflow.python.framework import ops as _ops
from privacy.bolton.loss import StrongConvexMixin
from privacy.bolton.optimizer import Bolton
from privacy.bolton.losses import StrongConvexMixin
from privacy.bolton.optimizers import Bolton
class BoltonModel(Model):
@ -142,7 +142,9 @@ class BoltonModel(Model):
"""
if class_weight is None:
class_weight = self.calculate_class_weights(class_weight)
class_weight_ = self.calculate_class_weights(class_weight)
else:
class_weight_ = class_weight
if n_samples is not None:
data_size = n_samples
elif hasattr(x, 'shape'):
@ -160,10 +162,13 @@ class BoltonModel(Model):
if batch_size_ is None:
raise ValueError('batch_size: {0} is an '
'invalid value'.format(batch_size_))
if data_size is None:
raise ValueError('Could not infer the number of samples. Please pass '
'this in using n_samples.')
with self.optimizer(noise_distribution,
epsilon,
self.layers,
class_weight,
class_weight_,
data_size,
self.n_outputs,
batch_size_,

View file

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit testing for model.py"""
"""Unit testing for models.py"""
from __future__ import absolute_import
from __future__ import division
@ -25,9 +25,9 @@ from tensorflow.python.keras import losses
from tensorflow.python.framework import ops as _ops
from tensorflow.python.keras.regularizers import L1L2
from absl.testing import parameterized
from privacy.bolton import model
from privacy.bolton.optimizer import Bolton
from privacy.bolton.loss import StrongConvexMixin
from privacy.bolton import models
from privacy.bolton.optimizers import Bolton
from privacy.bolton.losses import StrongConvexMixin
class TestLoss(losses.Loss, StrongConvexMixin):
"""Test loss function for testing Bolton model"""
@ -130,8 +130,8 @@ class InitTests(keras_parameterized.TestCase):
n_outputs: number of output neurons
"""
# test valid domains for each variable
clf = model.BoltonModel(n_outputs)
self.assertIsInstance(clf, model.BoltonModel)
clf = models.BoltonModel(n_outputs)
self.assertIsInstance(clf, models.BoltonModel)
@parameterized.named_parameters([
{'testcase_name': 'invalid n_outputs',
@ -146,7 +146,7 @@ class InitTests(keras_parameterized.TestCase):
"""
# test invalid domains for each variable, especially noise
with self.assertRaises(ValueError):
model.BoltonModel(n_outputs)
models.BoltonModel(n_outputs)
@parameterized.named_parameters([
{'testcase_name': 'string compile',
@ -170,7 +170,7 @@ class InitTests(keras_parameterized.TestCase):
"""
# test compilation of valid tf.optimizer and tf.loss
with self.cached_session():
clf = model.BoltonModel(n_outputs)
clf = models.BoltonModel(n_outputs)
clf.compile(optimizer, loss)
self.assertEqual(clf.loss, loss)
@ -197,7 +197,7 @@ class InitTests(keras_parameterized.TestCase):
# test compilation of invalid tf.optimizer and non-instantiated loss.
with self.cached_session():
with self.assertRaises((ValueError, AttributeError)):
clf = model.BoltonModel(n_outputs)
clf = models.BoltonModel(n_outputs)
clf.compile(optimizer, loss)
@ -261,7 +261,7 @@ def _do_fit(n_samples,
Returns: BoltonModel instance
"""
clf = model.BoltonModel(n_outputs)
clf = models.BoltonModel(n_outputs)
clf.compile(optimizer, loss)
if generator:
x = _cat_dataset(
@ -355,7 +355,7 @@ class FitTests(keras_parameterized.TestCase):
input_dim = 5
batch_size = 1
n_samples = 10
clf = model.BoltonModel(n_classes)
clf = models.BoltonModel(n_classes)
clf.compile(optimizer, loss)
x = _cat_dataset(
n_samples,
@ -441,7 +441,7 @@ class FitTests(keras_parameterized.TestCase):
num_classes: number of outputs neurons
result: expected result
"""
clf = model.BoltonModel(1, 1)
clf = models.BoltonModel(1, 1)
expected = clf.calculate_class_weights(class_weights,
class_counts,
num_classes
@ -508,7 +508,7 @@ class FitTests(keras_parameterized.TestCase):
num_classes: number of outputs neurons
result: expected result
"""
clf = model.BoltonModel(1, 1)
clf = models.BoltonModel(1, 1)
with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method
clf.calculate_class_weights(class_weights,
class_counts,

View file

@ -21,7 +21,7 @@ import tensorflow as tf
from tensorflow.python.keras.optimizer_v2 import optimizer_v2
from tensorflow.python.ops import math_ops
from tensorflow.python import ops as _ops
from privacy.bolton.loss import StrongConvexMixin
from privacy.bolton.losses import StrongConvexMixin
_accepted_distributions = ['laplace'] # implemented distributions for noising

View file

@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit testing for optimizer.py"""
"""Unit testing for optimizers.py"""
from __future__ import absolute_import
from __future__ import division
@ -29,8 +29,8 @@ from tensorflow.python.framework import ops as _ops
from tensorflow.python.framework import test_util
from tensorflow.python import ops as _ops
from absl.testing import parameterized
from privacy.bolton.loss import StrongConvexMixin
from privacy.bolton import optimizer as opt
from privacy.bolton.losses import StrongConvexMixin
from privacy.bolton import optimizers as opt