Bolton implementation and unit tests. Has two pre-implemented loss functions.

Christopher Choquette Choo 2019-06-19 10:46:30 -04:00
parent ed7fa73a99
commit f41be2c598
8 changed files with 49 additions and 308 deletions

privacy/__init__.py

@@ -41,3 +41,9 @@ else:
   from privacy.optimizers.dp_optimizer import DPAdamOptimizer
   from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
   from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+  from privacy.bolton.models import BoltonModel
+  from privacy.bolton.optimizers import Bolton
+  from privacy.bolton.losses import StrongConvexMixin
+  from privacy.bolton.losses import StrongConvexBinaryCrossentropy
+  from privacy.bolton.losses import StrongConvexHuber
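
Note: after this hunk, the Bolton components are re-exported from the package
root alongside the existing DP optimizers. A minimal usage sketch, assuming
TensorFlow 2.x and that the package is importable; the constructor arguments
mirror the losses.py hunks below, everything else is illustrative:

    from privacy.bolton.models import BoltonModel
    from privacy.bolton.losses import StrongConvexBinaryCrossentropy

    # Hedged sketch: compile a one-output Bolton model with one of the two
    # pre-implemented strongly convex losses added by this commit.
    loss = StrongConvexBinaryCrossentropy(
        reg_lambda=1.0, C=1.0, radius_constant=1.0)
    clf = BoltonModel(n_outputs=1)
    clf.compile('adam', loss)  # string optimizers are exercised in the tests below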

privacy/bolton/__init__.py

@@ -9,6 +9,7 @@ if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
 if hasattr(sys, 'skip_tf_privacy_import'):  # Useful for standalone scripts.
   pass
 else:
-  from privacy.bolton.model import Bolton
-  from privacy.bolton.loss import StrongConvexHuber
-  from privacy.bolton.loss import StrongConvexBinaryCrossentropy
+  from privacy.bolton.models import BoltonModel
+  from privacy.bolton.optimizers import Bolton
+  from privacy.bolton.losses import StrongConvexHuber
+  from privacy.bolton.losses import StrongConvexBinaryCrossentropy

privacy/bolton/losses.py

@@ -21,6 +21,7 @@ from tensorflow.python.keras import losses
 from tensorflow.python.keras.utils import losses_utils
 from tensorflow.python.framework import ops as _ops
 from tensorflow.python.keras.regularizers import L1L2
+from tensorflow.python.platform import tf_logging as logging


 class StrongConvexMixin:
@@ -147,7 +148,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
     self.dtype = dtype
     self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
     super(StrongConvexHuber, self).__init__(
-        name='huber',
+        name='strongconvexhuber',
         reduction=reduction,
     )
@@ -245,6 +246,11 @@ class StrongConvexBinaryCrossentropy(
       name: Name of the loss instance
       dtype: tf datatype to use for tensor conversions.
     """
+    if label_smoothing != 0:
+      logging.warning('The impact of label smoothing on privacy is unknown. '
+                      'Use label smoothing at your own risk as it may not '
+                      'guarantee privacy.')
+
     if reg_lambda <= 0:
       raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
     if C <= 0:
@@ -258,7 +264,7 @@ class StrongConvexBinaryCrossentropy(
     self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
     super(StrongConvexBinaryCrossentropy, self).__init__(
         reduction=reduction,
-        name='binarycrossentropy',
+        name='strongconvexbinarycrossentropy',
         from_logits=from_logits,
         label_smoothing=label_smoothing,
     )
@@ -313,280 +319,3 @@ class StrongConvexBinaryCrossentropy(
     return L1L2(l2=self.reg_lambda/2)
 
 
-# class StrongConvexSparseCategoricalCrossentropy(
-#     losses.CategoricalCrossentropy,
-#     StrongConvexMixin
-# ):
-#   """
-#   Strong Convex version of CategoricalCrossentropy loss using l2 weight
-#   regularization.
-#   """
-#
-#   def __init__(self,
-#                reg_lambda: float,
-#                C: float,
-#                radius_constant: float,
-#                from_logits: bool = True,
-#                label_smoothing: float = 0,
-#                reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
-#                name: str = 'binarycrossentropy',
-#                dtype=tf.float32):
-#     """
-#     Args:
-#       reg_lambda: Weight regularization constant
-#       C: Penalty parameter C of the loss term
-#       radius_constant: constant defining the length of the radius
-#       reduction: reduction type to use. See super class
-#       label_smoothing: amount of smoothing to perform on labels
-#         relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
-#       name: Name of the loss instance
-#       dtype: tf datatype to use for tensor conversions.
-#     """
-#     if reg_lambda <= 0:
-#       raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
-#     if C <= 0:
-#       raise ValueError('c: {0}, should be >= 0'.format(C))
-#     if radius_constant <= 0:
-#       raise ValueError('radius_constant: {0}, should be >= 0'.format(
-#           radius_constant
-#       ))
-#
-#     self.C = C
-#     self.dtype = dtype
-#     self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
-#     super(StrongConvexSparseCategoricalCrossentropy, self).__init__(
-#         reduction=reduction,
-#         name=name,
-#         from_logits=from_logits,
-#         label_smoothing=label_smoothing,
-#     )
-#     self.radius_constant = radius_constant
-#
-#   def call(self, y_true, y_pred):
-#     """Compute loss
-#
-#     Args:
-#       y_true: Ground truth values.
-#       y_pred: The predicted values.
-#
-#     Returns:
-#       Loss values per sample.
-#     """
-#     loss = super()
-#     loss = loss * self.C
-#     return loss
-#
-#   def radius(self):
-#     """See super class.
-#     """
-#     return self.radius_constant / self.reg_lambda
-#
-#   def gamma(self):
-#     """See super class.
-#     """
-#     return self.reg_lambda
-#
-#   def beta(self, class_weight):
-#     """See super class.
-#     """
-#     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-#     return self.C * max_class_weight + self.reg_lambda
-#
-#   def lipchitz_constant(self, class_weight):
-#     """See super class.
-#     """
-#     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-#     return self.C * max_class_weight + self.reg_lambda * self.radius()
-#
-#   def kernel_regularizer(self):
-#     """
-#     l2 loss using reg_lambda as the l2 term (as desired). Required for
-#     this loss function to be strongly convex.
-#     :return:
-#     """
-#     return L1L2(l2=self.reg_lambda)
-#
-# class StrongConvexSparseCategoricalCrossentropy(
-#     losses.SparseCategoricalCrossentropy,
-#     StrongConvexMixin
-# ):
-#   """
-#   Strong Convex version of SparseCategoricalCrossentropy loss using l2 weight
-#   regularization.
-#   """
-#
-#   def __init__(self,
-#                reg_lambda: float,
-#                C: float,
-#                radius_constant: float,
-#                from_logits: bool = True,
-#                label_smoothing: float = 0,
-#                reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
-#                name: str = 'binarycrossentropy',
-#                dtype=tf.float32):
-#     """
-#     Args:
-#       reg_lambda: Weight regularization constant
-#       C: Penalty parameter C of the loss term
-#       radius_constant: constant defining the length of the radius
-#       reduction: reduction type to use. See super class
-#       label_smoothing: amount of smoothing to perform on labels
-#         relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
-#       name: Name of the loss instance
-#       dtype: tf datatype to use for tensor conversions.
-#     """
-#     if reg_lambda <= 0:
-#       raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
-#     if C <= 0:
-#       raise ValueError('c: {0}, should be >= 0'.format(C))
-#     if radius_constant <= 0:
-#       raise ValueError('radius_constant: {0}, should be >= 0'.format(
-#           radius_constant
-#       ))
-#
-#     self.C = C
-#     self.dtype = dtype
-#     self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
-#     super(StrongConvexHuber, self).__init__(reduction=reduction,
-#                                             name=name,
-#                                             from_logits=from_logits,
-#                                             label_smoothing=label_smoothing,
-#     )
-#     self.radius_constant = radius_constant
-#
-#   def call(self, y_true, y_pred):
-#     """Compute loss
-#
-#     Args:
-#       y_true: Ground truth values.
-#       y_pred: The predicted values.
-#
-#     Returns:
-#       Loss values per sample.
-#     """
-#     loss = super()
-#     loss = loss * self.C
-#     return loss
-#
-#   def radius(self):
-#     """See super class.
-#     """
-#     return self.radius_constant / self.reg_lambda
-#
-#   def gamma(self):
-#     """See super class.
-#     """
-#     return self.reg_lambda
-#
-#   def beta(self, class_weight):
-#     """See super class.
-#     """
-#     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-#     return self.C * max_class_weight + self.reg_lambda
-#
-#   def lipchitz_constant(self, class_weight):
-#     """See super class.
-#     """
-#     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-#     return self.C * max_class_weight + self.reg_lambda * self.radius()
-#
-#   def kernel_regularizer(self):
-#     """
-#     l2 loss using reg_lambda as the l2 term (as desired). Required for
-#     this loss function to be strongly convex.
-#     :return:
-#     """
-#     return L1L2(l2=self.reg_lambda)
-#
-#
-# class StrongConvexCategoricalCrossentropy(
-#     losses.CategoricalCrossentropy,
-#     StrongConvexMixin
-# ):
-#   """
-#   Strong Convex version of CategoricalCrossentropy loss using l2 weight
-#   regularization.
-#   """
-#
-#   def __init__(self,
-#                reg_lambda: float,
-#                C: float,
-#                radius_constant: float,
-#                from_logits: bool = True,
-#                label_smoothing: float = 0,
-#                reduction: str = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
-#                name: str = 'binarycrossentropy',
-#                dtype=tf.float32):
-#     """
-#     Args:
-#       reg_lambda: Weight regularization constant
-#       C: Penalty parameter C of the loss term
-#       radius_constant: constant defining the length of the radius
-#       reduction: reduction type to use. See super class
-#       label_smoothing: amount of smoothing to perform on labels
-#         relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x)
-#       name: Name of the loss instance
-#       dtype: tf datatype to use for tensor conversions.
-#     """
-#     if reg_lambda <= 0:
-#       raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
-#     if C <= 0:
-#       raise ValueError('c: {0}, should be >= 0'.format(C))
-#     if radius_constant <= 0:
-#       raise ValueError('radius_constant: {0}, should be >= 0'.format(
-#           radius_constant
-#       ))
-#
-#     self.C = C
-#     self.dtype = dtype
-#     self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
-#     super(StrongConvexHuber, self).__init__(reduction=reduction,
-#                                             name=name,
-#                                             from_logits=from_logits,
-#                                             label_smoothing=label_smoothing,
-#     )
-#     self.radius_constant = radius_constant
-#
-#   def call(self, y_true, y_pred):
-#     """Compute loss
-#
-#     Args:
-#       y_true: Ground truth values.
-#       y_pred: The predicted values.
-#
-#     Returns:
-#       Loss values per sample.
-#     """
-#     loss = super()
-#     loss = loss * self.C
-#     return loss
-#
-#   def radius(self):
-#     """See super class.
-#     """
-#     return self.radius_constant / self.reg_lambda
-#
-#   def gamma(self):
-#     """See super class.
-#     """
-#     return self.reg_lambda
-#
-#   def beta(self, class_weight):
-#     """See super class.
-#     """
-#     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-#     return self.C * max_class_weight + self.reg_lambda
-#
-#   def lipchitz_constant(self, class_weight):
-#     """See super class.
-#     """
-#     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-#     return self.C * max_class_weight + self.reg_lambda * self.radius()
-#
-#   def kernel_regularizer(self):
-#     """
-#     l2 loss using reg_lambda as the l2 term (as desired). Required for
-#     this loss function to be strongly convex.
-#     :return:
-#     """
-#     return L1L2(l2=self.reg_lambda)
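
Note: two behavioral points follow from the hunks above. Nonzero
label_smoothing now logs a warning at construction time because its effect on
the privacy guarantee is unknown, and kernel_regularizer() returns an l2
penalty of reg_lambda/2, which the updated test below asserts as L1L2(l2=0.5)
for reg_lambda=1. A hedged sketch; the argument values are illustrative:

    from privacy.bolton.losses import StrongConvexBinaryCrossentropy

    # Constructing with label_smoothing != 0 triggers the new warning.
    smoothed = StrongConvexBinaryCrossentropy(
        reg_lambda=1.0, C=1.0, radius_constant=1.0, label_smoothing=0.1)

    # The kernel regularizer carries half the regularization constant.
    reg = smoothed.kernel_regularizer()  # L1L2 with l2 == 0.5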

privacy/bolton/losses_test.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Unit testing for loss.py"""
+"""Unit testing for losses.py"""

 from __future__ import absolute_import
 from __future__ import division
@@ -22,9 +22,9 @@ from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.regularizers import L1L2
 from absl.testing import parameterized
-from privacy.bolton.loss import StrongConvexBinaryCrossentropy
-from privacy.bolton.loss import StrongConvexHuber
-from privacy.bolton.loss import StrongConvexMixin
+from privacy.bolton.losses import StrongConvexBinaryCrossentropy
+from privacy.bolton.losses import StrongConvexHuber
+from privacy.bolton.losses import StrongConvexMixin


 class StrongConvexMixinTests(keras_parameterized.TestCase):
@@ -355,7 +355,7 @@ class HuberTests(keras_parameterized.TestCase):
        'fn': 'kernel_regularizer',
        'init_args': [1, 1, 1, 1],
        'args': [],
-       'result': L1L2(l2=1),
+       'result': L1L2(l2=0.5),
       },
   ])
   def test_fns(self, init_args, fn, args, result):

privacy/bolton/models.py

@@ -20,8 +20,8 @@ import tensorflow as tf
 from tensorflow.python.keras.models import Model
 from tensorflow.python.keras import optimizers
 from tensorflow.python.framework import ops as _ops
-from privacy.bolton.loss import StrongConvexMixin
-from privacy.bolton.optimizer import Bolton
+from privacy.bolton.losses import StrongConvexMixin
+from privacy.bolton.optimizers import Bolton


 class BoltonModel(Model):
@@ -142,7 +142,9 @@ class BoltonModel(Model):
     """
     if class_weight is None:
-      class_weight = self.calculate_class_weights(class_weight)
+      class_weight_ = self.calculate_class_weights(class_weight)
+    else:
+      class_weight_ = class_weight
     if n_samples is not None:
       data_size = n_samples
     elif hasattr(x, 'shape'):
@@ -160,10 +162,13 @@ class BoltonModel(Model):
     if batch_size_ is None:
       raise ValueError('batch_size: {0} is an '
                        'invalid value'.format(batch_size_))
+    if data_size is None:
+      raise ValueError('Could not infer the number of samples. Please pass '
+                       'this in using n_samples.')
     with self.optimizer(noise_distribution,
                         epsilon,
                         self.layers,
-                        class_weight,
+                        class_weight_,
                         data_size,
                         self.n_outputs,
                         batch_size_,
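
Note: with the changes above, fit() must know the sample count before the
optimizer context is entered: it uses n_samples when given, falls back to
x.shape, and otherwise raises the new ValueError. A hedged calling sketch;
the keyword names follow the hunk above, the data and model setup are
illustrative:

    import tensorflow as tf
    from privacy.bolton.models import BoltonModel
    from privacy.bolton.losses import StrongConvexBinaryCrossentropy

    clf = BoltonModel(n_outputs=1)
    clf.compile('sgd', StrongConvexBinaryCrossentropy(1.0, 1.0, 1.0))

    x, y = tf.random.normal([10, 5]), tf.ones([10, 1])
    # With tensors, the size is inferred from x.shape; for generator-style
    # input, pass n_samples explicitly or fit() now raises a ValueError.
    clf.fit(x, y, batch_size=1, n_samples=10,
            noise_distribution='laplace', epsilon=2.0)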

privacy/bolton/models_test.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Unit testing for model.py"""
+"""Unit testing for models.py"""

 from __future__ import absolute_import
 from __future__ import division
@@ -25,9 +25,9 @@ from tensorflow.python.keras import losses
 from tensorflow.python.framework import ops as _ops
 from tensorflow.python.keras.regularizers import L1L2
 from absl.testing import parameterized
-from privacy.bolton import model
-from privacy.bolton.optimizer import Bolton
-from privacy.bolton.loss import StrongConvexMixin
+from privacy.bolton import models
+from privacy.bolton.optimizers import Bolton
+from privacy.bolton.losses import StrongConvexMixin


 class TestLoss(losses.Loss, StrongConvexMixin):
   """Test loss function for testing Bolton model"""
@@ -130,8 +130,8 @@ class InitTests(keras_parameterized.TestCase):
       n_outputs: number of output neurons
     """
     # test valid domains for each variable
-    clf = model.BoltonModel(n_outputs)
-    self.assertIsInstance(clf, model.BoltonModel)
+    clf = models.BoltonModel(n_outputs)
+    self.assertIsInstance(clf, models.BoltonModel)

   @parameterized.named_parameters([
       {'testcase_name': 'invalid n_outputs',
@@ -146,7 +146,7 @@ class InitTests(keras_parameterized.TestCase):
     """
     # test invalid domains for each variable, especially noise
     with self.assertRaises(ValueError):
-      model.BoltonModel(n_outputs)
+      models.BoltonModel(n_outputs)

   @parameterized.named_parameters([
       {'testcase_name': 'string compile',
@@ -170,7 +170,7 @@ class InitTests(keras_parameterized.TestCase):
     """
     # test compilation of valid tf.optimizer and tf.loss
     with self.cached_session():
-      clf = model.BoltonModel(n_outputs)
+      clf = models.BoltonModel(n_outputs)
       clf.compile(optimizer, loss)
       self.assertEqual(clf.loss, loss)
@@ -197,7 +197,7 @@ class InitTests(keras_parameterized.TestCase):
     # test compilaton of invalid tf.optimizer and non instantiated loss.
     with self.cached_session():
       with self.assertRaises((ValueError, AttributeError)):
-        clf = model.BoltonModel(n_outputs)
+        clf = models.BoltonModel(n_outputs)
         clf.compile(optimizer, loss)
@@ -261,7 +261,7 @@ def _do_fit(n_samples,
   Returns: BoltonModel instsance
   """
-  clf = model.BoltonModel(n_outputs)
+  clf = models.BoltonModel(n_outputs)
   clf.compile(optimizer, loss)
   if generator:
     x = _cat_dataset(
@@ -355,7 +355,7 @@ class FitTests(keras_parameterized.TestCase):
     input_dim = 5
     batch_size = 1
     n_samples = 10
-    clf = model.BoltonModel(n_classes)
+    clf = models.BoltonModel(n_classes)
     clf.compile(optimizer, loss)
     x = _cat_dataset(
         n_samples,
@@ -441,7 +441,7 @@ class FitTests(keras_parameterized.TestCase):
       num_classes: number of outputs neurons
       result: expected result
     """
-    clf = model.BoltonModel(1, 1)
+    clf = models.BoltonModel(1, 1)
     expected = clf.calculate_class_weights(class_weights,
                                            class_counts,
                                            num_classes
@@ -508,7 +508,7 @@ class FitTests(keras_parameterized.TestCase):
       num_classes: number of outputs neurons
       result: expected result
     """
-    clf = model.BoltonModel(1, 1)
+    clf = models.BoltonModel(1, 1)
     with self.assertRaisesRegexp(ValueError, err_msg):  # pylint: disable=deprecated-method
       clf.calculate_class_weights(class_weights,
                                   class_counts,

privacy/bolton/optimizers.py

@@ -21,7 +21,7 @@ import tensorflow as tf
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.ops import math_ops
 from tensorflow.python import ops as _ops
-from privacy.bolton.loss import StrongConvexMixin
+from privacy.bolton.losses import StrongConvexMixin


 _accepted_distributions = ['laplace']  # implemented distributions for noising
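
Note: only the Laplace distribution is implemented for noising, as the
module-level constant records. A hedged sketch of the guard this implies;
the helper name below is illustrative, not the module's actual API:

    _accepted_distributions = ['laplace']  # implemented distributions for noising

    def _check_distribution(name):
      # Illustrative: reject noise distributions that are not implemented.
      if name.lower() not in _accepted_distributions:
        raise ValueError('Distribution: {0} is not supported. Must be one of '
                         '{1}.'.format(name, _accepted_distributions))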

privacy/bolton/optimizers_test.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Unit testing for optimizer.py"""
+"""Unit testing for optimizers.py"""

 from __future__ import absolute_import
 from __future__ import division
@@ -29,8 +29,8 @@ from tensorflow.python.framework import ops as _ops
 from tensorflow.python.framework import test_util
 from tensorflow.python import ops as _ops
 from absl.testing import parameterized
-from privacy.bolton.loss import StrongConvexMixin
-from privacy.bolton import optimizer as opt
+from privacy.bolton.losses import StrongConvexMixin
+from privacy.bolton import optimizers as opt