Format TensorFlow Privacy files.

PiperOrigin-RevId: 424923635
Michael Reneer 2022-01-28 11:56:55 -08:00 committed by A. Unique TensorFlower
parent 07230a161a
commit 943ef91ee9
62 changed files with 1170 additions and 1087 deletions

View file

@ -23,7 +23,6 @@ from tensorflow_docs.api_generator import generate_lib
from tensorflow_docs.api_generator import public_api
import tensorflow_privacy as tf_privacy
flags.DEFINE_string('output_dir', '/tmp/tf_privacy',
'Where to output the docs.')
flags.DEFINE_string(

View file

@ -1,6 +1,5 @@
# Get Started
This document assumes you are already familiar with differential privacy, and
have determined that you would like to use TF Privacy to implement differential
privacy guarantees in your model(s). If you're not familiar with differential

View file

@ -17,7 +17,6 @@
import math
from absl import app
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
@ -33,8 +32,10 @@ def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')
print(
'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
' over {} steps satisfies'.format(100 * q, sigma, steps),
end=' ')
print('differential privacy with eps = {:.3g} and delta = {}.'.format(
eps, delta))
print('The optimal RDP order is {}.'.format(opt_order))
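
For reference, a minimal runnable sketch of the analysis being rewrapped here, assuming the legacy rdp_accountant API from the imports above; the parameter values are illustrative only.

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

q = 256 / 60000  # sampling rate = batch_size / n
sigma = 1.1      # noise multiplier
steps = 10000    # number of DP-SGD steps
orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(11, 64))
rdp = compute_rdp(q=q, noise_multiplier=sigma, steps=steps, orders=orders)
eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=1e-5)
print('eps = {:.3g} at RDP order {}'.format(eps, opt_order))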

View file

@ -51,7 +51,8 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
# using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
low_delta = .5 * math.erfc((eps * sigma - .5 / sigma) / math.sqrt(2))
if eps < 100: # Skip this if it causes overflow; error is minor.
low_delta -= math.exp(eps)*.5*math.erfc((eps*sigma+.5/sigma)/math.sqrt(2))
low_delta -= math.exp(eps) * .5 * math.erfc(
(eps * sigma + .5 / sigma) / math.sqrt(2))
self.assertLessEqual(low_delta, delta)
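
Restating the bound exercised above (a hedged paraphrase of Theorem 8, v2, of https://arxiv.org/abs/1805.06530): for the Gaussian mechanism with noise multiplier sigma, any delta reported at a given eps must satisfy delta >= low_delta, computed as in the test:

import math

eps, sigma = 1.0, 2.0
low_delta = .5 * math.erfc((eps * sigma - .5 / sigma) / math.sqrt(2))
if eps < 100:  # same overflow guard as the test above
  low_delta -= math.exp(eps) * .5 * math.erfc(
      (eps * sigma + .5 / sigma) / math.sqrt(2))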

View file

@ -52,8 +52,8 @@ def main(argv):
assert FLAGS.batch_size is not None, 'Flag batch_size is missing.'
assert FLAGS.epsilon is not None, 'Flag epsilon is missing.'
assert FLAGS.epochs is not None, 'Flag epochs is missing.'
compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon,
FLAGS.epochs, FLAGS.delta, FLAGS.min_noise)
compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, FLAGS.epochs,
FLAGS.delta, FLAGS.min_noise)
if __name__ == '__main__':
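
A hedged usage sketch of the call rewrapped above, with made-up values; the positional order matches the test file below, and the import path is assumed from this file's location:

from tensorflow_privacy.privacy.analysis import compute_noise_from_budget_lib

noise = compute_noise_from_budget_lib.compute_noise(
    60000,  # N: number of training examples
    256,    # batch_size
    3.0,    # target epsilon
    30,     # epochs
    1e-5,   # delta
    0.1)    # min_noise: smallest acceptable noise multiplier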

View file

@ -27,11 +27,12 @@ class ComputeNoiseFromBudgetTest(parameterized.TestCase):
('Test2', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1e-5, 0.1),
('Test3', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1, 0),
)
def test_compute_noise(self, n, batch_size, target_epsilon, epochs,
delta, min_noise, expected_noise):
def test_compute_noise(self, n, batch_size, target_epsilon, epochs, delta,
min_noise, expected_noise):
target_noise = compute_noise_from_budget_lib.compute_noise(
n, batch_size, target_epsilon, epochs, delta, min_noise)
self.assertAlmostEqual(target_noise, expected_noise)
if __name__ == '__main__':
absltest.main()

View file

@ -176,4 +176,3 @@ class SampledWithoutReplacementDpEvent(DpEvent):
source_dataset_size: int
sample_size: int
event: DpEvent
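
As context for the dataclass above, a hedged sketch of wrapping a mechanism event in a sampling event, with the fields in the declared order; the sizes are illustrative:

from tensorflow_privacy.privacy.analysis import dp_event

gaussian = dp_event.GaussianDpEvent(noise_multiplier=1.1)
sampled = dp_event.SampledWithoutReplacementDpEvent(
    source_dataset_size=60000, sample_size=256, event=gaussian)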

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for DpEventBuilder."""
from absl.testing import absltest
from tensorflow_privacy.privacy.analysis import dp_event
@ -68,7 +67,8 @@ class DpEventBuilderTest(absltest.TestCase):
expected_event = dp_event.ComposedDpEvent([
_gaussian_event,
dp_event.SelfComposedDpEvent(composed_event, 3),
dp_event.SelfComposedDpEvent(_poisson_event, 2)])
dp_event.SelfComposedDpEvent(_poisson_event, 2)
])
self.assertEqual(expected_event, builder.build())
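
A hedged sketch of the builder protocol this test exercises, assuming DpEventBuilder exposes compose(event, count=1) as the expected_event above implies:

from tensorflow_privacy.privacy.analysis import dp_event
from tensorflow_privacy.privacy.analysis import dp_event_builder

builder = dp_event_builder.DpEventBuilder()
builder.compose(dp_event.GaussianDpEvent(1.0))
builder.compose(dp_event.GaussianDpEvent(2.0), 3)
composed = builder.build()  # a tree of (Self)ComposedDpEvents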

View file

@ -84,13 +84,13 @@ class TensorBuffer(object):
dtype=self._dtype,
initializer=new_buffer,
trainable=False)
return self._buffer, tf.assign(
self._capacity, tf.multiply(self._capacity, 2))
return self._buffer, tf.assign(self._capacity,
tf.multiply(self._capacity, 2))
else:
return tf.assign(
self._buffer, new_buffer,
validate_shape=False), tf.assign(
self._capacity, tf.multiply(self._capacity, 2))
validate_shape=False), tf.assign(self._capacity,
tf.multiply(self._capacity, 2))
update_buffer, update_capacity = tf.cond(
pred=tf.equal(self._current_size, self._capacity),
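
For orientation, a hedged TF1-style usage sketch of the buffer whose capacity-doubling logic is reformatted above; the constructor signature is assumed from the test below:

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.analysis import tensor_buffer

size, shape = 2, [2, 3]
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
my_buffer.append(tf.zeros(shape=shape, dtype=tf.int32))  # doubles capacity when full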

View file

@ -44,8 +44,7 @@ class TensorBufferTest(tf.test.TestCase):
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
with self.assertRaisesRegex(
tf.errors.InvalidArgumentError,
with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
'Appending value of inconsistent shape.'):
my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))

View file

@ -187,5 +187,6 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
tree_aggregation_accountant._compute_gaussian_zcdp(
sigma, sum_sensitivity_square))
if __name__ == '__main__':
tf.test.main()
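
The quantity under test is, under the standard zCDP analysis of the Gaussian mechanism (a hedged restatement, not the repository's code), rho = sensitivity^2 / (2 * sigma^2); a hand computation:

sigma, sum_sensitivity_square = 2.0, 4.0
rho = sum_sensitivity_square / (2 * sigma**2)  # = 0.5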

View file

@ -68,7 +68,6 @@ class StrongConvexMixin:
Args:
class_weight: class weights used
Returns: L
"""
raise NotImplementedError("lipchitz constant not implemented for "
@ -126,13 +125,10 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
if reg_lambda <= 0:
raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
if radius_constant <= 0:
raise ValueError("radius_constant: {0}, should be >= 0".format(
radius_constant
))
raise ValueError(
"radius_constant: {0}, should be >= 0".format(radius_constant))
if delta <= 0:
raise ValueError("delta: {0}, should be >= 0".format(
delta
))
raise ValueError("delta: {0}, should be >= 0".format(delta))
self.C = c_arg # pylint: disable=invalid-name
self.delta = delta
self.radius_constant = radius_constant
@ -172,9 +168,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
def beta(self, class_weight):
"""See super class."""
max_class_weight = self.max_class_weight(class_weight, self.dtype)
delta = _ops.convert_to_tensor_v2(self.delta,
dtype=self.dtype
)
delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype)
return self.C * max_class_weight / (delta *
tf.constant(2, dtype=self.dtype)) + \
self.reg_lambda
@ -200,10 +194,8 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
return L1L2(l2=self.reg_lambda / 2)
class StrongConvexBinaryCrossentropy(
losses.BinaryCrossentropy,
StrongConvexMixin
):
class StrongConvexBinaryCrossentropy(losses.BinaryCrossentropy,
StrongConvexMixin):
"""Strongly Convex BinaryCrossentropy loss using l2 weight regularization."""
def __init__(self,
@ -222,10 +214,10 @@ class StrongConvexBinaryCrossentropy(
radius_constant: constant defining the length of the radius
from_logits: True if the input are unscaled logits. False if they are
already scaled.
label_smoothing: amount of smoothing to perform on labels
relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the
impact of this parameter's effect on privacy is not known and thus the
default should be used.
label_smoothing: amount of smoothing to perform on labels relaxation of
trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the impact of this
parameter's effect on privacy is not known and thus the default should
be used.
reduction: reduction type to use. See super class
dtype: tf datatype to use for tensor conversions.
"""
@ -239,9 +231,8 @@ class StrongConvexBinaryCrossentropy(
if c_arg <= 0:
raise ValueError("c: {0}, should be >= 0".format(c_arg))
if radius_constant <= 0:
raise ValueError("radius_constant: {0}, should be >= 0".format(
radius_constant
))
raise ValueError(
"radius_constant: {0}, should be >= 0".format(radius_constant))
self.dtype = dtype
self.C = c_arg # pylint: disable=invalid-name
self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)

View file

@ -40,21 +40,29 @@ def captured_output():
class StrongConvexMixinTests(keras_parameterized.TestCase):
"""Tests for the StrongConvexMixin."""
@parameterized.named_parameters([
{'testcase_name': 'beta not implemented',
'fn': 'beta',
'args': [1]},
{'testcase_name': 'gamma not implemented',
'fn': 'gamma',
'args': []},
{'testcase_name': 'lipchitz not implemented',
'fn': 'lipchitz_constant',
'args': [1]},
{'testcase_name': 'radius not implemented',
'fn': 'radius',
'args': []},
])
@parameterized.named_parameters([
{
'testcase_name': 'beta not implemented',
'fn': 'beta',
'args': [1]
},
{
'testcase_name': 'gamma not implemented',
'fn': 'gamma',
'args': []
},
{
'testcase_name': 'lipchitz not implemented',
'fn': 'lipchitz_constant',
'args': [1]
},
{
'testcase_name': 'radius not implemented',
'fn': 'radius',
'args': []
},
])
def test_not_implemented(self, fn, args):
"""Test that the given fn's are not implemented on the mixin.
@ -67,9 +75,11 @@ class StrongConvexMixinTests(keras_parameterized.TestCase):
getattr(loss, fn, None)(*args)
@parameterized.named_parameters([
{'testcase_name': 'radius not implemented',
{
'testcase_name': 'radius not implemented',
'fn': 'kernel_regularizer',
'args': []},
'args': []
},
])
def test_return_none(self, fn, args):
"""Test that fn of Mixin returns None.
@ -87,7 +97,8 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
"""tests for BinaryCrossesntropy StrongConvex loss."""
@parameterized.named_parameters([
{'testcase_name': 'normal',
{
'testcase_name': 'normal',
'reg_lambda': 1,
'C': 1,
'radius_constant': 1
@ -106,24 +117,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
self.assertIsInstance(loss, StrongConvexBinaryCrossentropy)
@parameterized.named_parameters([
{'testcase_name': 'negative c',
{
'testcase_name': 'negative c',
'reg_lambda': 1,
'C': -1,
'radius_constant': 1
},
{'testcase_name': 'negative radius',
{
'testcase_name': 'negative radius',
'reg_lambda': 1,
'C': 1,
'radius_constant': -1
},
{'testcase_name': 'negative lambda',
{
'testcase_name': 'negative lambda',
'reg_lambda': -1,
'C': 1,
'radius_constant': 1
}, # pylint: disable=invalid-name
])
def test_bad_init_params(self, reg_lambda, C, radius_constant):
"""Test invalid domain for given params. Should return ValueError.
"""Test invalid domain for given params.
Should return ValueError.
Args:
reg_lambda: initialization value for reg_lambda arg
@ -137,22 +153,26 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
# [] for compatibility with tensorflow loss calculation
{'testcase_name': 'both positive',
{
'testcase_name': 'both positive',
'logits': [10000],
'y_true': [1],
'result': 0,
},
{'testcase_name': 'positive gradient negative logits',
{
'testcase_name': 'positive gradient negative logits',
'logits': [-10000],
'y_true': [1],
'result': 10000,
},
{'testcase_name': 'positivee gradient positive logits',
{
'testcase_name': 'positivee gradient positive logits',
'logits': [10000],
'y_true': [0],
'result': 10000,
},
{'testcase_name': 'both negative',
{
'testcase_name': 'both negative',
'logits': [-10000],
'y_true': [0],
'result': 0
@ -173,25 +193,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
self.assertEqual(loss.numpy(), result)
@parameterized.named_parameters([
{'testcase_name': 'beta',
{
'testcase_name': 'beta',
'init_args': [1, 1, 1],
'fn': 'beta',
'args': [1],
'result': tf.constant(2, dtype=tf.float32)
},
{'testcase_name': 'gamma',
{
'testcase_name': 'gamma',
'fn': 'gamma',
'init_args': [1, 1, 1],
'args': [],
'result': tf.constant(1, dtype=tf.float32),
},
{'testcase_name': 'lipchitz constant',
{
'testcase_name': 'lipchitz constant',
'fn': 'lipchitz_constant',
'init_args': [1, 1, 1],
'args': [1],
'result': tf.constant(2, dtype=tf.float32),
},
{'testcase_name': 'kernel regularizer',
{
'testcase_name': 'kernel regularizer',
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1],
'args': [],
@ -218,7 +242,8 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
self.assertEqual(expected, result)
@parameterized.named_parameters([
{'testcase_name': 'label_smoothing',
{
'testcase_name': 'label_smoothing',
'init_args': [1, 1, 1, True, 0.1],
'fn': None,
'args': None,
@ -245,7 +270,8 @@ class HuberTests(keras_parameterized.TestCase):
"""tests for BinaryCrossesntropy StrongConvex loss."""
@parameterized.named_parameters([
{'testcase_name': 'normal',
{
'testcase_name': 'normal',
'reg_lambda': 1,
'c': 1,
'radius_constant': 1,
@ -266,25 +292,29 @@ class HuberTests(keras_parameterized.TestCase):
self.assertIsInstance(loss, StrongConvexHuber)
@parameterized.named_parameters([
{'testcase_name': 'negative c',
{
'testcase_name': 'negative c',
'reg_lambda': 1,
'c': -1,
'radius_constant': 1,
'delta': 1
},
{'testcase_name': 'negative radius',
{
'testcase_name': 'negative radius',
'reg_lambda': 1,
'c': 1,
'radius_constant': -1,
'delta': 1
},
{'testcase_name': 'negative lambda',
{
'testcase_name': 'negative lambda',
'reg_lambda': -1,
'c': 1,
'radius_constant': 1,
'delta': 1
},
{'testcase_name': 'negative delta',
{
'testcase_name': 'negative delta',
'reg_lambda': 1,
'c': 1,
'radius_constant': 1,
@ -292,7 +322,9 @@ class HuberTests(keras_parameterized.TestCase):
},
])
def test_bad_init_params(self, reg_lambda, c, radius_constant, delta):
"""Test invalid domain for given params. Should return ValueError.
"""Test invalid domain for given params.
Should return ValueError.
Args:
reg_lambda: initialization value for reg_lambda arg
@ -307,55 +339,64 @@ class HuberTests(keras_parameterized.TestCase):
# test the bounds and test varied deltas
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
{
'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
'logits': 2.1,
'y_true': 1,
'delta': 1,
'result': 0,
},
{'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
{
'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
'logits': 1.9,
'y_true': 1,
'delta': 1,
'result': 0.01 * 0.25,
},
{'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
{
'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
'logits': 0.1,
'y_true': 1,
'delta': 1,
'result': 1.9**2 * 0.25,
},
{'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
{
'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
'logits': -0.1,
'y_true': 1,
'delta': 1,
'result': 1.1,
},
{'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
{
'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
'logits': 3.1,
'y_true': 1,
'delta': 2,
'result': 0,
},
{'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
{
'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
'logits': 2.9,
'y_true': 1,
'delta': 2,
'result': 0.01 * 0.125,
},
{'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
{
'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
'logits': 1.1,
'y_true': 1,
'delta': 2,
'result': 1.9**2 * 0.125,
},
{'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
{
'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
'logits': -1.1,
'y_true': 1,
'delta': 2,
'result': 2.1,
},
{'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
{
'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
'logits': -2.1,
'y_true': -1,
'delta': 1,
@ -378,25 +419,29 @@ class HuberTests(keras_parameterized.TestCase):
self.assertAllClose(loss.numpy(), result)
@parameterized.named_parameters([
{'testcase_name': 'beta',
{
'testcase_name': 'beta',
'init_args': [1, 1, 1, 1],
'fn': 'beta',
'args': [1],
'result': tf.Variable(1.5, dtype=tf.float32)
},
{'testcase_name': 'gamma',
{
'testcase_name': 'gamma',
'fn': 'gamma',
'init_args': [1, 1, 1, 1],
'args': [],
'result': tf.Variable(1, dtype=tf.float32),
},
{'testcase_name': 'lipchitz constant',
{
'testcase_name': 'lipchitz constant',
'fn': 'lipchitz_constant',
'init_args': [1, 1, 1, 1],
'args': [1],
'result': tf.Variable(2, dtype=tf.float32),
},
{'testcase_name': 'kernel regularizer',
{
'testcase_name': 'kernel regularizer',
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1, 1],
'args': [],

View file

@ -38,10 +38,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
Descent-based Analytics by Xi Wu et al.
"""
def __init__(self,
n_outputs,
seed=1,
dtype=tf.float32):
def __init__(self, n_outputs, seed=1, dtype=tf.float32):
"""Private constructor.
Args:
@ -51,9 +48,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
"""
super(BoltOnModel, self).__init__(name='bolton', dynamic=False)
if n_outputs <= 0:
raise ValueError('n_outputs = {0} is not valid. Must be > 0.'.format(
n_outputs
))
raise ValueError(
'n_outputs = {0} is not valid. Must be > 0.'.format(n_outputs))
self.n_outputs = n_outputs
self.seed = seed
self._layers_instantiated = False
@ -76,11 +72,13 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
loss,
kernel_initializer=tf.initializers.GlorotUniform,
**kwargs): # pylint: disable=arguments-differ
"""See super class. Default optimizer used in BoltOn method is SGD.
"""See super class.
Default optimizer used in BoltOn method is SGD.
Args:
optimizer: The optimizer to use. This will be automatically wrapped
with the BoltOn Optimizer.
optimizer: The optimizer to use. This will be automatically wrapped with
the BoltOn Optimizer.
loss: The loss function to use. Must be a StrongConvex loss (extend the
StrongConvexMixin).
kernel_initializer: The kernel initializer to use for the single layer.
@ -152,8 +150,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
else:
data_size = None
batch_size_ = self._validate_or_infer_batch_size(batch_size,
steps_per_epoch,
x)
steps_per_epoch, x)
if batch_size_ is None:
batch_size_ = 32
# inferring batch_size to be passed to optimizer. batch_size must remain its
@ -164,13 +161,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
if data_size is None:
raise ValueError('Could not infer the number of samples. Please pass '
'this in using n_samples.')
with self.optimizer(noise_distribution,
epsilon,
self.layers,
class_weight_,
data_size,
batch_size_) as _:
out = super(BoltOnModel, self).fit(x=x,
with self.optimizer(noise_distribution, epsilon, self.layers, class_weight_,
data_size, batch_size_) as _:
out = super(BoltOnModel, self).fit(
x=x,
y=y,
batch_size=batch_size,
class_weight=class_weight,
@ -222,12 +216,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
generator)
if batch_size is None:
batch_size = 32
with self.optimizer(noise_distribution,
epsilon,
self.layers,
class_weight,
data_size,
batch_size) as _:
with self.optimizer(noise_distribution, epsilon, self.layers, class_weight,
data_size, batch_size) as _:
out = super(BoltOnModel, self).fit_generator(
generator,
class_weight=class_weight,
@ -243,10 +233,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
Args:
class_weights: str specifying type, array giving weights, or None.
class_counts: If class_weights is not None, then an array of
the number of samples for each class
num_classes: If class_weights is not None, then the number of
classes.
class_counts: If class_weights is not None, then an array of the number of
samples for each class
num_classes: If class_weights is not None, then the number of classes.
Returns:
class_weights as 1D tensor, to be passed to model's fit method.
"""
@ -259,14 +249,12 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
raise ValueError('Detected string class_weights with '
'value: {0}, which is not one of {1}.'
'Please select a valid class_weight type'
'or pass an array'.format(class_weights,
class_keys))
'or pass an array'.format(class_weights, class_keys))
if class_counts is None:
raise ValueError('Class counts must be provided if using '
'class_weights=%s' % class_weights)
class_counts_shape = tf.Variable(class_counts,
trainable=False,
dtype=self._dtype).shape
class_counts_shape = tf.Variable(
class_counts, trainable=False, dtype=self._dtype).shape
if len(class_counts_shape) != 1:
raise ValueError('class counts must be a 1D array.'
'Detected: {0}'.format(class_counts_shape))
@ -282,9 +270,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
class_weights = 1
elif is_string and class_weights == 'balanced':
num_samples = sum(class_counts)
weighted_counts = tf.dtypes.cast(tf.math.multiply(num_classes,
class_counts),
self._dtype)
weighted_counts = tf.dtypes.cast(
tf.math.multiply(num_classes, class_counts), self._dtype)
class_weights = tf.Variable(num_samples, dtype=self._dtype) / \
tf.Variable(weighted_counts, dtype=self._dtype)
else:
@ -293,8 +280,6 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
raise ValueError('Detected class_weights shape: {0} instead of '
'1D array'.format(class_weights.shape))
if class_weights.shape[0] != num_classes:
raise ValueError(
'Detected array length: {0} instead of: {1}'.format(
class_weights.shape[0],
num_classes))
raise ValueError('Detected array length: {0} instead of: {1}'.format(
class_weights.shape[0], num_classes))
return class_weights
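
A runnable hedged sketch of the 'balanced' branch reformatted above, where the weights come out to num_samples / (num_classes * class_counts); the import path is assumed from this file's location:

from tensorflow_privacy.privacy.bolt_on import models

clf = models.BoltOnModel(n_outputs=2)
weights = clf.calculate_class_weights(
    class_weights='balanced', class_counts=[10, 30], num_classes=2)
# sum(class_counts) = 40, so weights ~ [40/20, 40/60] = [2.0, 0.667]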

View file

@ -73,9 +73,7 @@ class TestLoss(losses.Loss, StrongConvexMixin):
def call(self, y_true, y_pred):
"""Loss function that is minimized at the mean of the input points."""
return 0.5 * tf.reduce_sum(
tf.math.squared_difference(y_true, y_pred),
axis=1
)
tf.math.squared_difference(y_true, y_pred), axis=1)
def max_class_weight(self, class_weight):
"""the maximum weighting in class weights (max value) as a scalar tensor.
@ -125,10 +123,12 @@ class InitTests(keras_parameterized.TestCase):
"""Tests for keras model initialization."""
@parameterized.named_parameters([
{'testcase_name': 'normal',
{
'testcase_name': 'normal',
'n_outputs': 1,
},
{'testcase_name': 'many outputs',
{
'testcase_name': 'many outputs',
'n_outputs': 100,
},
])
@ -143,7 +143,8 @@ class InitTests(keras_parameterized.TestCase):
self.assertIsInstance(clf, models.BoltOnModel)
@parameterized.named_parameters([
{'testcase_name': 'invalid n_outputs',
{
'testcase_name': 'invalid n_outputs',
'n_outputs': -1,
},
])
@ -158,12 +159,14 @@ class InitTests(keras_parameterized.TestCase):
models.BoltOnModel(n_outputs)
@parameterized.named_parameters([
{'testcase_name': 'string compile',
{
'testcase_name': 'string compile',
'n_outputs': 1,
'loss': TestLoss(1, 1, 1),
'optimizer': 'adam',
},
{'testcase_name': 'test compile',
{
'testcase_name': 'test compile',
'n_outputs': 100,
'loss': TestLoss(1, 1, 1),
'optimizer': TestOptimizer(),
@ -183,18 +186,17 @@ class InitTests(keras_parameterized.TestCase):
clf.compile(optimizer, loss)
self.assertEqual(clf.loss, loss)
@parameterized.named_parameters([
{'testcase_name': 'Not strong loss',
@parameterized.named_parameters([{
'testcase_name': 'Not strong loss',
'n_outputs': 1,
'loss': losses.BinaryCrossentropy(),
'optimizer': 'adam',
},
{'testcase_name': 'Not valid optimizer',
}, {
'testcase_name': 'Not valid optimizer',
'n_outputs': 1,
'loss': TestLoss(1, 1, 1),
'optimizer': 'ada',
}
])
}])
def test_bad_compile(self, n_outputs, loss, optimizer):
"""test bad compilations of BoltOnModel that should raise errors.
@ -231,17 +233,11 @@ def _cat_dataset(n_samples, input_dim, n_classes, batch_size, generator=False):
x_stack = []
y_stack = []
for i_class in range(n_classes):
x_stack.append(
tf.constant(1*i_class, tf.float32, (n_samples, input_dim))
)
y_stack.append(
tf.constant(i_class, tf.float32, (n_samples, n_classes))
)
x_stack.append(tf.constant(1 * i_class, tf.float32, (n_samples, input_dim)))
y_stack.append(tf.constant(i_class, tf.float32, (n_samples, n_classes)))
x_set, y_set = tf.stack(x_stack), tf.stack(y_stack)
if generator:
dataset = tf.data.Dataset.from_tensor_slices(
(x_set, y_set)
)
dataset = tf.data.Dataset.from_tensor_slices((x_set, y_set))
dataset = dataset.batch(batch_size=batch_size)
return dataset
return x_set, y_set
@ -266,8 +262,8 @@ def _do_fit(n_samples,
epsilon: privacy parameter
generator: True to create a generator, False to use an iterator
batch_size: batch_size to use
reset_n_samples: True to set _samples to None prior to fitting.
False does nothing
reset_n_samples: True to set _samples to None prior to fitting. False does
nothing
optimizer: instance of TestOptimizer
loss: instance of TestLoss
distribution: distribution to get noise from.
@ -279,32 +275,25 @@ def _do_fit(n_samples,
clf.compile(optimizer, loss)
if generator:
x = _cat_dataset(
n_samples,
input_dim,
n_outputs,
batch_size,
generator=generator
)
n_samples, input_dim, n_outputs, batch_size, generator=generator)
y = None
# x = x.batch(batch_size)
x = x.shuffle(n_samples // 2)
batch_size = None
if reset_n_samples:
n_samples = None
clf.fit_generator(x,
clf.fit_generator(
x,
n_samples=n_samples,
noise_distribution=distribution,
epsilon=epsilon)
else:
x, y = _cat_dataset(
n_samples,
input_dim,
n_outputs,
batch_size,
generator=generator)
n_samples, input_dim, n_outputs, batch_size, generator=generator)
if reset_n_samples:
n_samples = None
clf.fit(x,
clf.fit(
x,
y,
batch_size=batch_size,
n_samples=n_samples,
@ -318,19 +307,23 @@ class FitTests(keras_parameterized.TestCase):
# @test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'iterator fit',
{
'testcase_name': 'iterator fit',
'generator': False,
'reset_n_samples': True,
},
{'testcase_name': 'iterator fit no samples',
{
'testcase_name': 'iterator fit no samples',
'generator': False,
'reset_n_samples': True,
},
{'testcase_name': 'generator fit',
{
'testcase_name': 'generator fit',
'generator': True,
'reset_n_samples': False,
},
{'testcase_name': 'with callbacks',
{
'testcase_name': 'with callbacks',
'generator': True,
'reset_n_samples': False,
},
@ -363,7 +356,8 @@ class FitTests(keras_parameterized.TestCase):
self.assertEqual(hasattr(clf, 'layers'), True)
@parameterized.named_parameters([
{'testcase_name': 'generator fit',
{
'testcase_name': 'generator fit',
'generator': True,
},
])
@ -382,24 +376,21 @@ class FitTests(keras_parameterized.TestCase):
clf = models.BoltOnModel(n_classes)
clf.compile(optimizer, loss)
x = _cat_dataset(
n_samples,
input_dim,
n_classes,
batch_size,
generator=generator
)
n_samples, input_dim, n_classes, batch_size, generator=generator)
x = x.batch(batch_size)
x = x.shuffle(n_samples // 2)
clf.fit_generator(x, n_samples=n_samples)
self.assertEqual(hasattr(clf, 'layers'), True)
@parameterized.named_parameters([
{'testcase_name': 'iterator no n_samples',
{
'testcase_name': 'iterator no n_samples',
'generator': True,
'reset_n_samples': True,
'distribution': 'laplace'
},
{'testcase_name': 'invalid distribution',
{
'testcase_name': 'invalid distribution',
'generator': True,
'reset_n_samples': True,
'distribution': 'not_valid'
@ -422,40 +413,33 @@ class FitTests(keras_parameterized.TestCase):
epsilon = 1
batch_size = 1
n_samples = 10
_do_fit(
n_samples,
input_dim,
n_classes,
epsilon,
generator,
batch_size,
reset_n_samples,
optimizer,
loss,
distribution
)
_do_fit(n_samples, input_dim, n_classes, epsilon, generator, batch_size,
reset_n_samples, optimizer, loss, distribution)
@parameterized.named_parameters([
{'testcase_name': 'None class_weights',
{
'testcase_name': 'None class_weights',
'class_weights': None,
'class_counts': None,
'num_classes': None,
'result': 1},
{'testcase_name': 'class weights array',
'result': 1
},
{
'testcase_name': 'class weights array',
'class_weights': [1, 1],
'class_counts': [1, 1],
'num_classes': 2,
'result': [1, 1]},
{'testcase_name': 'class weights balanced',
'result': [1, 1]
},
{
'testcase_name': 'class weights balanced',
'class_weights': 'balanced',
'class_counts': [1, 1],
'num_classes': 2,
'result': [1, 1]},
'result': [1, 1]
},
])
def test_class_calculate(self,
class_weights,
class_counts,
num_classes,
def test_class_calculate(self, class_weights, class_counts, num_classes,
result):
"""Tests the BOltonModel calculate_class_weights method.
@ -466,61 +450,68 @@ class FitTests(keras_parameterized.TestCase):
result: expected result
"""
clf = models.BoltOnModel(1, 1)
expected = clf.calculate_class_weights(class_weights,
class_counts,
expected = clf.calculate_class_weights(class_weights, class_counts,
num_classes)
if hasattr(expected, 'numpy'):
expected = expected.numpy()
self.assertAllEqual(
expected,
result
)
self.assertAllEqual(expected, result)
@parameterized.named_parameters([
{'testcase_name': 'class weight not valid str',
{
'testcase_name': 'class weight not valid str',
'class_weights': 'not_valid',
'class_counts': 1,
'num_classes': 1,
'err_msg': 'Detected string class_weights with value: not_valid'},
{'testcase_name': 'no class counts',
'err_msg': 'Detected string class_weights with value: not_valid'
},
{
'testcase_name': 'no class counts',
'class_weights': 'balanced',
'class_counts': None,
'num_classes': 1,
'err_msg': 'Class counts must be provided if '
'using class_weights=balanced'},
{'testcase_name': 'no num classes',
'using class_weights=balanced'
},
{
'testcase_name': 'no num classes',
'class_weights': 'balanced',
'class_counts': [1],
'num_classes': None,
'err_msg': 'num_classes must be provided if '
'using class_weights=balanced'},
{'testcase_name': 'class counts not array',
'using class_weights=balanced'
},
{
'testcase_name': 'class counts not array',
'class_weights': 'balanced',
'class_counts': 1,
'num_classes': None,
'err_msg': 'class counts must be a 1D array.'},
{'testcase_name': 'class counts array, no num classes',
'err_msg': 'class counts must be a 1D array.'
},
{
'testcase_name': 'class counts array, no num classes',
'class_weights': [1],
'class_counts': None,
'num_classes': None,
'err_msg': 'You must pass a value for num_classes if '
'creating an array of class_weights'},
{'testcase_name': 'class counts array, improper shape',
'creating an array of class_weights'
},
{
'testcase_name': 'class counts array, improper shape',
'class_weights': [[1], [1]],
'class_counts': None,
'num_classes': 2,
'err_msg': 'Detected class_weights shape'},
{'testcase_name': 'class counts array, wrong number classes',
'err_msg': 'Detected class_weights shape'
},
{
'testcase_name': 'class counts array, wrong number classes',
'class_weights': [1, 1, 1],
'class_counts': None,
'num_classes': 2,
'err_msg': 'Detected array length:'},
'err_msg': 'Detected array length:'
},
])
def test_class_errors(self,
class_weights,
class_counts,
num_classes,
def test_class_errors(self, class_weights, class_counts, num_classes,
err_msg):
"""Tests the BOltonModel calculate_class_weights method.
@ -534,9 +525,7 @@ class FitTests(keras_parameterized.TestCase):
"""
clf = models.BoltOnModel(1, 1)
with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method
clf.calculate_class_weights(class_weights,
class_counts,
num_classes)
clf.calculate_class_weights(class_weights, class_counts, num_classes)
if __name__ == '__main__':

View file

@ -48,14 +48,12 @@ class GammaBetaDecreasingStep(
'This is performed automatically by using the '
'{1} as a context manager, '
'as desired'.format(self.__class__.__name__,
BoltOn.__class__.__name__
)
)
BoltOn.__class__.__name__))
dtype = self.beta.dtype
one = tf.constant(1, dtype)
return tf.math.minimum(tf.math.reduce_min(one/self.beta),
one/(self.gamma*math_ops.cast(step, dtype))
)
return tf.math.minimum(
tf.math.reduce_min(one / self.beta),
one / (self.gamma * math_ops.cast(step, dtype)))
def get_config(self):
"""Return config to setup the learning rate scheduler."""
@ -108,7 +106,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
Descent-based Analytics by Xi Wu et al.
"""
def __init__(self, # pylint: disable=super-init-not-called
def __init__(
self, # pylint: disable=super-init-not-called
optimizer,
loss,
dtype=tf.float32,
@ -116,8 +115,7 @@ class BoltOn(optimizer_v2.OptimizerV2):
"""Constructor.
Args:
optimizer: Optimizer_v2 or subclass to be used as the optimizer
(wrapped).
optimizer: Optimizer_v2 or subclass to be used as the optimizer (wrapped).
loss: StrongConvexLoss function that the model is being compiled with.
dtype: dtype
"""
@ -155,8 +153,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
"""Normalize the weights to the R-ball.
Args:
force: True to normalize regardless of previous weight values.
False to check if weights > R-ball and only normalize then.
force: True to normalize regardless of previous weight values. False to
check if weights > R-ball and only normalize then.
Raises:
Exception: If not called from inside this optimizer context.
@ -199,14 +197,14 @@ class BoltOn(optimizer_v2.OptimizerV2):
l2_sensitivity = (2 *
loss.lipchitz_constant(self.class_weights)) / \
(loss.gamma() * self.n_samples * self.batch_size)
unit_vector = tf.random.normal(shape=(input_dim, output_dim),
unit_vector = tf.random.normal(
shape=(input_dim, output_dim),
mean=0,
seed=1,
stddev=1.0,
dtype=self.dtype)
unit_vector = unit_vector / tf.math.sqrt(
tf.reduce_sum(tf.math.square(unit_vector), axis=0)
)
tf.reduce_sum(tf.math.square(unit_vector), axis=0))
beta = l2_sensitivity / per_class_epsilon
alpha = input_dim # input_dim
@ -214,8 +212,7 @@ class BoltOn(optimizer_v2.OptimizerV2):
alpha,
beta=1 / beta,
seed=1,
dtype=self.dtype
)
dtype=self.dtype)
return unit_vector * gamma
raise NotImplementedError('Noise distribution: {0} is not '
'a valid distribution'.format(distribution))
@ -245,10 +242,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
except AttributeError:
raise AttributeError(
"Neither '{0}' nor '{1}' object has attribute '{2}'"
"".format(self.__class__.__name__,
self._internal_optimizer.__class__.__name__,
name)
)
''.format(self.__class__.__name__,
self._internal_optimizer.__class__.__name__, name))
def __setattr__(self, key, value):
"""Set attribute to self instance if its the internal optimizer.
@ -309,20 +304,15 @@ class BoltOn(optimizer_v2.OptimizerV2):
self._is_init = True
return self
def __call__(self,
noise_distribution,
epsilon,
layers,
class_weights,
n_samples,
batch_size):
def __call__(self, noise_distribution, epsilon, layers, class_weights,
n_samples, batch_size):
"""Accepts required values for bolton method from context entry point.
Stores them on the optimizer for use throughout fitting.
Args:
noise_distribution: the noise distribution to pick.
see _accepted_distributions and get_noise for possible values.
noise_distribution: the noise distribution to pick. see
_accepted_distributions and get_noise for possible values.
epsilon: privacy parameter. Lower gives more privacy but less utility.
layers: list of Keras/Tensorflow layers. Can be found as model.layers
class_weights: class_weights used, which may either be a scalar or 1D
@ -341,8 +331,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
'distributions'.format(noise_distribution,
_accepted_distributions))
self.noise_distribution = noise_distribution
self.learning_rate.initialize(self.loss.beta(class_weights),
self.loss.gamma())
self.learning_rate.initialize(
self.loss.beta(class_weights), self.loss.gamma())
self.epsilon = tf.constant(epsilon, dtype=self.dtype)
self.class_weights = tf.constant(class_weights, dtype=self.dtype)
self.n_samples = tf.constant(n_samples, dtype=self.dtype)
@ -369,7 +359,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
for layer in self.layers:
input_dim = layer.kernel.shape[0]
output_dim = layer.units
noise = self.get_noise(input_dim,
noise = self.get_noise(
input_dim,
output_dim,
)
layer.kernel = tf.math.add(layer.kernel, noise)

View file

@ -111,9 +111,7 @@ class TestLoss(losses.Loss, StrongConvexMixin):
def call(self, y_true, y_pred):
"""Loss function that is minimized at the mean of the input points."""
return 0.5 * tf.reduce_sum(
tf.math.squared_difference(y_true, y_pred),
axis=1
)
tf.math.squared_difference(y_true, y_pred), axis=1)
def max_class_weight(self, class_weight, dtype=tf.float32):
"""the maximum weighting in class weights (max value) as a scalar tensor.
@ -183,20 +181,24 @@ class TestOptimizer(OptimizerV2):
class BoltonOptimizerTest(keras_parameterized.TestCase):
"""BoltOn Optimizer tests."""
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'getattr',
{
'testcase_name': 'getattr',
'fn': '__getattr__',
'args': ['dtype'],
'result': tf.float32,
'test_attr': None},
{'testcase_name': 'project_weights_to_r',
'test_attr': None
},
{
'testcase_name': 'project_weights_to_r',
'fn': 'project_weights_to_r',
'args': ['dtype'],
'result': None,
'test_attr': ''},
'test_attr': ''
},
])
def test_fn(self, fn, args, result, test_attr):
"""test that a fn of BoltOn optimizer is working as expected.
@ -204,9 +206,8 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
fn: method of Optimizer to test
args: args to optimizer fn
result: the expected result
test_attr: None if the fn returns the test result. Otherwise, this is
the attribute of BoltOn to check against result with.
test_attr: None if the fn returns the test result. Otherwise, this is the
attribute of BoltOn to check against result with.
"""
tf.random.set_seed(1)
loss = TestLoss(1, 1, 1)
@ -231,30 +232,38 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': '1 value project to r=1',
{
'testcase_name': '1 value project to r=1',
'r': 1,
'init_value': 2,
'shape': (1,),
'n_out': 1,
'result': [[1]]},
{'testcase_name': '2 value project to r=1',
'result': [[1]]
},
{
'testcase_name': '2 value project to r=1',
'r': 1,
'init_value': 2,
'shape': (2,),
'n_out': 1,
'result': [[0.707107], [0.707107]]},
{'testcase_name': '1 value project to r=2',
'result': [[0.707107], [0.707107]]
},
{
'testcase_name': '1 value project to r=2',
'r': 2,
'init_value': 3,
'shape': (1,),
'n_out': 1,
'result': [[2]]},
{'testcase_name': 'no project',
'result': [[2]]
},
{
'testcase_name': 'no project',
'r': 2,
'init_value': 1,
'shape': (1,),
'n_out': 1,
'result': [[1]]},
'result': [[1]]
},
])
def test_project(self, r, shape, n_out, init_value, result):
"""test that a fn of BoltOn optimizer is working as expected.
@ -267,6 +276,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
result: the expected output after projection.
"""
tf.random.set_seed(1)
def project_fn(r):
loss = TestLoss(1, 1, r)
bolton = opt.BoltOn(TestOptimizer(), loss)
@ -283,15 +293,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
bolton.n_samples = 1
bolton.project_weights_to_r()
return _ops.convert_to_tensor_v2(bolton.layers[0].kernel, tf.float32)
res = project_fn(r)
self.assertAllClose(res, result)
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'normal values',
{
'testcase_name': 'normal values',
'epsilon': 2,
'noise': 'laplace',
'class_weights': 1},
'class_weights': 1
},
])
def test_context_manager(self, noise, epsilon, class_weights):
"""Tests the context manager functionality of the optimizer.
@ -301,6 +314,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
epsilon: epsilon privacy parameter to use
class_weights: class_weights to use
"""
@tf.function
def test_run():
loss = TestLoss(1, 1, 1)
@ -313,18 +327,23 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
with bolton(noise, epsilon, model.layers, class_weights, 1, 1) as _:
pass
return _ops.convert_to_tensor_v2(bolton.epsilon, dtype=tf.float32)
epsilon = test_run()
self.assertEqual(epsilon.numpy(), -1)
@parameterized.named_parameters([
{'testcase_name': 'invalid noise',
{
'testcase_name': 'invalid noise',
'epsilon': 1,
'noise': 'not_valid',
'err_msg': 'Detected noise distribution: not_valid not one of:'},
{'testcase_name': 'invalid epsilon',
'err_msg': 'Detected noise distribution: not_valid not one of:'
},
{
'testcase_name': 'invalid epsilon',
'epsilon': -1,
'noise': 'laplace',
'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon <inf'},
'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon <inf'
},
])
def test_context_domains(self, noise, epsilon, err_msg):
"""Tests the context domains.
@ -333,7 +352,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
noise: noise distribution to pick
epsilon: epsilon privacy parameter to use
err_msg: the expected error message
"""
@tf.function
@ -347,15 +365,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
model.n_outputs))
with bolton(noise, epsilon, model.layers, 1, 1, 1) as _:
pass
with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method
test_run(noise, epsilon)
@parameterized.named_parameters([
{'testcase_name': 'fn: get_noise',
{
'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1],
'err_msg': 'This method must be called from within the '
'optimizer\'s context'},
'optimizer\'s context'
},
])
def test_not_in_context(self, fn, args, err_msg):
"""Tests that the expected functions raise errors when not in context.
@ -365,6 +386,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
args: the arguments for said function
err_msg: expected error message
"""
def test_run(fn, args):
loss = TestLoss(1, 1, 1)
bolton = opt.BoltOn(TestOptimizer(), loss)
@ -379,33 +401,51 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
test_run(fn, args)
@parameterized.named_parameters([
{'testcase_name': 'fn: get_updates',
{
'testcase_name': 'fn: get_updates',
'fn': 'get_updates',
'args': [0, 0]},
{'testcase_name': 'fn: get_config',
'args': [0, 0]
},
{
'testcase_name': 'fn: get_config',
'fn': 'get_config',
'args': []},
{'testcase_name': 'fn: from_config',
'args': []
},
{
'testcase_name': 'fn: from_config',
'fn': 'from_config',
'args': [0]},
{'testcase_name': 'fn: _resource_apply_dense',
'args': [0]
},
{
'testcase_name': 'fn: _resource_apply_dense',
'fn': '_resource_apply_dense',
'args': [1, 1]},
{'testcase_name': 'fn: _resource_apply_sparse',
'args': [1, 1]
},
{
'testcase_name': 'fn: _resource_apply_sparse',
'fn': '_resource_apply_sparse',
'args': [1, 1, 1]},
{'testcase_name': 'fn: apply_gradients',
'args': [1, 1, 1]
},
{
'testcase_name': 'fn: apply_gradients',
'fn': 'apply_gradients',
'args': [1]},
{'testcase_name': 'fn: minimize',
'args': [1]
},
{
'testcase_name': 'fn: minimize',
'fn': 'minimize',
'args': [1, 1]},
{'testcase_name': 'fn: _compute_gradients',
'args': [1, 1]
},
{
'testcase_name': 'fn: _compute_gradients',
'fn': '_compute_gradients',
'args': [1, 1]},
{'testcase_name': 'fn: get_gradients',
'args': [1, 1]
},
{
'testcase_name': 'fn: get_gradients',
'fn': 'get_gradients',
'args': [1, 1]},
'args': [1, 1]
},
])
def test_rerouted_function(self, fn, args):
"""Tests rerouted function.
@ -435,18 +475,19 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
bolton.noise_distribution = 'laplace'
bolton.n_outputs = 1
bolton.n_samples = 1
self.assertEqual(
getattr(bolton, fn, lambda: 'fn not found')(*args),
'test'
)
self.assertEqual(getattr(bolton, fn, lambda: 'fn not found')(*args), 'test')
@parameterized.named_parameters([
{'testcase_name': 'fn: project_weights_to_r',
{
'testcase_name': 'fn: project_weights_to_r',
'fn': 'project_weights_to_r',
'args': []},
{'testcase_name': 'fn: get_noise',
'args': []
},
{
'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1]},
'args': [1, 1]
},
])
def test_not_reroute_fn(self, fn, args):
"""Test function is not rerouted.
@ -458,6 +499,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
fn: fn to test
args: arguments to that fn
"""
def test_run(fn, args):
loss = TestLoss(1, 1, 1)
bolton = opt.BoltOn(TestOptimizer(), loss)
@ -480,12 +522,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
else:
res = 0
return _ops.convert_to_tensor_v2(res, dtype=tf.float32)
self.assertNotEqual(test_run(fn, args), 0)
@parameterized.named_parameters([
{'testcase_name': 'attr: _iterations',
'attr': '_iterations'}
])
@parameterized.named_parameters([{
'testcase_name': 'attr: _iterations',
'attr': '_iterations'
}])
def test_reroute_attr(self, attr):
"""Test a function is rerouted.
@ -498,13 +541,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
loss = TestLoss(1, 1, 1)
internal_optimizer = TestOptimizer()
optimizer = opt.BoltOn(internal_optimizer, loss)
self.assertEqual(getattr(optimizer, attr),
getattr(internal_optimizer, attr))
self.assertEqual(
getattr(optimizer, attr), getattr(internal_optimizer, attr))
@parameterized.named_parameters([
{'testcase_name': 'attr does not exist',
'attr': '_not_valid'}
])
@parameterized.named_parameters([{
'testcase_name': 'attr does not exist',
'attr': '_not_valid'
}])
def test_attribute_error(self, attr):
"""Test rerouting of attributes.
@ -524,12 +567,11 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
class SchedulerTest(keras_parameterized.TestCase):
"""GammaBeta Scheduler tests."""
@parameterized.named_parameters([
{'testcase_name': 'not in context',
@parameterized.named_parameters([{
'testcase_name': 'not in context',
'err_msg': 'Please initialize the GammaBetaDecreasingStep Learning Rate'
' Scheduler'
}
])
}])
def test_bad_call(self, err_msg):
"""Test attribute of internal opt correctly rerouted to the internal opt.
@ -541,15 +583,21 @@ class SchedulerTest(keras_parameterized.TestCase):
scheduler(1)
@parameterized.named_parameters([
{'testcase_name': 'step 1',
{
'testcase_name': 'step 1',
'step': 1,
'res': 0.5},
{'testcase_name': 'step 2',
'res': 0.5
},
{
'testcase_name': 'step 2',
'step': 2,
'res': 0.5},
{'testcase_name': 'step 3',
'res': 0.5
},
{
'testcase_name': 'step 3',
'step': 3,
'res': 0.333333333},
'res': 0.333333333
},
])
def test_call(self, step, res):
"""Test call.

View file

@ -13,10 +13,8 @@
# limitations under the License.
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import quantile_estimator_query
from tensorflow_privacy.privacy.dp_query import test_utils
@ -44,10 +42,7 @@ def _make_quantile_estimator_query(initial_estimate,
raise ValueError(
'Cannot set expected_num_records to None for tree aggregation.')
return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
initial_estimate,
target_quantile,
learning_rate,
geometric_update)
initial_estimate, target_quantile, learning_rate, geometric_update)
class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
@ -201,8 +196,10 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# 100 records equally spaced from 0 to 10 in 0.1 increments.
# Test that we converge to the correct median value and bounce around it.
num_records = 21
records = [tf.constant(x) for x in np.linspace(
0.0, 10.0, num=num_records, dtype=np.float32)]
records = [
tf.constant(x)
for x in np.linspace(0.0, 10.0, num=num_records, dtype=np.float32)
]
query = _make_quantile_estimator_query(
initial_estimate=(1.0 if start_low else 10.0),
@ -267,9 +264,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
def test_raises_with_non_scalar_record(self):
query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
initial_estimate=1.0,
target_quantile=0.5,
learning_rate=1.0)
initial_estimate=1.0, target_quantile=0.5, learning_rate=1.0)
with self.assertRaisesRegex(ValueError, 'scalar'):
query.accumulate_record(None, None, [1.0, 2.0])
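
A hedged driver for the no-privacy estimator above, assuming dp_query.test_utils.run_query(query, records, global_state) returns the query result and updated state:

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import quantile_estimator_query
from tensorflow_privacy.privacy.dp_query import test_utils

query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
    initial_estimate=1.0, target_quantile=0.5, learning_rate=1.0)
global_state = query.initial_global_state()
records = [tf.constant(x) for x in [0.5, 1.5, 2.5]]
estimate, global_state = test_utils.run_query(query, records, global_state)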

View file

@ -28,7 +28,6 @@ from typing import Any, Callable, Collection, Optional, Tuple, Union
import attr
import tensorflow as tf
# TODO(b/192464750): find a proper place for the helper functions, privatize
# the tree aggregation logic, and encourage users to use the DPQuery API.

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for `tree_aggregation_query`."""
from absl.testing import parameterized
import numpy as np
@ -212,11 +211,11 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
('two_records_noise_fn', [2.71828, 3.14159], _get_noise_fn),
('five_records_noise_fn', np.random.uniform(low=0.1, size=5).tolist(),
_get_noise_fn),
('five_records_noise_fn', np.random.uniform(
low=0.1, size=5).tolist(), _get_noise_fn),
('two_records_generator', [2.71828, 3.14159], _get_noise_generator),
('five_records_generator', np.random.uniform(low=0.1, size=5).tolist(),
_get_noise_generator),
('five_records_generator', np.random.uniform(
low=0.1, size=5).tolist(), _get_noise_generator),
)
def test_noisy_cumsum_and_state_update(self, records, value_generator):
num_trials, vector_size = 10, 100

View file

@ -63,5 +63,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
input_fn=test_utils.make_input_fn(predict_features, predict_labels,
False))
if __name__ == '__main__':
tf.test.main()

View file

@ -34,6 +34,7 @@ def make_input_data(size, classes):
np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise > 125).astype(int)
else:
def label_fn(x):
if x < 110.0:
return 0
@ -42,7 +43,8 @@ def make_input_data(size, classes):
else:
return 2
labels = list(map(
labels = list(
map(
label_fn,
np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise))
@ -87,6 +89,7 @@ def make_input_fn(features, labels, training, batch_size=16):
dataset = dataset.shuffle(1000)
return dataset.batch(batch_size)
return input_fn

View file

@ -64,5 +64,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
input_fn=test_utils.make_input_fn(predict_features, predict_labels,
False))
if __name__ == '__main__':
tf.test.main()

View file

@ -434,8 +434,8 @@ def _binary_logistic_or_multi_class_head(n_classes, weight_column,
encoded as integer or float within [0, 1] for `n_classes=2` and encoded as
integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there
will be errors if vocabulary is not provided and labels are string.
loss_reduction: Describes how to reduce training loss over batch.
Defaults to `SUM`.
loss_reduction: Describes how to reduce training loss over batch. Defaults
to `SUM`.
Returns:
`head._Head` instance.

View file

@ -53,7 +53,8 @@ def make_dp_model_class(cls):
model.fit(train_data, train_labels, epochs=1, batch_size=32)
```
""").format(base_model='tf.keras.' + cls.__name__,
""").format(
base_model='tf.keras.' + cls.__name__,
short_base_model=cls.__name__,
dp_model_class='DP' + cls.__name__)
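
The docstring rewrapped above documents usage along these lines (a hedged sketch; DPSequential is one of the classes make_dp_model_class generates):

import tensorflow as tf
from tensorflow_privacy.privacy.keras_models import dp_keras_model

model = dp_keras_model.DPSequential(
    l2_norm_clip=1.0,
    noise_multiplier=0.5,
    layers=[tf.keras.layers.Dense(1, activation='sigmoid')])
model.compile(optimizer='sgd', loss=tf.keras.losses.BinaryCrossentropy())
# model.fit(train_data, train_labels, epochs=1, batch_size=32)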

View file

@ -40,8 +40,8 @@ class RegressionDataset:
"""Class for storing labeled examples for a regression dataset.
Attributes:
points: array of shape (num_examples, dimension) containing the points to
be classified.
points: array of shape (num_examples, dimension) containing the points to be
classified.
labels: array of shape (num_examples,) containing the corresponding labels,
each belonging to the set {0,1,...,num_classes-1}, where num_classes is
the number of classes.
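
A hedged construction sketch matching the documented attributes, assuming the constructor takes points and labels in attribute order:

import numpy as np
from tensorflow_privacy.privacy.logistic_regression import datasets

points = np.random.normal(size=(100, 4))
labels = np.random.randint(0, 3, size=(100,))  # num_classes = 3
dataset = datasets.RegressionDataset(points, labels)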

View file

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_privacy.privacy.logistic_regression.datasets."""
import unittest
from absl.testing import parameterized
import numpy as np
from tensorflow_privacy.privacy.logistic_regression import datasets
@ -22,14 +22,16 @@ from tensorflow_privacy.privacy.logistic_regression import datasets
class DatasetsTest(parameterized.TestCase):
@parameterized.parameters(
(1, np.array([[1],])),
(2, np.array([[1],])),
(5, np.array([[-1, 1], [1, -1]])),
(1, np.array([
[1],
])), (2, np.array([
[1],
])), (5, np.array([[-1, 1], [1, -1]])),
(15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]])))
def test_linearly_separable_labeled_examples(self, num_examples, weights):
dimension, num_classes = weights.shape
dataset = datasets.linearly_separable_labeled_examples(num_examples,
weights)
dataset = datasets.linearly_separable_labeled_examples(
num_examples, weights)
self.assertEqual(dataset.points.shape, (num_examples, dimension))
self.assertEqual(dataset.labels.shape, (num_examples,))
product = np.matmul(dataset.points, weights)
@ -37,10 +39,7 @@ class DatasetsTest(parameterized.TestCase):
for j in range(num_classes):
self.assertGreaterEqual(product[i, dataset.labels[i]], product[i, j])
@parameterized.parameters(
(1, 1, 1, 2),
(20, 5, 1, 2),
(20, 5, 2, 2),
@parameterized.parameters((1, 1, 1, 2), (20, 5, 1, 2), (20, 5, 2, 2),
(1000, 10, 15, 10))
def test_synthetic(self, num_train, num_test, dimension, num_classes):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
@ -73,5 +72,6 @@ class DatasetsTest(parameterized.TestCase):
self.assertTrue(np.all(np.isin(train_dataset.labels, range(10))))
self.assertTrue(np.all(np.isin(test_dataset.labels, range(10))))
if __name__ == '__main__':
unittest.main()

View file

@ -52,7 +52,9 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
(self._l2_regularizer,
variance) = self.logistic_objective_perturbation_parameters(
num_train, epsilon, delta, num_classes, input_clipping_norm)
self._b = tf.random.normal(shape=[dimension, num_classes], mean=0.0,
self._b = tf.random.normal(
shape=[dimension, num_classes],
mean=0.0,
stddev=math.sqrt(variance),
dtype=tf.dtypes.float32)
@ -61,8 +63,11 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
(1 / self._num_train) * tf.reduce_sum(tf.multiply(x, self._b)))
def get_config(self):
return {'l2_regularizer': self._l2_regularizer,
'num_train': self._num_train, 'b': self._b}
return {
'l2_regularizer': self._l2_regularizer,
'num_train': self._num_train,
'b': self._b
}
def logistic_objective_perturbation_parameters(
self, num_train: int, epsilon: float, delta: float, num_classes: int,
@ -85,18 +90,20 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
# zeta is an upper bound on the l2-norm of the loss function gradient.
zeta = input_clipping_norm
# variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6):
variance = zeta*zeta*(8*np.log(2/delta)+4*epsilon)/(epsilon*epsilon)
variance = zeta * zeta * (8 * np.log(2 / delta) + 4 * epsilon) / (
epsilon * epsilon)
# lambda_coefficient is an upper bound on the spectral norm of the Hessian
# of the loss function.
lambda_coefficient = math.sqrt(2*num_classes)*(input_clipping_norm**2)/4
lambda_coefficient = math.sqrt(2 * num_classes) * (input_clipping_norm**
2) / 4
l2_regularizer = lambda_coefficient / (epsilon * num_train)
return (l2_regularizer, variance)
def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset,
epsilon: float, delta: float,
epochs: int, num_classes: int,
epsilon: float, delta: float, epochs: int,
num_classes: int,
input_clipping_norm: float) -> List[float]:
"""Trains and validates differentially private logistic regression model.
@ -127,13 +134,21 @@ def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta,
num_classes, input_clipping_norm)
return single_layer_softmax.single_layer_softmax_classifier(
train_dataset, test_dataset, epochs, num_classes, optimizer, loss,
train_dataset,
test_dataset,
epochs,
num_classes,
optimizer,
loss,
kernel_regularizer=kernel_regularizer)
def compute_dpsgd_noise_multiplier(
num_train: int, epsilon: float, delta: float, epochs: int,
batch_size: int, tolerance: float = 1e-2) -> Optional[float]:
def compute_dpsgd_noise_multiplier(num_train: int,
epsilon: float,
delta: float,
epochs: int,
batch_size: int,
tolerance: float = 1e-2) -> Optional[float]:
"""Computes the noise multiplier for DP-SGD given privacy parameters.
The algorithm performs binary search on the values of epsilon.
@ -153,20 +168,17 @@ def compute_dpsgd_noise_multiplier(
the given tolerance) for which using DPKerasAdamOptimizer will result in an
(epsilon, delta)-differentially private trained model.
"""
search_parameters = common.BinarySearchParameters(lower_bound=0,
upper_bound=math.inf,
initial_guess=1,
tolerance=tolerance)
search_parameters = common.BinarySearchParameters(
lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance)
return common.inverse_monotone_function(
lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0],
epsilon, search_parameters)
def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset,
epsilon: float, delta: float, epochs: int, num_classes: int,
batch_size: int, num_microbatches: int,
clipping_norm: float)-> List[float]:
test_dataset: datasets.RegressionDataset, epsilon: float,
delta: float, epochs: int, num_classes: int, batch_size: int,
num_microbatches: int, clipping_norm: float) -> List[float]:
"""Trains and validates private logistic regression model via DP-SGD.
The training is based on the differentially private stochastic gradient
@ -183,8 +195,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
num_classes: number of classes.
batch_size: the number of examples in each batch of gradient descent.
num_microbatches: the number of microbatches in gradient descent.
clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer
to have l2-norm at most clipping_norm.
clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer to
have l2-norm at most clipping_norm.
Returns:
List of test accuracies (one for each epoch) on test_dataset of model
@ -199,7 +211,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta,
epochs, batch_size)
optimizer = dp_optimizer_keras.DPKerasAdamOptimizer(
l2_norm_clip=clipping_norm, noise_multiplier=noise_multiplier,
l2_norm_clip=clipping_norm,
noise_multiplier=noise_multiplier,
num_microbatches=num_microbatches)
loss = tf.keras.losses.CategoricalCrossentropy(
reduction=tf.losses.Reduction.NONE)

View file

@ -62,18 +62,21 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
(5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
)
def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon,
delta, epochs, num_classes, tolerance,
batch_size, num_microbatches, clipping_norm):
def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, delta,
epochs, num_classes, tolerance, batch_size,
num_microbatches, clipping_norm):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
num_train, num_test, dimension, num_classes)
accuracy = multinomial_logistic.logistic_dpsgd(
train_dataset, test_dataset, epsilon, delta, epochs, num_classes,
batch_size, num_microbatches, clipping_norm)
accuracy = multinomial_logistic.logistic_dpsgd(train_dataset, test_dataset,
epsilon, delta, epochs,
num_classes, batch_size,
num_microbatches,
clipping_norm)
# Since the synthetic data is linearly separable, we expect the test
# accuracy to come arbitrarily close to 1 as the number of training examples
# grows.
self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
if __name__ == '__main__':
unittest.main()

View file

@ -11,8 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of a single-layer softmax classifier.
"""
"""Implementation of a single-layer softmax classifier."""
from typing import List
import tensorflow as tf
@ -22,10 +21,13 @@ from tensorflow_privacy.privacy.logistic_regression import datasets
def single_layer_softmax_classifier(
train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset,
epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer,
epochs: int,
num_classes: int,
optimizer: tf.keras.optimizers.Optimizer,
loss: tf.keras.losses.Loss = 'categorical_crossentropy',
batch_size: int = 32,
kernel_regularizer: tf.keras.regularizers.Regularizer = None)-> List[float]:
kernel_regularizer: tf.keras.regularizers.Regularizer = None
) -> List[float]:
"""Trains a single layer neural network classifier with softmax activation.
Args:
@ -47,13 +49,17 @@ def single_layer_softmax_classifier(
one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes)
one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=num_classes,
model.add(
tf.keras.layers.Dense(
units=num_classes,
activation='softmax',
kernel_regularizer=kernel_regularizer))
model.compile(optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset.points, one_hot_train_labels,
batch_size=batch_size, epochs=epochs,
validation_data=(test_dataset.points,
one_hot_test_labels),
history = model.fit(
train_dataset.points,
one_hot_train_labels,
batch_size=batch_size,
epochs=epochs,
validation_data=(test_dataset.points, one_hot_test_labels),
verbose=0)
return history.history['val_accuracy']
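For reference, a smoke-test style call of this classifier, mirroring the call shape used in the tests elsewhere in this commit (the dataset and epoch sizes are assumptions):

```
from tensorflow_privacy.privacy.logistic_regression import datasets
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax

# Linearly separable data, so accuracy should approach 1 (sizes assumed).
train_dataset, test_dataset = datasets.synthetic_linearly_separable_data(
    5000, 500, 2, 2)
accuracies = single_layer_softmax.single_layer_softmax_classifier(
    train_dataset, test_dataset, 10, 2, 'sgd')
print('final test accuracy: %.3f' % accuracies[-1])
```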

View file

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_privacy.privacy.logistic_regression.single_layer_softmax."""
import unittest
from absl.testing import parameterized
from tensorflow_privacy.privacy.logistic_regression import datasets
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
@ -35,5 +35,6 @@ class SingleLayerSoftmaxTest(parameterized.TestCase):
train_dataset, test_dataset, epochs, num_classes, 'sgd')
self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
if __name__ == '__main__':
unittest.main()

View file

@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls):
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch is
split. Default is `None` which means that the number of microbatches
is equal to batch size (i.e. each microbatch contains exactly one
split. Default is `None` which means that the number of microbatches is
equal to batch size (i.e. each microbatch contains exactly one
example). If `gradient_accumulation_steps` is greater than 1 and
`num_microbatches` is not `None` then the effective number of
microbatches is equal to
`num_microbatches * gradient_accumulation_steps`.
microbatches is equal to `num_microbatches *
gradient_accumulation_steps`.
gradient_accumulation_steps: If greater than 1 then optimizer will be
accumulating gradients for this number of optimizer steps before
applying them to update model weights. If this argument is set to 1
@ -172,11 +172,9 @@ def make_keras_optimizer_class(cls):
if self.gradient_accumulation_steps > 1:
apply_update = tf.math.equal(
tf.math.floormod(self.iterations + 1,
self.gradient_accumulation_steps),
0)
self.gradient_accumulation_steps), 0)
grad_scaler = tf.cast(1. / self.gradient_accumulation_steps, var_dtype)
apply_state[(var_device, var_dtype)].update(
{
apply_state[(var_device, var_dtype)].update({
'apply_update': apply_update,
'grad_scaler': grad_scaler
})
@ -184,27 +182,29 @@ def make_keras_optimizer_class(cls):
def _resource_apply_dense(self, grad, var, apply_state=None):
if self.gradient_accumulation_steps > 1:
var_device, var_dtype = var.device, var.dtype.base_dtype
coefficients = ((apply_state or {}).get((var_device, var_dtype))
or self._fallback_apply_state(var_device, var_dtype))
coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
self._fallback_apply_state(var_device, var_dtype))
grad_acc = self.get_slot(var, 'grad_acc')
def _update_grad():
apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense(
grad_acc + grad * coefficients['grad_scaler'], var, apply_state)
with tf.control_dependencies([apply_grad_op]):
return grad_acc.assign(tf.zeros_like(grad_acc),
return grad_acc.assign(
tf.zeros_like(grad_acc),
use_locking=self._use_locking,
read_value=False)
def _accumulate():
return grad_acc.assign_add(grad * coefficients['grad_scaler'],
return grad_acc.assign_add(
grad * coefficients['grad_scaler'],
use_locking=self._use_locking,
read_value=False)
return tf.cond(coefficients['apply_update'], _update_grad, _accumulate)
else:
return super(DPOptimizerClass, self)._resource_apply_dense(
grad, var, apply_state)
return super(DPOptimizerClass,
self)._resource_apply_dense(grad, var, apply_state)
def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs):
if self.gradient_accumulation_steps > 1:
@ -220,8 +220,8 @@ def make_keras_optimizer_class(cls):
raise NotImplementedError(
'Sparse gradients are not supported with large batch emulation.')
else:
return super(DPOptimizerClass, self)._resource_apply_sparse(
*args, **kwargs)
return super(DPOptimizerClass,
self)._resource_apply_sparse(*args, **kwargs)
def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
"""DP-SGD version of base class method."""

View file

@ -15,7 +15,6 @@
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras_vectorized
@ -108,8 +107,7 @@ class DPOptimizerComputeGradientsTest(tf.test.TestCase, parameterized.TestCase):
with tape:
loss = self._loss(data0, var0) + self._loss(data1, var1)
grads_and_vars = opt._compute_gradients(
loss, [var0, var1], tape=tape)
grads_and_vars = opt._compute_gradients(loss, [var0, var1], tape=tape)
self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0])
self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0])
@ -442,10 +440,9 @@ class DPOptimizerGetGradientsTest(tf.test.TestCase, parameterized.TestCase):
('DPKerasSGDOptimizer 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1),
('DPKerasSGDOptimizer 2', dp_optimizer_keras.DPKerasSGDOptimizer, 2),
('DPKerasSGDOptimizer 4', dp_optimizer_keras.DPKerasSGDOptimizer, 4),
('DPKerasAdamOptimizer 2',
dp_optimizer_keras.DPKerasAdamOptimizer, 1),
('DPKerasAdagradOptimizer 2',
dp_optimizer_keras.DPKerasAdagradOptimizer, 2),
('DPKerasAdamOptimizer 2', dp_optimizer_keras.DPKerasAdamOptimizer, 1),
('DPKerasAdagradOptimizer 2', dp_optimizer_keras.DPKerasAdagradOptimizer,
2),
)
def testLargeBatchEmulation(self, cls, gradient_accumulation_steps):
# Tests various optimizers with large batch emulation.

View file

@ -95,7 +95,8 @@ def make_vectorized_keras_optimizer_class(cls):
model.fit(...)
```
""".format(base_class='tf.keras.optimizers.' + cls.__name__,
""".format(
base_class='tf.keras.optimizers.' + cls.__name__,
dp_keras_class='DPKeras' + cls.__name__,
short_base_class=cls.__name__,
dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__)
@ -112,8 +113,8 @@ def make_vectorized_keras_optimizer_class(cls):
Args:
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch
is split.
num_microbatches: Number of microbatches into which each minibatch is
split.
*args: These will be passed on to the base class `__init__` method.
**kwargs: These will be passed on to the base class `__init__` method.
"""

View file

@ -18,7 +18,6 @@ import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.optimizers import dp_optimizer
@ -30,11 +29,12 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
return 0.5 * tf.reduce_sum(
input_tensor=tf.math.squared_difference(val0, val1), axis=1)
def _compute_expected_gradients(self, per_example_gradients,
l2_norm_clip, num_microbatches):
def _compute_expected_gradients(self, per_example_gradients, l2_norm_clip,
num_microbatches):
batch_size, num_vars = per_example_gradients.shape
microbatch_gradients = np.mean(
np.reshape(per_example_gradients,
np.reshape(
per_example_gradients,
[num_microbatches,
np.int(batch_size / num_microbatches), num_vars]),
axis=1)
@ -124,8 +124,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
l2_norm_clip = 1.0
dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, 0.0)
opt = cls(dp_sum_query, num_microbatches=num_microbatches,
learning_rate=2.0)
opt = cls(
dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values

View file

@ -134,19 +134,15 @@ def make_vectorized_optimizer_class(cls):
if var_list is None:
var_list = (
tf.trainable_variables() + tf.get_collection(
tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
tf.trainable_variables() +
tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
def process_microbatch(microbatch_loss):
"""Compute clipped grads for one microbatch."""
microbatch_loss = tf.reduce_mean(input_tensor=microbatch_loss)
grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients(
microbatch_loss,
var_list,
gate_gradients,
aggregation_method,
colocate_gradients_with_ops,
grad_loss))
microbatch_loss, var_list, gate_gradients, aggregation_method,
colocate_gradients_with_ops, grad_loss))
grads_list = [
g if g is not None else tf.zeros_like(v)
for (g, v) in zip(list(grads), var_list)

View file

@ -17,7 +17,6 @@ import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized
from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad
from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam
@ -63,8 +62,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
grads_and_vars = sess.run(gradient_op)
self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])
@parameterized.named_parameters(
('DPGradientDescent', VectorizedDPSGD),
@parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
def testClippingNorm(self, cls):
@ -72,7 +70,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0, 0.0])
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
opt = cls(l2_norm_clip=1.0,
opt = cls(
l2_norm_clip=1.0,
noise_multiplier=0.,
num_microbatches=1,
learning_rate=2.0)
@ -86,8 +85,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
grads_and_vars = sess.run(gradient_op)
self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
@parameterized.named_parameters(
('DPGradientDescent', VectorizedDPSGD),
@parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
def testNoiseMultiplier(self, cls):
@ -95,7 +93,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0])
data0 = tf.Variable([[0.0]])
opt = cls(l2_norm_clip=4.0,
opt = cls(
l2_norm_clip=4.0,
noise_multiplier=8.0,
num_microbatches=1,
learning_rate=2.0)
@ -168,8 +167,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
true_weights,
atol=1.0)
@parameterized.named_parameters(
('DPGradientDescent', VectorizedDPSGD),
@parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
def testDPGaussianOptimizerClass(self, cls):

View file

@ -217,5 +217,6 @@ def main(unused_argv):
# For saving a figure into a file:
# plotting.save_plot(figure, <file_path>)
if __name__ == "__main__":
app.run(main)

View file

@ -482,8 +482,8 @@ class SingleAttackResult:
return '\n'.join([
'SingleAttackResult(',
' SliceSpec: %s' % str(self.slice_spec),
' DataSize: (ntrain=%d, ntest=%d)' % (self.data_size.ntrain,
self.data_size.ntest),
' DataSize: (ntrain=%d, ntest=%d)' %
(self.data_size.ntrain, self.data_size.ntest),
' AttackType: %s' % str(self.attack_type),
' AUC: %.2f' % self.get_auc(),
' Attacker advantage: %.2f' % self.get_attacker_advantage(), ')'
@ -684,10 +684,8 @@ class AttackResults:
summary.append('Best-performing attacks over all slices')
summary.append(
' %s (with %d training and %d test examples) achieved an AUC of %.2f on slice %s'
% (max_auc_result_all.attack_type,
max_auc_result_all.data_size.ntrain,
max_auc_result_all.data_size.ntest,
max_auc_result_all.get_auc(),
% (max_auc_result_all.attack_type, max_auc_result_all.data_size.ntrain,
max_auc_result_all.data_size.ntest, max_auc_result_all.get_auc(),
max_auc_result_all.slice_spec))
max_advantage_result_all = self.get_result_with_max_attacker_advantage()
@ -709,10 +707,8 @@ class AttackResults:
max_auc_result = results.get_result_with_max_auc()
summary.append(
' %s (with %d training and %d test examples) achieved an AUC of %.2f'
% (max_auc_result.attack_type,
max_auc_result.data_size.ntrain,
max_auc_result.data_size.ntest,
max_auc_result.get_auc()))
% (max_auc_result.attack_type, max_auc_result.data_size.ntrain,
max_auc_result.data_size.ntest, max_auc_result.get_auc()))
max_advantage_result = results.get_result_with_max_attacker_advantage()
summary.append(
' %s (with %d training and %d test examples) achieved an advantage of %.2f'
@ -816,6 +812,8 @@ def get_flattened_attack_metrics(results: AttackResults):
types += [str(attack_result.attack_type)] * 2
slices += [str(attack_result.slice_spec)] * 2
attack_metrics += ['adv', 'auc']
values += [float(attack_result.get_attacker_advantage()),
float(attack_result.get_auc())]
values += [
float(attack_result.get_attacker_advantage()),
float(attack_result.get_auc())
]
return types, slices, attack_metrics, values
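The flattened view pairs each (attack type, slice, metric) triple with a value; the idiomatic way to render it, mirroring the printing used elsewhere in this commit (`results` is assumed to be an existing `AttackResults`):

```
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
    results)
print('\n'.join([
    '  %s: %.4f' % (', '.join([s, t, m]), v)
    for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
```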

View file

@ -54,7 +54,8 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def __init__(
self,
in_train, out_train,
in_train,
out_train,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
tensorboard_dir=None,
@ -96,19 +97,18 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
results = run_attack_on_keras_model(
self.model,
(self._in_train_data, self._in_train_labels),
(self._out_train_data, self._out_train_labels),
self._slicing_spec,
self._attack_types,
self._is_logit, self._batch_size)
self.model, (self._in_train_data, self._in_train_labels),
(self._out_train_data, self._out_train_labels), self._slicing_spec,
self._attack_types, self._is_logit, self._batch_size)
logging.info(results)
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
results)
print('Attack result:')
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
# Write to tensorboard if tensorboard_dir is specified
if self._writers is not None:
@ -117,7 +117,9 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def run_attack_on_keras_model(
model, in_train, out_train,
model,
in_train,
out_train,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
is_logit: bool = False,
@ -132,6 +134,7 @@ def run_attack_on_keras_model(
attack_types: a list of attacks, each of type AttackType
is_logit: whether the result of model.predict is logit or probability
batch_size: the batch size for model.predict
Returns:
Results of the attack
"""
@ -139,16 +142,19 @@ def run_attack_on_keras_model(
out_train_data, out_train_labels = out_train
# Compute predictions and losses
in_train_pred, in_train_loss = calculate_losses(
model, in_train_data, in_train_labels, is_logit, batch_size)
out_train_pred, out_train_loss = calculate_losses(
model, out_train_data, out_train_labels, is_logit, batch_size)
in_train_pred, in_train_loss = calculate_losses(model, in_train_data,
in_train_labels, is_logit,
batch_size)
out_train_pred, out_train_loss = calculate_losses(model, out_train_data,
out_train_labels, is_logit,
batch_size)
attack_input = AttackInputData(
logits_train=in_train_pred, logits_test=out_train_pred,
labels_train=in_train_labels, labels_test=out_train_labels,
loss_train=in_train_loss, loss_test=out_train_loss
)
results = mia.run_attacks(attack_input,
slicing_spec=slicing_spec,
attack_types=attack_types)
logits_train=in_train_pred,
logits_test=out_train_pred,
labels_train=in_train_labels,
labels_test=out_train_labels,
loss_train=in_train_loss,
loss_test=out_train_loss)
results = mia.run_attacks(
attack_input, slicing_spec=slicing_spec, attack_types=attack_types)
return results
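End to end, the Keras attack entry point is invoked as below, mirroring the example and test in this commit (a sketch that assumes `model` and the train/test arrays already exist):

```
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import keras_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType

# `model`, `x_train`, `y_train`, `x_test`, `y_test` are assumed defined.
results = keras_evaluation.run_attack_on_keras_model(
    model, (x_train, y_train), (x_test, y_test),
    attack_types=[AttackType.THRESHOLD_ATTACK],
    is_logit=True,
    batch_size=2048)
print(results)
```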

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example for using keras_evaluation."""
from absl import app
@ -25,13 +24,13 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import MembershipInferenceCallback
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import run_attack_on_keras_model
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot '
flags.DEFINE_bool(
'tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
@ -76,14 +75,15 @@ def main(unused_argv):
# Get callback for membership inference attack.
mia_callback = MembershipInferenceCallback(
(x_train, y_train),
(x_test, y_test),
(x_train, y_train), (x_test, y_test),
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK,
AttackType.K_NEAREST_NEIGHBORS],
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
],
tensorboard_dir=FLAGS.model_dir,
tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers,
is_logit=True, batch_size=2048)
is_logit=True,
batch_size=2048)
# Train model with Keras
model.fit(
@ -102,11 +102,14 @@ def main(unused_argv):
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
],
is_logit=True, batch_size=2048)
is_logit=True,
batch_size=2048)
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
if __name__ == '__main__':

View file

@ -13,10 +13,8 @@
# limitations under the License.
from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import keras_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@ -59,8 +57,7 @@ class UtilsTest(absltest.TestCase):
def test_run_attack_on_keras_model(self):
"""Test the attack."""
results = keras_evaluation.run_attack_on_keras_model(
self.model,
(self.train_data, self.train_labels),
self.model, (self.train_data, self.train_labels),
(self.test_data, self.test_labels),
attack_types=[AttackType.THRESHOLD_ATTACK])
self.assertIsInstance(results, AttackResults)

View file

@ -141,8 +141,8 @@ def _run_attack(attack_input: AttackInputData,
attack_type: the attack to run
balance_attacker_training: Whether the training and test sets for the
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop
the model under attack.
number of samples from the training and test sets used to develop the
model under attack.
min_num_samples: minimum number of examples in either training or test data.
Returns:
@ -180,8 +180,8 @@ def run_attacks(attack_input: AttackInputData,
privacy_report_metadata: the metadata of the model under attack.
balance_attacker_training: Whether the training and test sets for the
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop
the model under attack.
number of samples from the training and test sets used to develop the
model under attack.
min_num_samples: minimum number of examples in either training or test data.
Returns:
@ -200,8 +200,7 @@ def run_attacks(attack_input: AttackInputData,
attack_input_slice = get_slice(attack_input, single_slice_spec)
for attack_type in attack_types:
attack_result = _run_attack(attack_input_slice, attack_type,
balance_attacker_training,
min_num_samples)
balance_attacker_training, min_num_samples)
if attack_result is not None:
attack_results.append(attack_result)

View file

@ -55,9 +55,8 @@ def create_attacker_data(attack_input_data: AttackInputData,
attack_input_data: Original AttackInputData
test_fraction: Fraction of the dataset to include in the test split.
balance: Whether the training and test sets for the membership inference
attacker should have a balanced (roughly equal) number of samples
from the training and test sets used to develop the model
under attack.
attacker should have a balanced (roughly equal) number of samples from the
training and test sets used to develop the model under attack.
Returns:
AttackerData.
@ -134,6 +133,7 @@ class TrainedAttacker:
Args:
input_features: A vector of features with the same semantics as x_train
passed to train_model.
Returns:
An array of probabilities denoting whether the example belongs to test.
"""

View file

@ -81,5 +81,4 @@ def plot_histograms(train: Iterable[float],
def plot_roc_curve(roc_curve, plot_func=plot_curve_with_area) -> plt.Figure:
"""Plot the ROC curve and the area under the curve."""
return plot_func(
roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')
return plot_func(roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A hook and a function in tf estimator for membership inference attack."""
import os
@ -58,7 +57,8 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
def __init__(
self,
estimator,
in_train, out_train,
in_train,
out_train,
input_fn_constructor,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
@ -106,19 +106,19 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
self._writers = None
def end(self, session):
results = run_attack_helper(self._estimator,
self._in_train_input_fn,
self._out_train_input_fn,
self._in_train_labels, self._out_train_labels,
self._slicing_spec,
results = run_attack_helper(self._estimator, self._in_train_input_fn,
self._out_train_input_fn, self._in_train_labels,
self._out_train_labels, self._slicing_spec,
self._attack_types)
logging.info(results)
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
results)
print('Attack result:')
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
# Write to tensorboard if tensorboard_dir is specified
global_step = self._estimator.get_variable_value('global_step')
@ -128,7 +128,9 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
def run_attack_on_tf_estimator_model(
estimator, in_train, out_train,
estimator,
in_train,
out_train,
input_fn_constructor,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
@ -142,6 +144,7 @@ def run_attack_on_tf_estimator_model(
the input_fn for model prediction
slicing_spec: slicing specification of the attack
attack_types: a list of attacks, each of type AttackType
Returns:
Results of the attack
"""
@ -153,10 +156,8 @@ def run_attack_on_tf_estimator_model(
out_train_input_fn = input_fn_constructor(out_train_data, out_train_labels)
# Call the helper to run the attack.
results = run_attack_helper(estimator,
in_train_input_fn, out_train_input_fn,
in_train_labels, out_train_labels,
slicing_spec,
results = run_attack_helper(estimator, in_train_input_fn, out_train_input_fn,
in_train_labels, out_train_labels, slicing_spec,
attack_types)
logging.info('End of training attack:')
logging.info(results)
@ -165,8 +166,10 @@ def run_attack_on_tf_estimator_model(
def run_attack_helper(
estimator,
in_train_input_fn, out_train_input_fn,
in_train_labels, out_train_labels,
in_train_input_fn,
out_train_input_fn,
in_train_labels,
out_train_labels,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
"""A helper function to perform attack.
@ -179,22 +182,23 @@ def run_attack_helper(
out_train_labels: out of training labels
slicing_spec: slicing specification of the attack
attack_types: a list of attacks, each of type AttackType
Returns:
Results of the attack
"""
# Compute predictions and losses
in_train_pred, in_train_loss = calculate_losses(estimator,
in_train_input_fn,
in_train_pred, in_train_loss = calculate_losses(estimator, in_train_input_fn,
in_train_labels)
out_train_pred, out_train_loss = calculate_losses(estimator,
out_train_input_fn,
out_train_labels)
attack_input = AttackInputData(
logits_train=in_train_pred, logits_test=out_train_pred,
labels_train=in_train_labels, labels_test=out_train_labels,
loss_train=in_train_loss, loss_test=out_train_loss
)
results = mia.run_attacks(attack_input,
slicing_spec=slicing_spec,
attack_types=attack_types)
logits_train=in_train_pred,
logits_test=out_train_pred,
labels_train=in_train_labels,
labels_test=out_train_labels,
loss_train=in_train_loss,
loss_test=out_train_loss)
results = mia.run_attacks(
attack_input, slicing_spec=slicing_spec, attack_types=attack_types)
return results
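The estimator variant additionally takes a constructor for the prediction `input_fn`; a sketch mirroring the test in this commit (`classifier` and the data arrays are assumed to already exist):

```
import tensorflow.compat.v1 as tf

from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import tf_estimator_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType


def input_fn_constructor(x, y):
  return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)


results = tf_estimator_evaluation.run_attack_on_tf_estimator_model(
    classifier, (train_data, train_labels), (test_data, test_labels),
    input_fn_constructor,
    attack_types=[AttackType.THRESHOLD_ATTACK])
```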

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example for using tf_estimator_evaluation."""
from absl import app
@ -26,13 +25,13 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot '
flags.DEFINE_bool(
'tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
@ -55,8 +54,8 @@ def small_cnn_fn(features, labels, mode):
# Configure the training op (for TRAIN mode).
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.MomentumOptimizer(learning_rate=FLAGS.learning_rate,
momentum=0.9)
optimizer = tf.train.MomentumOptimizer(
learning_rate=FLAGS.learning_rate, momentum=0.9)
global_step = tf.train.get_global_step()
train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step)
return tf.estimator.EstimatorSpec(
@ -111,13 +110,12 @@ def main(unused_argv):
# Get hook for membership inference attack.
mia_hook = MembershipInferenceTrainingHook(
classifier,
(x_train, y_train),
(x_test, y_test),
classifier, (x_train, y_train), (x_test, y_test),
input_fn_constructor,
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK,
AttackType.K_NEAREST_NEIGHBORS],
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
],
tensorboard_dir=FLAGS.model_dir,
tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)
@ -148,12 +146,15 @@ def main(unused_argv):
classifier, (x_train, y_train), (x_test, y_test),
input_fn_constructor,
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS]
)
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
])
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
if __name__ == '__main__':

View file

@ -13,10 +13,8 @@
# limitations under the License.
from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import tf_estimator_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@ -55,23 +53,25 @@ class UtilsTest(absltest.TestCase):
# Define the classifier, input_fn for training and test data
self.classifier = tf.estimator.Estimator(model_fn=model_fn)
self.input_fn_train = tf.estimator.inputs.numpy_input_fn(
x={'x': self.train_data}, y=self.train_labels, num_epochs=1,
x={'x': self.train_data},
y=self.train_labels,
num_epochs=1,
shuffle=False)
self.input_fn_test = tf.estimator.inputs.numpy_input_fn(
x={'x': self.test_data}, y=self.test_labels, num_epochs=1,
x={'x': self.test_data},
y=self.test_labels,
num_epochs=1,
shuffle=False)
def test_calculate_losses(self):
"""Test calculating the loss."""
pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier,
self.input_fn_train,
self.train_labels)
pred, loss = tf_estimator_evaluation.calculate_losses(
self.classifier, self.input_fn_train, self.train_labels)
self.assertEqual(pred.shape, (self.ntrain, self.nclass))
self.assertEqual(loss.shape, (self.ntrain,))
pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier,
self.input_fn_test,
self.test_labels)
pred, loss = tf_estimator_evaluation.calculate_losses(
self.classifier, self.input_fn_test, self.test_labels)
self.assertEqual(pred.shape, (self.ntest, self.nclass))
self.assertEqual(loss.shape, (self.ntest,))
@ -94,12 +94,12 @@ class UtilsTest(absltest.TestCase):
def test_run_attack_on_tf_estimator_model(self):
"""Test the attack on the final models."""
def input_fn_constructor(x, y):
return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)
results = tf_estimator_evaluation.run_attack_on_tf_estimator_model(
self.classifier,
(self.train_data, self.train_labels),
self.classifier, (self.train_data, self.train_labels),
(self.test_data, self.test_labels),
input_fn_constructor,
attack_types=[AttackType.THRESHOLD_ATTACK])

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for membership inference attacks."""
import numpy as np

View file

@ -77,11 +77,10 @@ def write_to_tensorboard_tf2(writers, tags, values, step):
writer.flush()
def write_results_to_tensorboard(
attack_results: AttackResults,
writers: Union[tf1.summary.FileWriter, List[tf1.summary.FileWriter]],
step: int,
merge_classifiers: bool):
def write_results_to_tensorboard(attack_results: AttackResults,
writers: Union[tf1.summary.FileWriter,
List[tf1.summary.FileWriter]],
step: int, merge_classifiers: bool):
"""Write attack results to tensorboard.
Args:
@ -97,21 +96,21 @@ def write_results_to_tensorboard(
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
if merge_classifiers:
att_tags = ['attack/' + f'{s}_{m}' for s, m in
zip(att_slices, att_metrics)]
write_to_tensorboard([writers[t] for t in att_types],
att_tags, att_values, step)
att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
write_to_tensorboard([writers[t] for t in att_types], att_tags, att_values,
step)
else:
att_tags = ['attack/' + f'{s}_{t}_{m}' for t, s, m in
zip(att_types, att_slices, att_metrics)]
att_tags = [
'attack/' + f'{s}_{t}_{m}'
for t, s, m in zip(att_types, att_slices, att_metrics)
]
write_to_tensorboard(writers, att_tags, att_values, step)
def write_results_to_tensorboard_tf2(
attack_results: AttackResults,
writers: Union[tf2.summary.SummaryWriter, List[tf2.summary.SummaryWriter]],
step: int,
merge_classifiers: bool):
step: int, merge_classifiers: bool):
"""Write attack results to tensorboard.
Args:
@ -127,12 +126,12 @@ def write_results_to_tensorboard_tf2(
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
if merge_classifiers:
att_tags = ['attack/' + f'{s}_{m}' for s, m in
zip(att_slices, att_metrics)]
write_to_tensorboard_tf2([writers[t] for t in att_types],
att_tags, att_values, step)
att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
write_to_tensorboard_tf2([writers[t] for t in att_types], att_tags,
att_values, step)
else:
att_tags = ['attack/' + f'{s}_{t}_{m}' for t, s, m in
zip(att_types, att_slices, att_metrics)]
att_tags = [
'attack/' + f'{s}_{t}_{m}'
for t, s, m in zip(att_types, att_slices, att_metrics)
]
write_to_tensorboard_tf2(writers, att_tags, att_values, step)

View file

@ -25,22 +25,23 @@ def compute_exposure_interpolation(
"""Get exposure using interpolation.
Args:
perplexities: a dictionary, key is number of secret repetitions,
value is a list of perplexities
perplexities: a dictionary, key is number of secret repetitions, value is a
list of perplexities
perplexities_reference: a list, perplexities of the random sequences that
did not appear in the training data
Returns:
The exposure of every secret measured using interpolation (not necessarily
in the same order as the input)
"""
repetitions = list(perplexities.keys())
# Concatenate all perplexities, including those for references
perplexities_concat = np.concatenate([perplexities[r] for r in repetitions]
+ [perplexities_reference])
perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] +
[perplexities_reference])
# Concatenate the number of repetitions for each secret
repetitions_concat = np.concatenate(
[[r] * len(perplexities[r]) for r in repetitions]
+ [[0] * len(perplexities_reference)])
repetitions_concat = np.concatenate([[r] * len(perplexities[r])
for r in repetitions] +
[[0] * len(perplexities_reference)])
# Sort the repetition list according to the corresponding perplexity
idx = np.argsort(perplexities_concat)
@ -53,8 +54,10 @@ def compute_exposure_interpolation(
# (repetitions_concat == 0).
cum_sum = np.cumsum(repetitions_concat == 0)
ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions}
exposures = {r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
for r in repetitions}
exposures = {
r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
for r in repetitions
}
return exposures
@ -64,10 +67,11 @@ def compute_exposure_extrapolation(
"""Get exposure using extrapolation.
Args:
perplexities: a dictionary, key is number of secret repetitions,
value is a list of perplexities
perplexities: a dictionary, key is number of secret repetitions, value is a
list of perplexities
perplexities_reference: a list, perplexities of the random sequences that
did not appear in the training data
Returns:
The exposure of every secret measured using extrapolation
"""

View file

@ -15,7 +15,6 @@
from absl.testing import absltest
import numpy as np
from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
@ -28,9 +27,11 @@ class UtilsTest(absltest.TestCase):
def test_exposure_interpolation(self):
"""Test exposure by interpolation."""
perplexities = {1: [0, 0.1], # smallest perplexities
perplexities = {
1: [0, 0.1], # smallest perplexities
2: [20.0], # largest perplexities
5: [3.5]} # rank = 4
5: [3.5]
} # rank = 4
perplexities_reference = [float(x) for x in range(1, 17)]
exposures = compute_exposure_interpolation(perplexities,
perplexities_reference)
@ -41,7 +42,8 @@ class UtilsTest(absltest.TestCase):
expected_exposures = {
1: np.array([exposure_largest] * 2),
2: np.array([exposure_smallest]),
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])}
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])
}
self.assertEqual(exposures.keys(), expected_exposures.keys())
for r in exposures.keys():

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate random sequences."""
import itertools
@ -21,7 +20,9 @@ from dataclasses import dataclass
import numpy as np
def generate_random_sequences(vocab: List[str], pattern: str, n: int,
def generate_random_sequences(vocab: List[str],
pattern: str,
n: int,
seed: int = 1) -> List[str]:
"""Generate random sequences.
@ -35,6 +36,7 @@ def generate_random_sequences(vocab: List[str], pattern: str, n: int,
Returns:
A list of different random sequences from the given vocabulary
"""
def count_placeholder(pattern):
return sum([x[1] is not None for x in string.Formatter().parse(pattern)])
@ -104,6 +106,7 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets:
secret_config: configuration of secret.
seqs: a list of random sequences that will be used for secrets and
references.
Returns:
a secret instance.
"""
@ -116,7 +119,8 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets:
secret_config.num_repetitions, secret_config.num_secrets_for_repetitions):
secrets[num_repetition] = seqs[i:i + num_secrets]
i += num_secrets
return Secrets(config=secret_config,
return Secrets(
config=secret_config,
secrets=secrets,
references=seqs[-secret_config.num_references:])
@ -128,6 +132,7 @@ def generate_secrets_and_references(secret_configs: List[SecretConfig],
Args:
secret_configs: a list of secret configurations.
seed: random seed.
Returns:
A list of secret instances.
"""

View file

@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets."""
from absl.testing import absltest
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import construct_secret
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_random_sequences
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_secrets_and_references
@ -32,27 +29,34 @@ class UtilsTest(absltest.TestCase):
"""Test generate_random_sequences."""
# Test when n is larger than total number of possible sequences.
seqs = generate_random_sequences(['A', 'b', 'c'], '{}+{}', 10, seed=27)
expected_seqs = ['A+c', 'c+c', 'b+b', 'A+b', 'b+c',
'c+A', 'c+b', 'A+A', 'b+A']
expected_seqs = [
'A+c', 'c+c', 'b+b', 'A+b', 'b+c', 'c+A', 'c+b', 'A+A', 'b+A'
]
self.assertEqual(seqs, expected_seqs)
# Test when n is smaller than total number of possible sequences.
seqs = generate_random_sequences(list('01234'), 'prefix {}{}{}?', 8, seed=9)
expected_seqs = ['prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?',
'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?']
expected_seqs = [
'prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?',
'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?'
]
self.assertEqual(seqs, expected_seqs)
def test_construct_secret(self):
secret_config = SecretConfig(vocab=None, pattern='',
secret_config = SecretConfig(
vocab=None,
pattern='',
num_repetitions=[1, 2, 8],
num_secrets_for_repetitions=[2, 3, 1],
num_references=3)
seqs = list('0123456789')
secrets = construct_secret(secret_config, seqs)
self.assertEqual(secrets.config, secret_config)
self.assertDictEqual(secrets.secrets, {1: ['0', '1'],
self.assertDictEqual(secrets.secrets, {
1: ['0', '1'],
2: ['2', '3', '4'],
8: ['5']})
8: ['5']
})
self.assertEqual(secrets.references, ['7', '8', '9'])
# Test when the number of elements in seqs is not enough.
@ -61,29 +65,36 @@ class UtilsTest(absltest.TestCase):
def test_generate_secrets_and_references(self):
secret_configs = [
SecretConfig(vocab=['w1', 'w2', 'w3'], pattern='{} {} suf',
SecretConfig(
vocab=['w1', 'w2', 'w3'],
pattern='{} {} suf',
num_repetitions=[1, 12],
num_secrets_for_repetitions=[2, 1],
num_references=3),
SecretConfig(vocab=['W 1', 'W 2', 'W 3'], pattern='{}-{}',
SecretConfig(
vocab=['W 1', 'W 2', 'W 3'],
pattern='{}-{}',
num_repetitions=[1, 2, 8],
num_secrets_for_repetitions=[2, 3, 1],
num_references=3)
]
secrets = generate_secrets_and_references(secret_configs, seed=27)
self.assertEqual(secrets[0].config, secret_configs[0])
self.assertDictEqual(secrets[0].secrets, {1: ['w3 w2 suf', 'w2 w1 suf'],
12: ['w1 w1 suf']})
self.assertDictEqual(secrets[0].secrets, {
1: ['w3 w2 suf', 'w2 w1 suf'],
12: ['w1 w1 suf']
})
self.assertEqual(secrets[0].references,
['w2 w3 suf', 'w2 w2 suf', 'w3 w1 suf'])
self.assertEqual(secrets[1].config, secret_configs[1])
self.assertDictEqual(secrets[1].secrets,
{1: ['W 3-W 2', 'W 1-W 3'],
self.assertDictEqual(
secrets[1].secrets, {
1: ['W 3-W 2', 'W 1-W 3'],
2: ['W 3-W 1', 'W 2-W 1', 'W 1-W 1'],
8: ['W 2-W 2']})
self.assertEqual(secrets[1].references,
['W 2-W 3', 'W 3-W 3', 'W 1-W 2'])
8: ['W 2-W 2']
})
self.assertEqual(secrets[1].references, ['W 2-W 3', 'W 3-W 3', 'W 1-W 2'])
if __name__ == '__main__':

View file

@ -27,10 +27,14 @@ n_samples = 10
input_dim = 2
n_outputs = 1
# Create binary classification dataset:
x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)),
tf.constant(1, tf.float32, (n_samples, input_dim))]
y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),
tf.constant(1, tf.float32, (n_samples, 1))]
x_stack = [
tf.constant(-1, tf.float32, (n_samples, input_dim)),
tf.constant(1, tf.float32, (n_samples, input_dim))
]
y_stack = [
tf.constant(0, tf.float32, (n_samples, 1)),
tf.constant(1, tf.float32, (n_samples, 1))
]
x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
print(x.shape, y.shape)
generator = tf.data.Dataset.from_tensor_slices((x, y))
@ -86,7 +90,8 @@ n_samples = None  # default, if it cannot be inferred, specify this
epsilon = 2
noise_distribution = 'laplace'
bolt.fit(x,
bolt.fit(
x,
y,
epsilon=epsilon,
class_weight=class_weight,
@ -109,7 +114,8 @@ n_samples = None  # default, if it cannot be inferred, specify this
epsilon = 2
noise_distribution = 'laplace'
try:
bolt.fit(generator,
bolt.fit(
generator,
epsilon=epsilon,
class_weight=class_weight,
batch_size=batch_size,
@ -122,7 +128,8 @@ except ValueError as e:
# And now, re running with the parameter set.
# -------
n_samples = 20
bolt.fit_generator(generator,
bolt.fit_generator(
generator,
epsilon=epsilon,
class_weight=class_weight,
n_samples=n_samples,
@ -145,8 +152,8 @@ class TestModel(tf.keras.Model): # pylint: disable=abstract-method
def __init__(self, reg_layer, number_of_outputs=1):
super().__init__(name='test')
self.output_layer = tf.keras.layers.Dense(number_of_outputs,
kernel_regularizer=reg_layer)
self.output_layer = tf.keras.layers.Dense(
number_of_outputs, kernel_regularizer=reg_layer)
def call(self, inputs): # pylint: disable=arguments-differ
return self.output_layer(inputs)
@ -180,6 +187,5 @@ with optimizer(
layers=test_model.layers,
class_weights=class_weights,
n_samples=n_samples,
batch_size=batch_size
) as _:
batch_size=batch_size) as _:
test_model.fit(x, y, batch_size=batch_size, epochs=2)

View file

@ -86,12 +86,10 @@ def cnn_model_fn(features, labels, mode, params): # pylint: disable=unused-argu
eval_metric_ops = {
'accuracy':
tf.metrics.accuracy(
labels=labels,
predictions=tf.argmax(input=logits, axis=1))
labels=labels, predictions=tf.argmax(input=logits, axis=1))
}
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
eval_metric_ops=eval_metric_ops)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
def main(unused_argv):
@ -100,8 +98,8 @@ def main(unused_argv):
raise ValueError('Number of microbatches should evenly divide batch_size')
# Instantiate the tf.Estimator.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
model_dir=FLAGS.model_dir)
mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)
# Training loop.
steps_per_epoch = 60000 // FLAGS.batch_size

View file

@ -25,7 +25,8 @@ from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescent
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
tf.enable_eager_execution()
flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, '
flags.DEFINE_boolean(
'dpsgd', True, 'If True, train with DP-SGD. If False, '
'train with vanilla SGD.')
flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 1.1,
@ -33,7 +34,8 @@ flags.DEFINE_float('noise_multiplier', 1.1,
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer('microbatches', 250, 'Number of microbatches '
flags.DEFINE_integer(
'microbatches', 250, 'Number of microbatches '
'(must evenly divide batch_size)')
FLAGS = flags.FLAGS
@ -45,7 +47,8 @@ def compute_epsilon(steps):
return float('inf')
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = FLAGS.batch_size / 60000
rdp = compute_rdp(q=sampling_probability,
rdp = compute_rdp(
q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
@ -64,22 +67,20 @@ def main(_):
# Create a dataset object and batch for the training data
dataset = tf.data.Dataset.from_tensor_slices(
(tf.cast(train_images[..., tf.newaxis]/255, tf.float32),
tf.cast(train_labels, tf.int64)))
(tf.cast(train_images[..., tf.newaxis] / 255,
tf.float32), tf.cast(train_labels, tf.int64)))
dataset = dataset.shuffle(1000).batch(FLAGS.batch_size)
# Create a dataset object and batch for the test data
eval_dataset = tf.data.Dataset.from_tensor_slices(
(tf.cast(test_images[..., tf.newaxis]/255, tf.float32),
tf.cast(test_labels, tf.int64)))
(tf.cast(test_images[..., tf.newaxis] / 255,
tf.float32), tf.cast(test_labels, tf.int64)))
eval_dataset = eval_dataset.batch(10000)
# Define the model using tf.keras.layers
mnist_model = tf.keras.Sequential([
tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu'),
tf.keras.layers.Conv2D(
16, 8, strides=2, padding='same', activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
@ -119,8 +120,8 @@ def main(_):
return loss
if FLAGS.dpsgd:
grads_and_vars = opt.compute_gradients(loss_fn, var_list,
gradient_tape=gradient_tape)
grads_and_vars = opt.compute_gradients(
loss_fn, var_list, gradient_tape=gradient_tape)
else:
grads_and_vars = opt.compute_gradients(loss_fn, var_list)
@ -140,5 +141,6 @@ def main(_):
else:
print('Trained with vanilla non-private SGD optimizer')
if __name__ == '__main__':
app.run(main)
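Plugging this tutorial's default flags into the accountant yields the reported epsilon directly; a standalone sketch (batch size 250, noise multiplier 1.1 and 60 epochs are the defaults above, while delta = 1e-5 is an assumption standard for MNIST-sized data):

```
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
steps = 60 * (60000 // 250)  # epochs * steps_per_epoch
rdp = compute_rdp(
    q=250 / 60000,  # sampling probability
    noise_multiplier=1.1,
    steps=steps,
    orders=orders)
eps, _, _ = get_privacy_spent(orders, rdp, target_delta=1e-5)
print('epsilon = %.3g' % eps)
```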

View file

@ -47,7 +47,8 @@ def compute_epsilon(steps):
return float('inf')
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = FLAGS.batch_size / 60000
rdp = compute_rdp(q=sampling_probability,
rdp = compute_rdp(
q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
@ -91,16 +92,16 @@ def main(unused_argv):
# Define a sequential Keras model
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(16, 8,
tf.keras.layers.Conv2D(
16,
8,
strides=2,
padding='same',
activation='relu',
input_shape=(28, 28, 1)),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Conv2D(32, 4,
strides=2,
padding='valid',
activation='relu'),
tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(32, activation='relu'),
@ -124,7 +125,9 @@ def main(unused_argv):
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
# Train model with Keras
model.fit(train_data, train_labels,
model.fit(
train_data,
train_labels,
epochs=FLAGS.epochs,
validation_data=(test_data, test_labels),
batch_size=FLAGS.batch_size)
@ -136,5 +139,6 @@ def main(unused_argv):
else:
print('Trained with vanilla non-private SGD optimizer')
if __name__ == '__main__':
app.run(main)

View file

@ -22,7 +22,6 @@ from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized
flags.DEFINE_boolean(
'dpsgd', True, 'If True, train with DP-SGD. If False, '
'train with vanilla SGD.')
@ -50,7 +49,8 @@ def compute_epsilon(steps):
return float('inf')
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES
rdp = compute_rdp(q=sampling_probability,
rdp = compute_rdp(
q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
@ -63,15 +63,11 @@ def cnn_model_fn(features, labels, mode):
# Define CNN architecture using tf.keras.layers.
input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
y = tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu').apply(input_layer)
y = tf.keras.layers.Conv2D(
16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Conv2D(32, 4,
strides=2,
padding='valid',
activation='relu').apply(y)
y = tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu').apply(y)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Flatten().apply(y)
y = tf.keras.layers.Dense(32, activation='relu').apply(y)
@ -106,22 +102,19 @@ def cnn_model_fn(features, labels, mode):
# the vector_loss because tf.estimator requires a scalar loss. This is only
# used for evaluation and debugging by tf.estimator. The actual loss being
# minimized is opt_loss defined above and passed to optimizer.minimize().
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
train_op=train_op)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode).
elif mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = {
'accuracy':
tf.metrics.accuracy(
labels=labels,
predictions=tf.argmax(input=logits, axis=1))
labels=labels, predictions=tf.argmax(input=logits, axis=1))
}
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
eval_metric_ops=eval_metric_ops)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
def load_mnist():
@ -155,8 +148,8 @@ def main(unused_argv):
train_data, train_labels, test_data, test_labels = load_mnist()
# Instantiate the tf.Estimator.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
model_dir=FLAGS.model_dir)
mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)
# Create tf.Estimator input functions for the training and test data.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
@ -166,10 +159,7 @@ def main(unused_argv):
num_epochs=FLAGS.epochs,
shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={'x': test_data},
y=test_labels,
num_epochs=1,
shuffle=False)
x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)
# Training loop.
steps_per_epoch = NUM_TRAIN_EXAMPLES // FLAGS.batch_size
@ -189,5 +179,6 @@ def main(unused_argv):
else:
print('Trained with vanilla non-private SGD optimizer')
if __name__ == '__main__':
app.run(main)

View file

@ -56,8 +56,8 @@ def lr_model_fn(features, labels, mode, nclasses, dim):
logits = tf.keras.layers.Dense(
units=nclasses,
kernel_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer),
bias_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer)).apply(
input_layer)
bias_regularizer=tf.keras.regularizers.L2(
l2=FLAGS.regularizer)).apply(input_layer)
# Calculate loss as a vector (to support microbatches in DP-SGD).
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
@ -166,8 +166,7 @@ def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
# an option.
rdp = [order * coef for order in orders]
eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
p * 100, eps, delta))
print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(p * 100, eps, delta))
# Compute privacy guarantees for the Sampled Gaussian Mechanism.
rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
@ -234,5 +233,6 @@ def main(unused_argv):
noise_multiplier=FLAGS.noise_multiplier,
)
if __name__ == '__main__':
app.run(main)

View file

@ -28,23 +28,19 @@ def cnn_model_fn(features, labels, mode):
# Define CNN architecture using tf.keras.layers.
input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
y = tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu').apply(input_layer)
y = tf.keras.layers.Conv2D(
16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Conv2D(32, 4,
strides=2,
padding='valid',
activation='relu').apply(y)
y = tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu').apply(y)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Flatten().apply(y)
y = tf.keras.layers.Dense(32, activation='relu').apply(y)
logits = tf.keras.layers.Dense(10).apply(y)
# Calculate loss as a vector and as its average across minibatch.
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
logits=logits)
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=labels, logits=logits)
scalar_loss = tf.reduce_mean(vector_loss)
# Configure the training op (for TRAIN mode).
@ -53,21 +49,18 @@ def cnn_model_fn(features, labels, mode):
opt_loss = scalar_loss
global_step = tf.train.get_global_step()
train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
train_op=train_op)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode).
elif mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = {
'accuracy':
tf.metrics.accuracy(
labels=labels,
predictions=tf.argmax(input=logits, axis=1))
labels=labels, predictions=tf.argmax(input=logits, axis=1))
}
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
eval_metric_ops=eval_metric_ops)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
def load_mnist():
@ -109,10 +102,7 @@ def main(unused_argv):
num_epochs=FLAGS.epochs,
shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={'x': test_data},
y=test_labels,
num_epochs=1,
shuffle=False)
x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)
# Training loop.
steps_per_epoch = 60000 // FLAGS.batch_size
@ -125,5 +115,6 @@ def main(unused_argv):
test_accuracy = eval_results['accuracy']
print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))
if __name__ == '__main__':
tf.app.run()