Format TensorFlow Privacy files.

PiperOrigin-RevId: 424923635
Michael Reneer 2022-01-28 11:56:55 -08:00 committed by A. Unique TensorFlower
parent 07230a161a
commit 943ef91ee9
62 changed files with 1170 additions and 1087 deletions


@@ -23,7 +23,6 @@ from tensorflow_docs.api_generator import generate_lib
 from tensorflow_docs.api_generator import public_api
 import tensorflow_privacy as tf_privacy
 
-
 flags.DEFINE_string('output_dir', '/tmp/tf_privacy',
                     'Where to output the docs.')
 flags.DEFINE_string(


@@ -1,6 +1,5 @@
 # Get Started
 
-
 This document assumes you are already familiar with differential privacy, and
 have determined that you would like to use TF Privacy to implement differential
 privacy guarantees in your model(s). If you're not familiar with differential


@@ -17,7 +17,6 @@
 import math
 
 from absl import app
-
 from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp  # pylint: disable=g-import-not-at-top
 from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

@@ -33,8 +32,10 @@ def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
   eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
 
-  print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
-        ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')
+  print(
+      'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
+      ' over {} steps satisfies'.format(100 * q, sigma, steps),
+      end=' ')
   print('differential privacy with eps = {:.3g} and delta = {}.'.format(
       eps, delta))
 
   print('The optimal RDP order is {}.'.format(opt_order))
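
For context, the two accountant calls reformatted here are typically driven as in the following minimal sketch; the RDP orders grid and the concrete q/sigma/steps/delta values are illustrative assumptions, not part of this commit:

    from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
    from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

    # Sampling rate q, noise multiplier sigma, step count, and target delta.
    q, sigma, steps, delta = 0.01, 1.1, 10000, 1e-5
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))  # assumed grid
    rdp = compute_rdp(q, sigma, steps, orders)  # RDP value at each order
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('eps = {:.3g} at order {}'.format(eps, opt_order))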


@@ -46,12 +46,13 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
     # the basis of this sanity check. This is confirmed in the above paper.
     q = batch_size / n
     steps = epochs * n / batch_size
-    sigma = noise_multiplier * math.sqrt(steps) /q
+    sigma = noise_multiplier * math.sqrt(steps) / q
 
     # We compute the optimal guarantee for Gaussian
     # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
-    low_delta = .5*math.erfc((eps*sigma-.5/sigma)/math.sqrt(2))
+    low_delta = .5 * math.erfc((eps * sigma - .5 / sigma) / math.sqrt(2))
     if eps < 100:  # Skip this if it causes overflow; error is minor.
-      low_delta -= math.exp(eps)*.5*math.erfc((eps*sigma+.5/sigma)/math.sqrt(2))
+      low_delta -= math.exp(eps) * .5 * math.erfc(
+          (eps * sigma + .5 / sigma) / math.sqrt(2))
 
     self.assertLessEqual(low_delta, delta)
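
Writing Phi for the standard normal CDF, with Phi(x) = 0.5 * erfc(-x / sqrt(2)), the erfc expression this test computes is exactly the analytic Gaussian mechanism bound of Theorem 8 in https://arxiv.org/abs/1805.06530:

    \delta_{\text{low}} \;=\; \Phi\!\left(\frac{1}{2\sigma} - \varepsilon\sigma\right)
    \;-\; e^{\varepsilon}\,\Phi\!\left(-\frac{1}{2\sigma} - \varepsilon\sigma\right)

so the assertion checks that the claimed (eps, delta) is not better than what any Gaussian mechanism with this effective sigma could achieve.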


@@ -52,8 +52,8 @@ def main(argv):
   assert FLAGS.batch_size is not None, 'Flag batch_size is missing.'
   assert FLAGS.epsilon is not None, 'Flag epsilon is missing.'
   assert FLAGS.epochs is not None, 'Flag epochs is missing.'
-  compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon,
-                FLAGS.epochs, FLAGS.delta, FLAGS.min_noise)
+  compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, FLAGS.epochs,
+                FLAGS.delta, FLAGS.min_noise)
 
 
 if __name__ == '__main__':


@@ -27,11 +27,12 @@ class ComputeNoiseFromBudgetTest(parameterized.TestCase):
       ('Test2', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1e-5, 0.1),
       ('Test3', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1, 0),
   )
-  def test_compute_noise(self, n, batch_size, target_epsilon, epochs,
-                         delta, min_noise, expected_noise):
+  def test_compute_noise(self, n, batch_size, target_epsilon, epochs, delta,
+                         min_noise, expected_noise):
     target_noise = compute_noise_from_budget_lib.compute_noise(
         n, batch_size, target_epsilon, epochs, delta, min_noise)
     self.assertAlmostEqual(target_noise, expected_noise)
 
+
 if __name__ == '__main__':
   absltest.main()


@@ -176,4 +176,3 @@ class SampledWithoutReplacementDpEvent(DpEvent):
   source_dataset_size: int
   sample_size: int
   event: DpEvent
-


@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+"""Tests for DpEventBuilder."""
 from absl.testing import absltest
 from tensorflow_privacy.privacy.analysis import dp_event
@@ -68,7 +67,8 @@ class DpEventBuilderTest(absltest.TestCase):
     expected_event = dp_event.ComposedDpEvent([
         _gaussian_event,
         dp_event.SelfComposedDpEvent(composed_event, 3),
-        dp_event.SelfComposedDpEvent(_poisson_event, 2)])
+        dp_event.SelfComposedDpEvent(_poisson_event, 2)
+    ])
     self.assertEqual(expected_event, builder.build())


@@ -84,13 +84,13 @@ class TensorBuffer(object):
           dtype=self._dtype,
           initializer=new_buffer,
           trainable=False)
-      return self._buffer, tf.assign(
-          self._capacity, tf.multiply(self._capacity, 2))
+      return self._buffer, tf.assign(self._capacity,
+                                     tf.multiply(self._capacity, 2))
     else:
-      return tf.assign(
-          self._buffer, new_buffer,
-          validate_shape=False), tf.assign(
-              self._capacity, tf.multiply(self._capacity, 2))
+      return tf.assign(
+          self._buffer, new_buffer,
+          validate_shape=False), tf.assign(self._capacity,
+                                           tf.multiply(self._capacity, 2))
 
     update_buffer, update_capacity = tf.cond(
         pred=tf.equal(self._current_size, self._capacity),


@@ -44,9 +44,8 @@ class TensorBufferTest(tf.test.TestCase):
     my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
 
-    with self.assertRaisesRegex(
-        tf.errors.InvalidArgumentError,
-        'Appending value of inconsistent shape.'):
+    with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
+                                'Appending value of inconsistent shape.'):
       my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
 
   def test_resize(self):
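
For orientation, the buffer these two hunks touch is exercised roughly as follows. This is a minimal sketch based only on the constructor and append calls visible in the test; the TF1-compat eager setup and the default int32 dtype are assumptions:

    import tensorflow.compat.v1 as tf
    from tensorflow_privacy.privacy.analysis import tensor_buffer

    tf.enable_eager_execution()  # assumed setup for running append eagerly

    size, shape = 2, [2, 3]
    my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
    for _ in range(3):  # third append exceeds `size`, exercising the doubling path above
      my_buffer.append(tf.ones(shape=shape, dtype=tf.int32))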


@@ -187,5 +187,6 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
             tree_aggregation_accountant._compute_gaussian_zcdp(
                 sigma, sum_sensitivity_square))
 
+
 if __name__ == '__main__':
   tf.test.main()
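
If _compute_gaussian_zcdp implements the standard Gaussian-mechanism zero-concentrated DP bound (an assumption; only the call site is visible in this hunk), the quantity under test is

    \rho \;=\; \frac{\Delta_2^2}{2\sigma^2}

with the sum_sensitivity_square argument playing the role of the squared l2 sensitivity.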


@@ -68,7 +68,6 @@ class StrongConvexMixin:
     Args:
       class_weight: class weights used
 
-
     Returns: L
     """
     raise NotImplementedError("lipchitz constant not implemented for "

@@ -126,13 +125,10 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
     if reg_lambda <= 0:
       raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
     if radius_constant <= 0:
-      raise ValueError("radius_constant: {0}, should be >= 0".format(
-          radius_constant
-      ))
+      raise ValueError(
+          "radius_constant: {0}, should be >= 0".format(radius_constant))
     if delta <= 0:
-      raise ValueError("delta: {0}, should be >= 0".format(
-          delta
-      ))
+      raise ValueError("delta: {0}, should be >= 0".format(delta))
     self.C = c_arg  # pylint: disable=invalid-name
     self.delta = delta
     self.radius_constant = radius_constant

@@ -172,9 +168,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
   def beta(self, class_weight):
     """See super class."""
     max_class_weight = self.max_class_weight(class_weight, self.dtype)
-    delta = _ops.convert_to_tensor_v2(self.delta,
-                                      dtype=self.dtype
-                                      )
+    delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype)
     return self.C * max_class_weight / (delta *
                                         tf.constant(2, dtype=self.dtype)) + \
         self.reg_lambda

@@ -197,13 +191,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
       The L2 regularizer layer for this loss function, with regularizer constant
       set to half the 0.5 * reg_lambda.
     """
-    return L1L2(l2=self.reg_lambda/2)
+    return L1L2(l2=self.reg_lambda / 2)
 
 
-class StrongConvexBinaryCrossentropy(
-    losses.BinaryCrossentropy,
-    StrongConvexMixin
-):
+class StrongConvexBinaryCrossentropy(losses.BinaryCrossentropy,
+                                     StrongConvexMixin):
   """Strongly Convex BinaryCrossentropy loss using l2 weight regularization."""
 
   def __init__(self,

@@ -222,10 +214,10 @@ class StrongConvexBinaryCrossentropy(
       radius_constant: constant defining the length of the radius
       from_logits: True if the input are unscaled logits. False if they are
         already scaled.
-      label_smoothing: amount of smoothing to perform on labels
-        relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the
-        impact of this parameter's effect on privacy is not known and thus the
-        default should be used.
+      label_smoothing: amount of smoothing to perform on labels relaxation of
+        trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the impact of this
+        parameter's effect on privacy is not known and thus the default should
+        be used.
       reduction: reduction type to use. See super class
       dtype: tf datatype to use for tensor conversions.
     """

@@ -239,9 +231,8 @@ class StrongConvexBinaryCrossentropy(
     if c_arg <= 0:
       raise ValueError("c: {0}, should be >= 0".format(c_arg))
     if radius_constant <= 0:
-      raise ValueError("radius_constant: {0}, should be >= 0".format(
-          radius_constant
-      ))
+      raise ValueError(
+          "radius_constant: {0}, should be >= 0".format(radius_constant))
     self.dtype = dtype
     self.C = c_arg  # pylint: disable=invalid-name
     self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)

@@ -294,4 +285,4 @@ class StrongConvexBinaryCrossentropy(
       The L2 regularizer layer for this loss function, with regularizer constant
       set to half the 0.5 * reg_lambda.
     """
-    return L1L2(l2=self.reg_lambda/2)
+    return L1L2(l2=self.reg_lambda / 2)
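
Reading the reformatted beta method back as math, the smoothness constant it returns for the Huber loss is

    \beta(w) \;=\; \frac{C \cdot \max_i w_i}{2\,\delta} \;+\; \lambda_{\mathrm{reg}}

where w are the class weights, delta the Huber smoothing parameter, and lambda_reg the regularization constant (a restatement of the code above, not part of the diff).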


@@ -40,21 +40,29 @@ def captured_output():
 class StrongConvexMixinTests(keras_parameterized.TestCase):
   """Tests for the StrongConvexMixin."""
 
-  @parameterized.named_parameters([
-      {'testcase_name': 'beta not implemented',
-       'fn': 'beta',
-       'args': [1]},
-      {'testcase_name': 'gamma not implemented',
-       'fn': 'gamma',
-       'args': []},
-      {'testcase_name': 'lipchitz not implemented',
-       'fn': 'lipchitz_constant',
-       'args': [1]},
-      {'testcase_name': 'radius not implemented',
-       'fn': 'radius',
-       'args': []},
-  ])
+  @parameterized.named_parameters([
+      {
+          'testcase_name': 'beta not implemented',
+          'fn': 'beta',
+          'args': [1]
+      },
+      {
+          'testcase_name': 'gamma not implemented',
+          'fn': 'gamma',
+          'args': []
+      },
+      {
+          'testcase_name': 'lipchitz not implemented',
+          'fn': 'lipchitz_constant',
+          'args': [1]
+      },
+      {
+          'testcase_name': 'radius not implemented',
+          'fn': 'radius',
+          'args': []
+      },
+  ])
   def test_not_implemented(self, fn, args):
     """Test that the given fn's are not implemented on the mixin.

@@ -67,9 +75,11 @@ class StrongConvexMixinTests(keras_parameterized.TestCase):
     getattr(loss, fn, None)(*args)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'radius not implemented',
-       'fn': 'kernel_regularizer',
-       'args': []},
+      {
+          'testcase_name': 'radius not implemented',
+          'fn': 'kernel_regularizer',
+          'args': []
+      },
   ])
   def test_return_none(self, fn, args):
     """Test that fn of Mixin returns None.
@@ -87,10 +97,11 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
   """tests for BinaryCrossesntropy StrongConvex loss."""
 
   @parameterized.named_parameters([
-      {'testcase_name': 'normal',
-       'reg_lambda': 1,
-       'C': 1,
-       'radius_constant': 1
+      {
+          'testcase_name': 'normal',
+          'reg_lambda': 1,
+          'C': 1,
+          'radius_constant': 1
       },  # pylint: disable=invalid-name
   ])
   def test_init_params(self, reg_lambda, C, radius_constant):

@@ -106,24 +117,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
     self.assertIsInstance(loss, StrongConvexBinaryCrossentropy)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'negative c',
-       'reg_lambda': 1,
-       'C': -1,
-       'radius_constant': 1
+      {
+          'testcase_name': 'negative c',
+          'reg_lambda': 1,
+          'C': -1,
+          'radius_constant': 1
       },
-      {'testcase_name': 'negative radius',
-       'reg_lambda': 1,
-       'C': 1,
-       'radius_constant': -1
+      {
+          'testcase_name': 'negative radius',
+          'reg_lambda': 1,
+          'C': 1,
+          'radius_constant': -1
      },
-      {'testcase_name': 'negative lambda',
-       'reg_lambda': -1,
-       'C': 1,
-       'radius_constant': 1
+      {
+          'testcase_name': 'negative lambda',
+          'reg_lambda': -1,
+          'C': 1,
+          'radius_constant': 1
      },  # pylint: disable=invalid-name
   ])
   def test_bad_init_params(self, reg_lambda, C, radius_constant):
-    """Test invalid domain for given params. Should return ValueError.
+    """Test invalid domain for given params.
+
+    Should return ValueError.
 
     Args:
       reg_lambda: initialization value for reg_lambda arg

@@ -137,25 +153,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
   @test_util.run_all_in_graph_and_eager_modes
   @parameterized.named_parameters([
       # [] for compatibility with tensorflow loss calculation
-      {'testcase_name': 'both positive',
-       'logits': [10000],
-       'y_true': [1],
-       'result': 0,
+      {
+          'testcase_name': 'both positive',
+          'logits': [10000],
+          'y_true': [1],
+          'result': 0,
       },
-      {'testcase_name': 'positive gradient negative logits',
-       'logits': [-10000],
-       'y_true': [1],
-       'result': 10000,
+      {
+          'testcase_name': 'positive gradient negative logits',
+          'logits': [-10000],
+          'y_true': [1],
+          'result': 10000,
      },
-      {'testcase_name': 'positivee gradient positive logits',
-       'logits': [10000],
-       'y_true': [0],
-       'result': 10000,
+      {
+          'testcase_name': 'positivee gradient positive logits',
+          'logits': [10000],
+          'y_true': [0],
+          'result': 10000,
      },
-      {'testcase_name': 'both negative',
-       'logits': [-10000],
-       'y_true': [0],
-       'result': 0
+      {
+          'testcase_name': 'both negative',
+          'logits': [-10000],
+          'y_true': [0],
+          'result': 0
      },
   ])
   def test_calculation(self, logits, y_true, result):
@@ -173,29 +193,33 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
     self.assertEqual(loss.numpy(), result)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'beta',
-       'init_args': [1, 1, 1],
-       'fn': 'beta',
-       'args': [1],
-       'result': tf.constant(2, dtype=tf.float32)
+      {
+          'testcase_name': 'beta',
+          'init_args': [1, 1, 1],
+          'fn': 'beta',
+          'args': [1],
+          'result': tf.constant(2, dtype=tf.float32)
       },
-      {'testcase_name': 'gamma',
-       'fn': 'gamma',
-       'init_args': [1, 1, 1],
-       'args': [],
-       'result': tf.constant(1, dtype=tf.float32),
+      {
+          'testcase_name': 'gamma',
+          'fn': 'gamma',
+          'init_args': [1, 1, 1],
+          'args': [],
+          'result': tf.constant(1, dtype=tf.float32),
      },
-      {'testcase_name': 'lipchitz constant',
-       'fn': 'lipchitz_constant',
-       'init_args': [1, 1, 1],
-       'args': [1],
-       'result': tf.constant(2, dtype=tf.float32),
+      {
+          'testcase_name': 'lipchitz constant',
+          'fn': 'lipchitz_constant',
+          'init_args': [1, 1, 1],
+          'args': [1],
+          'result': tf.constant(2, dtype=tf.float32),
      },
-      {'testcase_name': 'kernel regularizer',
-       'fn': 'kernel_regularizer',
-       'init_args': [1, 1, 1],
-       'args': [],
-       'result': L1L2(l2=0.5),
+      {
+          'testcase_name': 'kernel regularizer',
+          'fn': 'kernel_regularizer',
+          'init_args': [1, 1, 1],
+          'args': [],
+          'result': L1L2(l2=0.5),
      },
   ])
   def test_fns(self, init_args, fn, args, result):

@@ -218,11 +242,12 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
     self.assertEqual(expected, result)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'label_smoothing',
-       'init_args': [1, 1, 1, True, 0.1],
-       'fn': None,
-       'args': None,
-       'print_res': 'The impact of label smoothing on privacy is unknown.'
+      {
+          'testcase_name': 'label_smoothing',
+          'init_args': [1, 1, 1, True, 0.1],
+          'fn': None,
+          'args': None,
+          'print_res': 'The impact of label smoothing on privacy is unknown.'
       },
   ])
   def test_prints(self, init_args, fn, args, print_res):
@@ -245,11 +270,12 @@ class HuberTests(keras_parameterized.TestCase):
   """tests for BinaryCrossesntropy StrongConvex loss."""
 
   @parameterized.named_parameters([
-      {'testcase_name': 'normal',
-       'reg_lambda': 1,
-       'c': 1,
-       'radius_constant': 1,
-       'delta': 1,
+      {
+          'testcase_name': 'normal',
+          'reg_lambda': 1,
+          'c': 1,
+          'radius_constant': 1,
+          'delta': 1,
       },
   ])
   def test_init_params(self, reg_lambda, c, radius_constant, delta):

@@ -266,33 +292,39 @@ class HuberTests(keras_parameterized.TestCase):
     self.assertIsInstance(loss, StrongConvexHuber)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'negative c',
-       'reg_lambda': 1,
-       'c': -1,
-       'radius_constant': 1,
-       'delta': 1
+      {
+          'testcase_name': 'negative c',
+          'reg_lambda': 1,
+          'c': -1,
+          'radius_constant': 1,
+          'delta': 1
       },
-      {'testcase_name': 'negative radius',
-       'reg_lambda': 1,
-       'c': 1,
-       'radius_constant': -1,
-       'delta': 1
+      {
+          'testcase_name': 'negative radius',
+          'reg_lambda': 1,
+          'c': 1,
+          'radius_constant': -1,
+          'delta': 1
      },
-      {'testcase_name': 'negative lambda',
-       'reg_lambda': -1,
-       'c': 1,
-       'radius_constant': 1,
-       'delta': 1
+      {
+          'testcase_name': 'negative lambda',
+          'reg_lambda': -1,
+          'c': 1,
+          'radius_constant': 1,
+          'delta': 1
      },
-      {'testcase_name': 'negative delta',
-       'reg_lambda': 1,
-       'c': 1,
-       'radius_constant': 1,
-       'delta': -1
+      {
+          'testcase_name': 'negative delta',
+          'reg_lambda': 1,
+          'c': 1,
+          'radius_constant': 1,
+          'delta': -1
      },
   ])
   def test_bad_init_params(self, reg_lambda, c, radius_constant, delta):
-    """Test invalid domain for given params. Should return ValueError.
+    """Test invalid domain for given params.
+
+    Should return ValueError.
 
     Args:
       reg_lambda: initialization value for reg_lambda arg
@@ -307,59 +339,68 @@ class HuberTests(keras_parameterized.TestCase):
   # test the bounds and test varied delta's
   @test_util.run_all_in_graph_and_eager_modes
   @parameterized.named_parameters([
-      {'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
-       'logits': 2.1,
-       'y_true': 1,
-       'delta': 1,
-       'result': 0,
+      {
+          'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
+          'logits': 2.1,
+          'y_true': 1,
+          'delta': 1,
+          'result': 0,
       },
-      {'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
-       'logits': 1.9,
-       'y_true': 1,
-       'delta': 1,
-       'result': 0.01*0.25,
+      {
+          'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
+          'logits': 1.9,
+          'y_true': 1,
+          'delta': 1,
+          'result': 0.01 * 0.25,
      },
-      {'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
-       'logits': 0.1,
-       'y_true': 1,
-       'delta': 1,
-       'result': 1.9**2 * 0.25,
+      {
+          'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
+          'logits': 0.1,
+          'y_true': 1,
+          'delta': 1,
+          'result': 1.9**2 * 0.25,
      },
-      {'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
-       'logits': -0.1,
-       'y_true': 1,
-       'delta': 1,
-       'result': 1.1,
+      {
+          'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
+          'logits': -0.1,
+          'y_true': 1,
+          'delta': 1,
+          'result': 1.1,
      },
-      {'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
-       'logits': 3.1,
-       'y_true': 1,
-       'delta': 2,
-       'result': 0,
+      {
+          'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
+          'logits': 3.1,
+          'y_true': 1,
+          'delta': 2,
+          'result': 0,
      },
-      {'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
-       'logits': 2.9,
-       'y_true': 1,
-       'delta': 2,
-       'result': 0.01*0.125,
+      {
+          'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
+          'logits': 2.9,
+          'y_true': 1,
+          'delta': 2,
+          'result': 0.01 * 0.125,
      },
-      {'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
-       'logits': 1.1,
-       'y_true': 1,
-       'delta': 2,
-       'result': 1.9**2 * 0.125,
+      {
+          'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
+          'logits': 1.1,
+          'y_true': 1,
+          'delta': 2,
+          'result': 1.9**2 * 0.125,
      },
-      {'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
-       'logits': -1.1,
-       'y_true': 1,
-       'delta': 2,
-       'result': 2.1,
+      {
+          'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
+          'logits': -1.1,
+          'y_true': 1,
+          'delta': 2,
+          'result': 2.1,
      },
-      {'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
-       'logits': -2.1,
-       'y_true': -1,
-       'delta': 1,
-       'result': 0,
+      {
+          'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
+          'logits': -2.1,
+          'y_true': -1,
+          'delta': 1,
+          'result': 0,
      },
   ])
   def test_calculation(self, logits, y_true, delta, result):

@@ -378,29 +419,33 @@ class HuberTests(keras_parameterized.TestCase):
     self.assertAllClose(loss.numpy(), result)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'beta',
-       'init_args': [1, 1, 1, 1],
-       'fn': 'beta',
-       'args': [1],
-       'result': tf.Variable(1.5, dtype=tf.float32)
+      {
+          'testcase_name': 'beta',
+          'init_args': [1, 1, 1, 1],
+          'fn': 'beta',
+          'args': [1],
+          'result': tf.Variable(1.5, dtype=tf.float32)
       },
-      {'testcase_name': 'gamma',
-       'fn': 'gamma',
-       'init_args': [1, 1, 1, 1],
-       'args': [],
-       'result': tf.Variable(1, dtype=tf.float32),
+      {
+          'testcase_name': 'gamma',
+          'fn': 'gamma',
+          'init_args': [1, 1, 1, 1],
+          'args': [],
+          'result': tf.Variable(1, dtype=tf.float32),
      },
-      {'testcase_name': 'lipchitz constant',
-       'fn': 'lipchitz_constant',
-       'init_args': [1, 1, 1, 1],
-       'args': [1],
-       'result': tf.Variable(2, dtype=tf.float32),
+      {
+          'testcase_name': 'lipchitz constant',
+          'fn': 'lipchitz_constant',
+          'init_args': [1, 1, 1, 1],
+          'args': [1],
+          'result': tf.Variable(2, dtype=tf.float32),
      },
-      {'testcase_name': 'kernel regularizer',
-       'fn': 'kernel_regularizer',
-       'init_args': [1, 1, 1, 1],
-       'args': [],
-       'result': L1L2(l2=0.5),
+      {
+          'testcase_name': 'kernel regularizer',
+          'fn': 'kernel_regularizer',
+          'init_args': [1, 1, 1, 1],
+          'args': [],
+          'result': L1L2(l2=0.5),
      },
   ])
   def test_fns(self, init_args, fn, args, result):
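
The expected values in test_calculation above follow from the Huber-style loss the tests encode; with z = y_true * logits and h the huber delta, the loss is

    \ell(z) \;=\;
    \begin{cases}
      0 & z > 1 + h \\
      \dfrac{(1 + h - z)^2}{4h} & |1 - z| \le h \\
      1 - z & z < 1 - h
    \end{cases}

e.g. logits = 1.9, y_true = 1, h = 1 gives (2 - 1.9)^2 / 4 = 0.01 * 0.25, matching the table.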


@@ -38,10 +38,7 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
   Descent-based Analytics by Xi Wu et al.
   """
 
-  def __init__(self,
-               n_outputs,
-               seed=1,
-               dtype=tf.float32):
+  def __init__(self, n_outputs, seed=1, dtype=tf.float32):
     """Private constructor.
 
     Args:

@@ -51,9 +48,8 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
     """
     super(BoltOnModel, self).__init__(name='bolton', dynamic=False)
     if n_outputs <= 0:
-      raise ValueError('n_outputs = {0} is not valid. Must be > 0.'.format(
-          n_outputs
-      ))
+      raise ValueError(
+          'n_outputs = {0} is not valid. Must be > 0.'.format(n_outputs))
     self.n_outputs = n_outputs
     self.seed = seed
     self._layers_instantiated = False
@@ -76,11 +72,13 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
               loss,
               kernel_initializer=tf.initializers.GlorotUniform,
               **kwargs):  # pylint: disable=arguments-differ
-    """See super class. Default optimizer used in BoltOn method is SGD.
+    """See super class.
+
+    Default optimizer used in BoltOn method is SGD.
 
     Args:
-      optimizer: The optimizer to use. This will be automatically wrapped
-        with the BoltOn Optimizer.
+      optimizer: The optimizer to use. This will be automatically wrapped with
+        the BoltOn Optimizer.
       loss: The loss function to use. Must be a StrongConvex loss (extend the
         StrongConvexMixin).
       kernel_initializer: The kernel initializer to use for the single layer.
@@ -128,10 +126,10 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
       y: Labels to fit on, see super.
       batch_size: The batch size to use for training, see super.
       class_weight: the class weights to be used. Can be a scalar or 1D tensor
-                    whose dim == n_classes.
+        whose dim == n_classes.
       n_samples: the number of individual samples in x.
       epsilon: privacy parameter, which trades off between utility an privacy.
-               See the bolt-on paper for more description.
+        See the bolt-on paper for more description.
       noise_distribution: the distribution to pull noise from.
       steps_per_epoch:
       **kwargs: kwargs to keras Model.fit. See super.
@@ -152,8 +150,7 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
     else:
       data_size = None
     batch_size_ = self._validate_or_infer_batch_size(batch_size,
-                                                     steps_per_epoch,
-                                                     x)
+                                                     steps_per_epoch, x)
     if batch_size_ is None:
       batch_size_ = 32
     # inferring batch_size to be passed to optimizer. batch_size must remain its

@@ -164,18 +161,15 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
     if data_size is None:
       raise ValueError('Could not infer the number of samples. Please pass '
                        'this in using n_samples.')
-    with self.optimizer(noise_distribution,
-                        epsilon,
-                        self.layers,
-                        class_weight_,
-                        data_size,
-                        batch_size_) as _:
-      out = super(BoltOnModel, self).fit(x=x,
-                                         y=y,
-                                         batch_size=batch_size,
-                                         class_weight=class_weight,
-                                         steps_per_epoch=steps_per_epoch,
-                                         **kwargs)
+    with self.optimizer(noise_distribution, epsilon, self.layers, class_weight_,
+                        data_size, batch_size_) as _:
+      out = super(BoltOnModel, self).fit(
+          x=x,
+          y=y,
+          batch_size=batch_size,
+          class_weight=class_weight,
+          steps_per_epoch=steps_per_epoch,
+          **kwargs)
     return out
 
   def fit_generator(self,
@@ -194,10 +188,10 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
     Args:
       generator: Inputs generator following Tensorflow guidelines, see super.
       class_weight: the class weights to be used. Can be a scalar or 1D tensor
-                    whose dim == n_classes.
+        whose dim == n_classes.
       noise_distribution: the distribution to get noise from.
       epsilon: privacy parameter, which trades off utility and privacy. See
-               BoltOn paper for more description.
+        BoltOn paper for more description.
       n_samples: number of individual samples in x
       steps_per_epoch: Number of steps per training epoch, see super.
       **kwargs: **kwargs
@@ -222,12 +216,8 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
                                                     generator)
     if batch_size is None:
       batch_size = 32
-    with self.optimizer(noise_distribution,
-                        epsilon,
-                        self.layers,
-                        class_weight,
-                        data_size,
-                        batch_size) as _:
+    with self.optimizer(noise_distribution, epsilon, self.layers, class_weight,
+                        data_size, batch_size) as _:
       out = super(BoltOnModel, self).fit_generator(
           generator,
           class_weight=class_weight,
@@ -243,10 +233,10 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
     Args:
       class_weights: str specifying type, array giving weights, or None.
-      class_counts: If class_weights is not None, then an array of
-                    the number of samples for each class
-      num_classes: If class_weights is not None, then the number of
-                   classes.
+      class_counts: If class_weights is not None, then an array of the number
+        of samples for each class
+      num_classes: If class_weights is not None, then the number of classes.
+
     Returns:
       class_weights as 1D tensor, to be passed to model's fit method.
     """
@@ -259,14 +249,12 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
       raise ValueError('Detected string class_weights with '
                        'value: {0}, which is not one of {1}.'
                        'Please select a valid class_weight type'
-                       'or pass an array'.format(class_weights,
-                                                 class_keys))
+                       'or pass an array'.format(class_weights, class_keys))
     if class_counts is None:
       raise ValueError('Class counts must be provided if using '
                        'class_weights=%s' % class_weights)
-    class_counts_shape = tf.Variable(class_counts,
-                                     trainable=False,
-                                     dtype=self._dtype).shape
+    class_counts_shape = tf.Variable(
+        class_counts, trainable=False, dtype=self._dtype).shape
     if len(class_counts_shape) != 1:
       raise ValueError('class counts must be a 1D array.'
                        'Detected: {0}'.format(class_counts_shape))
@@ -282,9 +270,8 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
       class_weights = 1
     elif is_string and class_weights == 'balanced':
       num_samples = sum(class_counts)
-      weighted_counts = tf.dtypes.cast(tf.math.multiply(num_classes,
-                                                        class_counts),
-                                       self._dtype)
+      weighted_counts = tf.dtypes.cast(
+          tf.math.multiply(num_classes, class_counts), self._dtype)
       class_weights = tf.Variable(num_samples, dtype=self._dtype) / \
           tf.Variable(weighted_counts, dtype=self._dtype)
     else:
@@ -293,8 +280,6 @@ class BoltOnModel(Model):  # pylint: disable=abstract-method
       raise ValueError('Detected class_weights shape: {0} instead of '
                        '1D array'.format(class_weights.shape))
     if class_weights.shape[0] != num_classes:
-      raise ValueError(
-          'Detected array length: {0} instead of: {1}'.format(
-              class_weights.shape[0],
-              num_classes))
+      raise ValueError('Detected array length: {0} instead of: {1}'.format(
+          class_weights.shape[0], num_classes))
     return class_weights
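
Taken together, these hunks reshape the BoltOnModel workflow without changing it. A minimal end-to-end sketch, with the module paths and the positional StrongConvexBinaryCrossentropy arguments assumed from the diffs above and the data and hyperparameter values purely illustrative:

    import tensorflow as tf
    from tensorflow_privacy.privacy.bolt_on import losses  # assumed module path
    from tensorflow_privacy.privacy.bolt_on import models  # assumed module path

    n_samples, input_dim, n_outputs = 20, 2, 1
    x = tf.constant(1.0, shape=(n_samples, input_dim))
    y = tf.constant(1.0, shape=(n_samples, n_outputs))

    clf = models.BoltOnModel(n_outputs)
    # Positional args (reg_lambda, c, radius_constant), as in the tests' TestLoss(1, 1, 1).
    clf.compile(tf.optimizers.SGD(), losses.StrongConvexBinaryCrossentropy(1, 1, 1))
    clf.fit(x, y, batch_size=4, n_samples=n_samples,
            noise_distribution='laplace', epsilon=1)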


@@ -73,9 +73,7 @@ class TestLoss(losses.Loss, StrongConvexMixin):
   def call(self, y_true, y_pred):
     """Loss function that is minimized at the mean of the input points."""
     return 0.5 * tf.reduce_sum(
-        tf.math.squared_difference(y_true, y_pred),
-        axis=1
-    )
+        tf.math.squared_difference(y_true, y_pred), axis=1)
 
   def max_class_weight(self, class_weight):
     """the maximum weighting in class weights (max value) as a scalar tensor.

@@ -125,11 +123,13 @@ class InitTests(keras_parameterized.TestCase):
   """Tests for keras model initialization."""
 
   @parameterized.named_parameters([
-      {'testcase_name': 'normal',
-       'n_outputs': 1,
+      {
+          'testcase_name': 'normal',
+          'n_outputs': 1,
       },
-      {'testcase_name': 'many outputs',
-       'n_outputs': 100,
+      {
+          'testcase_name': 'many outputs',
+          'n_outputs': 100,
      },
   ])
   def test_init_params(self, n_outputs):

@@ -143,8 +143,9 @@ class InitTests(keras_parameterized.TestCase):
     self.assertIsInstance(clf, models.BoltOnModel)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'invalid n_outputs',
-       'n_outputs': -1,
+      {
+          'testcase_name': 'invalid n_outputs',
+          'n_outputs': -1,
       },
   ])
   def test_bad_init_params(self, n_outputs):
@@ -158,15 +159,17 @@ class InitTests(keras_parameterized.TestCase):
       models.BoltOnModel(n_outputs)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'string compile',
-       'n_outputs': 1,
-       'loss': TestLoss(1, 1, 1),
-       'optimizer': 'adam',
+      {
+          'testcase_name': 'string compile',
+          'n_outputs': 1,
+          'loss': TestLoss(1, 1, 1),
+          'optimizer': 'adam',
       },
-      {'testcase_name': 'test compile',
-       'n_outputs': 100,
-       'loss': TestLoss(1, 1, 1),
-       'optimizer': TestOptimizer(),
+      {
+          'testcase_name': 'test compile',
+          'n_outputs': 100,
+          'loss': TestLoss(1, 1, 1),
+          'optimizer': TestOptimizer(),
      },
   ])
   def test_compile(self, n_outputs, loss, optimizer):

@@ -183,18 +186,17 @@ class InitTests(keras_parameterized.TestCase):
     clf.compile(optimizer, loss)
     self.assertEqual(clf.loss, loss)
 
-  @parameterized.named_parameters([
-      {'testcase_name': 'Not strong loss',
-       'n_outputs': 1,
-       'loss': losses.BinaryCrossentropy(),
-       'optimizer': 'adam',
-      },
-      {'testcase_name': 'Not valid optimizer',
-       'n_outputs': 1,
-       'loss': TestLoss(1, 1, 1),
-       'optimizer': 'ada',
-      }
-  ])
+  @parameterized.named_parameters([{
+      'testcase_name': 'Not strong loss',
+      'n_outputs': 1,
+      'loss': losses.BinaryCrossentropy(),
+      'optimizer': 'adam',
+  }, {
+      'testcase_name': 'Not valid optimizer',
+      'n_outputs': 1,
+      'loss': TestLoss(1, 1, 1),
+      'optimizer': 'ada',
+  }])
   def test_bad_compile(self, n_outputs, loss, optimizer):
     """test bad compilations of BoltOnModel that should raise errors.
@@ -231,17 +233,11 @@ def _cat_dataset(n_samples, input_dim, n_classes, batch_size, generator=False):
   x_stack = []
   y_stack = []
   for i_class in range(n_classes):
-    x_stack.append(
-        tf.constant(1*i_class, tf.float32, (n_samples, input_dim))
-    )
-    y_stack.append(
-        tf.constant(i_class, tf.float32, (n_samples, n_classes))
-    )
+    x_stack.append(tf.constant(1 * i_class, tf.float32, (n_samples, input_dim)))
+    y_stack.append(tf.constant(i_class, tf.float32, (n_samples, n_classes)))
   x_set, y_set = tf.stack(x_stack), tf.stack(y_stack)
   if generator:
-    dataset = tf.data.Dataset.from_tensor_slices(
-        (x_set, y_set)
-    )
+    dataset = tf.data.Dataset.from_tensor_slices((x_set, y_set))
     dataset = dataset.batch(batch_size=batch_size)
     return dataset
   return x_set, y_set
@@ -266,8 +262,8 @@ def _do_fit(n_samples,
     epsilon: privacy parameter
     generator: True to create a generator, False to use an iterator
     batch_size: batch_size to use
-    reset_n_samples: True to set _samples to None prior to fitting.
-      False does nothing
+    reset_n_samples: True to set _samples to None prior to fitting. False does
+      nothing
     optimizer: instance of TestOptimizer
     loss: instance of TestLoss
     distribution: distribution to get noise from.

@@ -279,37 +275,30 @@ def _do_fit(n_samples,
   clf.compile(optimizer, loss)
   if generator:
     x = _cat_dataset(
-        n_samples,
-        input_dim,
-        n_outputs,
-        batch_size,
-        generator=generator
-    )
+        n_samples, input_dim, n_outputs, batch_size, generator=generator)
     y = None
     # x = x.batch(batch_size)
-    x = x.shuffle(n_samples//2)
+    x = x.shuffle(n_samples // 2)
     batch_size = None
     if reset_n_samples:
       n_samples = None
-    clf.fit_generator(x,
-                      n_samples=n_samples,
-                      noise_distribution=distribution,
-                      epsilon=epsilon)
+    clf.fit_generator(
+        x,
+        n_samples=n_samples,
+        noise_distribution=distribution,
+        epsilon=epsilon)
   else:
     x, y = _cat_dataset(
-        n_samples,
-        input_dim,
-        n_outputs,
-        batch_size,
-        generator=generator)
+        n_samples, input_dim, n_outputs, batch_size, generator=generator)
     if reset_n_samples:
       n_samples = None
-    clf.fit(x,
-            y,
-            batch_size=batch_size,
-            n_samples=n_samples,
-            noise_distribution=distribution,
-            epsilon=epsilon)
+    clf.fit(
+        x,
+        y,
+        batch_size=batch_size,
+        n_samples=n_samples,
+        noise_distribution=distribution,
+        epsilon=epsilon)
   return clf
@@ -318,21 +307,25 @@ class FitTests(keras_parameterized.TestCase):
   # @test_util.run_all_in_graph_and_eager_modes
   @parameterized.named_parameters([
-      {'testcase_name': 'iterator fit',
-       'generator': False,
-       'reset_n_samples': True,
+      {
+          'testcase_name': 'iterator fit',
+          'generator': False,
+          'reset_n_samples': True,
       },
-      {'testcase_name': 'iterator fit no samples',
-       'generator': False,
-       'reset_n_samples': True,
+      {
+          'testcase_name': 'iterator fit no samples',
+          'generator': False,
+          'reset_n_samples': True,
      },
-      {'testcase_name': 'generator fit',
-       'generator': True,
-       'reset_n_samples': False,
+      {
+          'testcase_name': 'generator fit',
+          'generator': True,
+          'reset_n_samples': False,
      },
-      {'testcase_name': 'with callbacks',
-       'generator': True,
-       'reset_n_samples': False,
+      {
+          'testcase_name': 'with callbacks',
+          'generator': True,
+          'reset_n_samples': False,
      },
   ])
   def test_fit(self, generator, reset_n_samples):

@@ -363,8 +356,9 @@ class FitTests(keras_parameterized.TestCase):
     self.assertEqual(hasattr(clf, 'layers'), True)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'generator fit',
-       'generator': True,
+      {
+          'testcase_name': 'generator fit',
+          'generator': True,
       },
   ])
   def test_fit_gen(self, generator):
@@ -382,27 +376,24 @@ class FitTests(keras_parameterized.TestCase):
     clf = models.BoltOnModel(n_classes)
     clf.compile(optimizer, loss)
     x = _cat_dataset(
-        n_samples,
-        input_dim,
-        n_classes,
-        batch_size,
-        generator=generator
-    )
+        n_samples, input_dim, n_classes, batch_size, generator=generator)
     x = x.batch(batch_size)
     x = x.shuffle(n_samples // 2)
     clf.fit_generator(x, n_samples=n_samples)
     self.assertEqual(hasattr(clf, 'layers'), True)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'iterator no n_samples',
-       'generator': True,
-       'reset_n_samples': True,
-       'distribution': 'laplace'
+      {
+          'testcase_name': 'iterator no n_samples',
+          'generator': True,
+          'reset_n_samples': True,
+          'distribution': 'laplace'
       },
-      {'testcase_name': 'invalid distribution',
-       'generator': True,
-       'reset_n_samples': True,
-       'distribution': 'not_valid'
+      {
+          'testcase_name': 'invalid distribution',
+          'generator': True,
+          'reset_n_samples': True,
+          'distribution': 'not_valid'
      },
   ])
   def test_bad_fit(self, generator, reset_n_samples, distribution):
@@ -422,40 +413,33 @@ class FitTests(keras_parameterized.TestCase):
     epsilon = 1
     batch_size = 1
     n_samples = 10
-    _do_fit(
-        n_samples,
-        input_dim,
-        n_classes,
-        epsilon,
-        generator,
-        batch_size,
-        reset_n_samples,
-        optimizer,
-        loss,
-        distribution
-    )
+    _do_fit(n_samples, input_dim, n_classes, epsilon, generator, batch_size,
+            reset_n_samples, optimizer, loss, distribution)
 
   @parameterized.named_parameters([
-      {'testcase_name': 'None class_weights',
-       'class_weights': None,
-       'class_counts': None,
-       'num_classes': None,
-       'result': 1},
-      {'testcase_name': 'class weights array',
-       'class_weights': [1, 1],
-       'class_counts': [1, 1],
-       'num_classes': 2,
-       'result': [1, 1]},
-      {'testcase_name': 'class weights balanced',
-       'class_weights': 'balanced',
-       'class_counts': [1, 1],
-       'num_classes': 2,
-       'result': [1, 1]},
+      {
+          'testcase_name': 'None class_weights',
+          'class_weights': None,
+          'class_counts': None,
+          'num_classes': None,
+          'result': 1
+      },
+      {
+          'testcase_name': 'class weights array',
+          'class_weights': [1, 1],
+          'class_counts': [1, 1],
+          'num_classes': 2,
+          'result': [1, 1]
+      },
+      {
+          'testcase_name': 'class weights balanced',
+          'class_weights': 'balanced',
+          'class_counts': [1, 1],
+          'num_classes': 2,
+          'result': [1, 1]
+      },
   ])
-  def test_class_calculate(self,
-                           class_weights,
-                           class_counts,
-                           num_classes,
-                           result):
+  def test_class_calculate(self, class_weights, class_counts, num_classes,
+                           result):
     """Tests the BOltonModel calculate_class_weights method.
@@ -466,61 +450,68 @@ class FitTests(keras_parameterized.TestCase):
       result: expected result
     """
     clf = models.BoltOnModel(1, 1)
-    expected = clf.calculate_class_weights(class_weights,
-                                           class_counts,
+    expected = clf.calculate_class_weights(class_weights, class_counts,
                                            num_classes)
     if hasattr(expected, 'numpy'):
       expected = expected.numpy()
-    self.assertAllEqual(
-        expected,
-        result
-    )
+    self.assertAllEqual(expected, result)
 
-  @parameterized.named_parameters([
-      {'testcase_name': 'class weight not valid str',
-       'class_weights': 'not_valid',
-       'class_counts': 1,
-       'num_classes': 1,
-       'err_msg': 'Detected string class_weights with value: not_valid'},
-      {'testcase_name': 'no class counts',
-       'class_weights': 'balanced',
-       'class_counts': None,
-       'num_classes': 1,
-       'err_msg': 'Class counts must be provided if '
-                  'using class_weights=balanced'},
-      {'testcase_name': 'no num classes',
-       'class_weights': 'balanced',
-       'class_counts': [1],
-       'num_classes': None,
-       'err_msg': 'num_classes must be provided if '
-                  'using class_weights=balanced'},
-      {'testcase_name': 'class counts not array',
-       'class_weights': 'balanced',
-       'class_counts': 1,
-       'num_classes': None,
-       'err_msg': 'class counts must be a 1D array.'},
-      {'testcase_name': 'class counts array, no num classes',
-       'class_weights': [1],
-       'class_counts': None,
-       'num_classes': None,
-       'err_msg': 'You must pass a value for num_classes if '
-                  'creating an array of class_weights'},
-      {'testcase_name': 'class counts array, improper shape',
-       'class_weights': [[1], [1]],
-       'class_counts': None,
-       'num_classes': 2,
-       'err_msg': 'Detected class_weights shape'},
-      {'testcase_name': 'class counts array, wrong number classes',
-       'class_weights': [1, 1, 1],
-       'class_counts': None,
-       'num_classes': 2,
-       'err_msg': 'Detected array length:'},
-  ])
-  def test_class_errors(self,
-                        class_weights,
-                        class_counts,
-                        num_classes,
-                        err_msg):
+  @parameterized.named_parameters([
+      {
+          'testcase_name': 'class weight not valid str',
+          'class_weights': 'not_valid',
+          'class_counts': 1,
+          'num_classes': 1,
+          'err_msg': 'Detected string class_weights with value: not_valid'
+      },
+      {
+          'testcase_name': 'no class counts',
+          'class_weights': 'balanced',
+          'class_counts': None,
+          'num_classes': 1,
+          'err_msg': 'Class counts must be provided if '
+                     'using class_weights=balanced'
+      },
+      {
+          'testcase_name': 'no num classes',
+          'class_weights': 'balanced',
+          'class_counts': [1],
+          'num_classes': None,
+          'err_msg': 'num_classes must be provided if '
+                     'using class_weights=balanced'
+      },
+      {
+          'testcase_name': 'class counts not array',
+          'class_weights': 'balanced',
+          'class_counts': 1,
+          'num_classes': None,
+          'err_msg': 'class counts must be a 1D array.'
+      },
+      {
+          'testcase_name': 'class counts array, no num classes',
+          'class_weights': [1],
+          'class_counts': None,
+          'num_classes': None,
+          'err_msg': 'You must pass a value for num_classes if '
+                     'creating an array of class_weights'
+      },
+      {
+          'testcase_name': 'class counts array, improper shape',
+          'class_weights': [[1], [1]],
+          'class_counts': None,
+          'num_classes': 2,
+          'err_msg': 'Detected class_weights shape'
+      },
+      {
+          'testcase_name': 'class counts array, wrong number classes',
+          'class_weights': [1, 1, 1],
+          'class_counts': None,
+          'num_classes': 2,
+          'err_msg': 'Detected array length:'
+      },
+  ])
+  def test_class_errors(self, class_weights, class_counts, num_classes,
+                        err_msg):
     """Tests the BOltonModel calculate_class_weights method.
@@ -534,9 +525,7 @@ class FitTests(keras_parameterized.TestCase):
     """
     clf = models.BoltOnModel(1, 1)
     with self.assertRaisesRegexp(ValueError, err_msg):  # pylint: disable=deprecated-method
-      clf.calculate_class_weights(class_weights,
-                                  class_counts,
-                                  num_classes)
+      clf.calculate_class_weights(class_weights, class_counts, num_classes)
 
 
 if __name__ == '__main__':


@@ -48,14 +48,12 @@ class GammaBetaDecreasingStep(
                       'This is performed automatically by using the '
                       '{1} as a context manager, '
                       'as desired'.format(self.__class__.__name__,
-                                          BoltOn.__class__.__name__
-                                          )
-                      )
+                                          BoltOn.__class__.__name__))
     dtype = self.beta.dtype
     one = tf.constant(1, dtype)
-    return tf.math.minimum(tf.math.reduce_min(one/self.beta),
-                           one/(self.gamma*math_ops.cast(step, dtype))
-                           )
+    return tf.math.minimum(
+        tf.math.reduce_min(one / self.beta),
+        one / (self.gamma * math_ops.cast(step, dtype)))
 
   def get_config(self):
     """Return config to setup the learning rate scheduler."""
@@ -108,16 +106,16 @@ class BoltOn(optimizer_v2.OptimizerV2):
   Descent-based Analytics by Xi Wu et. al.
   """
 
-  def __init__(self,  # pylint: disable=super-init-not-called
-               optimizer,
-               loss,
-               dtype=tf.float32,
-               ):
+  def __init__(
+      self,  # pylint: disable=super-init-not-called
+      optimizer,
+      loss,
+      dtype=tf.float32,
+  ):
     """Constructor.
 
     Args:
-      optimizer: Optimizer_v2 or subclass to be used as the optimizer
-        (wrapped).
+      optimizer: Optimizer_v2 or subclass to be used as the optimizer (wrapped).
       loss: StrongConvexLoss function that the model is being compiled with.
       dtype: dtype
     """

@@ -155,8 +153,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
     """Normalize the weights to the R-ball.
 
     Args:
-      force: True to normalize regardless of previous weight values.
-        False to check if weights > R-ball and only normalize then.
+      force: True to normalize regardless of previous weight values. False to
+        check if weights > R-ball and only normalize then.
 
     Raises:
       Exception: If not called from inside this optimizer context.
@@ -199,14 +197,14 @@ class BoltOn(optimizer_v2.OptimizerV2):
       l2_sensitivity = (2 *
                         loss.lipchitz_constant(self.class_weights)) / \
                        (loss.gamma() * self.n_samples * self.batch_size)
-      unit_vector = tf.random.normal(shape=(input_dim, output_dim),
-                                     mean=0,
-                                     seed=1,
-                                     stddev=1.0,
-                                     dtype=self.dtype)
+      unit_vector = tf.random.normal(
+          shape=(input_dim, output_dim),
+          mean=0,
+          seed=1,
+          stddev=1.0,
+          dtype=self.dtype)
       unit_vector = unit_vector / tf.math.sqrt(
-          tf.reduce_sum(tf.math.square(unit_vector), axis=0)
-      )
+          tf.reduce_sum(tf.math.square(unit_vector), axis=0))
 
       beta = l2_sensitivity / per_class_epsilon
       alpha = input_dim  # input_dim

@@ -214,8 +212,7 @@ class BoltOn(optimizer_v2.OptimizerV2):
           alpha,
           beta=1 / beta,
           seed=1,
-          dtype=self.dtype
-      )
+          dtype=self.dtype)
       return unit_vector * gamma
 
     raise NotImplementedError('Noise distribution: {0} is not '
                               'a valid distribution'.format(distribution))
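
Read back as math, the laplace branch assembled here draws a uniformly random direction and a Gamma-distributed magnitude. A sketch of the visible lines, with L the loss's Lipschitz constant, gamma its strong-convexity constant, n the sample count, b the batch size, and d = input_dim:

    \Delta_2 = \frac{2L}{\gamma\, n\, b}, \qquad
    Z = u \cdot g, \quad u \sim \mathrm{Unif}(\mathbb{S}^{d-1}), \quad
    g \sim \mathrm{Gamma}\!\left(d,\ \mathrm{scale} = \frac{\Delta_2}{\varepsilon}\right)
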
@@ -245,10 +242,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
     except AttributeError:
       raise AttributeError(
          "Neither '{0}' nor '{1}' object has attribute '{2}'"
-          "".format(self.__class__.__name__,
-                    self._internal_optimizer.__class__.__name__,
-                    name)
-      )
+          ''.format(self.__class__.__name__,
+                    self._internal_optimizer.__class__.__name__, name))
 
   def __setattr__(self, key, value):
     """Set attribute to self instance if its the internal optimizer.

@@ -309,20 +304,15 @@ class BoltOn(optimizer_v2.OptimizerV2):
     self._is_init = True
     return self
 
-  def __call__(self,
-               noise_distribution,
-               epsilon,
-               layers,
-               class_weights,
-               n_samples,
-               batch_size):
+  def __call__(self, noise_distribution, epsilon, layers, class_weights,
+               n_samples, batch_size):
     """Accepts required values for bolton method from context entry point.
 
     Stores them on the optimizer for use throughout fitting.
 
     Args:
-      noise_distribution: the noise distribution to pick.
-        see _accepted_distributions and get_noise for possible values.
+      noise_distribution: the noise distribution to pick. see
+        _accepted_distributions and get_noise for possible values.
      epsilon: privacy parameter. Lower gives more privacy but less utility.
      layers: list of Keras/Tensorflow layers. Can be found as model.layers
      class_weights: class_weights used, which may either be a scalar or 1D
@@ -341,8 +331,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
                        'distributions'.format(noise_distribution,
                                               _accepted_distributions))
     self.noise_distribution = noise_distribution
-    self.learning_rate.initialize(self.loss.beta(class_weights),
-                                  self.loss.gamma())
+    self.learning_rate.initialize(
+        self.loss.beta(class_weights), self.loss.gamma())
     self.epsilon = tf.constant(epsilon, dtype=self.dtype)
     self.class_weights = tf.constant(class_weights, dtype=self.dtype)
     self.n_samples = tf.constant(n_samples, dtype=self.dtype)

@@ -369,9 +359,10 @@ class BoltOn(optimizer_v2.OptimizerV2):
     for layer in self.layers:
       input_dim = layer.kernel.shape[0]
       output_dim = layer.units
-      noise = self.get_noise(input_dim,
-                             output_dim,
-                             )
+      noise = self.get_noise(
+          input_dim,
+          output_dim,
+      )
       layer.kernel = tf.math.add(layer.kernel, noise)
     self.noise_distribution = None
     self.learning_rate.de_initialize()


@@ -111,9 +111,7 @@ class TestLoss(losses.Loss, StrongConvexMixin):
   def call(self, y_true, y_pred):
     """Loss function that is minimized at the mean of the input points."""
     return 0.5 * tf.reduce_sum(
-        tf.math.squared_difference(y_true, y_pred),
-        axis=1
-    )
+        tf.math.squared_difference(y_true, y_pred), axis=1)
 
   def max_class_weight(self, class_weight, dtype=tf.float32):
     """the maximum weighting in class weights (max value) as a scalar tensor.
@@ -183,20 +181,24 @@ class TestOptimizer(OptimizerV2):
 class BoltonOptimizerTest(keras_parameterized.TestCase):
   """BoltOn Optimizer tests."""
 
   @test_util.run_all_in_graph_and_eager_modes
   @parameterized.named_parameters([
-      {'testcase_name': 'getattr',
-       'fn': '__getattr__',
-       'args': ['dtype'],
-       'result': tf.float32,
-       'test_attr': None},
-      {'testcase_name': 'project_weights_to_r',
-       'fn': 'project_weights_to_r',
-       'args': ['dtype'],
-       'result': None,
-       'test_attr': ''},
+      {
+          'testcase_name': 'getattr',
+          'fn': '__getattr__',
+          'args': ['dtype'],
+          'result': tf.float32,
+          'test_attr': None
+      },
+      {
+          'testcase_name': 'project_weights_to_r',
+          'fn': 'project_weights_to_r',
+          'args': ['dtype'],
+          'result': None,
+          'test_attr': ''
+      },
   ])
   def test_fn(self, fn, args, result, test_attr):
     """test that a fn of BoltOn optimizer is working as expected.

@@ -204,9 +206,8 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
       fn: method of Optimizer to test
       args: args to optimizer fn
       result: the expected result
-      test_attr: None if the fn returns the test result. Otherwise, this is
-        the attribute of BoltOn to check against result with.
+      test_attr: None if the fn returns the test result. Otherwise, this is the
+        attribute of BoltOn to check against result with.
     """
     tf.random.set_seed(1)
     loss = TestLoss(1, 1, 1)
@ -231,30 +232,38 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
@test_util.run_all_in_graph_and_eager_modes @test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': '1 value project to r=1', {
'r': 1, 'testcase_name': '1 value project to r=1',
'init_value': 2, 'r': 1,
'shape': (1,), 'init_value': 2,
'n_out': 1, 'shape': (1,),
'result': [[1]]}, 'n_out': 1,
{'testcase_name': '2 value project to r=1', 'result': [[1]]
'r': 1, },
'init_value': 2, {
'shape': (2,), 'testcase_name': '2 value project to r=1',
'n_out': 1, 'r': 1,
'result': [[0.707107], [0.707107]]}, 'init_value': 2,
{'testcase_name': '1 value project to r=2', 'shape': (2,),
'r': 2, 'n_out': 1,
'init_value': 3, 'result': [[0.707107], [0.707107]]
'shape': (1,), },
'n_out': 1, {
'result': [[2]]}, 'testcase_name': '1 value project to r=2',
{'testcase_name': 'no project', 'r': 2,
'r': 2, 'init_value': 3,
'init_value': 1, 'shape': (1,),
'shape': (1,), 'n_out': 1,
'n_out': 1, 'result': [[2]]
'result': [[1]]}, },
{
'testcase_name': 'no project',
'r': 2,
'init_value': 1,
'shape': (1,),
'n_out': 1,
'result': [[1]]
},
]) ])
def test_project(self, r, shape, n_out, init_value, result): def test_project(self, r, shape, n_out, init_value, result):
"""test that a fn of BoltOn optimizer is working as expected. """test that a fn of BoltOn optimizer is working as expected.
@ -267,6 +276,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
result: the expected output after projection. result: the expected output after projection.
""" """
tf.random.set_seed(1) tf.random.set_seed(1)
def project_fn(r): def project_fn(r):
loss = TestLoss(1, 1, r) loss = TestLoss(1, 1, r)
bolton = opt.BoltOn(TestOptimizer(), loss) bolton = opt.BoltOn(TestOptimizer(), loss)
@ -283,15 +293,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
bolton.n_samples = 1 bolton.n_samples = 1
bolton.project_weights_to_r() bolton.project_weights_to_r()
return _ops.convert_to_tensor_v2(bolton.layers[0].kernel, tf.float32) return _ops.convert_to_tensor_v2(bolton.layers[0].kernel, tf.float32)
res = project_fn(r) res = project_fn(r)
self.assertAllClose(res, result) self.assertAllClose(res, result)
@test_util.run_all_in_graph_and_eager_modes @test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': 'normal values', {
'epsilon': 2, 'testcase_name': 'normal values',
'noise': 'laplace', 'epsilon': 2,
'class_weights': 1}, 'noise': 'laplace',
'class_weights': 1
},
]) ])
def test_context_manager(self, noise, epsilon, class_weights): def test_context_manager(self, noise, epsilon, class_weights):
"""Tests the context manager functionality of the optimizer. """Tests the context manager functionality of the optimizer.
@ -301,6 +314,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
epsilon: epsilon privacy parameter to use epsilon: epsilon privacy parameter to use
class_weights: class_weights to use class_weights: class_weights to use
""" """
@tf.function @tf.function
def test_run(): def test_run():
loss = TestLoss(1, 1, 1) loss = TestLoss(1, 1, 1)
@ -313,18 +327,23 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
with bolton(noise, epsilon, model.layers, class_weights, 1, 1) as _: with bolton(noise, epsilon, model.layers, class_weights, 1, 1) as _:
pass pass
return _ops.convert_to_tensor_v2(bolton.epsilon, dtype=tf.float32) return _ops.convert_to_tensor_v2(bolton.epsilon, dtype=tf.float32)
epsilon = test_run() epsilon = test_run()
self.assertEqual(epsilon.numpy(), -1) self.assertEqual(epsilon.numpy(), -1)
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': 'invalid noise', {
'epsilon': 1, 'testcase_name': 'invalid noise',
'noise': 'not_valid', 'epsilon': 1,
'err_msg': 'Detected noise distribution: not_valid not one of:'}, 'noise': 'not_valid',
{'testcase_name': 'invalid epsilon', 'err_msg': 'Detected noise distribution: not_valid not one of:'
'epsilon': -1, },
'noise': 'laplace', {
'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon <inf'}, 'testcase_name': 'invalid epsilon',
'epsilon': -1,
'noise': 'laplace',
'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon <inf'
},
]) ])
def test_context_domains(self, noise, epsilon, err_msg): def test_context_domains(self, noise, epsilon, err_msg):
"""Tests the context domains. """Tests the context domains.
@ -333,7 +352,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
noise: noise distribution to pick noise: noise distribution to pick
epsilon: epsilon privacy parameter to use epsilon: epsilon privacy parameter to use
err_msg: the expected error message err_msg: the expected error message
""" """
@tf.function @tf.function
@ -347,15 +365,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
model.n_outputs)) model.n_outputs))
with bolton(noise, epsilon, model.layers, 1, 1, 1) as _: with bolton(noise, epsilon, model.layers, 1, 1, 1) as _:
pass pass
with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method
test_run(noise, epsilon) test_run(noise, epsilon)
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': 'fn: get_noise', {
'fn': 'get_noise', 'testcase_name': 'fn: get_noise',
'args': [1, 1], 'fn': 'get_noise',
'err_msg': 'This method must be called from within the ' 'args': [1, 1],
'optimizer\'s context'}, 'err_msg': 'This method must be called from within the '
'optimizer\'s context'
},
]) ])
def test_not_in_context(self, fn, args, err_msg): def test_not_in_context(self, fn, args, err_msg):
"""Tests that the expected functions raise errors when not in context. """Tests that the expected functions raise errors when not in context.
@ -365,6 +386,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
args: the arguments for said function args: the arguments for said function
err_msg: expected error message err_msg: expected error message
""" """
def test_run(fn, args): def test_run(fn, args):
loss = TestLoss(1, 1, 1) loss = TestLoss(1, 1, 1)
bolton = opt.BoltOn(TestOptimizer(), loss) bolton = opt.BoltOn(TestOptimizer(), loss)
@ -379,33 +401,51 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
test_run(fn, args) test_run(fn, args)
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': 'fn: get_updates', {
'fn': 'get_updates', 'testcase_name': 'fn: get_updates',
'args': [0, 0]}, 'fn': 'get_updates',
{'testcase_name': 'fn: get_config', 'args': [0, 0]
'fn': 'get_config', },
'args': []}, {
{'testcase_name': 'fn: from_config', 'testcase_name': 'fn: get_config',
'fn': 'from_config', 'fn': 'get_config',
'args': [0]}, 'args': []
{'testcase_name': 'fn: _resource_apply_dense', },
'fn': '_resource_apply_dense', {
'args': [1, 1]}, 'testcase_name': 'fn: from_config',
{'testcase_name': 'fn: _resource_apply_sparse', 'fn': 'from_config',
'fn': '_resource_apply_sparse', 'args': [0]
'args': [1, 1, 1]}, },
{'testcase_name': 'fn: apply_gradients', {
'fn': 'apply_gradients', 'testcase_name': 'fn: _resource_apply_dense',
'args': [1]}, 'fn': '_resource_apply_dense',
{'testcase_name': 'fn: minimize', 'args': [1, 1]
'fn': 'minimize', },
'args': [1, 1]}, {
{'testcase_name': 'fn: _compute_gradients', 'testcase_name': 'fn: _resource_apply_sparse',
'fn': '_compute_gradients', 'fn': '_resource_apply_sparse',
'args': [1, 1]}, 'args': [1, 1, 1]
{'testcase_name': 'fn: get_gradients', },
'fn': 'get_gradients', {
'args': [1, 1]}, 'testcase_name': 'fn: apply_gradients',
'fn': 'apply_gradients',
'args': [1]
},
{
'testcase_name': 'fn: minimize',
'fn': 'minimize',
'args': [1, 1]
},
{
'testcase_name': 'fn: _compute_gradients',
'fn': '_compute_gradients',
'args': [1, 1]
},
{
'testcase_name': 'fn: get_gradients',
'fn': 'get_gradients',
'args': [1, 1]
},
]) ])
def test_rerouted_function(self, fn, args): def test_rerouted_function(self, fn, args):
"""Tests rerouted function. """Tests rerouted function.
@ -435,18 +475,19 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
bolton.noise_distribution = 'laplace' bolton.noise_distribution = 'laplace'
bolton.n_outputs = 1 bolton.n_outputs = 1
bolton.n_samples = 1 bolton.n_samples = 1
self.assertEqual( self.assertEqual(getattr(bolton, fn, lambda: 'fn not found')(*args), 'test')
getattr(bolton, fn, lambda: 'fn not found')(*args),
'test'
)
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': 'fn: project_weights_to_r', {
'fn': 'project_weights_to_r', 'testcase_name': 'fn: project_weights_to_r',
'args': []}, 'fn': 'project_weights_to_r',
{'testcase_name': 'fn: get_noise', 'args': []
'fn': 'get_noise', },
'args': [1, 1]}, {
'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1]
},
]) ])
def test_not_reroute_fn(self, fn, args): def test_not_reroute_fn(self, fn, args):
"""Test function is not rerouted. """Test function is not rerouted.
@ -458,6 +499,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
fn: fn to test fn: fn to test
args: arguments to that fn args: arguments to that fn
""" """
def test_run(fn, args): def test_run(fn, args):
loss = TestLoss(1, 1, 1) loss = TestLoss(1, 1, 1)
bolton = opt.BoltOn(TestOptimizer(), loss) bolton = opt.BoltOn(TestOptimizer(), loss)
@ -480,12 +522,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
else: else:
res = 0 res = 0
return _ops.convert_to_tensor_v2(res, dtype=tf.float32) return _ops.convert_to_tensor_v2(res, dtype=tf.float32)
self.assertNotEqual(test_run(fn, args), 0) self.assertNotEqual(test_run(fn, args), 0)
@parameterized.named_parameters([ @parameterized.named_parameters([{
{'testcase_name': 'attr: _iterations', 'testcase_name': 'attr: _iterations',
'attr': '_iterations'} 'attr': '_iterations'
]) }])
def test_reroute_attr(self, attr): def test_reroute_attr(self, attr):
"""Test a function is rerouted. """Test a function is rerouted.
@ -498,13 +541,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
loss = TestLoss(1, 1, 1) loss = TestLoss(1, 1, 1)
internal_optimizer = TestOptimizer() internal_optimizer = TestOptimizer()
optimizer = opt.BoltOn(internal_optimizer, loss) optimizer = opt.BoltOn(internal_optimizer, loss)
self.assertEqual(getattr(optimizer, attr), self.assertEqual(
getattr(internal_optimizer, attr)) getattr(optimizer, attr), getattr(internal_optimizer, attr))
@parameterized.named_parameters([ @parameterized.named_parameters([{
{'testcase_name': 'attr does not exist', 'testcase_name': 'attr does not exist',
'attr': '_not_valid'} 'attr': '_not_valid'
]) }])
def test_attribute_error(self, attr): def test_attribute_error(self, attr):
"""Test rerouting of attributes. """Test rerouting of attributes.
@ -524,12 +567,11 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
class SchedulerTest(keras_parameterized.TestCase): class SchedulerTest(keras_parameterized.TestCase):
"""GammaBeta Scheduler tests.""" """GammaBeta Scheduler tests."""
@parameterized.named_parameters([ @parameterized.named_parameters([{
{'testcase_name': 'not in context', 'testcase_name': 'not in context',
'err_msg': 'Please initialize the GammaBetaDecreasingStep Learning Rate' 'err_msg': 'Please initialize the GammaBetaDecreasingStep Learning Rate'
' Scheduler' ' Scheduler'
} }])
])
def test_bad_call(self, err_msg): def test_bad_call(self, err_msg):
"""Test attribute of internal opt correctly rerouted to the internal opt. """Test attribute of internal opt correctly rerouted to the internal opt.
@ -541,15 +583,21 @@ class SchedulerTest(keras_parameterized.TestCase):
scheduler(1) scheduler(1)
@parameterized.named_parameters([ @parameterized.named_parameters([
{'testcase_name': 'step 1', {
'step': 1, 'testcase_name': 'step 1',
'res': 0.5}, 'step': 1,
{'testcase_name': 'step 2', 'res': 0.5
'step': 2, },
'res': 0.5}, {
{'testcase_name': 'step 3', 'testcase_name': 'step 2',
'step': 3, 'step': 2,
'res': 0.333333333}, 'res': 0.5
},
{
'testcase_name': 'step 3',
'step': 3,
'res': 0.333333333
},
]) ])
def test_call(self, step, res): def test_call(self, step, res):
"""Test call. """Test call.

View file

@ -13,10 +13,8 @@
# limitations under the License. # limitations under the License.
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import quantile_estimator_query from tensorflow_privacy.privacy.dp_query import quantile_estimator_query
from tensorflow_privacy.privacy.dp_query import test_utils from tensorflow_privacy.privacy.dp_query import test_utils
@ -44,10 +42,7 @@ def _make_quantile_estimator_query(initial_estimate,
raise ValueError( raise ValueError(
'Cannot set expected_num_records to None for tree aggregation.') 'Cannot set expected_num_records to None for tree aggregation.')
return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery( return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
initial_estimate, initial_estimate, target_quantile, learning_rate, geometric_update)
target_quantile,
learning_rate,
geometric_update)
class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
@ -109,7 +104,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# to 4 / sqrt(2.0). Still only one record is below, so it reduces to 2.0. # to 4 / sqrt(2.0). Still only one record is below, so it reduces to 2.0.
# Now no records are below, and the estimate norm stays there (at 2.0). # Now no records are below, and the estimate norm stays there (at 2.0).
four_div_root_two = 4 / np.sqrt(2.0) # approx 2.828 four_div_root_two = 4 / np.sqrt(2.0) # approx 2.828
expected_estimates = [8.0, 4.0, four_div_root_two, 2.0, 2.0] expected_estimates = [8.0, 4.0, four_div_root_two, 2.0, 2.0]
for expected_estimate in expected_estimates: for expected_estimate in expected_estimates:
@ -175,7 +170,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# is multiplied by sqrt(2.0). Still only one is above so it increases to # is multiplied by sqrt(2.0). Still only one is above so it increases to
# 4.0. Now both records are above, and the estimate stays there (at 4.0). # 4.0. Now both records are above, and the estimate stays there (at 4.0).
two_times_root_two = 2 * np.sqrt(2.0) # approx 2.828 two_times_root_two = 2 * np.sqrt(2.0) # approx 2.828
expected_estimates = [1.0, 2.0, two_times_root_two, 4.0, 4.0] expected_estimates = [1.0, 2.0, two_times_root_two, 4.0, 4.0]
for expected_estimate in expected_estimates: for expected_estimate in expected_estimates:
@ -201,8 +196,10 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# 100 records equally spaced from 0 to 10 in 0.1 increments. # 100 records equally spaced from 0 to 10 in 0.1 increments.
# Test that we converge to the correct median value and bounce around it. # Test that we converge to the correct median value and bounce around it.
num_records = 21 num_records = 21
records = [tf.constant(x) for x in np.linspace( records = [
0.0, 10.0, num=num_records, dtype=np.float32)] tf.constant(x)
for x in np.linspace(0.0, 10.0, num=num_records, dtype=np.float32)
]
query = _make_quantile_estimator_query( query = _make_quantile_estimator_query(
initial_estimate=(1.0 if start_low else 10.0), initial_estimate=(1.0 if start_low else 10.0),
@ -267,9 +264,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
def test_raises_with_non_scalar_record(self): def test_raises_with_non_scalar_record(self):
query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery( query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
initial_estimate=1.0, initial_estimate=1.0, target_quantile=0.5, learning_rate=1.0)
target_quantile=0.5,
learning_rate=1.0)
with self.assertRaisesRegex(ValueError, 'scalar'): with self.assertRaisesRegex(ValueError, 'scalar'):
query.accumulate_record(None, None, [1.0, 2.0]) query.accumulate_record(None, None, [1.0, 2.0])

View file

@ -28,7 +28,6 @@ from typing import Any, Callable, Collection, Optional, Tuple, Union
import attr import attr
import tensorflow as tf import tensorflow as tf
# TODO(b/192464750): find a proper place for the helper functions, privatize # TODO(b/192464750): find a proper place for the helper functions, privatize
# the tree aggregation logic, and encourage users to use the DPQuery API. # the tree aggregation logic, and encourage users to use the DPQuery API.

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Tests for `tree_aggregation_query`."""
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
@ -212,11 +211,11 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters( @parameterized.named_parameters(
('two_records_noise_fn', [2.71828, 3.14159], _get_noise_fn), ('two_records_noise_fn', [2.71828, 3.14159], _get_noise_fn),
('five_records_noise_fn', np.random.uniform(low=0.1, size=5).tolist(), ('five_records_noise_fn', np.random.uniform(
_get_noise_fn), low=0.1, size=5).tolist(), _get_noise_fn),
('two_records_generator', [2.71828, 3.14159], _get_noise_generator), ('two_records_generator', [2.71828, 3.14159], _get_noise_generator),
('five_records_generator', np.random.uniform(low=0.1, size=5).tolist(), ('five_records_generator', np.random.uniform(
_get_noise_generator), low=0.1, size=5).tolist(), _get_noise_generator),
) )
def test_noisy_cumsum_and_state_update(self, records, value_generator): def test_noisy_cumsum_and_state_update(self, records, value_generator):
num_trials, vector_size = 10, 100 num_trials, vector_size = 10, 100

View file

@ -63,5 +63,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
input_fn=test_utils.make_input_fn(predict_features, predict_labels, input_fn=test_utils.make_input_fn(predict_features, predict_labels,
False)) False))
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()

View file

@ -34,6 +34,7 @@ def make_input_data(size, classes):
np.power(feature_a, 3) + np.power(feature_b, 2) + np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise > 125).astype(int) np.power(feature_c, 1) + noise > 125).astype(int)
else: else:
def label_fn(x): def label_fn(x):
if x < 110.0: if x < 110.0:
return 0 return 0
@ -42,10 +43,11 @@ def make_input_data(size, classes):
else: else:
return 2 return 2
labels = list(map( labels = list(
label_fn, map(
np.power(feature_a, 3) + np.power(feature_b, 2) + label_fn,
np.power(feature_c, 1) + noise)) np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise))
return features, labels return features, labels
@ -87,6 +89,7 @@ def make_input_fn(features, labels, training, batch_size=16):
dataset = dataset.shuffle(1000) dataset = dataset.shuffle(1000)
return dataset.batch(batch_size) return dataset.batch(batch_size)
return input_fn return input_fn

View file

@ -64,5 +64,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
input_fn=test_utils.make_input_fn(predict_features, predict_labels, input_fn=test_utils.make_input_fn(predict_features, predict_labels,
False)) False))
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()

View file

@ -434,8 +434,8 @@ def _binary_logistic_or_multi_class_head(n_classes, weight_column,
encoded as integer or float within [0, 1] for `n_classes=2` and encoded as encoded as integer or float within [0, 1] for `n_classes=2` and encoded as
integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . Also there
will be errors if vocabulary is not provided and labels are string. will be errors if vocabulary is not provided and labels are string.
loss_reduction: Describes how to reduce training loss over batch. loss_reduction: Describes how to reduce training loss over batch. Defaults
Defaults to `SUM`. to `SUM`.
Returns: Returns:
`head._Head` instance. `head._Head` instance.

View file

@ -53,9 +53,10 @@ def make_dp_model_class(cls):
model.fit(train_data, train_labels, epochs=1, batch_size=32) model.fit(train_data, train_labels, epochs=1, batch_size=32)
``` ```
""").format(base_model='tf.keras.' + cls.__name__, """).format(
short_base_model=cls.__name__, base_model='tf.keras.' + cls.__name__,
dp_model_class='DP' + cls.__name__) short_base_model=cls.__name__,
dp_model_class='DP' + cls.__name__)
def __init__( def __init__(
self, self,

View file

@ -40,8 +40,8 @@ class RegressionDataset:
"""Class for storing labeled examples for a regression dataset. """Class for storing labeled examples for a regression dataset.
Attributes: Attributes:
points: array of shape (num_examples, dimension) containing the points to points: array of shape (num_examples, dimension) containing the points to be
be classified. classified.
labels: array of shape (num_examples,) containing the corresponding labels, labels: array of shape (num_examples,) containing the corresponding labels,
each belonging to the set {0,1,...,num_classes-1}, where num_classes is each belonging to the set {0,1,...,num_classes-1}, where num_classes is
the number of classes. the number of classes.
@ -51,7 +51,7 @@ class RegressionDataset:
def linearly_separable_labeled_examples( def linearly_separable_labeled_examples(
num_examples: int, weights: np.ndarray)-> RegressionDataset: num_examples: int, weights: np.ndarray) -> RegressionDataset:
"""Generates num_examples labeled examples using separator given by weights. """Generates num_examples labeled examples using separator given by weights.
Args: Args:
@ -75,7 +75,7 @@ def linearly_separable_labeled_examples(
def synthetic_linearly_separable_data( def synthetic_linearly_separable_data(
num_train: int, num_test: int, dimension: int, num_train: int, num_test: int, dimension: int,
num_classes: int)-> Tuple[RegressionDataset, RegressionDataset]: num_classes: int) -> Tuple[RegressionDataset, RegressionDataset]:
"""Generates synthetic train and test data for logistic regression. """Generates synthetic train and test data for logistic regression.
Args: Args:
@ -103,7 +103,7 @@ def synthetic_linearly_separable_data(
return (train_dataset, test_dataset) return (train_dataset, test_dataset)
def mnist_dataset()-> Tuple[RegressionDataset, RegressionDataset]: def mnist_dataset() -> Tuple[RegressionDataset, RegressionDataset]:
"""Generates (normalized) train and test data for MNIST. """Generates (normalized) train and test data for MNIST.
Returns: Returns:

View file

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Tests for tensorflow_privacy.privacy.logistic_regression.datasets."""
import unittest import unittest
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
from tensorflow_privacy.privacy.logistic_regression import datasets from tensorflow_privacy.privacy.logistic_regression import datasets
@ -22,14 +22,16 @@ from tensorflow_privacy.privacy.logistic_regression import datasets
class DatasetsTest(parameterized.TestCase): class DatasetsTest(parameterized.TestCase):
@parameterized.parameters( @parameterized.parameters(
(1, np.array([[1],])), (1, np.array([
(2, np.array([[1],])), [1],
(5, np.array([[-1, 1], [1, -1]])), ])), (2, np.array([
[1],
])), (5, np.array([[-1, 1], [1, -1]])),
(15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]]))) (15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]])))
def test_linearly_separable_labeled_examples(self, num_examples, weights): def test_linearly_separable_labeled_examples(self, num_examples, weights):
dimension, num_classes = weights.shape dimension, num_classes = weights.shape
dataset = datasets.linearly_separable_labeled_examples(num_examples, dataset = datasets.linearly_separable_labeled_examples(
weights) num_examples, weights)
self.assertEqual(dataset.points.shape, (num_examples, dimension)) self.assertEqual(dataset.points.shape, (num_examples, dimension))
self.assertEqual(dataset.labels.shape, (num_examples,)) self.assertEqual(dataset.labels.shape, (num_examples,))
product = np.matmul(dataset.points, weights) product = np.matmul(dataset.points, weights)
@ -37,11 +39,8 @@ class DatasetsTest(parameterized.TestCase):
for j in range(num_classes): for j in range(num_classes):
self.assertGreaterEqual(product[i, dataset.labels[i]], product[i, j]) self.assertGreaterEqual(product[i, dataset.labels[i]], product[i, j])
@parameterized.parameters( @parameterized.parameters((1, 1, 1, 2), (20, 5, 1, 2), (20, 5, 2, 2),
(1, 1, 1, 2), (1000, 10, 15, 10))
(20, 5, 1, 2),
(20, 5, 2, 2),
(1000, 10, 15, 10))
def test_synthetic(self, num_train, num_test, dimension, num_classes): def test_synthetic(self, num_train, num_test, dimension, num_classes):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
num_train, num_test, dimension, num_classes) num_train, num_test, dimension, num_classes)
@ -73,5 +72,6 @@ class DatasetsTest(parameterized.TestCase):
self.assertTrue(np.all(np.isin(train_dataset.labels, range(10)))) self.assertTrue(np.all(np.isin(train_dataset.labels, range(10))))
self.assertTrue(np.all(np.isin(test_dataset.labels, range(10)))) self.assertTrue(np.all(np.isin(test_dataset.labels, range(10))))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -52,21 +52,26 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
(self._l2_regularizer, (self._l2_regularizer,
variance) = self.logistic_objective_perturbation_parameters( variance) = self.logistic_objective_perturbation_parameters(
num_train, epsilon, delta, num_classes, input_clipping_norm) num_train, epsilon, delta, num_classes, input_clipping_norm)
self._b = tf.random.normal(shape=[dimension, num_classes], mean=0.0, self._b = tf.random.normal(
stddev=math.sqrt(variance), shape=[dimension, num_classes],
dtype=tf.dtypes.float32) mean=0.0,
stddev=math.sqrt(variance),
dtype=tf.dtypes.float32)
def __call__(self, x): def __call__(self, x):
return (tf.reduce_sum(self._l2_regularizer*tf.square(x)) + return (tf.reduce_sum(self._l2_regularizer * tf.square(x)) +
(1/self._num_train)*tf.reduce_sum(tf.multiply(x, self._b))) (1 / self._num_train) * tf.reduce_sum(tf.multiply(x, self._b)))
def get_config(self): def get_config(self):
return {'l2_regularizer': self._l2_regularizer, return {
'num_train': self._num_train, 'b': self._b} 'l2_regularizer': self._l2_regularizer,
'num_train': self._num_train,
'b': self._b
}
def logistic_objective_perturbation_parameters( def logistic_objective_perturbation_parameters(
self, num_train: int, epsilon: float, delta: float, num_classes: int, self, num_train: int, epsilon: float, delta: float, num_classes: int,
input_clipping_norm: float)-> Tuple[float, float]: input_clipping_norm: float) -> Tuple[float, float]:
"""Computes l2-regularization coefficient and Gaussian noise variance. """Computes l2-regularization coefficient and Gaussian noise variance.
The setting is based on Algorithm 1 of Kifer et al. The setting is based on Algorithm 1 of Kifer et al.
@ -85,19 +90,21 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
# zeta is an upper bound on the l2-norm of the loss function gradient. # zeta is an upper bound on the l2-norm of the loss function gradient.
zeta = input_clipping_norm zeta = input_clipping_norm
# variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6): # variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6):
variance = zeta*zeta*(8*np.log(2/delta)+4*epsilon)/(epsilon*epsilon) variance = zeta * zeta * (8 * np.log(2 / delta) + 4 * epsilon) / (
epsilon * epsilon)
# lambda_coefficient is an upper bound on the spectral norm of the Hessian # lambda_coefficient is an upper bound on the spectral norm of the Hessian
# of the loss function. # of the loss function.
lambda_coefficient = math.sqrt(2*num_classes)*(input_clipping_norm**2)/4 lambda_coefficient = math.sqrt(2 * num_classes) * (input_clipping_norm**
l2_regularizer = lambda_coefficient/(epsilon*num_train) 2) / 4
l2_regularizer = lambda_coefficient / (epsilon * num_train)
return (l2_regularizer, variance) return (l2_regularizer, variance)
def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset, def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset, test_dataset: datasets.RegressionDataset,
epsilon: float, delta: float, epsilon: float, delta: float, epochs: int,
epochs: int, num_classes: int, num_classes: int,
input_clipping_norm: float)-> List[float]: input_clipping_norm: float) -> List[float]:
"""Trains and validates differentially private logistic regression model. """Trains and validates differentially private logistic regression model.
The training is based on the Algorithm 1 of Kifer et al. The training is based on the Algorithm 1 of Kifer et al.
@ -127,13 +134,21 @@ def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta, kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta,
num_classes, input_clipping_norm) num_classes, input_clipping_norm)
return single_layer_softmax.single_layer_softmax_classifier( return single_layer_softmax.single_layer_softmax_classifier(
train_dataset, test_dataset, epochs, num_classes, optimizer, loss, train_dataset,
test_dataset,
epochs,
num_classes,
optimizer,
loss,
kernel_regularizer=kernel_regularizer) kernel_regularizer=kernel_regularizer)
def compute_dpsgd_noise_multiplier( def compute_dpsgd_noise_multiplier(num_train: int,
num_train: int, epsilon: float, delta: float, epochs: int, epsilon: float,
batch_size: int, tolerance: float = 1e-2) -> Optional[float]: delta: float,
epochs: int,
batch_size: int,
tolerance: float = 1e-2) -> Optional[float]:
"""Computes the noise multiplier for DP-SGD given privacy parameters. """Computes the noise multiplier for DP-SGD given privacy parameters.
The algorithm performs binary search on the values of epsilon. The algorithm performs binary search on the values of epsilon.
@ -153,20 +168,17 @@ def compute_dpsgd_noise_multiplier(
the given tolerance) for which using DPKerasAdamOptimizer will result in an the given tolerance) for which using DPKerasAdamOptimizer will result in an
(epsilon, delta)-differentially private trained model. (epsilon, delta)-differentially private trained model.
""" """
search_parameters = common.BinarySearchParameters(lower_bound=0, search_parameters = common.BinarySearchParameters(
upper_bound=math.inf, lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance)
initial_guess=1,
tolerance=tolerance)
return common.inverse_monotone_function( return common.inverse_monotone_function(
lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0], lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0],
epsilon, search_parameters) epsilon, search_parameters)
def logistic_dpsgd(train_dataset: datasets.RegressionDataset, def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset, test_dataset: datasets.RegressionDataset, epsilon: float,
epsilon: float, delta: float, epochs: int, num_classes: int, delta: float, epochs: int, num_classes: int, batch_size: int,
batch_size: int, num_microbatches: int, num_microbatches: int, clipping_norm: float) -> List[float]:
clipping_norm: float)-> List[float]:
"""Trains and validates private logistic regression model via DP-SGD. """Trains and validates private logistic regression model via DP-SGD.
The training is based on the differentially private stochasstic gradient The training is based on the differentially private stochasstic gradient
@ -183,8 +195,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
num_classes: number of classes. num_classes: number of classes.
batch_size: the number of examples in each batch of gradient descent. batch_size: the number of examples in each batch of gradient descent.
num_microbatches: the number of microbatches in gradient descent. num_microbatches: the number of microbatches in gradient descent.
clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer to
to have l2-norm at most clipping_norm. have l2-norm at most clipping_norm.
Returns: Returns:
List of test accuracies (one for each epoch) on test_dataset of model List of test accuracies (one for each epoch) on test_dataset of model
@ -199,7 +211,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta, noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta,
epochs, batch_size) epochs, batch_size)
optimizer = dp_optimizer_keras.DPKerasAdamOptimizer( optimizer = dp_optimizer_keras.DPKerasAdamOptimizer(
l2_norm_clip=clipping_norm, noise_multiplier=noise_multiplier, l2_norm_clip=clipping_norm,
noise_multiplier=noise_multiplier,
num_microbatches=num_microbatches) num_microbatches=num_microbatches)
loss = tf.keras.losses.CategoricalCrossentropy( loss = tf.keras.losses.CategoricalCrossentropy(
reduction=tf.losses.Reduction.NONE) reduction=tf.losses.Reduction.NONE)

View file

@ -27,7 +27,7 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(5000, 500, 4, 1, 1e-5, 40, 2, 0.05), (5000, 500, 4, 1, 1e-5, 40, 2, 0.05),
(10000, 1000, 3, 1, 1e-5, 40, 4, 0.1), (10000, 1000, 3, 1, 1e-5, 40, 4, 0.1),
(10000, 1000, 4, 1, 1e-5, 40, 4, 0.1), (10000, 1000, 4, 1, 1e-5, 40, 4, 0.1),
) )
def test_logistic_objective_perturbation(self, num_train, num_test, dimension, def test_logistic_objective_perturbation(self, num_train, num_test, dimension,
epsilon, delta, epochs, num_classes, epsilon, delta, epochs, num_classes,
tolerance): tolerance):
@ -44,7 +44,7 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(1, 1, 1e-5, 40, 1, 1e-2), (1, 1, 1e-5, 40, 1, 1e-2),
(500, 0.1, 1e-5, 40, 50, 1e-2), (500, 0.1, 1e-5, 40, 50, 1e-2),
(5000, 10, 1e-5, 40, 10, 1e-3), (5000, 10, 1e-5, 40, 10, 1e-3),
) )
def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta, def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta,
epochs, batch_size, tolerance): epochs, batch_size, tolerance):
noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier( noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier(
@ -61,19 +61,22 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(5000, 500, 4, 1, 1e-5, 40, 2, 0.05, 10, 10, 1), (5000, 500, 4, 1, 1e-5, 40, 2, 0.05, 10, 10, 1),
(5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1), (5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
(5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1), (5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
) )
def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, delta,
delta, epochs, num_classes, tolerance, epochs, num_classes, tolerance, batch_size,
batch_size, num_microbatches, clipping_norm): num_microbatches, clipping_norm):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
num_train, num_test, dimension, num_classes) num_train, num_test, dimension, num_classes)
accuracy = multinomial_logistic.logistic_dpsgd( accuracy = multinomial_logistic.logistic_dpsgd(train_dataset, test_dataset,
train_dataset, test_dataset, epsilon, delta, epochs, num_classes, epsilon, delta, epochs,
batch_size, num_microbatches, clipping_norm) num_classes, batch_size,
num_microbatches,
clipping_norm)
# Since the synthetic data is linearly separable, we expect the test # Since the synthetic data is linearly separable, we expect the test
# accuracy to come arbitrarily close to 1 as the number of training examples # accuracy to come arbitrarily close to 1 as the number of training examples
# grows. # grows.
self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -11,8 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Implementation of a single-layer softmax classifier. """Implementation of a single-layer softmax classifier."""
"""
from typing import List from typing import List
import tensorflow as tf import tensorflow as tf
@ -22,10 +21,13 @@ from tensorflow_privacy.privacy.logistic_regression import datasets
def single_layer_softmax_classifier( def single_layer_softmax_classifier(
train_dataset: datasets.RegressionDataset, train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset, test_dataset: datasets.RegressionDataset,
epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer, epochs: int,
num_classes: int,
optimizer: tf.keras.optimizers.Optimizer,
loss: tf.keras.losses.Loss = 'categorical_crossentropy', loss: tf.keras.losses.Loss = 'categorical_crossentropy',
batch_size: int = 32, batch_size: int = 32,
kernel_regularizer: tf.keras.regularizers.Regularizer = None)-> List[float]: kernel_regularizer: tf.keras.regularizers.Regularizer = None
) -> List[float]:
"""Trains a single layer neural network classifier with softmax activation. """Trains a single layer neural network classifier with softmax activation.
Args: Args:
@ -47,13 +49,17 @@ def single_layer_softmax_classifier(
one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes) one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes)
one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes) one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes)
model = tf.keras.Sequential() model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=num_classes, model.add(
activation='softmax', tf.keras.layers.Dense(
kernel_regularizer=kernel_regularizer)) units=num_classes,
activation='softmax',
kernel_regularizer=kernel_regularizer))
model.compile(optimizer, loss=loss, metrics=['accuracy']) model.compile(optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset.points, one_hot_train_labels, history = model.fit(
batch_size=batch_size, epochs=epochs, train_dataset.points,
validation_data=(test_dataset.points, one_hot_train_labels,
one_hot_test_labels), batch_size=batch_size,
verbose=0) epochs=epochs,
validation_data=(test_dataset.points, one_hot_test_labels),
verbose=0)
return history.history['val_accuracy'] return history.history['val_accuracy']

View file

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Tests for tensorflow_privacy.privacy.logistic_regression.single_layer_softmax."""
import unittest import unittest
from absl.testing import parameterized from absl.testing import parameterized
from tensorflow_privacy.privacy.logistic_regression import datasets from tensorflow_privacy.privacy.logistic_regression import datasets
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
@ -26,7 +26,7 @@ class SingleLayerSoftmaxTest(parameterized.TestCase):
(5000, 500, 4, 40, 2, 0.05), (5000, 500, 4, 40, 2, 0.05),
(10000, 1000, 3, 40, 4, 0.1), (10000, 1000, 3, 40, 4, 0.1),
(10000, 1000, 4, 40, 4, 0.1), (10000, 1000, 4, 40, 4, 0.1),
) )
def test_single_layer_softmax(self, num_train, num_test, dimension, epochs, def test_single_layer_softmax(self, num_train, num_test, dimension, epochs,
num_classes, tolerance): num_classes, tolerance):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
@ -35,5 +35,6 @@ class SingleLayerSoftmaxTest(parameterized.TestCase):
train_dataset, test_dataset, epochs, num_classes, 'sgd') train_dataset, test_dataset, epochs, num_classes, 'sgd')
self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls):
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm. noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch is num_microbatches: Number of microbatches into which each minibatch is
split. Default is `None` which means that number of microbatches split. Default is `None` which means that number of microbatches is
is equal to batch size (i.e. each microbatch contains exactly one equal to batch size (i.e. each microbatch contains exactly one
example). If `gradient_accumulation_steps` is greater than 1 and example). If `gradient_accumulation_steps` is greater than 1 and
`num_microbatches` is not `None` then the effective number of `num_microbatches` is not `None` then the effective number of
microbatches is equal to microbatches is equal to `num_microbatches *
`num_microbatches * gradient_accumulation_steps`. gradient_accumulation_steps`.
gradient_accumulation_steps: If greater than 1 then optimizer will be gradient_accumulation_steps: If greater than 1 then optimizer will be
accumulating gradients for this number of optimizer steps before accumulating gradients for this number of optimizer steps before
applying them to update model weights. If this argument is set to 1 applying them to update model weights. If this argument is set to 1
@ -172,39 +172,39 @@ def make_keras_optimizer_class(cls):
if self.gradient_accumulation_steps > 1: if self.gradient_accumulation_steps > 1:
apply_update = tf.math.equal( apply_update = tf.math.equal(
tf.math.floormod(self.iterations + 1, tf.math.floormod(self.iterations + 1,
self.gradient_accumulation_steps), self.gradient_accumulation_steps), 0)
0)
grad_scaler = tf.cast(1. / self.gradient_accumulation_steps, var_dtype) grad_scaler = tf.cast(1. / self.gradient_accumulation_steps, var_dtype)
apply_state[(var_device, var_dtype)].update( apply_state[(var_device, var_dtype)].update({
{ 'apply_update': apply_update,
'apply_update': apply_update, 'grad_scaler': grad_scaler
'grad_scaler': grad_scaler })
})
def _resource_apply_dense(self, grad, var, apply_state=None): def _resource_apply_dense(self, grad, var, apply_state=None):
if self.gradient_accumulation_steps > 1: if self.gradient_accumulation_steps > 1:
var_device, var_dtype = var.device, var.dtype.base_dtype var_device, var_dtype = var.device, var.dtype.base_dtype
coefficients = ((apply_state or {}).get((var_device, var_dtype)) coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
or self._fallback_apply_state(var_device, var_dtype)) self._fallback_apply_state(var_device, var_dtype))
grad_acc = self.get_slot(var, 'grad_acc') grad_acc = self.get_slot(var, 'grad_acc')
def _update_grad(): def _update_grad():
apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense( apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense(
grad_acc + grad * coefficients['grad_scaler'], var, apply_state) grad_acc + grad * coefficients['grad_scaler'], var, apply_state)
with tf.control_dependencies([apply_grad_op]): with tf.control_dependencies([apply_grad_op]):
return grad_acc.assign(tf.zeros_like(grad_acc), return grad_acc.assign(
use_locking=self._use_locking, tf.zeros_like(grad_acc),
read_value=False) use_locking=self._use_locking,
read_value=False)
def _accumulate(): def _accumulate():
return grad_acc.assign_add(grad * coefficients['grad_scaler'], return grad_acc.assign_add(
use_locking=self._use_locking, grad * coefficients['grad_scaler'],
read_value=False) use_locking=self._use_locking,
read_value=False)
return tf.cond(coefficients['apply_update'], _update_grad, _accumulate) return tf.cond(coefficients['apply_update'], _update_grad, _accumulate)
else: else:
return super(DPOptimizerClass, self)._resource_apply_dense( return super(DPOptimizerClass,
grad, var, apply_state) self)._resource_apply_dense(grad, var, apply_state)
def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs): def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs):
if self.gradient_accumulation_steps > 1: if self.gradient_accumulation_steps > 1:
@ -220,8 +220,8 @@ def make_keras_optimizer_class(cls):
raise NotImplementedError( raise NotImplementedError(
'Sparse gradients are not supported with large batch emulation.') 'Sparse gradients are not supported with large batch emulation.')
else: else:
return super(DPOptimizerClass, self)._resource_apply_sparse( return super(DPOptimizerClass,
*args, **kwargs) self)._resource_apply_sparse(*args, **kwargs)
def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
"""DP-SGD version of base class method.""" """DP-SGD version of base class method."""

View file

@ -15,7 +15,6 @@
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras_vectorized from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras_vectorized
@ -108,8 +107,7 @@ class DPOptimizerComputeGradientsTest(tf.test.TestCase, parameterized.TestCase):
with tape: with tape:
loss = self._loss(data0, var0) + self._loss(data1, var1) loss = self._loss(data0, var0) + self._loss(data1, var1)
grads_and_vars = opt._compute_gradients( grads_and_vars = opt._compute_gradients(loss, [var0, var1], tape=tape)
loss, [var0, var1], tape=tape)
self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0]) self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0])
self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0]) self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0])
@ -442,10 +440,9 @@ class DPOptimizerGetGradientsTest(tf.test.TestCase, parameterized.TestCase):
('DPKerasSGDOptimizer 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1), ('DPKerasSGDOptimizer 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1),
('DPKerasSGDOptimizer 2', dp_optimizer_keras.DPKerasSGDOptimizer, 2), ('DPKerasSGDOptimizer 2', dp_optimizer_keras.DPKerasSGDOptimizer, 2),
('DPKerasSGDOptimizer 4', dp_optimizer_keras.DPKerasSGDOptimizer, 4), ('DPKerasSGDOptimizer 4', dp_optimizer_keras.DPKerasSGDOptimizer, 4),
('DPKerasAdamOptimizer 2', ('DPKerasAdamOptimizer 2', dp_optimizer_keras.DPKerasAdamOptimizer, 1),
dp_optimizer_keras.DPKerasAdamOptimizer, 1), ('DPKerasAdagradOptimizer 2', dp_optimizer_keras.DPKerasAdagradOptimizer,
('DPKerasAdagradOptimizer 2', 2),
dp_optimizer_keras.DPKerasAdagradOptimizer, 2),
) )
def testLargeBatchEmulation(self, cls, gradient_accumulation_steps): def testLargeBatchEmulation(self, cls, gradient_accumulation_steps):
# Tests various optimizers with large batch emulation. # Tests various optimizers with large batch emulation.

View file

@ -95,10 +95,11 @@ def make_vectorized_keras_optimizer_class(cls):
model.fit(...) model.fit(...)
``` ```
""".format(base_class='tf.keras.optimizers.' + cls.__name__, """.format(
dp_keras_class='DPKeras' + cls.__name__, base_class='tf.keras.optimizers.' + cls.__name__,
short_base_class=cls.__name__, dp_keras_class='DPKeras' + cls.__name__,
dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__) short_base_class=cls.__name__,
dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__)
def __init__( def __init__(
self, self,
@ -112,8 +113,8 @@ def make_vectorized_keras_optimizer_class(cls):
Args: Args:
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm. noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch num_microbatches: Number of microbatches into which each minibatch is
is split. split.
*args: These will be passed on to the base class `__init__` method. *args: These will be passed on to the base class `__init__` method.
**kwargs: These will be passed on to the base class `__init__` method. **kwargs: These will be passed on to the base class `__init__` method.
""" """

View file

@ -18,7 +18,6 @@ import unittest
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.optimizers import dp_optimizer from tensorflow_privacy.privacy.optimizers import dp_optimizer
@ -30,13 +29,14 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
return 0.5 * tf.reduce_sum( return 0.5 * tf.reduce_sum(
input_tensor=tf.math.squared_difference(val0, val1), axis=1) input_tensor=tf.math.squared_difference(val0, val1), axis=1)
def _compute_expected_gradients(self, per_example_gradients, def _compute_expected_gradients(self, per_example_gradients, l2_norm_clip,
l2_norm_clip, num_microbatches): num_microbatches):
batch_size, num_vars = per_example_gradients.shape batch_size, num_vars = per_example_gradients.shape
microbatch_gradients = np.mean( microbatch_gradients = np.mean(
np.reshape(per_example_gradients, np.reshape(
[num_microbatches, per_example_gradients,
np.int(batch_size / num_microbatches), num_vars]), [num_microbatches,
np.int(batch_size / num_microbatches), num_vars]),
axis=1) axis=1)
microbatch_gradients_norms = np.linalg.norm(microbatch_gradients, axis=1) microbatch_gradients_norms = np.linalg.norm(microbatch_gradients, axis=1)
@ -124,8 +124,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
l2_norm_clip = 1.0 l2_norm_clip = 1.0
dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, 0.0) dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, 0.0)
opt = cls(dp_sum_query, num_microbatches=num_microbatches, opt = cls(
learning_rate=2.0) dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer()) self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values # Fetch params to validate initial values

View file

@ -134,19 +134,15 @@ def make_vectorized_optimizer_class(cls):
if var_list is None: if var_list is None:
var_list = ( var_list = (
tf.trainable_variables() + tf.get_collection( tf.trainable_variables() +
tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
def process_microbatch(microbatch_loss): def process_microbatch(microbatch_loss):
"""Compute clipped grads for one microbatch.""" """Compute clipped grads for one microbatch."""
microbatch_loss = tf.reduce_mean(input_tensor=microbatch_loss) microbatch_loss = tf.reduce_mean(input_tensor=microbatch_loss)
grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients( grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients(
microbatch_loss, microbatch_loss, var_list, gate_gradients, aggregation_method,
var_list, colocate_gradients_with_ops, grad_loss))
gate_gradients,
aggregation_method,
colocate_gradients_with_ops,
grad_loss))
grads_list = [ grads_list = [
g if g is not None else tf.zeros_like(v) g if g is not None else tf.zeros_like(v)
for (g, v) in zip(list(grads), var_list) for (g, v) in zip(list(grads), var_list)

View file

@@ -17,7 +17,6 @@ import unittest

from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized
from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad
from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam

@@ -63,19 +62,19 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      grads_and_vars = sess.run(gradient_op)
      self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])

  @parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
                                  ('DPAdagrad', VectorizedDPAdagrad),
                                  ('DPAdam', VectorizedDPAdam))
  def testClippingNorm(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

      opt = cls(
          l2_norm_clip=1.0,
          noise_multiplier=0.,
          num_microbatches=1,
          learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values

@@ -86,19 +85,19 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      grads_and_vars = sess.run(gradient_op)
      self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])

  @parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
                                  ('DPAdagrad', VectorizedDPAdagrad),
                                  ('DPAdam', VectorizedDPAdam))
  def testNoiseMultiplier(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])

      opt = cls(
          l2_norm_clip=4.0,
          noise_multiplier=8.0,
          num_microbatches=1,
          learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values

@@ -168,10 +167,9 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
        true_weights,
        atol=1.0)

  @parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
                                  ('DPAdagrad', VectorizedDPAdagrad),
                                  ('DPAdam', VectorizedDPAdam))
  def testDPGaussianOptimizerClass(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([0.0])
@@ -217,5 +217,6 @@ def main(unused_argv):
  # For saving a figure into a file:
  # plotting.save_plot(figure, <file_path>)


if __name__ == "__main__":
  app.run(main)
@@ -482,8 +482,8 @@ class SingleAttackResult:
    return '\n'.join([
        'SingleAttackResult(',
        '  SliceSpec: %s' % str(self.slice_spec),
        '  DataSize: (ntrain=%d, ntest=%d)' %
        (self.data_size.ntrain, self.data_size.ntest),
        '  AttackType: %s' % str(self.attack_type),
        '  AUC: %.2f' % self.get_auc(),
        '  Attacker advantage: %.2f' % self.get_attacker_advantage(), ')'

@@ -684,10 +684,8 @@ class AttackResults:
    summary.append('Best-performing attacks over all slices')
    summary.append(
        '  %s (with %d training and %d test examples) achieved an AUC of %.2f on slice %s'
        % (max_auc_result_all.attack_type, max_auc_result_all.data_size.ntrain,
           max_auc_result_all.data_size.ntest, max_auc_result_all.get_auc(),
           max_auc_result_all.slice_spec))

    max_advantage_result_all = self.get_result_with_max_attacker_advantage()

@@ -709,10 +707,8 @@ class AttackResults:
      max_auc_result = results.get_result_with_max_auc()
      summary.append(
          '  %s (with %d training and %d test examples) achieved an AUC of %.2f'
          % (max_auc_result.attack_type, max_auc_result.data_size.ntrain,
             max_auc_result.data_size.ntest, max_auc_result.get_auc()))

      max_advantage_result = results.get_result_with_max_attacker_advantage()
      summary.append(
          '  %s (with %d training and %d test examples) achieved an advantage of %.2f'

@@ -816,6 +812,8 @@ def get_flattened_attack_metrics(results: AttackResults):
    types += [str(attack_result.attack_type)] * 2
    slices += [str(attack_result.slice_spec)] * 2
    attack_metrics += ['adv', 'auc']
    values += [
        float(attack_result.get_attacker_advantage()),
        float(attack_result.get_auc())
    ]
  return types, slices, attack_metrics, values
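For readers skimming this hunk: get_flattened_attack_metrics returns four parallel lists, with two entries ('adv', 'auc') per attack result. A tiny self-contained sketch of how callers zip them back together; the values below are made up:

# Hypothetical output of get_flattened_attack_metrics for one result.
types = ['THRESHOLD_ATTACK', 'THRESHOLD_ATTACK']
slices = ['Entire dataset', 'Entire dataset']
attack_metrics = ['adv', 'auc']
values = [0.21, 0.63]
for t, s, m, v in zip(types, slices, attack_metrics, values):
  print('  %s: %.4f' % (', '.join([s, t, m]), v))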
@@ -54,7 +54,8 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
  def __init__(
      self,
      in_train,
      out_train,
      slicing_spec: SlicingSpec = None,
      attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
      tensorboard_dir=None,

@@ -70,7 +71,7 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
      attack_types: a list of attacks, each of type AttackType
      tensorboard_dir: directory for tensorboard summary
      tensorboard_merge_classifiers: if true, plot different classifiers with
        the same slicing_spec and metric in the same figure
      is_logit: whether the result of model.predict is logit or probability
      batch_size: the batch size for model.predict
    """

@@ -96,19 +97,18 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs=None):
    results = run_attack_on_keras_model(
        self.model, (self._in_train_data, self._in_train_labels),
        (self._out_train_data, self._out_train_labels), self._slicing_spec,
        self._attack_types, self._is_logit, self._batch_size)
    logging.info(results)

    att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
        results)
    print('Attack result:')
    print('\n'.join([
        '  %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))

    # Write to tensorboard if tensorboard_dir is specified
    if self._writers is not None:

@@ -117,7 +117,9 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def run_attack_on_keras_model(
    model,
    in_train,
    out_train,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
    is_logit: bool = False,

@@ -132,6 +134,7 @@ def run_attack_on_keras_model(
    attack_types: a list of attacks, each of type AttackType
    is_logit: whether the result of model.predict is logit or probability
    batch_size: the batch size for model.predict

  Returns:
    Results of the attack
  """

@@ -139,16 +142,19 @@ def run_attack_on_keras_model(
  out_train_data, out_train_labels = out_train

  # Compute predictions and losses
  in_train_pred, in_train_loss = calculate_losses(model, in_train_data,
                                                  in_train_labels, is_logit,
                                                  batch_size)
  out_train_pred, out_train_loss = calculate_losses(model, out_train_data,
                                                    out_train_labels, is_logit,
                                                    batch_size)

  attack_input = AttackInputData(
      logits_train=in_train_pred,
      logits_test=out_train_pred,
      labels_train=in_train_labels,
      labels_test=out_train_labels,
      loss_train=in_train_loss,
      loss_test=out_train_loss)
  results = mia.run_attacks(
      attack_input, slicing_spec=slicing_spec, attack_types=attack_types)
  return results
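A self-contained usage sketch of run_attack_on_keras_model as reformatted above, assuming tensorflow_privacy is installed; the one-layer softmax model and the random data are hypothetical placeholders, so the attack output is meaningless and the snippet only exercises the API:

import numpy as np
import tensorflow as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import run_attack_on_keras_model

model = tf.keras.Sequential(
    [tf.keras.layers.Dense(5, activation='softmax', input_shape=(4,))])
x_in = np.random.rand(100, 4).astype(np.float32)   # "members"
y_in = np.random.randint(5, size=100)
x_out = np.random.rand(100, 4).astype(np.float32)  # "non-members"
y_out = np.random.randint(5, size=100)
results = run_attack_on_keras_model(
    model, (x_in, y_in), (x_out, y_out),
    attack_types=[AttackType.THRESHOLD_ATTACK])
print(results.get_result_with_max_auc())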
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example for using keras_evaluation."""

from absl import app

@@ -25,15 +24,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import MembershipInferenceCallback
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import run_attack_on_keras_model

FLAGS = flags.FLAGS

flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool(
    'tensorboard_merge_classifiers', False, 'If true, plot '
    'different classifiers with the same slicing_spec and metric '
    'in the same figure.')


def small_cnn():

@@ -76,14 +75,15 @@ def main(unused_argv):
  # Get callback for membership inference attack.
  mia_callback = MembershipInferenceCallback(
      (x_train, y_train), (x_test, y_test),
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      tensorboard_dir=FLAGS.model_dir,
      tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers,
      is_logit=True,
      batch_size=2048)

  # Train model with Keras
  model.fit(

@@ -102,11 +102,14 @@ def main(unused_argv):
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      is_logit=True,
      batch_size=2048)

  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  print('\n'.join([
      '  %s: %.4f' % (', '.join([s, t, m]), v)
      for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
  ]))


if __name__ == '__main__':
@@ -13,10 +13,8 @@
# limitations under the License.

from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import keras_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType

@@ -59,8 +57,7 @@ class UtilsTest(absltest.TestCase):
  def test_run_attack_on_keras_model(self):
    """Test the attack."""
    results = keras_evaluation.run_attack_on_keras_model(
        self.model, (self.train_data, self.train_labels),
        (self.test_data, self.test_labels),
        attack_types=[AttackType.THRESHOLD_ATTACK])
    self.assertIsInstance(results, AttackResults)
@@ -140,9 +140,9 @@ def _run_attack(attack_input: AttackInputData,
    attack_input: input data for running an attack
    attack_type: the attack to run
    balance_attacker_training: Whether the training and test sets for the
      membership inference attacker should have a balanced (roughly equal)
      number of samples from the training and test sets used to develop the
      model under attack.
    min_num_samples: minimum number of examples in either training or test data.

  Returns:

@@ -179,9 +179,9 @@ def run_attacks(attack_input: AttackInputData,
    attack_types: attacks to run
    privacy_report_metadata: the metadata of the model under attack.
    balance_attacker_training: Whether the training and test sets for the
      membership inference attacker should have a balanced (roughly equal)
      number of samples from the training and test sets used to develop the
      model under attack.
    min_num_samples: minimum number of examples in either training or test data.

  Returns:

@@ -200,8 +200,7 @@ def run_attacks(attack_input: AttackInputData,
    attack_input_slice = get_slice(attack_input, single_slice_spec)
    for attack_type in attack_types:
      attack_result = _run_attack(attack_input_slice, attack_type,
                                  balance_attacker_training, min_num_samples)
      if attack_result is not None:
        attack_results.append(attack_result)
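For context, the threshold attack dispatched by this loop needs only per-example losses. A self-contained sketch with synthetic loss values (the member/non-member gap is artificial, chosen so the attack has something to find); get_result_with_max_auc is the accessor seen in the summary code above:

import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType

attack_input = AttackInputData(
    loss_train=np.random.normal(0.4, 0.1, 1000),  # members: lower loss
    loss_test=np.random.normal(1.0, 0.1, 1000))   # non-members: higher loss
results = mia.run_attacks(
    attack_input, attack_types=[AttackType.THRESHOLD_ATTACK])
print(results.get_result_with_max_auc())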
@@ -55,9 +55,8 @@ def create_attacker_data(attack_input_data: AttackInputData,
    attack_input_data: Original AttackInputData
    test_fraction: Fraction of the dataset to include in the test split.
    balance: Whether the training and test sets for the membership inference
      attacker should have a balanced (roughly equal) number of samples from the
      training and test sets used to develop the model under attack.

  Returns:
    AttackerData.

@@ -134,6 +133,7 @@ class TrainedAttacker:
    Args:
      input_features : A vector of features with the same semantics as x_train
        passed to train_model.

    Returns:
      An array of probabilities denoting whether the example belongs to test.
    """
@@ -81,5 +81,4 @@ def plot_histograms(train: Iterable[float],

def plot_roc_curve(roc_curve, plot_func=plot_curve_with_area) -> plt.Figure:
  """Plot the ROC curve and the area under the curve."""
  return plot_func(roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A hook and a function in tf estimator for membership inference attack."""

import os

@@ -58,7 +57,8 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
  def __init__(
      self,
      estimator,
      in_train,
      out_train,
      input_fn_constructor,
      slicing_spec: SlicingSpec = None,
      attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),

@@ -76,7 +76,7 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
      attack_types: a list of attacks, each of type AttackType
      tensorboard_dir: directory for tensorboard summary
      tensorboard_merge_classifiers: if true, plot different classifiers with
        the same slicing_spec and metric in the same figure
    """
    in_train_data, self._in_train_labels = in_train
    out_train_data, self._out_train_labels = out_train

@@ -106,19 +106,19 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
      self._writers = None

  def end(self, session):
    results = run_attack_helper(self._estimator, self._in_train_input_fn,
                                self._out_train_input_fn, self._in_train_labels,
                                self._out_train_labels, self._slicing_spec,
                                self._attack_types)
    logging.info(results)

    att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
        results)
    print('Attack result:')
    print('\n'.join([
        '  %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))

    # Write to tensorboard if tensorboard_dir is specified
    global_step = self._estimator.get_variable_value('global_step')

@@ -128,7 +128,9 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
def run_attack_on_tf_estimator_model(
    estimator,
    in_train,
    out_train,
    input_fn_constructor,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):

@@ -142,6 +144,7 @@ def run_attack_on_tf_estimator_model(
      the input_fn for model prediction
    slicing_spec: slicing specification of the attack
    attack_types: a list of attacks, each of type AttackType

  Returns:
    Results of the attack
  """

@@ -153,10 +156,8 @@ def run_attack_on_tf_estimator_model(
  out_train_input_fn = input_fn_constructor(out_train_data, out_train_labels)

  # Call the helper to run the attack.
  results = run_attack_helper(estimator, in_train_input_fn, out_train_input_fn,
                              in_train_labels, out_train_labels, slicing_spec,
                              attack_types)
  logging.info('End of training attack:')
  logging.info(results)

@@ -165,8 +166,10 @@ def run_attack_on_tf_estimator_model(
def run_attack_helper(
    estimator,
    in_train_input_fn,
    out_train_input_fn,
    in_train_labels,
    out_train_labels,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
  """A helper function to perform attack.

@@ -179,22 +182,23 @@ def run_attack_helper(
    out_train_labels: out of training labels
    slicing_spec: slicing specification of the attack
    attack_types: a list of attacks, each of type AttackType

  Returns:
    Results of the attack
  """
  # Compute predictions and losses
  in_train_pred, in_train_loss = calculate_losses(estimator, in_train_input_fn,
                                                  in_train_labels)
  out_train_pred, out_train_loss = calculate_losses(estimator,
                                                    out_train_input_fn,
                                                    out_train_labels)

  attack_input = AttackInputData(
      logits_train=in_train_pred,
      logits_test=out_train_pred,
      labels_train=in_train_labels,
      labels_test=out_train_labels,
      loss_train=in_train_loss,
      loss_test=out_train_loss)
  results = mia.run_attacks(
      attack_input, slicing_spec=slicing_spec, attack_types=attack_types)
  return results
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example for using tf_estimator_evaluation."""

from absl import app

@@ -26,15 +25,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model

FLAGS = flags.FLAGS

flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool(
    'tensorboard_merge_classifiers', False, 'If true, plot '
    'different classifiers with the same slicing_spec and metric '
    'in the same figure.')


def small_cnn_fn(features, labels, mode):

@@ -55,8 +54,8 @@ def small_cnn_fn(features, labels, mode):
  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=FLAGS.learning_rate, momentum=0.9)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(

@@ -111,13 +110,12 @@ def main(unused_argv):
  # Get hook for membership inference attack.
  mia_hook = MembershipInferenceTrainingHook(
      classifier, (x_train, y_train), (x_test, y_test),
      input_fn_constructor,
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      tensorboard_dir=FLAGS.model_dir,
      tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

@@ -148,12 +146,15 @@ def main(unused_argv):
      classifier, (x_train, y_train), (x_test, y_test),
      input_fn_constructor,
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ])
  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  print('\n'.join([
      '  %s: %.4f' % (', '.join([s, t, m]), v)
      for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
  ]))


if __name__ == '__main__':
@@ -13,10 +13,8 @@
# limitations under the License.

from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import tf_estimator_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType

@@ -55,23 +53,25 @@ class UtilsTest(absltest.TestCase):
    # Define the classifier, input_fn for training and test data
    self.classifier = tf.estimator.Estimator(model_fn=model_fn)
    self.input_fn_train = tf.estimator.inputs.numpy_input_fn(
        x={'x': self.train_data},
        y=self.train_labels,
        num_epochs=1,
        shuffle=False)
    self.input_fn_test = tf.estimator.inputs.numpy_input_fn(
        x={'x': self.test_data},
        y=self.test_labels,
        num_epochs=1,
        shuffle=False)

  def test_calculate_losses(self):
    """Test calculating the loss."""
    pred, loss = tf_estimator_evaluation.calculate_losses(
        self.classifier, self.input_fn_train, self.train_labels)
    self.assertEqual(pred.shape, (self.ntrain, self.nclass))
    self.assertEqual(loss.shape, (self.ntrain,))

    pred, loss = tf_estimator_evaluation.calculate_losses(
        self.classifier, self.input_fn_test, self.test_labels)
    self.assertEqual(pred.shape, (self.ntest, self.nclass))
    self.assertEqual(loss.shape, (self.ntest,))

@@ -94,12 +94,12 @@ class UtilsTest(absltest.TestCase):
  def test_run_attack_on_tf_estimator_model(self):
    """Test the attack on the final models."""

    def input_fn_constructor(x, y):
      return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)

    results = tf_estimator_evaluation.run_attack_on_tf_estimator_model(
        self.classifier, (self.train_data, self.train_labels),
        (self.test_data, self.test_labels),
        input_fn_constructor,
        attack_types=[AttackType.THRESHOLD_ATTACK])
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for membership inference attacks."""

import numpy as np
@@ -27,7 +27,7 @@ def write_to_tensorboard(writers, tags, values, step):
  Args:
    writers: a list of tensorboard writers or one writer to be used for metrics.
      If it's a list, it should be of the same length as tags
    tags: a list of tags of metrics
    values: a list of values of metrics with the same length as tags
    step: step for the tensorboard summary

@@ -54,7 +54,7 @@ def write_to_tensorboard_tf2(writers, tags, values, step):
  Args:
    writers: a list of tensorboard writers or one writer to be used for metrics.
      If it's a list, it should be of the same length as tags
    tags: a list of tags of metrics
    values: a list of values of metrics with the same length as tags
    step: step for the tensorboard summary

@@ -77,11 +77,10 @@ def write_to_tensorboard_tf2(writers, tags, values, step):
      writer.flush()


def write_results_to_tensorboard(attack_results: AttackResults,
                                 writers: Union[tf1.summary.FileWriter,
                                                List[tf1.summary.FileWriter]],
                                 step: int, merge_classifiers: bool):
  """Write attack results to tensorboard.

  Args:

@@ -97,21 +96,21 @@ def write_results_to_tensorboard(
  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  if merge_classifiers:
    att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
    write_to_tensorboard([writers[t] for t in att_types], att_tags, att_values,
                         step)
  else:
    att_tags = [
        'attack/' + f'{s}_{t}_{m}'
        for t, s, m in zip(att_types, att_slices, att_metrics)
    ]
    write_to_tensorboard(writers, att_tags, att_values, step)


def write_results_to_tensorboard_tf2(
    attack_results: AttackResults,
    writers: Union[tf2.summary.SummaryWriter, List[tf2.summary.SummaryWriter]],
    step: int, merge_classifiers: bool):
  """Write attack results to tensorboard.

  Args:

@@ -127,12 +126,12 @@ def write_results_to_tensorboard_tf2(
  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  if merge_classifiers:
    att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
    write_to_tensorboard_tf2([writers[t] for t in att_types], att_tags,
                             att_values, step)
  else:
    att_tags = [
        'attack/' + f'{s}_{t}_{m}'
        for t, s, m in zip(att_types, att_slices, att_metrics)
    ]
    write_to_tensorboard_tf2(writers, att_tags, att_values, step)
@@ -25,22 +25,23 @@ def compute_exposure_interpolation(
  """Get exposure using interpolation.

  Args:
    perplexities: a dictionary, key is number of secret repetitions, value is a
      list of perplexities
    perplexities_reference: a list, perplexities of the random sequences that
      did not appear in the training data

  Returns:
    The exposure of every secret measured using interpolation (not necessarily
    in the same order as the input)
  """
  repetitions = list(perplexities.keys())
  # Concatenate all perplexities, including those for references
  perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] +
                                       [perplexities_reference])
  # Concatenate the number of repetitions for each secret
  repetitions_concat = np.concatenate([[r] * len(perplexities[r])
                                       for r in repetitions] +
                                      [[0] * len(perplexities_reference)])

  # Sort the repetition list according to the corresponding perplexity
  idx = np.argsort(perplexities_concat)

@@ -53,8 +54,10 @@ def compute_exposure_interpolation(
  # (repetitions_concat == 0).
  cum_sum = np.cumsum(repetitions_concat == 0)
  ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions}
  exposures = {
      r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
      for r in repetitions
  }

  return exposures

@@ -64,10 +67,11 @@ def compute_exposure_extrapolation(
  """Get exposure using extrapolation.

  Args:
    perplexities: a dictionary, key is number of secret repetitions, value is a
      list of perplexities
    perplexities_reference: a list, perplexities of the random sequences that
      did not appear in the training data

  Returns:
    The exposure of every secret measured using extrapolation
  """
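A worked instance of the rank arithmetic above, mirroring the unit test that follows: a secret with perplexity 3.5 is larger than exactly 3 of the 16 reference perplexities 1..16, so its rank is 4 and its exposure is log2(16) - log2(4) = 2.

from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation

perplexities = {1: [0, 0.1], 2: [20.0], 5: [3.5]}
perplexities_reference = [float(x) for x in range(1, 17)]
exposures = compute_exposure_interpolation(perplexities,
                                           perplexities_reference)
print(exposures[5])  # -> [2.0]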
@@ -15,7 +15,6 @@
from absl.testing import absltest
import numpy as np
from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation

@@ -28,9 +27,11 @@ class UtilsTest(absltest.TestCase):
  def test_exposure_interpolation(self):
    """Test exposure by interpolation."""
    perplexities = {
        1: [0, 0.1],  # smallest perplexities
        2: [20.0],  # largest perplexities
        5: [3.5]
    }  # rank = 4
    perplexities_reference = [float(x) for x in range(1, 17)]
    exposures = compute_exposure_interpolation(perplexities,
                                               perplexities_reference)

@@ -41,7 +42,8 @@ class UtilsTest(absltest.TestCase):
    expected_exposures = {
        1: np.array([exposure_largest] * 2),
        2: np.array([exposure_smallest]),
        5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])
    }

    self.assertEqual(exposures.keys(), expected_exposures.keys())
    for r in exposures.keys():
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate random sequences."""

import itertools

@@ -21,7 +20,9 @@ from dataclasses import dataclass
import numpy as np


def generate_random_sequences(vocab: List[str],
                              pattern: str,
                              n: int,
                              seed: int = 1) -> List[str]:
  """Generate random sequences.

@@ -35,6 +36,7 @@ def generate_random_sequences(vocab: List[str], pattern: str, n: int,
  Returns:
    A list of different random sequences from the given vocabulary
  """

  def count_placeholder(pattern):
    return sum([x[1] is not None for x in string.Formatter().parse(pattern)])

@@ -103,7 +105,8 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets:
  Args:
    secret_config: configuration of secret.
    seqs: a list of random sequences that will be used for secrets and
      references.

  Returns:
    a secret instance.
  """

@@ -116,9 +119,10 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets:
      secret_config.num_repetitions, secret_config.num_secrets_for_repetitions):
    secrets[num_repetition] = seqs[i:i + num_secrets]
    i += num_secrets
  return Secrets(
      config=secret_config,
      secrets=secrets,
      references=seqs[-secret_config.num_references:])


def generate_secrets_and_references(secret_configs: List[SecretConfig],

@@ -128,6 +132,7 @@ def generate_secrets_and_references(secret_configs: List[SecretConfig],
  Args:
    secret_configs: a list of secret configurations.
    seed: random seed.

  Returns:
    A list of secret instances.
  """
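A quick sketch of the generator above; output is deterministic for a fixed seed, and the expected list is taken from the unit test that follows:

from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_random_sequences

# A 3-symbol vocabulary with two placeholders has only 9 distinct
# sequences, so asking for 10 returns all 9 in shuffled order.
seqs = generate_random_sequences(['A', 'b', 'c'], '{}+{}', 10, seed=27)
print(seqs)  # ['A+c', 'c+c', 'b+b', 'A+b', 'b+c', 'c+A', 'c+b', 'A+A', 'b+A']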
@@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from absl.testing import absltest
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import construct_secret
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_random_sequences
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_secrets_and_references

@@ -32,27 +29,34 @@ class UtilsTest(absltest.TestCase):
    """Test generate_random_sequences."""
    # Test when n is larger than total number of possible sequences.
    seqs = generate_random_sequences(['A', 'b', 'c'], '{}+{}', 10, seed=27)
    expected_seqs = [
        'A+c', 'c+c', 'b+b', 'A+b', 'b+c', 'c+A', 'c+b', 'A+A', 'b+A'
    ]
    self.assertEqual(seqs, expected_seqs)

    # Test when n is smaller than total number of possible sequences.
    seqs = generate_random_sequences(list('01234'), 'prefix {}{}{}?', 8, seed=9)
    expected_seqs = [
        'prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?',
        'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?'
    ]
    self.assertEqual(seqs, expected_seqs)

  def test_construct_secret(self):
    secret_config = SecretConfig(
        vocab=None,
        pattern='',
        num_repetitions=[1, 2, 8],
        num_secrets_for_repetitions=[2, 3, 1],
        num_references=3)
    seqs = list('0123456789')
    secrets = construct_secret(secret_config, seqs)
    self.assertEqual(secrets.config, secret_config)
    self.assertDictEqual(secrets.secrets, {
        1: ['0', '1'],
        2: ['2', '3', '4'],
        8: ['5']
    })
    self.assertEqual(secrets.references, ['7', '8', '9'])

    # Test when the number of elements in seqs is not enough.

@@ -61,29 +65,36 @@ class UtilsTest(absltest.TestCase):
  def test_generate_secrets_and_references(self):
    secret_configs = [
        SecretConfig(
            vocab=['w1', 'w2', 'w3'],
            pattern='{} {} suf',
            num_repetitions=[1, 12],
            num_secrets_for_repetitions=[2, 1],
            num_references=3),
        SecretConfig(
            vocab=['W 1', 'W 2', 'W 3'],
            pattern='{}-{}',
            num_repetitions=[1, 2, 8],
            num_secrets_for_repetitions=[2, 3, 1],
            num_references=3)
    ]
    secrets = generate_secrets_and_references(secret_configs, seed=27)
    self.assertEqual(secrets[0].config, secret_configs[0])
    self.assertDictEqual(secrets[0].secrets, {
        1: ['w3 w2 suf', 'w2 w1 suf'],
        12: ['w1 w1 suf']
    })
    self.assertEqual(secrets[0].references,
                     ['w2 w3 suf', 'w2 w2 suf', 'w3 w1 suf'])
    self.assertEqual(secrets[1].config, secret_configs[1])
    self.assertDictEqual(
        secrets[1].secrets, {
            1: ['W 3-W 2', 'W 1-W 3'],
            2: ['W 3-W 1', 'W 2-W 1', 'W 1-W 1'],
            8: ['W 2-W 2']
        })
    self.assertEqual(secrets[1].references, ['W 2-W 3', 'W 3-W 3', 'W 1-W 2'])


if __name__ == '__main__':
@@ -27,10 +27,14 @@ n_samples = 10
input_dim = 2
n_outputs = 1
# Create binary classification dataset:
x_stack = [
    tf.constant(-1, tf.float32, (n_samples, input_dim)),
    tf.constant(1, tf.float32, (n_samples, input_dim))
]
y_stack = [
    tf.constant(0, tf.float32, (n_samples, 1)),
    tf.constant(1, tf.float32, (n_samples, 1))
]
x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
print(x.shape, y.shape)
generator = tf.data.Dataset.from_tensor_slices((x, y))

@@ -86,14 +90,15 @@ n_samples = None  # default, if it cannot be iferred, specify this
epsilon = 2
noise_distribution = 'laplace'
bolt.fit(
    x,
    y,
    epsilon=epsilon,
    class_weight=class_weight,
    batch_size=batch_size,
    n_samples=n_samples,
    noise_distribution=noise_distribution,
    epochs=2)
# -------
# We may also train a generator object, or try different optimizers and loss
# functions. Below, we will see that we must pass the number of samples as the

@@ -109,25 +114,27 @@ n_samples = None  # default, if it cannot be iferred, specify this
epsilon = 2
noise_distribution = 'laplace'
try:
  bolt.fit(
      generator,
      epsilon=epsilon,
      class_weight=class_weight,
      batch_size=batch_size,
      n_samples=n_samples,
      noise_distribution=noise_distribution,
      verbose=0)
except ValueError as e:
  print(e)
# -------
# And now, re running with the parameter set.
# -------
n_samples = 20
bolt.fit_generator(
    generator,
    epsilon=epsilon,
    class_weight=class_weight,
    n_samples=n_samples,
    noise_distribution=noise_distribution,
    verbose=0)
# -------
# You don't have to use the BoltOn model to use the BoltOn method.
# There are only a few requirements:

@@ -145,8 +152,8 @@ class TestModel(tf.keras.Model):  # pylint: disable=abstract-method
  def __init__(self, reg_layer, number_of_outputs=1):
    super().__init__(name='test')
    self.output_layer = tf.keras.layers.Dense(
        number_of_outputs, kernel_regularizer=reg_layer)

  def call(self, inputs):  # pylint: disable=arguments-differ
    return self.output_layer(inputs)

@@ -180,6 +187,5 @@ with optimizer(
    layers=test_model.layers,
    class_weights=class_weights,
    n_samples=n_samples,
    batch_size=batch_size) as _:
  test_model.fit(x, y, batch_size=batch_size, epochs=2)
@@ -86,12 +86,10 @@ def cnn_model_fn(features, labels, mode, params):  # pylint: disable=unused-argu
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(
                labels=labels, predictions=tf.argmax(input=logits, axis=1))
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)

@@ -100,8 +98,8 @@ def main(unused_argv):
    raise ValueError('Number of microbatches should divide evenly batch_size')

  # Instantiate the tf.Estimator.
  mnist_classifier = tf.estimator.Estimator(
      model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)

  # Training loop.
  steps_per_epoch = 60000 // FLAGS.batch_size

@ -25,16 +25,18 @@ from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescent
GradientDescentOptimizer = tf.train.GradientDescentOptimizer GradientDescentOptimizer = tf.train.GradientDescentOptimizer
tf.enable_eager_execution() tf.enable_eager_execution()
flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, ' flags.DEFINE_boolean(
'train with vanilla SGD.') 'dpsgd', True, 'If True, train with DP-SGD. If False, '
'train with vanilla SGD.')
flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 1.1, flags.DEFINE_float('noise_multiplier', 1.1,
'Ratio of the standard deviation to the clipping norm') 'Ratio of the standard deviation to the clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_integer('batch_size', 250, 'Batch size') flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs') flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer('microbatches', 250, 'Number of microbatches ' flags.DEFINE_integer(
'(must evenly divide batch_size)') 'microbatches', 250, 'Number of microbatches '
'(must evenly divide batch_size)')
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
@@ -45,10 +47,11 @@ def compute_epsilon(steps):
     return float('inf')
   orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
   sampling_probability = FLAGS.batch_size / 60000
-  rdp = compute_rdp(q=sampling_probability,
-                    noise_multiplier=FLAGS.noise_multiplier,
-                    steps=steps,
-                    orders=orders)
+  rdp = compute_rdp(
+      q=sampling_probability,
+      noise_multiplier=FLAGS.noise_multiplier,
+      steps=steps,
+      orders=orders)
   # Delta is set to 1e-5 because MNIST has 60000 training points.
   return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
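To make the accounting concrete, here is a minimal standalone sketch of the computation compute_epsilon performs, assuming the default flag values above (batch_size=250, noise_multiplier=1.1, epochs=60); it is an illustration, not code from this commit:

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

# Sketch: RDP accounting for DP-SGD on MNIST with the default flags above.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
q = 250 / 60000  # sampling probability = batch_size / training-set size
steps = 60 * (60000 // 250)  # epochs * steps_per_epoch
rdp = compute_rdp(q=q, noise_multiplier=1.1, steps=steps, orders=orders)
# delta = 1e-5 sits just below 1 / 60000; get_privacy_spent returns a tuple
# whose first element is epsilon.
eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
print('DP-SGD satisfies ({:.2f}, 1e-5)-DP after {} steps'.format(eps, steps))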
@@ -64,22 +67,20 @@ def main(_):

   # Create a dataset object and batch for the training data
   dataset = tf.data.Dataset.from_tensor_slices(
-      (tf.cast(train_images[..., tf.newaxis]/255, tf.float32),
-       tf.cast(train_labels, tf.int64)))
+      (tf.cast(train_images[..., tf.newaxis] / 255,
+               tf.float32), tf.cast(train_labels, tf.int64)))
   dataset = dataset.shuffle(1000).batch(FLAGS.batch_size)

   # Create a dataset object and batch for the test data
   eval_dataset = tf.data.Dataset.from_tensor_slices(
-      (tf.cast(test_images[..., tf.newaxis]/255, tf.float32),
-       tf.cast(test_labels, tf.int64)))
+      (tf.cast(test_images[..., tf.newaxis] / 255,
+               tf.float32), tf.cast(test_labels, tf.int64)))
   eval_dataset = eval_dataset.batch(10000)

   # Define the model using tf.keras.layers
   mnist_model = tf.keras.Sequential([
-      tf.keras.layers.Conv2D(16, 8,
-                             strides=2,
-                             padding='same',
-                             activation='relu'),
+      tf.keras.layers.Conv2D(
+          16, 8, strides=2, padding='same', activation='relu'),
       tf.keras.layers.MaxPool2D(2, 1),
       tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'),
       tf.keras.layers.MaxPool2D(2, 1),
@@ -119,8 +120,8 @@ def main(_):
         return loss

       if FLAGS.dpsgd:
-        grads_and_vars = opt.compute_gradients(loss_fn, var_list,
-                                               gradient_tape=gradient_tape)
+        grads_and_vars = opt.compute_gradients(
+            loss_fn, var_list, gradient_tape=gradient_tape)
       else:
         grads_and_vars = opt.compute_gradients(loss_fn, var_list)
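For orientation, the eager training step around this hunk looks roughly like the sketch below. The names opt, mnist_model, images, and labels are assumed from the surrounding tutorial; this is an illustrative reconstruction, not a verbatim excerpt:

# Sketch of one eager DP-SGD step. The DP optimizer needs a persistent tape
# and a loss_fn returning *per-example* losses so it can split the batch into
# microbatches, clip each microbatch gradient, and add noise.
with tf.GradientTape(persistent=True) as gradient_tape:
  var_list = mnist_model.trainable_variables

  def loss_fn():
    logits = mnist_model(images, training=True)
    # Vector loss, one entry per example (no reduction).
    return tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)

  if FLAGS.dpsgd:
    grads_and_vars = opt.compute_gradients(
        loss_fn, var_list, gradient_tape=gradient_tape)
  else:
    grads_and_vars = opt.compute_gradients(loss_fn, var_list)

opt.apply_gradients(grads_and_vars)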
@@ -140,5 +141,6 @@ def main(_):
   else:
     print('Trained with vanilla non-private SGD optimizer')

+
 if __name__ == '__main__':
   app.run(main)
View file
@@ -47,10 +47,11 @@ def compute_epsilon(steps):
     return float('inf')
   orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
   sampling_probability = FLAGS.batch_size / 60000
-  rdp = compute_rdp(q=sampling_probability,
-                    noise_multiplier=FLAGS.noise_multiplier,
-                    steps=steps,
-                    orders=orders)
+  rdp = compute_rdp(
+      q=sampling_probability,
+      noise_multiplier=FLAGS.noise_multiplier,
+      steps=steps,
+      orders=orders)
   # Delta is set to 1e-5 because MNIST has 60000 training points.
   return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
@@ -91,16 +92,16 @@ def main(unused_argv):

   # Define a sequential Keras model
   model = tf.keras.Sequential([
-      tf.keras.layers.Conv2D(16, 8,
-                             strides=2,
-                             padding='same',
-                             activation='relu',
-                             input_shape=(28, 28, 1)),
+      tf.keras.layers.Conv2D(
+          16,
+          8,
+          strides=2,
+          padding='same',
+          activation='relu',
+          input_shape=(28, 28, 1)),
       tf.keras.layers.MaxPool2D(2, 1),
-      tf.keras.layers.Conv2D(32, 4,
-                             strides=2,
-                             padding='valid',
-                             activation='relu'),
+      tf.keras.layers.Conv2D(
+          32, 4, strides=2, padding='valid', activation='relu'),
       tf.keras.layers.MaxPool2D(2, 1),
       tf.keras.layers.Flatten(),
       tf.keras.layers.Dense(32, activation='relu'),
@@ -124,10 +125,12 @@ def main(unused_argv):
   model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

   # Train model with Keras
-  model.fit(train_data, train_labels,
-            epochs=FLAGS.epochs,
-            validation_data=(test_data, test_labels),
-            batch_size=FLAGS.batch_size)
+  model.fit(
+      train_data,
+      train_labels,
+      epochs=FLAGS.epochs,
+      validation_data=(test_data, test_labels),
+      batch_size=FLAGS.batch_size)

   # Compute the privacy budget expended.
   if FLAGS.dpsgd:
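What makes model.fit above differentially private is the compile step that precedes it. As a sketch under assumptions (the optimizer is tensorflow_privacy's DPGradientDescentGaussianOptimizer and the flags are those defined earlier):

# Sketch: compiling the Keras model for DP-SGD. reduction=NONE keeps the
# loss as a per-example vector so the optimizer can form microbatches
# before clipping and noising; the values below come from the flags above.
optimizer = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=FLAGS.l2_norm_clip,
    noise_multiplier=FLAGS.noise_multiplier,
    num_microbatches=FLAGS.microbatches,
    learning_rate=FLAGS.learning_rate)
loss = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True, reduction=tf.losses.Reduction.NONE)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])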
@@ -136,5 +139,6 @@ def main(unused_argv):
   else:
     print('Trained with vanilla non-private SGD optimizer')

+
 if __name__ == '__main__':
   app.run(main)
View file
@@ -22,7 +22,6 @@ from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
 from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
 from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized

-
 flags.DEFINE_boolean(
     'dpsgd', True, 'If True, train with DP-SGD. If False, '
     'train with vanilla SGD.')
@@ -50,10 +49,11 @@ def compute_epsilon(steps):
     return float('inf')
   orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
   sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES
-  rdp = compute_rdp(q=sampling_probability,
-                    noise_multiplier=FLAGS.noise_multiplier,
-                    steps=steps,
-                    orders=orders)
+  rdp = compute_rdp(
+      q=sampling_probability,
+      noise_multiplier=FLAGS.noise_multiplier,
+      steps=steps,
+      orders=orders)
   # Delta is set to approximate 1 / (number of training points).
   return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
@@ -63,15 +63,11 @@ def cnn_model_fn(features, labels, mode):

   # Define CNN architecture using tf.keras.layers.
   input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
-  y = tf.keras.layers.Conv2D(16, 8,
-                             strides=2,
-                             padding='same',
-                             activation='relu').apply(input_layer)
+  y = tf.keras.layers.Conv2D(
+      16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
   y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
-  y = tf.keras.layers.Conv2D(32, 4,
-                             strides=2,
-                             padding='valid',
-                             activation='relu').apply(y)
+  y = tf.keras.layers.Conv2D(
+      32, 4, strides=2, padding='valid', activation='relu').apply(y)
   y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
   y = tf.keras.layers.Flatten().apply(y)
   y = tf.keras.layers.Dense(32, activation='relu').apply(y)
@@ -106,22 +102,19 @@ def cnn_model_fn(features, labels, mode):
     # the vector_loss because tf.estimator requires a scalar loss. This is only
     # used for evaluation and debugging by tf.estimator. The actual loss being
     # minimized is opt_loss defined above and passed to optimizer.minimize().
-    return tf.estimator.EstimatorSpec(mode=mode,
-                                      loss=scalar_loss,
-                                      train_op=train_op)
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=scalar_loss, train_op=train_op)

   # Add evaluation metrics (for EVAL mode).
   elif mode == tf.estimator.ModeKeys.EVAL:
     eval_metric_ops = {
         'accuracy':
             tf.metrics.accuracy(
-                labels=labels,
-                predictions=tf.argmax(input=logits, axis=1))
+                labels=labels, predictions=tf.argmax(input=logits, axis=1))
     }
-    return tf.estimator.EstimatorSpec(mode=mode,
-                                      loss=scalar_loss,
-                                      eval_metric_ops=eval_metric_ops)
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)


 def load_mnist():
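The comments in the hunk above capture the central DP-SGD detail of the Estimator path: the optimizer minimizes the per-example vector loss, while tf.estimator reports the scalar mean. Roughly, with the optimizer construction assumed from the flags defined earlier:

# Sketch: vector loss feeds the DP machinery; scalar loss is for reporting.
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=logits)
scalar_loss = tf.reduce_mean(vector_loss)
if FLAGS.dpsgd:
  optimizer = DPGradientDescentGaussianOptimizer(
      l2_norm_clip=FLAGS.l2_norm_clip,
      noise_multiplier=FLAGS.noise_multiplier,
      num_microbatches=FLAGS.microbatches,
      learning_rate=FLAGS.learning_rate)
  opt_loss = vector_loss  # per-example losses, clipped per microbatch
else:
  optimizer = tf.train.GradientDescentOptimizer(
      learning_rate=FLAGS.learning_rate)
  opt_loss = scalar_loss
train_op = optimizer.minimize(
    loss=opt_loss, global_step=tf.train.get_global_step())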
@@ -155,8 +148,8 @@ def main(unused_argv):
   train_data, train_labels, test_data, test_labels = load_mnist()

   # Instantiate the tf.Estimator.
-  mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
-                                            model_dir=FLAGS.model_dir)
+  mnist_classifier = tf.estimator.Estimator(
+      model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)

   # Create tf.Estimator input functions for the training and test data.
   train_input_fn = tf.estimator.inputs.numpy_input_fn(
@@ -166,10 +159,7 @@ def main(unused_argv):
       num_epochs=FLAGS.epochs,
       shuffle=True)
   eval_input_fn = tf.estimator.inputs.numpy_input_fn(
-      x={'x': test_data},
-      y=test_labels,
-      num_epochs=1,
-      shuffle=False)
+      x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)

   # Training loop.
   steps_per_epoch = NUM_TRAIN_EXAMPLES // FLAGS.batch_size
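The loop that follows alternates one epoch of training with evaluation and privacy reporting. As a rough sketch under the same naming assumptions:

# Sketch of the per-epoch loop: train, evaluate, then report the cumulative
# privacy budget via compute_epsilon defined above.
for epoch in range(1, FLAGS.epochs + 1):
  mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)
  eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
  print('Test accuracy after %d epochs is: %.3f' %
        (epoch, eval_results['accuracy']))
  if FLAGS.dpsgd:
    eps = compute_epsilon(epoch * steps_per_epoch)
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)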
@@ -189,5 +179,6 @@ def main(unused_argv):
   else:
     print('Trained with vanilla non-private SGD optimizer')

+
 if __name__ == '__main__':
   app.run(main)
View file
@@ -56,8 +56,8 @@ def lr_model_fn(features, labels, mode, nclasses, dim):
   logits = tf.keras.layers.Dense(
       units=nclasses,
       kernel_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer),
-      bias_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer)).apply(
-          input_layer)
+      bias_regularizer=tf.keras.regularizers.L2(
+          l2=FLAGS.regularizer)).apply(input_layer)

   # Calculate loss as a vector (to support microbatches in DP-SGD).
   vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
@@ -166,8 +166,7 @@ def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
     # an option.
     rdp = [order * coef for order in orders]
     eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
-    print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
-        p * 100, eps, delta))
+    print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(p * 100, eps, delta))

   # Compute privacy guarantees for the Sampled Gaussian Mechanism.
   rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
@@ -234,5 +233,6 @@ def main(unused_argv):
       noise_multiplier=FLAGS.noise_multiplier,
   )

+
 if __name__ == '__main__':
   app.run(main)
View file
@@ -114,7 +114,7 @@ def nn_model_fn(features, labels, mode):
     return tf.estimator.EstimatorSpec(
         mode=mode, loss=scalar_loss, train_op=train_op)

   # Add evaluation metrics (for EVAL mode).
   if mode == tf.estimator.ModeKeys.EVAL:
     eval_metric_ops = {
         'rmse':
View file
@@ -28,23 +28,19 @@ def cnn_model_fn(features, labels, mode):

   # Define CNN architecture using tf.keras.layers.
   input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
-  y = tf.keras.layers.Conv2D(16, 8,
-                             strides=2,
-                             padding='same',
-                             activation='relu').apply(input_layer)
+  y = tf.keras.layers.Conv2D(
+      16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
   y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
-  y = tf.keras.layers.Conv2D(32, 4,
-                             strides=2,
-                             padding='valid',
-                             activation='relu').apply(y)
+  y = tf.keras.layers.Conv2D(
+      32, 4, strides=2, padding='valid', activation='relu').apply(y)
   y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
   y = tf.keras.layers.Flatten().apply(y)
   y = tf.keras.layers.Dense(32, activation='relu').apply(y)
   logits = tf.keras.layers.Dense(10).apply(y)

   # Calculate loss as a vector and as its average across minibatch.
-  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
-                                                               logits=logits)
+  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+      labels=labels, logits=logits)
   scalar_loss = tf.reduce_mean(vector_loss)

   # Configure the training op (for TRAIN mode).
@@ -53,21 +49,18 @@ def cnn_model_fn(features, labels, mode):
       opt_loss = scalar_loss
     global_step = tf.train.get_global_step()
     train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
-    return tf.estimator.EstimatorSpec(mode=mode,
-                                      loss=scalar_loss,
-                                      train_op=train_op)
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=scalar_loss, train_op=train_op)

   # Add evaluation metrics (for EVAL mode).
   elif mode == tf.estimator.ModeKeys.EVAL:
     eval_metric_ops = {
         'accuracy':
             tf.metrics.accuracy(
-                labels=labels,
-                predictions=tf.argmax(input=logits, axis=1))
+                labels=labels, predictions=tf.argmax(input=logits, axis=1))
     }
-    return tf.estimator.EstimatorSpec(mode=mode,
-                                      loss=scalar_loss,
-                                      eval_metric_ops=eval_metric_ops)
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)


 def load_mnist():
@@ -109,10 +102,7 @@ def main(unused_argv):
       num_epochs=FLAGS.epochs,
       shuffle=True)
   eval_input_fn = tf.estimator.inputs.numpy_input_fn(
-      x={'x': test_data},
-      y=test_labels,
-      num_epochs=1,
-      shuffle=False)
+      x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)

   # Training loop.
   steps_per_epoch = 60000 // FLAGS.batch_size
@@ -125,5 +115,6 @@ def main(unused_argv):
     test_accuracy = eval_results['accuracy']
     print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))

+
 if __name__ == '__main__':
   tf.app.run()