Format TensorFlow Privacy files.

PiperOrigin-RevId: 424923635
Michael Reneer 2022-01-28 11:56:55 -08:00 committed by A. Unique TensorFlower
parent 07230a161a
commit 943ef91ee9
62 changed files with 1170 additions and 1087 deletions

View file

@ -23,7 +23,6 @@ from tensorflow_docs.api_generator import generate_lib
from tensorflow_docs.api_generator import public_api
import tensorflow_privacy as tf_privacy
flags.DEFINE_string('output_dir', '/tmp/tf_privacy',
'Where to output the docs.')
flags.DEFINE_string(

View file

@ -1,6 +1,5 @@
# Get Started
This document assumes you are already familiar with differential privacy, and
have determined that you would like to use TF Privacy to implement differential
privacy guarantees in your model(s). If you're not familiar with differential

View file

@ -17,7 +17,6 @@
import math
from absl import app
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
@ -33,8 +32,10 @@ def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')
print(
'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
' over {} steps satisfies'.format(100 * q, sigma, steps),
end=' ')
print('differential privacy with eps = {:.3g} and delta = {}.'.format(
eps, delta))
print('The optimal RDP order is {}.'.format(opt_order))
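Note: the two accountant calls exercised above compose as follows. A minimal, self-contained sketch; the import path is the one shown in this hunk, while the q/sigma/steps/delta values and the orders grid are hypothetical.

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

# Hypothetical DP-SGD run: sampling rate q, noise multiplier sigma, steps.
q, sigma, steps, delta = 0.01, 1.1, 10000, 1e-5
orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
rdp = compute_rdp(q, sigma, steps, orders)  # RDP of the subsampled Gaussian at each order
eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
print('eps = {:.3g} at delta = {} (optimal order {})'.format(eps, delta, opt_order))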

View file

@ -46,12 +46,13 @@ class ComputeDpSgdPrivacyTest(parameterized.TestCase):
# the basis of this sanity check. This is confirmed in the above paper.
q = batch_size / n
steps = epochs * n / batch_size
sigma = noise_multiplier * math.sqrt(steps) /q
sigma = noise_multiplier * math.sqrt(steps) / q
# We compute the optimal guarantee for Gaussian
# using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
low_delta = .5*math.erfc((eps*sigma-.5/sigma)/math.sqrt(2))
low_delta = .5 * math.erfc((eps * sigma - .5 / sigma) / math.sqrt(2))
if eps < 100: # Skip this if it causes overflow; error is minor.
low_delta -= math.exp(eps)*.5*math.erfc((eps*sigma+.5/sigma)/math.sqrt(2))
low_delta -= math.exp(eps) * .5 * math.erfc(
(eps * sigma + .5 / sigma) / math.sqrt(2))
self.assertLessEqual(low_delta, delta)
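Note: the erfc expressions above implement the optimal Gaussian mechanism guarantee from the cited paper (Theorem 8 of https://arxiv.org/abs/1805.06530, for sensitivity 1). Writing Phi for the standard normal CDF, so that Phi(x) = 0.5 * erfc(-x / sqrt(2)), the quantity low_delta is

    delta(eps) = Phi(1/(2*sigma) - eps*sigma) - exp(eps) * Phi(-1/(2*sigma) - eps*sigma)

and the assertion checks that the accountant's reported delta is never below this exact optimum.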

View file

@ -52,8 +52,8 @@ def main(argv):
assert FLAGS.batch_size is not None, 'Flag batch_size is missing.'
assert FLAGS.epsilon is not None, 'Flag epsilon is missing.'
assert FLAGS.epochs is not None, 'Flag epochs is missing.'
compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon,
FLAGS.epochs, FLAGS.delta, FLAGS.min_noise)
compute_noise(FLAGS.N, FLAGS.batch_size, FLAGS.epsilon, FLAGS.epochs,
FLAGS.delta, FLAGS.min_noise)
if __name__ == '__main__':

View file

@ -27,11 +27,12 @@ class ComputeNoiseFromBudgetTest(parameterized.TestCase):
('Test2', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1e-5, 0.1),
('Test3', 100000000, 1024, 5907984.81339406, 10, 1e-7, 1, 0),
)
def test_compute_noise(self, n, batch_size, target_epsilon, epochs,
delta, min_noise, expected_noise):
def test_compute_noise(self, n, batch_size, target_epsilon, epochs, delta,
min_noise, expected_noise):
target_noise = compute_noise_from_budget_lib.compute_noise(
n, batch_size, target_epsilon, epochs, delta, min_noise)
self.assertAlmostEqual(target_noise, expected_noise)
if __name__ == '__main__':
absltest.main()
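Note: a minimal usage sketch of the function under test, called positionally exactly as in the test above; the dataset numbers are hypothetical.

from tensorflow_privacy.privacy.analysis import compute_noise_from_budget_lib

# Arguments: n, batch_size, target_epsilon, epochs, delta, min_noise.
noise_multiplier = compute_noise_from_budget_lib.compute_noise(
    60000, 256, 2.0, 30, 1e-5, 0.1)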

View file

@ -176,4 +176,3 @@ class SampledWithoutReplacementDpEvent(DpEvent):
source_dataset_size: int
sample_size: int
event: DpEvent

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for DpEventBuilder."""
from absl.testing import absltest
from tensorflow_privacy.privacy.analysis import dp_event
@ -68,7 +67,8 @@ class DpEventBuilderTest(absltest.TestCase):
expected_event = dp_event.ComposedDpEvent([
_gaussian_event,
dp_event.SelfComposedDpEvent(composed_event, 3),
dp_event.SelfComposedDpEvent(_poisson_event, 2)])
dp_event.SelfComposedDpEvent(_poisson_event, 2)
])
self.assertEqual(expected_event, builder.build())
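Note: a sketch of how the builder in this test is driven, assuming DpEventBuilder lives beside dp_event and exposes a compose(event, count) method; the noise value is hypothetical.

from tensorflow_privacy.privacy.analysis import dp_event
from tensorflow_privacy.privacy.analysis import dp_event_builder

builder = dp_event_builder.DpEventBuilder()
gaussian = dp_event.GaussianDpEvent(noise_multiplier=1.0)
builder.compose(gaussian)     # a single application
builder.compose(gaussian, 3)  # self-composed three more times
event = builder.build()       # collapses into the (Self)ComposedDpEvent structure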

View file

@ -84,13 +84,13 @@ class TensorBuffer(object):
dtype=self._dtype,
initializer=new_buffer,
trainable=False)
return self._buffer, tf.assign(
self._capacity, tf.multiply(self._capacity, 2))
return self._buffer, tf.assign(self._capacity,
tf.multiply(self._capacity, 2))
else:
return tf.assign(
self._buffer, new_buffer,
validate_shape=False), tf.assign(
self._capacity, tf.multiply(self._capacity, 2))
validate_shape=False), tf.assign(self._capacity,
tf.multiply(self._capacity, 2))
update_buffer, update_capacity = tf.cond(
pred=tf.equal(self._current_size, self._capacity),

View file

@ -44,9 +44,8 @@ class TensorBufferTest(tf.test.TestCase):
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
with self.assertRaisesRegex(
tf.errors.InvalidArgumentError,
'Appending value of inconsistent shape.'):
with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
'Appending value of inconsistent shape.'):
my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
def test_resize(self):
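Note: a usage sketch for the buffer exercised above, with constructor arguments (capacity, shape, then name) inferred from this test; the int32 dtype default is an assumption.

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.analysis import tensor_buffer

my_buffer = tensor_buffer.TensorBuffer(4, [2, 3], name='my_buffer')
# A [2, 3] append succeeds; once the buffer holds 4 values, the next append
# doubles capacity via the tf.assign pair shown in the hunk above.
my_buffer.append(tf.ones(shape=[2, 3], dtype=tf.int32))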

View file

@ -187,5 +187,6 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
tree_aggregation_accountant._compute_gaussian_zcdp(
sigma, sum_sensitivity_square))
if __name__ == '__main__':
tf.test.main()

View file

@ -68,7 +68,6 @@ class StrongConvexMixin:
Args:
class_weight: class weights used
Returns: L
"""
raise NotImplementedError("lipchitz constant not implemented for "
@ -126,13 +125,10 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
if reg_lambda <= 0:
raise ValueError("reg lambda: {0} must be positive".format(reg_lambda))
if radius_constant <= 0:
raise ValueError("radius_constant: {0}, should be >= 0".format(
radius_constant
))
raise ValueError(
"radius_constant: {0}, should be >= 0".format(radius_constant))
if delta <= 0:
raise ValueError("delta: {0}, should be >= 0".format(
delta
))
raise ValueError("delta: {0}, should be >= 0".format(delta))
self.C = c_arg # pylint: disable=invalid-name
self.delta = delta
self.radius_constant = radius_constant
@ -172,9 +168,7 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
def beta(self, class_weight):
"""See super class."""
max_class_weight = self.max_class_weight(class_weight, self.dtype)
delta = _ops.convert_to_tensor_v2(self.delta,
dtype=self.dtype
)
delta = _ops.convert_to_tensor_v2(self.delta, dtype=self.dtype)
return self.C * max_class_weight / (delta *
tf.constant(2, dtype=self.dtype)) + \
self.reg_lambda
@ -197,13 +191,11 @@ class StrongConvexHuber(losses.Loss, StrongConvexMixin):
The L2 regularizer layer for this loss function, with regularizer constant
set to half the 0.5 * reg_lambda.
"""
return L1L2(l2=self.reg_lambda/2)
return L1L2(l2=self.reg_lambda / 2)
class StrongConvexBinaryCrossentropy(
losses.BinaryCrossentropy,
StrongConvexMixin
):
class StrongConvexBinaryCrossentropy(losses.BinaryCrossentropy,
StrongConvexMixin):
"""Strongly Convex BinaryCrossentropy loss using l2 weight regularization."""
def __init__(self,
@ -222,10 +214,10 @@ class StrongConvexBinaryCrossentropy(
radius_constant: constant defining the length of the radius
from_logits: True if the input are unscaled logits. False if they are
already scaled.
label_smoothing: amount of smoothing to perform on labels
relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the
impact of this parameter's effect on privacy is not known and thus the
default should be used.
label_smoothing: amount of smoothing to perform on labels relaxation of
trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). Note, the impact of this
parameter's effect on privacy is not known and thus the default should
be used.
reduction: reduction type to use. See super class
dtype: tf datatype to use for tensor conversions.
"""
@ -239,9 +231,8 @@ class StrongConvexBinaryCrossentropy(
if c_arg <= 0:
raise ValueError("c: {0}, should be >= 0".format(c_arg))
if radius_constant <= 0:
raise ValueError("radius_constant: {0}, should be >= 0".format(
radius_constant
))
raise ValueError(
"radius_constant: {0}, should be >= 0".format(radius_constant))
self.dtype = dtype
self.C = c_arg # pylint: disable=invalid-name
self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype)
@ -294,4 +285,4 @@ class StrongConvexBinaryCrossentropy(
The L2 regularizer layer for this loss function, with regularizer constant
set to half the 0.5 * reg_lambda.
"""
return L1L2(l2=self.reg_lambda/2)
return L1L2(l2=self.reg_lambda / 2)

View file

@ -40,21 +40,29 @@ def captured_output():
class StrongConvexMixinTests(keras_parameterized.TestCase):
"""Tests for the StrongConvexMixin."""
@parameterized.named_parameters([
{'testcase_name': 'beta not implemented',
'fn': 'beta',
'args': [1]},
{'testcase_name': 'gamma not implemented',
'fn': 'gamma',
'args': []},
{'testcase_name': 'lipchitz not implemented',
'fn': 'lipchitz_constant',
'args': [1]},
{'testcase_name': 'radius not implemented',
'fn': 'radius',
'args': []},
])
@parameterized.named_parameters([
{
'testcase_name': 'beta not implemented',
'fn': 'beta',
'args': [1]
},
{
'testcase_name': 'gamma not implemented',
'fn': 'gamma',
'args': []
},
{
'testcase_name': 'lipchitz not implemented',
'fn': 'lipchitz_constant',
'args': [1]
},
{
'testcase_name': 'radius not implemented',
'fn': 'radius',
'args': []
},
])
def test_not_implemented(self, fn, args):
"""Test that the given fn's are not implemented on the mixin.
@ -67,9 +75,11 @@ class StrongConvexMixinTests(keras_parameterized.TestCase):
getattr(loss, fn, None)(*args)
@parameterized.named_parameters([
{'testcase_name': 'radius not implemented',
'fn': 'kernel_regularizer',
'args': []},
{
'testcase_name': 'radius not implemented',
'fn': 'kernel_regularizer',
'args': []
},
])
def test_return_none(self, fn, args):
"""Test that fn of Mixin returns None.
@ -87,10 +97,11 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
"""tests for BinaryCrossesntropy StrongConvex loss."""
@parameterized.named_parameters([
{'testcase_name': 'normal',
'reg_lambda': 1,
'C': 1,
'radius_constant': 1
{
'testcase_name': 'normal',
'reg_lambda': 1,
'C': 1,
'radius_constant': 1
}, # pylint: disable=invalid-name
])
def test_init_params(self, reg_lambda, C, radius_constant):
@ -106,24 +117,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
self.assertIsInstance(loss, StrongConvexBinaryCrossentropy)
@parameterized.named_parameters([
{'testcase_name': 'negative c',
'reg_lambda': 1,
'C': -1,
'radius_constant': 1
{
'testcase_name': 'negative c',
'reg_lambda': 1,
'C': -1,
'radius_constant': 1
},
{'testcase_name': 'negative radius',
'reg_lambda': 1,
'C': 1,
'radius_constant': -1
{
'testcase_name': 'negative radius',
'reg_lambda': 1,
'C': 1,
'radius_constant': -1
},
{'testcase_name': 'negative lambda',
'reg_lambda': -1,
'C': 1,
'radius_constant': 1
{
'testcase_name': 'negative lambda',
'reg_lambda': -1,
'C': 1,
'radius_constant': 1
}, # pylint: disable=invalid-name
])
def test_bad_init_params(self, reg_lambda, C, radius_constant):
"""Test invalid domain for given params. Should return ValueError.
"""Test invalid domain for given params.
Should return ValueError.
Args:
reg_lambda: initialization value for reg_lambda arg
@ -137,25 +153,29 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
# [] for compatibility with tensorflow loss calculation
{'testcase_name': 'both positive',
'logits': [10000],
'y_true': [1],
'result': 0,
{
'testcase_name': 'both positive',
'logits': [10000],
'y_true': [1],
'result': 0,
},
{'testcase_name': 'positive gradient negative logits',
'logits': [-10000],
'y_true': [1],
'result': 10000,
{
'testcase_name': 'positive gradient negative logits',
'logits': [-10000],
'y_true': [1],
'result': 10000,
},
{'testcase_name': 'positive gradient positive logits',
'logits': [10000],
'y_true': [0],
'result': 10000,
{
'testcase_name': 'positive gradient positive logits',
'logits': [10000],
'y_true': [0],
'result': 10000,
},
{'testcase_name': 'both negative',
'logits': [-10000],
'y_true': [0],
'result': 0
{
'testcase_name': 'both negative',
'logits': [-10000],
'y_true': [0],
'result': 0
},
])
def test_calculation(self, logits, y_true, result):
@ -173,29 +193,33 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
self.assertEqual(loss.numpy(), result)
@parameterized.named_parameters([
{'testcase_name': 'beta',
'init_args': [1, 1, 1],
'fn': 'beta',
'args': [1],
'result': tf.constant(2, dtype=tf.float32)
{
'testcase_name': 'beta',
'init_args': [1, 1, 1],
'fn': 'beta',
'args': [1],
'result': tf.constant(2, dtype=tf.float32)
},
{'testcase_name': 'gamma',
'fn': 'gamma',
'init_args': [1, 1, 1],
'args': [],
'result': tf.constant(1, dtype=tf.float32),
{
'testcase_name': 'gamma',
'fn': 'gamma',
'init_args': [1, 1, 1],
'args': [],
'result': tf.constant(1, dtype=tf.float32),
},
{'testcase_name': 'lipchitz constant',
'fn': 'lipchitz_constant',
'init_args': [1, 1, 1],
'args': [1],
'result': tf.constant(2, dtype=tf.float32),
{
'testcase_name': 'lipchitz constant',
'fn': 'lipchitz_constant',
'init_args': [1, 1, 1],
'args': [1],
'result': tf.constant(2, dtype=tf.float32),
},
{'testcase_name': 'kernel regularizer',
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1],
'args': [],
'result': L1L2(l2=0.5),
{
'testcase_name': 'kernel regularizer',
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1],
'args': [],
'result': L1L2(l2=0.5),
},
])
def test_fns(self, init_args, fn, args, result):
@ -218,11 +242,12 @@ class BinaryCrossesntropyTests(keras_parameterized.TestCase):
self.assertEqual(expected, result)
@parameterized.named_parameters([
{'testcase_name': 'label_smoothing',
'init_args': [1, 1, 1, True, 0.1],
'fn': None,
'args': None,
'print_res': 'The impact of label smoothing on privacy is unknown.'
{
'testcase_name': 'label_smoothing',
'init_args': [1, 1, 1, True, 0.1],
'fn': None,
'args': None,
'print_res': 'The impact of label smoothing on privacy is unknown.'
},
])
def test_prints(self, init_args, fn, args, print_res):
@ -245,11 +270,12 @@ class HuberTests(keras_parameterized.TestCase):
"""tests for BinaryCrossesntropy StrongConvex loss."""
@parameterized.named_parameters([
{'testcase_name': 'normal',
'reg_lambda': 1,
'c': 1,
'radius_constant': 1,
'delta': 1,
{
'testcase_name': 'normal',
'reg_lambda': 1,
'c': 1,
'radius_constant': 1,
'delta': 1,
},
])
def test_init_params(self, reg_lambda, c, radius_constant, delta):
@ -266,33 +292,39 @@ class HuberTests(keras_parameterized.TestCase):
self.assertIsInstance(loss, StrongConvexHuber)
@parameterized.named_parameters([
{'testcase_name': 'negative c',
'reg_lambda': 1,
'c': -1,
'radius_constant': 1,
'delta': 1
{
'testcase_name': 'negative c',
'reg_lambda': 1,
'c': -1,
'radius_constant': 1,
'delta': 1
},
{'testcase_name': 'negative radius',
'reg_lambda': 1,
'c': 1,
'radius_constant': -1,
'delta': 1
{
'testcase_name': 'negative radius',
'reg_lambda': 1,
'c': 1,
'radius_constant': -1,
'delta': 1
},
{'testcase_name': 'negative lambda',
'reg_lambda': -1,
'c': 1,
'radius_constant': 1,
'delta': 1
{
'testcase_name': 'negative lambda',
'reg_lambda': -1,
'c': 1,
'radius_constant': 1,
'delta': 1
},
{'testcase_name': 'negative delta',
'reg_lambda': 1,
'c': 1,
'radius_constant': 1,
'delta': -1
{
'testcase_name': 'negative delta',
'reg_lambda': 1,
'c': 1,
'radius_constant': 1,
'delta': -1
},
])
def test_bad_init_params(self, reg_lambda, c, radius_constant, delta):
"""Test invalid domain for given params. Should return ValueError.
"""Test invalid domain for given params.
Should return ValueError.
Args:
reg_lambda: initialization value for reg_lambda arg
@ -307,59 +339,68 @@ class HuberTests(keras_parameterized.TestCase):
# test the bounds and test varied delta's
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
'logits': 2.1,
'y_true': 1,
'delta': 1,
'result': 0,
{
'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary',
'logits': 2.1,
'y_true': 1,
'delta': 1,
'result': 0,
},
{'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
'logits': 1.9,
'y_true': 1,
'delta': 1,
'result': 0.01*0.25,
{
'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary',
'logits': 1.9,
'y_true': 1,
'delta': 1,
'result': 0.01 * 0.25,
},
{'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
'logits': 0.1,
'y_true': 1,
'delta': 1,
'result': 1.9**2 * 0.25,
{
'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary',
'logits': 0.1,
'y_true': 1,
'delta': 1,
'result': 1.9**2 * 0.25,
},
{'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
'logits': -0.1,
'y_true': 1,
'delta': 1,
'result': 1.1,
{
'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary',
'logits': -0.1,
'y_true': 1,
'delta': 1,
'result': 1.1,
},
{'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
'logits': 3.1,
'y_true': 1,
'delta': 2,
'result': 0,
{
'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary',
'logits': 3.1,
'y_true': 1,
'delta': 2,
'result': 0,
},
{'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
'logits': 2.9,
'y_true': 1,
'delta': 2,
'result': 0.01*0.125,
{
'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary',
'logits': 2.9,
'y_true': 1,
'delta': 2,
'result': 0.01 * 0.125,
},
{'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
'logits': 1.1,
'y_true': 1,
'delta': 2,
'result': 1.9**2 * 0.125,
{
'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary',
'logits': 1.1,
'y_true': 1,
'delta': 2,
'result': 1.9**2 * 0.125,
},
{'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
'logits': -1.1,
'y_true': 1,
'delta': 2,
'result': 2.1,
{
'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary',
'logits': -1.1,
'y_true': 1,
'delta': 2,
'result': 2.1,
},
{'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
'logits': -2.1,
'y_true': -1,
'delta': 1,
'result': 0,
{
'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary',
'logits': -2.1,
'y_true': -1,
'delta': 1,
'result': 0,
},
])
def test_calculation(self, logits, y_true, delta, result):
@ -378,29 +419,33 @@ class HuberTests(keras_parameterized.TestCase):
self.assertAllClose(loss.numpy(), result)
@parameterized.named_parameters([
{'testcase_name': 'beta',
'init_args': [1, 1, 1, 1],
'fn': 'beta',
'args': [1],
'result': tf.Variable(1.5, dtype=tf.float32)
{
'testcase_name': 'beta',
'init_args': [1, 1, 1, 1],
'fn': 'beta',
'args': [1],
'result': tf.Variable(1.5, dtype=tf.float32)
},
{'testcase_name': 'gamma',
'fn': 'gamma',
'init_args': [1, 1, 1, 1],
'args': [],
'result': tf.Variable(1, dtype=tf.float32),
{
'testcase_name': 'gamma',
'fn': 'gamma',
'init_args': [1, 1, 1, 1],
'args': [],
'result': tf.Variable(1, dtype=tf.float32),
},
{'testcase_name': 'lipchitz constant',
'fn': 'lipchitz_constant',
'init_args': [1, 1, 1, 1],
'args': [1],
'result': tf.Variable(2, dtype=tf.float32),
{
'testcase_name': 'lipchitz constant',
'fn': 'lipchitz_constant',
'init_args': [1, 1, 1, 1],
'args': [1],
'result': tf.Variable(2, dtype=tf.float32),
},
{'testcase_name': 'kernel regularizer',
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1, 1],
'args': [],
'result': L1L2(l2=0.5),
{
'testcase_name': 'kernel regularizer',
'fn': 'kernel_regularizer',
'init_args': [1, 1, 1, 1],
'args': [],
'result': L1L2(l2=0.5),
},
])
def test_fns(self, init_args, fn, args, result):

View file

@ -38,10 +38,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
Descent-based Analytics by Xi Wu et al.
"""
def __init__(self,
n_outputs,
seed=1,
dtype=tf.float32):
def __init__(self, n_outputs, seed=1, dtype=tf.float32):
"""Private constructor.
Args:
@ -51,9 +48,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
"""
super(BoltOnModel, self).__init__(name='bolton', dynamic=False)
if n_outputs <= 0:
raise ValueError('n_outputs = {0} is not valid. Must be > 0.'.format(
n_outputs
))
raise ValueError(
'n_outputs = {0} is not valid. Must be > 0.'.format(n_outputs))
self.n_outputs = n_outputs
self.seed = seed
self._layers_instantiated = False
@ -76,11 +72,13 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
loss,
kernel_initializer=tf.initializers.GlorotUniform,
**kwargs): # pylint: disable=arguments-differ
"""See super class. Default optimizer used in BoltOn method is SGD.
"""See super class.
Default optimizer used in BoltOn method is SGD.
Args:
optimizer: The optimizer to use. This will be automatically wrapped
with the BoltOn Optimizer.
optimizer: The optimizer to use. This will be automatically wrapped with
the BoltOn Optimizer.
loss: The loss function to use. Must be a StrongConvex loss (extend the
StrongConvexMixin).
kernel_initializer: The kernel initializer to use for the single layer.
@ -128,10 +126,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
y: Labels to fit on, see super.
batch_size: The batch size to use for training, see super.
class_weight: the class weights to be used. Can be a scalar or 1D tensor
whose dim == n_classes.
whose dim == n_classes.
n_samples: the number of individual samples in x.
epsilon: privacy parameter, which trades off between utility and privacy.
See the bolt-on paper for more description.
See the bolt-on paper for more description.
noise_distribution: the distribution to pull noise from.
steps_per_epoch:
**kwargs: kwargs to keras Model.fit. See super.
@ -152,8 +150,7 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
else:
data_size = None
batch_size_ = self._validate_or_infer_batch_size(batch_size,
steps_per_epoch,
x)
steps_per_epoch, x)
if batch_size_ is None:
batch_size_ = 32
# inferring batch_size to be passed to optimizer. batch_size must remain its
@ -164,18 +161,15 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
if data_size is None:
raise ValueError('Could not infer the number of samples. Please pass '
'this in using n_samples.')
with self.optimizer(noise_distribution,
epsilon,
self.layers,
class_weight_,
data_size,
batch_size_) as _:
out = super(BoltOnModel, self).fit(x=x,
y=y,
batch_size=batch_size,
class_weight=class_weight,
steps_per_epoch=steps_per_epoch,
**kwargs)
with self.optimizer(noise_distribution, epsilon, self.layers, class_weight_,
data_size, batch_size_) as _:
out = super(BoltOnModel, self).fit(
x=x,
y=y,
batch_size=batch_size,
class_weight=class_weight,
steps_per_epoch=steps_per_epoch,
**kwargs)
return out
def fit_generator(self,
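Note: pulling this fit path together, a hypothetical end-to-end call; the module paths and toy data are assumptions, while the keyword arguments mirror the signature above.

import tensorflow as tf
from tensorflow_privacy.privacy.bolt_on import models
from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy

x, y = tf.ones((10, 4)), tf.ones((10, 1))  # toy data
clf = models.BoltOnModel(n_outputs=1)
clf.compile(optimizer='sgd', loss=StrongConvexBinaryCrossentropy(1, 1, 1))
# fit() wraps training in the BoltOn optimizer context shown above.
clf.fit(x, y, batch_size=2, n_samples=10,
        noise_distribution='laplace', epsilon=1)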
@ -194,10 +188,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
Args:
generator: Inputs generator following Tensorflow guidelines, see super.
class_weight: the class weights to be used. Can be a scalar or 1D tensor
whose dim == n_classes.
whose dim == n_classes.
noise_distribution: the distribution to get noise from.
epsilon: privacy parameter, which trades off utility and privacy. See
BoltOn paper for more description.
BoltOn paper for more description.
n_samples: number of individual samples in x
steps_per_epoch: Number of steps per training epoch, see super.
**kwargs: **kwargs
@ -222,12 +216,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
generator)
if batch_size is None:
batch_size = 32
with self.optimizer(noise_distribution,
epsilon,
self.layers,
class_weight,
data_size,
batch_size) as _:
with self.optimizer(noise_distribution, epsilon, self.layers, class_weight,
data_size, batch_size) as _:
out = super(BoltOnModel, self).fit_generator(
generator,
class_weight=class_weight,
@ -243,10 +233,10 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
Args:
class_weights: str specifying type, array giving weights, or None.
class_counts: If class_weights is not None, then an array of
the number of samples for each class
num_classes: If class_weights is not None, then the number of
classes.
class_counts: If class_weights is not None, then an array of the number of
samples for each class
num_classes: If class_weights is not None, then the number of classes.
Returns:
class_weights as 1D tensor, to be passed to model's fit method.
"""
@ -259,14 +249,12 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
raise ValueError('Detected string class_weights with '
'value: {0}, which is not one of {1}.'
'Please select a valid class_weight type'
'or pass an array'.format(class_weights,
class_keys))
'or pass an array'.format(class_weights, class_keys))
if class_counts is None:
raise ValueError('Class counts must be provided if using '
'class_weights=%s' % class_weights)
class_counts_shape = tf.Variable(class_counts,
trainable=False,
dtype=self._dtype).shape
class_counts_shape = tf.Variable(
class_counts, trainable=False, dtype=self._dtype).shape
if len(class_counts_shape) != 1:
raise ValueError('class counts must be a 1D array.'
'Detected: {0}'.format(class_counts_shape))
@ -282,9 +270,8 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
class_weights = 1
elif is_string and class_weights == 'balanced':
num_samples = sum(class_counts)
weighted_counts = tf.dtypes.cast(tf.math.multiply(num_classes,
class_counts),
self._dtype)
weighted_counts = tf.dtypes.cast(
tf.math.multiply(num_classes, class_counts), self._dtype)
class_weights = tf.Variable(num_samples, dtype=self._dtype) / \
tf.Variable(weighted_counts, dtype=self._dtype)
else:
@ -293,8 +280,6 @@ class BoltOnModel(Model): # pylint: disable=abstract-method
raise ValueError('Detected class_weights shape: {0} instead of '
'1D array'.format(class_weights.shape))
if class_weights.shape[0] != num_classes:
raise ValueError(
'Detected array length: {0} instead of: {1}'.format(
class_weights.shape[0],
num_classes))
raise ValueError('Detected array length: {0} instead of: {1}'.format(
class_weights.shape[0], num_classes))
return class_weights

View file

@ -73,9 +73,7 @@ class TestLoss(losses.Loss, StrongConvexMixin):
def call(self, y_true, y_pred):
"""Loss function that is minimized at the mean of the input points."""
return 0.5 * tf.reduce_sum(
tf.math.squared_difference(y_true, y_pred),
axis=1
)
tf.math.squared_difference(y_true, y_pred), axis=1)
def max_class_weight(self, class_weight):
"""the maximum weighting in class weights (max value) as a scalar tensor.
@ -125,11 +123,13 @@ class InitTests(keras_parameterized.TestCase):
"""Tests for keras model initialization."""
@parameterized.named_parameters([
{'testcase_name': 'normal',
'n_outputs': 1,
{
'testcase_name': 'normal',
'n_outputs': 1,
},
{'testcase_name': 'many outputs',
'n_outputs': 100,
{
'testcase_name': 'many outputs',
'n_outputs': 100,
},
])
def test_init_params(self, n_outputs):
@ -143,8 +143,9 @@ class InitTests(keras_parameterized.TestCase):
self.assertIsInstance(clf, models.BoltOnModel)
@parameterized.named_parameters([
{'testcase_name': 'invalid n_outputs',
'n_outputs': -1,
{
'testcase_name': 'invalid n_outputs',
'n_outputs': -1,
},
])
def test_bad_init_params(self, n_outputs):
@ -158,15 +159,17 @@ class InitTests(keras_parameterized.TestCase):
models.BoltOnModel(n_outputs)
@parameterized.named_parameters([
{'testcase_name': 'string compile',
'n_outputs': 1,
'loss': TestLoss(1, 1, 1),
'optimizer': 'adam',
{
'testcase_name': 'string compile',
'n_outputs': 1,
'loss': TestLoss(1, 1, 1),
'optimizer': 'adam',
},
{'testcase_name': 'test compile',
'n_outputs': 100,
'loss': TestLoss(1, 1, 1),
'optimizer': TestOptimizer(),
{
'testcase_name': 'test compile',
'n_outputs': 100,
'loss': TestLoss(1, 1, 1),
'optimizer': TestOptimizer(),
},
])
def test_compile(self, n_outputs, loss, optimizer):
@ -183,18 +186,17 @@ class InitTests(keras_parameterized.TestCase):
clf.compile(optimizer, loss)
self.assertEqual(clf.loss, loss)
@parameterized.named_parameters([
{'testcase_name': 'Not strong loss',
'n_outputs': 1,
'loss': losses.BinaryCrossentropy(),
'optimizer': 'adam',
},
{'testcase_name': 'Not valid optimizer',
'n_outputs': 1,
'loss': TestLoss(1, 1, 1),
'optimizer': 'ada',
}
])
@parameterized.named_parameters([{
'testcase_name': 'Not strong loss',
'n_outputs': 1,
'loss': losses.BinaryCrossentropy(),
'optimizer': 'adam',
}, {
'testcase_name': 'Not valid optimizer',
'n_outputs': 1,
'loss': TestLoss(1, 1, 1),
'optimizer': 'ada',
}])
def test_bad_compile(self, n_outputs, loss, optimizer):
"""test bad compilations of BoltOnModel that should raise errors.
@ -231,17 +233,11 @@ def _cat_dataset(n_samples, input_dim, n_classes, batch_size, generator=False):
x_stack = []
y_stack = []
for i_class in range(n_classes):
x_stack.append(
tf.constant(1*i_class, tf.float32, (n_samples, input_dim))
)
y_stack.append(
tf.constant(i_class, tf.float32, (n_samples, n_classes))
)
x_stack.append(tf.constant(1 * i_class, tf.float32, (n_samples, input_dim)))
y_stack.append(tf.constant(i_class, tf.float32, (n_samples, n_classes)))
x_set, y_set = tf.stack(x_stack), tf.stack(y_stack)
if generator:
dataset = tf.data.Dataset.from_tensor_slices(
(x_set, y_set)
)
dataset = tf.data.Dataset.from_tensor_slices((x_set, y_set))
dataset = dataset.batch(batch_size=batch_size)
return dataset
return x_set, y_set
@ -266,8 +262,8 @@ def _do_fit(n_samples,
epsilon: privacy parameter
generator: True to create a generator, False to use an iterator
batch_size: batch_size to use
reset_n_samples: True to set _samples to None prior to fitting.
False does nothing
reset_n_samples: True to set _samples to None prior to fitting. False does
nothing
optimizer: instance of TestOptimizer
loss: instance of TestLoss
distribution: distribution to get noise from.
@ -279,37 +275,30 @@ def _do_fit(n_samples,
clf.compile(optimizer, loss)
if generator:
x = _cat_dataset(
n_samples,
input_dim,
n_outputs,
batch_size,
generator=generator
)
n_samples, input_dim, n_outputs, batch_size, generator=generator)
y = None
# x = x.batch(batch_size)
x = x.shuffle(n_samples//2)
x = x.shuffle(n_samples // 2)
batch_size = None
if reset_n_samples:
n_samples = None
clf.fit_generator(x,
n_samples=n_samples,
noise_distribution=distribution,
epsilon=epsilon)
clf.fit_generator(
x,
n_samples=n_samples,
noise_distribution=distribution,
epsilon=epsilon)
else:
x, y = _cat_dataset(
n_samples,
input_dim,
n_outputs,
batch_size,
generator=generator)
n_samples, input_dim, n_outputs, batch_size, generator=generator)
if reset_n_samples:
n_samples = None
clf.fit(x,
y,
batch_size=batch_size,
n_samples=n_samples,
noise_distribution=distribution,
epsilon=epsilon)
clf.fit(
x,
y,
batch_size=batch_size,
n_samples=n_samples,
noise_distribution=distribution,
epsilon=epsilon)
return clf
@ -318,21 +307,25 @@ class FitTests(keras_parameterized.TestCase):
# @test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'iterator fit',
'generator': False,
'reset_n_samples': True,
{
'testcase_name': 'iterator fit',
'generator': False,
'reset_n_samples': True,
},
{'testcase_name': 'iterator fit no samples',
'generator': False,
'reset_n_samples': True,
{
'testcase_name': 'iterator fit no samples',
'generator': False,
'reset_n_samples': True,
},
{'testcase_name': 'generator fit',
'generator': True,
'reset_n_samples': False,
{
'testcase_name': 'generator fit',
'generator': True,
'reset_n_samples': False,
},
{'testcase_name': 'with callbacks',
'generator': True,
'reset_n_samples': False,
{
'testcase_name': 'with callbacks',
'generator': True,
'reset_n_samples': False,
},
])
def test_fit(self, generator, reset_n_samples):
@ -363,8 +356,9 @@ class FitTests(keras_parameterized.TestCase):
self.assertEqual(hasattr(clf, 'layers'), True)
@parameterized.named_parameters([
{'testcase_name': 'generator fit',
'generator': True,
{
'testcase_name': 'generator fit',
'generator': True,
},
])
def test_fit_gen(self, generator):
@ -382,27 +376,24 @@ class FitTests(keras_parameterized.TestCase):
clf = models.BoltOnModel(n_classes)
clf.compile(optimizer, loss)
x = _cat_dataset(
n_samples,
input_dim,
n_classes,
batch_size,
generator=generator
)
n_samples, input_dim, n_classes, batch_size, generator=generator)
x = x.batch(batch_size)
x = x.shuffle(n_samples // 2)
clf.fit_generator(x, n_samples=n_samples)
self.assertEqual(hasattr(clf, 'layers'), True)
@parameterized.named_parameters([
{'testcase_name': 'iterator no n_samples',
'generator': True,
'reset_n_samples': True,
'distribution': 'laplace'
{
'testcase_name': 'iterator no n_samples',
'generator': True,
'reset_n_samples': True,
'distribution': 'laplace'
},
{'testcase_name': 'invalid distribution',
'generator': True,
'reset_n_samples': True,
'distribution': 'not_valid'
{
'testcase_name': 'invalid distribution',
'generator': True,
'reset_n_samples': True,
'distribution': 'not_valid'
},
])
def test_bad_fit(self, generator, reset_n_samples, distribution):
@ -422,40 +413,33 @@ class FitTests(keras_parameterized.TestCase):
epsilon = 1
batch_size = 1
n_samples = 10
_do_fit(
n_samples,
input_dim,
n_classes,
epsilon,
generator,
batch_size,
reset_n_samples,
optimizer,
loss,
distribution
)
_do_fit(n_samples, input_dim, n_classes, epsilon, generator, batch_size,
reset_n_samples, optimizer, loss, distribution)
@parameterized.named_parameters([
{'testcase_name': 'None class_weights',
'class_weights': None,
'class_counts': None,
'num_classes': None,
'result': 1},
{'testcase_name': 'class weights array',
'class_weights': [1, 1],
'class_counts': [1, 1],
'num_classes': 2,
'result': [1, 1]},
{'testcase_name': 'class weights balanced',
'class_weights': 'balanced',
'class_counts': [1, 1],
'num_classes': 2,
'result': [1, 1]},
{
'testcase_name': 'None class_weights',
'class_weights': None,
'class_counts': None,
'num_classes': None,
'result': 1
},
{
'testcase_name': 'class weights array',
'class_weights': [1, 1],
'class_counts': [1, 1],
'num_classes': 2,
'result': [1, 1]
},
{
'testcase_name': 'class weights balanced',
'class_weights': 'balanced',
'class_counts': [1, 1],
'num_classes': 2,
'result': [1, 1]
},
])
def test_class_calculate(self,
class_weights,
class_counts,
num_classes,
def test_class_calculate(self, class_weights, class_counts, num_classes,
result):
"""Tests the BOltonModel calculate_class_weights method.
@ -466,61 +450,68 @@ class FitTests(keras_parameterized.TestCase):
result: expected result
"""
clf = models.BoltOnModel(1, 1)
expected = clf.calculate_class_weights(class_weights,
class_counts,
expected = clf.calculate_class_weights(class_weights, class_counts,
num_classes)
if hasattr(expected, 'numpy'):
expected = expected.numpy()
self.assertAllEqual(
expected,
result
)
@parameterized.named_parameters([
{'testcase_name': 'class weight not valid str',
'class_weights': 'not_valid',
'class_counts': 1,
'num_classes': 1,
'err_msg': 'Detected string class_weights with value: not_valid'},
{'testcase_name': 'no class counts',
'class_weights': 'balanced',
'class_counts': None,
'num_classes': 1,
'err_msg': 'Class counts must be provided if '
'using class_weights=balanced'},
{'testcase_name': 'no num classes',
'class_weights': 'balanced',
'class_counts': [1],
'num_classes': None,
'err_msg': 'num_classes must be provided if '
'using class_weights=balanced'},
{'testcase_name': 'class counts not array',
'class_weights': 'balanced',
'class_counts': 1,
'num_classes': None,
'err_msg': 'class counts must be a 1D array.'},
{'testcase_name': 'class counts array, no num classes',
'class_weights': [1],
'class_counts': None,
'num_classes': None,
'err_msg': 'You must pass a value for num_classes if '
'creating an array of class_weights'},
{'testcase_name': 'class counts array, improper shape',
'class_weights': [[1], [1]],
'class_counts': None,
'num_classes': 2,
'err_msg': 'Detected class_weights shape'},
{'testcase_name': 'class counts array, wrong number classes',
'class_weights': [1, 1, 1],
'class_counts': None,
'num_classes': 2,
'err_msg': 'Detected array length:'},
])
self.assertAllEqual(expected, result)
def test_class_errors(self,
class_weights,
class_counts,
num_classes,
@parameterized.named_parameters([
{
'testcase_name': 'class weight not valid str',
'class_weights': 'not_valid',
'class_counts': 1,
'num_classes': 1,
'err_msg': 'Detected string class_weights with value: not_valid'
},
{
'testcase_name': 'no class counts',
'class_weights': 'balanced',
'class_counts': None,
'num_classes': 1,
'err_msg': 'Class counts must be provided if '
'using class_weights=balanced'
},
{
'testcase_name': 'no num classes',
'class_weights': 'balanced',
'class_counts': [1],
'num_classes': None,
'err_msg': 'num_classes must be provided if '
'using class_weights=balanced'
},
{
'testcase_name': 'class counts not array',
'class_weights': 'balanced',
'class_counts': 1,
'num_classes': None,
'err_msg': 'class counts must be a 1D array.'
},
{
'testcase_name': 'class counts array, no num classes',
'class_weights': [1],
'class_counts': None,
'num_classes': None,
'err_msg': 'You must pass a value for num_classes if '
'creating an array of class_weights'
},
{
'testcase_name': 'class counts array, improper shape',
'class_weights': [[1], [1]],
'class_counts': None,
'num_classes': 2,
'err_msg': 'Detected class_weights shape'
},
{
'testcase_name': 'class counts array, wrong number classes',
'class_weights': [1, 1, 1],
'class_counts': None,
'num_classes': 2,
'err_msg': 'Detected array length:'
},
])
def test_class_errors(self, class_weights, class_counts, num_classes,
err_msg):
"""Tests the BOltonModel calculate_class_weights method.
@ -534,9 +525,7 @@ class FitTests(keras_parameterized.TestCase):
"""
clf = models.BoltOnModel(1, 1)
with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method
clf.calculate_class_weights(class_weights,
class_counts,
num_classes)
clf.calculate_class_weights(class_weights, class_counts, num_classes)
if __name__ == '__main__':

View file

@ -48,14 +48,12 @@ class GammaBetaDecreasingStep(
'This is performed automatically by using the '
'{1} as a context manager, '
'as desired'.format(self.__class__.__name__,
BoltOn.__class__.__name__
)
)
BoltOn.__class__.__name__))
dtype = self.beta.dtype
one = tf.constant(1, dtype)
return tf.math.minimum(tf.math.reduce_min(one/self.beta),
one/(self.gamma*math_ops.cast(step, dtype))
)
return tf.math.minimum(
tf.math.reduce_min(one / self.beta),
one / (self.gamma * math_ops.cast(step, dtype)))
def get_config(self):
"""Return config to setup the learning rate scheduler."""
@ -108,16 +106,16 @@ class BoltOn(optimizer_v2.OptimizerV2):
Descent-based Analytics by Xi Wu et. al.
"""
def __init__(self, # pylint: disable=super-init-not-called
optimizer,
loss,
dtype=tf.float32,
):
def __init__(
self, # pylint: disable=super-init-not-called
optimizer,
loss,
dtype=tf.float32,
):
"""Constructor.
Args:
optimizer: Optimizer_v2 or subclass to be used as the optimizer
(wrapped).
optimizer: Optimizer_v2 or subclass to be used as the optimizer (wrapped).
loss: StrongConvexLoss function that the model is being compiled with.
dtype: dtype
"""
@ -155,8 +153,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
"""Normalize the weights to the R-ball.
Args:
force: True to normalize regardless of previous weight values.
False to check if weights > R-ball and only normalize then.
force: True to normalize regardless of previous weight values. False to
check if weights > R-ball and only normalize then.
Raises:
Exception: If not called from inside this optimizer context.
@ -199,14 +197,14 @@ class BoltOn(optimizer_v2.OptimizerV2):
l2_sensitivity = (2 *
loss.lipchitz_constant(self.class_weights)) / \
(loss.gamma() * self.n_samples * self.batch_size)
unit_vector = tf.random.normal(shape=(input_dim, output_dim),
mean=0,
seed=1,
stddev=1.0,
dtype=self.dtype)
unit_vector = tf.random.normal(
shape=(input_dim, output_dim),
mean=0,
seed=1,
stddev=1.0,
dtype=self.dtype)
unit_vector = unit_vector / tf.math.sqrt(
tf.reduce_sum(tf.math.square(unit_vector), axis=0)
)
tf.reduce_sum(tf.math.square(unit_vector), axis=0))
beta = l2_sensitivity / per_class_epsilon
alpha = input_dim # input_dim
@ -214,8 +212,7 @@ class BoltOn(optimizer_v2.OptimizerV2):
alpha,
beta=1 / beta,
seed=1,
dtype=self.dtype
)
dtype=self.dtype)
return unit_vector * gamma
raise NotImplementedError('Noise distribution: {0} is not '
'a valid distribution'.format(distribution))
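Note: as a reading of the 'laplace' branch above, the draw factorizes as noise = u * r, where u is uniform on the unit sphere in R^d (d = input_dim) and r ~ Gamma(shape=d, scale=l2_sensitivity / per_class_epsilon); tf.random.gamma's beta argument is a rate, hence beta=1/beta in the call. This is the high-dimensional Laplace-like perturbation used by the BoltOn method (Wu et al.).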
@ -245,10 +242,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
except AttributeError:
raise AttributeError(
"Neither '{0}' nor '{1}' object has attribute '{2}'"
"".format(self.__class__.__name__,
self._internal_optimizer.__class__.__name__,
name)
)
''.format(self.__class__.__name__,
self._internal_optimizer.__class__.__name__, name))
def __setattr__(self, key, value):
"""Set attribute to self instance if its the internal optimizer.
@ -309,20 +304,15 @@ class BoltOn(optimizer_v2.OptimizerV2):
self._is_init = True
return self
def __call__(self,
noise_distribution,
epsilon,
layers,
class_weights,
n_samples,
batch_size):
def __call__(self, noise_distribution, epsilon, layers, class_weights,
n_samples, batch_size):
"""Accepts required values for bolton method from context entry point.
Stores them on the optimizer for use throughout fitting.
Args:
noise_distribution: the noise distribution to pick.
see _accepted_distributions and get_noise for possible values.
noise_distribution: the noise distribution to pick. see
_accepted_distributions and get_noise for possible values.
epsilon: privacy parameter. Lower gives more privacy but less utility.
layers: list of Keras/Tensorflow layers. Can be found as model.layers
class_weights: class_weights used, which may either be a scalar or 1D
@ -341,8 +331,8 @@ class BoltOn(optimizer_v2.OptimizerV2):
'distributions'.format(noise_distribution,
_accepted_distributions))
self.noise_distribution = noise_distribution
self.learning_rate.initialize(self.loss.beta(class_weights),
self.loss.gamma())
self.learning_rate.initialize(
self.loss.beta(class_weights), self.loss.gamma())
self.epsilon = tf.constant(epsilon, dtype=self.dtype)
self.class_weights = tf.constant(class_weights, dtype=self.dtype)
self.n_samples = tf.constant(n_samples, dtype=self.dtype)
@ -369,9 +359,10 @@ class BoltOn(optimizer_v2.OptimizerV2):
for layer in self.layers:
input_dim = layer.kernel.shape[0]
output_dim = layer.units
noise = self.get_noise(input_dim,
output_dim,
)
noise = self.get_noise(
input_dim,
output_dim,
)
layer.kernel = tf.math.add(layer.kernel, noise)
self.noise_distribution = None
self.learning_rate.de_initialize()

View file

@ -111,9 +111,7 @@ class TestLoss(losses.Loss, StrongConvexMixin):
def call(self, y_true, y_pred):
"""Loss function that is minimized at the mean of the input points."""
return 0.5 * tf.reduce_sum(
tf.math.squared_difference(y_true, y_pred),
axis=1
)
tf.math.squared_difference(y_true, y_pred), axis=1)
def max_class_weight(self, class_weight, dtype=tf.float32):
"""the maximum weighting in class weights (max value) as a scalar tensor.
@ -183,20 +181,24 @@ class TestOptimizer(OptimizerV2):
class BoltonOptimizerTest(keras_parameterized.TestCase):
"""BoltOn Optimizer tests."""
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'getattr',
'fn': '__getattr__',
'args': ['dtype'],
'result': tf.float32,
'test_attr': None},
{'testcase_name': 'project_weights_to_r',
'fn': 'project_weights_to_r',
'args': ['dtype'],
'result': None,
'test_attr': ''},
{
'testcase_name': 'getattr',
'fn': '__getattr__',
'args': ['dtype'],
'result': tf.float32,
'test_attr': None
},
{
'testcase_name': 'project_weights_to_r',
'fn': 'project_weights_to_r',
'args': ['dtype'],
'result': None,
'test_attr': ''
},
])
def test_fn(self, fn, args, result, test_attr):
"""test that a fn of BoltOn optimizer is working as expected.
@ -204,9 +206,8 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
fn: method of Optimizer to test
args: args to optimizer fn
result: the expected result
test_attr: None if the fn returns the test result. Otherwise, this is
the attribute of BoltOn to check against result with.
test_attr: None if the fn returns the test result. Otherwise, this is the
attribute of BoltOn to check against result with.
"""
tf.random.set_seed(1)
loss = TestLoss(1, 1, 1)
@ -231,30 +232,38 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': '1 value project to r=1',
'r': 1,
'init_value': 2,
'shape': (1,),
'n_out': 1,
'result': [[1]]},
{'testcase_name': '2 value project to r=1',
'r': 1,
'init_value': 2,
'shape': (2,),
'n_out': 1,
'result': [[0.707107], [0.707107]]},
{'testcase_name': '1 value project to r=2',
'r': 2,
'init_value': 3,
'shape': (1,),
'n_out': 1,
'result': [[2]]},
{'testcase_name': 'no project',
'r': 2,
'init_value': 1,
'shape': (1,),
'n_out': 1,
'result': [[1]]},
{
'testcase_name': '1 value project to r=1',
'r': 1,
'init_value': 2,
'shape': (1,),
'n_out': 1,
'result': [[1]]
},
{
'testcase_name': '2 value project to r=1',
'r': 1,
'init_value': 2,
'shape': (2,),
'n_out': 1,
'result': [[0.707107], [0.707107]]
},
{
'testcase_name': '1 value project to r=2',
'r': 2,
'init_value': 3,
'shape': (1,),
'n_out': 1,
'result': [[2]]
},
{
'testcase_name': 'no project',
'r': 2,
'init_value': 1,
'shape': (1,),
'n_out': 1,
'result': [[1]]
},
])
def test_project(self, r, shape, n_out, init_value, result):
"""test that a fn of BoltOn optimizer is working as expected.
@ -267,6 +276,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
result: the expected output after projection.
"""
tf.random.set_seed(1)
def project_fn(r):
loss = TestLoss(1, 1, r)
bolton = opt.BoltOn(TestOptimizer(), loss)
@ -283,15 +293,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
bolton.n_samples = 1
bolton.project_weights_to_r()
return _ops.convert_to_tensor_v2(bolton.layers[0].kernel, tf.float32)
res = project_fn(r)
self.assertAllClose(res, result)
@test_util.run_all_in_graph_and_eager_modes
@parameterized.named_parameters([
{'testcase_name': 'normal values',
'epsilon': 2,
'noise': 'laplace',
'class_weights': 1},
{
'testcase_name': 'normal values',
'epsilon': 2,
'noise': 'laplace',
'class_weights': 1
},
])
def test_context_manager(self, noise, epsilon, class_weights):
"""Tests the context manager functionality of the optimizer.
@ -301,6 +314,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
epsilon: epsilon privacy parameter to use
class_weights: class_weights to use
"""
@tf.function
def test_run():
loss = TestLoss(1, 1, 1)
@ -313,18 +327,23 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
with bolton(noise, epsilon, model.layers, class_weights, 1, 1) as _:
pass
return _ops.convert_to_tensor_v2(bolton.epsilon, dtype=tf.float32)
epsilon = test_run()
self.assertEqual(epsilon.numpy(), -1)
@parameterized.named_parameters([
{'testcase_name': 'invalid noise',
'epsilon': 1,
'noise': 'not_valid',
'err_msg': 'Detected noise distribution: not_valid not one of:'},
{'testcase_name': 'invalid epsilon',
'epsilon': -1,
'noise': 'laplace',
'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon <inf'},
{
'testcase_name': 'invalid noise',
'epsilon': 1,
'noise': 'not_valid',
'err_msg': 'Detected noise distribution: not_valid not one of:'
},
{
'testcase_name': 'invalid epsilon',
'epsilon': -1,
'noise': 'laplace',
'err_msg': 'Detected epsilon: -1. Valid range is 0 < epsilon <inf'
},
])
def test_context_domains(self, noise, epsilon, err_msg):
"""Tests the context domains.
@ -333,7 +352,6 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
noise: noise distribution to pick
epsilon: epsilon privacy parameter to use
err_msg: the expected error message
"""
@tf.function
@ -347,15 +365,18 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
model.n_outputs))
with bolton(noise, epsilon, model.layers, 1, 1, 1) as _:
pass
with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method
test_run(noise, epsilon)
@parameterized.named_parameters([
{'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1],
'err_msg': 'This method must be called from within the '
'optimizer\'s context'},
{
'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1],
'err_msg': 'This method must be called from within the '
'optimizer\'s context'
},
])
def test_not_in_context(self, fn, args, err_msg):
"""Tests that the expected functions raise errors when not in context.
@ -365,6 +386,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
args: the arguments for said function
err_msg: expected error message
"""
def test_run(fn, args):
loss = TestLoss(1, 1, 1)
bolton = opt.BoltOn(TestOptimizer(), loss)
@ -379,33 +401,51 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
test_run(fn, args)
@parameterized.named_parameters([
{'testcase_name': 'fn: get_updates',
'fn': 'get_updates',
'args': [0, 0]},
{'testcase_name': 'fn: get_config',
'fn': 'get_config',
'args': []},
{'testcase_name': 'fn: from_config',
'fn': 'from_config',
'args': [0]},
{'testcase_name': 'fn: _resource_apply_dense',
'fn': '_resource_apply_dense',
'args': [1, 1]},
{'testcase_name': 'fn: _resource_apply_sparse',
'fn': '_resource_apply_sparse',
'args': [1, 1, 1]},
{'testcase_name': 'fn: apply_gradients',
'fn': 'apply_gradients',
'args': [1]},
{'testcase_name': 'fn: minimize',
'fn': 'minimize',
'args': [1, 1]},
{'testcase_name': 'fn: _compute_gradients',
'fn': '_compute_gradients',
'args': [1, 1]},
{'testcase_name': 'fn: get_gradients',
'fn': 'get_gradients',
'args': [1, 1]},
{
'testcase_name': 'fn: get_updates',
'fn': 'get_updates',
'args': [0, 0]
},
{
'testcase_name': 'fn: get_config',
'fn': 'get_config',
'args': []
},
{
'testcase_name': 'fn: from_config',
'fn': 'from_config',
'args': [0]
},
{
'testcase_name': 'fn: _resource_apply_dense',
'fn': '_resource_apply_dense',
'args': [1, 1]
},
{
'testcase_name': 'fn: _resource_apply_sparse',
'fn': '_resource_apply_sparse',
'args': [1, 1, 1]
},
{
'testcase_name': 'fn: apply_gradients',
'fn': 'apply_gradients',
'args': [1]
},
{
'testcase_name': 'fn: minimize',
'fn': 'minimize',
'args': [1, 1]
},
{
'testcase_name': 'fn: _compute_gradients',
'fn': '_compute_gradients',
'args': [1, 1]
},
{
'testcase_name': 'fn: get_gradients',
'fn': 'get_gradients',
'args': [1, 1]
},
])
def test_rerouted_function(self, fn, args):
"""Tests rerouted function.
@ -435,18 +475,19 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
bolton.noise_distribution = 'laplace'
bolton.n_outputs = 1
bolton.n_samples = 1
self.assertEqual(
getattr(bolton, fn, lambda: 'fn not found')(*args),
'test'
)
self.assertEqual(getattr(bolton, fn, lambda: 'fn not found')(*args), 'test')
@parameterized.named_parameters([
{'testcase_name': 'fn: project_weights_to_r',
'fn': 'project_weights_to_r',
'args': []},
{'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1]},
{
'testcase_name': 'fn: project_weights_to_r',
'fn': 'project_weights_to_r',
'args': []
},
{
'testcase_name': 'fn: get_noise',
'fn': 'get_noise',
'args': [1, 1]
},
])
def test_not_reroute_fn(self, fn, args):
"""Test function is not rerouted.
@ -458,6 +499,7 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
fn: fn to test
args: arguments to that fn
"""
def test_run(fn, args):
loss = TestLoss(1, 1, 1)
bolton = opt.BoltOn(TestOptimizer(), loss)
@ -480,12 +522,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
else:
res = 0
return _ops.convert_to_tensor_v2(res, dtype=tf.float32)
self.assertNotEqual(test_run(fn, args), 0)
@parameterized.named_parameters([
{'testcase_name': 'attr: _iterations',
'attr': '_iterations'}
])
@parameterized.named_parameters([{
'testcase_name': 'attr: _iterations',
'attr': '_iterations'
}])
def test_reroute_attr(self, attr):
"""Test a function is rerouted.
@ -498,13 +541,13 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
loss = TestLoss(1, 1, 1)
internal_optimizer = TestOptimizer()
optimizer = opt.BoltOn(internal_optimizer, loss)
self.assertEqual(getattr(optimizer, attr),
getattr(internal_optimizer, attr))
self.assertEqual(
getattr(optimizer, attr), getattr(internal_optimizer, attr))
@parameterized.named_parameters([
{'testcase_name': 'attr does not exist',
'attr': '_not_valid'}
])
@parameterized.named_parameters([{
'testcase_name': 'attr does not exist',
'attr': '_not_valid'
}])
def test_attribute_error(self, attr):
"""Test rerouting of attributes.
@ -524,12 +567,11 @@ class BoltonOptimizerTest(keras_parameterized.TestCase):
class SchedulerTest(keras_parameterized.TestCase):
"""GammaBeta Scheduler tests."""
@parameterized.named_parameters([
{'testcase_name': 'not in context',
'err_msg': 'Please initialize the GammaBetaDecreasingStep Learning Rate'
' Scheduler'
}
])
@parameterized.named_parameters([{
'testcase_name': 'not in context',
'err_msg': 'Please initialize the GammaBetaDecreasingStep Learning Rate'
' Scheduler'
}])
def test_bad_call(self, err_msg):
"""Test attribute of internal opt correctly rerouted to the internal opt.
@ -541,15 +583,21 @@ class SchedulerTest(keras_parameterized.TestCase):
scheduler(1)
@parameterized.named_parameters([
{'testcase_name': 'step 1',
'step': 1,
'res': 0.5},
{'testcase_name': 'step 2',
'step': 2,
'res': 0.5},
{'testcase_name': 'step 3',
'step': 3,
'res': 0.333333333},
{
'testcase_name': 'step 1',
'step': 1,
'res': 0.5
},
{
'testcase_name': 'step 2',
'step': 2,
'res': 0.5
},
{
'testcase_name': 'step 3',
'step': 3,
'res': 0.333333333
},
])
def test_call(self, step, res):
"""Test call.

View file

@ -13,10 +13,8 @@
# limitations under the License.
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import quantile_estimator_query
from tensorflow_privacy.privacy.dp_query import test_utils
@ -44,10 +42,7 @@ def _make_quantile_estimator_query(initial_estimate,
raise ValueError(
'Cannot set expected_num_records to None for tree aggregation.')
return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
initial_estimate,
target_quantile,
learning_rate,
geometric_update)
initial_estimate, target_quantile, learning_rate, geometric_update)
class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
@ -109,7 +104,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# to 4 / sqrt(2.0). Still only one record is below, so it reduces to 2.0.
# Now no records are below, and the estimate norm stays there (at 2.0).
four_div_root_two = 4 / np.sqrt(2.0) # approx 2.828
four_div_root_two = 4 / np.sqrt(2.0) # approx 2.828
expected_estimates = [8.0, 4.0, four_div_root_two, 2.0, 2.0]
for expected_estimate in expected_estimates:
@ -175,7 +170,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# is multiplied by sqrt(2.0). Still only one is above so it increases to
# 4.0. Now both records are above, and the estimate stays there (at 4.0).
two_times_root_two = 2 * np.sqrt(2.0) # approx 2.828
two_times_root_two = 2 * np.sqrt(2.0) # approx 2.828
expected_estimates = [1.0, 2.0, two_times_root_two, 4.0, 4.0]
for expected_estimate in expected_estimates:
@ -201,8 +196,10 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
# 100 records equally spaced from 0 to 10 in 0.1 increments.
# Test that we converge to the correct median value and bounce around it.
num_records = 21
records = [tf.constant(x) for x in np.linspace(
0.0, 10.0, num=num_records, dtype=np.float32)]
records = [
tf.constant(x)
for x in np.linspace(0.0, 10.0, num=num_records, dtype=np.float32)
]
query = _make_quantile_estimator_query(
initial_estimate=(1.0 if start_low else 10.0),
@ -267,9 +264,7 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
def test_raises_with_non_scalar_record(self):
query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
initial_estimate=1.0,
target_quantile=0.5,
learning_rate=1.0)
initial_estimate=1.0, target_quantile=0.5, learning_rate=1.0)
with self.assertRaisesRegex(ValueError, 'scalar'):
query.accumulate_record(None, None, [1.0, 2.0])
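Note: a sketch of driving this query outside the test class; test_utils.run_query is assumed to take (query, records, global_state) and return the estimate plus the updated state, matching the assertions above.

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import quantile_estimator_query
from tensorflow_privacy.privacy.dp_query import test_utils

query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
    initial_estimate=1.0, target_quantile=0.5, learning_rate=1.0)
global_state = query.initial_global_state()
# Each round nudges the estimate toward the median of the scalar records.
records = [tf.constant(0.5), tf.constant(2.0), tf.constant(4.0)]
estimate, global_state = test_utils.run_query(query, records, global_state)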

View file

@ -28,7 +28,6 @@ from typing import Any, Callable, Collection, Optional, Tuple, Union
import attr
import tensorflow as tf
# TODO(b/192464750): find a proper place for the helper functions, privatize
# the tree aggregation logic, and encourage users to use the DPQuery API.

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for `tree_aggregation_query`."""
from absl.testing import parameterized
import numpy as np
@ -212,11 +211,11 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.named_parameters(
('two_records_noise_fn', [2.71828, 3.14159], _get_noise_fn),
('five_records_noise_fn', np.random.uniform(low=0.1, size=5).tolist(),
_get_noise_fn),
('five_records_noise_fn', np.random.uniform(
low=0.1, size=5).tolist(), _get_noise_fn),
('two_records_generator', [2.71828, 3.14159], _get_noise_generator),
('five_records_generator', np.random.uniform(low=0.1, size=5).tolist(),
_get_noise_generator),
('five_records_generator', np.random.uniform(
low=0.1, size=5).tolist(), _get_noise_generator),
)
def test_noisy_cumsum_and_state_update(self, records, value_generator):
num_trials, vector_size = 10, 100

View file

@ -63,5 +63,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
input_fn=test_utils.make_input_fn(predict_features, predict_labels,
False))
if __name__ == '__main__':
tf.test.main()

View file

@ -34,6 +34,7 @@ def make_input_data(size, classes):
np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise > 125).astype(int)
else:
def label_fn(x):
if x < 110.0:
return 0
@ -42,10 +43,11 @@ def make_input_data(size, classes):
else:
return 2
labels = list(map(
label_fn,
np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise))
labels = list(
map(
label_fn,
np.power(feature_a, 3) + np.power(feature_b, 2) +
np.power(feature_c, 1) + noise))
return features, labels
@ -87,6 +89,7 @@ def make_input_fn(features, labels, training, batch_size=16):
dataset = dataset.shuffle(1000)
return dataset.batch(batch_size)
return input_fn

View file

@ -64,5 +64,6 @@ class DPDNNClassifierTest(tf.test.TestCase, parameterized.TestCase):
input_fn=test_utils.make_input_fn(predict_features, predict_labels,
False))
if __name__ == '__main__':
tf.test.main()

View file

@ -434,8 +434,8 @@ def _binary_logistic_or_multi_class_head(n_classes, weight_column,
encoded as integer or float within [0, 1] for `n_classes=2` and encoded as
integer values in {0, 1,..., n_classes-1} for `n_classes` > 2. Also there
will be errors if vocabulary is not provided and labels are string.
loss_reduction: Describes how to reduce training loss over batch.
Defaults to `SUM`.
loss_reduction: Describes how to reduce training loss over batch. Defaults
to `SUM`.
Returns:
`head._Head` instance.

View file

@ -53,9 +53,10 @@ def make_dp_model_class(cls):
model.fit(train_data, train_labels, epochs=1, batch_size=32)
```
""").format(base_model='tf.keras.' + cls.__name__,
short_base_model=cls.__name__,
dp_model_class='DP' + cls.__name__)
""").format(
base_model='tf.keras.' + cls.__name__,
short_base_model=cls.__name__,
dp_model_class='DP' + cls.__name__)
def __init__(
self,

View file

@ -40,8 +40,8 @@ class RegressionDataset:
"""Class for storing labeled examples for a regression dataset.
Attributes:
points: array of shape (num_examples, dimension) containing the points to
be classified.
points: array of shape (num_examples, dimension) containing the points to be
classified.
labels: array of shape (num_examples,) containing the corresponding labels,
each belonging to the set {0,1,...,num_classes-1}, where num_classes is
the number of classes.
@ -51,7 +51,7 @@ class RegressionDataset:
def linearly_separable_labeled_examples(
num_examples: int, weights: np.ndarray)-> RegressionDataset:
num_examples: int, weights: np.ndarray) -> RegressionDataset:
"""Generates num_examples labeled examples using separator given by weights.
Args:
@ -75,7 +75,7 @@ def linearly_separable_labeled_examples(
def synthetic_linearly_separable_data(
num_train: int, num_test: int, dimension: int,
num_classes: int)-> Tuple[RegressionDataset, RegressionDataset]:
num_classes: int) -> Tuple[RegressionDataset, RegressionDataset]:
"""Generates synthetic train and test data for logistic regression.
Args:
@ -103,7 +103,7 @@ def synthetic_linearly_separable_data(
return (train_dataset, test_dataset)
def mnist_dataset()-> Tuple[RegressionDataset, RegressionDataset]:
def mnist_dataset() -> Tuple[RegressionDataset, RegressionDataset]:
"""Generates (normalized) train and test data for MNIST.
Returns:

View file

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_privacy.privacy.logistic_regression.datasets."""
import unittest
from absl.testing import parameterized
import numpy as np
from tensorflow_privacy.privacy.logistic_regression import datasets
@ -22,14 +22,16 @@ from tensorflow_privacy.privacy.logistic_regression import datasets
class DatasetsTest(parameterized.TestCase):
@parameterized.parameters(
(1, np.array([[1],])),
(2, np.array([[1],])),
(5, np.array([[-1, 1], [1, -1]])),
(1, np.array([
[1],
])), (2, np.array([
[1],
])), (5, np.array([[-1, 1], [1, -1]])),
(15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]])))
def test_linearly_separable_labeled_examples(self, num_examples, weights):
dimension, num_classes = weights.shape
dataset = datasets.linearly_separable_labeled_examples(num_examples,
weights)
dataset = datasets.linearly_separable_labeled_examples(
num_examples, weights)
self.assertEqual(dataset.points.shape, (num_examples, dimension))
self.assertEqual(dataset.labels.shape, (num_examples,))
product = np.matmul(dataset.points, weights)
@ -37,11 +39,8 @@ class DatasetsTest(parameterized.TestCase):
for j in range(num_classes):
self.assertGreaterEqual(product[i, dataset.labels[i]], product[i, j])
@parameterized.parameters(
(1, 1, 1, 2),
(20, 5, 1, 2),
(20, 5, 2, 2),
(1000, 10, 15, 10))
@parameterized.parameters((1, 1, 1, 2), (20, 5, 1, 2), (20, 5, 2, 2),
(1000, 10, 15, 10))
def test_synthetic(self, num_train, num_test, dimension, num_classes):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
num_train, num_test, dimension, num_classes)
@ -73,5 +72,6 @@ class DatasetsTest(parameterized.TestCase):
self.assertTrue(np.all(np.isin(train_dataset.labels, range(10))))
self.assertTrue(np.all(np.isin(test_dataset.labels, range(10))))
if __name__ == '__main__':
unittest.main()

View file

@ -52,21 +52,26 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
(self._l2_regularizer,
variance) = self.logistic_objective_perturbation_parameters(
num_train, epsilon, delta, num_classes, input_clipping_norm)
self._b = tf.random.normal(shape=[dimension, num_classes], mean=0.0,
stddev=math.sqrt(variance),
dtype=tf.dtypes.float32)
self._b = tf.random.normal(
shape=[dimension, num_classes],
mean=0.0,
stddev=math.sqrt(variance),
dtype=tf.dtypes.float32)
def __call__(self, x):
return (tf.reduce_sum(self._l2_regularizer*tf.square(x)) +
(1/self._num_train)*tf.reduce_sum(tf.multiply(x, self._b)))
return (tf.reduce_sum(self._l2_regularizer * tf.square(x)) +
(1 / self._num_train) * tf.reduce_sum(tf.multiply(x, self._b)))
def get_config(self):
return {'l2_regularizer': self._l2_regularizer,
'num_train': self._num_train, 'b': self._b}
return {
'l2_regularizer': self._l2_regularizer,
'num_train': self._num_train,
'b': self._b
}
def logistic_objective_perturbation_parameters(
self, num_train: int, epsilon: float, delta: float, num_classes: int,
input_clipping_norm: float)-> Tuple[float, float]:
input_clipping_norm: float) -> Tuple[float, float]:
"""Computes l2-regularization coefficient and Gaussian noise variance.
The setting is based on Algorithm 1 of Kifer et al.
@ -85,19 +90,21 @@ class KiferRegularizer(tf.keras.regularizers.Regularizer):
# zeta is an upper bound on the l2-norm of the loss function gradient.
zeta = input_clipping_norm
# variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6):
variance = zeta*zeta*(8*np.log(2/delta)+4*epsilon)/(epsilon*epsilon)
variance = zeta * zeta * (8 * np.log(2 / delta) + 4 * epsilon) / (
epsilon * epsilon)
# lambda_coefficient is an upper bound on the spectral norm of the Hessian
# of the loss function.
lambda_coefficient = math.sqrt(2*num_classes)*(input_clipping_norm**2)/4
l2_regularizer = lambda_coefficient/(epsilon*num_train)
lambda_coefficient = math.sqrt(2 * num_classes) * (input_clipping_norm**
2) / 4
l2_regularizer = lambda_coefficient / (epsilon * num_train)
return (l2_regularizer, variance)
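
As a quick sanity check of the two formulas in this hunk, here is a minimal standalone sketch that recomputes the same pair with plain `math`/`numpy` (the numeric inputs at the bottom are invented for illustration):

```
import math

import numpy as np


def kifer_parameters(num_train, epsilon, delta, num_classes,
                     input_clipping_norm):
  """Recomputes the (l2_regularizer, variance) pair from the hunk above."""
  # zeta bounds the l2-norm of the per-example loss gradient.
  zeta = input_clipping_norm
  # Line 5 of Algorithm 1 in Kifer et al.:
  variance = zeta * zeta * (8 * np.log(2 / delta) + 4 * epsilon) / (
      epsilon * epsilon)
  # Spectral-norm bound on the Hessian of the loss:
  lambda_coefficient = math.sqrt(2 * num_classes) * (input_clipping_norm**2) / 4
  return lambda_coefficient / (epsilon * num_train), variance


# Purely illustrative inputs (hypothetical):
l2_reg, var = kifer_parameters(
    num_train=5000, epsilon=1.0, delta=1e-5, num_classes=2,
    input_clipping_norm=1.0)
```

The regularizer then draws `b` once at construction with standard deviation `sqrt(variance)` (the `tf.random.normal` call above) and adds `(1 / num_train) * <x, b>` to the penalty.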
def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset,
epsilon: float, delta: float,
epochs: int, num_classes: int,
input_clipping_norm: float)-> List[float]:
epsilon: float, delta: float, epochs: int,
num_classes: int,
input_clipping_norm: float) -> List[float]:
"""Trains and validates differentially private logistic regression model.
The training is based on the Algorithm 1 of Kifer et al.
@ -127,13 +134,21 @@ def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta,
num_classes, input_clipping_norm)
return single_layer_softmax.single_layer_softmax_classifier(
train_dataset, test_dataset, epochs, num_classes, optimizer, loss,
train_dataset,
test_dataset,
epochs,
num_classes,
optimizer,
loss,
kernel_regularizer=kernel_regularizer)
def compute_dpsgd_noise_multiplier(
num_train: int, epsilon: float, delta: float, epochs: int,
batch_size: int, tolerance: float = 1e-2) -> Optional[float]:
def compute_dpsgd_noise_multiplier(num_train: int,
epsilon: float,
delta: float,
epochs: int,
batch_size: int,
tolerance: float = 1e-2) -> Optional[float]:
"""Computes the noise multiplier for DP-SGD given privacy parameters.
The algorithm performs binary search on the values of epsilon.
@ -153,20 +168,17 @@ def compute_dpsgd_noise_multiplier(
the given tolerance) for which using DPKerasAdamOptimizer will result in an
(epsilon, delta)-differentially private trained model.
"""
search_parameters = common.BinarySearchParameters(lower_bound=0,
upper_bound=math.inf,
initial_guess=1,
tolerance=tolerance)
search_parameters = common.BinarySearchParameters(
lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance)
return common.inverse_monotone_function(
lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0],
epsilon, search_parameters)
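
`common.inverse_monotone_function` does the heavy lifting here; as a rough mental model (not the library's actual implementation, which differs in details such as failure handling), inverting a monotonically decreasing epsilon(noise) curve by bisection looks like this sketch:

```
import math


def invert_decreasing(f, target, initial_guess=1.0, tolerance=1e-2):
  """Bisection sketch: find x with f(x) <= target for decreasing f."""
  lower, upper = 0.0, initial_guess
  # Grow the bracket until f(upper) drops below the target.
  while f(upper) > target:
    lower, upper = upper, upper * 2
  while upper - lower > tolerance:
    mid = (lower + upper) / 2
    if f(mid) > target:  # not yet private enough: needs more noise
      lower = mid
    else:
      upper = mid
  return upper


# Hypothetical usage, with eps_for_noise wrapping compute_epsilon as above:
# noise_multiplier = invert_decreasing(eps_for_noise, target=1.0)
```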
def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset,
epsilon: float, delta: float, epochs: int, num_classes: int,
batch_size: int, num_microbatches: int,
clipping_norm: float)-> List[float]:
test_dataset: datasets.RegressionDataset, epsilon: float,
delta: float, epochs: int, num_classes: int, batch_size: int,
num_microbatches: int, clipping_norm: float) -> List[float]:
"""Trains and validates private logistic regression model via DP-SGD.
The training is based on the differentially private stochasstic gradient
@ -183,8 +195,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
num_classes: number of classes.
batch_size: the number of examples in each batch of gradient descent.
num_microbatches: the number of microbatches in gradient descent.
clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer
to have l2-norm at most clipping_norm.
clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer to
have l2-norm at most clipping_norm.
Returns:
List of test accuracies (one for each epoch) on test_dataset of model
@ -199,7 +211,8 @@ def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta,
epochs, batch_size)
optimizer = dp_optimizer_keras.DPKerasAdamOptimizer(
l2_norm_clip=clipping_norm, noise_multiplier=noise_multiplier,
l2_norm_clip=clipping_norm,
noise_multiplier=noise_multiplier,
num_microbatches=num_microbatches)
loss = tf.keras.losses.CategoricalCrossentropy(
reduction=tf.losses.Reduction.NONE)

View file

@ -27,7 +27,7 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(5000, 500, 4, 1, 1e-5, 40, 2, 0.05),
(10000, 1000, 3, 1, 1e-5, 40, 4, 0.1),
(10000, 1000, 4, 1, 1e-5, 40, 4, 0.1),
)
)
def test_logistic_objective_perturbation(self, num_train, num_test, dimension,
epsilon, delta, epochs, num_classes,
tolerance):
@ -44,7 +44,7 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(1, 1, 1e-5, 40, 1, 1e-2),
(500, 0.1, 1e-5, 40, 50, 1e-2),
(5000, 10, 1e-5, 40, 10, 1e-3),
)
)
def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta,
epochs, batch_size, tolerance):
noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier(
@ -61,19 +61,22 @@ class MultinomialLogisticRegressionTest(parameterized.TestCase):
(5000, 500, 4, 1, 1e-5, 40, 2, 0.05, 10, 10, 1),
(5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
(5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
)
def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon,
delta, epochs, num_classes, tolerance,
batch_size, num_microbatches, clipping_norm):
)
def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, delta,
epochs, num_classes, tolerance, batch_size,
num_microbatches, clipping_norm):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
num_train, num_test, dimension, num_classes)
accuracy = multinomial_logistic.logistic_dpsgd(
train_dataset, test_dataset, epsilon, delta, epochs, num_classes,
batch_size, num_microbatches, clipping_norm)
accuracy = multinomial_logistic.logistic_dpsgd(train_dataset, test_dataset,
epsilon, delta, epochs,
num_classes, batch_size,
num_microbatches,
clipping_norm)
# Since the synthetic data is linearly separable, we expect the test
# accuracy to come arbitrarily close to 1 as the number of training examples
# grows.
self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
if __name__ == '__main__':
unittest.main()

View file

@ -11,8 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of a single-layer softmax classifier.
"""
"""Implementation of a single-layer softmax classifier."""
from typing import List
import tensorflow as tf
@ -22,10 +21,13 @@ from tensorflow_privacy.privacy.logistic_regression import datasets
def single_layer_softmax_classifier(
train_dataset: datasets.RegressionDataset,
test_dataset: datasets.RegressionDataset,
epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer,
epochs: int,
num_classes: int,
optimizer: tf.keras.optimizers.Optimizer,
loss: tf.keras.losses.Loss = 'categorical_crossentropy',
batch_size: int = 32,
kernel_regularizer: tf.keras.regularizers.Regularizer = None)-> List[float]:
kernel_regularizer: tf.keras.regularizers.Regularizer = None
) -> List[float]:
"""Trains a single layer neural network classifier with softmax activation.
Args:
@ -47,13 +49,17 @@ def single_layer_softmax_classifier(
one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes)
one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=num_classes,
activation='softmax',
kernel_regularizer=kernel_regularizer))
model.add(
tf.keras.layers.Dense(
units=num_classes,
activation='softmax',
kernel_regularizer=kernel_regularizer))
model.compile(optimizer, loss=loss, metrics=['accuracy'])
history = model.fit(train_dataset.points, one_hot_train_labels,
batch_size=batch_size, epochs=epochs,
validation_data=(test_dataset.points,
one_hot_test_labels),
verbose=0)
history = model.fit(
train_dataset.points,
one_hot_train_labels,
batch_size=batch_size,
epochs=epochs,
validation_data=(test_dataset.points, one_hot_test_labels),
verbose=0)
return history.history['val_accuracy']
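
Putting the pieces together, a hypothetical call mirroring the unit test below, using the synthetic data generator from earlier in this commit (all sizes are illustrative):

```
from tensorflow_privacy.privacy.logistic_regression import datasets
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax

# Hypothetical sizes; any linearly separable setup behaves similarly.
train_ds, test_ds = datasets.synthetic_linearly_separable_data(
    num_train=5000, num_test=500, dimension=3, num_classes=2)
accuracies = single_layer_softmax.single_layer_softmax_classifier(
    train_ds, test_ds, epochs=20, num_classes=2, optimizer='sgd')
print(accuracies[-1])  # validation accuracy after the final epoch
```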

View file

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_privacy.privacy.logistic_regression.single_layer_softmax."""
import unittest
from absl.testing import parameterized
from tensorflow_privacy.privacy.logistic_regression import datasets
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
@ -26,7 +26,7 @@ class SingleLayerSoftmaxTest(parameterized.TestCase):
(5000, 500, 4, 40, 2, 0.05),
(10000, 1000, 3, 40, 4, 0.1),
(10000, 1000, 4, 40, 4, 0.1),
)
)
def test_single_layer_softmax(self, num_train, num_test, dimension, epochs,
num_classes, tolerance):
(train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data(
@ -35,5 +35,6 @@ class SingleLayerSoftmaxTest(parameterized.TestCase):
train_dataset, test_dataset, epochs, num_classes, 'sgd')
self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance)
if __name__ == '__main__':
unittest.main()

View file

@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls):
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch is
split. Default is `None` which means that number of microbatches
is equal to batch size (i.e. each microbatch contains exactly one
split. Default is `None` which means that number of microbatches is
equal to batch size (i.e. each microbatch contains exactly one
example). If `gradient_accumulation_steps` is greater than 1 and
`num_microbatches` is not `None` then the effective number of
microbatches is equal to
`num_microbatches * gradient_accumulation_steps`.
microbatches is equal to `num_microbatches *
gradient_accumulation_steps`.
gradient_accumulation_steps: If greater than 1 then optimizer will be
accumulating gradients for this number of optimizer steps before
applying them to update model weights. If this argument is set to 1
@ -172,39 +172,39 @@ def make_keras_optimizer_class(cls):
if self.gradient_accumulation_steps > 1:
apply_update = tf.math.equal(
tf.math.floormod(self.iterations + 1,
self.gradient_accumulation_steps),
0)
self.gradient_accumulation_steps), 0)
grad_scaler = tf.cast(1. / self.gradient_accumulation_steps, var_dtype)
apply_state[(var_device, var_dtype)].update(
{
'apply_update': apply_update,
'grad_scaler': grad_scaler
})
apply_state[(var_device, var_dtype)].update({
'apply_update': apply_update,
'grad_scaler': grad_scaler
})
def _resource_apply_dense(self, grad, var, apply_state=None):
if self.gradient_accumulation_steps > 1:
var_device, var_dtype = var.device, var.dtype.base_dtype
coefficients = ((apply_state or {}).get((var_device, var_dtype))
or self._fallback_apply_state(var_device, var_dtype))
coefficients = ((apply_state or {}).get((var_device, var_dtype)) or
self._fallback_apply_state(var_device, var_dtype))
grad_acc = self.get_slot(var, 'grad_acc')
def _update_grad():
apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense(
grad_acc + grad * coefficients['grad_scaler'], var, apply_state)
with tf.control_dependencies([apply_grad_op]):
return grad_acc.assign(tf.zeros_like(grad_acc),
use_locking=self._use_locking,
read_value=False)
return grad_acc.assign(
tf.zeros_like(grad_acc),
use_locking=self._use_locking,
read_value=False)
def _accumulate():
return grad_acc.assign_add(grad * coefficients['grad_scaler'],
use_locking=self._use_locking,
read_value=False)
return grad_acc.assign_add(
grad * coefficients['grad_scaler'],
use_locking=self._use_locking,
read_value=False)
return tf.cond(coefficients['apply_update'], _update_grad, _accumulate)
else:
return super(DPOptimizerClass, self)._resource_apply_dense(
grad, var, apply_state)
return super(DPOptimizerClass,
self)._resource_apply_dense(grad, var, apply_state)
def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs):
if self.gradient_accumulation_steps > 1:
@ -220,8 +220,8 @@ def make_keras_optimizer_class(cls):
raise NotImplementedError(
'Sparse gradients are not supported with large batch emulation.')
else:
return super(DPOptimizerClass, self)._resource_apply_sparse(
*args, **kwargs)
return super(DPOptimizerClass,
self)._resource_apply_sparse(*args, **kwargs)
def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
"""DP-SGD version of base class method."""

View file

@ -15,7 +15,6 @@
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras_vectorized
@ -108,8 +107,7 @@ class DPOptimizerComputeGradientsTest(tf.test.TestCase, parameterized.TestCase):
with tape:
loss = self._loss(data0, var0) + self._loss(data1, var1)
grads_and_vars = opt._compute_gradients(
loss, [var0, var1], tape=tape)
grads_and_vars = opt._compute_gradients(loss, [var0, var1], tape=tape)
self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0])
self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0])
@ -442,10 +440,9 @@ class DPOptimizerGetGradientsTest(tf.test.TestCase, parameterized.TestCase):
('DPKerasSGDOptimizer 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1),
('DPKerasSGDOptimizer 2', dp_optimizer_keras.DPKerasSGDOptimizer, 2),
('DPKerasSGDOptimizer 4', dp_optimizer_keras.DPKerasSGDOptimizer, 4),
('DPKerasAdamOptimizer 2',
dp_optimizer_keras.DPKerasAdamOptimizer, 1),
('DPKerasAdagradOptimizer 2',
dp_optimizer_keras.DPKerasAdagradOptimizer, 2),
('DPKerasAdamOptimizer 2', dp_optimizer_keras.DPKerasAdamOptimizer, 1),
('DPKerasAdagradOptimizer 2', dp_optimizer_keras.DPKerasAdagradOptimizer,
2),
)
def testLargeBatchEmulation(self, cls, gradient_accumulation_steps):
# Tests various optimizers with large batch emulation.

View file

@ -95,10 +95,11 @@ def make_vectorized_keras_optimizer_class(cls):
model.fit(...)
```
""".format(base_class='tf.keras.optimizers.' + cls.__name__,
dp_keras_class='DPKeras' + cls.__name__,
short_base_class=cls.__name__,
dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__)
""".format(
base_class='tf.keras.optimizers.' + cls.__name__,
dp_keras_class='DPKeras' + cls.__name__,
short_base_class=cls.__name__,
dp_vectorized_keras_class='VectorizedDPKeras' + cls.__name__)
def __init__(
self,
@ -112,8 +113,8 @@ def make_vectorized_keras_optimizer_class(cls):
Args:
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch
is split.
num_microbatches: Number of microbatches into which each minibatch is
split.
*args: These will be passed on to the base class `__init__` method.
**kwargs: These will be passed on to the base class `__init__` method.
"""

View file

@ -18,7 +18,6 @@ import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.optimizers import dp_optimizer
@ -30,13 +29,14 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
return 0.5 * tf.reduce_sum(
input_tensor=tf.math.squared_difference(val0, val1), axis=1)
def _compute_expected_gradients(self, per_example_gradients,
l2_norm_clip, num_microbatches):
def _compute_expected_gradients(self, per_example_gradients, l2_norm_clip,
num_microbatches):
batch_size, num_vars = per_example_gradients.shape
microbatch_gradients = np.mean(
np.reshape(per_example_gradients,
[num_microbatches,
np.int(batch_size / num_microbatches), num_vars]),
np.reshape(
per_example_gradients,
[num_microbatches,
np.int(batch_size / num_microbatches), num_vars]),
axis=1)
microbatch_gradients_norms = np.linalg.norm(microbatch_gradients, axis=1)
@ -124,8 +124,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
l2_norm_clip = 1.0
dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, 0.0)
opt = cls(dp_sum_query, num_microbatches=num_microbatches,
learning_rate=2.0)
opt = cls(
dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values

View file

@ -134,19 +134,15 @@ def make_vectorized_optimizer_class(cls):
if var_list is None:
var_list = (
tf.trainable_variables() + tf.get_collection(
tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
tf.trainable_variables() +
tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
def process_microbatch(microbatch_loss):
"""Compute clipped grads for one microbatch."""
microbatch_loss = tf.reduce_mean(input_tensor=microbatch_loss)
grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients(
microbatch_loss,
var_list,
gate_gradients,
aggregation_method,
colocate_gradients_with_ops,
grad_loss))
microbatch_loss, var_list, gate_gradients, aggregation_method,
colocate_gradients_with_ops, grad_loss))
grads_list = [
g if g is not None else tf.zeros_like(v)
for (g, v) in zip(list(grads), var_list)

View file

@ -17,7 +17,6 @@ import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized
from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad
from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam
@ -63,19 +62,19 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
grads_and_vars = sess.run(gradient_op)
self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])
@parameterized.named_parameters(
('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
@parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
def testClippingNorm(self, cls):
with self.cached_session() as sess:
var0 = tf.Variable([0.0, 0.0])
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
opt = cls(l2_norm_clip=1.0,
noise_multiplier=0.,
num_microbatches=1,
learning_rate=2.0)
opt = cls(
l2_norm_clip=1.0,
noise_multiplier=0.,
num_microbatches=1,
learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values
@ -86,19 +85,19 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
grads_and_vars = sess.run(gradient_op)
self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
@parameterized.named_parameters(
('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
@parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
def testNoiseMultiplier(self, cls):
with self.cached_session() as sess:
var0 = tf.Variable([0.0])
data0 = tf.Variable([[0.0]])
opt = cls(l2_norm_clip=4.0,
noise_multiplier=8.0,
num_microbatches=1,
learning_rate=2.0)
opt = cls(
l2_norm_clip=4.0,
noise_multiplier=8.0,
num_microbatches=1,
learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values
@ -168,10 +167,9 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
true_weights,
atol=1.0)
@parameterized.named_parameters(
('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
@parameterized.named_parameters(('DPGradientDescent', VectorizedDPSGD),
('DPAdagrad', VectorizedDPAdagrad),
('DPAdam', VectorizedDPAdam))
def testDPGaussianOptimizerClass(self, cls):
with self.cached_session() as sess:
var0 = tf.Variable([0.0])

View file

@ -217,5 +217,6 @@ def main(unused_argv):
# For saving a figure into a file:
# plotting.save_plot(figure, <file_path>)
if __name__ == "__main__":
app.run(main)

View file

@ -482,8 +482,8 @@ class SingleAttackResult:
return '\n'.join([
'SingleAttackResult(',
' SliceSpec: %s' % str(self.slice_spec),
' DataSize: (ntrain=%d, ntest=%d)' % (self.data_size.ntrain,
self.data_size.ntest),
' DataSize: (ntrain=%d, ntest=%d)' %
(self.data_size.ntrain, self.data_size.ntest),
' AttackType: %s' % str(self.attack_type),
' AUC: %.2f' % self.get_auc(),
' Attacker advantage: %.2f' % self.get_attacker_advantage(), ')'
@ -684,10 +684,8 @@ class AttackResults:
summary.append('Best-performing attacks over all slices')
summary.append(
' %s (with %d training and %d test examples) achieved an AUC of %.2f on slice %s'
% (max_auc_result_all.attack_type,
max_auc_result_all.data_size.ntrain,
max_auc_result_all.data_size.ntest,
max_auc_result_all.get_auc(),
% (max_auc_result_all.attack_type, max_auc_result_all.data_size.ntrain,
max_auc_result_all.data_size.ntest, max_auc_result_all.get_auc(),
max_auc_result_all.slice_spec))
max_advantage_result_all = self.get_result_with_max_attacker_advantage()
@ -709,10 +707,8 @@ class AttackResults:
max_auc_result = results.get_result_with_max_auc()
summary.append(
' %s (with %d training and %d test examples) achieved an AUC of %.2f'
% (max_auc_result.attack_type,
max_auc_result.data_size.ntrain,
max_auc_result.data_size.ntest,
max_auc_result.get_auc()))
% (max_auc_result.attack_type, max_auc_result.data_size.ntrain,
max_auc_result.data_size.ntest, max_auc_result.get_auc()))
max_advantage_result = results.get_result_with_max_attacker_advantage()
summary.append(
' %s (with %d training and %d test examples) achieved an advantage of %.2f'
@ -816,6 +812,8 @@ def get_flattened_attack_metrics(results: AttackResults):
types += [str(attack_result.attack_type)] * 2
slices += [str(attack_result.slice_spec)] * 2
attack_metrics += ['adv', 'auc']
values += [float(attack_result.get_attacker_advantage()),
float(attack_result.get_auc())]
values += [
float(attack_result.get_attacker_advantage()),
float(attack_result.get_auc())
]
return types, slices, attack_metrics, values
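
The four parallel lists stay aligned because every attack result contributes exactly two entries to each (`'adv'` then `'auc'`). A toy illustration of how the callers in this commit consume them; the values are fabricated:

```
# Hypothetical flattened output for a single threshold attack:
types = ['THRESHOLD_ATTACK', 'THRESHOLD_ATTACK']
slices = ['Entire dataset', 'Entire dataset']
attack_metrics = ['adv', 'auc']
values = [0.31, 0.72]

# The printing pattern used throughout this commit:
print('\n'.join([
    '  %s: %.4f' % (', '.join([s, t, m]), v)
    for t, s, m, v in zip(types, slices, attack_metrics, values)
]))
```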

View file

@ -54,7 +54,8 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def __init__(
self,
in_train, out_train,
in_train,
out_train,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
tensorboard_dir=None,
@ -70,7 +71,7 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
attack_types: a list of attacks, each of type AttackType
tensorboard_dir: directory for tensorboard summary
tensorboard_merge_classifiers: if true, plot different classifiers with
the same slicing_spec and metric in the same figure
the same slicing_spec and metric in the same figure
is_logit: whether the result of model.predict is logit or probability
batch_size: the batch size for model.predict
"""
@ -96,19 +97,18 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
results = run_attack_on_keras_model(
self.model,
(self._in_train_data, self._in_train_labels),
(self._out_train_data, self._out_train_labels),
self._slicing_spec,
self._attack_types,
self._is_logit, self._batch_size)
self.model, (self._in_train_data, self._in_train_labels),
(self._out_train_data, self._out_train_labels), self._slicing_spec,
self._attack_types, self._is_logit, self._batch_size)
logging.info(results)
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
results)
print('Attack result:')
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
# Write to tensorboard if tensorboard_dir is specified
if self._writers is not None:
@ -117,7 +117,9 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
def run_attack_on_keras_model(
model, in_train, out_train,
model,
in_train,
out_train,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
is_logit: bool = False,
@ -132,6 +134,7 @@ def run_attack_on_keras_model(
attack_types: a list of attacks, each of type AttackType
is_logit: whether the result of model.predict is logit or probability
batch_size: the batch size for model.predict
Returns:
Results of the attack
"""
@ -139,16 +142,19 @@ def run_attack_on_keras_model(
out_train_data, out_train_labels = out_train
# Compute predictions and losses
in_train_pred, in_train_loss = calculate_losses(
model, in_train_data, in_train_labels, is_logit, batch_size)
out_train_pred, out_train_loss = calculate_losses(
model, out_train_data, out_train_labels, is_logit, batch_size)
in_train_pred, in_train_loss = calculate_losses(model, in_train_data,
in_train_labels, is_logit,
batch_size)
out_train_pred, out_train_loss = calculate_losses(model, out_train_data,
out_train_labels, is_logit,
batch_size)
attack_input = AttackInputData(
logits_train=in_train_pred, logits_test=out_train_pred,
labels_train=in_train_labels, labels_test=out_train_labels,
loss_train=in_train_loss, loss_test=out_train_loss
)
results = mia.run_attacks(attack_input,
slicing_spec=slicing_spec,
attack_types=attack_types)
logits_train=in_train_pred,
logits_test=out_train_pred,
labels_train=in_train_labels,
labels_test=out_train_labels,
loss_train=in_train_loss,
loss_test=out_train_loss)
results = mia.run_attacks(
attack_input, slicing_spec=slicing_spec, attack_types=attack_types)
return results

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example for using keras_evaluation."""
from absl import app
@ -25,15 +24,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import MembershipInferenceCallback
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import run_attack_on_keras_model
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
flags.DEFINE_bool(
'tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
def small_cnn():
@ -76,14 +75,15 @@ def main(unused_argv):
# Get callback for membership inference attack.
mia_callback = MembershipInferenceCallback(
(x_train, y_train),
(x_test, y_test),
(x_train, y_train), (x_test, y_test),
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK,
AttackType.K_NEAREST_NEIGHBORS],
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
],
tensorboard_dir=FLAGS.model_dir,
tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers,
is_logit=True, batch_size=2048)
is_logit=True,
batch_size=2048)
# Train model with Keras
model.fit(
@ -102,11 +102,14 @@ def main(unused_argv):
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
],
is_logit=True, batch_size=2048)
is_logit=True,
batch_size=2048)
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
if __name__ == '__main__':

View file

@ -13,10 +13,8 @@
# limitations under the License.
from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import keras_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@ -59,8 +57,7 @@ class UtilsTest(absltest.TestCase):
def test_run_attack_on_keras_model(self):
"""Test the attack."""
results = keras_evaluation.run_attack_on_keras_model(
self.model,
(self.train_data, self.train_labels),
self.model, (self.train_data, self.train_labels),
(self.test_data, self.test_labels),
attack_types=[AttackType.THRESHOLD_ATTACK])
self.assertIsInstance(results, AttackResults)

View file

@ -140,9 +140,9 @@ def _run_attack(attack_input: AttackInputData,
attack_input: input data for running an attack
attack_type: the attack to run
balance_attacker_training: Whether the training and test sets for the
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop
the model under attack.
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop the
model under attack.
min_num_samples: minimum number of examples in either training or test data.
Returns:
@ -179,9 +179,9 @@ def run_attacks(attack_input: AttackInputData,
attack_types: attacks to run
privacy_report_metadata: the metadata of the model under attack.
balance_attacker_training: Whether the training and test sets for the
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop
the model under attack.
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop the
model under attack.
min_num_samples: minimum number of examples in either training or test data.
Returns:
@ -200,8 +200,7 @@ def run_attacks(attack_input: AttackInputData,
attack_input_slice = get_slice(attack_input, single_slice_spec)
for attack_type in attack_types:
attack_result = _run_attack(attack_input_slice, attack_type,
balance_attacker_training,
min_num_samples)
balance_attacker_training, min_num_samples)
if attack_result is not None:
attack_results.append(attack_result)
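
For orientation, a hedged end-to-end sketch of invoking the attack runner directly; the array shapes and values are fabricated, and the import paths follow the ones used elsewhere in this commit (losses are derived from logits and labels when not supplied):

```
import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec

# Fabricated model outputs: 100 train and 100 test examples, 5 classes.
attack_input = AttackInputData(
    logits_train=np.random.randn(100, 5),
    logits_test=np.random.randn(100, 5),
    labels_train=np.random.randint(5, size=100),
    labels_test=np.random.randint(5, size=100))

results = mia.run_attacks(
    attack_input,
    slicing_spec=SlicingSpec(entire_dataset=True),
    attack_types=[AttackType.THRESHOLD_ATTACK])
print(results.summary(by_slices=False))
```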

View file

@ -55,9 +55,8 @@ def create_attacker_data(attack_input_data: AttackInputData,
attack_input_data: Original AttackInputData
test_fraction: Fraction of the dataset to include in the test split.
balance: Whether the training and test sets for the membership inference
attacker should have a balanced (roughly equal) number of samples
from the training and test sets used to develop the model
under attack.
attacker should have a balanced (roughly equal) number of samples from the
training and test sets used to develop the model under attack.
Returns:
AttackerData.
@ -134,6 +133,7 @@ class TrainedAttacker:
Args:
input_features: A vector of features with the same semantics as x_train
passed to train_model.
Returns:
An array of probabilities denoting whether the example belongs to test.
"""

View file

@ -81,5 +81,4 @@ def plot_histograms(train: Iterable[float],
def plot_roc_curve(roc_curve, plot_func=plot_curve_with_area) -> plt.Figure:
"""Plot the ROC curve and the area under the curve."""
return plot_func(
roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')
return plot_func(roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A hook and a function in tf estimator for membership inference attack."""
import os
@ -58,7 +57,8 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
def __init__(
self,
estimator,
in_train, out_train,
in_train,
out_train,
input_fn_constructor,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
@ -76,7 +76,7 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
attack_types: a list of attacks, each of type AttackType
tensorboard_dir: directory for tensorboard summary
tensorboard_merge_classifiers: if true, plot different classifiers with
the same slicing_spec and metric in the same figure
the same slicing_spec and metric in the same figure
"""
in_train_data, self._in_train_labels = in_train
out_train_data, self._out_train_labels = out_train
@ -106,19 +106,19 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
self._writers = None
def end(self, session):
results = run_attack_helper(self._estimator,
self._in_train_input_fn,
self._out_train_input_fn,
self._in_train_labels, self._out_train_labels,
self._slicing_spec,
results = run_attack_helper(self._estimator, self._in_train_input_fn,
self._out_train_input_fn, self._in_train_labels,
self._out_train_labels, self._slicing_spec,
self._attack_types)
logging.info(results)
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
results)
print('Attack result:')
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
# Write to tensorboard if tensorboard_dir is specified
global_step = self._estimator.get_variable_value('global_step')
@ -128,7 +128,9 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
def run_attack_on_tf_estimator_model(
estimator, in_train, out_train,
estimator,
in_train,
out_train,
input_fn_constructor,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
@ -142,6 +144,7 @@ def run_attack_on_tf_estimator_model(
the input_fn for model prediction
slicing_spec: slicing specification of the attack
attack_types: a list of attacks, each of type AttackType
Returns:
Results of the attack
"""
@ -153,10 +156,8 @@ def run_attack_on_tf_estimator_model(
out_train_input_fn = input_fn_constructor(out_train_data, out_train_labels)
# Call the helper to run the attack.
results = run_attack_helper(estimator,
in_train_input_fn, out_train_input_fn,
in_train_labels, out_train_labels,
slicing_spec,
results = run_attack_helper(estimator, in_train_input_fn, out_train_input_fn,
in_train_labels, out_train_labels, slicing_spec,
attack_types)
logging.info('End of training attack:')
logging.info(results)
@ -165,8 +166,10 @@ def run_attack_on_tf_estimator_model(
def run_attack_helper(
estimator,
in_train_input_fn, out_train_input_fn,
in_train_labels, out_train_labels,
in_train_input_fn,
out_train_input_fn,
in_train_labels,
out_train_labels,
slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
"""A helper function to perform attack.
@ -179,22 +182,23 @@ def run_attack_helper(
out_train_labels: out of training labels
slicing_spec: slicing specification of the attack
attack_types: a list of attacks, each of type AttackType
Returns:
Results of the attack
"""
# Compute predictions and losses
in_train_pred, in_train_loss = calculate_losses(estimator,
in_train_input_fn,
in_train_pred, in_train_loss = calculate_losses(estimator, in_train_input_fn,
in_train_labels)
out_train_pred, out_train_loss = calculate_losses(estimator,
out_train_input_fn,
out_train_labels)
attack_input = AttackInputData(
logits_train=in_train_pred, logits_test=out_train_pred,
labels_train=in_train_labels, labels_test=out_train_labels,
loss_train=in_train_loss, loss_test=out_train_loss
)
results = mia.run_attacks(attack_input,
slicing_spec=slicing_spec,
attack_types=attack_types)
logits_train=in_train_pred,
logits_test=out_train_pred,
labels_train=in_train_labels,
labels_test=out_train_labels,
loss_train=in_train_loss,
loss_test=out_train_loss)
results = mia.run_attacks(
attack_input, slicing_spec=slicing_spec, attack_types=attack_types)
return results

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example for using tf_estimator_evaluation."""
from absl import app
@ -26,15 +25,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
flags.DEFINE_bool(
'tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
def small_cnn_fn(features, labels, mode):
@ -55,8 +54,8 @@ def small_cnn_fn(features, labels, mode):
# Configure the training op (for TRAIN mode).
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.MomentumOptimizer(learning_rate=FLAGS.learning_rate,
momentum=0.9)
optimizer = tf.train.MomentumOptimizer(
learning_rate=FLAGS.learning_rate, momentum=0.9)
global_step = tf.train.get_global_step()
train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step)
return tf.estimator.EstimatorSpec(
@ -111,13 +110,12 @@ def main(unused_argv):
# Get hook for membership inference attack.
mia_hook = MembershipInferenceTrainingHook(
classifier,
(x_train, y_train),
(x_test, y_test),
classifier, (x_train, y_train), (x_test, y_test),
input_fn_constructor,
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK,
AttackType.K_NEAREST_NEIGHBORS],
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
],
tensorboard_dir=FLAGS.model_dir,
tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)
@ -148,12 +146,15 @@ def main(unused_argv):
classifier, (x_train, y_train), (x_test, y_test),
input_fn_constructor,
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS]
)
attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
])
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(att_types, att_slices, att_metrics, att_values)]))
print('\n'.join([
' %s: %.4f' % (', '.join([s, t, m]), v)
for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
]))
if __name__ == '__main__':

View file

@ -13,10 +13,8 @@
# limitations under the License.
from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import tf_estimator_evaluation
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@ -55,23 +53,25 @@ class UtilsTest(absltest.TestCase):
# Define the classifier, input_fn for training and test data
self.classifier = tf.estimator.Estimator(model_fn=model_fn)
self.input_fn_train = tf.estimator.inputs.numpy_input_fn(
x={'x': self.train_data}, y=self.train_labels, num_epochs=1,
x={'x': self.train_data},
y=self.train_labels,
num_epochs=1,
shuffle=False)
self.input_fn_test = tf.estimator.inputs.numpy_input_fn(
x={'x': self.test_data}, y=self.test_labels, num_epochs=1,
x={'x': self.test_data},
y=self.test_labels,
num_epochs=1,
shuffle=False)
def test_calculate_losses(self):
"""Test calculating the loss."""
pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier,
self.input_fn_train,
self.train_labels)
pred, loss = tf_estimator_evaluation.calculate_losses(
self.classifier, self.input_fn_train, self.train_labels)
self.assertEqual(pred.shape, (self.ntrain, self.nclass))
self.assertEqual(loss.shape, (self.ntrain,))
pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier,
self.input_fn_test,
self.test_labels)
pred, loss = tf_estimator_evaluation.calculate_losses(
self.classifier, self.input_fn_test, self.test_labels)
self.assertEqual(pred.shape, (self.ntest, self.nclass))
self.assertEqual(loss.shape, (self.ntest,))
@ -94,12 +94,12 @@ class UtilsTest(absltest.TestCase):
def test_run_attack_on_tf_estimator_model(self):
"""Test the attack on the final models."""
def input_fn_constructor(x, y):
return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)
results = tf_estimator_evaluation.run_attack_on_tf_estimator_model(
self.classifier,
(self.train_data, self.train_labels),
self.classifier, (self.train_data, self.train_labels),
(self.test_data, self.test_labels),
input_fn_constructor,
attack_types=[AttackType.THRESHOLD_ATTACK])

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for membership inference attacks."""
import numpy as np

View file

@ -27,7 +27,7 @@ def write_to_tensorboard(writers, tags, values, step):
Args:
writers: a list of tensorboard writers or one writer to be used for metrics.
If it's a list, it should be of the same length as tags
If it's a list, it should be of the same length as tags
tags: a list of tags of metrics
values: a list of values of metrics with the same length as tags
step: step for the tensorboard summary
@ -54,7 +54,7 @@ def write_to_tensorboard_tf2(writers, tags, values, step):
Args:
writers: a list of tensorboard writers or one writer to be used for metrics.
If it's a list, it should be of the same length as tags
If it's a list, it should be of the same length as tags
tags: a list of tags of metrics
values: a list of values of metrics with the same length as tags
step: step for the tensorboard summary
@ -77,11 +77,10 @@ def write_to_tensorboard_tf2(writers, tags, values, step):
writer.flush()
def write_results_to_tensorboard(
attack_results: AttackResults,
writers: Union[tf1.summary.FileWriter, List[tf1.summary.FileWriter]],
step: int,
merge_classifiers: bool):
def write_results_to_tensorboard(attack_results: AttackResults,
writers: Union[tf1.summary.FileWriter,
List[tf1.summary.FileWriter]],
step: int, merge_classifiers: bool):
"""Write attack results to tensorboard.
Args:
@ -97,21 +96,21 @@ def write_results_to_tensorboard(
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
if merge_classifiers:
att_tags = ['attack/' + f'{s}_{m}' for s, m in
zip(att_slices, att_metrics)]
write_to_tensorboard([writers[t] for t in att_types],
att_tags, att_values, step)
att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
write_to_tensorboard([writers[t] for t in att_types], att_tags, att_values,
step)
else:
att_tags = ['attack/' + f'{s}_{t}_{m}' for t, s, m in
zip(att_types, att_slices, att_metrics)]
att_tags = [
'attack/' + f'{s}_{t}_{m}'
for t, s, m in zip(att_types, att_slices, att_metrics)
]
write_to_tensorboard(writers, att_tags, att_values, step)
def write_results_to_tensorboard_tf2(
attack_results: AttackResults,
writers: Union[tf2.summary.SummaryWriter, List[tf2.summary.SummaryWriter]],
step: int,
merge_classifiers: bool):
step: int, merge_classifiers: bool):
"""Write attack results to tensorboard.
Args:
@ -127,12 +126,12 @@ def write_results_to_tensorboard_tf2(
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_results)
if merge_classifiers:
att_tags = ['attack/' + f'{s}_{m}' for s, m in
zip(att_slices, att_metrics)]
write_to_tensorboard_tf2([writers[t] for t in att_types],
att_tags, att_values, step)
att_tags = ['attack/' + f'{s}_{m}' for s, m in zip(att_slices, att_metrics)]
write_to_tensorboard_tf2([writers[t] for t in att_types], att_tags,
att_values, step)
else:
att_tags = ['attack/' + f'{s}_{t}_{m}' for t, s, m in
zip(att_types, att_slices, att_metrics)]
att_tags = [
'attack/' + f'{s}_{t}_{m}'
for t, s, m in zip(att_types, att_slices, att_metrics)
]
write_to_tensorboard_tf2(writers, att_tags, att_values, step)

View file

@ -25,22 +25,23 @@ def compute_exposure_interpolation(
"""Get exposure using interpolation.
Args:
perplexities: a dictionary, key is number of secret repetitions,
value is a list of perplexities
perplexities: a dictionary, key is number of secret repetitions, value is a
list of perplexities
perplexities_reference: a list, perplexities of the random sequences that
did not appear in the training data
did not appear in the training data
Returns:
The exposure of every secret measured using interpolation (not necessarily
in the same order as the input)
"""
repetitions = list(perplexities.keys())
# Concatenate all perplexities, including those for references
perplexities_concat = np.concatenate([perplexities[r] for r in repetitions]
+ [perplexities_reference])
perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] +
[perplexities_reference])
# Concatenate the number of repetitions for each secret
repetitions_concat = np.concatenate(
[[r] * len(perplexities[r]) for r in repetitions]
+ [[0] * len(perplexities_reference)])
repetitions_concat = np.concatenate([[r] * len(perplexities[r])
for r in repetitions] +
[[0] * len(perplexities_reference)])
# Sort the repetition list according to the corresponding perplexity
idx = np.argsort(perplexities_concat)
@ -53,8 +54,10 @@ def compute_exposure_interpolation(
# (repetitions_concat == 0).
cum_sum = np.cumsum(repetitions_concat == 0)
ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions}
exposures = {r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
for r in repetitions}
exposures = {
r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
for r in repetitions
}
return exposures
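
The rank computation above reduces to `exposure = log2(#references) - log2(rank)`. A tiny worked check, consistent with the unit test further down in this commit (numbers chosen so the rank is unambiguous):

```
import numpy as np

# 16 reference perplexities; a secret whose perplexity ranks 4th among them:
num_references = 16
rank = 4
exposure = np.log2(num_references) - np.log2(rank)  # = 4 - 2 = 2.0
```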
@ -64,10 +67,11 @@ def compute_exposure_extrapolation(
"""Get exposure using extrapolation.
Args:
perplexities: a dictionary, key is number of secret repetitions,
value is a list of perplexities
perplexities: a dictionary, key is number of secret repetitions, value is a
list of perplexities
perplexities_reference: a list, perplexities of the random sequences that
did not appear in the training data
did not appear in the training data
Returns:
The exposure of every secret measured using extrapolation
"""

View file

@ -15,7 +15,6 @@
from absl.testing import absltest
import numpy as np
from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
@ -28,9 +27,11 @@ class UtilsTest(absltest.TestCase):
def test_exposure_interpolation(self):
"""Test exposure by interpolation."""
perplexities = {1: [0, 0.1], # smallest perplexities
2: [20.0], # largest perplexities
5: [3.5]} # rank = 4
perplexities = {
1: [0, 0.1], # smallest perplexities
2: [20.0], # largest perplexities
5: [3.5]
} # rank = 4
perplexities_reference = [float(x) for x in range(1, 17)]
exposures = compute_exposure_interpolation(perplexities,
perplexities_reference)
@ -41,7 +42,8 @@ class UtilsTest(absltest.TestCase):
expected_exposures = {
1: np.array([exposure_largest] * 2),
2: np.array([exposure_smallest]),
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])}
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])
}
self.assertEqual(exposures.keys(), expected_exposures.keys())
for r in exposures.keys():

View file

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate random sequences."""
import itertools
@ -21,7 +20,9 @@ from dataclasses import dataclass
import numpy as np
def generate_random_sequences(vocab: List[str], pattern: str, n: int,
def generate_random_sequences(vocab: List[str],
pattern: str,
n: int,
seed: int = 1) -> List[str]:
"""Generate random sequences.
@ -35,6 +36,7 @@ def generate_random_sequences(vocab: List[str], pattern: str, n: int,
Returns:
A list of different random sequences from the given vocabulary
"""
def count_placeholder(pattern):
return sum([x[1] is not None for x in string.Formatter().parse(pattern)])
@ -103,7 +105,8 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets:
Args:
secret_config: configuration of secret.
seqs: a list of random sequences that will be used for secrets and
references.
references.
Returns:
a secret instance.
"""
@ -116,9 +119,10 @@ def construct_secret(secret_config: SecretConfig, seqs: List[str]) -> Secrets:
secret_config.num_repetitions, secret_config.num_secrets_for_repetitions):
secrets[num_repetition] = seqs[i:i + num_secrets]
i += num_secrets
return Secrets(config=secret_config,
secrets=secrets,
references=seqs[-secret_config.num_references:])
return Secrets(
config=secret_config,
secrets=secrets,
references=seqs[-secret_config.num_references:])
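For concreteness, the slicing above consumes seqs front-to-back for secrets and takes references from the tail; with the inputs used in the unit test below:

# seqs = list('0123456789'), num_repetitions=[1, 2, 8],
# num_secrets_for_repetitions=[2, 3, 1], num_references=3:
#   i=0: two secrets repeated once    -> {1: ['0', '1']}
#   i=2: three secrets repeated twice -> {2: ['2', '3', '4']}
#   i=5: one secret repeated 8 times  -> {8: ['5']}
#   references = seqs[-3:]            -> ['7', '8', '9']  ('6' goes unused)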
def generate_secrets_and_references(secret_configs: List[SecretConfig],
@ -128,6 +132,7 @@ def generate_secrets_and_references(secret_configs: List[SecretConfig],
Args:
secret_configs: a list of secret configurations.
seed: random seed.
Returns:
A list of secret instances.
"""

View file

@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets."""
from absl.testing import absltest
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import construct_secret
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_random_sequences
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.generate_secrets import generate_secrets_and_references
@ -32,27 +29,34 @@ class UtilsTest(absltest.TestCase):
"""Test generate_random_sequences."""
# Test when n is larger than total number of possible sequences.
seqs = generate_random_sequences(['A', 'b', 'c'], '{}+{}', 10, seed=27)
expected_seqs = ['A+c', 'c+c', 'b+b', 'A+b', 'b+c',
'c+A', 'c+b', 'A+A', 'b+A']
expected_seqs = [
'A+c', 'c+c', 'b+b', 'A+b', 'b+c', 'c+A', 'c+b', 'A+A', 'b+A'
]
self.assertEqual(seqs, expected_seqs)
# Test when n is smaller than total number of possible sequences.
seqs = generate_random_sequences(list('01234'), 'prefix {}{}{}?', 8, seed=9)
expected_seqs = ['prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?',
'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?']
expected_seqs = [
'prefix 143?', 'prefix 031?', 'prefix 302?', 'prefix 042?',
'prefix 404?', 'prefix 024?', 'prefix 021?', 'prefix 403?'
]
self.assertEqual(seqs, expected_seqs)
def test_construct_secret(self):
secret_config = SecretConfig(vocab=None, pattern='',
num_repetitions=[1, 2, 8],
num_secrets_for_repetitions=[2, 3, 1],
num_references=3)
secret_config = SecretConfig(
vocab=None,
pattern='',
num_repetitions=[1, 2, 8],
num_secrets_for_repetitions=[2, 3, 1],
num_references=3)
seqs = list('0123456789')
secrets = construct_secret(secret_config, seqs)
self.assertEqual(secrets.config, secret_config)
self.assertDictEqual(secrets.secrets, {1: ['0', '1'],
2: ['2', '3', '4'],
8: ['5']})
self.assertDictEqual(secrets.secrets, {
1: ['0', '1'],
2: ['2', '3', '4'],
8: ['5']
})
self.assertEqual(secrets.references, ['7', '8', '9'])
# Test when the number of elements in seqs is not enough.
@ -61,29 +65,36 @@ class UtilsTest(absltest.TestCase):
def test_generate_secrets_and_references(self):
secret_configs = [
SecretConfig(vocab=['w1', 'w2', 'w3'], pattern='{} {} suf',
num_repetitions=[1, 12],
num_secrets_for_repetitions=[2, 1],
num_references=3),
SecretConfig(vocab=['W 1', 'W 2', 'W 3'], pattern='{}-{}',
num_repetitions=[1, 2, 8],
num_secrets_for_repetitions=[2, 3, 1],
num_references=3)
SecretConfig(
vocab=['w1', 'w2', 'w3'],
pattern='{} {} suf',
num_repetitions=[1, 12],
num_secrets_for_repetitions=[2, 1],
num_references=3),
SecretConfig(
vocab=['W 1', 'W 2', 'W 3'],
pattern='{}-{}',
num_repetitions=[1, 2, 8],
num_secrets_for_repetitions=[2, 3, 1],
num_references=3)
]
secrets = generate_secrets_and_references(secret_configs, seed=27)
self.assertEqual(secrets[0].config, secret_configs[0])
self.assertDictEqual(secrets[0].secrets, {1: ['w3 w2 suf', 'w2 w1 suf'],
12: ['w1 w1 suf']})
self.assertDictEqual(secrets[0].secrets, {
1: ['w3 w2 suf', 'w2 w1 suf'],
12: ['w1 w1 suf']
})
self.assertEqual(secrets[0].references,
['w2 w3 suf', 'w2 w2 suf', 'w3 w1 suf'])
self.assertEqual(secrets[1].config, secret_configs[1])
self.assertDictEqual(secrets[1].secrets,
{1: ['W 3-W 2', 'W 1-W 3'],
2: ['W 3-W 1', 'W 2-W 1', 'W 1-W 1'],
8: ['W 2-W 2']})
self.assertEqual(secrets[1].references,
['W 2-W 3', 'W 3-W 3', 'W 1-W 2'])
self.assertDictEqual(
secrets[1].secrets, {
1: ['W 3-W 2', 'W 1-W 3'],
2: ['W 3-W 1', 'W 2-W 1', 'W 1-W 1'],
8: ['W 2-W 2']
})
self.assertEqual(secrets[1].references, ['W 2-W 3', 'W 3-W 3', 'W 1-W 2'])
if __name__ == '__main__':

View file

@ -27,10 +27,14 @@ n_samples = 10
input_dim = 2
n_outputs = 1
# Create binary classification dataset:
x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)),
tf.constant(1, tf.float32, (n_samples, input_dim))]
y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),
tf.constant(1, tf.float32, (n_samples, 1))]
x_stack = [
tf.constant(-1, tf.float32, (n_samples, input_dim)),
tf.constant(1, tf.float32, (n_samples, input_dim))
]
y_stack = [
tf.constant(0, tf.float32, (n_samples, 1)),
tf.constant(1, tf.float32, (n_samples, 1))
]
x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
print(x.shape, y.shape)
generator = tf.data.Dataset.from_tensor_slices((x, y))
@ -86,14 +90,15 @@ n_samples = None # default, if it cannot be inferred, specify this
epsilon = 2
noise_distribution = 'laplace'
bolt.fit(x,
y,
epsilon=epsilon,
class_weight=class_weight,
batch_size=batch_size,
n_samples=n_samples,
noise_distribution=noise_distribution,
epochs=2)
bolt.fit(
x,
y,
epsilon=epsilon,
class_weight=class_weight,
batch_size=batch_size,
n_samples=n_samples,
noise_distribution=noise_distribution,
epochs=2)
# -------
# We may also train on a generator object, or try different optimizers and loss
# functions. Below, we will see that we must pass the number of samples as the
@ -109,25 +114,27 @@ n_samples = None # default, if it cannot be inferred, specify this
epsilon = 2
noise_distribution = 'laplace'
try:
bolt.fit(generator,
epsilon=epsilon,
class_weight=class_weight,
batch_size=batch_size,
n_samples=n_samples,
noise_distribution=noise_distribution,
verbose=0)
bolt.fit(
generator,
epsilon=epsilon,
class_weight=class_weight,
batch_size=batch_size,
n_samples=n_samples,
noise_distribution=noise_distribution,
verbose=0)
except ValueError as e:
print(e)
# -------
# And now, re-running with the parameter set.
# -------
n_samples = 20
bolt.fit_generator(generator,
epsilon=epsilon,
class_weight=class_weight,
n_samples=n_samples,
noise_distribution=noise_distribution,
verbose=0)
bolt.fit_generator(
generator,
epsilon=epsilon,
class_weight=class_weight,
n_samples=n_samples,
noise_distribution=noise_distribution,
verbose=0)
# -------
# You don't have to use the BoltOn model to use the BoltOn method.
# There are only a few requirements:
@ -145,8 +152,8 @@ class TestModel(tf.keras.Model): # pylint: disable=abstract-method
def __init__(self, reg_layer, number_of_outputs=1):
super().__init__(name='test')
self.output_layer = tf.keras.layers.Dense(number_of_outputs,
kernel_regularizer=reg_layer)
self.output_layer = tf.keras.layers.Dense(
number_of_outputs, kernel_regularizer=reg_layer)
def call(self, inputs): # pylint: disable=arguments-differ
return self.output_layer(inputs)
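A hedged reading of what this toy model illustrates: the BoltOn method needs a strongly convex objective, so the final layer's kernel_regularizer is expected to come from the strongly convex loss, which is what reg_layer stands in for. Assuming the bolt_on.losses API (StrongConvexBinaryCrossentropy and its kernel_regularizer() accessor), the wiring would look like:

from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy

# Hypothetical constants; the loss supplies the L2 regularizer for the layer.
loss = StrongConvexBinaryCrossentropy(
    reg_lambda=1.0, c_arg=1.0, radius_constant=1.0)
model = TestModel(reg_layer=loss.kernel_regularizer(), number_of_outputs=1)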
@ -180,6 +187,5 @@ with optimizer(
layers=test_model.layers,
class_weights=class_weights,
n_samples=n_samples,
batch_size=batch_size
) as _:
batch_size=batch_size) as _:
test_model.fit(x, y, batch_size=batch_size, epochs=2)
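Roughly what the with optimizer(...) block above is doing (a hedged paraphrase of the BoltOn scheme, not a quote of the implementation): during the block, training proceeds normally except that the optimizer projects the weights back into the radius ball required by the strongly convex loss after each step; on exit, it perturbs the learned weights with noise calibrated from epsilon, n_samples, class_weights and the loss's sensitivity. Privacy is thus applied as output perturbation rather than per-step gradient noise.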

View file

@ -86,12 +86,10 @@ def cnn_model_fn(features, labels, mode, params): # pylint: disable=unused-argu
eval_metric_ops = {
'accuracy':
tf.metrics.accuracy(
labels=labels,
predictions=tf.argmax(input=logits, axis=1))
labels=labels, predictions=tf.argmax(input=logits, axis=1))
}
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
eval_metric_ops=eval_metric_ops)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
def main(unused_argv):
@ -100,8 +98,8 @@ def main(unused_argv):
raise ValueError('Number of microbatches should divide evenly batch_size')
# Instantiate the tf.Estimator.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
model_dir=FLAGS.model_dir)
mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)
# Training loop.
steps_per_epoch = 60000 // FLAGS.batch_size

View file

@ -25,16 +25,18 @@ from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescent
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
tf.enable_eager_execution()
flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, '
'train with vanilla SGD.')
flags.DEFINE_boolean(
'dpsgd', True, 'If True, train with DP-SGD. If False, '
'train with vanilla SGD.')
flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 1.1,
'Ratio of the standard deviation to the clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer('microbatches', 250, 'Number of microbatches '
'(must evenly divide batch_size)')
flags.DEFINE_integer(
'microbatches', 250, 'Number of microbatches '
'(must evenly divide batch_size)')
FLAGS = flags.FLAGS
@ -45,10 +47,11 @@ def compute_epsilon(steps):
return float('inf')
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = FLAGS.batch_size / 60000
rdp = compute_rdp(q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
rdp = compute_rdp(
q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
# Delta is set to 1e-5 because MNIST has 60000 training points.
return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
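A worked call with the default flags above: one epoch of MNIST at batch_size=250 is 60000 / 250 = 240 steps, sampled with probability q = 250 / 60000 (about 0.42%), so the epsilon after one epoch comes out of the same two calls:

from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
rdp = compute_rdp(
    q=250 / 60000,          # sampling probability
    noise_multiplier=1.1,   # FLAGS.noise_multiplier default
    steps=240,              # one epoch
    orders=orders)
eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
print('epsilon after one epoch:', eps)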
@ -64,22 +67,20 @@ def main(_):
# Create a dataset object and batch for the training data
dataset = tf.data.Dataset.from_tensor_slices(
(tf.cast(train_images[..., tf.newaxis]/255, tf.float32),
tf.cast(train_labels, tf.int64)))
(tf.cast(train_images[..., tf.newaxis] / 255,
tf.float32), tf.cast(train_labels, tf.int64)))
dataset = dataset.shuffle(1000).batch(FLAGS.batch_size)
# Create a dataset object and batch for the test data
eval_dataset = tf.data.Dataset.from_tensor_slices(
(tf.cast(test_images[..., tf.newaxis]/255, tf.float32),
tf.cast(test_labels, tf.int64)))
(tf.cast(test_images[..., tf.newaxis] / 255,
tf.float32), tf.cast(test_labels, tf.int64)))
eval_dataset = eval_dataset.batch(10000)
# Define the model using tf.keras.layers
mnist_model = tf.keras.Sequential([
tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu'),
tf.keras.layers.Conv2D(
16, 8, strides=2, padding='same', activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
@ -119,8 +120,8 @@ def main(_):
return loss
if FLAGS.dpsgd:
grads_and_vars = opt.compute_gradients(loss_fn, var_list,
gradient_tape=gradient_tape)
grads_and_vars = opt.compute_gradients(
loss_fn, var_list, gradient_tape=gradient_tape)
else:
grads_and_vars = opt.compute_gradients(loss_fn, var_list)
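Why the DP branch above passes the tape explicitly (a hedged paraphrase of the optimizer's eager-mode behavior): compute_gradients reuses the supplied GradientTape to obtain one gradient per microbatch, clips each microbatch gradient to l2_norm_clip, adds Gaussian noise to their sum, and averages. Clipping therefore happens before aggregation, which is what the DP-SGD analysis assumes; without the tape the optimizer could only see the already-averaged batch gradient.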
@ -140,5 +141,6 @@ def main(_):
else:
print('Trained with vanilla non-private SGD optimizer')
if __name__ == '__main__':
app.run(main)

View file

@ -47,10 +47,11 @@ def compute_epsilon(steps):
return float('inf')
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = FLAGS.batch_size / 60000
rdp = compute_rdp(q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
rdp = compute_rdp(
q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
# Delta is set to 1e-5 because MNIST has 60000 training points.
return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
@ -91,16 +92,16 @@ def main(unused_argv):
# Define a sequential Keras model
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu',
input_shape=(28, 28, 1)),
tf.keras.layers.Conv2D(
16,
8,
strides=2,
padding='same',
activation='relu',
input_shape=(28, 28, 1)),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Conv2D(32, 4,
strides=2,
padding='valid',
activation='relu'),
tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(32, activation='relu'),
@ -124,10 +125,12 @@ def main(unused_argv):
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
# Train model with Keras
model.fit(train_data, train_labels,
epochs=FLAGS.epochs,
validation_data=(test_data, test_labels),
batch_size=FLAGS.batch_size)
model.fit(
train_data,
train_labels,
epochs=FLAGS.epochs,
validation_data=(test_data, test_labels),
batch_size=FLAGS.batch_size)
# Compute the privacy budget expended.
if FLAGS.dpsgd:
@ -136,5 +139,6 @@ def main(unused_argv):
else:
print('Trained with vanilla non-private SGD optimizer')
if __name__ == '__main__':
app.run(main)
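For reference, a minimal sketch of the DP pieces this tutorial wires into compile() (hedged: DPKerasSGDOptimizer from tensorflow_privacy's dp_optimizer_keras module, with the flag defaults above; the tutorial's actual loss may be the categorical rather than sparse variant). The loss keeps reduction=NONE so the optimizer sees a per-example vector it can clip per microbatch:

import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,       # FLAGS.l2_norm_clip
    noise_multiplier=1.1,   # FLAGS.noise_multiplier
    num_microbatches=250,   # FLAGS.microbatches
    learning_rate=0.15)     # FLAGS.learning_rate
loss = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=tf.keras.losses.Reduction.NONE)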

View file

@ -22,7 +22,6 @@ from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized
flags.DEFINE_boolean(
'dpsgd', True, 'If True, train with DP-SGD. If False, '
'train with vanilla SGD.')
@ -50,10 +49,11 @@ def compute_epsilon(steps):
return float('inf')
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES
rdp = compute_rdp(q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
rdp = compute_rdp(
q=sampling_probability,
noise_multiplier=FLAGS.noise_multiplier,
steps=steps,
orders=orders)
# Delta is set to approximate 1 / (number of training points).
return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
@ -63,15 +63,11 @@ def cnn_model_fn(features, labels, mode):
# Define CNN architecture using tf.keras.layers.
input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
y = tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu').apply(input_layer)
y = tf.keras.layers.Conv2D(
16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Conv2D(32, 4,
strides=2,
padding='valid',
activation='relu').apply(y)
y = tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu').apply(y)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Flatten().apply(y)
y = tf.keras.layers.Dense(32, activation='relu').apply(y)
@ -106,22 +102,19 @@ def cnn_model_fn(features, labels, mode):
# the vector_loss because tf.estimator requires a scalar loss. This is only
# used for evaluation and debugging by tf.estimator. The actual loss being
# minimized is opt_loss defined above and passed to optimizer.minimize().
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
train_op=train_op)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode).
elif mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = {
'accuracy':
tf.metrics.accuracy(
labels=labels,
predictions=tf.argmax(input=logits, axis=1))
labels=labels, predictions=tf.argmax(input=logits, axis=1))
}
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
eval_metric_ops=eval_metric_ops)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
def load_mnist():
@ -155,8 +148,8 @@ def main(unused_argv):
train_data, train_labels, test_data, test_labels = load_mnist()
# Instantiate the tf.Estimator.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
model_dir=FLAGS.model_dir)
mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir=FLAGS.model_dir)
# Create tf.Estimator input functions for the training and test data.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
@ -166,10 +159,7 @@ def main(unused_argv):
num_epochs=FLAGS.epochs,
shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={'x': test_data},
y=test_labels,
num_epochs=1,
shuffle=False)
x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)
# Training loop.
steps_per_epoch = NUM_TRAIN_EXAMPLES // FLAGS.batch_size
@ -189,5 +179,6 @@ def main(unused_argv):
else:
print('Trained with vanilla non-private SGD optimizer')
if __name__ == '__main__':
app.run(main)

View file

@ -56,8 +56,8 @@ def lr_model_fn(features, labels, mode, nclasses, dim):
logits = tf.keras.layers.Dense(
units=nclasses,
kernel_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer),
bias_regularizer=tf.keras.regularizers.L2(l2=FLAGS.regularizer)).apply(
input_layer)
bias_regularizer=tf.keras.regularizers.L2(
l2=FLAGS.regularizer)).apply(input_layer)
# Calculate loss as a vector (to support microbatches in DP-SGD).
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
@ -166,8 +166,7 @@ def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
# an option.
rdp = [order * coef for order in orders]
eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
p * 100, eps, delta))
print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(p * 100, eps, delta))
# Compute privacy guarantees for the Sampled Gaussian Mechanism.
rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
@ -234,5 +233,6 @@ def main(unused_argv):
noise_multiplier=FLAGS.noise_multiplier,
)
if __name__ == '__main__':
app.run(main)

View file

@ -114,7 +114,7 @@ def nn_model_fn(features, labels, mode):
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode).
# Add evaluation metrics (for EVAL mode).
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = {
'rmse':

View file

@ -28,23 +28,19 @@ def cnn_model_fn(features, labels, mode):
# Define CNN architecture using tf.keras.layers.
input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
y = tf.keras.layers.Conv2D(16, 8,
strides=2,
padding='same',
activation='relu').apply(input_layer)
y = tf.keras.layers.Conv2D(
16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Conv2D(32, 4,
strides=2,
padding='valid',
activation='relu').apply(y)
y = tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu').apply(y)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Flatten().apply(y)
y = tf.keras.layers.Dense(32, activation='relu').apply(y)
logits = tf.keras.layers.Dense(10).apply(y)
# Calculate loss as a vector and as its average across minibatch.
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
logits=logits)
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=labels, logits=logits)
scalar_loss = tf.reduce_mean(vector_loss)
# Configure the training op (for TRAIN mode).
@ -53,21 +49,18 @@ def cnn_model_fn(features, labels, mode):
opt_loss = scalar_loss
global_step = tf.train.get_global_step()
train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
train_op=train_op)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, train_op=train_op)
# Add evaluation metrics (for EVAL mode).
elif mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = {
'accuracy':
tf.metrics.accuracy(
labels=labels,
predictions=tf.argmax(input=logits, axis=1))
labels=labels, predictions=tf.argmax(input=logits, axis=1))
}
return tf.estimator.EstimatorSpec(mode=mode,
loss=scalar_loss,
eval_metric_ops=eval_metric_ops)
return tf.estimator.EstimatorSpec(
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
def load_mnist():
@ -109,10 +102,7 @@ def main(unused_argv):
num_epochs=FLAGS.epochs,
shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={'x': test_data},
y=test_labels,
num_epochs=1,
shuffle=False)
x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)
# Training loop.
steps_per_epoch = 60000 // FLAGS.batch_size
@ -125,5 +115,6 @@ def main(unused_argv):
test_accuracy = eval_results['accuracy']
print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))
if __name__ == '__main__':
tf.app.run()