Remove set_denominator functions from DPQuery and make QueryWithLedger easier to use.

set_denominator was added so that the batch size doesn't need to be specified before constructing the optimizer, but it breaks the DPQuery abstraction. Now the optimizer uses a GaussianSumQuery instead of GaussianAverageQuery, and normalization by batch size is done inside the optimizer.

Also, instead of creating all DPQueries with a PrivacyLedger and then wrapping them with QueryWithLedger, it is now sufficient to create the queries with no ledger; QueryWithLedger will construct the ledger and pass it to all inner queries.

PiperOrigin-RevId: 251462353
This commit is contained in:
Galen Andrew 2019-06-04 10:14:09 -07:00 committed by A. Unique TensorFlower
parent 7636945566
commit d5dcfec745
17 changed files with 202 additions and 220 deletions

View file

@ -20,7 +20,6 @@ import sys
if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts.
pass
else:
from privacy.analysis.privacy_ledger import DummyLedger
from privacy.analysis.privacy_ledger import GaussianSumQueryEntry
from privacy.analysis.privacy_ledger import PrivacyLedger
from privacy.analysis.privacy_ledger import QueryWithLedger

View file

@ -65,44 +65,39 @@ class PrivacyLedger(object):
def __init__(self,
population_size,
selection_probability=None):
selection_probability):
"""Initialize the PrivacyLedger.
Args:
population_size: An integer (may be variable) specifying the size of the
population, i.e. size of the training data used in each epoch.
selection_probability: A float (may be variable) specifying the
probability each record is included in a sample. If None, it can be set
later with set_sample_size.
probability each record is included in a sample.
Raises:
ValueError: If selection_probability is 0.
"""
self._population_size = population_size
self._selection_probability = selection_probability
if selection_probability is None:
init_capacity_samples = 1
if tf.executing_eagerly():
if tf.equal(selection_probability, 0):
raise ValueError('Selection probability cannot be 0.')
init_capacity = tf.cast(tf.ceil(1 / selection_probability), tf.int32)
else:
if tf.executing_eagerly():
if tf.equal(selection_probability, 0):
raise ValueError('Selection probability cannot be 0.')
init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
tf.int32)
else:
if selection_probability == 0:
raise ValueError('Selection probability cannot be 0.')
init_capacity_samples = np.int(np.ceil(1 / selection_probability))
init_capacity_queries = init_capacity_samples
if selection_probability == 0:
raise ValueError('Selection probability cannot be 0.')
init_capacity = np.int(np.ceil(1 / selection_probability))
# The query buffer stores rows corresponding to GaussianSumQueryEntries.
self._query_buffer = tensor_buffer.TensorBuffer(
init_capacity_queries, [3], tf.float32, 'query')
init_capacity, [3], tf.float32, 'query')
self._sample_var = tf.Variable(
initial_value=tf.zeros([3]), trainable=False, name='sample')
# The sample buffer stores rows corresponding to SampleEntries.
self._sample_buffer = tensor_buffer.TensorBuffer(
init_capacity_samples, [3], tf.float32, 'sample')
init_capacity, [3], tf.float32, 'sample')
self._sample_count = tf.Variable(
initial_value=0.0, trainable=False, name='sample_count')
self._query_count = tf.Variable(
@ -175,39 +170,6 @@ class PrivacyLedger(object):
return format_ledger(sample_array, query_array)
def set_sample_size(self, batch_size):
self._selection_probability = tf.cast(batch_size,
tf.float32) / self._population_size
class DummyLedger(object):
"""A ledger that records nothing.
This ledger may be passed in place of a normal PrivacyLedger in case privacy
accounting is to be handled externally.
"""
def record_sum_query(self, l2_norm_bound, noise_stddev):
del l2_norm_bound
del noise_stddev
return tf.no_op()
def finalize_sample(self):
return tf.no_op()
def get_unformatted_ledger(self):
empty_array = tf.zeros(shape=[0, 3])
return empty_array, empty_array
def get_formatted_ledger(self, sess):
del sess
empty_array = np.zeros(shape=[0, 3])
return empty_array, empty_array
def get_formatted_ledger_eager(self):
empty_array = np.zeros(shape=[0, 3])
return empty_array, empty_array
class QueryWithLedger(dp_query.DPQuery):
"""A class for DP queries that record events to a PrivacyLedger.
@ -221,17 +183,40 @@ class QueryWithLedger(dp_query.DPQuery):
For example usage, see privacy_ledger_test.py.
"""
def __init__(self, query, ledger):
def __init__(self, query,
population_size=None, selection_probability=None,
ledger=None):
"""Initializes the QueryWithLedger.
Args:
query: The query whose events should be recorded to the ledger. Any
subqueries (including those in the leaves of a nested query) should also
contain a reference to the same ledger given here.
ledger: A PrivacyLedger to which privacy events should be recorded.
population_size: An integer (may be variable) specifying the size of the
population, i.e. size of the training data used in each epoch. May be
None if `ledger` is specified.
selection_probability: A float (may be variable) specifying the
probability each record is included in a sample. May be None if `ledger`
is specified.
ledger: A PrivacyLedger to use. Must be specified if either of
`population_size` or `selection_probability` is None.
"""
self._query = query
if population_size is not None and selection_probability is not None:
self.set_ledger(PrivacyLedger(population_size, selection_probability))
elif ledger is not None:
self.set_ledger(ledger)
else:
raise ValueError('One of (population_size, selection_probability) or '
'ledger must be specified.')
@property
def ledger(self):
return self._ledger
def set_ledger(self, ledger):
self._ledger = ledger
self._query.set_ledger(ledger)
def initial_global_state(self):
"""See base class."""
@ -260,10 +245,13 @@ class QueryWithLedger(dp_query.DPQuery):
def get_noised_result(self, sample_state, global_state):
"""Ensures sample is recorded to the ledger and returns noised result."""
# Ensure sample_state is fully aggregated before calling get_noised_result.
with tf.control_dependencies(nest.flatten(sample_state)):
with tf.control_dependencies([self._ledger.finalize_sample()]):
return self._query.get_noised_result(sample_state, global_state)
def set_denominator(self, global_state, num_microbatches, microbatch_size=1):
self._ledger.set_sample_size(num_microbatches * microbatch_size)
return self._query.set_denominator(global_state, num_microbatches)
result, new_global_state = self._query.get_noised_result(
sample_state, global_state)
# Ensure inner queries have recorded before finalizing.
with tf.control_dependencies(nest.flatten(result)):
finalize = self._ledger.finalize_sample()
# Ensure finalizing happens.
with tf.control_dependencies([finalize]):
return nest.map_structure(tf.identity, result), new_global_state

View file

@ -56,12 +56,11 @@ class PrivacyLedgerTest(tf.test.TestCase):
population_size = tf.Variable(0)
selection_probability = tf.Variable(1.0)
ledger = privacy_ledger.PrivacyLedger(
population_size, selection_probability)
query = gaussian_query.GaussianSumQuery(
l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
query = privacy_ledger.QueryWithLedger(query, ledger)
l2_norm_clip=10.0, stddev=0.0)
query = privacy_ledger.QueryWithLedger(
query, population_size, selection_probability)
# First sample.
tf.assign(population_size, 10)
@ -69,7 +68,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
test_utils.run_query(query, [record1, record2])
expected_queries = [[10.0, 0.0]]
formatted = ledger.get_formatted_ledger_eager()
formatted = query.ledger.get_formatted_ledger_eager()
sample_1 = formatted[0]
self.assertAllClose(sample_1.population_size, 10.0)
self.assertAllClose(sample_1.selection_probability, 0.1)
@ -80,7 +79,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
tf.assign(selection_probability, 0.2)
test_utils.run_query(query, [record1, record2])
formatted = ledger.get_formatted_ledger_eager()
formatted = query.ledger.get_formatted_ledger_eager()
sample_1, sample_2 = formatted
self.assertAllClose(sample_1.population_size, 10.0)
self.assertAllClose(sample_1.selection_probability, 0.1)
@ -93,16 +92,15 @@ class PrivacyLedgerTest(tf.test.TestCase):
def test_nested_query(self):
population_size = tf.Variable(0)
selection_probability = tf.Variable(1.0)
ledger = privacy_ledger.PrivacyLedger(
population_size, selection_probability)
query1 = gaussian_query.GaussianAverageQuery(
l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
query2 = gaussian_query.GaussianAverageQuery(
l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger)
l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)
query = nested_query.NestedQuery([query1, query2])
query = privacy_ledger.QueryWithLedger(query, ledger)
query = privacy_ledger.QueryWithLedger(
query, population_size, selection_probability)
record1 = [1.0, [12.0, 9.0]]
record2 = [5.0, [1.0, 2.0]]
@ -113,7 +111,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
test_utils.run_query(query, [record1, record2])
expected_queries = [[4.0, 2.0], [5.0, 1.0]]
formatted = ledger.get_formatted_ledger_eager()
formatted = query.ledger.get_formatted_ledger_eager()
sample_1 = formatted[0]
self.assertAllClose(sample_1.population_size, 10.0)
self.assertAllClose(sample_1.selection_probability, 0.1)
@ -124,7 +122,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
tf.assign(selection_probability, 0.2)
test_utils.run_query(query, [record1, record2])
formatted = ledger.get_formatted_ledger_eager()
formatted = query.ledger.get_formatted_ledger_eager()
sample_1, sample_2 = formatted
self.assertAllClose(sample_1.population_size, 10.0)
self.assertAllClose(sample_1.selection_probability, 0.1)

View file

@ -307,7 +307,7 @@ def compute_rdp_from_ledger(ledger, orders):
Returns:
RDP at all orders, can be np.inf.
"""
total_rdp = 0
total_rdp = np.zeros_like(orders, dtype=float)
for sample in ledger:
# Compute equivalent z from l2_clip_bounds and noise stddevs in sample.
# See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula.

View file

@ -61,6 +61,16 @@ class DPQuery(object):
__metaclass__ = abc.ABCMeta
def set_ledger(self, ledger):
"""Supplies privacy ledger to which the query can record privacy events.
Args:
ledger: A `PrivacyLedger`.
"""
del ledger
raise TypeError(
'DPQuery type %s does not support set_ledger.' % type(self).__name__)
def initial_global_state(self):
"""Returns the initial global state for the DPQuery."""
return ()

View file

@ -43,17 +43,19 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
_GlobalState = collections.namedtuple(
'_GlobalState', ['l2_norm_clip', 'stddev'])
def __init__(self, l2_norm_clip, stddev, ledger=None):
def __init__(self, l2_norm_clip, stddev):
"""Initializes the GaussianSumQuery.
Args:
l2_norm_clip: The clipping norm to apply to the global norm of each
record.
stddev: The stddev of the noise added to the sum.
ledger: The privacy ledger to which queries should be recorded.
"""
self._l2_norm_clip = l2_norm_clip
self._stddev = stddev
self._ledger = None
def set_ledger(self, ledger):
self._ledger = ledger
def make_global_state(self, l2_norm_clip, stddev):
@ -68,16 +70,8 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
return global_state.l2_norm_clip
def initial_sample_state(self, global_state, template):
if self._ledger:
dependencies = [
self._ledger.record_sum_query(
global_state.l2_norm_clip, global_state.stddev)
]
else:
dependencies = []
with tf.control_dependencies(dependencies):
return nest.map_structure(
dp_query.zeros_like, template)
return nest.map_structure(
dp_query.zeros_like, template)
def preprocess_record_impl(self, params, record):
"""Clips the l2 norm, returning the clipped record and the l2 norm.
@ -110,7 +104,15 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
def add_noise(v):
return v + random_normal(tf.shape(v))
return nest.map_structure(add_noise, sample_state), global_state
if self._ledger:
dependencies = [
self._ledger.record_sum_query(
global_state.l2_norm_clip, global_state.stddev)
]
else:
dependencies = []
with tf.control_dependencies(dependencies):
return nest.map_structure(add_noise, sample_state), global_state
class GaussianAverageQuery(normalized_query.NormalizedQuery):
@ -127,8 +129,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
def __init__(self,
l2_norm_clip,
sum_stddev,
denominator,
ledger=None):
denominator):
"""Initializes the GaussianAverageQuery.
Args:
@ -138,8 +139,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
normalization).
denominator: The normalization constant (applied after noise is added to
the sum).
ledger: The privacy ledger to which queries should be recorded.
"""
super(GaussianAverageQuery, self).__init__(
numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger),
numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev),
denominator=denominator)

View file

@ -62,6 +62,9 @@ class NestedQuery(dp_query.DPQuery):
return nest.map_structure_up_to(
self._queries, caller, self._queries, *inputs)
def set_ledger(self, ledger):
self._map_to_queries('set_ledger', ledger=ledger)
def initial_global_state(self):
"""See base class."""
return self._map_to_queries('initial_global_state')

View file

@ -67,5 +67,6 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
"""See base class."""
sum_state, denominator = sample_state
return nest.map_structure(
lambda t: tf.truediv(t, denominator), sum_state), global_state
return (
nest.map_structure(lambda t: t / denominator, sum_state),
global_state)

View file

@ -51,6 +51,10 @@ class NormalizedQuery(dp_query.DPQuery):
self._numerator = numerator_query
self._denominator = denominator
def set_ledger(self, ledger):
"""See base class."""
self._numerator.set_ledger(ledger)
def initial_global_state(self):
"""See base class."""
if self._denominator is not None:
@ -92,7 +96,3 @@ class NormalizedQuery(dp_query.DPQuery):
def merge_sample_states(self, sample_state_1, sample_state_2):
"""See base class."""
return self._numerator.merge_sample_states(sample_state_1, sample_state_2)
def set_denominator(self, global_state, denominator):
"""Returns an updated global_state with the given denominator."""
return global_state._replace(denominator=tf.cast(denominator, tf.float32))

View file

@ -68,8 +68,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
target_unclipped_quantile,
learning_rate,
clipped_count_stddev,
expected_num_records,
ledger=None):
expected_num_records):
"""Initializes the QuantileAdaptiveClipSumQuery.
Args:
@ -87,7 +86,6 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
should be about 0.5 for reasonable privacy.
expected_num_records: The expected number of records per round, used to
estimate the clipped count quantile.
ledger: The privacy ledger to which queries should be recorded.
"""
self._initial_l2_norm_clip = initial_l2_norm_clip
self._noise_multiplier = noise_multiplier
@ -95,8 +93,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
self._learning_rate = learning_rate
# Initialize sum query's global state with None, to be set later.
self._sum_query = gaussian_query.GaussianSumQuery(
None, None, ledger)
self._sum_query = gaussian_query.GaussianSumQuery(None, None)
# self._clipped_fraction_query is a DPQuery used to estimate the fraction of
# records that are clipped. It accumulates an indicator 0/1 of whether each
@ -110,8 +107,12 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
l2_norm_clip=0.5,
sum_stddev=clipped_count_stddev,
denominator=expected_num_records,
ledger=ledger)
denominator=expected_num_records)
def set_ledger(self, ledger):
"""See base class."""
self._sum_query.set_ledger(ledger)
self._clipped_fraction_query.set_ledger(ledger)
def initial_global_state(self):
"""See base class."""
@ -252,8 +253,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
target_unclipped_quantile,
learning_rate,
clipped_count_stddev,
expected_num_records,
ledger=None):
expected_num_records):
"""Initializes the AdaptiveClipAverageQuery.
Args:
@ -272,7 +272,6 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
should be about 0.5 for reasonable privacy.
expected_num_records: The expected number of records, used to estimate the
clipped count quantile.
ledger: The privacy ledger to which queries should be recorded.
"""
numerator_query = QuantileAdaptiveClipSumQuery(
initial_l2_norm_clip,
@ -280,8 +279,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
target_unclipped_quantile,
learning_rate,
clipped_count_stddev,
expected_num_records,
ledger)
expected_num_records)
super(QuantileAdaptiveClipAverageQuery, self).__init__(
numerator_query=numerator_query,
denominator=denominator)

View file

@ -251,8 +251,6 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
population_size = tf.Variable(0)
selection_probability = tf.Variable(1.0)
ledger = privacy_ledger.PrivacyLedger(
population_size, selection_probability)
query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
initial_l2_norm_clip=10.0,
@ -260,10 +258,10 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
target_unclipped_quantile=0.0,
learning_rate=1.0,
clipped_count_stddev=0.0,
expected_num_records=2.0,
ledger=ledger)
expected_num_records=2.0)
query = privacy_ledger.QueryWithLedger(query, ledger)
query = privacy_ledger.QueryWithLedger(
query, population_size, selection_probability)
# First sample.
tf.assign(population_size, 10)
@ -271,7 +269,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
_, global_state = test_utils.run_query(query, [record1, record2])
expected_queries = [[10.0, 10.0], [0.5, 0.0]]
formatted = ledger.get_formatted_ledger_eager()
formatted = query.ledger.get_formatted_ledger_eager()
sample_1 = formatted[0]
self.assertAllClose(sample_1.population_size, 10.0)
self.assertAllClose(sample_1.selection_probability, 0.1)
@ -282,7 +280,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
tf.assign(selection_probability, 0.2)
test_utils.run_query(query, [record1, record2], global_state)
formatted = ledger.get_formatted_ledger_eager()
formatted = query.ledger.get_formatted_ledger_eager()
sample_1, sample_2 = formatted
self.assertAllClose(sample_1.population_size, 10.0)
self.assertAllClose(sample_1.selection_probability, 0.1)

View file

@ -23,6 +23,11 @@ import tensorflow as tf
from privacy.analysis import privacy_ledger
from privacy.dp_query import gaussian_query
if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
nest = tf.contrib.framework.nest
else:
nest = tf.nest
def make_optimizer_class(cls):
"""Constructs a DP optimizer class from an existing one."""
@ -46,7 +51,7 @@ def make_optimizer_class(cls):
def __init__(
self,
dp_average_query,
dp_sum_query,
num_microbatches=None,
unroll_microbatches=False,
*args, # pylint: disable=keyword-arg-before-vararg, g-doc-args
@ -54,7 +59,7 @@ def make_optimizer_class(cls):
"""Initialize the DPOptimizerClass.
Args:
dp_average_query: DPQuery object, specifying differential privacy
dp_sum_query: DPQuery object, specifying differential privacy
mechanism to use.
num_microbatches: How many microbatches into which the minibatch is
split. If None, will default to the size of the minibatch, and
@ -64,9 +69,9 @@ def make_optimizer_class(cls):
raises an exception.
"""
super(DPOptimizerClass, self).__init__(*args, **kwargs)
self._dp_average_query = dp_average_query
self._dp_sum_query = dp_sum_query
self._num_microbatches = num_microbatches
self._global_state = self._dp_average_query.initial_global_state()
self._global_state = self._dp_sum_query.initial_global_state()
# TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.
# Beware: When num_microbatches is large (>100), enabling this parameter
# may cause an OOM error.
@ -88,31 +93,34 @@ def make_optimizer_class(cls):
vector_loss = loss()
if self._num_microbatches is None:
self._num_microbatches = tf.shape(vector_loss)[0]
self._global_state = self._dp_average_query.set_denominator(
self._global_state,
self._num_microbatches)
sample_state = self._dp_average_query.initial_sample_state(
if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger):
self._dp_sum_query.set_batch_size(self._num_microbatches)
sample_state = self._dp_sum_query.initial_sample_state(
self._global_state, var_list)
microbatches_losses = tf.reshape(vector_loss,
[self._num_microbatches, -1])
sample_params = (
self._dp_average_query.derive_sample_params(self._global_state))
self._dp_sum_query.derive_sample_params(self._global_state))
def process_microbatch(i, sample_state):
"""Process one microbatch (record) with privacy helper."""
microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [i]))
grads = gradient_tape.gradient(microbatch_loss, var_list)
sample_state = self._dp_average_query.accumulate_record(sample_params,
sample_state,
grads)
sample_state = self._dp_sum_query.accumulate_record(
sample_params, sample_state, grads)
return sample_state
for idx in range(self._num_microbatches):
sample_state = process_microbatch(idx, sample_state)
final_grads, self._global_state = (
self._dp_average_query.get_noised_result(sample_state,
self._global_state))
grad_sums, self._global_state = (
self._dp_sum_query.get_noised_result(
sample_state, self._global_state))
def normalize(v):
return v / tf.cast(self._num_microbatches, tf.float32)
final_grads = nest.map_structure(normalize, grad_sums)
grads_and_vars = list(zip(final_grads, var_list))
return grads_and_vars
@ -128,12 +136,12 @@ def make_optimizer_class(cls):
# sampling from the dataset without replacement.
if self._num_microbatches is None:
self._num_microbatches = tf.shape(loss)[0]
self._global_state = self._dp_average_query.set_denominator(
self._global_state,
self._num_microbatches)
if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger):
self._dp_sum_query.set_batch_size(self._num_microbatches)
microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1])
sample_params = (
self._dp_average_query.derive_sample_params(self._global_state))
self._dp_sum_query.derive_sample_params(self._global_state))
def process_microbatch(i, sample_state):
"""Process one microbatch (record) with privacy helper."""
@ -145,7 +153,7 @@ def make_optimizer_class(cls):
g if g is not None else tf.zeros_like(v)
for (g, v) in zip(list(grads), var_list)
]
sample_state = self._dp_average_query.accumulate_record(
sample_state = self._dp_sum_query.accumulate_record(
sample_params, sample_state, grads_list)
return sample_state
@ -154,7 +162,7 @@ def make_optimizer_class(cls):
tf.trainable_variables() + tf.get_collection(
tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
sample_state = self._dp_average_query.initial_sample_state(
sample_state = self._dp_sum_query.initial_sample_state(
self._global_state, var_list)
if self._unroll_microbatches:
@ -169,10 +177,15 @@ def make_optimizer_class(cls):
idx = tf.constant(0)
_, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state])
final_grads, self._global_state = (
self._dp_average_query.get_noised_result(
grad_sums, self._global_state = (
self._dp_sum_query.get_noised_result(
sample_state, self._global_state))
def normalize(v):
return tf.truediv(v, tf.cast(self._num_microbatches, tf.float32))
final_grads = nest.map_structure(normalize, grad_sums)
return list(zip(final_grads, var_list))
return DPOptimizerClass
@ -188,20 +201,20 @@ def make_gaussian_optimizer_class(cls):
self,
l2_norm_clip,
noise_multiplier,
num_microbatches,
ledger,
num_microbatches=None,
ledger=None,
unroll_microbatches=False,
*args, # pylint: disable=keyword-arg-before-vararg
**kwargs):
dp_average_query = gaussian_query.GaussianAverageQuery(
l2_norm_clip, l2_norm_clip * noise_multiplier,
num_microbatches, ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(
l2_norm_clip, l2_norm_clip * noise_multiplier)
if ledger:
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
ledger=ledger)
super(DPGaussianOptimizerClass, self).__init__(
dp_average_query,
dp_sum_query,
num_microbatches,
unroll_microbatches,
*args,
@ -209,7 +222,7 @@ def make_gaussian_optimizer_class(cls):
@property
def ledger(self):
return self._ledger
return self._dp_sum_query.ledger
return DPGaussianOptimizerClass

View file

@ -53,14 +53,12 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([1.0, 2.0])
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(
1.0e9, 0.0, num_microbatches, ledger)
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
dp_sum_query = privacy_ledger.QueryWithLedger(
dp_sum_query, 1e6, num_microbatches / 1e6)
opt = cls(
dp_average_query,
dp_sum_query,
num_microbatches=num_microbatches,
learning_rate=2.0)
@ -84,12 +82,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0, 0.0])
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values
@ -110,12 +106,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0])
data0 = tf.Variable([[0.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values

View file

@ -46,22 +46,18 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]),
('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]),
('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]),
('DPAdam None', dp_optimizer.DPAdamOptimizer, None, [-2.5, -2.5]))
('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]))
def testBaseline(self, cls, num_microbatches, expected_answer):
with self.cached_session() as sess:
var0 = tf.Variable([1.0, 2.0])
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
ledger = privacy_ledger.PrivacyLedger(
1e6, num_microbatches / 1e6 if num_microbatches else None)
dp_average_query = gaussian_query.GaussianAverageQuery(
1.0e9, 0.0, num_microbatches, ledger)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
dp_sum_query = privacy_ledger.QueryWithLedger(
dp_sum_query, 1e6, num_microbatches / 1e6)
opt = cls(
dp_average_query,
dp_sum_query,
num_microbatches=num_microbatches,
learning_rate=2.0)
@ -84,12 +80,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0, 0.0])
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values
@ -109,12 +103,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0])
data0 = tf.Variable([[0.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values
@ -153,12 +145,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
vector_loss = tf.squared_difference(labels, preds)
scalar_loss = tf.reduce_mean(vector_loss)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
optimizer = dp_optimizer.DPGradientDescentOptimizer(
dp_average_query,
dp_sum_query,
num_microbatches=1,
learning_rate=1.0)
global_step = tf.train.get_global_step()
@ -198,14 +188,12 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
num_microbatches = 4
ledger = privacy_ledger.PrivacyLedger(
1e6, num_microbatches / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
dp_sum_query = privacy_ledger.QueryWithLedger(
dp_sum_query, 1e6, num_microbatches / 1e6)
opt = cls(
dp_average_query,
dp_sum_query,
num_microbatches=num_microbatches,
learning_rate=2.0,
unroll_microbatches=True)
@ -233,8 +221,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
l2_norm_clip=4.0,
noise_multiplier=2.0,
num_microbatches=1,
learning_rate=2.0,
ledger=privacy_ledger.DummyLedger())
learning_rate=2.0)
self.evaluate(tf.global_variables_initializer())
# Fetch params to validate initial values

View file

@ -94,9 +94,7 @@ def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument
ledger = privacy_ledger.PrivacyLedger(
population_size=NB_TRAIN,
selection_probability=(FLAGS.batch_size / NB_TRAIN),
max_samples=1e6,
max_queries=1e6)
selection_probability=(FLAGS.batch_size / NB_TRAIN))
optimizer = dp_optimizer.DPAdamGaussianOptimizer(
l2_norm_clip=FLAGS.l2_norm_clip,

View file

@ -26,12 +26,11 @@ import tensorflow as tf
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent
from privacy.dp_query.gaussian_query import GaussianAverageQuery
from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
tf.compat.v1.enable_eager_execution()
tf.enable_eager_execution()
else:
GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name
@ -64,6 +63,9 @@ def compute_epsilon(steps):
def main(_):
if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
raise ValueError('Number of microbatches should divide evenly batch_size')
# Fetch the mnist data
train, test = tf.keras.datasets.mnist.load_data()
train_images, train_labels = train
@ -97,13 +99,10 @@ def main(_):
# Instantiate the optimizer
if FLAGS.dpsgd:
dp_average_query = GaussianAverageQuery(
FLAGS.l2_norm_clip,
FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
FLAGS.microbatches)
opt = DPGradientDescentOptimizer(
dp_average_query,
FLAGS.microbatches,
opt = DPGradientDescentGaussianOptimizer(
l2_norm_clip=FLAGS.l2_norm_clip,
noise_multiplier=FLAGS.noise_multiplier,
num_microbatches=FLAGS.microbatches,
learning_rate=FLAGS.learning_rate)
else:
opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
@ -145,7 +144,7 @@ def main(_):
# Compute the privacy budget expended so far.
if FLAGS.dpsgd:
eps = compute_epsilon(epoch * steps_per_epoch)
eps = compute_epsilon((epoch + 1) * steps_per_epoch)
print('For delta=1e-5, the current epsilon is: %.2f' % eps)
else:
print('Trained with vanilla non-private SGD optimizer')

View file

@ -27,8 +27,7 @@ import tensorflow as tf
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent
from privacy.dp_query.gaussian_query import GaussianAverageQuery
from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
@ -42,10 +41,10 @@ flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
flags.DEFINE_float('noise_multiplier', 1.1,
'Ratio of the standard deviation to the clipping norm')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('batch_size', 256, 'Batch size')
flags.DEFINE_integer('epochs', 60, 'Number of epochs')
flags.DEFINE_integer(
'microbatches', 250, 'Number of microbatches '
'microbatches', 256, 'Number of microbatches '
'(must evenly divide batch_size)')
flags.DEFINE_string('model_dir', None, 'Model directory')
@ -119,13 +118,10 @@ def main(unused_argv):
])
if FLAGS.dpsgd:
dp_average_query = GaussianAverageQuery(
FLAGS.l2_norm_clip,
FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
FLAGS.microbatches)
optimizer = DPGradientDescentOptimizer(
dp_average_query,
FLAGS.microbatches,
optimizer = DPGradientDescentGaussianOptimizer(
l2_norm_clip=FLAGS.l2_norm_clip,
noise_multiplier=FLAGS.noise_multiplier,
num_microbatches=FLAGS.num_microbatches,
learning_rate=FLAGS.learning_rate,
unroll_microbatches=True)
# Compute vector of per-example loss rather than its mean over a minibatch.