Remove set_denominator functions from DPQuery and make QueryWithLedger easier to use.

set_denominator was added so that the batch size would not need to be specified before constructing the optimizer, but it breaks the DPQuery abstraction. The optimizer now uses a GaussianSumQuery instead of a GaussianAverageQuery, and normalization by the batch size is done inside the optimizer.
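
For illustration, a minimal sketch of the new sum-then-normalize pattern (the clip/noise values, microbatch count, and gradient values here are made up; the DPQuery method names follow the diff below):

    import tensorflow as tf

    from privacy.dp_query import gaussian_query

    # The query only produces a *noised sum* of clipped records now.
    dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.1)
    global_state = dp_sum_query.initial_global_state()
    params = dp_sum_query.derive_sample_params(global_state)

    num_microbatches = 4
    microbatch_grads = [tf.constant([0.5, 0.5])] * num_microbatches

    sample_state = dp_sum_query.initial_sample_state(global_state,
                                                     microbatch_grads[0])
    for grad in microbatch_grads:
      sample_state = dp_sum_query.accumulate_record(params, sample_state, grad)

    grad_sum, global_state = dp_sum_query.get_noised_result(sample_state,
                                                            global_state)
    # Normalization by the batch size happens in the caller (the optimizer),
    # not inside the query.
    final_grad = grad_sum / tf.cast(num_microbatches, tf.float32)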

Also, instead of creating all DPQueries with a PrivacyLedger and then wrapping them with QueryWithLedger, it is now sufficient to create the queries with no ledger; QueryWithLedger constructs the ledger and passes it to all inner queries via set_ledger.
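
For example, a sketch of the simpler construction (the population size and batch size are illustrative; the constructor signatures are the ones introduced in this change):

    from privacy.analysis import privacy_ledger
    from privacy.dp_query import gaussian_query

    population_size = 60000   # illustrative
    batch_size = 256          # illustrative
    selection_probability = batch_size / population_size

    # No ledger is passed to the inner query anymore.
    dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.1)

    # QueryWithLedger builds the PrivacyLedger and hands it to the inner query
    # (and any nested subqueries) through set_ledger.
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, population_size, selection_probability)

    # A pre-built ledger can still be supplied explicitly instead:
    # ledger = privacy_ledger.PrivacyLedger(population_size, selection_probability)
    # dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, ledger=ledger)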

PiperOrigin-RevId: 251462353
Galen Andrew 2019-06-04 10:14:09 -07:00 committed by A. Unique TensorFlower
parent 7636945566
commit d5dcfec745
17 changed files with 202 additions and 220 deletions

View file

@@ -20,7 +20,6 @@ import sys
 if hasattr(sys, 'skip_tf_privacy_import'):  # Useful for standalone scripts.
   pass
 else:
-  from privacy.analysis.privacy_ledger import DummyLedger
   from privacy.analysis.privacy_ledger import GaussianSumQueryEntry
   from privacy.analysis.privacy_ledger import PrivacyLedger
   from privacy.analysis.privacy_ledger import QueryWithLedger

View file

@@ -65,44 +65,39 @@ class PrivacyLedger(object):
   def __init__(self,
                population_size,
-               selection_probability=None):
+               selection_probability):
     """Initialize the PrivacyLedger.

     Args:
       population_size: An integer (may be variable) specifying the size of the
         population, i.e. size of the training data used in each epoch.
       selection_probability: A float (may be variable) specifying the
-        probability each record is included in a sample. If None, it can be set
-        later with set_sample_size.
+        probability each record is included in a sample.

     Raises:
       ValueError: If selection_probability is 0.
     """
     self._population_size = population_size
     self._selection_probability = selection_probability

-    if selection_probability is None:
-      init_capacity_samples = 1
-    else:
-      if tf.executing_eagerly():
-        if tf.equal(selection_probability, 0):
-          raise ValueError('Selection probability cannot be 0.')
-        init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
-                                        tf.int32)
-      else:
-        if selection_probability == 0:
-          raise ValueError('Selection probability cannot be 0.')
-        init_capacity_samples = np.int(np.ceil(1 / selection_probability))
-    init_capacity_queries = init_capacity_samples
+    if tf.executing_eagerly():
+      if tf.equal(selection_probability, 0):
+        raise ValueError('Selection probability cannot be 0.')
+      init_capacity = tf.cast(tf.ceil(1 / selection_probability), tf.int32)
+    else:
+      if selection_probability == 0:
+        raise ValueError('Selection probability cannot be 0.')
+      init_capacity = np.int(np.ceil(1 / selection_probability))

     # The query buffer stores rows corresponding to GaussianSumQueryEntries.
     self._query_buffer = tensor_buffer.TensorBuffer(
-        init_capacity_queries, [3], tf.float32, 'query')
+        init_capacity, [3], tf.float32, 'query')
     self._sample_var = tf.Variable(
         initial_value=tf.zeros([3]), trainable=False, name='sample')

     # The sample buffer stores rows corresponding to SampleEntries.
     self._sample_buffer = tensor_buffer.TensorBuffer(
-        init_capacity_samples, [3], tf.float32, 'sample')
+        init_capacity, [3], tf.float32, 'sample')
     self._sample_count = tf.Variable(
         initial_value=0.0, trainable=False, name='sample_count')
     self._query_count = tf.Variable(
@@ -175,39 +170,6 @@ class PrivacyLedger(object):
     return format_ledger(sample_array, query_array)

-  def set_sample_size(self, batch_size):
-    self._selection_probability = tf.cast(batch_size,
-                                          tf.float32) / self._population_size
-
-
-class DummyLedger(object):
-  """A ledger that records nothing.
-
-  This ledger may be passed in place of a normal PrivacyLedger in case privacy
-  accounting is to be handled externally.
-  """
-
-  def record_sum_query(self, l2_norm_bound, noise_stddev):
-    del l2_norm_bound
-    del noise_stddev
-    return tf.no_op()
-
-  def finalize_sample(self):
-    return tf.no_op()
-
-  def get_unformatted_ledger(self):
-    empty_array = tf.zeros(shape=[0, 3])
-    return empty_array, empty_array
-
-  def get_formatted_ledger(self, sess):
-    del sess
-    empty_array = np.zeros(shape=[0, 3])
-    return empty_array, empty_array
-
-  def get_formatted_ledger_eager(self):
-    empty_array = np.zeros(shape=[0, 3])
-    return empty_array, empty_array
-

 class QueryWithLedger(dp_query.DPQuery):
   """A class for DP queries that record events to a PrivacyLedger.
@@ -221,17 +183,40 @@ class QueryWithLedger(dp_query.DPQuery):
   For example usage, see privacy_ledger_test.py.
   """

-  def __init__(self, query, ledger):
+  def __init__(self, query,
+               population_size=None, selection_probability=None,
+               ledger=None):
     """Initializes the QueryWithLedger.

     Args:
       query: The query whose events should be recorded to the ledger. Any
         subqueries (including those in the leaves of a nested query) should also
         contain a reference to the same ledger given here.
-      ledger: A PrivacyLedger to which privacy events should be recorded.
+      population_size: An integer (may be variable) specifying the size of the
+        population, i.e. size of the training data used in each epoch. May be
+        None if `ledger` is specified.
+      selection_probability: A float (may be variable) specifying the
+        probability each record is included in a sample. May be None if `ledger`
+        is specified.
+      ledger: A PrivacyLedger to use. Must be specified if either of
+        `population_size` or `selection_probability` is None.
     """
     self._query = query
+    if population_size is not None and selection_probability is not None:
+      self.set_ledger(PrivacyLedger(population_size, selection_probability))
+    elif ledger is not None:
+      self.set_ledger(ledger)
+    else:
+      raise ValueError('One of (population_size, selection_probability) or '
+                       'ledger must be specified.')
+
+  @property
+  def ledger(self):
+    return self._ledger
+
+  def set_ledger(self, ledger):
     self._ledger = ledger
+    self._query.set_ledger(ledger)

   def initial_global_state(self):
     """See base class."""
@@ -260,10 +245,13 @@ class QueryWithLedger(dp_query.DPQuery):
   def get_noised_result(self, sample_state, global_state):
     """Ensures sample is recorded to the ledger and returns noised result."""
+    # Ensure sample_state is fully aggregated before calling get_noised_result.
     with tf.control_dependencies(nest.flatten(sample_state)):
-      with tf.control_dependencies([self._ledger.finalize_sample()]):
-        return self._query.get_noised_result(sample_state, global_state)
-
-  def set_denominator(self, global_state, num_microbatches, microbatch_size=1):
-    self._ledger.set_sample_size(num_microbatches * microbatch_size)
-    return self._query.set_denominator(global_state, num_microbatches)
+      result, new_global_state = self._query.get_noised_result(
+          sample_state, global_state)
+    # Ensure inner queries have recorded before finalizing.
+    with tf.control_dependencies(nest.flatten(result)):
+      finalize = self._ledger.finalize_sample()
+    # Ensure finalizing happens.
+    with tf.control_dependencies([finalize]):
+      return nest.map_structure(tf.identity, result), new_global_state

View file

@@ -56,12 +56,11 @@ class PrivacyLedgerTest(tf.test.TestCase):
     population_size = tf.Variable(0)
     selection_probability = tf.Variable(1.0)
-    ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability)

     query = gaussian_query.GaussianSumQuery(
-        l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
-    query = privacy_ledger.QueryWithLedger(query, ledger)
+        l2_norm_clip=10.0, stddev=0.0)
+    query = privacy_ledger.QueryWithLedger(
+        query, population_size, selection_probability)

     # First sample.
     tf.assign(population_size, 10)
@@ -69,7 +68,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     test_utils.run_query(query, [record1, record2])

     expected_queries = [[10.0, 0.0]]
-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1 = formatted[0]
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -80,7 +79,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     tf.assign(selection_probability, 0.2)
     test_utils.run_query(query, [record1, record2])

-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1, sample_2 = formatted
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -93,16 +92,15 @@ class PrivacyLedgerTest(tf.test.TestCase):
   def test_nested_query(self):
     population_size = tf.Variable(0)
     selection_probability = tf.Variable(1.0)
-    ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability)

     query1 = gaussian_query.GaussianAverageQuery(
-        l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
+        l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
     query2 = gaussian_query.GaussianAverageQuery(
-        l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger)
+        l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)

     query = nested_query.NestedQuery([query1, query2])
-    query = privacy_ledger.QueryWithLedger(query, ledger)
+    query = privacy_ledger.QueryWithLedger(
+        query, population_size, selection_probability)

     record1 = [1.0, [12.0, 9.0]]
     record2 = [5.0, [1.0, 2.0]]
@@ -113,7 +111,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     test_utils.run_query(query, [record1, record2])

     expected_queries = [[4.0, 2.0], [5.0, 1.0]]
-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1 = formatted[0]
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -124,7 +122,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     tf.assign(selection_probability, 0.2)
     test_utils.run_query(query, [record1, record2])

-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1, sample_2 = formatted
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)

View file

@@ -307,7 +307,7 @@ def compute_rdp_from_ledger(ledger, orders):
   Returns:
     RDP at all orders, can be np.inf.
   """
-  total_rdp = 0
+  total_rdp = np.zeros_like(orders, dtype=float)
   for sample in ledger:
     # Compute equivalent z from l2_clip_bounds and noise stddevs in sample.
     # See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula.

View file

@@ -61,6 +61,16 @@ class DPQuery(object):

   __metaclass__ = abc.ABCMeta

+  def set_ledger(self, ledger):
+    """Supplies privacy ledger to which the query can record privacy events.
+
+    Args:
+      ledger: A `PrivacyLedger`.
+    """
+    del ledger
+    raise TypeError(
+        'DPQuery type %s does not support set_ledger.' % type(self).__name__)
+
   def initial_global_state(self):
     """Returns the initial global state for the DPQuery."""
     return ()

View file

@@ -43,17 +43,19 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
   _GlobalState = collections.namedtuple(
       '_GlobalState', ['l2_norm_clip', 'stddev'])

-  def __init__(self, l2_norm_clip, stddev, ledger=None):
+  def __init__(self, l2_norm_clip, stddev):
     """Initializes the GaussianSumQuery.

     Args:
       l2_norm_clip: The clipping norm to apply to the global norm of each
         record.
       stddev: The stddev of the noise added to the sum.
-      ledger: The privacy ledger to which queries should be recorded.
     """
     self._l2_norm_clip = l2_norm_clip
     self._stddev = stddev
+    self._ledger = None
+
+  def set_ledger(self, ledger):
     self._ledger = ledger

   def make_global_state(self, l2_norm_clip, stddev):
@@ -68,14 +70,6 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
     return global_state.l2_norm_clip

   def initial_sample_state(self, global_state, template):
-    if self._ledger:
-      dependencies = [
-          self._ledger.record_sum_query(
-              global_state.l2_norm_clip, global_state.stddev)
-      ]
-    else:
-      dependencies = []
-    with tf.control_dependencies(dependencies):
-      return nest.map_structure(
-          dp_query.zeros_like, template)
+    return nest.map_structure(
+        dp_query.zeros_like, template)
@@ -110,6 +104,14 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
       def add_noise(v):
         return v + random_normal(tf.shape(v))

-    return nest.map_structure(add_noise, sample_state), global_state
+    if self._ledger:
+      dependencies = [
+          self._ledger.record_sum_query(
+              global_state.l2_norm_clip, global_state.stddev)
+      ]
+    else:
+      dependencies = []
+    with tf.control_dependencies(dependencies):
+      return nest.map_structure(add_noise, sample_state), global_state
@@ -127,8 +129,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
   def __init__(self,
                l2_norm_clip,
                sum_stddev,
-               denominator,
-               ledger=None):
+               denominator):
     """Initializes the GaussianAverageQuery.

     Args:
@@ -138,8 +139,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
         normalization).
       denominator: The normalization constant (applied after noise is added to
         the sum).
-      ledger: The privacy ledger to which queries should be recorded.
     """
     super(GaussianAverageQuery, self).__init__(
-        numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger),
+        numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev),
         denominator=denominator)

View file

@@ -62,6 +62,9 @@ class NestedQuery(dp_query.DPQuery):
     return nest.map_structure_up_to(
         self._queries, caller, self._queries, *inputs)

+  def set_ledger(self, ledger):
+    self._map_to_queries('set_ledger', ledger=ledger)
+
   def initial_global_state(self):
     """See base class."""
     return self._map_to_queries('initial_global_state')

View file

@@ -67,5 +67,6 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
     """See base class."""
     sum_state, denominator = sample_state

-    return nest.map_structure(
-        lambda t: tf.truediv(t, denominator), sum_state), global_state
+    return (
+        nest.map_structure(lambda t: t / denominator, sum_state),
+        global_state)

View file

@@ -51,6 +51,10 @@ class NormalizedQuery(dp_query.DPQuery):
     self._numerator = numerator_query
     self._denominator = denominator

+  def set_ledger(self, ledger):
+    """See base class."""
+    self._numerator.set_ledger(ledger)
+
   def initial_global_state(self):
     """See base class."""
     if self._denominator is not None:
@@ -92,7 +96,3 @@ class NormalizedQuery(dp_query.DPQuery):
   def merge_sample_states(self, sample_state_1, sample_state_2):
     """See base class."""
     return self._numerator.merge_sample_states(sample_state_1, sample_state_2)
-
-  def set_denominator(self, global_state, denominator):
-    """Returns an updated global_state with the given denominator."""
-    return global_state._replace(denominator=tf.cast(denominator, tf.float32))

View file

@@ -68,8 +68,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
                target_unclipped_quantile,
                learning_rate,
                clipped_count_stddev,
-               expected_num_records,
-               ledger=None):
+               expected_num_records):
     """Initializes the QuantileAdaptiveClipSumQuery.

     Args:
@@ -87,7 +86,6 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
         should be about 0.5 for reasonable privacy.
       expected_num_records: The expected number of records per round, used to
         estimate the clipped count quantile.
-      ledger: The privacy ledger to which queries should be recorded.
     """
     self._initial_l2_norm_clip = initial_l2_norm_clip
     self._noise_multiplier = noise_multiplier
@@ -95,8 +93,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
     self._learning_rate = learning_rate

     # Initialize sum query's global state with None, to be set later.
-    self._sum_query = gaussian_query.GaussianSumQuery(
-        None, None, ledger)
+    self._sum_query = gaussian_query.GaussianSumQuery(None, None)

     # self._clipped_fraction_query is a DPQuery used to estimate the fraction of
     # records that are clipped. It accumulates an indicator 0/1 of whether each
@@ -110,8 +107,12 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
     self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
         l2_norm_clip=0.5,
         sum_stddev=clipped_count_stddev,
-        denominator=expected_num_records,
-        ledger=ledger)
+        denominator=expected_num_records)
+
+  def set_ledger(self, ledger):
+    """See base class."""
+    self._sum_query.set_ledger(ledger)
+    self._clipped_fraction_query.set_ledger(ledger)

   def initial_global_state(self):
     """See base class."""
@@ -252,8 +253,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
                target_unclipped_quantile,
                learning_rate,
                clipped_count_stddev,
-               expected_num_records,
-               ledger=None):
+               expected_num_records):
     """Initializes the AdaptiveClipAverageQuery.

     Args:
@@ -272,7 +272,6 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
         should be about 0.5 for reasonable privacy.
       expected_num_records: The expected number of records, used to estimate the
         clipped count quantile.
-      ledger: The privacy ledger to which queries should be recorded.
     """
     numerator_query = QuantileAdaptiveClipSumQuery(
         initial_l2_norm_clip,
@@ -280,8 +279,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
         target_unclipped_quantile,
         learning_rate,
         clipped_count_stddev,
-        expected_num_records,
-        ledger)
+        expected_num_records)

     super(QuantileAdaptiveClipAverageQuery, self).__init__(
         numerator_query=numerator_query,
         denominator=denominator)

View file

@@ -251,8 +251,6 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
     population_size = tf.Variable(0)
     selection_probability = tf.Variable(1.0)
-    ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability)

     query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
         initial_l2_norm_clip=10.0,
@@ -260,10 +258,10 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
         target_unclipped_quantile=0.0,
         learning_rate=1.0,
         clipped_count_stddev=0.0,
-        expected_num_records=2.0,
-        ledger=ledger)
+        expected_num_records=2.0)

-    query = privacy_ledger.QueryWithLedger(query, ledger)
+    query = privacy_ledger.QueryWithLedger(
+        query, population_size, selection_probability)

     # First sample.
     tf.assign(population_size, 10)
@@ -271,7 +269,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
     _, global_state = test_utils.run_query(query, [record1, record2])

     expected_queries = [[10.0, 10.0], [0.5, 0.0]]
-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1 = formatted[0]
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -282,7 +280,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
     tf.assign(selection_probability, 0.2)
     test_utils.run_query(query, [record1, record2], global_state)

-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1, sample_2 = formatted
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)

View file

@@ -23,6 +23,11 @@ import tensorflow as tf
 from privacy.analysis import privacy_ledger
 from privacy.dp_query import gaussian_query

+if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
+  nest = tf.contrib.framework.nest
+else:
+  nest = tf.nest
+

 def make_optimizer_class(cls):
   """Constructs a DP optimizer class from an existing one."""
@@ -46,7 +51,7 @@ def make_optimizer_class(cls):

     def __init__(
         self,
-        dp_average_query,
+        dp_sum_query,
         num_microbatches=None,
         unroll_microbatches=False,
         *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
@@ -54,7 +59,7 @@ def make_optimizer_class(cls):
      """Initialize the DPOptimizerClass.

      Args:
-        dp_average_query: DPQuery object, specifying differential privacy
+        dp_sum_query: DPQuery object, specifying differential privacy
          mechanism to use.
        num_microbatches: How many microbatches into which the minibatch is
          split. If None, will default to the size of the minibatch, and
@@ -64,9 +69,9 @@ def make_optimizer_class(cls):
          raises an exception.
      """
      super(DPOptimizerClass, self).__init__(*args, **kwargs)
-      self._dp_average_query = dp_average_query
+      self._dp_sum_query = dp_sum_query
      self._num_microbatches = num_microbatches
-      self._global_state = self._dp_average_query.initial_global_state()
+      self._global_state = self._dp_sum_query.initial_global_state()
      # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.
      # Beware: When num_microbatches is large (>100), enabling this parameter
      # may cause an OOM error.
@@ -88,31 +93,34 @@ def make_optimizer_class(cls):
        vector_loss = loss()
        if self._num_microbatches is None:
          self._num_microbatches = tf.shape(vector_loss)[0]
-        self._global_state = self._dp_average_query.set_denominator(
-            self._global_state,
-            self._num_microbatches)
-        sample_state = self._dp_average_query.initial_sample_state(
+        if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger):
+          self._dp_sum_query.set_batch_size(self._num_microbatches)
+        sample_state = self._dp_sum_query.initial_sample_state(
            self._global_state, var_list)
        microbatches_losses = tf.reshape(vector_loss,
                                         [self._num_microbatches, -1])
        sample_params = (
-            self._dp_average_query.derive_sample_params(self._global_state))
+            self._dp_sum_query.derive_sample_params(self._global_state))

        def process_microbatch(i, sample_state):
          """Process one microbatch (record) with privacy helper."""
          microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [i]))
          grads = gradient_tape.gradient(microbatch_loss, var_list)
-          sample_state = self._dp_average_query.accumulate_record(sample_params,
-                                                                  sample_state,
-                                                                  grads)
+          sample_state = self._dp_sum_query.accumulate_record(
+              sample_params, sample_state, grads)
          return sample_state

        for idx in range(self._num_microbatches):
          sample_state = process_microbatch(idx, sample_state)

-        final_grads, self._global_state = (
-            self._dp_average_query.get_noised_result(sample_state,
-                                                     self._global_state))
+        grad_sums, self._global_state = (
+            self._dp_sum_query.get_noised_result(
+                sample_state, self._global_state))
+
+        def normalize(v):
+          return v / tf.cast(self._num_microbatches, tf.float32)
+
+        final_grads = nest.map_structure(normalize, grad_sums)

        grads_and_vars = list(zip(final_grads, var_list))
        return grads_and_vars
@@ -128,12 +136,12 @@ def make_optimizer_class(cls):
        # sampling from the dataset without replacement.
        if self._num_microbatches is None:
          self._num_microbatches = tf.shape(loss)[0]
-        self._global_state = self._dp_average_query.set_denominator(
-            self._global_state,
-            self._num_microbatches)
+        if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger):
+          self._dp_sum_query.set_batch_size(self._num_microbatches)

        microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1])
        sample_params = (
-            self._dp_average_query.derive_sample_params(self._global_state))
+            self._dp_sum_query.derive_sample_params(self._global_state))

        def process_microbatch(i, sample_state):
          """Process one microbatch (record) with privacy helper."""
@@ -145,7 +153,7 @@ def make_optimizer_class(cls):
              g if g is not None else tf.zeros_like(v)
              for (g, v) in zip(list(grads), var_list)
          ]
-          sample_state = self._dp_average_query.accumulate_record(
+          sample_state = self._dp_sum_query.accumulate_record(
              sample_params, sample_state, grads_list)
          return sample_state
@@ -154,7 +162,7 @@ def make_optimizer_class(cls):
              tf.trainable_variables() + tf.get_collection(
                  tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))

-        sample_state = self._dp_average_query.initial_sample_state(
+        sample_state = self._dp_sum_query.initial_sample_state(
            self._global_state, var_list)

        if self._unroll_microbatches:
@@ -169,10 +177,15 @@ def make_optimizer_class(cls):
          idx = tf.constant(0)
          _, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state])

-        final_grads, self._global_state = (
-            self._dp_average_query.get_noised_result(
+        grad_sums, self._global_state = (
+            self._dp_sum_query.get_noised_result(
                sample_state, self._global_state))

+        def normalize(v):
+          return tf.truediv(v, tf.cast(self._num_microbatches, tf.float32))
+
+        final_grads = nest.map_structure(normalize, grad_sums)
+
        return list(zip(final_grads, var_list))

  return DPOptimizerClass
@@ -188,20 +201,20 @@ def make_gaussian_optimizer_class(cls):
        self,
        l2_norm_clip,
        noise_multiplier,
-        num_microbatches,
-        ledger,
+        num_microbatches=None,
+        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
-      dp_average_query = gaussian_query.GaussianAverageQuery(
-          l2_norm_clip, l2_norm_clip * noise_multiplier,
-          num_microbatches, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(
+          l2_norm_clip, l2_norm_clip * noise_multiplier)

      if ledger:
-        dp_average_query = privacy_ledger.QueryWithLedger(
-            dp_average_query, ledger)
+        dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
+                                                      ledger=ledger)

      super(DPGaussianOptimizerClass, self).__init__(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches,
          unroll_microbatches,
          *args,
@@ -209,7 +222,7 @@ def make_gaussian_optimizer_class(cls):

    @property
    def ledger(self):
-      return self._ledger
+      return self._dp_sum_query.ledger

  return DPGaussianOptimizerClass

View file

@@ -53,14 +53,12 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([1.0, 2.0])
      data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(
-          1.0e9, 0.0, num_microbatches, ledger)
-      dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
-                                                        ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(
+          dp_sum_query, 1e6, num_microbatches / 1e6)

      opt = cls(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=num_microbatches,
          learning_rate=2.0)
@@ -84,12 +82,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
-                                                        ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -110,12 +106,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
-                                                        ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values

View file

@@ -46,22 +46,18 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]),
      ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]),
      ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
-      ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]),
-      ('DPAdam None', dp_optimizer.DPAdamOptimizer, None, [-2.5, -2.5]))
+      ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]))
  def testBaseline(self, cls, num_microbatches, expected_answer):
    with self.cached_session() as sess:
      var0 = tf.Variable([1.0, 2.0])
      data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

-      ledger = privacy_ledger.PrivacyLedger(
-          1e6, num_microbatches / 1e6 if num_microbatches else None)
-      dp_average_query = gaussian_query.GaussianAverageQuery(
-          1.0e9, 0.0, num_microbatches, ledger)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(
+          dp_sum_query, 1e6, num_microbatches / 1e6)

      opt = cls(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=num_microbatches,
          learning_rate=2.0)
@@ -84,12 +80,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -109,12 +103,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -153,12 +145,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      vector_loss = tf.squared_difference(labels, preds)
      scalar_loss = tf.reduce_mean(vector_loss)

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

      optimizer = dp_optimizer.DPGradientDescentOptimizer(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=1,
          learning_rate=1.0)
      global_step = tf.train.get_global_step()
@@ -198,14 +188,12 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):

      num_microbatches = 4

-      ledger = privacy_ledger.PrivacyLedger(
-          1e6, num_microbatches / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(
+          dp_sum_query, 1e6, num_microbatches / 1e6)

      opt = cls(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=num_microbatches,
          learning_rate=2.0,
          unroll_microbatches=True)
@@ -233,8 +221,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
          l2_norm_clip=4.0,
          noise_multiplier=2.0,
          num_microbatches=1,
-          learning_rate=2.0,
-          ledger=privacy_ledger.DummyLedger())
+          learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values

View file

@@ -94,9 +94,7 @@ def rnn_model_fn(features, labels, mode):  # pylint: disable=unused-argument
    ledger = privacy_ledger.PrivacyLedger(
        population_size=NB_TRAIN,
-        selection_probability=(FLAGS.batch_size / NB_TRAIN),
-        max_samples=1e6,
-        max_queries=1e6)
+        selection_probability=(FLAGS.batch_size / NB_TRAIN))

    optimizer = dp_optimizer.DPAdamGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,

View file

@@ -26,12 +26,11 @@ import tensorflow as tf
 from privacy.analysis.rdp_accountant import compute_rdp
 from privacy.analysis.rdp_accountant import get_privacy_spent
-from privacy.dp_query.gaussian_query import GaussianAverageQuery
-from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

 if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
-  tf.compat.v1.enable_eager_execution()
+  tf.enable_eager_execution()
 else:
   GradientDescentOptimizer = tf.optimizers.SGD  # pylint: disable=invalid-name
@@ -64,6 +63,9 @@ def compute_epsilon(steps):

 def main(_):
+  if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
+    raise ValueError('Number of microbatches should divide evenly batch_size')
+
   # Fetch the mnist data
   train, test = tf.keras.datasets.mnist.load_data()
   train_images, train_labels = train
@@ -97,13 +99,10 @@ def main(_):

   # Instantiate the optimizer
   if FLAGS.dpsgd:
-    dp_average_query = GaussianAverageQuery(
-        FLAGS.l2_norm_clip,
-        FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
-        FLAGS.microbatches)
-    opt = DPGradientDescentOptimizer(
-        dp_average_query,
-        FLAGS.microbatches,
+    opt = DPGradientDescentGaussianOptimizer(
+        l2_norm_clip=FLAGS.l2_norm_clip,
+        noise_multiplier=FLAGS.noise_multiplier,
+        num_microbatches=FLAGS.microbatches,
         learning_rate=FLAGS.learning_rate)
   else:
     opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
@@ -145,7 +144,7 @@ def main(_):

     # Compute the privacy budget expended so far.
     if FLAGS.dpsgd:
-      eps = compute_epsilon(epoch * steps_per_epoch)
+      eps = compute_epsilon((epoch + 1) * steps_per_epoch)
       print('For delta=1e-5, the current epsilon is: %.2f' % eps)
     else:
       print('Trained with vanilla non-private SGD optimizer')

View file

@@ -27,8 +27,7 @@ import tensorflow as tf
 from privacy.analysis.rdp_accountant import compute_rdp
 from privacy.analysis.rdp_accountant import get_privacy_spent
-from privacy.dp_query.gaussian_query import GaussianAverageQuery
-from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

 if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
@@ -42,10 +41,10 @@ flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
 flags.DEFINE_float('noise_multiplier', 1.1,
                    'Ratio of the standard deviation to the clipping norm')
 flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
-flags.DEFINE_integer('batch_size', 250, 'Batch size')
+flags.DEFINE_integer('batch_size', 256, 'Batch size')
 flags.DEFINE_integer('epochs', 60, 'Number of epochs')
 flags.DEFINE_integer(
-    'microbatches', 250, 'Number of microbatches '
+    'microbatches', 256, 'Number of microbatches '
     '(must evenly divide batch_size)')
 flags.DEFINE_string('model_dir', None, 'Model directory')
@@ -119,13 +118,10 @@ def main(unused_argv):
   ])

   if FLAGS.dpsgd:
-    dp_average_query = GaussianAverageQuery(
-        FLAGS.l2_norm_clip,
-        FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
-        FLAGS.microbatches)
-    optimizer = DPGradientDescentOptimizer(
-        dp_average_query,
-        FLAGS.microbatches,
+    optimizer = DPGradientDescentGaussianOptimizer(
+        l2_norm_clip=FLAGS.l2_norm_clip,
+        noise_multiplier=FLAGS.noise_multiplier,
+        num_microbatches=FLAGS.num_microbatches,
         learning_rate=FLAGS.learning_rate,
         unroll_microbatches=True)
   # Compute vector of per-example loss rather than its mean over a minibatch.