From d5dcfec745b377a23f8729f1f90e6839484e22d6 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 4 Jun 2019 10:14:09 -0700 Subject: [PATCH] Remove set_denominator functions from DPQuery and make QueryWithLedger easier to use. set_denominator was added so that the batch size doesn't need to be specified before constructing the optimizer, but it breaks the DPQuery abstraction. Now the optimizer uses a GaussianSumQuery instead of GaussianAverageQuery, and normalization by batch size is done inside the optimizer. Also instead of creating all DPQueries with a PrivacyLedger and then wrapping with QueryWithLedger, it is now sufficient to create the queries with no ledger and QueryWithLedger will construct the ledger and pass it to all inner queries. PiperOrigin-RevId: 251462353 --- privacy/__init__.py | 1 - privacy/analysis/privacy_ledger.py | 104 ++++++++---------- privacy/analysis/privacy_ledger_test.py | 24 ++-- privacy/analysis/rdp_accountant.py | 2 +- privacy/dp_query/dp_query.py | 10 ++ privacy/dp_query/gaussian_query.py | 34 +++--- privacy/dp_query/nested_query.py | 3 + privacy/dp_query/no_privacy_query.py | 5 +- privacy/dp_query/normalized_query.py | 8 +- .../quantile_adaptive_clip_sum_query.py | 22 ++-- .../quantile_adaptive_clip_sum_query_test.py | 12 +- privacy/optimizers/dp_optimizer.py | 77 +++++++------ privacy/optimizers/dp_optimizer_eager_test.py | 26 ++--- privacy/optimizers/dp_optimizer_test.py | 51 ++++----- tutorials/lm_dpsgd_tutorial.py | 4 +- tutorials/mnist_dpsgd_tutorial_eager.py | 21 ++-- tutorials/mnist_dpsgd_tutorial_keras.py | 18 ++- 17 files changed, 202 insertions(+), 220 deletions(-) diff --git a/privacy/__init__.py b/privacy/__init__.py index 4aa5cc5..59bfe20 100644 --- a/privacy/__init__.py +++ b/privacy/__init__.py @@ -20,7 +20,6 @@ import sys if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: - from privacy.analysis.privacy_ledger import DummyLedger from privacy.analysis.privacy_ledger import GaussianSumQueryEntry from privacy.analysis.privacy_ledger import PrivacyLedger from privacy.analysis.privacy_ledger import QueryWithLedger diff --git a/privacy/analysis/privacy_ledger.py b/privacy/analysis/privacy_ledger.py index f29c515..9c29eb9 100644 --- a/privacy/analysis/privacy_ledger.py +++ b/privacy/analysis/privacy_ledger.py @@ -65,44 +65,39 @@ class PrivacyLedger(object): def __init__(self, population_size, - selection_probability=None): + selection_probability): """Initialize the PrivacyLedger. Args: population_size: An integer (may be variable) specifying the size of the population, i.e. size of the training data used in each epoch. selection_probability: A float (may be variable) specifying the - probability each record is included in a sample. If None, it can be set - later with set_sample_size. + probability each record is included in a sample. Raises: ValueError: If selection_probability is 0. 
""" self._population_size = population_size self._selection_probability = selection_probability - if selection_probability is None: - init_capacity_samples = 1 + + if tf.executing_eagerly(): + if tf.equal(selection_probability, 0): + raise ValueError('Selection probability cannot be 0.') + init_capacity = tf.cast(tf.ceil(1 / selection_probability), tf.int32) else: - if tf.executing_eagerly(): - if tf.equal(selection_probability, 0): - raise ValueError('Selection probability cannot be 0.') - init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability), - tf.int32) - else: - if selection_probability == 0: - raise ValueError('Selection probability cannot be 0.') - init_capacity_samples = np.int(np.ceil(1 / selection_probability)) - init_capacity_queries = init_capacity_samples + if selection_probability == 0: + raise ValueError('Selection probability cannot be 0.') + init_capacity = np.int(np.ceil(1 / selection_probability)) # The query buffer stores rows corresponding to GaussianSumQueryEntries. self._query_buffer = tensor_buffer.TensorBuffer( - init_capacity_queries, [3], tf.float32, 'query') + init_capacity, [3], tf.float32, 'query') self._sample_var = tf.Variable( initial_value=tf.zeros([3]), trainable=False, name='sample') # The sample buffer stores rows corresponding to SampleEntries. self._sample_buffer = tensor_buffer.TensorBuffer( - init_capacity_samples, [3], tf.float32, 'sample') + init_capacity, [3], tf.float32, 'sample') self._sample_count = tf.Variable( initial_value=0.0, trainable=False, name='sample_count') self._query_count = tf.Variable( @@ -175,39 +170,6 @@ class PrivacyLedger(object): return format_ledger(sample_array, query_array) - def set_sample_size(self, batch_size): - self._selection_probability = tf.cast(batch_size, - tf.float32) / self._population_size - - -class DummyLedger(object): - """A ledger that records nothing. - - This ledger may be passed in place of a normal PrivacyLedger in case privacy - accounting is to be handled externally. - """ - - def record_sum_query(self, l2_norm_bound, noise_stddev): - del l2_norm_bound - del noise_stddev - return tf.no_op() - - def finalize_sample(self): - return tf.no_op() - - def get_unformatted_ledger(self): - empty_array = tf.zeros(shape=[0, 3]) - return empty_array, empty_array - - def get_formatted_ledger(self, sess): - del sess - empty_array = np.zeros(shape=[0, 3]) - return empty_array, empty_array - - def get_formatted_ledger_eager(self): - empty_array = np.zeros(shape=[0, 3]) - return empty_array, empty_array - class QueryWithLedger(dp_query.DPQuery): """A class for DP queries that record events to a PrivacyLedger. @@ -221,17 +183,40 @@ class QueryWithLedger(dp_query.DPQuery): For example usage, see privacy_ledger_test.py. """ - def __init__(self, query, ledger): + def __init__(self, query, + population_size=None, selection_probability=None, + ledger=None): """Initializes the QueryWithLedger. Args: query: The query whose events should be recorded to the ledger. Any subqueries (including those in the leaves of a nested query) should also contain a reference to the same ledger given here. - ledger: A PrivacyLedger to which privacy events should be recorded. + population_size: An integer (may be variable) specifying the size of the + population, i.e. size of the training data used in each epoch. May be + None if `ledger` is specified. + selection_probability: A float (may be variable) specifying the + probability each record is included in a sample. May be None if `ledger` + is specified. 
+ ledger: A PrivacyLedger to use. Must be specified if either of + `population_size` or `selection_probability` is None. """ self._query = query + if population_size is not None and selection_probability is not None: + self.set_ledger(PrivacyLedger(population_size, selection_probability)) + elif ledger is not None: + self.set_ledger(ledger) + else: + raise ValueError('One of (population_size, selection_probability) or ' + 'ledger must be specified.') + + @property + def ledger(self): + return self._ledger + + def set_ledger(self, ledger): self._ledger = ledger + self._query.set_ledger(ledger) def initial_global_state(self): """See base class.""" @@ -260,10 +245,13 @@ class QueryWithLedger(dp_query.DPQuery): def get_noised_result(self, sample_state, global_state): """Ensures sample is recorded to the ledger and returns noised result.""" + # Ensure sample_state is fully aggregated before calling get_noised_result. with tf.control_dependencies(nest.flatten(sample_state)): - with tf.control_dependencies([self._ledger.finalize_sample()]): - return self._query.get_noised_result(sample_state, global_state) - - def set_denominator(self, global_state, num_microbatches, microbatch_size=1): - self._ledger.set_sample_size(num_microbatches * microbatch_size) - return self._query.set_denominator(global_state, num_microbatches) + result, new_global_state = self._query.get_noised_result( + sample_state, global_state) + # Ensure inner queries have recorded before finalizing. + with tf.control_dependencies(nest.flatten(result)): + finalize = self._ledger.finalize_sample() + # Ensure finalizing happens. + with tf.control_dependencies([finalize]): + return nest.map_structure(tf.identity, result), new_global_state diff --git a/privacy/analysis/privacy_ledger_test.py b/privacy/analysis/privacy_ledger_test.py index fe51063..603b1a3 100644 --- a/privacy/analysis/privacy_ledger_test.py +++ b/privacy/analysis/privacy_ledger_test.py @@ -56,12 +56,11 @@ class PrivacyLedgerTest(tf.test.TestCase): population_size = tf.Variable(0) selection_probability = tf.Variable(1.0) - ledger = privacy_ledger.PrivacyLedger( - population_size, selection_probability) query = gaussian_query.GaussianSumQuery( - l2_norm_clip=10.0, stddev=0.0, ledger=ledger) - query = privacy_ledger.QueryWithLedger(query, ledger) + l2_norm_clip=10.0, stddev=0.0) + query = privacy_ledger.QueryWithLedger( + query, population_size, selection_probability) # First sample. 
tf.assign(population_size, 10) @@ -69,7 +68,7 @@ class PrivacyLedgerTest(tf.test.TestCase): test_utils.run_query(query, [record1, record2]) expected_queries = [[10.0, 0.0]] - formatted = ledger.get_formatted_ledger_eager() + formatted = query.ledger.get_formatted_ledger_eager() sample_1 = formatted[0] self.assertAllClose(sample_1.population_size, 10.0) self.assertAllClose(sample_1.selection_probability, 0.1) @@ -80,7 +79,7 @@ class PrivacyLedgerTest(tf.test.TestCase): tf.assign(selection_probability, 0.2) test_utils.run_query(query, [record1, record2]) - formatted = ledger.get_formatted_ledger_eager() + formatted = query.ledger.get_formatted_ledger_eager() sample_1, sample_2 = formatted self.assertAllClose(sample_1.population_size, 10.0) self.assertAllClose(sample_1.selection_probability, 0.1) @@ -93,16 +92,15 @@ class PrivacyLedgerTest(tf.test.TestCase): def test_nested_query(self): population_size = tf.Variable(0) selection_probability = tf.Variable(1.0) - ledger = privacy_ledger.PrivacyLedger( - population_size, selection_probability) query1 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger) + l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0) query2 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger) + l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0) query = nested_query.NestedQuery([query1, query2]) - query = privacy_ledger.QueryWithLedger(query, ledger) + query = privacy_ledger.QueryWithLedger( + query, population_size, selection_probability) record1 = [1.0, [12.0, 9.0]] record2 = [5.0, [1.0, 2.0]] @@ -113,7 +111,7 @@ class PrivacyLedgerTest(tf.test.TestCase): test_utils.run_query(query, [record1, record2]) expected_queries = [[4.0, 2.0], [5.0, 1.0]] - formatted = ledger.get_formatted_ledger_eager() + formatted = query.ledger.get_formatted_ledger_eager() sample_1 = formatted[0] self.assertAllClose(sample_1.population_size, 10.0) self.assertAllClose(sample_1.selection_probability, 0.1) @@ -124,7 +122,7 @@ class PrivacyLedgerTest(tf.test.TestCase): tf.assign(selection_probability, 0.2) test_utils.run_query(query, [record1, record2]) - formatted = ledger.get_formatted_ledger_eager() + formatted = query.ledger.get_formatted_ledger_eager() sample_1, sample_2 = formatted self.assertAllClose(sample_1.population_size, 10.0) self.assertAllClose(sample_1.selection_probability, 0.1) diff --git a/privacy/analysis/rdp_accountant.py b/privacy/analysis/rdp_accountant.py index fe5c928..195b91e 100644 --- a/privacy/analysis/rdp_accountant.py +++ b/privacy/analysis/rdp_accountant.py @@ -307,7 +307,7 @@ def compute_rdp_from_ledger(ledger, orders): Returns: RDP at all orders, can be np.inf. """ - total_rdp = 0 + total_rdp = np.zeros_like(orders, dtype=float) for sample in ledger: # Compute equivalent z from l2_clip_bounds and noise stddevs in sample. # See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula. diff --git a/privacy/dp_query/dp_query.py b/privacy/dp_query/dp_query.py index 116b8be..4fa4fe3 100644 --- a/privacy/dp_query/dp_query.py +++ b/privacy/dp_query/dp_query.py @@ -61,6 +61,16 @@ class DPQuery(object): __metaclass__ = abc.ABCMeta + def set_ledger(self, ledger): + """Supplies privacy ledger to which the query can record privacy events. + + Args: + ledger: A `PrivacyLedger`. + """ + del ledger + raise TypeError( + 'DPQuery type %s does not support set_ledger.' 
% type(self).__name__) + def initial_global_state(self): """Returns the initial global state for the DPQuery.""" return () diff --git a/privacy/dp_query/gaussian_query.py b/privacy/dp_query/gaussian_query.py index 35e0bcf..2977f91 100644 --- a/privacy/dp_query/gaussian_query.py +++ b/privacy/dp_query/gaussian_query.py @@ -43,17 +43,19 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): _GlobalState = collections.namedtuple( '_GlobalState', ['l2_norm_clip', 'stddev']) - def __init__(self, l2_norm_clip, stddev, ledger=None): + def __init__(self, l2_norm_clip, stddev): """Initializes the GaussianSumQuery. Args: l2_norm_clip: The clipping norm to apply to the global norm of each record. stddev: The stddev of the noise added to the sum. - ledger: The privacy ledger to which queries should be recorded. """ self._l2_norm_clip = l2_norm_clip self._stddev = stddev + self._ledger = None + + def set_ledger(self, ledger): self._ledger = ledger def make_global_state(self, l2_norm_clip, stddev): @@ -68,16 +70,8 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): return global_state.l2_norm_clip def initial_sample_state(self, global_state, template): - if self._ledger: - dependencies = [ - self._ledger.record_sum_query( - global_state.l2_norm_clip, global_state.stddev) - ] - else: - dependencies = [] - with tf.control_dependencies(dependencies): - return nest.map_structure( - dp_query.zeros_like, template) + return nest.map_structure( + dp_query.zeros_like, template) def preprocess_record_impl(self, params, record): """Clips the l2 norm, returning the clipped record and the l2 norm. @@ -110,7 +104,15 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): def add_noise(v): return v + random_normal(tf.shape(v)) - return nest.map_structure(add_noise, sample_state), global_state + if self._ledger: + dependencies = [ + self._ledger.record_sum_query( + global_state.l2_norm_clip, global_state.stddev) + ] + else: + dependencies = [] + with tf.control_dependencies(dependencies): + return nest.map_structure(add_noise, sample_state), global_state class GaussianAverageQuery(normalized_query.NormalizedQuery): @@ -127,8 +129,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery): def __init__(self, l2_norm_clip, sum_stddev, - denominator, - ledger=None): + denominator): """Initializes the GaussianAverageQuery. Args: @@ -138,8 +139,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery): normalization). denominator: The normalization constant (applied after noise is added to the sum). - ledger: The privacy ledger to which queries should be recorded. 
""" super(GaussianAverageQuery, self).__init__( - numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger), + numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev), denominator=denominator) diff --git a/privacy/dp_query/nested_query.py b/privacy/dp_query/nested_query.py index de5aa08..62c1f5f 100644 --- a/privacy/dp_query/nested_query.py +++ b/privacy/dp_query/nested_query.py @@ -62,6 +62,9 @@ class NestedQuery(dp_query.DPQuery): return nest.map_structure_up_to( self._queries, caller, self._queries, *inputs) + def set_ledger(self, ledger): + self._map_to_queries('set_ledger', ledger=ledger) + def initial_global_state(self): """See base class.""" return self._map_to_queries('initial_global_state') diff --git a/privacy/dp_query/no_privacy_query.py b/privacy/dp_query/no_privacy_query.py index 3d03ce7..68731b4 100644 --- a/privacy/dp_query/no_privacy_query.py +++ b/privacy/dp_query/no_privacy_query.py @@ -67,5 +67,6 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery): """See base class.""" sum_state, denominator = sample_state - return nest.map_structure( - lambda t: tf.truediv(t, denominator), sum_state), global_state + return ( + nest.map_structure(lambda t: t / denominator, sum_state), + global_state) diff --git a/privacy/dp_query/normalized_query.py b/privacy/dp_query/normalized_query.py index c3ca4d4..6e0d833 100644 --- a/privacy/dp_query/normalized_query.py +++ b/privacy/dp_query/normalized_query.py @@ -51,6 +51,10 @@ class NormalizedQuery(dp_query.DPQuery): self._numerator = numerator_query self._denominator = denominator + def set_ledger(self, ledger): + """See base class.""" + self._numerator.set_ledger(ledger) + def initial_global_state(self): """See base class.""" if self._denominator is not None: @@ -92,7 +96,3 @@ class NormalizedQuery(dp_query.DPQuery): def merge_sample_states(self, sample_state_1, sample_state_2): """See base class.""" return self._numerator.merge_sample_states(sample_state_1, sample_state_2) - - def set_denominator(self, global_state, denominator): - """Returns an updated global_state with the given denominator.""" - return global_state._replace(denominator=tf.cast(denominator, tf.float32)) diff --git a/privacy/dp_query/quantile_adaptive_clip_sum_query.py b/privacy/dp_query/quantile_adaptive_clip_sum_query.py index 6aa9785..eaa516b 100644 --- a/privacy/dp_query/quantile_adaptive_clip_sum_query.py +++ b/privacy/dp_query/quantile_adaptive_clip_sum_query.py @@ -68,8 +68,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery): target_unclipped_quantile, learning_rate, clipped_count_stddev, - expected_num_records, - ledger=None): + expected_num_records): """Initializes the QuantileAdaptiveClipSumQuery. Args: @@ -87,7 +86,6 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery): should be about 0.5 for reasonable privacy. expected_num_records: The expected number of records per round, used to estimate the clipped count quantile. - ledger: The privacy ledger to which queries should be recorded. """ self._initial_l2_norm_clip = initial_l2_norm_clip self._noise_multiplier = noise_multiplier @@ -95,8 +93,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery): self._learning_rate = learning_rate # Initialize sum query's global state with None, to be set later. - self._sum_query = gaussian_query.GaussianSumQuery( - None, None, ledger) + self._sum_query = gaussian_query.GaussianSumQuery(None, None) # self._clipped_fraction_query is a DPQuery used to estimate the fraction of # records that are clipped. 
It accumulates an indicator 0/1 of whether each @@ -110,8 +107,12 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery): self._clipped_fraction_query = gaussian_query.GaussianAverageQuery( l2_norm_clip=0.5, sum_stddev=clipped_count_stddev, - denominator=expected_num_records, - ledger=ledger) + denominator=expected_num_records) + + def set_ledger(self, ledger): + """See base class.""" + self._sum_query.set_ledger(ledger) + self._clipped_fraction_query.set_ledger(ledger) def initial_global_state(self): """See base class.""" @@ -252,8 +253,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery): target_unclipped_quantile, learning_rate, clipped_count_stddev, - expected_num_records, - ledger=None): + expected_num_records): """Initializes the AdaptiveClipAverageQuery. Args: @@ -272,7 +272,6 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery): should be about 0.5 for reasonable privacy. expected_num_records: The expected number of records, used to estimate the clipped count quantile. - ledger: The privacy ledger to which queries should be recorded. """ numerator_query = QuantileAdaptiveClipSumQuery( initial_l2_norm_clip, @@ -280,8 +279,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery): target_unclipped_quantile, learning_rate, clipped_count_stddev, - expected_num_records, - ledger) + expected_num_records) super(QuantileAdaptiveClipAverageQuery, self).__init__( numerator_query=numerator_query, denominator=denominator) diff --git a/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py b/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py index 0d9ab2e..6206051 100644 --- a/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py +++ b/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py @@ -251,8 +251,6 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase): population_size = tf.Variable(0) selection_probability = tf.Variable(1.0) - ledger = privacy_ledger.PrivacyLedger( - population_size, selection_probability) query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( initial_l2_norm_clip=10.0, @@ -260,10 +258,10 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase): target_unclipped_quantile=0.0, learning_rate=1.0, clipped_count_stddev=0.0, - expected_num_records=2.0, - ledger=ledger) + expected_num_records=2.0) - query = privacy_ledger.QueryWithLedger(query, ledger) + query = privacy_ledger.QueryWithLedger( + query, population_size, selection_probability) # First sample. 
tf.assign(population_size, 10) @@ -271,7 +269,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase): _, global_state = test_utils.run_query(query, [record1, record2]) expected_queries = [[10.0, 10.0], [0.5, 0.0]] - formatted = ledger.get_formatted_ledger_eager() + formatted = query.ledger.get_formatted_ledger_eager() sample_1 = formatted[0] self.assertAllClose(sample_1.population_size, 10.0) self.assertAllClose(sample_1.selection_probability, 0.1) @@ -282,7 +280,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase): tf.assign(selection_probability, 0.2) test_utils.run_query(query, [record1, record2], global_state) - formatted = ledger.get_formatted_ledger_eager() + formatted = query.ledger.get_formatted_ledger_eager() sample_1, sample_2 = formatted self.assertAllClose(sample_1.population_size, 10.0) self.assertAllClose(sample_1.selection_probability, 0.1) diff --git a/privacy/optimizers/dp_optimizer.py b/privacy/optimizers/dp_optimizer.py index 9a36767..59cfe13 100644 --- a/privacy/optimizers/dp_optimizer.py +++ b/privacy/optimizers/dp_optimizer.py @@ -23,6 +23,11 @@ import tensorflow as tf from privacy.analysis import privacy_ledger from privacy.dp_query import gaussian_query +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + nest = tf.contrib.framework.nest +else: + nest = tf.nest + def make_optimizer_class(cls): """Constructs a DP optimizer class from an existing one.""" @@ -46,7 +51,7 @@ def make_optimizer_class(cls): def __init__( self, - dp_average_query, + dp_sum_query, num_microbatches=None, unroll_microbatches=False, *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args @@ -54,7 +59,7 @@ def make_optimizer_class(cls): """Initialize the DPOptimizerClass. Args: - dp_average_query: DPQuery object, specifying differential privacy + dp_sum_query: DPQuery object, specifying differential privacy mechanism to use. num_microbatches: How many microbatches into which the minibatch is split. If None, will default to the size of the minibatch, and @@ -64,9 +69,9 @@ def make_optimizer_class(cls): raises an exception. """ super(DPOptimizerClass, self).__init__(*args, **kwargs) - self._dp_average_query = dp_average_query + self._dp_sum_query = dp_sum_query self._num_microbatches = num_microbatches - self._global_state = self._dp_average_query.initial_global_state() + self._global_state = self._dp_sum_query.initial_global_state() # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug. # Beware: When num_microbatches is large (>100), enabling this parameter # may cause an OOM error. 
@@ -88,31 +93,34 @@ def make_optimizer_class(cls): vector_loss = loss() if self._num_microbatches is None: self._num_microbatches = tf.shape(vector_loss)[0] - self._global_state = self._dp_average_query.set_denominator( - self._global_state, - self._num_microbatches) - sample_state = self._dp_average_query.initial_sample_state( + if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger): + self._dp_sum_query.set_batch_size(self._num_microbatches) + sample_state = self._dp_sum_query.initial_sample_state( self._global_state, var_list) microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1]) sample_params = ( - self._dp_average_query.derive_sample_params(self._global_state)) + self._dp_sum_query.derive_sample_params(self._global_state)) def process_microbatch(i, sample_state): """Process one microbatch (record) with privacy helper.""" microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [i])) grads = gradient_tape.gradient(microbatch_loss, var_list) - sample_state = self._dp_average_query.accumulate_record(sample_params, - sample_state, - grads) + sample_state = self._dp_sum_query.accumulate_record( + sample_params, sample_state, grads) return sample_state for idx in range(self._num_microbatches): sample_state = process_microbatch(idx, sample_state) - final_grads, self._global_state = ( - self._dp_average_query.get_noised_result(sample_state, - self._global_state)) + grad_sums, self._global_state = ( + self._dp_sum_query.get_noised_result( + sample_state, self._global_state)) + + def normalize(v): + return v / tf.cast(self._num_microbatches, tf.float32) + + final_grads = nest.map_structure(normalize, grad_sums) grads_and_vars = list(zip(final_grads, var_list)) return grads_and_vars @@ -128,12 +136,12 @@ def make_optimizer_class(cls): # sampling from the dataset without replacement. 
if self._num_microbatches is None: self._num_microbatches = tf.shape(loss)[0] - self._global_state = self._dp_average_query.set_denominator( - self._global_state, - self._num_microbatches) + if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger): + self._dp_sum_query.set_batch_size(self._num_microbatches) + microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1]) sample_params = ( - self._dp_average_query.derive_sample_params(self._global_state)) + self._dp_sum_query.derive_sample_params(self._global_state)) def process_microbatch(i, sample_state): """Process one microbatch (record) with privacy helper.""" @@ -145,7 +153,7 @@ def make_optimizer_class(cls): g if g is not None else tf.zeros_like(v) for (g, v) in zip(list(grads), var_list) ] - sample_state = self._dp_average_query.accumulate_record( + sample_state = self._dp_sum_query.accumulate_record( sample_params, sample_state, grads_list) return sample_state @@ -154,7 +162,7 @@ def make_optimizer_class(cls): tf.trainable_variables() + tf.get_collection( tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) - sample_state = self._dp_average_query.initial_sample_state( + sample_state = self._dp_sum_query.initial_sample_state( self._global_state, var_list) if self._unroll_microbatches: @@ -169,10 +177,15 @@ def make_optimizer_class(cls): idx = tf.constant(0) _, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state]) - final_grads, self._global_state = ( - self._dp_average_query.get_noised_result( + grad_sums, self._global_state = ( + self._dp_sum_query.get_noised_result( sample_state, self._global_state)) + def normalize(v): + return tf.truediv(v, tf.cast(self._num_microbatches, tf.float32)) + + final_grads = nest.map_structure(normalize, grad_sums) + return list(zip(final_grads, var_list)) return DPOptimizerClass @@ -188,20 +201,20 @@ def make_gaussian_optimizer_class(cls): self, l2_norm_clip, noise_multiplier, - num_microbatches, - ledger, + num_microbatches=None, + ledger=None, unroll_microbatches=False, *args, # pylint: disable=keyword-arg-before-vararg **kwargs): - dp_average_query = gaussian_query.GaussianAverageQuery( - l2_norm_clip, l2_norm_clip * noise_multiplier, - num_microbatches, ledger) + dp_sum_query = gaussian_query.GaussianSumQuery( + l2_norm_clip, l2_norm_clip * noise_multiplier) + if ledger: - dp_average_query = privacy_ledger.QueryWithLedger( - dp_average_query, ledger) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, + ledger=ledger) super(DPGaussianOptimizerClass, self).__init__( - dp_average_query, + dp_sum_query, num_microbatches, unroll_microbatches, *args, @@ -209,7 +222,7 @@ def make_gaussian_optimizer_class(cls): @property def ledger(self): - return self._ledger + return self._dp_sum_query.ledger return DPGaussianOptimizerClass diff --git a/privacy/optimizers/dp_optimizer_eager_test.py b/privacy/optimizers/dp_optimizer_eager_test.py index d2ed98d..98e9a9e 100644 --- a/privacy/optimizers/dp_optimizer_eager_test.py +++ b/privacy/optimizers/dp_optimizer_eager_test.py @@ -53,14 +53,12 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([1.0, 2.0]) data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery( - 1.0e9, 0.0, num_microbatches, ledger) - dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, - ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) + dp_sum_query = 
privacy_ledger.QueryWithLedger( + dp_sum_query, 1e6, num_microbatches / 1e6) opt = cls( - dp_average_query, + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) @@ -84,12 +82,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0, 0.0]) data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1) - dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, - ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) - opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0) + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -110,12 +106,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0]) data0 = tf.Variable([[0.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1) - dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, - ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) - opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0) + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values diff --git a/privacy/optimizers/dp_optimizer_test.py b/privacy/optimizers/dp_optimizer_test.py index 820006f..44d749f 100644 --- a/privacy/optimizers/dp_optimizer_test.py +++ b/privacy/optimizers/dp_optimizer_test.py @@ -46,22 +46,18 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]), ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]), ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]), - ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]), - ('DPAdam None', dp_optimizer.DPAdamOptimizer, None, [-2.5, -2.5])) + ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5])) def testBaseline(self, cls, num_microbatches, expected_answer): with self.cached_session() as sess: var0 = tf.Variable([1.0, 2.0]) data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) - ledger = privacy_ledger.PrivacyLedger( - 1e6, num_microbatches / 1e6 if num_microbatches else None) - dp_average_query = gaussian_query.GaussianAverageQuery( - 1.0e9, 0.0, num_microbatches, ledger) - dp_average_query = privacy_ledger.QueryWithLedger( - dp_average_query, ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger( + dp_sum_query, 1e6, num_microbatches / 1e6) opt = cls( - dp_average_query, + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) @@ -84,12 +80,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0, 0.0]) data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1) - dp_average_query = privacy_ledger.QueryWithLedger( - dp_average_query, ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) - opt = 
cls(dp_average_query, num_microbatches=1, learning_rate=2.0) + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -109,12 +103,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0]) data0 = tf.Variable([[0.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1) - dp_average_query = privacy_ledger.QueryWithLedger( - dp_average_query, ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) - opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0) + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -153,12 +145,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): vector_loss = tf.squared_difference(labels, preds) scalar_loss = tf.reduce_mean(vector_loss) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1) - dp_average_query = privacy_ledger.QueryWithLedger( - dp_average_query, ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) optimizer = dp_optimizer.DPGradientDescentOptimizer( - dp_average_query, + dp_sum_query, num_microbatches=1, learning_rate=1.0) global_step = tf.train.get_global_step() @@ -198,14 +188,12 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): num_microbatches = 4 - ledger = privacy_ledger.PrivacyLedger( - 1e6, num_microbatches / 1e6) - dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4) - dp_average_query = privacy_ledger.QueryWithLedger( - dp_average_query, ledger) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger( + dp_sum_query, 1e6, num_microbatches / 1e6) opt = cls( - dp_average_query, + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0, unroll_microbatches=True) @@ -233,8 +221,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): l2_norm_clip=4.0, noise_multiplier=2.0, num_microbatches=1, - learning_rate=2.0, - ledger=privacy_ledger.DummyLedger()) + learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values diff --git a/tutorials/lm_dpsgd_tutorial.py b/tutorials/lm_dpsgd_tutorial.py index 8280e3c..8bc1cff 100644 --- a/tutorials/lm_dpsgd_tutorial.py +++ b/tutorials/lm_dpsgd_tutorial.py @@ -94,9 +94,7 @@ def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument ledger = privacy_ledger.PrivacyLedger( population_size=NB_TRAIN, - selection_probability=(FLAGS.batch_size / NB_TRAIN), - max_samples=1e6, - max_queries=1e6) + selection_probability=(FLAGS.batch_size / NB_TRAIN)) optimizer = dp_optimizer.DPAdamGaussianOptimizer( l2_norm_clip=FLAGS.l2_norm_clip, diff --git a/tutorials/mnist_dpsgd_tutorial_eager.py b/tutorials/mnist_dpsgd_tutorial_eager.py index 92d55b9..94b03d4 100644 --- a/tutorials/mnist_dpsgd_tutorial_eager.py +++ b/tutorials/mnist_dpsgd_tutorial_eager.py @@ -26,12 +26,11 @@ import tensorflow as tf from privacy.analysis.rdp_accountant import compute_rdp from privacy.analysis.rdp_accountant import get_privacy_spent -from privacy.dp_query.gaussian_query import 
GaussianAverageQuery
-from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
 
 if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
-  tf.compat.v1.enable_eager_execution()
+  tf.enable_eager_execution()
 else:
   GradientDescentOptimizer = tf.optimizers.SGD  # pylint: disable=invalid-name
 
@@ -64,6 +63,9 @@ def compute_epsilon(steps):
 
 
 def main(_):
+  if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
+    raise ValueError('Number of microbatches should evenly divide batch_size')
+
   # Fetch the mnist data
   train, test = tf.keras.datasets.mnist.load_data()
   train_images, train_labels = train
@@ -97,13 +99,10 @@ def main(_):
 
   # Instantiate the optimizer
   if FLAGS.dpsgd:
-    dp_average_query = GaussianAverageQuery(
-        FLAGS.l2_norm_clip,
-        FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
-        FLAGS.microbatches)
-    opt = DPGradientDescentOptimizer(
-        dp_average_query,
-        FLAGS.microbatches,
+    opt = DPGradientDescentGaussianOptimizer(
+        l2_norm_clip=FLAGS.l2_norm_clip,
+        noise_multiplier=FLAGS.noise_multiplier,
+        num_microbatches=FLAGS.microbatches,
         learning_rate=FLAGS.learning_rate)
   else:
     opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
@@ -145,7 +144,7 @@ def main(_):
 
     # Compute the privacy budget expended so far.
     if FLAGS.dpsgd:
-      eps = compute_epsilon(epoch * steps_per_epoch)
+      eps = compute_epsilon((epoch + 1) * steps_per_epoch)
       print('For delta=1e-5, the current epsilon is: %.2f' % eps)
     else:
       print('Trained with vanilla non-private SGD optimizer')
diff --git a/tutorials/mnist_dpsgd_tutorial_keras.py b/tutorials/mnist_dpsgd_tutorial_keras.py
index acf6dc6..71f67cb 100644
--- a/tutorials/mnist_dpsgd_tutorial_keras.py
+++ b/tutorials/mnist_dpsgd_tutorial_keras.py
@@ -27,8 +27,7 @@ import tensorflow as tf
 
 from privacy.analysis.rdp_accountant import compute_rdp
 from privacy.analysis.rdp_accountant import get_privacy_spent
-from privacy.dp_query.gaussian_query import GaussianAverageQuery
-from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer
 
 if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
@@ -42,10 +41,10 @@ flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
 flags.DEFINE_float('noise_multiplier', 1.1,
                    'Ratio of the standard deviation to the clipping norm')
 flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
-flags.DEFINE_integer('batch_size', 250, 'Batch size')
+flags.DEFINE_integer('batch_size', 256, 'Batch size')
 flags.DEFINE_integer('epochs', 60, 'Number of epochs')
 flags.DEFINE_integer(
-    'microbatches', 250, 'Number of microbatches '
+    'microbatches', 256, 'Number of microbatches '
     '(must evenly divide batch_size)')
 flags.DEFINE_string('model_dir', None, 'Model directory')
@@ -119,13 +118,10 @@ def main(unused_argv):
   ])
 
   if FLAGS.dpsgd:
-    dp_average_query = GaussianAverageQuery(
-        FLAGS.l2_norm_clip,
-        FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
-        FLAGS.microbatches)
-    optimizer = DPGradientDescentOptimizer(
-        dp_average_query,
-        FLAGS.microbatches,
+    optimizer = DPGradientDescentGaussianOptimizer(
+        l2_norm_clip=FLAGS.l2_norm_clip,
+        noise_multiplier=FLAGS.noise_multiplier,
+        num_microbatches=FLAGS.microbatches,
+        learning_rate=FLAGS.learning_rate,
         unroll_microbatches=True)
     # Compute vector of per-example loss rather than its mean over a 
minibatch.
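
---

Usage sketch (not part of the patch): the snippet below pulls together the pattern this change enables, assembled from the test and tutorial code above. Queries are now built with no ledger; QueryWithLedger constructs the PrivacyLedger from population_size and selection_probability and pushes it into all inner queries via set_ledger, and the optimizer takes a sum query and divides the noised gradient sums by num_microbatches itself. The hyperparameter values, the eager-mode setup, and the elided training loop are illustrative assumptions; the class and function names all appear in the diff.

# Minimal sketch of the new workflow, assuming eager execution and
# illustrative hyperparameters (the training loop itself is omitted).
import tensorflow as tf

from privacy.analysis import privacy_ledger
from privacy.analysis.rdp_accountant import compute_rdp_from_ledger
from privacy.analysis.rdp_accountant import get_privacy_spent
from privacy.dp_query import gaussian_query
from privacy.optimizers import dp_optimizer

tf.enable_eager_execution()

batch_size = 250        # illustrative values only
train_size = 60000
l2_norm_clip = 1.0
noise_multiplier = 1.1

# Build the query without a ledger; QueryWithLedger creates the
# PrivacyLedger itself and passes it to all inner queries via set_ledger().
dp_sum_query = gaussian_query.GaussianSumQuery(
    l2_norm_clip, l2_norm_clip * noise_multiplier)
dp_sum_query = privacy_ledger.QueryWithLedger(
    dp_sum_query,
    population_size=train_size,
    selection_probability=batch_size / train_size)

# The optimizer now takes a *sum* query and normalizes the noised gradient
# sums by num_microbatches internally, so no denominator is set on the query.
opt = dp_optimizer.DPGradientDescentOptimizer(
    dp_sum_query, num_microbatches=batch_size, learning_rate=0.15)

# ... training loop using opt.compute_gradients / opt.apply_gradients ...

# The ledger is reachable through the query and feeds the RDP accountant.
orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
formatted_ledger = dp_sum_query.ledger.get_formatted_ledger_eager()
rdp = compute_rdp_from_ledger(formatted_ledger, orders)
eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
print('For delta=1e-5, the current epsilon is: %.2f' % eps)

Because normalization now happens inside the optimizer, the same GaussianSumQuery works whether or not the number of microbatches is known when the query is constructed, which is what made set_denominator unnecessary.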