From 0efb23afcb15ac945a486cbc193382e4eeb32cc4 Mon Sep 17 00:00:00 2001 From: Ilya Mironov Date: Tue, 21 May 2019 11:37:54 -0700 Subject: [PATCH] Changing initial capacity for the ledger to smaller values. (+ restoring compatibility with Python 2) PiperOrigin-RevId: 249292683 --- privacy/analysis/privacy_ledger.py | 39 +++++++++++-------- privacy/analysis/privacy_ledger_test.py | 15 ++++--- privacy/analysis/tensor_buffer_test_eager.py | 4 +- .../quantile_adaptive_clip_sum_query_test.py | 4 +- privacy/optimizers/dp_optimizer_eager_test.py | 6 +-- privacy/optimizers/dp_optimizer_test.py | 10 ++--- 6 files changed, 45 insertions(+), 33 deletions(-) diff --git a/privacy/analysis/privacy_ledger.py b/privacy/analysis/privacy_ledger.py index 448f4cc..f29c515 100644 --- a/privacy/analysis/privacy_ledger.py +++ b/privacy/analysis/privacy_ledger.py @@ -65,37 +65,44 @@ class PrivacyLedger(object): def __init__(self, population_size, - selection_probability=None, - max_samples=None, - max_queries=None): + selection_probability=None): """Initialize the PrivacyLedger. Args: population_size: An integer (may be variable) specifying the size of the population, i.e. size of the training data used in each epoch. selection_probability: A float (may be variable) specifying the - probability each record is included in a sample. - max_samples: The maximum number of samples. An exception is thrown if more - than this many samples are recorded. - max_queries: The maximum number of queries. An exception is thrown if more - than this many queries are recorded. + probability each record is included in a sample. If None, it can be set + later with set_sample_size. + + Raises: + ValueError: If selection_probability is 0. """ self._population_size = population_size self._selection_probability = selection_probability - if max_samples is None: - max_samples = 1000 * population_size - if max_queries is None: - max_queries = 1000 * population_size + if selection_probability is None: + init_capacity_samples = 1 + else: + if tf.executing_eagerly(): + if tf.equal(selection_probability, 0): + raise ValueError('Selection probability cannot be 0.') + init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability), + tf.int32) + else: + if selection_probability == 0: + raise ValueError('Selection probability cannot be 0.') + init_capacity_samples = np.int(np.ceil(1 / selection_probability)) + init_capacity_queries = init_capacity_samples # The query buffer stores rows corresponding to GaussianSumQueryEntries. - self._query_buffer = tensor_buffer.TensorBuffer(max_queries, [3], - tf.float32, 'query') + self._query_buffer = tensor_buffer.TensorBuffer( + init_capacity_queries, [3], tf.float32, 'query') self._sample_var = tf.Variable( initial_value=tf.zeros([3]), trainable=False, name='sample') # The sample buffer stores rows corresponding to SampleEntries. - self._sample_buffer = tensor_buffer.TensorBuffer(max_samples, [3], - tf.float32, 'sample') + self._sample_buffer = tensor_buffer.TensorBuffer( + init_capacity_samples, [3], tf.float32, 'sample') self._sample_count = tf.Variable( initial_value=0.0, trainable=False, name='sample_count') self._query_count = tf.Variable( diff --git a/privacy/analysis/privacy_ledger_test.py b/privacy/analysis/privacy_ledger_test.py index 8997c26..fe51063 100644 --- a/privacy/analysis/privacy_ledger_test.py +++ b/privacy/analysis/privacy_ledger_test.py @@ -30,8 +30,13 @@ tf.enable_eager_execution() class PrivacyLedgerTest(tf.test.TestCase): + def test_fail_on_probability_zero(self): + with self.assertRaisesRegexp(ValueError, + 'Selection probability cannot be 0.'): + privacy_ledger.PrivacyLedger(10, 0) + def test_basic(self): - ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50) + ledger = privacy_ledger.PrivacyLedger(10, 0.1) ledger.record_sum_query(5.0, 1.0) ledger.record_sum_query(2.0, 0.5) @@ -50,9 +55,9 @@ class PrivacyLedgerTest(tf.test.TestCase): record2 = tf.constant([-1.0, 1.0]) population_size = tf.Variable(0) - selection_probability = tf.Variable(0.0) + selection_probability = tf.Variable(1.0) ledger = privacy_ledger.PrivacyLedger( - population_size, selection_probability, 50, 50) + population_size, selection_probability) query = gaussian_query.GaussianSumQuery( l2_norm_clip=10.0, stddev=0.0, ledger=ledger) @@ -87,9 +92,9 @@ class PrivacyLedgerTest(tf.test.TestCase): def test_nested_query(self): population_size = tf.Variable(0) - selection_probability = tf.Variable(0.0) + selection_probability = tf.Variable(1.0) ledger = privacy_ledger.PrivacyLedger( - population_size, selection_probability, 50, 50) + population_size, selection_probability) query1 = gaussian_query.GaussianAverageQuery( l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger) diff --git a/privacy/analysis/tensor_buffer_test_eager.py b/privacy/analysis/tensor_buffer_test_eager.py index c5a4900..21156b6 100644 --- a/privacy/analysis/tensor_buffer_test_eager.py +++ b/privacy/analysis/tensor_buffer_test_eager.py @@ -41,7 +41,7 @@ class TensorBufferTest(tf.test.TestCase): self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) def test_fail_on_scalar(self): - with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'): + with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'): tensor_buffer.TensorBuffer(1, ()) def test_fail_on_inconsistent_shape(self): @@ -49,7 +49,7 @@ class TensorBufferTest(tf.test.TestCase): my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') - with self.assertRaisesRegex( + with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, 'Appending value of inconsistent shape.'): my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32)) diff --git a/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py b/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py index 396d240..0d9ab2e 100644 --- a/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py +++ b/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py @@ -250,9 +250,9 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase): record2 = tf.constant([-7.25]) population_size = tf.Variable(0) - selection_probability = tf.Variable(0.0) + selection_probability = tf.Variable(1.0) ledger = privacy_ledger.PrivacyLedger( - population_size, selection_probability, 50, 50) + population_size, selection_probability) query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( initial_l2_norm_clip=10.0, diff --git a/privacy/optimizers/dp_optimizer_eager_test.py b/privacy/optimizers/dp_optimizer_eager_test.py index cfb2ccb..d2ed98d 100644 --- a/privacy/optimizers/dp_optimizer_eager_test.py +++ b/privacy/optimizers/dp_optimizer_eager_test.py @@ -53,7 +53,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([1.0, 2.0]) data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6, 50, 50) + ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery( 1.0e9, 0.0, num_microbatches, ledger) dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, @@ -84,7 +84,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0, 0.0]) data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50) + ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1) dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, ledger) @@ -110,7 +110,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0]) data0 = tf.Variable([[0.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000) + ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1) dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, ledger) diff --git a/privacy/optimizers/dp_optimizer_test.py b/privacy/optimizers/dp_optimizer_test.py index e8bd635..820006f 100644 --- a/privacy/optimizers/dp_optimizer_test.py +++ b/privacy/optimizers/dp_optimizer_test.py @@ -54,7 +54,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) ledger = privacy_ledger.PrivacyLedger( - 1e6, num_microbatches / 1e6 if num_microbatches else None, 50, 50) + 1e6, num_microbatches / 1e6 if num_microbatches else None) dp_average_query = gaussian_query.GaussianAverageQuery( 1.0e9, 0.0, num_microbatches, ledger) dp_average_query = privacy_ledger.QueryWithLedger( @@ -84,7 +84,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0, 0.0]) data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50) + ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1) dp_average_query = privacy_ledger.QueryWithLedger( dp_average_query, ledger) @@ -109,7 +109,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): var0 = tf.Variable([0.0]) data0 = tf.Variable([[0.0]]) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000) + ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1) dp_average_query = privacy_ledger.QueryWithLedger( dp_average_query, ledger) @@ -153,7 +153,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): vector_loss = tf.squared_difference(labels, preds) scalar_loss = tf.reduce_mean(vector_loss) - ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 500, 500) + ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1) dp_average_query = privacy_ledger.QueryWithLedger( dp_average_query, ledger) @@ -199,7 +199,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): num_microbatches = 4 ledger = privacy_ledger.PrivacyLedger( - 1e6, num_microbatches / 1e6, 50, 50) + 1e6, num_microbatches / 1e6) dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4) dp_average_query = privacy_ledger.QueryWithLedger( dp_average_query, ledger)