Changing initial capacity for the ledger to smaller values. (+ restoring compatibility with Python 2)

PiperOrigin-RevId: 249292683
Ilya Mironov 2019-05-21 11:37:54 -07:00 committed by A. Unique TensorFlower
parent a94dc626b1
commit 0efb23afcb
6 changed files with 45 additions and 33 deletions
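
In short: PrivacyLedger previously preallocated its sample and query buffers at max_samples = max_queries = 1000 * population_size unless the caller passed explicit caps. After this change the two arguments are gone and both buffers start at ceil(1 / selection_probability) rows, which with per-record sampling probability q is about one row per batch over one epoch. A minimal pure-Python sketch of the new sizing rule (the real constructor, in the diff below, also has a TensorFlow branch for when the probability is a tensor):

import math

def initial_capacity(selection_probability):
  # One ledger row is recorded per sample, and with per-record sampling
  # probability q there are about 1/q samples per epoch, so ceil(1/q)
  # rows hold roughly one epoch of entries.
  if selection_probability is None:
    return 1  # probability unknown at construction; start tiny
  if selection_probability == 0:
    raise ValueError('Selection probability cannot be 0.')
  return int(math.ceil(1 / selection_probability))

# e.g. q = 0.01 gives an initial capacity of 100 rows, independent of
# population size; the old default was 1000 * population_size rows.
assert initial_capacity(0.01) == 100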

privacy_ledger.py

@@ -65,37 +65,44 @@ class PrivacyLedger(object):
   def __init__(self,
                population_size,
-               selection_probability=None,
-               max_samples=None,
-               max_queries=None):
+               selection_probability=None):
     """Initialize the PrivacyLedger.
 
     Args:
       population_size: An integer (may be variable) specifying the size of the
         population, i.e. size of the training data used in each epoch.
       selection_probability: A float (may be variable) specifying the
-        probability each record is included in a sample.
-      max_samples: The maximum number of samples. An exception is thrown if more
-        than this many samples are recorded.
-      max_queries: The maximum number of queries. An exception is thrown if more
-        than this many queries are recorded.
+        probability each record is included in a sample. If None, it can be set
+        later with set_sample_size.
+
+    Raises:
+      ValueError: If selection_probability is 0.
     """
     self._population_size = population_size
     self._selection_probability = selection_probability
-    if max_samples is None:
-      max_samples = 1000 * population_size
-    if max_queries is None:
-      max_queries = 1000 * population_size
+
+    if selection_probability is None:
+      init_capacity_samples = 1
+    else:
+      if tf.executing_eagerly():
+        if tf.equal(selection_probability, 0):
+          raise ValueError('Selection probability cannot be 0.')
+        init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
+                                        tf.int32)
+      else:
+        if selection_probability == 0:
+          raise ValueError('Selection probability cannot be 0.')
+        init_capacity_samples = np.int(np.ceil(1 / selection_probability))
+
+    init_capacity_queries = init_capacity_samples
 
     # The query buffer stores rows corresponding to GaussianSumQueryEntries.
-    self._query_buffer = tensor_buffer.TensorBuffer(max_queries, [3],
-                                                    tf.float32, 'query')
+    self._query_buffer = tensor_buffer.TensorBuffer(
+        init_capacity_queries, [3], tf.float32, 'query')
     self._sample_var = tf.Variable(
         initial_value=tf.zeros([3]), trainable=False, name='sample')
 
     # The sample buffer stores rows corresponding to SampleEntries.
-    self._sample_buffer = tensor_buffer.TensorBuffer(max_samples, [3],
-                                                     tf.float32, 'sample')
+    self._sample_buffer = tensor_buffer.TensorBuffer(
+        init_capacity_samples, [3], tf.float32, 'sample')
     self._sample_count = tf.Variable(
         initial_value=0.0, trainable=False, name='sample_count')
     self._query_count = tf.Variable(
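
For callers, the visible change is that the two trailing buffer-size arguments disappear. A hedged before/after sketch using the values from the tests below (the new docstring also permits selection_probability=None, to be supplied later via set_sample_size, whose signature is not part of this diff):

# Before: buffer caps had to be passed (or defaulted to 1000 * population
# size), and exceeding them raised an exception.
ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50)

# After: the buffers start at ceil(1 / 0.1) = 10 rows; per the commit
# title this is an initial capacity, not a hard limit.
ledger = privacy_ledger.PrivacyLedger(10, 0.1)

# A selection probability of 0 is now rejected at construction time.
privacy_ledger.PrivacyLedger(10, 0)  # raises ValueError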

privacy_ledger_test.py

@@ -30,8 +30,13 @@ tf.enable_eager_execution()
 class PrivacyLedgerTest(tf.test.TestCase):
 
+  def test_fail_on_probability_zero(self):
+    with self.assertRaisesRegexp(ValueError,
+                                 'Selection probability cannot be 0.'):
+      privacy_ledger.PrivacyLedger(10, 0)
+
   def test_basic(self):
-    ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50)
+    ledger = privacy_ledger.PrivacyLedger(10, 0.1)
     ledger.record_sum_query(5.0, 1.0)
     ledger.record_sum_query(2.0, 0.5)
@@ -50,9 +55,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
     record2 = tf.constant([-1.0, 1.0])
 
     population_size = tf.Variable(0)
-    selection_probability = tf.Variable(0.0)
+    selection_probability = tf.Variable(1.0)
     ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability, 50, 50)
+        population_size, selection_probability)
 
     query = gaussian_query.GaussianSumQuery(
         l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
@@ -87,9 +92,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
   def test_nested_query(self):
     population_size = tf.Variable(0)
-    selection_probability = tf.Variable(0.0)
+    selection_probability = tf.Variable(1.0)
     ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability, 50, 50)
+        population_size, selection_probability)
 
     query1 = gaussian_query.GaussianAverageQuery(
         l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
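
The tf.Variable initial values in these tests move from 0.0 to 1.0 because the constructor now divides by selection_probability (and explicitly rejects zero) to size its buffers, so the variable must hold a valid probability at construction even if the test assigns the real value later. A minimal sketch of the constraint, assuming eager execution as these tests use:

population_size = tf.Variable(0)
selection_probability = tf.Variable(1.0)  # must be nonzero at construction

# ceil(1 / 1.0) = 1, so the buffers start with a single row and can grow.
ledger = privacy_ledger.PrivacyLedger(population_size, selection_probability)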

tensor_buffer_test.py

@@ -41,7 +41,7 @@ class TensorBufferTest(tf.test.TestCase):
     self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
 
   def test_fail_on_scalar(self):
-    with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'):
+    with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'):
       tensor_buffer.TensorBuffer(1, ())
 
   def test_fail_on_inconsistent_shape(self):
@@ -49,7 +49,7 @@ class TensorBufferTest(tf.test.TestCase):
     my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
 
-    with self.assertRaisesRegex(
+    with self.assertRaisesRegexp(
         tf.errors.InvalidArgumentError,
         'Appending value of inconsistent shape.'):
       my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
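
The switch from assertRaisesRegex back to assertRaisesRegexp is the "restoring compatibility with Python 2" half of the commit: assertRaisesRegex only exists on Python 3 (added in 3.2), while assertRaisesRegexp is available on Python 2.7 and survives on Python 3 as a deprecated alias. A small self-contained illustration:

import unittest

class CompatExample(unittest.TestCase):

  def test_regexp_alias(self):
    # Runs under both Python 2.7 and Python 3; the assertRaisesRegex
    # spelling would raise AttributeError under Python 2.
    with self.assertRaisesRegexp(ValueError, 'bad value'):
      raise ValueError('bad value')

if __name__ == '__main__':
  unittest.main()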

quantile_adaptive_clip_sum_query_test.py

@@ -250,9 +250,9 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
     record2 = tf.constant([-7.25])
 
     population_size = tf.Variable(0)
-    selection_probability = tf.Variable(0.0)
+    selection_probability = tf.Variable(1.0)
     ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability, 50, 50)
+        population_size, selection_probability)
 
     query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
         initial_l2_norm_clip=10.0,

dp_optimizer_eager_test.py

@@ -53,7 +53,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
       var0 = tf.Variable([1.0, 2.0])
       data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
 
-      ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6, 50, 50)
+      ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(
           1.0e9, 0.0, num_microbatches, ledger)
       dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
@@ -84,7 +84,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
       var0 = tf.Variable([0.0, 0.0])
       data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
 
-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
+      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
       dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
                                                         ledger)
@@ -110,7 +110,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
       var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])
 
-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
+      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
       dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
                                                         ledger)

dp_optimizer_test.py

@@ -54,7 +54,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
       data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
 
       ledger = privacy_ledger.PrivacyLedger(
-          1e6, num_microbatches / 1e6 if num_microbatches else None, 50, 50)
+          1e6, num_microbatches / 1e6 if num_microbatches else None)
       dp_average_query = gaussian_query.GaussianAverageQuery(
           1.0e9, 0.0, num_microbatches, ledger)
       dp_average_query = privacy_ledger.QueryWithLedger(
@@ -84,7 +84,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
       var0 = tf.Variable([0.0, 0.0])
       data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
 
-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
+      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
       dp_average_query = privacy_ledger.QueryWithLedger(
           dp_average_query, ledger)
@@ -109,7 +109,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
       var0 = tf.Variable([0.0])
       data0 = tf.Variable([[0.0]])
 
-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
+      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
       dp_average_query = privacy_ledger.QueryWithLedger(
           dp_average_query, ledger)
@@ -153,7 +153,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
       vector_loss = tf.squared_difference(labels, preds)
       scalar_loss = tf.reduce_mean(vector_loss)
 
-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 500, 500)
+      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
       dp_average_query = privacy_ledger.QueryWithLedger(
           dp_average_query, ledger)
@@ -199,7 +199,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
       num_microbatches = 4
 
       ledger = privacy_ledger.PrivacyLedger(
-          1e6, num_microbatches / 1e6, 50, 50)
+          1e6, num_microbatches / 1e6)
       dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
       dp_average_query = privacy_ledger.QueryWithLedger(
           dp_average_query, ledger)
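
All of the optimizer tests above share one construction pattern; a condensed sketch of it with the slimmer constructor, using only calls and values that appear in this diff:

# Build a ledger, attach it to a DP query, then wrap the query so every
# sum query it runs is recorded in the ledger.
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(
    1.0, 0.0, 1)  # l2_norm_clip, sum_stddev, denominator
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query, ledger)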