Change the initial capacity of the ledger's buffers to smaller values, and restore compatibility with Python 2.

PiperOrigin-RevId: 249292683
This commit is contained in:
Ilya Mironov 2019-05-21 11:37:54 -07:00 committed by A. Unique TensorFlower
parent a94dc626b1
commit 0efb23afcb
6 changed files with 45 additions and 33 deletions

View file

@ -65,37 +65,44 @@ class PrivacyLedger(object):
def __init__(self,
population_size,
selection_probability=None,
max_samples=None,
max_queries=None):
selection_probability=None):
"""Initialize the PrivacyLedger.
Args:
population_size: An integer (may be variable) specifying the size of the
population, i.e. size of the training data used in each epoch.
selection_probability: A float (may be variable) specifying the
probability each record is included in a sample.
max_samples: The maximum number of samples. An exception is thrown if more
than this many samples are recorded.
max_queries: The maximum number of queries. An exception is thrown if more
than this many queries are recorded.
probability each record is included in a sample. If None, it can be set
later with set_sample_size.
Raises:
ValueError: If selection_probability is 0.
"""
self._population_size = population_size
self._selection_probability = selection_probability
if max_samples is None:
max_samples = 1000 * population_size
if max_queries is None:
max_queries = 1000 * population_size
if selection_probability is None:
init_capacity_samples = 1
else:
if tf.executing_eagerly():
if tf.equal(selection_probability, 0):
raise ValueError('Selection probability cannot be 0.')
init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
tf.int32)
else:
if selection_probability == 0:
raise ValueError('Selection probability cannot be 0.')
init_capacity_samples = np.int(np.ceil(1 / selection_probability))
init_capacity_queries = init_capacity_samples
# The query buffer stores rows corresponding to GaussianSumQueryEntries.
self._query_buffer = tensor_buffer.TensorBuffer(max_queries, [3],
tf.float32, 'query')
self._query_buffer = tensor_buffer.TensorBuffer(
init_capacity_queries, [3], tf.float32, 'query')
self._sample_var = tf.Variable(
initial_value=tf.zeros([3]), trainable=False, name='sample')
# The sample buffer stores rows corresponding to SampleEntries.
self._sample_buffer = tensor_buffer.TensorBuffer(max_samples, [3],
tf.float32, 'sample')
self._sample_buffer = tensor_buffer.TensorBuffer(
init_capacity_samples, [3], tf.float32, 'sample')
self._sample_count = tf.Variable(
initial_value=0.0, trainable=False, name='sample_count')
self._query_count = tf.Variable(

View file

@ -30,8 +30,13 @@ tf.enable_eager_execution()
class PrivacyLedgerTest(tf.test.TestCase):
def test_fail_on_probability_zero(self):
with self.assertRaisesRegexp(ValueError,
'Selection probability cannot be 0.'):
privacy_ledger.PrivacyLedger(10, 0)
def test_basic(self):
ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50)
ledger = privacy_ledger.PrivacyLedger(10, 0.1)
ledger.record_sum_query(5.0, 1.0)
ledger.record_sum_query(2.0, 0.5)
@ -50,9 +55,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
record2 = tf.constant([-1.0, 1.0])
population_size = tf.Variable(0)
selection_probability = tf.Variable(0.0)
selection_probability = tf.Variable(1.0)
ledger = privacy_ledger.PrivacyLedger(
population_size, selection_probability, 50, 50)
population_size, selection_probability)
query = gaussian_query.GaussianSumQuery(
l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
@ -87,9 +92,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
def test_nested_query(self):
population_size = tf.Variable(0)
selection_probability = tf.Variable(0.0)
selection_probability = tf.Variable(1.0)
ledger = privacy_ledger.PrivacyLedger(
population_size, selection_probability, 50, 50)
population_size, selection_probability)
query1 = gaussian_query.GaussianAverageQuery(
l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)

View file

@ -41,7 +41,7 @@ class TensorBufferTest(tf.test.TestCase):
self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
def test_fail_on_scalar(self):
with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'):
with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'):
tensor_buffer.TensorBuffer(1, ())
def test_fail_on_inconsistent_shape(self):
@ -49,7 +49,7 @@ class TensorBufferTest(tf.test.TestCase):
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
with self.assertRaisesRegex(
with self.assertRaisesRegexp(
tf.errors.InvalidArgumentError,
'Appending value of inconsistent shape.'):
my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))

View file

@ -250,9 +250,9 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
record2 = tf.constant([-7.25])
population_size = tf.Variable(0)
selection_probability = tf.Variable(0.0)
selection_probability = tf.Variable(1.0)
ledger = privacy_ledger.PrivacyLedger(
population_size, selection_probability, 50, 50)
population_size, selection_probability)
query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
initial_l2_norm_clip=10.0,

View file

@ -53,7 +53,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([1.0, 2.0])
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6, 50, 50)
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(
1.0e9, 0.0, num_microbatches, ledger)
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
@ -84,7 +84,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0, 0.0])
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
ledger)
@ -110,7 +110,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0])
data0 = tf.Variable([[0.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
ledger)

View file

@ -54,7 +54,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
ledger = privacy_ledger.PrivacyLedger(
1e6, num_microbatches / 1e6 if num_microbatches else None, 50, 50)
1e6, num_microbatches / 1e6 if num_microbatches else None)
dp_average_query = gaussian_query.GaussianAverageQuery(
1.0e9, 0.0, num_microbatches, ledger)
dp_average_query = privacy_ledger.QueryWithLedger(
@ -84,7 +84,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0, 0.0])
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
@ -109,7 +109,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
var0 = tf.Variable([0.0])
data0 = tf.Variable([[0.0]])
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
@ -153,7 +153,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
vector_loss = tf.squared_difference(labels, preds)
scalar_loss = tf.reduce_mean(vector_loss)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 500, 500)
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)
@ -199,7 +199,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
num_microbatches = 4
ledger = privacy_ledger.PrivacyLedger(
1e6, num_microbatches / 1e6, 50, 50)
1e6, num_microbatches / 1e6)
dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
dp_average_query = privacy_ledger.QueryWithLedger(
dp_average_query, ledger)