Change the initial capacity of the privacy ledger's buffers to smaller values, and restore compatibility with Python 2.
PiperOrigin-RevId: 249292683
This commit is contained in:
parent
a94dc626b1
commit
0efb23afcb
6 changed files with 45 additions and 33 deletions
|
@ -65,37 +65,44 @@ class PrivacyLedger(object):
|
|||
|
||||
def __init__(self,
|
||||
population_size,
|
||||
selection_probability=None,
|
||||
max_samples=None,
|
||||
max_queries=None):
|
||||
selection_probability=None):
|
||||
"""Initialize the PrivacyLedger.
|
||||
|
||||
Args:
|
||||
population_size: An integer (may be variable) specifying the size of the
|
||||
population, i.e. size of the training data used in each epoch.
|
||||
selection_probability: A float (may be variable) specifying the
|
||||
probability each record is included in a sample.
|
||||
max_samples: The maximum number of samples. An exception is thrown if more
|
||||
than this many samples are recorded.
|
||||
max_queries: The maximum number of queries. An exception is thrown if more
|
||||
than this many queries are recorded.
|
||||
probability each record is included in a sample. If None, it can be set
|
||||
later with set_sample_size.
|
||||
|
||||
Raises:
|
||||
ValueError: If selection_probability is 0.
|
||||
"""
|
||||
self._population_size = population_size
|
||||
self._selection_probability = selection_probability
|
||||
if max_samples is None:
|
||||
max_samples = 1000 * population_size
|
||||
if max_queries is None:
|
||||
max_queries = 1000 * population_size
|
||||
if selection_probability is None:
|
||||
init_capacity_samples = 1
|
||||
else:
|
||||
if tf.executing_eagerly():
|
||||
if tf.equal(selection_probability, 0):
|
||||
raise ValueError('Selection probability cannot be 0.')
|
||||
init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
|
||||
tf.int32)
|
||||
else:
|
||||
if selection_probability == 0:
|
||||
raise ValueError('Selection probability cannot be 0.')
|
||||
init_capacity_samples = np.int(np.ceil(1 / selection_probability))
|
||||
init_capacity_queries = init_capacity_samples
|
||||
|
||||
# The query buffer stores rows corresponding to GaussianSumQueryEntries.
|
||||
self._query_buffer = tensor_buffer.TensorBuffer(max_queries, [3],
|
||||
tf.float32, 'query')
|
||||
self._query_buffer = tensor_buffer.TensorBuffer(
|
||||
init_capacity_queries, [3], tf.float32, 'query')
|
||||
self._sample_var = tf.Variable(
|
||||
initial_value=tf.zeros([3]), trainable=False, name='sample')
|
||||
|
||||
# The sample buffer stores rows corresponding to SampleEntries.
|
||||
self._sample_buffer = tensor_buffer.TensorBuffer(max_samples, [3],
|
||||
tf.float32, 'sample')
|
||||
self._sample_buffer = tensor_buffer.TensorBuffer(
|
||||
init_capacity_samples, [3], tf.float32, 'sample')
|
||||
self._sample_count = tf.Variable(
|
||||
initial_value=0.0, trainable=False, name='sample_count')
|
||||
self._query_count = tf.Variable(
|
||||
|
|
|
@ -30,8 +30,13 @@ tf.enable_eager_execution()
|
|||
|
||||
class PrivacyLedgerTest(tf.test.TestCase):
|
||||
|
||||
def test_fail_on_probability_zero(self):
|
||||
with self.assertRaisesRegexp(ValueError,
|
||||
'Selection probability cannot be 0.'):
|
||||
privacy_ledger.PrivacyLedger(10, 0)
|
||||
|
||||
def test_basic(self):
|
||||
ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50)
|
||||
ledger = privacy_ledger.PrivacyLedger(10, 0.1)
|
||||
ledger.record_sum_query(5.0, 1.0)
|
||||
ledger.record_sum_query(2.0, 0.5)
|
||||
|
||||
|
@ -50,9 +55,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
|
|||
record2 = tf.constant([-1.0, 1.0])
|
||||
|
||||
population_size = tf.Variable(0)
|
||||
selection_probability = tf.Variable(0.0)
|
||||
selection_probability = tf.Variable(1.0)
|
||||
ledger = privacy_ledger.PrivacyLedger(
|
||||
population_size, selection_probability, 50, 50)
|
||||
population_size, selection_probability)
|
||||
|
||||
query = gaussian_query.GaussianSumQuery(
|
||||
l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
|
||||
|
@ -87,9 +92,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
|
|||
|
||||
def test_nested_query(self):
|
||||
population_size = tf.Variable(0)
|
||||
selection_probability = tf.Variable(0.0)
|
||||
selection_probability = tf.Variable(1.0)
|
||||
ledger = privacy_ledger.PrivacyLedger(
|
||||
population_size, selection_probability, 50, 50)
|
||||
population_size, selection_probability)
|
||||
|
||||
query1 = gaussian_query.GaussianAverageQuery(
|
||||
l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
|
||||
|
|
|
@ -41,7 +41,7 @@ class TensorBufferTest(tf.test.TestCase):
|
|||
self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
|
||||
|
||||
def test_fail_on_scalar(self):
|
||||
with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'):
|
||||
with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'):
|
||||
tensor_buffer.TensorBuffer(1, ())
|
||||
|
||||
def test_fail_on_inconsistent_shape(self):
|
||||
|
@ -49,7 +49,7 @@ class TensorBufferTest(tf.test.TestCase):
|
|||
|
||||
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
|
||||
|
||||
with self.assertRaisesRegex(
|
||||
with self.assertRaisesRegexp(
|
||||
tf.errors.InvalidArgumentError,
|
||||
'Appending value of inconsistent shape.'):
|
||||
my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
|
||||
|
|
|
@ -250,9 +250,9 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
|
|||
record2 = tf.constant([-7.25])
|
||||
|
||||
population_size = tf.Variable(0)
|
||||
selection_probability = tf.Variable(0.0)
|
||||
selection_probability = tf.Variable(1.0)
|
||||
ledger = privacy_ledger.PrivacyLedger(
|
||||
population_size, selection_probability, 50, 50)
|
||||
population_size, selection_probability)
|
||||
|
||||
query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
|
||||
initial_l2_norm_clip=10.0,
|
||||
|
|
|
@ -53,7 +53,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
var0 = tf.Variable([1.0, 2.0])
|
||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6, 50, 50)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(
|
||||
1.0e9, 0.0, num_microbatches, ledger)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
||||
|
@ -84,7 +84,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
var0 = tf.Variable([0.0, 0.0])
|
||||
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
||||
ledger)
|
||||
|
@ -110,7 +110,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
var0 = tf.Variable([0.0])
|
||||
data0 = tf.Variable([[0.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
||||
ledger)
|
||||
|
|
|
@ -54,7 +54,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(
|
||||
1e6, num_microbatches / 1e6 if num_microbatches else None, 50, 50)
|
||||
1e6, num_microbatches / 1e6 if num_microbatches else None)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(
|
||||
1.0e9, 0.0, num_microbatches, ledger)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||
|
@ -84,7 +84,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
var0 = tf.Variable([0.0, 0.0])
|
||||
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||
dp_average_query, ledger)
|
||||
|
@ -109,7 +109,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
var0 = tf.Variable([0.0])
|
||||
data0 = tf.Variable([[0.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||
dp_average_query, ledger)
|
||||
|
@ -153,7 +153,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
|
||||
vector_loss = tf.squared_difference(labels, preds)
|
||||
scalar_loss = tf.reduce_mean(vector_loss)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 500, 500)
|
||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||
dp_average_query, ledger)
|
||||
|
@ -199,7 +199,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
num_microbatches = 4
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(
|
||||
1e6, num_microbatches / 1e6, 50, 50)
|
||||
1e6, num_microbatches / 1e6)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||
dp_average_query, ledger)
|
||||
|
|
Loading…
Reference in a new issue