forked from 626_privacy/tensorflow_privacy
Change the privacy ledger's initial buffer capacity to smaller values, and restore compatibility with Python 2.
PiperOrigin-RevId: 249292683
This commit is contained in:
parent
a94dc626b1
commit
0efb23afcb
6 changed files with 45 additions and 33 deletions
|
@ -65,37 +65,44 @@ class PrivacyLedger(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
population_size,
|
population_size,
|
||||||
selection_probability=None,
|
selection_probability=None):
|
||||||
max_samples=None,
|
|
||||||
max_queries=None):
|
|
||||||
"""Initialize the PrivacyLedger.
|
"""Initialize the PrivacyLedger.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
population_size: An integer (may be variable) specifying the size of the
|
population_size: An integer (may be variable) specifying the size of the
|
||||||
population, i.e. size of the training data used in each epoch.
|
population, i.e. size of the training data used in each epoch.
|
||||||
selection_probability: A float (may be variable) specifying the
|
selection_probability: A float (may be variable) specifying the
|
||||||
probability each record is included in a sample.
|
probability each record is included in a sample. If None, it can be set
|
||||||
max_samples: The maximum number of samples. An exception is thrown if more
|
later with set_sample_size.
|
||||||
than this many samples are recorded.
|
|
||||||
max_queries: The maximum number of queries. An exception is thrown if more
|
Raises:
|
||||||
than this many queries are recorded.
|
ValueError: If selection_probability is 0.
|
||||||
"""
|
"""
|
||||||
self._population_size = population_size
|
self._population_size = population_size
|
||||||
self._selection_probability = selection_probability
|
self._selection_probability = selection_probability
|
||||||
if max_samples is None:
|
if selection_probability is None:
|
||||||
max_samples = 1000 * population_size
|
init_capacity_samples = 1
|
||||||
if max_queries is None:
|
else:
|
||||||
max_queries = 1000 * population_size
|
if tf.executing_eagerly():
|
||||||
|
if tf.equal(selection_probability, 0):
|
||||||
|
raise ValueError('Selection probability cannot be 0.')
|
||||||
|
init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
|
||||||
|
tf.int32)
|
||||||
|
else:
|
||||||
|
if selection_probability == 0:
|
||||||
|
raise ValueError('Selection probability cannot be 0.')
|
||||||
|
init_capacity_samples = np.int(np.ceil(1 / selection_probability))
|
||||||
|
init_capacity_queries = init_capacity_samples
|
||||||
|
|
||||||
# The query buffer stores rows corresponding to GaussianSumQueryEntries.
|
# The query buffer stores rows corresponding to GaussianSumQueryEntries.
|
||||||
self._query_buffer = tensor_buffer.TensorBuffer(max_queries, [3],
|
self._query_buffer = tensor_buffer.TensorBuffer(
|
||||||
tf.float32, 'query')
|
init_capacity_queries, [3], tf.float32, 'query')
|
||||||
self._sample_var = tf.Variable(
|
self._sample_var = tf.Variable(
|
||||||
initial_value=tf.zeros([3]), trainable=False, name='sample')
|
initial_value=tf.zeros([3]), trainable=False, name='sample')
|
||||||
|
|
||||||
# The sample buffer stores rows corresponding to SampleEntries.
|
# The sample buffer stores rows corresponding to SampleEntries.
|
||||||
self._sample_buffer = tensor_buffer.TensorBuffer(max_samples, [3],
|
self._sample_buffer = tensor_buffer.TensorBuffer(
|
||||||
tf.float32, 'sample')
|
init_capacity_samples, [3], tf.float32, 'sample')
|
||||||
self._sample_count = tf.Variable(
|
self._sample_count = tf.Variable(
|
||||||
initial_value=0.0, trainable=False, name='sample_count')
|
initial_value=0.0, trainable=False, name='sample_count')
|
||||||
self._query_count = tf.Variable(
|
self._query_count = tf.Variable(
|
||||||
|
|
|
@ -30,8 +30,13 @@ tf.enable_eager_execution()
|
||||||
|
|
||||||
class PrivacyLedgerTest(tf.test.TestCase):
|
class PrivacyLedgerTest(tf.test.TestCase):
|
||||||
|
|
||||||
|
def test_fail_on_probability_zero(self):
|
||||||
|
with self.assertRaisesRegexp(ValueError,
|
||||||
|
'Selection probability cannot be 0.'):
|
||||||
|
privacy_ledger.PrivacyLedger(10, 0)
|
||||||
|
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50)
|
ledger = privacy_ledger.PrivacyLedger(10, 0.1)
|
||||||
ledger.record_sum_query(5.0, 1.0)
|
ledger.record_sum_query(5.0, 1.0)
|
||||||
ledger.record_sum_query(2.0, 0.5)
|
ledger.record_sum_query(2.0, 0.5)
|
||||||
|
|
||||||
|
@ -50,9 +55,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
|
||||||
record2 = tf.constant([-1.0, 1.0])
|
record2 = tf.constant([-1.0, 1.0])
|
||||||
|
|
||||||
population_size = tf.Variable(0)
|
population_size = tf.Variable(0)
|
||||||
selection_probability = tf.Variable(0.0)
|
selection_probability = tf.Variable(1.0)
|
||||||
ledger = privacy_ledger.PrivacyLedger(
|
ledger = privacy_ledger.PrivacyLedger(
|
||||||
population_size, selection_probability, 50, 50)
|
population_size, selection_probability)
|
||||||
|
|
||||||
query = gaussian_query.GaussianSumQuery(
|
query = gaussian_query.GaussianSumQuery(
|
||||||
l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
|
l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
|
||||||
|
@ -87,9 +92,9 @@ class PrivacyLedgerTest(tf.test.TestCase):
|
||||||
|
|
||||||
def test_nested_query(self):
|
def test_nested_query(self):
|
||||||
population_size = tf.Variable(0)
|
population_size = tf.Variable(0)
|
||||||
selection_probability = tf.Variable(0.0)
|
selection_probability = tf.Variable(1.0)
|
||||||
ledger = privacy_ledger.PrivacyLedger(
|
ledger = privacy_ledger.PrivacyLedger(
|
||||||
population_size, selection_probability, 50, 50)
|
population_size, selection_probability)
|
||||||
|
|
||||||
query1 = gaussian_query.GaussianAverageQuery(
|
query1 = gaussian_query.GaussianAverageQuery(
|
||||||
l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
|
l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
|
||||||
|
|
|
@ -41,7 +41,7 @@ class TensorBufferTest(tf.test.TestCase):
|
||||||
self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
|
self.assertAllEqual(my_buffer.values.numpy(), [value1, value2])
|
||||||
|
|
||||||
def test_fail_on_scalar(self):
|
def test_fail_on_scalar(self):
|
||||||
with self.assertRaisesRegex(ValueError, 'Shape cannot be scalar.'):
|
with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'):
|
||||||
tensor_buffer.TensorBuffer(1, ())
|
tensor_buffer.TensorBuffer(1, ())
|
||||||
|
|
||||||
def test_fail_on_inconsistent_shape(self):
|
def test_fail_on_inconsistent_shape(self):
|
||||||
|
@ -49,7 +49,7 @@ class TensorBufferTest(tf.test.TestCase):
|
||||||
|
|
||||||
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
|
my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer')
|
||||||
|
|
||||||
with self.assertRaisesRegex(
|
with self.assertRaisesRegexp(
|
||||||
tf.errors.InvalidArgumentError,
|
tf.errors.InvalidArgumentError,
|
||||||
'Appending value of inconsistent shape.'):
|
'Appending value of inconsistent shape.'):
|
||||||
my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
|
my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32))
|
||||||
|
|
|
@ -250,9 +250,9 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
|
||||||
record2 = tf.constant([-7.25])
|
record2 = tf.constant([-7.25])
|
||||||
|
|
||||||
population_size = tf.Variable(0)
|
population_size = tf.Variable(0)
|
||||||
selection_probability = tf.Variable(0.0)
|
selection_probability = tf.Variable(1.0)
|
||||||
ledger = privacy_ledger.PrivacyLedger(
|
ledger = privacy_ledger.PrivacyLedger(
|
||||||
population_size, selection_probability, 50, 50)
|
population_size, selection_probability)
|
||||||
|
|
||||||
query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
|
query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
|
||||||
initial_l2_norm_clip=10.0,
|
initial_l2_norm_clip=10.0,
|
||||||
|
|
|
@ -53,7 +53,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
var0 = tf.Variable([1.0, 2.0])
|
var0 = tf.Variable([1.0, 2.0])
|
||||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6, 50, 50)
|
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(
|
dp_average_query = gaussian_query.GaussianAverageQuery(
|
||||||
1.0e9, 0.0, num_microbatches, ledger)
|
1.0e9, 0.0, num_microbatches, ledger)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
||||||
|
@ -84,7 +84,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
var0 = tf.Variable([0.0, 0.0])
|
var0 = tf.Variable([0.0, 0.0])
|
||||||
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
|
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
||||||
ledger)
|
ledger)
|
||||||
|
@ -110,7 +110,7 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
var0 = tf.Variable([0.0])
|
var0 = tf.Variable([0.0])
|
||||||
data0 = tf.Variable([[0.0]])
|
data0 = tf.Variable([[0.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
|
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
|
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
||||||
ledger)
|
ledger)
|
||||||
|
|
|
@ -54,7 +54,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(
|
ledger = privacy_ledger.PrivacyLedger(
|
||||||
1e6, num_microbatches / 1e6 if num_microbatches else None, 50, 50)
|
1e6, num_microbatches / 1e6 if num_microbatches else None)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(
|
dp_average_query = gaussian_query.GaussianAverageQuery(
|
||||||
1.0e9, 0.0, num_microbatches, ledger)
|
1.0e9, 0.0, num_microbatches, ledger)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||||
|
@ -84,7 +84,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
var0 = tf.Variable([0.0, 0.0])
|
var0 = tf.Variable([0.0, 0.0])
|
||||||
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
|
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||||
dp_average_query, ledger)
|
dp_average_query, ledger)
|
||||||
|
@ -109,7 +109,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
var0 = tf.Variable([0.0])
|
var0 = tf.Variable([0.0])
|
||||||
data0 = tf.Variable([[0.0]])
|
data0 = tf.Variable([[0.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
|
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
|
dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||||
dp_average_query, ledger)
|
dp_average_query, ledger)
|
||||||
|
@ -153,7 +153,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
|
|
||||||
vector_loss = tf.squared_difference(labels, preds)
|
vector_loss = tf.squared_difference(labels, preds)
|
||||||
scalar_loss = tf.reduce_mean(vector_loss)
|
scalar_loss = tf.reduce_mean(vector_loss)
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 500, 500)
|
ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||||
dp_average_query, ledger)
|
dp_average_query, ledger)
|
||||||
|
@ -199,7 +199,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
num_microbatches = 4
|
num_microbatches = 4
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(
|
ledger = privacy_ledger.PrivacyLedger(
|
||||||
1e6, num_microbatches / 1e6, 50, 50)
|
1e6, num_microbatches / 1e6)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
|
dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||||
dp_average_query, ledger)
|
dp_average_query, ledger)
|
||||||
|
|
Loading…
Reference in a new issue