Remove set_denominator functions from DPQuery and make QueryWithLedger easier to use.
set_denominator was added so that the batch size doesn't need to be specified before constructing the optimizer, but it breaks the DPQuery abstraction. The optimizer now uses a GaussianSumQuery instead of a GaussianAverageQuery, and normalization by batch size is done inside the optimizer. Also, instead of creating all DPQueries with a PrivacyLedger and then wrapping them with QueryWithLedger, it is now sufficient to create the queries with no ledger; QueryWithLedger constructs the ledger and passes it to all inner queries.

PiperOrigin-RevId: 251462353
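Below is a minimal sketch of the resulting usage, assembled from the updated tests and tutorials in this diff. The import paths and constructor signatures come from the new code; the concrete values (clipping norm, noise stddev, population size, batch size, learning rate) are illustrative only.

# Sketch of the new construction pattern (values are illustrative).
from privacy.analysis import privacy_ledger
from privacy.dp_query import gaussian_query
from privacy.optimizers import dp_optimizer

# The query no longer takes a ledger or a denominator. QueryWithLedger builds
# the PrivacyLedger from the sampling parameters and hands it to every inner
# query via set_ledger.
dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.1)
dp_sum_query = privacy_ledger.QueryWithLedger(
    dp_sum_query, population_size=60000, selection_probability=256 / 60000)

# The optimizer accumulates a clipped, noised gradient sum and divides by the
# number of microbatches itself, so no GaussianAverageQuery is needed.
opt = dp_optimizer.DPGradientDescentOptimizer(
    dp_sum_query, num_microbatches=256, learning_rate=0.15)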
parent 7636945566, commit d5dcfec745
17 changed files with 202 additions and 220 deletions
@@ -20,7 +20,6 @@ import sys
 if hasattr(sys, 'skip_tf_privacy_import'):  # Useful for standalone scripts.
   pass
 else:
-  from privacy.analysis.privacy_ledger import DummyLedger
   from privacy.analysis.privacy_ledger import GaussianSumQueryEntry
   from privacy.analysis.privacy_ledger import PrivacyLedger
   from privacy.analysis.privacy_ledger import QueryWithLedger
@@ -65,44 +65,39 @@ class PrivacyLedger(object):

   def __init__(self,
                population_size,
-               selection_probability=None):
+               selection_probability):
     """Initialize the PrivacyLedger.

     Args:
       population_size: An integer (may be variable) specifying the size of the
         population, i.e. size of the training data used in each epoch.
       selection_probability: A float (may be variable) specifying the
-        probability each record is included in a sample. If None, it can be set
-        later with set_sample_size.
+        probability each record is included in a sample.

     Raises:
       ValueError: If selection_probability is 0.
     """
     self._population_size = population_size
     self._selection_probability = selection_probability
-    if selection_probability is None:
-      init_capacity_samples = 1
-    else:
-      if tf.executing_eagerly():
-        if tf.equal(selection_probability, 0):
-          raise ValueError('Selection probability cannot be 0.')
-        init_capacity_samples = tf.cast(tf.ceil(1 / selection_probability),
-                                        tf.int32)
-      else:
-        if selection_probability == 0:
-          raise ValueError('Selection probability cannot be 0.')
-        init_capacity_samples = np.int(np.ceil(1 / selection_probability))
-    init_capacity_queries = init_capacity_samples
+    if tf.executing_eagerly():
+      if tf.equal(selection_probability, 0):
+        raise ValueError('Selection probability cannot be 0.')
+      init_capacity = tf.cast(tf.ceil(1 / selection_probability), tf.int32)
+    else:
+      if selection_probability == 0:
+        raise ValueError('Selection probability cannot be 0.')
+      init_capacity = np.int(np.ceil(1 / selection_probability))

     # The query buffer stores rows corresponding to GaussianSumQueryEntries.
     self._query_buffer = tensor_buffer.TensorBuffer(
-        init_capacity_queries, [3], tf.float32, 'query')
+        init_capacity, [3], tf.float32, 'query')
     self._sample_var = tf.Variable(
         initial_value=tf.zeros([3]), trainable=False, name='sample')

     # The sample buffer stores rows corresponding to SampleEntries.
     self._sample_buffer = tensor_buffer.TensorBuffer(
-        init_capacity_samples, [3], tf.float32, 'sample')
+        init_capacity, [3], tf.float32, 'sample')
     self._sample_count = tf.Variable(
         initial_value=0.0, trainable=False, name='sample_count')
     self._query_count = tf.Variable(
@@ -175,39 +170,6 @@ class PrivacyLedger(object):

     return format_ledger(sample_array, query_array)

-  def set_sample_size(self, batch_size):
-    self._selection_probability = tf.cast(batch_size,
-                                          tf.float32) / self._population_size
-
-
-class DummyLedger(object):
-  """A ledger that records nothing.
-
-  This ledger may be passed in place of a normal PrivacyLedger in case privacy
-  accounting is to be handled externally.
-  """
-
-  def record_sum_query(self, l2_norm_bound, noise_stddev):
-    del l2_norm_bound
-    del noise_stddev
-    return tf.no_op()
-
-  def finalize_sample(self):
-    return tf.no_op()
-
-  def get_unformatted_ledger(self):
-    empty_array = tf.zeros(shape=[0, 3])
-    return empty_array, empty_array
-
-  def get_formatted_ledger(self, sess):
-    del sess
-    empty_array = np.zeros(shape=[0, 3])
-    return empty_array, empty_array
-
-  def get_formatted_ledger_eager(self):
-    empty_array = np.zeros(shape=[0, 3])
-    return empty_array, empty_array
-

 class QueryWithLedger(dp_query.DPQuery):
   """A class for DP queries that record events to a PrivacyLedger.
@@ -221,17 +183,40 @@ class QueryWithLedger(dp_query.DPQuery):
   For example usage, see privacy_ledger_test.py.
   """

-  def __init__(self, query, ledger):
+  def __init__(self, query,
+               population_size=None, selection_probability=None,
+               ledger=None):
     """Initializes the QueryWithLedger.

     Args:
       query: The query whose events should be recorded to the ledger. Any
         subqueries (including those in the leaves of a nested query) should also
         contain a reference to the same ledger given here.
-      ledger: A PrivacyLedger to which privacy events should be recorded.
+      population_size: An integer (may be variable) specifying the size of the
+        population, i.e. size of the training data used in each epoch. May be
+        None if `ledger` is specified.
+      selection_probability: A float (may be variable) specifying the
+        probability each record is included in a sample. May be None if `ledger`
+        is specified.
+      ledger: A PrivacyLedger to use. Must be specified if either of
+        `population_size` or `selection_probability` is None.
     """
     self._query = query
-    self._ledger = ledger
+    if population_size is not None and selection_probability is not None:
+      self.set_ledger(PrivacyLedger(population_size, selection_probability))
+    elif ledger is not None:
+      self.set_ledger(ledger)
+    else:
+      raise ValueError('One of (population_size, selection_probability) or '
+                       'ledger must be specified.')
+
+  @property
+  def ledger(self):
+    return self._ledger
+
+  def set_ledger(self, ledger):
+    self._ledger = ledger
+    self._query.set_ledger(ledger)

   def initial_global_state(self):
     """See base class."""
@@ -260,10 +245,13 @@ class QueryWithLedger(dp_query.DPQuery):

   def get_noised_result(self, sample_state, global_state):
     """Ensures sample is recorded to the ledger and returns noised result."""
+    # Ensure sample_state is fully aggregated before calling get_noised_result.
     with tf.control_dependencies(nest.flatten(sample_state)):
-      with tf.control_dependencies([self._ledger.finalize_sample()]):
-        return self._query.get_noised_result(sample_state, global_state)
-
-  def set_denominator(self, global_state, num_microbatches, microbatch_size=1):
-    self._ledger.set_sample_size(num_microbatches * microbatch_size)
-    return self._query.set_denominator(global_state, num_microbatches)
+      result, new_global_state = self._query.get_noised_result(
+          sample_state, global_state)
+    # Ensure inner queries have recorded before finalizing.
+    with tf.control_dependencies(nest.flatten(result)):
+      finalize = self._ledger.finalize_sample()
+    # Ensure finalizing happens.
+    with tf.control_dependencies([finalize]):
+      return nest.map_structure(tf.identity, result), new_global_state
@@ -56,12 +56,11 @@ class PrivacyLedgerTest(tf.test.TestCase):

     population_size = tf.Variable(0)
     selection_probability = tf.Variable(1.0)
-    ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability)

     query = gaussian_query.GaussianSumQuery(
-        l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
-    query = privacy_ledger.QueryWithLedger(query, ledger)
+        l2_norm_clip=10.0, stddev=0.0)
+    query = privacy_ledger.QueryWithLedger(
+        query, population_size, selection_probability)

     # First sample.
     tf.assign(population_size, 10)
@@ -69,7 +68,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     test_utils.run_query(query, [record1, record2])

     expected_queries = [[10.0, 0.0]]
-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1 = formatted[0]
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -80,7 +79,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     tf.assign(selection_probability, 0.2)
     test_utils.run_query(query, [record1, record2])

-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1, sample_2 = formatted
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -93,16 +92,15 @@ class PrivacyLedgerTest(tf.test.TestCase):
   def test_nested_query(self):
     population_size = tf.Variable(0)
     selection_probability = tf.Variable(1.0)
-    ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability)

     query1 = gaussian_query.GaussianAverageQuery(
-        l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
+        l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
     query2 = gaussian_query.GaussianAverageQuery(
-        l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger)
+        l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)

     query = nested_query.NestedQuery([query1, query2])
-    query = privacy_ledger.QueryWithLedger(query, ledger)
+    query = privacy_ledger.QueryWithLedger(
+        query, population_size, selection_probability)

     record1 = [1.0, [12.0, 9.0]]
     record2 = [5.0, [1.0, 2.0]]
@@ -113,7 +111,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     test_utils.run_query(query, [record1, record2])

     expected_queries = [[4.0, 2.0], [5.0, 1.0]]
-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1 = formatted[0]
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -124,7 +122,7 @@ class PrivacyLedgerTest(tf.test.TestCase):
     tf.assign(selection_probability, 0.2)
     test_utils.run_query(query, [record1, record2])

-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1, sample_2 = formatted
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -307,7 +307,7 @@ def compute_rdp_from_ledger(ledger, orders):
   Returns:
     RDP at all orders, can be np.inf.
   """
-  total_rdp = 0
+  total_rdp = np.zeros_like(orders, dtype=float)
   for sample in ledger:
     # Compute equivalent z from l2_clip_bounds and noise stddevs in sample.
     # See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula.
@@ -61,6 +61,16 @@ class DPQuery(object):

   __metaclass__ = abc.ABCMeta

+  def set_ledger(self, ledger):
+    """Supplies privacy ledger to which the query can record privacy events.
+
+    Args:
+      ledger: A `PrivacyLedger`.
+    """
+    del ledger
+    raise TypeError(
+        'DPQuery type %s does not support set_ledger.' % type(self).__name__)
+
   def initial_global_state(self):
     """Returns the initial global state for the DPQuery."""
     return ()
@@ -43,17 +43,19 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
   _GlobalState = collections.namedtuple(
       '_GlobalState', ['l2_norm_clip', 'stddev'])

-  def __init__(self, l2_norm_clip, stddev, ledger=None):
+  def __init__(self, l2_norm_clip, stddev):
     """Initializes the GaussianSumQuery.

     Args:
       l2_norm_clip: The clipping norm to apply to the global norm of each
         record.
       stddev: The stddev of the noise added to the sum.
-      ledger: The privacy ledger to which queries should be recorded.
     """
     self._l2_norm_clip = l2_norm_clip
     self._stddev = stddev
+    self._ledger = None
+
+  def set_ledger(self, ledger):
     self._ledger = ledger

   def make_global_state(self, l2_norm_clip, stddev):
@@ -68,14 +70,6 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
     return global_state.l2_norm_clip

   def initial_sample_state(self, global_state, template):
-    if self._ledger:
-      dependencies = [
-          self._ledger.record_sum_query(
-              global_state.l2_norm_clip, global_state.stddev)
-      ]
-    else:
-      dependencies = []
-    with tf.control_dependencies(dependencies):
-      return nest.map_structure(
-          dp_query.zeros_like, template)
+    return nest.map_structure(
+        dp_query.zeros_like, template)
@@ -110,6 +104,14 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
       def add_noise(v):
         return v + random_normal(tf.shape(v))

-    return nest.map_structure(add_noise, sample_state), global_state
+    if self._ledger:
+      dependencies = [
+          self._ledger.record_sum_query(
+              global_state.l2_norm_clip, global_state.stddev)
+      ]
+    else:
+      dependencies = []
+    with tf.control_dependencies(dependencies):
+      return nest.map_structure(add_noise, sample_state), global_state
@@ -127,8 +129,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
   def __init__(self,
                l2_norm_clip,
                sum_stddev,
-               denominator,
-               ledger=None):
+               denominator):
     """Initializes the GaussianAverageQuery.

     Args:
@@ -138,8 +139,7 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
         normalization).
       denominator: The normalization constant (applied after noise is added to
         the sum).
-      ledger: The privacy ledger to which queries should be recorded.
     """
     super(GaussianAverageQuery, self).__init__(
-        numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger),
+        numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev),
         denominator=denominator)
@@ -62,6 +62,9 @@ class NestedQuery(dp_query.DPQuery):
     return nest.map_structure_up_to(
         self._queries, caller, self._queries, *inputs)

+  def set_ledger(self, ledger):
+    self._map_to_queries('set_ledger', ledger=ledger)
+
   def initial_global_state(self):
     """See base class."""
     return self._map_to_queries('initial_global_state')
@@ -67,5 +67,6 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
     """See base class."""
     sum_state, denominator = sample_state

-    return nest.map_structure(
-        lambda t: tf.truediv(t, denominator), sum_state), global_state
+    return (
+        nest.map_structure(lambda t: t / denominator, sum_state),
+        global_state)
@@ -51,6 +51,10 @@ class NormalizedQuery(dp_query.DPQuery):
     self._numerator = numerator_query
     self._denominator = denominator

+  def set_ledger(self, ledger):
+    """See base class."""
+    self._numerator.set_ledger(ledger)
+
   def initial_global_state(self):
     """See base class."""
     if self._denominator is not None:
@@ -92,7 +96,3 @@ class NormalizedQuery(dp_query.DPQuery):
   def merge_sample_states(self, sample_state_1, sample_state_2):
     """See base class."""
     return self._numerator.merge_sample_states(sample_state_1, sample_state_2)
-
-  def set_denominator(self, global_state, denominator):
-    """Returns an updated global_state with the given denominator."""
-    return global_state._replace(denominator=tf.cast(denominator, tf.float32))
@@ -68,8 +68,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
                target_unclipped_quantile,
                learning_rate,
                clipped_count_stddev,
-               expected_num_records,
-               ledger=None):
+               expected_num_records):
     """Initializes the QuantileAdaptiveClipSumQuery.

     Args:
@@ -87,7 +86,6 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
         should be about 0.5 for reasonable privacy.
       expected_num_records: The expected number of records per round, used to
         estimate the clipped count quantile.
-      ledger: The privacy ledger to which queries should be recorded.
     """
     self._initial_l2_norm_clip = initial_l2_norm_clip
     self._noise_multiplier = noise_multiplier
@@ -95,8 +93,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
     self._learning_rate = learning_rate

     # Initialize sum query's global state with None, to be set later.
-    self._sum_query = gaussian_query.GaussianSumQuery(
-        None, None, ledger)
+    self._sum_query = gaussian_query.GaussianSumQuery(None, None)

     # self._clipped_fraction_query is a DPQuery used to estimate the fraction of
     # records that are clipped. It accumulates an indicator 0/1 of whether each
@@ -110,8 +107,12 @@ class QuantileAdaptiveClipSumQuery(dp_query.DPQuery):
     self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
         l2_norm_clip=0.5,
         sum_stddev=clipped_count_stddev,
-        denominator=expected_num_records,
-        ledger=ledger)
+        denominator=expected_num_records)
+
+  def set_ledger(self, ledger):
+    """See base class."""
+    self._sum_query.set_ledger(ledger)
+    self._clipped_fraction_query.set_ledger(ledger)

   def initial_global_state(self):
     """See base class."""
@@ -252,8 +253,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
                target_unclipped_quantile,
                learning_rate,
                clipped_count_stddev,
-               expected_num_records,
-               ledger=None):
+               expected_num_records):
     """Initializes the AdaptiveClipAverageQuery.

     Args:
@@ -272,7 +272,6 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
         should be about 0.5 for reasonable privacy.
       expected_num_records: The expected number of records, used to estimate the
         clipped count quantile.
-      ledger: The privacy ledger to which queries should be recorded.
     """
     numerator_query = QuantileAdaptiveClipSumQuery(
         initial_l2_norm_clip,
|
@ -280,8 +279,7 @@ class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery):
|
||||||
target_unclipped_quantile,
|
target_unclipped_quantile,
|
||||||
learning_rate,
|
learning_rate,
|
||||||
clipped_count_stddev,
|
clipped_count_stddev,
|
||||||
expected_num_records,
|
expected_num_records)
|
||||||
ledger)
|
|
||||||
super(QuantileAdaptiveClipAverageQuery, self).__init__(
|
super(QuantileAdaptiveClipAverageQuery, self).__init__(
|
||||||
numerator_query=numerator_query,
|
numerator_query=numerator_query,
|
||||||
denominator=denominator)
|
denominator=denominator)
|
||||||
|
|
|
@@ -251,8 +251,6 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):

     population_size = tf.Variable(0)
     selection_probability = tf.Variable(1.0)
-    ledger = privacy_ledger.PrivacyLedger(
-        population_size, selection_probability)

     query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
         initial_l2_norm_clip=10.0,
@@ -260,10 +258,10 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
         target_unclipped_quantile=0.0,
         learning_rate=1.0,
         clipped_count_stddev=0.0,
-        expected_num_records=2.0,
-        ledger=ledger)
+        expected_num_records=2.0)

-    query = privacy_ledger.QueryWithLedger(query, ledger)
+    query = privacy_ledger.QueryWithLedger(
+        query, population_size, selection_probability)

     # First sample.
     tf.assign(population_size, 10)
@@ -271,7 +269,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
     _, global_state = test_utils.run_query(query, [record1, record2])

     expected_queries = [[10.0, 10.0], [0.5, 0.0]]
-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1 = formatted[0]
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -282,7 +280,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase):
     tf.assign(selection_probability, 0.2)
     test_utils.run_query(query, [record1, record2], global_state)

-    formatted = ledger.get_formatted_ledger_eager()
+    formatted = query.ledger.get_formatted_ledger_eager()
     sample_1, sample_2 = formatted
     self.assertAllClose(sample_1.population_size, 10.0)
     self.assertAllClose(sample_1.selection_probability, 0.1)
@@ -23,6 +23,11 @@ import tensorflow as tf
 from privacy.analysis import privacy_ledger
 from privacy.dp_query import gaussian_query

+if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
+  nest = tf.contrib.framework.nest
+else:
+  nest = tf.nest
+

 def make_optimizer_class(cls):
   """Constructs a DP optimizer class from an existing one."""
@@ -46,7 +51,7 @@ def make_optimizer_class(cls):

     def __init__(
         self,
-        dp_average_query,
+        dp_sum_query,
         num_microbatches=None,
         unroll_microbatches=False,
         *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
@@ -54,7 +59,7 @@ def make_optimizer_class(cls):
      """Initialize the DPOptimizerClass.

      Args:
-        dp_average_query: DPQuery object, specifying differential privacy
+        dp_sum_query: DPQuery object, specifying differential privacy
          mechanism to use.
        num_microbatches: How many microbatches into which the minibatch is
          split. If None, will default to the size of the minibatch, and
@@ -64,9 +69,9 @@ def make_optimizer_class(cls):
          raises an exception.
      """
      super(DPOptimizerClass, self).__init__(*args, **kwargs)
-      self._dp_average_query = dp_average_query
+      self._dp_sum_query = dp_sum_query
      self._num_microbatches = num_microbatches
-      self._global_state = self._dp_average_query.initial_global_state()
+      self._global_state = self._dp_sum_query.initial_global_state()
      # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.
      # Beware: When num_microbatches is large (>100), enabling this parameter
      # may cause an OOM error.
@@ -88,31 +93,34 @@ def make_optimizer_class(cls):
        vector_loss = loss()
        if self._num_microbatches is None:
          self._num_microbatches = tf.shape(vector_loss)[0]
-          self._global_state = self._dp_average_query.set_denominator(
-              self._global_state,
-              self._num_microbatches)
-        sample_state = self._dp_average_query.initial_sample_state(
+        if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger):
+          self._dp_sum_query.set_batch_size(self._num_microbatches)
+        sample_state = self._dp_sum_query.initial_sample_state(
            self._global_state, var_list)
        microbatches_losses = tf.reshape(vector_loss,
                                         [self._num_microbatches, -1])
        sample_params = (
-            self._dp_average_query.derive_sample_params(self._global_state))
+            self._dp_sum_query.derive_sample_params(self._global_state))

        def process_microbatch(i, sample_state):
          """Process one microbatch (record) with privacy helper."""
          microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [i]))
          grads = gradient_tape.gradient(microbatch_loss, var_list)
-          sample_state = self._dp_average_query.accumulate_record(sample_params,
-                                                                  sample_state,
-                                                                  grads)
+          sample_state = self._dp_sum_query.accumulate_record(
+              sample_params, sample_state, grads)
          return sample_state

        for idx in range(self._num_microbatches):
          sample_state = process_microbatch(idx, sample_state)

-        final_grads, self._global_state = (
-            self._dp_average_query.get_noised_result(sample_state,
-                                                     self._global_state))
+        grad_sums, self._global_state = (
+            self._dp_sum_query.get_noised_result(
+                sample_state, self._global_state))
+
+        def normalize(v):
+          return v / tf.cast(self._num_microbatches, tf.float32)
+
+        final_grads = nest.map_structure(normalize, grad_sums)

        grads_and_vars = list(zip(final_grads, var_list))
        return grads_and_vars
@@ -128,12 +136,12 @@ def make_optimizer_class(cls):
        # sampling from the dataset without replacement.
        if self._num_microbatches is None:
          self._num_microbatches = tf.shape(loss)[0]
-          self._global_state = self._dp_average_query.set_denominator(
-              self._global_state,
-              self._num_microbatches)
+        if isinstance(self._dp_sum_query, privacy_ledger.QueryWithLedger):
+          self._dp_sum_query.set_batch_size(self._num_microbatches)
+
        microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1])
        sample_params = (
-            self._dp_average_query.derive_sample_params(self._global_state))
+            self._dp_sum_query.derive_sample_params(self._global_state))

        def process_microbatch(i, sample_state):
          """Process one microbatch (record) with privacy helper."""
@@ -145,7 +153,7 @@ def make_optimizer_class(cls):
              g if g is not None else tf.zeros_like(v)
              for (g, v) in zip(list(grads), var_list)
          ]
-          sample_state = self._dp_average_query.accumulate_record(
+          sample_state = self._dp_sum_query.accumulate_record(
              sample_params, sample_state, grads_list)
          return sample_state

@@ -154,7 +162,7 @@ def make_optimizer_class(cls):
          tf.trainable_variables() + tf.get_collection(
              tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))

-      sample_state = self._dp_average_query.initial_sample_state(
+      sample_state = self._dp_sum_query.initial_sample_state(
          self._global_state, var_list)

      if self._unroll_microbatches:
@@ -169,10 +177,15 @@ def make_optimizer_class(cls):
        idx = tf.constant(0)
        _, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state])

-      final_grads, self._global_state = (
-          self._dp_average_query.get_noised_result(
+      grad_sums, self._global_state = (
+          self._dp_sum_query.get_noised_result(
              sample_state, self._global_state))

+      def normalize(v):
+        return tf.truediv(v, tf.cast(self._num_microbatches, tf.float32))
+
+      final_grads = nest.map_structure(normalize, grad_sums)
+
      return list(zip(final_grads, var_list))

  return DPOptimizerClass
@@ -188,20 +201,20 @@ def make_gaussian_optimizer_class(cls):
        self,
        l2_norm_clip,
        noise_multiplier,
-        num_microbatches,
-        ledger,
+        num_microbatches=None,
+        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
-      dp_average_query = gaussian_query.GaussianAverageQuery(
-          l2_norm_clip, l2_norm_clip * noise_multiplier,
-          num_microbatches, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(
+          l2_norm_clip, l2_norm_clip * noise_multiplier)
+
      if ledger:
-        dp_average_query = privacy_ledger.QueryWithLedger(
-            dp_average_query, ledger)
+        dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
+                                                      ledger=ledger)

      super(DPGaussianOptimizerClass, self).__init__(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches,
          unroll_microbatches,
          *args,
@@ -209,7 +222,7 @@ def make_gaussian_optimizer_class(cls):

      @property
      def ledger(self):
-        return self._ledger
+        return self._dp_sum_query.ledger

  return DPGaussianOptimizerClass
|
|
|
@ -53,14 +53,12 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
var0 = tf.Variable([1.0, 2.0])
|
var0 = tf.Variable([1.0, 2.0])
|
||||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
||||||
dp_average_query = gaussian_query.GaussianAverageQuery(
|
dp_sum_query = privacy_ledger.QueryWithLedger(
|
||||||
1.0e9, 0.0, num_microbatches, ledger)
|
dp_sum_query, 1e6, num_microbatches / 1e6)
|
||||||
dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
|
|
||||||
ledger)
|
|
||||||
|
|
||||||
opt = cls(
|
opt = cls(
|
||||||
dp_average_query,
|
dp_sum_query,
|
||||||
num_microbatches=num_microbatches,
|
num_microbatches=num_microbatches,
|
||||||
learning_rate=2.0)
|
learning_rate=2.0)
|
||||||
|
|
||||||
|
@@ -84,12 +82,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
-                                                        ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -110,12 +106,10 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(dp_average_query,
-                                                        ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -46,22 +46,18 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]),
      ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]),
      ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
-      ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]),
-      ('DPAdam None', dp_optimizer.DPAdamOptimizer, None, [-2.5, -2.5]))
+      ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]))
  def testBaseline(self, cls, num_microbatches, expected_answer):
    with self.cached_session() as sess:
      var0 = tf.Variable([1.0, 2.0])
      data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

-      ledger = privacy_ledger.PrivacyLedger(
-          1e6, num_microbatches / 1e6 if num_microbatches else None)
-      dp_average_query = gaussian_query.GaussianAverageQuery(
-          1.0e9, 0.0, num_microbatches, ledger)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(
+          dp_sum_query, 1e6, num_microbatches / 1e6)

      opt = cls(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=num_microbatches,
          learning_rate=2.0)
@@ -84,12 +80,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -109,12 +103,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
      var0 = tf.Variable([0.0])
      data0 = tf.Variable([[0.0]])

-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

-      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)
+      opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -153,12 +145,10 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):

      vector_loss = tf.squared_difference(labels, preds)
      scalar_loss = tf.reduce_mean(vector_loss)
-      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
      optimizer = dp_optimizer.DPGradientDescentOptimizer(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=1,
          learning_rate=1.0)
      global_step = tf.train.get_global_step()
@@ -198,14 +188,12 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):

      num_microbatches = 4

-      ledger = privacy_ledger.PrivacyLedger(
-          1e6, num_microbatches / 1e6)
-      dp_average_query = gaussian_query.GaussianAverageQuery(1.0e9, 0.0, 4)
-      dp_average_query = privacy_ledger.QueryWithLedger(
-          dp_average_query, ledger)
+      dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
+      dp_sum_query = privacy_ledger.QueryWithLedger(
+          dp_sum_query, 1e6, num_microbatches / 1e6)

      opt = cls(
-          dp_average_query,
+          dp_sum_query,
          num_microbatches=num_microbatches,
          learning_rate=2.0,
          unroll_microbatches=True)
@@ -233,8 +221,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
          l2_norm_clip=4.0,
          noise_multiplier=2.0,
          num_microbatches=1,
-          learning_rate=2.0,
-          ledger=privacy_ledger.DummyLedger())
+          learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
@@ -94,9 +94,7 @@ def rnn_model_fn(features, labels, mode):  # pylint: disable=unused-argument

    ledger = privacy_ledger.PrivacyLedger(
        population_size=NB_TRAIN,
-        selection_probability=(FLAGS.batch_size / NB_TRAIN),
-        max_samples=1e6,
-        max_queries=1e6)
+        selection_probability=(FLAGS.batch_size / NB_TRAIN))

    optimizer = dp_optimizer.DPAdamGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
@@ -26,12 +26,11 @@ import tensorflow as tf

 from privacy.analysis.rdp_accountant import compute_rdp
 from privacy.analysis.rdp_accountant import get_privacy_spent
-from privacy.dp_query.gaussian_query import GaussianAverageQuery
-from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

 if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
-  tf.compat.v1.enable_eager_execution()
+  tf.enable_eager_execution()
 else:
   GradientDescentOptimizer = tf.optimizers.SGD  # pylint: disable=invalid-name
@@ -64,6 +63,9 @@ def compute_epsilon(steps):


 def main(_):
+  if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0:
+    raise ValueError('Number of microbatches should divide evenly batch_size')
+
   # Fetch the mnist data
   train, test = tf.keras.datasets.mnist.load_data()
   train_images, train_labels = train
@@ -97,13 +99,10 @@ def main(_):

  # Instantiate the optimizer
  if FLAGS.dpsgd:
-    dp_average_query = GaussianAverageQuery(
-        FLAGS.l2_norm_clip,
-        FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
-        FLAGS.microbatches)
-    opt = DPGradientDescentOptimizer(
-        dp_average_query,
-        FLAGS.microbatches,
+    opt = DPGradientDescentGaussianOptimizer(
+        l2_norm_clip=FLAGS.l2_norm_clip,
+        noise_multiplier=FLAGS.noise_multiplier,
+        num_microbatches=FLAGS.microbatches,
        learning_rate=FLAGS.learning_rate)
  else:
    opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
@@ -145,7 +144,7 @@ def main(_):

    # Compute the privacy budget expended so far.
    if FLAGS.dpsgd:
-      eps = compute_epsilon(epoch * steps_per_epoch)
+      eps = compute_epsilon((epoch + 1) * steps_per_epoch)
      print('For delta=1e-5, the current epsilon is: %.2f' % eps)
    else:
      print('Trained with vanilla non-private SGD optimizer')
@@ -27,8 +27,7 @@ import tensorflow as tf

 from privacy.analysis.rdp_accountant import compute_rdp
 from privacy.analysis.rdp_accountant import get_privacy_spent
-from privacy.dp_query.gaussian_query import GaussianAverageQuery
-from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer
+from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

 if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):
   GradientDescentOptimizer = tf.train.GradientDescentOptimizer
@@ -42,10 +41,10 @@ flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training')
 flags.DEFINE_float('noise_multiplier', 1.1,
                    'Ratio of the standard deviation to the clipping norm')
 flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
-flags.DEFINE_integer('batch_size', 250, 'Batch size')
+flags.DEFINE_integer('batch_size', 256, 'Batch size')
 flags.DEFINE_integer('epochs', 60, 'Number of epochs')
 flags.DEFINE_integer(
-    'microbatches', 250, 'Number of microbatches '
+    'microbatches', 256, 'Number of microbatches '
     '(must evenly divide batch_size)')
 flags.DEFINE_string('model_dir', None, 'Model directory')
@@ -119,13 +118,10 @@ def main(unused_argv):
  ])

  if FLAGS.dpsgd:
-    dp_average_query = GaussianAverageQuery(
-        FLAGS.l2_norm_clip,
-        FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
-        FLAGS.microbatches)
-    optimizer = DPGradientDescentOptimizer(
-        dp_average_query,
-        FLAGS.microbatches,
+    optimizer = DPGradientDescentGaussianOptimizer(
+        l2_norm_clip=FLAGS.l2_norm_clip,
+        noise_multiplier=FLAGS.noise_multiplier,
+        num_microbatches=FLAGS.num_microbatches,
        learning_rate=FLAGS.learning_rate,
        unroll_microbatches=True)
    # Compute vector of per-example loss rather than its mean over a minibatch.