Update PrivacyLedger and DPOptimizer to make certain arguments optional.
PiperOrigin-RevId: 246235646
This commit is contained in:
parent
c09ec4c22b
commit
beb86c6e18
5 changed files with 67 additions and 35 deletions
|
@ -11,9 +11,7 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""PrivacyLedger class for keeping a record of private queries.
|
||||
"""
|
||||
"""PrivacyLedger class for keeping a record of private queries."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
|
@ -65,36 +63,39 @@ class PrivacyLedger(object):
|
|||
for the purpose of computing privacy guarantees.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
population_size,
|
||||
selection_probability,
|
||||
max_samples,
|
||||
max_queries):
|
||||
def __init__(self,
|
||||
population_size,
|
||||
selection_probability=None,
|
||||
max_samples=None,
|
||||
max_queries=None):
|
||||
"""Initialize the PrivacyLedger.
|
||||
|
||||
Args:
|
||||
population_size: An integer (may be variable) specifying the size of the
|
||||
population.
|
||||
population, i.e. size of the training data used in each epoch.
|
||||
selection_probability: A float (may be variable) specifying the
|
||||
probability each record is included in a sample.
|
||||
max_samples: The maximum number of samples. An exception is thrown if
|
||||
more than this many samples are recorded.
|
||||
max_queries: The maximum number of queries. An exception is thrown if
|
||||
more than this many queries are recorded.
|
||||
max_samples: The maximum number of samples. An exception is thrown if more
|
||||
than this many samples are recorded.
|
||||
max_queries: The maximum number of queries. An exception is thrown if more
|
||||
than this many queries are recorded.
|
||||
"""
|
||||
self._population_size = population_size
|
||||
self._selection_probability = selection_probability
|
||||
if max_samples is None:
|
||||
max_samples = 1000 * population_size
|
||||
if max_queries is None:
|
||||
max_queries = 1000 * population_size
|
||||
|
||||
# The query buffer stores rows corresponding to GaussianSumQueryEntries.
|
||||
self._query_buffer = tensor_buffer.TensorBuffer(
|
||||
max_queries, [3], tf.float32, 'query')
|
||||
self._query_buffer = tensor_buffer.TensorBuffer(max_queries, [3],
|
||||
tf.float32, 'query')
|
||||
self._sample_var = tf.Variable(
|
||||
initial_value=tf.zeros([3]), trainable=False, name='sample')
|
||||
|
||||
# The sample buffer stores rows corresponding to SampleEntries.
|
||||
self._sample_buffer = tensor_buffer.TensorBuffer(
|
||||
max_samples, [3], tf.float32, 'sample')
|
||||
self._sample_buffer = tensor_buffer.TensorBuffer(max_samples, [3],
|
||||
tf.float32, 'sample')
|
||||
self._sample_count = tf.Variable(
|
||||
initial_value=0.0, trainable=False, name='sample_count')
|
||||
self._query_count = tf.Variable(
|
||||
|
@ -116,9 +117,10 @@ class PrivacyLedger(object):
|
|||
Returns:
|
||||
An operation recording the sum query to the ledger.
|
||||
"""
|
||||
|
||||
def _do_record_query():
|
||||
with tf.control_dependencies([
|
||||
tf.assign(self._query_count, self._query_count + 1)]):
|
||||
with tf.control_dependencies(
|
||||
[tf.assign(self._query_count, self._query_count + 1)]):
|
||||
return self._query_buffer.append(
|
||||
[self._sample_count, l2_norm_bound, noise_stddev])
|
||||
|
||||
|
@ -127,14 +129,15 @@ class PrivacyLedger(object):
|
|||
def finalize_sample(self):
|
||||
"""Finalizes sample and records sample ledger entry."""
|
||||
with tf.control_dependencies([
|
||||
tf.assign(
|
||||
self._sample_var,
|
||||
[self._population_size,
|
||||
self._selection_probability,
|
||||
self._query_count])]):
|
||||
tf.assign(self._sample_var, [
|
||||
self._population_size, self._selection_probability,
|
||||
self._query_count
|
||||
])
|
||||
]):
|
||||
with tf.control_dependencies([
|
||||
tf.assign(self._sample_count, self._sample_count + 1),
|
||||
tf.assign(self._query_count, 0)]):
|
||||
tf.assign(self._query_count, 0)
|
||||
]):
|
||||
return self._sample_buffer.append(self._sample_var)
|
||||
|
||||
def get_unformatted_ledger(self):
|
||||
|
@ -165,6 +168,10 @@ class PrivacyLedger(object):
|
|||
|
||||
return format_ledger(sample_array, query_array)
|
||||
|
||||
def set_sample_size(self, batch_size):
|
||||
self._selection_probability = tf.cast(batch_size,
|
||||
tf.float32) / self._population_size
|
||||
|
||||
|
||||
class DummyLedger(object):
|
||||
"""A ledger that records nothing.
|
||||
|
@ -212,8 +219,8 @@ class QueryWithLedger(dp_query.DPQuery):
|
|||
|
||||
Args:
|
||||
query: The query whose events should be recorded to the ledger. Any
|
||||
subqueries (including those in the leaves of a nested query) should
|
||||
also contain a reference to the same ledger given here.
|
||||
subqueries (including those in the leaves of a nested query) should also
|
||||
contain a reference to the same ledger given here.
|
||||
ledger: A PrivacyLedger to which privacy events should be recorded.
|
||||
"""
|
||||
self._query = query
|
||||
|
@ -240,3 +247,7 @@ class QueryWithLedger(dp_query.DPQuery):
|
|||
with tf.control_dependencies(nest.flatten(sample_state)):
|
||||
with tf.control_dependencies([self._ledger.finalize_sample()]):
|
||||
return self._query.get_noised_result(sample_state, global_state)
|
||||
|
||||
def set_denominator(self, num_microbatches, microbatch_size=1):
|
||||
self._query.set_denominator(num_microbatches)
|
||||
self._ledger.set_sample_size(num_microbatches * microbatch_size)
|
||||
|
|
|
@ -167,4 +167,4 @@ class GaussianAverageQuery(normalized_query.NormalizedQuery):
|
|||
"""
|
||||
super(GaussianAverageQuery, self).__init__(
|
||||
numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger),
|
||||
denominator=tf.cast(denominator, tf.float32))
|
||||
denominator=denominator)
|
||||
|
|
|
@ -41,7 +41,8 @@ class NormalizedQuery(dp_query.DPQuery):
|
|||
denominator: A value for the denominator.
|
||||
"""
|
||||
self._numerator = numerator_query
|
||||
self._denominator = tf.cast(denominator, tf.float32)
|
||||
self._denominator = tf.cast(denominator,
|
||||
tf.float32) if denominator is not None else None
|
||||
|
||||
def initial_global_state(self):
|
||||
"""Returns the initial global state for the NormalizedQuery."""
|
||||
|
@ -103,4 +104,5 @@ class NormalizedQuery(dp_query.DPQuery):
|
|||
|
||||
return nest.map_structure(normalize, noised_sum), new_sum_global_state
|
||||
|
||||
|
||||
def set_denominator(self, denominator):
|
||||
self._denominator = tf.cast(denominator, tf.float32)
|
||||
|
|
|
@ -47,10 +47,22 @@ def make_optimizer_class(cls):
|
|||
def __init__(
|
||||
self,
|
||||
dp_average_query,
|
||||
num_microbatches,
|
||||
num_microbatches=None,
|
||||
unroll_microbatches=False,
|
||||
*args, # pylint: disable=keyword-arg-before-vararg
|
||||
*args, # pylint: disable=keyword-arg-before-vararg, g-doc-args
|
||||
**kwargs):
|
||||
"""Initialize the DPOptimizerClass.
|
||||
|
||||
Args:
|
||||
dp_average_query: DPQuery object, specifying differential privacy
|
||||
mechanism to use.
|
||||
num_microbatches: How many microbatches into which the minibatch is
|
||||
split. If None, will default to the size of the minibatch, and
|
||||
per-example gradients will be computed.
|
||||
unroll_microbatches: If true, processes microbatches within a Python
|
||||
loop instead of a tf.while_loop. Can be used if using a tf.while_loop
|
||||
raises an exception.
|
||||
"""
|
||||
super(DPOptimizerClass, self).__init__(*args, **kwargs)
|
||||
self._dp_average_query = dp_average_query
|
||||
self._num_microbatches = num_microbatches
|
||||
|
@ -74,6 +86,9 @@ def make_optimizer_class(cls):
|
|||
raise ValueError('When in Eager mode, a tape needs to be passed.')
|
||||
|
||||
vector_loss = loss()
|
||||
if self._num_microbatches is None:
|
||||
self._num_microbatches = tf.shape(vector_loss)[0]
|
||||
self._dp_average_query.set_denominator(self._num_microbatches)
|
||||
sample_state = self._dp_average_query.initial_sample_state(
|
||||
self._global_state, var_list)
|
||||
microbatches_losses = tf.reshape(vector_loss,
|
||||
|
@ -109,6 +124,9 @@ def make_optimizer_class(cls):
|
|||
# we sampled each microbatch from the appropriate binomial distribution,
|
||||
# although that still wouldn't be quite correct because it would be
|
||||
# sampling from the dataset without replacement.
|
||||
if self._num_microbatches is None:
|
||||
self._num_microbatches = tf.shape(loss)[0]
|
||||
self._dp_average_query.set_denominator(self._num_microbatches)
|
||||
microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1])
|
||||
sample_params = (
|
||||
self._dp_average_query.derive_sample_params(self._global_state))
|
||||
|
|
|
@ -46,14 +46,15 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
|||
('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]),
|
||||
('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]),
|
||||
('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
|
||||
('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]))
|
||||
('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]),
|
||||
('DPAdam None', dp_optimizer.DPAdamOptimizer, None, [-2.5, -2.5]))
|
||||
def testBaseline(self, cls, num_microbatches, expected_answer):
|
||||
with self.cached_session() as sess:
|
||||
var0 = tf.Variable([1.0, 2.0])
|
||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||
|
||||
ledger = privacy_ledger.PrivacyLedger(
|
||||
1e6, num_microbatches / 1e6, 50, 50)
|
||||
1e6, num_microbatches / 1e6 if num_microbatches else None, 50, 50)
|
||||
dp_average_query = gaussian_query.GaussianAverageQuery(
|
||||
1.0e9, 0.0, num_microbatches, ledger)
|
||||
dp_average_query = privacy_ledger.QueryWithLedger(
|
||||
|
|
Loading…
Reference in a new issue