Remove PrivacyLedger, which will soon be replaced by DpEvent and PrivacyAccountant.
PiperOrigin-RevId: 393147667
This commit is contained in:
parent
0e04e1baeb
commit
d9236d5619
19 changed files with 172 additions and 776 deletions
|
@ -31,10 +31,6 @@ else:
|
||||||
|
|
||||||
# Analysis
|
# Analysis
|
||||||
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
|
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
|
||||||
from tensorflow_privacy.privacy.analysis.privacy_ledger import GaussianSumQueryEntry
|
|
||||||
from tensorflow_privacy.privacy.analysis.privacy_ledger import PrivacyLedger
|
|
||||||
from tensorflow_privacy.privacy.analysis.privacy_ledger import QueryWithLedger
|
|
||||||
from tensorflow_privacy.privacy.analysis.privacy_ledger import SampleEntry
|
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogenous_rdp
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogenous_rdp
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_from_ledger
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_from_ledger
|
||||||
|
|
|
@ -1,299 +0,0 @@
|
||||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
"""PrivacyLedger class for keeping a record of private queries."""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import collections
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import tensorflow.compat.v1 as tf
|
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import tensor_buffer
|
|
||||||
from tensorflow_privacy.privacy.dp_query import dp_query
|
|
||||||
|
|
||||||
# One row of the ledger per sample: the sampling parameters in effect plus the
# list of `GaussianSumQueryEntry`s that were issued on that sample.
SampleEntry = collections.namedtuple(  # pylint: disable=invalid-name
    'SampleEntry', ['population_size', 'selection_probability', 'queries'])

# One Gaussian sum query: its l2 clipping bound and the stddev of added noise.
GaussianSumQueryEntry = collections.namedtuple(  # pylint: disable=invalid-name
    'GaussianSumQueryEntry', ['l2_norm_bound', 'noise_stddev'])


def format_ledger(sample_array, query_array):
  """Converts array representation into a list of SampleEntries.

  Args:
    sample_array: Iterable of rows `(population_size, selection_probability,
      num_queries)`, one row per sample, in sample order.
    query_array: Indexable sequence of rows `(sample_index, l2_norm_bound,
      noise_stddev)`. Rows must be grouped by sample in the same order as
      `sample_array`; each sample consumes its next `num_queries` rows.

  Returns:
    A list of `SampleEntry` tuples whose `queries` field is a list of
    `GaussianSumQueryEntry` tuples.

  Raises:
    AssertionError: If a query row's `sample_index` does not match the
      position of the sample that is consuming it.
  """
  samples = []
  query_pos = 0
  for sample_pos, sample in enumerate(sample_array):
    population_size, selection_probability, num_queries = sample
    queries = []
    for _ in range(int(num_queries)):
      query = query_array[query_pos]
      # Raised explicitly (instead of a bare `assert`) so the consistency check
      # still runs under `python -O`; the exception type is unchanged.
      if int(query[0]) != sample_pos:
        raise AssertionError(
            'Query row %d belongs to sample %d but was reached while reading '
            'sample %d.' % (query_pos, int(query[0]), sample_pos))
      queries.append(GaussianSumQueryEntry(*query[1:]))
      query_pos += 1
    samples.append(SampleEntry(population_size, selection_probability, queries))
  return samples
|
|
||||||
|
|
||||||
|
|
||||||
class PrivacyLedger(object):
  """Class for keeping a record of private queries.

  The PrivacyLedger keeps a record of all queries executed over a given dataset
  for the purpose of computing privacy guarantees. To use it, it must be
  associated with a `DPQuery` object via a `QueryWithLedger`.

  The current implementation works only with DPQueries that consist of composing
  Gaussian sum mechanism with Poisson subsampling.

  Example usage:

  ```
  import tensorflow_privacy as tfp

  dp_query = tfp.QueryWithLedger(
      tfp.GaussianSumQuery(
          l2_norm_clip=1.0, stddev=1.0),
      population_size=10000,
      selection_probability=0.01)

  # Use dp_query here in training loop.

  formatted_ledger = dp_query.ledger.get_formatted_ledger_eager()
  orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
            list(range(5, 64)) + [128, 256, 512])
  total_rdp = tfp.compute_rdp_from_ledger(formatted_ledger, orders)
  epsilon, _, _ = tfp.get_privacy_spent(orders, total_rdp, target_delta=1e-5)
  ```
  """

  def __init__(self,
               population_size,
               selection_probability):
    """Initializes the PrivacyLedger.

    Args:
      population_size: An integer (may be variable) specifying the size of the
        population, i.e. size of the training data used in each epoch.
      selection_probability: A floating point value (may be variable) specifying
        the probability each record is included in a sample.

    Raises:
      ValueError: If `selection_probability` is 0.
    """
    self._population_size = population_size
    self._selection_probability = selection_probability

    # Both buffers start with room for ceil(1 / selection_probability) rows.
    # NOTE(review): presumably about one epoch's worth of samples - confirm.
    if tf.executing_eagerly():
      if tf.equal(selection_probability, 0):
        raise ValueError('Selection probability cannot be 0.')
      init_capacity = tf.cast(tf.math.ceil(1 / selection_probability), tf.int32)
    else:
      if selection_probability == 0:
        raise ValueError('Selection probability cannot be 0.')
      # Builtin `int` replaces the `np.int` alias, which NumPy deprecated in
      # 1.20 and removed in 1.24; the resulting value is identical.
      init_capacity = int(np.ceil(1 / selection_probability))

    # The query buffer stores rows corresponding to GaussianSumQueryEntries.
    # Each row is [sample_count, l2_norm_bound, noise_stddev].
    self._query_buffer = tensor_buffer.TensorBuffer(
        init_capacity, [3], tf.float32, 'query')
    # Staging variable for the row appended to the sample buffer:
    # [population_size, selection_probability, query_count].
    self._sample_var = tf.Variable(
        initial_value=tf.zeros([3]), trainable=False, name='sample')

    # The sample buffer stores rows corresponding to SampleEntries.
    self._sample_buffer = tensor_buffer.TensorBuffer(
        init_capacity, [3], tf.float32, 'sample')
    # Number of samples finalized so far; doubles as the index of the sample
    # that is currently being recorded.
    self._sample_count = tf.Variable(
        initial_value=0.0, trainable=False, name='sample_count')
    # Number of queries recorded on the current (not yet finalized) sample.
    self._query_count = tf.Variable(
        initial_value=0.0, trainable=False, name='query_count')
    # Query recording is executed through this critical section.
    self._cs = tf.CriticalSection()

  def record_sum_query(self, l2_norm_bound, noise_stddev):
    """Records that a query was issued.

    Args:
      l2_norm_bound: The maximum l2 norm of the tensor group in the query.
      noise_stddev: The standard deviation of the noise applied to the sum.

    Returns:
      An operation recording the sum query to the ledger.  This should be called
      for every Gaussian sum query that is issued on a sample.
    """

    def _do_record_query():
      # Bump the per-sample query counter before appending the query row.
      with tf.control_dependencies(
          [tf.assign(self._query_count, self._query_count + 1)]):
        return self._query_buffer.append(
            [self._sample_count, l2_norm_bound, noise_stddev])

    return self._cs.execute(_do_record_query)

  def finalize_sample(self):
    """Finalizes sample and records sample ledger entry.

    This should be called once per application of the mechanism on a sample,
    after all sum queries have been recorded.

    Returns:
      An operation recording the complete mechanism (sampling and sum
      estimation) to the ledger.
    """
    # First stage the sample row (it captures the current query count), then
    # advance the sample counter and reset the query counter, and only then
    # append the staged row to the sample buffer.
    with tf.control_dependencies([
        tf.assign(self._sample_var, [
            self._population_size, self._selection_probability,
            self._query_count
        ])
    ]):
      with tf.control_dependencies([
          tf.assign(self._sample_count, self._sample_count + 1),
          tf.assign(self._query_count, 0)
      ]):
        return self._sample_buffer.append(self._sample_var)

  def get_unformatted_ledger(self):
    """Returns the raw sample and query values."""
    return self._sample_buffer.values, self._query_buffer.values

  def get_formatted_ledger(self, sess):
    """Gets the formatted query ledger.

    Args:
      sess: The tensorflow session in which the ledger was created.

    Returns:
      The query ledger as a list of `SampleEntry` instances.
    """
    sample_array = sess.run(self._sample_buffer.values)
    query_array = sess.run(self._query_buffer.values)

    return format_ledger(sample_array, query_array)

  def get_formatted_ledger_eager(self):
    """Gets the formatted query ledger when executing eagerly.

    Returns:
      The query ledger as a list of `SampleEntry` instances.
    """
    sample_array = self._sample_buffer.values.numpy()
    query_array = self._query_buffer.values.numpy()

    return format_ledger(sample_array, query_array)
|
|
||||||
|
|
||||||
|
|
||||||
class QueryWithLedger(dp_query.DPQuery):
  """A class for DP queries that record events to a `PrivacyLedger`.

  `QueryWithLedger` should be the top-level query in a structure of queries that
  may include sum queries, nested queries, etc. It should simply wrap another
  query and contain a reference to the ledger. Any contained queries (including
  those contained in the leaves of a nested query) should also contain a
  reference to the same ledger object.

  Only composed Gaussian sum queries with Poisson subsampling are supported.
  This includes `GaussianSumQuery`, `QuantileEstimatorQuery`, and
  `QuantileAdaptiveClipSumQuery`, as well as `NestedQuery` or `NormalizedQuery`
  objects that contain the previous mentioned query types.
  """

  def __init__(self, query,
               population_size=None, selection_probability=None,
               ledger=None):
    """Initializes the `QueryWithLedger`.

    If both `population_size` and `selection_probability` are given, a new
    `PrivacyLedger` is created from them and `ledger` is not used.

    Args:
      query: The query whose events should be recorded to the ledger. Any
        subqueries (including those in the leaves of a nested query) should also
        contain a reference to the same ledger given here.
      population_size: An integer (may be variable) specifying the size of the
        population, i.e. size of the training data used in each epoch. May be
        `None` if `ledger` is specified.
      selection_probability: A floating point value (may be variable) specifying
        the probability each record is included in a sample under Poisson
        subsampling. May be `None` if `ledger` is specified.
      ledger: A `PrivacyLedger` to use. Must be specified if either of
        `population_size` or `selection_probability` is `None`.

    Raises:
      ValueError: If neither the (`population_size`, `selection_probability`)
        pair nor `ledger` is specified.
    """
    self._query = query
    if population_size is not None and selection_probability is not None:
      self.set_ledger(PrivacyLedger(population_size, selection_probability))
    elif ledger is not None:
      self.set_ledger(ledger)
    else:
      raise ValueError('One of (population_size, selection_probability) or '
                       'ledger must be specified.')

  @property
  def ledger(self):
    """Gets the ledger that all inner queries record to."""
    return self._ledger

  def set_ledger(self, ledger):
    """Sets a new ledger on this query and on the wrapped query."""
    self._ledger = ledger
    self._query.set_ledger(ledger)

  def initial_global_state(self):
    """Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
    return self._query.initial_global_state()

  def derive_sample_params(self, global_state):
    """Implements `tensorflow_privacy.DPQuery.derive_sample_params`."""
    return self._query.derive_sample_params(global_state)

  def initial_sample_state(self, template):
    """Implements `tensorflow_privacy.DPQuery.initial_sample_state`."""
    return self._query.initial_sample_state(template)

  def preprocess_record(self, params, record):
    """Implements `tensorflow_privacy.DPQuery.preprocess_record`."""
    return self._query.preprocess_record(params, record)

  def accumulate_preprocessed_record(self, sample_state, preprocessed_record):
    """Implements `tensorflow_privacy.DPQuery.accumulate_preprocessed_record`."""
    return self._query.accumulate_preprocessed_record(
        sample_state, preprocessed_record)

  def merge_sample_states(self, sample_state_1, sample_state_2):
    """Implements `tensorflow_privacy.DPQuery.merge_sample_states`."""
    return self._query.merge_sample_states(sample_state_1, sample_state_2)

  def get_noised_result(self, sample_state, global_state):
    """Implements `tensorflow_privacy.DPQuery.get_noised_result`.

    Besides noising and returning the result of the inner query, ensures that
    the sample is recorded to the ledger.

    Args:
      sample_state: The sample state after all records have been accumulated.
      global_state: The global state, storing long-term privacy bookkeeping.

    Returns:
      A tuple (result, new_global_state) where "result" is the result of the
      query and "new_global_state" is the updated global state.
    """
    # Ensure sample_state is fully aggregated before calling get_noised_result.
    with tf.control_dependencies(tf.nest.flatten(sample_state)):
      result, new_global_state = self._query.get_noised_result(
          sample_state, global_state)

    # Ensure inner queries have recorded before finalizing.
    with tf.control_dependencies(tf.nest.flatten(result)):
      finalize = self._ledger.finalize_sample()

    # Ensure finalizing happens: the returned tensors are identity copies
    # created under a control dependency on the finalize op.
    with tf.control_dependencies([finalize]):
      return tf.nest.map_structure(tf.identity, result), new_global_state
|
|
|
@ -1,133 +0,0 @@
|
||||||
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
"""Tests for PrivacyLedger."""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
|
||||||
from __future__ import division
|
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
import tensorflow.compat.v1 as tf
|
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
|
||||||
from tensorflow_privacy.privacy.dp_query import nested_query
|
|
||||||
from tensorflow_privacy.privacy.dp_query import test_utils
|
|
||||||
|
|
||||||
tf.enable_eager_execution()
|
|
||||||
|
|
||||||
|
|
||||||
class PrivacyLedgerTest(tf.test.TestCase):
  """Tests for `PrivacyLedger` and `QueryWithLedger` using the eager ledger API."""

  def test_fail_on_probability_zero(self):
    """Constructing a ledger with selection probability 0 must raise."""
    # `assertRaisesRegexp` was a deprecated alias that Python 3.12 removed;
    # `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(ValueError,
                                'Selection probability cannot be 0.'):
      privacy_ledger.PrivacyLedger(10, 0)

  def test_basic(self):
    """Two recorded queries end up in a single finalized sample entry."""
    ledger = privacy_ledger.PrivacyLedger(10, 0.1)
    ledger.record_sum_query(5.0, 1.0)
    ledger.record_sum_query(2.0, 0.5)

    ledger.finalize_sample()

    expected_queries = [[5.0, 1.0], [2.0, 0.5]]
    formatted = ledger.get_formatted_ledger_eager()

    sample = formatted[0]
    self.assertAllClose(sample.population_size, 10.0)
    self.assertAllClose(sample.selection_probability, 0.1)
    # Compared after sorting, so the order of queries within a sample is not
    # part of what this test checks.
    self.assertAllClose(sorted(sample.queries), sorted(expected_queries))

  def test_sum_query(self):
    """A wrapped GaussianSumQuery records one query per sample."""
    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])

    # Variables, so the values can be changed between samples below.
    population_size = tf.Variable(0)
    selection_probability = tf.Variable(1.0)

    query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
    query = privacy_ledger.QueryWithLedger(query, population_size,
                                           selection_probability)

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, [record1, record2])

    expected_queries = [[10.0, 0.0]]
    formatted = query.ledger.get_formatted_ledger_eager()
    sample_1 = formatted[0]
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2])

    formatted = query.ledger.get_formatted_ledger_eager()
    sample_1, sample_2 = formatted
    # The first entry must be unaffected by the later variable updates.
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    self.assertAllClose(sample_2.population_size, 20.0)
    self.assertAllClose(sample_2.selection_probability, 0.2)
    self.assertAllClose(sample_2.queries, expected_queries)

  def test_nested_query(self):
    """Both leaves of a NestedQuery record to the shared ledger."""
    population_size = tf.Variable(0)
    selection_probability = tf.Variable(1.0)

    query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=4.0, stddev=2.0)
    query2 = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=1.0)

    query = nested_query.NestedQuery([query1, query2])
    query = privacy_ledger.QueryWithLedger(query, population_size,
                                           selection_probability)

    record1 = [1.0, [12.0, 9.0]]
    record2 = [5.0, [1.0, 2.0]]

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, [record1, record2])

    expected_queries = [[4.0, 2.0], [5.0, 1.0]]
    formatted = query.ledger.get_formatted_ledger_eager()
    sample_1 = formatted[0]
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2])

    formatted = query.ledger.get_formatted_ledger_eager()
    sample_1, sample_2 = formatted
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

    self.assertAllClose(sample_2.population_size, 20.0)
    self.assertAllClose(sample_2.selection_probability, 0.2)
    self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
tf.test.main()
|
|
|
@ -47,7 +47,6 @@ import numpy as np
|
||||||
from scipy import special
|
from scipy import special
|
||||||
import six
|
import six
|
||||||
|
|
||||||
|
|
||||||
########################
|
########################
|
||||||
# LOG-SPACE ARITHMETIC #
|
# LOG-SPACE ARITHMETIC #
|
||||||
########################
|
########################
|
||||||
|
@ -102,8 +101,8 @@ def _log_print(logx):
|
||||||
|
|
||||||
|
|
||||||
def _log_comb(n, k):
|
def _log_comb(n, k):
|
||||||
return (special.gammaln(n + 1) -
|
return (special.gammaln(n + 1) - special.gammaln(k + 1) -
|
||||||
special.gammaln(k + 1) - special.gammaln(n - k + 1))
|
special.gammaln(n - k + 1))
|
||||||
|
|
||||||
|
|
||||||
def _compute_log_a_int(q, sigma, alpha):
|
def _compute_log_a_int(q, sigma, alpha):
|
||||||
|
@ -215,17 +214,19 @@ def _compute_delta(orders, rdp, eps):
|
||||||
# Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4):
|
# Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4):
|
||||||
logdeltas = [] # work in log space to avoid overflows
|
logdeltas = [] # work in log space to avoid overflows
|
||||||
for (a, r) in zip(orders_vec, rdp_vec):
|
for (a, r) in zip(orders_vec, rdp_vec):
|
||||||
if a < 1: raise ValueError("Renyi divergence order must be >=1.")
|
if a < 1:
|
||||||
if r < 0: raise ValueError("Renyi divergence must be >=0.")
|
raise ValueError("Renyi divergence order must be >=1.")
|
||||||
|
if r < 0:
|
||||||
|
raise ValueError("Renyi divergence must be >=0.")
|
||||||
# For small alpha, we are better off with bound via KL divergence:
|
# For small alpha, we are better off with bound via KL divergence:
|
||||||
# delta <= sqrt(1-exp(-KL)).
|
# delta <= sqrt(1-exp(-KL)).
|
||||||
# Take a min of the two bounds.
|
# Take a min of the two bounds.
|
||||||
logdelta = 0.5*math.log1p(-math.exp(-r))
|
logdelta = 0.5 * math.log1p(-math.exp(-r))
|
||||||
if a > 1.01:
|
if a > 1.01:
|
||||||
# This bound is not numerically stable as alpha->1.
|
# This bound is not numerically stable as alpha->1.
|
||||||
# Thus we have a min value for alpha.
|
# Thus we have a min value for alpha.
|
||||||
# The bound is also not useful for small alpha, so doesn't matter.
|
# The bound is also not useful for small alpha, so doesn't matter.
|
||||||
rdp_bound = (a - 1) * (r - eps + math.log1p(-1/a)) - math.log(a)
|
rdp_bound = (a - 1) * (r - eps + math.log1p(-1 / a)) - math.log(a)
|
||||||
logdelta = min(logdelta, rdp_bound)
|
logdelta = min(logdelta, rdp_bound)
|
||||||
|
|
||||||
logdeltas.append(logdelta)
|
logdeltas.append(logdelta)
|
||||||
|
@ -264,8 +265,10 @@ def _compute_eps(orders, rdp, delta):
|
||||||
# Also appears in https://arxiv.org/abs/2001.05990 Equation 20 (in v1).
|
# Also appears in https://arxiv.org/abs/2001.05990 Equation 20 (in v1).
|
||||||
eps_vec = []
|
eps_vec = []
|
||||||
for (a, r) in zip(orders_vec, rdp_vec):
|
for (a, r) in zip(orders_vec, rdp_vec):
|
||||||
if a < 1: raise ValueError("Renyi divergence order must be >=1.")
|
if a < 1:
|
||||||
if r < 0: raise ValueError("Renyi divergence must be >=0.")
|
raise ValueError("Renyi divergence order must be >=1.")
|
||||||
|
if r < 0:
|
||||||
|
raise ValueError("Renyi divergence must be >=0.")
|
||||||
|
|
||||||
if delta**2 + math.expm1(-r) >= 0:
|
if delta**2 + math.expm1(-r) >= 0:
|
||||||
# In this case, we can simply bound via KL divergence:
|
# In this case, we can simply bound via KL divergence:
|
||||||
|
@ -378,7 +381,7 @@ def compute_rdp(q, noise_multiplier, steps, orders):
|
||||||
Args:
|
Args:
|
||||||
q: The sampling rate.
|
q: The sampling rate.
|
||||||
noise_multiplier: The ratio of the standard deviation of the Gaussian noise
|
noise_multiplier: The ratio of the standard deviation of the Gaussian noise
|
||||||
to the l2-sensitivity of the function to which it is added.
|
to the l2-sensitivity of the function to which it is added.
|
||||||
steps: The number of steps.
|
steps: The number of steps.
|
||||||
orders: An array (or a scalar) of RDP orders.
|
orders: An array (or a scalar) of RDP orders.
|
||||||
|
|
||||||
|
@ -388,8 +391,8 @@ def compute_rdp(q, noise_multiplier, steps, orders):
|
||||||
if np.isscalar(orders):
|
if np.isscalar(orders):
|
||||||
rdp = _compute_rdp(q, noise_multiplier, orders)
|
rdp = _compute_rdp(q, noise_multiplier, orders)
|
||||||
else:
|
else:
|
||||||
rdp = np.array([_compute_rdp(q, noise_multiplier, order)
|
rdp = np.array(
|
||||||
for order in orders])
|
[_compute_rdp(q, noise_multiplier, order) for order in orders])
|
||||||
|
|
||||||
return rdp * steps
|
return rdp * steps
|
||||||
|
|
||||||
|
@ -572,8 +575,8 @@ def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
|
||||||
target_eps: If not `None`, the epsilon for which we compute the
|
target_eps: If not `None`, the epsilon for which we compute the
|
||||||
corresponding delta.
|
corresponding delta.
|
||||||
target_delta: If not `None`, the delta for which we compute the
|
target_delta: If not `None`, the delta for which we compute the
|
||||||
corresponding epsilon. Exactly one of `target_eps` and `target_delta`
|
corresponding epsilon. Exactly one of `target_eps` and `target_delta` must
|
||||||
must be `None`.
|
be `None`.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A tuple of epsilon, delta, and the optimal order.
|
A tuple of epsilon, delta, and the optimal order.
|
||||||
|
@ -595,24 +598,3 @@ def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
|
||||||
else:
|
else:
|
||||||
eps, opt_order = _compute_eps(orders, rdp, target_delta)
|
eps, opt_order = _compute_eps(orders, rdp, target_delta)
|
||||||
return eps, target_delta, opt_order
|
return eps, target_delta, opt_order
|
||||||
|
|
||||||
|
|
||||||
def compute_rdp_from_ledger(ledger, orders):
  """Computes RDP of Sampled Gaussian Mechanism from ledger.

  Each sample's queries are collapsed into a single effective noise multiplier,
  and the per-sample RDP (one step at that sample's selection probability) is
  summed over all samples.

  Args:
    ledger: A formatted privacy ledger: an iterable of entries exposing
      `selection_probability` and `queries`, where each query exposes
      `l2_norm_bound` and `noise_stddev`.
    orders: An array (or a scalar) of RDP orders.

  Returns:
    RDP at all orders. Can be `np.inf`.
  """
  total_rdp = np.zeros_like(orders, dtype=float)
  for sample in ledger:
    # A sample on which no query was recorded adds nothing to the total.
    # Without this guard the empty sum is 0 and `0 ** -0.5` raises
    # ZeroDivisionError.
    if len(sample.queries) == 0:  # pylint: disable=g-explicit-length-test
      continue
    # Compute equivalent z from l2_clip_bounds and noise stddevs in sample.
    # See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula.
    effective_z = sum(
        (q.noise_stddev / q.l2_norm_bound)**-2 for q in sample.queries)**-0.5
    total_rdp += compute_rdp(
        sample.selection_probability, effective_z, 1, orders)
  return total_rdp
|
|
||||||
|
|
|
@ -31,7 +31,6 @@ from mpmath import quad
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.analysis import rdp_accountant
|
from tensorflow_privacy.privacy.analysis import rdp_accountant
|
||||||
|
|
||||||
|
|
||||||
|
@ -121,16 +120,47 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
|
||||||
[6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02, 1.6742e+02, np.inf],
|
[6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02, 1.6742e+02, np.inf],
|
||||||
rtol=1e-4)
|
rtol=1e-4)
|
||||||
|
|
||||||
params = ({'q': 1e-7, 'sigma': .1, 'order': 1.01},
|
params = ({
|
||||||
{'q': 1e-6, 'sigma': .1, 'order': 256},
|
'q': 1e-7,
|
||||||
{'q': 1e-5, 'sigma': .1, 'order': 256.1},
|
'sigma': .1,
|
||||||
{'q': 1e-6, 'sigma': 1, 'order': 27},
|
'order': 1.01
|
||||||
{'q': 1e-4, 'sigma': 1., 'order': 1.5},
|
}, {
|
||||||
{'q': 1e-3, 'sigma': 1., 'order': 2},
|
'q': 1e-6,
|
||||||
{'q': .01, 'sigma': 10, 'order': 20},
|
'sigma': .1,
|
||||||
{'q': .1, 'sigma': 100, 'order': 20.5},
|
'order': 256
|
||||||
{'q': .99, 'sigma': .1, 'order': 256},
|
}, {
|
||||||
{'q': .999, 'sigma': 100, 'order': 256.1})
|
'q': 1e-5,
|
||||||
|
'sigma': .1,
|
||||||
|
'order': 256.1
|
||||||
|
}, {
|
||||||
|
'q': 1e-6,
|
||||||
|
'sigma': 1,
|
||||||
|
'order': 27
|
||||||
|
}, {
|
||||||
|
'q': 1e-4,
|
||||||
|
'sigma': 1.,
|
||||||
|
'order': 1.5
|
||||||
|
}, {
|
||||||
|
'q': 1e-3,
|
||||||
|
'sigma': 1.,
|
||||||
|
'order': 2
|
||||||
|
}, {
|
||||||
|
'q': .01,
|
||||||
|
'sigma': 10,
|
||||||
|
'order': 20
|
||||||
|
}, {
|
||||||
|
'q': .1,
|
||||||
|
'sigma': 100,
|
||||||
|
'order': 20.5
|
||||||
|
}, {
|
||||||
|
'q': .99,
|
||||||
|
'sigma': .1,
|
||||||
|
'order': 256
|
||||||
|
}, {
|
||||||
|
'q': .999,
|
||||||
|
'sigma': 100,
|
||||||
|
'order': 256.1
|
||||||
|
})
|
||||||
|
|
||||||
# pylint:disable=undefined-variable
|
# pylint:disable=undefined-variable
|
||||||
@parameterized.parameters(p for p in params)
|
@parameterized.parameters(p for p in params)
|
||||||
|
@ -152,7 +182,8 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
|
||||||
self.assertAlmostEqual(eps, 1.32783806176)
|
self.assertAlmostEqual(eps, 1.32783806176)
|
||||||
|
|
||||||
# Second test for Gaussian noise (with no subsampling):
|
# Second test for Gaussian noise (with no subsampling):
|
||||||
orders = [0.001*i for i in range(1000, 100000)] # Pick fine set of orders.
|
orders = [0.001 * i for i in range(1000, 100000)
|
||||||
|
] # Pick fine set of orders.
|
||||||
rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
|
rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
|
||||||
# Scale is chosen to obtain exactly (1,1e-6)-DP.
|
# Scale is chosen to obtain exactly (1,1e-6)-DP.
|
||||||
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)
|
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)
|
||||||
|
@ -168,7 +199,7 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
|
||||||
self.assertAlmostEqual(delta, 1e-5)
|
self.assertAlmostEqual(delta, 1e-5)
|
||||||
|
|
||||||
# Second test for Gaussian noise (with no subsampling):
|
# Second test for Gaussian noise (with no subsampling):
|
||||||
orders = [0.001*i for i in range(1000, 100000)] # Pick fine set of order.
|
orders = [0.001 * i for i in range(1000, 100000)] # Pick fine set of order.
|
||||||
rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
|
rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
|
||||||
# Scale is chosen to obtain exactly (1,1e-6)-DP.
|
# Scale is chosen to obtain exactly (1,1e-6)-DP.
|
||||||
_, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_eps=1)
|
_, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_eps=1)
|
||||||
|
@ -178,17 +209,13 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
|
||||||
orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
|
orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
|
||||||
16., 20., 24., 28., 32., 64., 256.)
|
16., 20., 24., 28., 32., 64., 256.)
|
||||||
|
|
||||||
rdp = rdp_accountant.compute_rdp(q=1e-4,
|
rdp = rdp_accountant.compute_rdp(
|
||||||
noise_multiplier=.4,
|
q=1e-4, noise_multiplier=.4, steps=40000, orders=orders)
|
||||||
steps=40000,
|
|
||||||
orders=orders)
|
|
||||||
|
|
||||||
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)
|
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)
|
||||||
|
|
||||||
rdp += rdp_accountant.compute_rdp(q=0.1,
|
rdp += rdp_accountant.compute_rdp(
|
||||||
noise_multiplier=2,
|
q=0.1, noise_multiplier=2, steps=100, orders=orders)
|
||||||
steps=100,
|
|
||||||
orders=orders)
|
|
||||||
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-5)
|
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-5)
|
||||||
# These tests use the old RDP -> approx DP conversion
|
# These tests use the old RDP -> approx DP conversion
|
||||||
# self.assertAlmostEqual(eps, 8.509656, places=5)
|
# self.assertAlmostEqual(eps, 8.509656, places=5)
|
||||||
|
@ -217,42 +244,25 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
|
||||||
def test_get_privacy_spent_gaussian(self):
|
def test_get_privacy_spent_gaussian(self):
|
||||||
# Compare the optimal bound for Gaussian with the one derived from RDP.
|
# Compare the optimal bound for Gaussian with the one derived from RDP.
|
||||||
# Also compare the RDP upper bound with the "standard" upper bound.
|
# Also compare the RDP upper bound with the "standard" upper bound.
|
||||||
orders = [0.1*x for x in range(10, 505)]
|
orders = [0.1 * x for x in range(10, 505)]
|
||||||
eps_vec = [0.1*x for x in range(500)]
|
eps_vec = [0.1 * x for x in range(500)]
|
||||||
rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
|
rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
|
||||||
for eps in eps_vec:
|
for eps in eps_vec:
|
||||||
_, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp,
|
_, delta, _ = rdp_accountant.get_privacy_spent(
|
||||||
target_eps=eps)
|
orders, rdp, target_eps=eps)
|
||||||
# For comparison, we compute the optimal guarantee for Gaussian
|
# For comparison, we compute the optimal guarantee for Gaussian
|
||||||
# using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
|
# using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
|
||||||
delta0 = math.erfc((eps-.5)/math.sqrt(2))/2
|
delta0 = math.erfc((eps - .5) / math.sqrt(2)) / 2
|
||||||
delta0 = delta0 - math.exp(eps)*math.erfc((eps+.5)/math.sqrt(2))/2
|
delta0 = delta0 - math.exp(eps) * math.erfc((eps + .5) / math.sqrt(2)) / 2
|
||||||
self.assertLessEqual(delta0, delta+1e-300) # need tolerance 10^-300
|
self.assertLessEqual(delta0, delta + 1e-300) # need tolerance 10^-300
|
||||||
|
|
||||||
# Compute the "standard" upper bound, which should be an upper bound.
|
# Compute the "standard" upper bound, which should be an upper bound.
|
||||||
# Note, if orders is too sparse, this will NOT be an upper bound.
|
# Note, if orders is too sparse, this will NOT be an upper bound.
|
||||||
if eps >= 0.5:
|
if eps >= 0.5:
|
||||||
delta1 = math.exp(-0.5*(eps-0.5)**2)
|
delta1 = math.exp(-0.5 * (eps - 0.5)**2)
|
||||||
else:
|
else:
|
||||||
delta1 = 1
|
delta1 = 1
|
||||||
self.assertLessEqual(delta, delta1+1e-300)
|
self.assertLessEqual(delta, delta1 + 1e-300)
|
||||||
|
|
||||||
def test_compute_rdp_from_ledger(self):
|
|
||||||
orders = range(2, 33)
|
|
||||||
q = 0.1
|
|
||||||
n = 1000
|
|
||||||
l2_norm_clip = 3.14159
|
|
||||||
noise_stddev = 2.71828
|
|
||||||
steps = 3
|
|
||||||
|
|
||||||
query_entry = privacy_ledger.GaussianSumQueryEntry(
|
|
||||||
l2_norm_clip, noise_stddev)
|
|
||||||
ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps
|
|
||||||
|
|
||||||
z = noise_stddev / l2_norm_clip
|
|
||||||
rdp = rdp_accountant.compute_rdp(q, z, steps, orders)
|
|
||||||
rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders)
|
|
||||||
self.assertSequenceAlmostEqual(rdp, rdp_from_ledger)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -46,11 +46,6 @@ class DiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
|
||||||
self._l2_norm_bound = l2_norm_bound
|
self._l2_norm_bound = l2_norm_bound
|
||||||
self._stddev = stddev
|
self._stddev = stddev
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
del ledger # Unused.
|
|
||||||
raise NotImplementedError('Ledger has not yet been implemented for'
|
|
||||||
'DiscreteGaussianSumQuery!')
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
return self._GlobalState(
|
return self._GlobalState(
|
||||||
tf.cast(self._l2_norm_bound, tf.float32),
|
tf.cast(self._l2_norm_bound, tf.float32),
|
||||||
|
|
|
@ -46,11 +46,6 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
|
||||||
self._l2_norm_bound = l2_norm_bound
|
self._l2_norm_bound = l2_norm_bound
|
||||||
self._local_stddev = local_stddev
|
self._local_stddev = local_stddev
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
del ledger # Unused.
|
|
||||||
raise NotImplementedError('Ledger has not yet been implemented for'
|
|
||||||
'DistributedDiscreteGaussianSumQuery!')
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
return self._GlobalState(
|
return self._GlobalState(
|
||||||
tf.cast(self._l2_norm_bound, tf.float32),
|
tf.cast(self._l2_norm_bound, tf.float32),
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""An interface for differentially private query mechanisms.
|
"""An interface for differentially private query mechanisms.
|
||||||
|
|
||||||
The DPQuery class abstracts the differential privacy mechanism needed by DP-SGD.
|
The DPQuery class abstracts the differential privacy mechanism needed by DP-SGD.
|
||||||
|
@ -100,18 +99,6 @@ class DPQuery(object):
|
||||||
|
|
||||||
__metaclass__ = abc.ABCMeta
|
__metaclass__ = abc.ABCMeta
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Supplies privacy ledger to which the query can record privacy events.
|
|
||||||
|
|
||||||
The ledger should be updated with each call to get_noised_result.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
ledger: A `PrivacyLedger`.
|
|
||||||
"""
|
|
||||||
del ledger
|
|
||||||
raise TypeError(
|
|
||||||
'DPQuery type %s does not support set_ledger.' % type(self).__name__)
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
"""Returns the initial global state for the DPQuery.
|
"""Returns the initial global state for the DPQuery.
|
||||||
|
|
||||||
|
@ -155,7 +142,6 @@ class DPQuery(object):
|
||||||
as a template to create the initial sample state. It is assumed that the
|
as a template to create the initial sample state. It is assumed that the
|
||||||
leaves of the structure are python scalars or some type that has
|
leaves of the structure are python scalars or some type that has
|
||||||
properties `shape` and `dtype`.
|
properties `shape` and `dtype`.
|
||||||
|
|
||||||
Returns: An initial sample state.
|
Returns: An initial sample state.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
@ -171,12 +157,12 @@ class DPQuery(object):
|
||||||
variables that are stored in self.
|
variables that are stored in self.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
params: The parameters for the sample. In standard DP-SGD training,
|
params: The parameters for the sample. In standard DP-SGD training, the
|
||||||
the clipping norm for the sample's microbatch gradients (i.e.,
|
clipping norm for the sample's microbatch gradients (i.e., a maximum
|
||||||
a maximum norm magnitude to which each gradient is clipped)
|
norm magnitude to which each gradient is clipped)
|
||||||
record: The record to be processed. In standard DP-SGD training,
|
record: The record to be processed. In standard DP-SGD training, the
|
||||||
the gradient computed for the examples in one microbatch, which
|
gradient computed for the examples in one microbatch, which may be the
|
||||||
may be the gradient for just one example (for size 1 microbatches).
|
gradient for just one example (for size 1 microbatches).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A structure of tensors to be aggregated.
|
A structure of tensors to be aggregated.
|
||||||
|
@ -185,8 +171,7 @@ class DPQuery(object):
|
||||||
return record
|
return record
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def accumulate_preprocessed_record(
|
def accumulate_preprocessed_record(self, sample_state, preprocessed_record):
|
||||||
self, sample_state, preprocessed_record):
|
|
||||||
"""Accumulates a single preprocessed record into the sample state.
|
"""Accumulates a single preprocessed record into the sample state.
|
||||||
|
|
||||||
This method is intended to only do simple aggregation, typically just a sum.
|
This method is intended to only do simple aggregation, typically just a sum.
|
||||||
|
@ -194,8 +179,8 @@ class DPQuery(object):
|
||||||
declaratively specify the type of aggregation required.
|
declaratively specify the type of aggregation required.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
sample_state: The current sample state. In standard DP-SGD training,
|
sample_state: The current sample state. In standard DP-SGD training, the
|
||||||
the accumulated sum of previous clipped microbatch gradients.
|
accumulated sum of previous clipped microbatch gradients.
|
||||||
preprocessed_record: The preprocessed record to accumulate.
|
preprocessed_record: The preprocessed record to accumulate.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
@ -211,22 +196,22 @@ class DPQuery(object):
|
||||||
functions run on a single device. Typically this will be a simple sum.
|
functions run on a single device. Typically this will be a simple sum.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
params: The parameters for the sample. In standard DP-SGD training,
|
params: The parameters for the sample. In standard DP-SGD training, the
|
||||||
the clipping norm for the sample's microbatch gradients (i.e.,
|
clipping norm for the sample's microbatch gradients (i.e., a maximum
|
||||||
a maximum norm magnitude to which each gradient is clipped)
|
norm magnitude to which each gradient is clipped)
|
||||||
sample_state: The current sample state. In standard DP-SGD training,
|
sample_state: The current sample state. In standard DP-SGD training, the
|
||||||
the accumulated sum of previous clipped microbatch gradients.
|
accumulated sum of previous clipped microbatch gradients.
|
||||||
record: The record to accumulate. In standard DP-SGD training,
|
record: The record to accumulate. In standard DP-SGD training, the
|
||||||
the gradient computed for the examples in one microbatch, which
|
gradient computed for the examples in one microbatch, which may be the
|
||||||
may be the gradient for just one example (for size 1 microbatches).
|
gradient for just one example (for size 1 microbatches).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The updated sample state. In standard DP-SGD training, the set of
|
The updated sample state. In standard DP-SGD training, the set of
|
||||||
previous microbatch gradients with the addition of the record argument.
|
previous microbatch gradients with the addition of the record argument.
|
||||||
"""
|
"""
|
||||||
preprocessed_record = self.preprocess_record(params, record)
|
preprocessed_record = self.preprocess_record(params, record)
|
||||||
return self.accumulate_preprocessed_record(
|
return self.accumulate_preprocessed_record(sample_state,
|
||||||
sample_state, preprocessed_record)
|
preprocessed_record)
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def merge_sample_states(self, sample_state_1, sample_state_2):
|
def merge_sample_states(self, sample_state_1, sample_state_2):
|
||||||
|
|
|
@ -47,10 +47,6 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery):
|
||||||
self._stddev = stddev
|
self._stddev = stddev
|
||||||
self._ledger = None
|
self._ledger = None
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
self._ledger = ledger
|
|
||||||
|
|
||||||
def make_global_state(self, l2_norm_clip, stddev):
|
def make_global_state(self, l2_norm_clip, stddev):
|
||||||
"""Creates a global state from the given parameters."""
|
"""Creates a global state from the given parameters."""
|
||||||
return self._GlobalState(
|
return self._GlobalState(
|
||||||
|
|
|
@ -11,9 +11,7 @@
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
"""Implements DPQuery interface for queries over nested structures."""
|
||||||
"""Implements DPQuery interface for queries over nested structures.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
@ -60,16 +58,13 @@ class NestedQuery(dp_query.DPQuery):
|
||||||
|
|
||||||
def _map_to_queries(self, fn, *inputs, **kwargs):
|
def _map_to_queries(self, fn, *inputs, **kwargs):
|
||||||
"""Maps DPQuery methods to the subqueries."""
|
"""Maps DPQuery methods to the subqueries."""
|
||||||
|
|
||||||
def caller(query, *args):
|
def caller(query, *args):
|
||||||
return getattr(query, fn)(*args, **kwargs)
|
return getattr(query, fn)(*args, **kwargs)
|
||||||
|
|
||||||
return tree.map_structure_up_to(self._queries, caller, self._queries,
|
return tree.map_structure_up_to(self._queries, caller, self._queries,
|
||||||
*inputs)
|
*inputs)
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
self._map_to_queries('set_ledger', ledger=ledger)
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
||||||
return self._map_to_queries('initial_global_state')
|
return self._map_to_queries('initial_global_state')
|
||||||
|
@ -89,18 +84,15 @@ class NestedQuery(dp_query.DPQuery):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.preprocess_record`."""
|
"""Implements `tensorflow_privacy.DPQuery.preprocess_record`."""
|
||||||
return self._map_to_queries('preprocess_record', params, record)
|
return self._map_to_queries('preprocess_record', params, record)
|
||||||
|
|
||||||
def accumulate_preprocessed_record(
|
def accumulate_preprocessed_record(self, sample_state, preprocessed_record):
|
||||||
self, sample_state, preprocessed_record):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.accumulate_preprocessed_record`."""
|
"""Implements `tensorflow_privacy.DPQuery.accumulate_preprocessed_record`."""
|
||||||
return self._map_to_queries(
|
return self._map_to_queries('accumulate_preprocessed_record', sample_state,
|
||||||
'accumulate_preprocessed_record',
|
preprocessed_record)
|
||||||
sample_state,
|
|
||||||
preprocessed_record)
|
|
||||||
|
|
||||||
def merge_sample_states(self, sample_state_1, sample_state_2):
|
def merge_sample_states(self, sample_state_1, sample_state_2):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.merge_sample_states`."""
|
"""Implements `tensorflow_privacy.DPQuery.merge_sample_states`."""
|
||||||
return self._map_to_queries(
|
return self._map_to_queries('merge_sample_states', sample_state_1,
|
||||||
'merge_sample_states', sample_state_1, sample_state_2)
|
sample_state_2)
|
||||||
|
|
||||||
def get_noised_result(self, sample_state, global_state):
|
def get_noised_result(self, sample_state, global_state):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.get_noised_result`."""
|
"""Implements `tensorflow_privacy.DPQuery.get_noised_result`."""
|
||||||
|
@ -118,12 +110,12 @@ class NestedQuery(dp_query.DPQuery):
|
||||||
|
|
||||||
def add_metrics(tuple_path, subquery, subquery_global_state):
|
def add_metrics(tuple_path, subquery, subquery_global_state):
|
||||||
metrics.update({
|
metrics.update({
|
||||||
'/'.join(str(s) for s in tuple_path + (name,)): metric
|
'/'.join(str(s) for s in tuple_path + (name,)): metric for name,
|
||||||
for name, metric
|
metric in subquery.derive_metrics(subquery_global_state).items()
|
||||||
in subquery.derive_metrics(subquery_global_state).items()})
|
})
|
||||||
|
|
||||||
tree.map_structure_with_path_up_to(
|
tree.map_structure_with_path_up_to(self._queries, add_metrics,
|
||||||
self._queries, add_metrics, self._queries, global_state)
|
self._queries, global_state)
|
||||||
|
|
||||||
return metrics
|
return metrics
|
||||||
|
|
||||||
|
@ -137,12 +129,13 @@ class NestedSumQuery(NestedQuery, dp_query.SumAggregationDPQuery):
|
||||||
Args:
|
Args:
|
||||||
queries: A nested structure of queries that must all be
|
queries: A nested structure of queries that must all be
|
||||||
SumAggregationDPQueries.
|
SumAggregationDPQueries.
|
||||||
|
|
||||||
Raises: TypeError if any of the subqueries are not SumAggregationDPQueries.
|
Raises: TypeError if any of the subqueries are not SumAggregationDPQueries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def check(query):
|
def check(query):
|
||||||
if not isinstance(query, dp_query.SumAggregationDPQuery):
|
if not isinstance(query, dp_query.SumAggregationDPQuery):
|
||||||
raise TypeError('All subqueries must be SumAggregationDPQueries.')
|
raise TypeError('All subqueries must be SumAggregationDPQueries.')
|
||||||
|
|
||||||
tree.map_structure(check, queries)
|
tree.map_structure(check, queries)
|
||||||
|
|
||||||
super(NestedSumQuery, self).__init__(queries)
|
super(NestedSumQuery, self).__init__(queries)
|
||||||
|
|
|
@ -17,8 +17,6 @@ from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
import tensorflow.compat.v1 as tf
|
import tensorflow.compat.v1 as tf
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.dp_query import dp_query
|
from tensorflow_privacy.privacy.dp_query import dp_query
|
||||||
|
@ -33,20 +31,11 @@ class NoPrivacySumQuery(dp_query.SumAggregationDPQuery):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._ledger = None
|
self._ledger = None
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
warnings.warn(
|
|
||||||
'Attempt to use NoPrivacySumQuery with privacy ledger. Privacy '
|
|
||||||
'guarantees will be vacuous.')
|
|
||||||
self._ledger = ledger
|
|
||||||
|
|
||||||
def get_noised_result(self, sample_state, global_state):
|
def get_noised_result(self, sample_state, global_state):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.get_noised_result`."""
|
"""Implements `tensorflow_privacy.DPQuery.get_noised_result`."""
|
||||||
|
|
||||||
if self._ledger:
|
if self._ledger:
|
||||||
dependencies = [
|
dependencies = [self._ledger.record_sum_query(float('inf'), 0.0)]
|
||||||
self._ledger.record_sum_query(float('inf'), 0.0)
|
|
||||||
]
|
|
||||||
else:
|
else:
|
||||||
dependencies = []
|
dependencies = []
|
||||||
|
|
||||||
|
@ -71,17 +60,10 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
|
||||||
"""Initializes the NoPrivacyAverageQuery."""
|
"""Initializes the NoPrivacyAverageQuery."""
|
||||||
self._ledger = None
|
self._ledger = None
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
warnings.warn(
|
|
||||||
'Attempt to use NoPrivacyAverageQuery with privacy ledger. Privacy '
|
|
||||||
'guarantees will be vacuous.')
|
|
||||||
self._ledger = ledger
|
|
||||||
|
|
||||||
def initial_sample_state(self, template):
|
def initial_sample_state(self, template):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.initial_sample_state`."""
|
"""Implements `tensorflow_privacy.DPQuery.initial_sample_state`."""
|
||||||
return (super(NoPrivacyAverageQuery, self).initial_sample_state(template),
|
return (super(NoPrivacyAverageQuery,
|
||||||
tf.constant(0.0))
|
self).initial_sample_state(template), tf.constant(0.0))
|
||||||
|
|
||||||
def preprocess_record(self, params, record, weight=1):
|
def preprocess_record(self, params, record, weight=1):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.preprocess_record`.
|
"""Implements `tensorflow_privacy.DPQuery.preprocess_record`.
|
||||||
|
@ -122,9 +104,7 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
|
||||||
sum_state, denominator = sample_state
|
sum_state, denominator = sample_state
|
||||||
|
|
||||||
if self._ledger:
|
if self._ledger:
|
||||||
dependencies = [
|
dependencies = [self._ledger.record_sum_query(float('inf'), 0.0)]
|
||||||
self._ledger.record_sum_query(float('inf'), 0.0)
|
|
||||||
]
|
|
||||||
else:
|
else:
|
||||||
dependencies = []
|
dependencies = []
|
||||||
|
|
||||||
|
|
|
@ -11,9 +11,7 @@
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
"""Implements DPQuery interface for normalized queries."""
|
||||||
"""Implements DPQuery interface for normalized queries.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
@ -38,8 +36,8 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# pylint: disable=invalid-name
|
# pylint: disable=invalid-name
|
||||||
_GlobalState = collections.namedtuple(
|
_GlobalState = collections.namedtuple('_GlobalState',
|
||||||
'_GlobalState', ['numerator_state', 'denominator'])
|
['numerator_state', 'denominator'])
|
||||||
|
|
||||||
def __init__(self, numerator_query, denominator):
|
def __init__(self, numerator_query, denominator):
|
||||||
"""Initializes the NormalizedQuery.
|
"""Initializes the NormalizedQuery.
|
||||||
|
@ -55,15 +53,11 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery):
|
||||||
|
|
||||||
assert isinstance(self._numerator, dp_query.SumAggregationDPQuery)
|
assert isinstance(self._numerator, dp_query.SumAggregationDPQuery)
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
self._numerator.set_ledger(ledger)
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
||||||
denominator = tf.cast(self._denominator, tf.float32)
|
denominator = tf.cast(self._denominator, tf.float32)
|
||||||
return self._GlobalState(
|
return self._GlobalState(self._numerator.initial_global_state(),
|
||||||
self._numerator.initial_global_state(), denominator)
|
denominator)
|
||||||
|
|
||||||
def derive_sample_params(self, global_state):
|
def derive_sample_params(self, global_state):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.derive_sample_params`."""
|
"""Implements `tensorflow_privacy.DPQuery.derive_sample_params`."""
|
||||||
|
@ -82,6 +76,7 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.get_noised_result`."""
|
"""Implements `tensorflow_privacy.DPQuery.get_noised_result`."""
|
||||||
noised_sum, new_sum_global_state = self._numerator.get_noised_result(
|
noised_sum, new_sum_global_state = self._numerator.get_noised_result(
|
||||||
sample_state, global_state.numerator_state)
|
sample_state, global_state.numerator_state)
|
||||||
|
|
||||||
def normalize(v):
|
def normalize(v):
|
||||||
return tf.truediv(v, global_state.denominator)
|
return tf.truediv(v, global_state.denominator)
|
||||||
|
|
||||||
|
|
|
@ -91,11 +91,6 @@ class QuantileAdaptiveClipSumQuery(dp_query.SumAggregationDPQuery):
|
||||||
assert isinstance(self._quantile_estimator_query,
|
assert isinstance(self._quantile_estimator_query,
|
||||||
dp_query.SumAggregationDPQuery)
|
dp_query.SumAggregationDPQuery)
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
self._sum_query.set_ledger(ledger)
|
|
||||||
self._quantile_estimator_query.set_ledger(ledger)
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
||||||
return self._GlobalState(
|
return self._GlobalState(
|
||||||
|
|
|
@ -22,7 +22,6 @@ from absl.testing import parameterized
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import tensorflow.compat.v1 as tf
|
import tensorflow.compat.v1 as tf
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.dp_query import quantile_adaptive_clip_sum_query
|
from tensorflow_privacy.privacy.dp_query import quantile_adaptive_clip_sum_query
|
||||||
from tensorflow_privacy.privacy.dp_query import test_utils
|
from tensorflow_privacy.privacy.dp_query import test_utils
|
||||||
|
|
||||||
|
@ -291,53 +290,6 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase,
|
||||||
if t > 40:
|
if t > 40:
|
||||||
self.assertNear(actual_clip, 5.0, 0.5)
|
self.assertNear(actual_clip, 5.0, 0.5)
|
||||||
|
|
||||||
def test_ledger(self):
|
|
||||||
record1 = tf.constant([8.5])
|
|
||||||
record2 = tf.constant([-7.25])
|
|
||||||
|
|
||||||
population_size = tf.Variable(0)
|
|
||||||
selection_probability = tf.Variable(1.0)
|
|
||||||
|
|
||||||
query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
|
|
||||||
initial_l2_norm_clip=10.0,
|
|
||||||
noise_multiplier=1.0,
|
|
||||||
target_unclipped_quantile=0.0,
|
|
||||||
learning_rate=1.0,
|
|
||||||
clipped_count_stddev=0.0,
|
|
||||||
expected_num_records=2.0,
|
|
||||||
geometric_update=False)
|
|
||||||
|
|
||||||
query = privacy_ledger.QueryWithLedger(query, population_size,
|
|
||||||
selection_probability)
|
|
||||||
|
|
||||||
# First sample.
|
|
||||||
tf.assign(population_size, 10)
|
|
||||||
tf.assign(selection_probability, 0.1)
|
|
||||||
_, global_state = test_utils.run_query(query, [record1, record2])
|
|
||||||
|
|
||||||
expected_queries = [[10.0, 10.0], [0.5, 0.0]]
|
|
||||||
formatted = query.ledger.get_formatted_ledger_eager()
|
|
||||||
sample_1 = formatted[0]
|
|
||||||
self.assertAllClose(sample_1.population_size, 10.0)
|
|
||||||
self.assertAllClose(sample_1.selection_probability, 0.1)
|
|
||||||
self.assertAllClose(sample_1.queries, expected_queries)
|
|
||||||
|
|
||||||
# Second sample.
|
|
||||||
tf.assign(population_size, 20)
|
|
||||||
tf.assign(selection_probability, 0.2)
|
|
||||||
test_utils.run_query(query, [record1, record2], global_state)
|
|
||||||
|
|
||||||
formatted = query.ledger.get_formatted_ledger_eager()
|
|
||||||
sample_1, sample_2 = formatted
|
|
||||||
self.assertAllClose(sample_1.population_size, 10.0)
|
|
||||||
self.assertAllClose(sample_1.selection_probability, 0.1)
|
|
||||||
self.assertAllClose(sample_1.queries, expected_queries)
|
|
||||||
|
|
||||||
expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
|
|
||||||
self.assertAllClose(sample_2.population_size, 20.0)
|
|
||||||
self.assertAllClose(sample_2.selection_probability, 0.2)
|
|
||||||
self.assertAllClose(sample_2.queries, expected_queries_2)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
tf.test.main()
|
tf.test.main()
|
||||||
|
|
|
@ -101,10 +101,6 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery):
|
||||||
l2_norm_clip=0.5, stddev=below_estimate_stddev),
|
l2_norm_clip=0.5, stddev=below_estimate_stddev),
|
||||||
denominator=expected_num_records)
|
denominator=expected_num_records)
|
||||||
|
|
||||||
def set_ledger(self, ledger):
|
|
||||||
"""Implements `tensorflow_privacy.DPQuery.set_ledger`."""
|
|
||||||
self._below_estimate_query.set_ledger(ledger)
|
|
||||||
|
|
||||||
def initial_global_state(self):
|
def initial_global_state(self):
|
||||||
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
"""Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
|
||||||
return self._GlobalState(
|
return self._GlobalState(
|
||||||
|
|
|
@ -21,7 +21,6 @@ from absl import logging
|
||||||
|
|
||||||
import tensorflow.compat.v1 as tf
|
import tensorflow.compat.v1 as tf
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,8 +165,8 @@ def make_optimizer_class(cls):
|
||||||
sample_state = process_microbatch(idx, sample_state)
|
sample_state = process_microbatch(idx, sample_state)
|
||||||
|
|
||||||
grad_sums, self._global_state = (
|
grad_sums, self._global_state = (
|
||||||
self._dp_sum_query.get_noised_result(
|
self._dp_sum_query.get_noised_result(sample_state,
|
||||||
sample_state, self._global_state))
|
self._global_state))
|
||||||
|
|
||||||
def normalize(v):
|
def normalize(v):
|
||||||
return v / tf.cast(self._num_microbatches, tf.float32)
|
return v / tf.cast(self._num_microbatches, tf.float32)
|
||||||
|
@ -197,8 +196,8 @@ def make_optimizer_class(cls):
|
||||||
"""Process one microbatch (record) with privacy helper."""
|
"""Process one microbatch (record) with privacy helper."""
|
||||||
self_super = super(DPOptimizerClass, self)
|
self_super = super(DPOptimizerClass, self)
|
||||||
|
|
||||||
mean_loss = tf.reduce_mean(input_tensor=tf.gather(
|
mean_loss = tf.reduce_mean(
|
||||||
microbatches_losses, [i]))
|
input_tensor=tf.gather(microbatches_losses, [i]))
|
||||||
|
|
||||||
if hasattr(self_super, 'compute_gradients'):
|
if hasattr(self_super, 'compute_gradients'):
|
||||||
# This case covers optimizers in tf.train.
|
# This case covers optimizers in tf.train.
|
||||||
|
@ -208,8 +207,8 @@ def make_optimizer_class(cls):
|
||||||
compute_gradients_fn = self_super._compute_gradients # pylint: disable=protected-access
|
compute_gradients_fn = self_super._compute_gradients # pylint: disable=protected-access
|
||||||
|
|
||||||
grads, _ = zip(*compute_gradients_fn(
|
grads, _ = zip(*compute_gradients_fn(
|
||||||
mean_loss, var_list, gate_gradients,
|
mean_loss, var_list, gate_gradients, aggregation_method,
|
||||||
aggregation_method, colocate_gradients_with_ops, grad_loss))
|
colocate_gradients_with_ops, grad_loss))
|
||||||
grads_list = list(grads)
|
grads_list = list(grads)
|
||||||
|
|
||||||
sample_state = self._dp_sum_query.accumulate_record(
|
sample_state = self._dp_sum_query.accumulate_record(
|
||||||
|
@ -218,8 +217,8 @@ def make_optimizer_class(cls):
|
||||||
|
|
||||||
if var_list is None:
|
if var_list is None:
|
||||||
var_list = (
|
var_list = (
|
||||||
tf.trainable_variables() + tf.get_collection(
|
tf.trainable_variables() +
|
||||||
tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
|
tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
|
||||||
|
|
||||||
sample_state = self._dp_sum_query.initial_sample_state(var_list)
|
sample_state = self._dp_sum_query.initial_sample_state(var_list)
|
||||||
|
|
||||||
|
@ -237,8 +236,8 @@ def make_optimizer_class(cls):
|
||||||
cond=cond_fn, body=body_fn, loop_vars=[idx, sample_state])
|
cond=cond_fn, body=body_fn, loop_vars=[idx, sample_state])
|
||||||
|
|
||||||
grad_sums, self._global_state = (
|
grad_sums, self._global_state = (
|
||||||
self._dp_sum_query.get_noised_result(
|
self._dp_sum_query.get_noised_result(sample_state,
|
||||||
sample_state, self._global_state))
|
self._global_state))
|
||||||
|
|
||||||
def normalize(v):
|
def normalize(v):
|
||||||
try:
|
try:
|
||||||
|
@ -307,9 +306,7 @@ def make_gaussian_optimizer_class(cls):
|
||||||
```
|
```
|
||||||
|
|
||||||
""").format(
|
""").format(
|
||||||
'tf.compat.v1.train.' + cls.__name__,
|
'tf.compat.v1.train.' + cls.__name__, cls.__name__, cls.__name__,
|
||||||
cls.__name__,
|
|
||||||
cls.__name__,
|
|
||||||
'DP' + cls.__name__.replace('Optimizer', 'GaussianOptimizer'))
|
'DP' + cls.__name__.replace('Optimizer', 'GaussianOptimizer'))
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -317,7 +314,6 @@ def make_gaussian_optimizer_class(cls):
|
||||||
l2_norm_clip,
|
l2_norm_clip,
|
||||||
noise_multiplier,
|
noise_multiplier,
|
||||||
num_microbatches=None,
|
num_microbatches=None,
|
||||||
ledger=None,
|
|
||||||
unroll_microbatches=False,
|
unroll_microbatches=False,
|
||||||
*args, # pylint: disable=keyword-arg-before-vararg
|
*args, # pylint: disable=keyword-arg-before-vararg
|
||||||
**kwargs):
|
**kwargs):
|
||||||
|
@ -329,7 +325,6 @@ def make_gaussian_optimizer_class(cls):
|
||||||
num_microbatches: Number of microbatches into which each minibatch is
|
num_microbatches: Number of microbatches into which each minibatch is
|
||||||
split. If `None`, will default to the size of the minibatch, and
|
split. If `None`, will default to the size of the minibatch, and
|
||||||
per-example gradients will be computed.
|
per-example gradients will be computed.
|
||||||
ledger: Defaults to `None`. An instance of `tf_privacy.PrivacyLedger`.
|
|
||||||
unroll_microbatches: If true, processes microbatches within a Python
|
unroll_microbatches: If true, processes microbatches within a Python
|
||||||
loop instead of a `tf.while_loop`. Can be used if using a
|
loop instead of a `tf.while_loop`. Can be used if using a
|
||||||
`tf.while_loop` raises an exception.
|
`tf.while_loop` raises an exception.
|
||||||
|
@ -344,16 +339,9 @@ def make_gaussian_optimizer_class(cls):
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(
|
dp_sum_query = gaussian_query.GaussianSumQuery(
|
||||||
l2_norm_clip, l2_norm_clip * noise_multiplier)
|
l2_norm_clip, l2_norm_clip * noise_multiplier)
|
||||||
|
|
||||||
if ledger:
|
super(DPGaussianOptimizerClass,
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
|
self).__init__(dp_sum_query, num_microbatches, unroll_microbatches,
|
||||||
ledger=ledger)
|
*args, **kwargs)
|
||||||
|
|
||||||
super(DPGaussianOptimizerClass, self).__init__(
|
|
||||||
dp_sum_query,
|
|
||||||
num_microbatches,
|
|
||||||
unroll_microbatches,
|
|
||||||
*args,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
def get_config(self):
|
def get_config(self):
|
||||||
"""Creates configuration for Keras serialization.
|
"""Creates configuration for Keras serialization.
|
||||||
|
@ -370,7 +358,8 @@ def make_gaussian_optimizer_class(cls):
|
||||||
config.update({
|
config.update({
|
||||||
'l2_norm_clip': self._l2_norm_clip,
|
'l2_norm_clip': self._l2_norm_clip,
|
||||||
'noise_multiplier': self._noise_multiplier,
|
'noise_multiplier': self._noise_multiplier,
|
||||||
'num_microbatches': self._num_microbatches})
|
'num_microbatches': self._num_microbatches
|
||||||
|
})
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
@ -380,6 +369,7 @@ def make_gaussian_optimizer_class(cls):
|
||||||
|
|
||||||
return DPGaussianOptimizerClass
|
return DPGaussianOptimizerClass
|
||||||
|
|
||||||
|
|
||||||
AdagradOptimizer = tf.train.AdagradOptimizer
|
AdagradOptimizer = tf.train.AdagradOptimizer
|
||||||
AdamOptimizer = tf.train.AdamOptimizer
|
AdamOptimizer = tf.train.AdamOptimizer
|
||||||
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
|
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
|
||||||
|
|
|
@ -22,7 +22,6 @@ import numpy as np
|
||||||
from six.moves import range
|
from six.moves import range
|
||||||
import tensorflow.compat.v1 as tf
|
import tensorflow.compat.v1 as tf
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
||||||
from tensorflow_privacy.privacy.optimizers import dp_optimizer
|
from tensorflow_privacy.privacy.optimizers import dp_optimizer
|
||||||
|
|
||||||
|
@ -56,13 +55,9 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(
|
|
||||||
dp_sum_query, 1e6, num_microbatches / 1e6)
|
|
||||||
|
|
||||||
opt = cls(
|
opt = cls(
|
||||||
dp_sum_query,
|
dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
|
||||||
num_microbatches=num_microbatches,
|
|
||||||
learning_rate=2.0)
|
|
||||||
|
|
||||||
self.evaluate(tf.global_variables_initializer())
|
self.evaluate(tf.global_variables_initializer())
|
||||||
# Fetch params to validate initial values
|
# Fetch params to validate initial values
|
||||||
|
@ -85,7 +80,6 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
|
|
||||||
|
|
||||||
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
||||||
|
|
||||||
|
@ -109,7 +103,6 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[0.0]])
|
data0 = tf.Variable([[0.0]])
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
|
|
||||||
|
|
||||||
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,6 @@ import numpy as np
|
||||||
from six.moves import range
|
from six.moves import range
|
||||||
import tensorflow.compat.v1 as tf
|
import tensorflow.compat.v1 as tf
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
from tensorflow_privacy.privacy.dp_query import gaussian_query
|
||||||
from tensorflow_privacy.privacy.optimizers import dp_optimizer
|
from tensorflow_privacy.privacy.optimizers import dp_optimizer
|
||||||
|
|
||||||
|
@ -51,9 +50,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
|
('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]),
|
||||||
('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]),
|
('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]),
|
||||||
('DPRMSPropOptimizer 1', dp_optimizer.DPRMSPropOptimizer, 1,
|
('DPRMSPropOptimizer 1', dp_optimizer.DPRMSPropOptimizer, 1,
|
||||||
[-2.5, -2.5]),
|
[-2.5, -2.5]), ('DPRMSPropOptimizer 2', dp_optimizer.DPRMSPropOptimizer,
|
||||||
('DPRMSPropOptimizer 2', dp_optimizer.DPRMSPropOptimizer, 2,
|
2, [-2.5, -2.5]),
|
||||||
[-2.5, -2.5]),
|
|
||||||
('DPRMSPropOptimizer 4', dp_optimizer.DPRMSPropOptimizer, 4, [-2.5, -2.5])
|
('DPRMSPropOptimizer 4', dp_optimizer.DPRMSPropOptimizer, 4, [-2.5, -2.5])
|
||||||
)
|
)
|
||||||
def testBaseline(self, cls, num_microbatches, expected_answer):
|
def testBaseline(self, cls, num_microbatches, expected_answer):
|
||||||
|
@ -62,13 +60,9 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(
|
|
||||||
dp_sum_query, 1e6, num_microbatches / 1e6)
|
|
||||||
|
|
||||||
opt = cls(
|
opt = cls(
|
||||||
dp_sum_query,
|
dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
|
||||||
num_microbatches=num_microbatches,
|
|
||||||
learning_rate=2.0)
|
|
||||||
|
|
||||||
self.evaluate(tf.global_variables_initializer())
|
self.evaluate(tf.global_variables_initializer())
|
||||||
# Fetch params to validate initial values
|
# Fetch params to validate initial values
|
||||||
|
@ -91,7 +85,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
|
|
||||||
|
|
||||||
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
||||||
|
|
||||||
|
@ -115,7 +108,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
data0 = tf.Variable([[0.0]])
|
data0 = tf.Variable([[0.0]])
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
|
|
||||||
|
|
||||||
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)
|
||||||
|
|
||||||
|
@ -157,11 +149,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
vector_loss = tf.math.squared_difference(labels, preds)
|
vector_loss = tf.math.squared_difference(labels, preds)
|
||||||
scalar_loss = tf.reduce_mean(input_tensor=vector_loss)
|
scalar_loss = tf.reduce_mean(input_tensor=vector_loss)
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
|
|
||||||
optimizer = dp_optimizer.DPGradientDescentOptimizer(
|
optimizer = dp_optimizer.DPGradientDescentOptimizer(
|
||||||
dp_sum_query,
|
dp_sum_query, num_microbatches=1, learning_rate=1.0)
|
||||||
num_microbatches=1,
|
|
||||||
learning_rate=1.0)
|
|
||||||
global_step = tf.train.get_global_step()
|
global_step = tf.train.get_global_step()
|
||||||
train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)
|
train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)
|
||||||
return tf.estimator.EstimatorSpec(
|
return tf.estimator.EstimatorSpec(
|
||||||
|
@ -201,8 +190,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
num_microbatches = 4
|
num_microbatches = 4
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(
|
|
||||||
dp_sum_query, 1e6, num_microbatches / 1e6)
|
|
||||||
|
|
||||||
opt = cls(
|
opt = cls(
|
||||||
dp_sum_query,
|
dp_sum_query,
|
||||||
|
@ -283,8 +270,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
extra_variable = tf.Variable('foo', trainable=True, dtype=tf.string)
|
extra_variable = tf.Variable('foo', trainable=True, dtype=tf.string)
|
||||||
|
|
||||||
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
|
||||||
dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6,
|
|
||||||
num_microbatches / 1e6)
|
|
||||||
|
|
||||||
opt = cls(
|
opt = cls(
|
||||||
dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
|
dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0)
|
||||||
|
@ -298,27 +283,26 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
sess.run(minimize_op)
|
sess.run(minimize_op)
|
||||||
|
|
||||||
def _testWriteOutAndReload(self, optimizer_cls):
|
def _testWriteOutAndReload(self, optimizer_cls):
|
||||||
optimizer = optimizer_cls(l2_norm_clip=1.0,
|
optimizer = optimizer_cls(
|
||||||
noise_multiplier=0.01,
|
l2_norm_clip=1.0, noise_multiplier=0.01, num_microbatches=1)
|
||||||
num_microbatches=1)
|
|
||||||
|
|
||||||
test_dir = self.get_temp_dir()
|
test_dir = self.get_temp_dir()
|
||||||
model_path = os.path.join(test_dir, 'model')
|
model_path = os.path.join(test_dir, 'model')
|
||||||
|
|
||||||
model = tf.keras.Sequential([tf.keras.layers.InputLayer(input_shape=(1, 1)),
|
model = tf.keras.Sequential([
|
||||||
tf.keras.layers.Dense(units=1,
|
tf.keras.layers.InputLayer(input_shape=(1, 1)),
|
||||||
activation='softmax')])
|
tf.keras.layers.Dense(units=1, activation='softmax')
|
||||||
model.compile(optimizer=optimizer,
|
])
|
||||||
loss=tf.keras.losses.SparseCategoricalCrossentropy(
|
model.compile(
|
||||||
from_logits=True))
|
optimizer=optimizer,
|
||||||
|
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
|
||||||
|
|
||||||
tf.keras.models.save_model(model, filepath=model_path,
|
tf.keras.models.save_model(
|
||||||
include_optimizer=True)
|
model, filepath=model_path, include_optimizer=True)
|
||||||
|
|
||||||
optimizer_cls_str = optimizer_cls.__name__
|
optimizer_cls_str = optimizer_cls.__name__
|
||||||
tf.keras.models.load_model(model_path,
|
tf.keras.models.load_model(
|
||||||
custom_objects={
|
model_path, custom_objects={optimizer_cls_str: optimizer_cls})
|
||||||
optimizer_cls_str: optimizer_cls})
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""Training a language model (recurrent neural network) with DP-SGD optimizer.
|
"""Training a language model (recurrent neural network) with DP-SGD optimizer.
|
||||||
|
|
||||||
This tutorial uses a corpus of text from TensorFlow datasets unless a
|
This tutorial uses a corpus of text from TensorFlow datasets unless a
|
||||||
|
@ -44,7 +43,6 @@ import numpy as np
|
||||||
import tensorflow.compat.v1 as tf
|
import tensorflow.compat.v1 as tf
|
||||||
import tensorflow_datasets as tfds
|
import tensorflow_datasets as tfds
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.analysis import privacy_ledger
|
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
|
||||||
from tensorflow_privacy.privacy.optimizers import dp_optimizer
|
from tensorflow_privacy.privacy.optimizers import dp_optimizer
|
||||||
|
@ -92,27 +90,20 @@ def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument
|
||||||
if mode == tf.estimator.ModeKeys.TRAIN:
|
if mode == tf.estimator.ModeKeys.TRAIN:
|
||||||
if FLAGS.dpsgd:
|
if FLAGS.dpsgd:
|
||||||
|
|
||||||
ledger = privacy_ledger.PrivacyLedger(
|
|
||||||
population_size=NB_TRAIN,
|
|
||||||
selection_probability=(FLAGS.batch_size / NB_TRAIN))
|
|
||||||
|
|
||||||
optimizer = dp_optimizer.DPAdamGaussianOptimizer(
|
optimizer = dp_optimizer.DPAdamGaussianOptimizer(
|
||||||
l2_norm_clip=FLAGS.l2_norm_clip,
|
l2_norm_clip=FLAGS.l2_norm_clip,
|
||||||
noise_multiplier=FLAGS.noise_multiplier,
|
noise_multiplier=FLAGS.noise_multiplier,
|
||||||
num_microbatches=FLAGS.microbatches,
|
num_microbatches=FLAGS.microbatches,
|
||||||
ledger=ledger,
|
|
||||||
learning_rate=FLAGS.learning_rate,
|
learning_rate=FLAGS.learning_rate,
|
||||||
unroll_microbatches=True)
|
unroll_microbatches=True)
|
||||||
opt_loss = vector_loss
|
opt_loss = vector_loss
|
||||||
else:
|
else:
|
||||||
optimizer = tf.train.AdamOptimizer(
|
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
|
||||||
learning_rate=FLAGS.learning_rate)
|
|
||||||
opt_loss = scalar_loss
|
opt_loss = scalar_loss
|
||||||
global_step = tf.train.get_global_step()
|
global_step = tf.train.get_global_step()
|
||||||
train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
|
train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
|
||||||
return tf.estimator.EstimatorSpec(mode=mode,
|
return tf.estimator.EstimatorSpec(
|
||||||
loss=scalar_loss,
|
mode=mode, loss=scalar_loss, train_op=train_op)
|
||||||
train_op=train_op)
|
|
||||||
|
|
||||||
# Add evaluation metrics (for EVAL mode).
|
# Add evaluation metrics (for EVAL mode).
|
||||||
elif mode == tf.estimator.ModeKeys.EVAL:
|
elif mode == tf.estimator.ModeKeys.EVAL:
|
||||||
|
@ -122,9 +113,8 @@ def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument
|
||||||
labels=tf.cast(x[:, 1:], dtype=tf.int32),
|
labels=tf.cast(x[:, 1:], dtype=tf.int32),
|
||||||
predictions=tf.argmax(input=logits, axis=2))
|
predictions=tf.argmax(input=logits, axis=2))
|
||||||
}
|
}
|
||||||
return tf.estimator.EstimatorSpec(mode=mode,
|
return tf.estimator.EstimatorSpec(
|
||||||
loss=scalar_loss,
|
mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
|
||||||
eval_metric_ops=eval_metric_ops)
|
|
||||||
|
|
||||||
|
|
||||||
def load_data():
|
def load_data():
|
||||||
|
@ -132,13 +122,13 @@ def load_data():
|
||||||
if not FLAGS.data_dir:
|
if not FLAGS.data_dir:
|
||||||
print('FLAGS.data_dir containing train.txt and test.txt was not specified, '
|
print('FLAGS.data_dir containing train.txt and test.txt was not specified, '
|
||||||
'using a substitute dataset from the tensorflow_datasets module.')
|
'using a substitute dataset from the tensorflow_datasets module.')
|
||||||
train_dataset = tfds.load(name='lm1b/subwords8k',
|
train_dataset = tfds.load(
|
||||||
split=tfds.Split.TRAIN,
|
name='lm1b/subwords8k',
|
||||||
batch_size=NB_TRAIN,
|
split=tfds.Split.TRAIN,
|
||||||
shuffle_files=True)
|
batch_size=NB_TRAIN,
|
||||||
test_dataset = tfds.load(name='lm1b/subwords8k',
|
shuffle_files=True)
|
||||||
split=tfds.Split.TEST,
|
test_dataset = tfds.load(
|
||||||
batch_size=10000)
|
name='lm1b/subwords8k', split=tfds.Split.TEST, batch_size=10000)
|
||||||
train_data = next(iter(tfds.as_numpy(train_dataset)))
|
train_data = next(iter(tfds.as_numpy(train_dataset)))
|
||||||
test_data = next(iter(tfds.as_numpy(test_dataset)))
|
test_data = next(iter(tfds.as_numpy(test_dataset)))
|
||||||
train_data = train_data['text'].flatten()
|
train_data = train_data['text'].flatten()
|
||||||
|
@ -162,10 +152,11 @@ def compute_epsilon(steps):
|
||||||
return float('inf')
|
return float('inf')
|
||||||
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
|
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
|
||||||
sampling_probability = FLAGS.batch_size / NB_TRAIN
|
sampling_probability = FLAGS.batch_size / NB_TRAIN
|
||||||
rdp = compute_rdp(q=sampling_probability,
|
rdp = compute_rdp(
|
||||||
noise_multiplier=FLAGS.noise_multiplier,
|
q=sampling_probability,
|
||||||
steps=steps,
|
noise_multiplier=FLAGS.noise_multiplier,
|
||||||
orders=orders)
|
steps=steps,
|
||||||
|
orders=orders)
|
||||||
# Delta is set to 1e-5 because Penn TreeBank has 60000 training points.
|
# Delta is set to 1e-5 because Penn TreeBank has 60000 training points.
|
||||||
return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
|
return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
|
||||||
|
|
||||||
|
@ -180,9 +171,8 @@ def main(unused_argv):
|
||||||
|
|
||||||
# Instantiate the tf.Estimator.
|
# Instantiate the tf.Estimator.
|
||||||
conf = tf.estimator.RunConfig(save_summary_steps=1000)
|
conf = tf.estimator.RunConfig(save_summary_steps=1000)
|
||||||
lm_classifier = tf.estimator.Estimator(model_fn=rnn_model_fn,
|
lm_classifier = tf.estimator.Estimator(
|
||||||
model_dir=FLAGS.model_dir,
|
model_fn=rnn_model_fn, model_dir=FLAGS.model_dir, config=conf)
|
||||||
config=conf)
|
|
||||||
|
|
||||||
# Create tf.Estimator input functions for the training and test data.
|
# Create tf.Estimator input functions for the training and test data.
|
||||||
batch_len = FLAGS.batch_size * SEQ_LEN
|
batch_len = FLAGS.batch_size * SEQ_LEN
|
||||||
|
@ -221,5 +211,6 @@ def main(unused_argv):
|
||||||
else:
|
else:
|
||||||
print('Trained with vanilla non-private SGD optimizer')
|
print('Trained with vanilla non-private SGD optimizer')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
app.run(main)
|
app.run(main)
|
||||||
|
|
Loading…
Reference in a new issue