diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index f775d80..cfd5344 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -43,6 +43,7 @@ else: # DPQuery classes from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery + from tensorflow_privacy.privacy.dp_query.discrete_gaussian_query import DiscreteGaussianSumQuery from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py new file mode 100644 index 0000000..444489b --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py @@ -0,0 +1,89 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implements DPQuery interface for discrete Gaussian mechanism.""" + +import collections + +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils +from tensorflow_privacy.privacy.dp_query import dp_query + + +class DiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): + """Implements DPQuery for discrete Gaussian sum queries. + + For each local record, we check the L2 norm bound and add discrete Gaussian + noise. In particular, this DPQuery does not perform L2 norm clipping and the + norms of the input records are expected to be bounded. + """ + + # pylint: disable=invalid-name + _GlobalState = collections.namedtuple('_GlobalState', + ['l2_norm_bound', 'stddev']) + + # pylint: disable=invalid-name + _SampleParams = collections.namedtuple('_SampleParams', + ['l2_norm_bound', 'stddev']) + + def __init__(self, l2_norm_bound, stddev): + """Initializes the DiscreteGaussianSumQuery. + + Args: + l2_norm_bound: The L2 norm bound to verify for each record. + stddev: The stddev of the discrete Gaussian noise added to the sum. + """ + self._l2_norm_bound = l2_norm_bound + self._stddev = stddev + + def set_ledger(self, ledger): + del ledger # Unused. + raise NotImplementedError('Ledger has not yet been implemented for' + 'DiscreteGaussianSumQuery!') + + def initial_global_state(self): + return self._GlobalState( + tf.cast(self._l2_norm_bound, tf.float32), + tf.cast(self._stddev, tf.float32)) + + def derive_sample_params(self, global_state): + return self._SampleParams(global_state.l2_norm_bound, global_state.stddev) + + def preprocess_record(self, params, record): + """Check record norm and add noise to the record.""" + record_as_list = tf.nest.flatten(record) + record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list] + tf.nest.map_structure(lambda x: tf.compat.v1.assert_type(x, tf.int32), + record_as_list) + dependencies = [ + tf.compat.v1.assert_less_equal( + tf.linalg.global_norm(record_as_float_list), + params.l2_norm_bound, + message=f'Global L2 norm exceeds {params.l2_norm_bound}.') + ] + with tf.control_dependencies(dependencies): + return tf.nest.map_structure(tf.identity, record) + + def get_noised_result(self, sample_state, global_state): + """Adds discrete Gaussian noise to the aggregate.""" + # Round up the noise as the TF discrete Gaussian sampler only takes + # integer noise stddevs for now. + ceil_stddev = tf.cast(tf.math.ceil(global_state.stddev), tf.int32) + + def add_noise(v): + noised_v = v + discrete_gaussian_utils.sample_discrete_gaussian( + scale=ceil_stddev, shape=tf.shape(v), dtype=v.dtype) + # Ensure shape as TF shape inference may fail due to custom noise sampler. + return tf.ensure_shape(noised_v, v.shape) + + return tf.nest.map_structure(add_noise, sample_state), global_state diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py new file mode 100644 index 0000000..fc14e7c --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py @@ -0,0 +1,148 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for DiscreteGaussianSumQuery.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_query +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils +from tensorflow_privacy.privacy.dp_query import test_utils + +dg_sum_query = discrete_gaussian_query.DiscreteGaussianSumQuery + + +def silence_tf_error_messages(func): + """Decorator that temporarily changes the TF logging levels.""" + + def wrapper(*args, **kwargs): + cur_verbosity = tf.compat.v1.logging.get_verbosity() + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) + func(*args, **kwargs) + tf.compat.v1.logging.set_verbosity(cur_verbosity) # Reset verbosity. + + return wrapper + + +class DiscreteGaussianQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_sum_no_noise(self): + with self.cached_session() as sess: + record1 = tf.constant([2, 0], dtype=tf.int32) + record2 = tf.constant([-1, 1], dtype=tf.int32) + + query = dg_sum_query(l2_norm_bound=10, stddev=0.0) + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = sess.run(query_result) + expected = [1, 1] + self.assertAllEqual(result, expected) + + @parameterized.product(sample_size=[1, 3]) + def test_sum_multiple_shapes(self, sample_size): + with self.cached_session() as sess: + t1 = tf.constant([2, 0], dtype=tf.int32) + t2 = tf.constant([-1, 1, 3], dtype=tf.int32) + t3 = tf.constant([-2], dtype=tf.int32) + record = [t1, t2, t3] + sample = [record] * sample_size + + query = dg_sum_query(l2_norm_bound=10, stddev=0.0) + query_result, _ = test_utils.run_query(query, sample) + expected = [sample_size * t1, sample_size * t2, sample_size * t3] + result, expected = sess.run([query_result, expected]) + # Use `assertAllClose` for nested structures equality (with tolerance=0). + self.assertAllClose(result, expected, atol=0) + + @parameterized.product(sample_size=[1, 3]) + def test_sum_nested_record_structure(self, sample_size): + with self.cached_session() as sess: + t1 = tf.constant([1, 0], dtype=tf.int32) + t2 = tf.constant([1, 1, 1], dtype=tf.int32) + t3 = tf.constant([1], dtype=tf.int32) + t4 = tf.constant([[1, 1], [1, 1]], dtype=tf.int32) + record = [t1, dict(a=t2, b=[t3, (t4, t1)])] + sample = [record] * sample_size + + query = dg_sum_query(l2_norm_bound=10, stddev=0.0) + query_result, _ = test_utils.run_query(query, sample) + result = sess.run(query_result) + + s = sample_size + expected = [t1 * s, dict(a=t2 * s, b=[t3 * s, (t4 * s, t1 * s)])] + # Use `assertAllClose` for nested structures equality (with tolerance=0) + self.assertAllClose(result, expected, atol=0) + + def test_sum_raise_on_float_inputs(self): + with self.cached_session() as sess: + record1 = tf.constant([2, 0], dtype=tf.float32) + record2 = tf.constant([-1, 1], dtype=tf.float32) + query = dg_sum_query(l2_norm_bound=10, stddev=0.0) + + with self.assertRaises(TypeError): + query_result, _ = test_utils.run_query(query, [record1, record2]) + sess.run(query_result) + + @parameterized.product(l2_norm_bound=[0, 3, 10, 14.1]) + @silence_tf_error_messages + def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound): + with self.cached_session() as sess: + record = tf.constant([10, 10], dtype=tf.int32) + query = dg_sum_query(l2_norm_bound=l2_norm_bound, stddev=0.0) + + with self.assertRaises(tf.errors.InvalidArgumentError): + query_result, _ = test_utils.run_query(query, [record]) + sess.run(query_result) + + def test_sum_float_norm_not_rounded(self): + """Test that the float L2 norm bound doesn't get rounded/casted to integers.""" + with self.cached_session() as sess: + # A casted/rounded norm bound would be insufficient. + l2_norm_bound = 14.2 + record = tf.constant([10, 10], dtype=tf.int32) + query = dg_sum_query(l2_norm_bound=l2_norm_bound, stddev=0.0) + query_result, _ = test_utils.run_query(query, [record]) + result = sess.run(query_result) + expected = [10, 10] + self.assertAllEqual(result, expected) + + @parameterized.product(stddev=[10, 100, 1000]) + def test_noisy_sum(self, stddev): + num_trials = 1000 + record_1 = tf.zeros([num_trials], dtype=tf.int32) + record_2 = tf.ones([num_trials], dtype=tf.int32) + sample = [record_1, record_2] + query = dg_sum_query(l2_norm_bound=num_trials, stddev=stddev) + result, _ = test_utils.run_query(query, sample) + + sampled_noise = discrete_gaussian_utils.sample_discrete_gaussian( + scale=tf.cast(stddev, tf.int32), shape=[num_trials], dtype=tf.int32) + + result, sampled_noise = self.evaluate([result, sampled_noise]) + + # The standard error of the stddev should be roughly sigma / sqrt(2N - 2), + # (https://stats.stackexchange.com/questions/156518) so set a rtol to give + # < 0.01% of failure (within ~4 standard errors). + rtol = 4 / np.sqrt(2 * num_trials - 2) + self.assertAllClose(np.std(result), stddev, rtol=rtol) + + # Use standard error of the mean to compare percentiles. + stderr = stddev / np.sqrt(num_trials) + self.assertAllClose( + np.percentile(result, [25, 50, 75]), + np.percentile(sampled_noise, [25, 50, 75]), + atol=4 * stderr) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py index 5b450ee..8dd4dba 100644 --- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py @@ -41,7 +41,7 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): Args: l2_norm_bound: The L2 norm bound to verify for each record. - local_stddev: The scale/stddev of the local discrete Gaussian noise. + local_stddev: The stddev of the local discrete Gaussian noise. """ self._l2_norm_bound = l2_norm_bound self._local_stddev = local_stddev @@ -65,7 +65,7 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): Args: record: The record to which we generate and add local noise. - local_stddev: The scale/stddev of the local discrete Gaussian noise. + local_stddev: The stddev of the local discrete Gaussian noise. shares: Number of shares of local noise to generate. Should be 1 for each record. This can be useful when we want to generate multiple noise shares at once. @@ -84,7 +84,7 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): scale=ceil_local_stddev, shape=shape, dtype=v.dtype) # Sum across the number of noise shares and add it. noised_v = v + tf.reduce_sum(dgauss_noise, axis=0) - # Ensure shape as TF shape inference may fail due to custom noise sampler. + # Set shape as TF shape inference may fail due to custom noise sampler. noised_v.set_shape(v.shape.as_list()) return noised_v diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py index b2f6051..1c1a461 100644 --- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for DistributedDiscreteGaussianQuery.""" +"""Tests for DistributedDiscreteGaussianSumQuery.""" from absl.testing import parameterized import numpy as np