From e7e11d14d9edde33805ce1270267742708105556 Mon Sep 17 00:00:00 2001 From: Ken Liu Date: Tue, 27 Jul 2021 17:17:53 -0700 Subject: [PATCH] Adds discrete Gaussian (sampler and distributed DPQuery) to public TF Privacy. PiperOrigin-RevId: 387232449 --- tensorflow_privacy/__init__.py | 1 + .../dp_query/discrete_gaussian_utils.py | 142 +++++++++ .../dp_query/discrete_gaussian_utils_test.py | 275 ++++++++++++++++++ .../distributed_discrete_gaussian_query.py | 114 ++++++++ ...istributed_discrete_gaussian_query_test.py | 165 +++++++++++ 5 files changed, 697 insertions(+) create mode 100644 tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py create mode 100644 tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index bf0e8e4..f775d80 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -43,6 +43,7 @@ else: # DPQuery classes from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery + from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py new file mode 100644 index 0000000..ea0a663 --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py @@ -0,0 +1,142 @@ +# Copyright 2021, The TensorFlow Authors. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Util functions for drawing discrete Gaussian samples.
+
+The following functions implement a vectorized TF version of the sampling
+algorithm described in the paper:
+
+The Discrete Gaussian for Differential Privacy
+https://arxiv.org/pdf/2004.00010.pdf
+
+Note that the exact sampling implementation should use integer and fractional
+parameters only. Here, we relax this constraint a bit and use vectorized
+implementations of Bernoulli and discrete Laplace sampling that can take float
+parameters.
+"""
+
+import tensorflow as tf
+import tensorflow_probability as tf_prob
+
+
+def _sample_discrete_laplace(t, shape):
+  """Sample from discrete Laplace with scale t.
+
+  This method is based on the observation that sampling from Z ~ Lap(t) is
+  equivalent to sampling X, Y independently from Geo(1 - exp(-1/t)) and taking
+  Z = X - Y.
+
+  Note also that tensorflow_probability's geometric sampler is based on floating
+  operations and may possibly be inexact.
+
+  Args:
+    t: The scale of the discrete Laplace distribution.
+    shape: The tensor shape of the tensors drawn.
+
+  Returns:
+    A tf.int64 tensor of the specified shape of discrete Laplace samples.
+  """
+  geometric_probs = 1.0 - tf.exp(-1.0 / tf.cast(t, tf.float64))
+  sampler = tf_prob.distributions.Geometric(probs=geometric_probs)
+  return tf.cast(sampler.sample(shape) - sampler.sample(shape), tf.int64)
+
+
+def _sample_bernoulli(p):
+  """Sample int64 values from Bernoulli(p), elementwise over `p`."""
+  return tf_prob.distributions.Bernoulli(probs=p, dtype=tf.int64).sample()
+
+
+def _check_input_args(scale, shape, dtype):
+  """Checks dtype is int32/int64 and scale is a non-negative integer."""
+  if tf.as_dtype(dtype) not in (tf.int32, tf.int64):
+    raise ValueError(
+        f'Only tf.int32 and tf.int64 are supported. Found dtype `{dtype}`.')
+
+  checks = [
+      tf.compat.v1.assert_non_negative(scale),
+      tf.compat.v1.assert_integer(scale)
+  ]
+  with tf.control_dependencies(checks):
+    return tf.identity(scale), shape, dtype
+
+
+def _int_square(value):
+  """Avoids the TF op `Square(T=...)` for ints as sampling can happen on clients."""
+  return (value - 1) * (value + 1) + 1
+
+
+@tf.function
+def _sample_discrete_gaussian_helper(scale, shape, dtype):
+  """Draw samples from discrete Gaussian, assuming integer scale >= 1."""
+  scale = tf.cast(scale, tf.int64)
+  sq_scale = _int_square(scale)
+
+  # Scale for discrete Laplace. The sampling algorithm should be correct
+  # for any discrete Laplace scale, and the original paper uses
+  # `dlap_scale = floor(scale) + 1`. Here we use `dlap_scale = scale` (where
+  # input `scale` is restricted to integers >= 1) to simplify the fraction
+  # below. It turns out that for integer scales >= 1, `dlap_scale = scale` gives
+  # a good minimum success rate of ~70%, allowing a small oversampling factor.
+  dlap_scale = scale
+  oversample_factor = 1.5
+
+  # Draw at least some samples in case we got unlucky with small input shape.
+  min_n = 1000
+  target_n = tf.reduce_prod(tf.cast(shape, tf.int64))
+  oversample_n = oversample_factor * tf.cast(target_n, tf.float32)
+  draw_n = tf.maximum(min_n, tf.cast(oversample_n, tf.int32))
+
+  accepted_n = tf.constant(0, dtype=target_n.dtype)
+  result = tf.zeros((0,), dtype=tf.int64)
+
+  while accepted_n < target_n:
+    # Since the number of samples could be different in every retry, we need to
+    # manually specify the shape info for TF.
+    tf.autograph.experimental.set_loop_options(
+        shape_invariants=[(result, tf.TensorShape([None]))])
+    # Draw candidate samples from the discrete Laplace proposal.
+    samples = _sample_discrete_laplace(dlap_scale, shape=(draw_n,))
+    z_numer = _int_square(tf.abs(samples) - scale)
+    z_denom = 2 * sq_scale
+    bern_probs = tf.exp(-1.0 * tf.divide(z_numer, z_denom))
+    accept = _sample_bernoulli(bern_probs)
+    # Keep successful samples and increment counter.
+    accepted_samples = samples[tf.equal(accept, 1)]
+    accepted_n += tf.cast(tf.size(accepted_samples), accepted_n.dtype)
+    result = tf.concat([result, accepted_samples], axis=0)
+    # Reduce the number of draws for any retries.
+    draw_n = tf.cast(target_n - accepted_n, tf.float32) * oversample_factor
+    draw_n = tf.maximum(min_n, tf.cast(draw_n, tf.int32))
+
+  return tf.cast(tf.reshape(result[:target_n], shape), dtype)
+
+
+def sample_discrete_gaussian(scale, shape, dtype=tf.int32):
+  """Draws (possibly inexact) samples from the discrete Gaussian distribution.
+
+  We relax some integer constraints to use vectorized implementations of
+  Bernoulli and discrete Laplace sampling. Integer operations are done in
+  tf.int64 as TF does not have direct support for fractions.
+
+  Args:
+    scale: The (non-negative integer) scale of the discrete Gaussian.
+    shape: The shape of the output tensor.
+    dtype: The type of the output; must be tf.int32 or tf.int64.
+
+  Returns:
+    A tensor of the specified shape and dtype; all zeros when scale is 0.
+  """
+  scale, shape, dtype = _check_input_args(scale, shape, dtype)
+  return tf.cond(
+      tf.equal(scale, 0), lambda: tf.zeros(shape, dtype),
+      lambda: _sample_discrete_gaussian_helper(scale, shape, dtype))
diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py
new file mode 100644
index 0000000..185a649
--- /dev/null
+++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py
@@ -0,0 +1,275 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for discrete_gaussian_utils."""
+
+import collections
+import fractions
+import math
+import random
+
+from absl.testing import parameterized
+import numpy as np
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+
+EXACT_SAMPLER_SEED = 4242
+
+
+class DiscreteGaussianUtilsTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.product(dtype=[tf.bool, tf.float32, tf.float64])
+  def test_raise_on_bad_dtype(self, dtype):
+    with self.assertRaises(ValueError):
+      _ = discrete_gaussian_utils.sample_discrete_gaussian(1, (1,), dtype)
+
+  def test_raise_on_negative_scale(self):
+    with self.assertRaises(tf.errors.InvalidArgumentError):
+      _ = discrete_gaussian_utils.sample_discrete_gaussian(-10, (1,))
+
+  def test_raise_on_float_scale(self):
+    with self.assertRaises(TypeError):
+      _ = discrete_gaussian_utils.sample_discrete_gaussian(3.14, (1,))
+
+  @parameterized.product(shape=[(), (1,), (100,), (2, 2), (3, 3, 3),
+                                (4, 1, 1, 1)])
+  def test_shapes(self, shape):
+    samples = discrete_gaussian_utils.sample_discrete_gaussian(100, shape)
+    samples = self.evaluate(samples)
+    self.assertAllEqual(samples.shape, shape)
+
+  @parameterized.product(dtype=[tf.int32, tf.int64])
+  def test_dtypes(self, dtype):
+    samples = discrete_gaussian_utils.sample_discrete_gaussian(1, (10,), dtype)
+    samples = self.evaluate(samples)
+    # Convert output np dtypes to tf dtypes.
+    self.assertEqual(tf.as_dtype(samples.dtype), dtype)
+
+  def test_zero_noise(self):
+    scale = 0
+    shape = (100,)
+    dtype = tf.int32
+    samples = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale, shape, dtype=dtype)
+    samples = self.evaluate(samples)
+    self.assertAllEqual(samples, tf.zeros(shape, dtype=dtype))
+
+  @parameterized.named_parameters([('small_scale_small_n', 10, 2000, 1, 2),
+                                   ('small_scale_large_n', 10, 5000, 1, 1),
+                                   ('large_scale_small_n', 50, 2000, 2, 5),
+                                   ('large_scale_large_n', 50, 5000, 2, 3)])
+  def test_match_exact_sampler(self, scale, num_samples, mean_std_atol,
+                               percentile_atol):
+    true_samples = exact_sampler(scale, num_samples)
+    drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=scale, shape=(num_samples,))
+    drawn_samples = self.evaluate(drawn_samples)
+
+    # Check mean, std, and percentiles.
+    self.assertAllClose(
+        np.mean(true_samples), np.mean(drawn_samples), atol=mean_std_atol)
+    self.assertAllClose(
+        np.std(true_samples), np.std(drawn_samples), atol=mean_std_atol)
+    self.assertAllClose(
+        np.percentile(true_samples, [10, 30, 50, 70, 90]),
+        np.percentile(drawn_samples, [10, 30, 50, 70, 90]),
+        atol=percentile_atol)
+
+  @parameterized.named_parameters([('n_1000', 1000, 5e-2),
+                                   ('n_10000', 10000, 5e-3)])
+  def test_kl_divergence(self, num_samples, kl_tolerance):
+    """Compute KL divergence between empirical & true distribution."""
+    scale = 10
+    sq_sigma = scale * scale
+    drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=scale, shape=(num_samples,))
+    drawn_samples = self.evaluate(drawn_samples)
+    value_counts = collections.Counter(drawn_samples)
+
+    kl = 0
+    norm_const = dgauss_normalizing_constant(sq_sigma)
+
+    for value, count in value_counts.items():
+      kl += count * (
+          math.log(count * norm_const / num_samples) + value * value /
+          (2.0 * sq_sigma))
+
+    kl /= num_samples
+    self.assertLess(kl, kl_tolerance)
+
+
+def exact_sampler(scale, num_samples, seed=EXACT_SAMPLER_SEED):
+  """Implementation of the exact discrete Gaussian distribution sampler.
+
+  Source: https://arxiv.org/pdf/2004.00010.pdf.
+
+  Args:
+    scale: The scale of the discrete Gaussian.
+    num_samples: The number of samples to generate.
+    seed: The seed for the random number generator to reproduce samples.
+
+  Returns:
+    A numpy array of discrete Gaussian samples.
+  """
+
+  def randrange(a, rng):
+    return rng.randrange(a)
+
+  def bern_em1(rng):
+    """Sample from Bernoulli(exp(-1))."""
+    k = 2
+    while True:
+      if randrange(k, rng) == 0:  # if Bernoulli(1/k)==1
+        k = k + 1
+      else:
+        return k % 2
+
+  def bern_emab1(a, b, rng):
+    """Sample from Bernoulli(exp(-a/b)), assuming 0 <= a <= b."""
+    assert isinstance(a, int)
+    assert isinstance(b, int)
+    assert 0 <= a <= b
+    k = 1
+    while True:
+      if randrange(b, rng) < a and randrange(k, rng) == 0:  # if Bern(a/b/k)==1
+        k = k + 1
+      else:
+        return k % 2
+
+  def bern_emab(a, b, rng):
+    """Sample from Bernoulli(exp(-a/b)), allowing a > b."""
+    while a > b:
+      if bern_em1(rng) == 0:
+        return 0
+      a = a - b
+    return bern_emab1(a, b, rng)
+
+  def geometric(t, rng):
+    """Sample from geometric(1-exp(-1/t))."""
+    assert isinstance(t, int)
+    assert t > 0
+    while True:
+      u = randrange(t, rng)
+      if bern_emab1(u, t, rng) == 1:
+        while bern_em1(rng) == 1:
+          u = u + t
+        return u
+
+  def dlap(t, rng):
+    """Sample from discrete Laplace with scale t.
+
+    Pr[x] = exp(-|x|/t) * (exp(1/t)-1)/(exp(1/t)+1). Supported on integers.
+
+    Args:
+      t: The scale.
+      rng: The random number generator.
+
+    Returns:
+      A discrete Laplace sample.
+    """
+    assert isinstance(t, int)
+    assert t > 0
+    while True:
+      u = geometric(t, rng)
+      b = randrange(2, rng)
+      if b == 1:
+        return u
+      elif u > 0:
+        return -u
+
+  def floorsqrt(x):
+    """Compute floor(sqrt(x)) exactly."""
+    assert x >= 0
+    a = 0  # maintain a^2<=x.
+    b = 1  # maintain b^2>x.
+    while b * b <= x:
+      b = 2 * b
+    # Do binary search.
+    while a + 1 < b:
+      c = (a + b) // 2
+      if c * c <= x:
+        a = c
+      else:
+        b = c
+    return a
+
+  def dgauss(ss, num, rng):
+    """Sample from discrete Gaussian.
+
+    Args:
+      ss: Variance proxy, squared scale, sigma^2.
+      num: The number of samples to generate.
+      rng: The random number generator.
+
+    Returns:
+      A tuple `(results, t, trials)`: samples, dlap scale, and trial count.
+    """
+    ss = fractions.Fraction(ss)  # cast to rational for exact arithmetic
+    assert ss > 0
+    t = floorsqrt(ss) + 1
+    results = []
+    trials = 0
+    while len(results) < num:
+      trials = trials + 1
+      y = dlap(t, rng)
+      z = (abs(y) - ss / t)**2 / (2 * ss)
+      if bern_emab(z.numerator, z.denominator, rng) == 1:
+        results.append(y)
+    return results, t, trials
+
+  rng = random.Random(seed)
+  return np.array(dgauss(scale * scale, num_samples, rng)[0])
+
+
+def dgauss_normalizing_constant(sigma_sq):
+  """Compute the normalizing constant of the discrete Gaussian.
+
+  Source: https://arxiv.org/pdf/2004.00010.pdf.
+
+  Args:
+    sigma_sq: Variance proxy, squared scale, sigma^2.
+
+  Returns:
+    The normalizing constant.
+  """
+  original = None
+  poisson = None
+  if sigma_sq <= 1:
+    original = 0
+    x = 1000
+    while x > 0:
+      original = original + math.exp(-x * x / (2.0 * sigma_sq))
+      x = x - 1
+    original = 2 * original + 1
+
+  if sigma_sq * 100 >= 1:
+    poisson = 0
+    y = 1000
+    while y > 0:
+      poisson = poisson + math.exp(-math.pi * math.pi * sigma_sq * 2 * y * y)
+      y = y - 1
+    poisson = math.sqrt(2 * math.pi * sigma_sq) * (1 + 2 * poisson)
+
+  if poisson is None:
+    return original
+  if original is None:
+    return poisson
+
+  scale = max(1, math.sqrt(2 * math.pi * sigma_sq))
+  precision = 1e-15
+  assert -precision * scale <= original - poisson <= precision * scale
+  return (original + poisson) / 2
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
new file mode 100644
index 0000000..5b450ee
--- /dev/null
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
@@ -0,0 +1,114 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implements DPQuery interface for distributed discrete Gaussian mechanism."""
+
+import collections
+
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+from tensorflow_privacy.privacy.dp_query import dp_query
+
+
+class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
+  """Implements DPQuery for discrete distributed Gaussian sum queries.
+
+  For each local record, we check the L2 norm bound and add discrete Gaussian
+  noise. In particular, this DPQuery does not perform L2 norm clipping and the
+  norms of the input records are expected to be bounded.
+  """
+
+  # pylint: disable=invalid-name
+  _GlobalState = collections.namedtuple('_GlobalState',
+                                        ['l2_norm_bound', 'local_stddev'])
+
+  # pylint: disable=invalid-name
+  _SampleParams = collections.namedtuple('_SampleParams',
+                                         ['l2_norm_bound', 'local_stddev'])
+
+  def __init__(self, l2_norm_bound, local_stddev):
+    """Initializes the DistributedDiscreteGaussianSumQuery.
+
+    Args:
+      l2_norm_bound: The L2 norm bound to verify for each record.
+      local_stddev: The scale/stddev of the local discrete Gaussian noise.
+    """
+    self._l2_norm_bound = l2_norm_bound
+    self._local_stddev = local_stddev
+
+  def set_ledger(self, ledger):
+    del ledger  # Unused.
+    raise NotImplementedError('Ledger has not yet been implemented for '
+                              'DistributedDiscreteGaussianSumQuery!')
+
+  def initial_global_state(self):
+    return self._GlobalState(
+        tf.cast(self._l2_norm_bound, tf.float32),
+        tf.cast(self._local_stddev, tf.float32))
+
+  def derive_sample_params(self, global_state):
+    return self._SampleParams(global_state.l2_norm_bound,
+                              global_state.local_stddev)
+
+  def _add_local_noise(self, record, local_stddev, shares=1):
+    """Add local discrete Gaussian noise to the record.
+
+    Args:
+      record: The record to which we generate and add local noise.
+      local_stddev: The scale/stddev of the local discrete Gaussian noise.
+      shares: Number of shares of local noise to generate. Should be 1 for each
+        record. This can be useful when we want to generate multiple noise
+        shares at once.
+
+    Returns:
+      The record with local noise added.
+    """
+    # Round up the noise as the TF discrete Gaussian sampler only takes
+    # integer noise stddevs for now.
+    ceil_local_stddev = tf.cast(tf.math.ceil(local_stddev), tf.int32)
+
+    def add_noise(v):
+      # Adds an extra dimension for `shares` number of draws.
+      shape = tf.concat([[shares], tf.shape(v)], axis=0)
+      dgauss_noise = discrete_gaussian_utils.sample_discrete_gaussian(
+          scale=ceil_local_stddev, shape=shape, dtype=v.dtype)
+      # Sum across the number of noise shares and add it.
+      noised_v = v + tf.reduce_sum(dgauss_noise, axis=0)
+      # Ensure shape as TF shape inference may fail due to custom noise sampler.
+      noised_v.set_shape(v.shape.as_list())
+      return noised_v
+
+    return tf.nest.map_structure(add_noise, record)
+
+  def preprocess_record(self, params, record):
+    """Check record dtype/norm; add local noise unless stddev is 0."""
+    record_as_list = tf.nest.flatten(record)
+    record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list]
+    tf.nest.map_structure(lambda x: tf.compat.v1.assert_type(x, tf.int32),
+                          record_as_list)
+    dependencies = [
+        tf.compat.v1.assert_less_equal(
+            tf.linalg.global_norm(record_as_float_list),
+            params.l2_norm_bound,
+            message=f'Global L2 norm exceeds {params.l2_norm_bound}.')
+    ]
+    with tf.control_dependencies(dependencies):
+      result = tf.cond(
+          tf.equal(params.local_stddev, 0), lambda: record,
+          lambda: self._add_local_noise(record, params.local_stddev))
+      return result
+
+  def get_noised_result(self, sample_state, global_state):
+    # Note that by directly returning the aggregate, this assumes that there
+    # will not be missing local noise shares during execution.
+    return sample_state, global_state
diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
new file mode 100644
index 0000000..b2f6051
--- /dev/null
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
@@ -0,0 +1,165 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for DistributedDiscreteGaussianQuery."""
+
+from absl.testing import parameterized
+import numpy as np
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query
+from tensorflow_privacy.privacy.dp_query import test_utils
+
+ddg_sum_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery
+
+
+def silence_tf_error_messages(func):
+  """Decorator that runs `func` with TF logging verbosity set to FATAL."""
+
+  def wrapper(*args, **kwargs):
+    cur_verbosity = tf.compat.v1.logging.get_verbosity()
+    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL)
+    func(*args, **kwargs)
+    tf.compat.v1.logging.set_verbosity(cur_verbosity)  # Skipped if func raises.
+
+  return wrapper
+
+
+class DistributedDiscreteGaussianQueryTest(tf.test.TestCase,
+                                           parameterized.TestCase):
+
+  def test_sum_no_noise(self):
+    with self.cached_session() as sess:
+      record1 = tf.constant([2, 0], dtype=tf.int32)
+      record2 = tf.constant([-1, 1], dtype=tf.int32)
+
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, [record1, record2])
+      result = sess.run(query_result)
+      expected = [1, 1]
+      self.assertAllEqual(result, expected)
+
+  @parameterized.product(sample_size=[1, 3])
+  def test_sum_multiple_shapes(self, sample_size):
+    with self.cached_session() as sess:
+      t1 = tf.constant([2, 0], dtype=tf.int32)
+      t2 = tf.constant([-1, 1, 3], dtype=tf.int32)
+      t3 = tf.constant([-2], dtype=tf.int32)
+      record = [t1, t2, t3]
+      sample = [record] * sample_size
+
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, sample)
+      expected = [sample_size * t1, sample_size * t2, sample_size * t3]
+      result, expected = sess.run([query_result, expected])
+      # Use `assertAllClose` for nested structures equality (with tolerance=0).
+      self.assertAllClose(result, expected, atol=0)
+
+  @parameterized.product(sample_size=[1, 3])
+  def test_sum_nested_record_structure(self, sample_size):
+    with self.cached_session() as sess:
+      t1 = tf.constant([1, 0], dtype=tf.int32)
+      t2 = tf.constant([1, 1, 1], dtype=tf.int32)
+      t3 = tf.constant([1], dtype=tf.int32)
+      t4 = tf.constant([[1, 1], [1, 1]], dtype=tf.int32)
+      record = [t1, dict(a=t2, b=[t3, (t4, t1)])]
+      sample = [record] * sample_size
+
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, sample)
+      result = sess.run(query_result)
+
+      s = sample_size
+      expected = [t1 * s, dict(a=t2 * s, b=[t3 * s, (t4 * s, t1 * s)])]
+      # Use `assertAllClose` for nested structures equality (with tolerance=0).
+      self.assertAllClose(result, expected, atol=0)
+
+  def test_sum_raise_on_float_inputs(self):
+    with self.cached_session() as sess:
+      record1 = tf.constant([2, 0], dtype=tf.float32)
+      record2 = tf.constant([-1, 1], dtype=tf.float32)
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+
+      with self.assertRaises(TypeError):
+        query_result, _ = test_utils.run_query(query, [record1, record2])
+        sess.run(query_result)
+
+  @parameterized.product(l2_norm_bound=[0, 3, 10, 14.1])
+  @silence_tf_error_messages
+  def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound):
+    with self.cached_session() as sess:
+      record = tf.constant([10, 10], dtype=tf.int32)
+      query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
+
+      with self.assertRaises(tf.errors.InvalidArgumentError):
+        query_result, _ = test_utils.run_query(query, [record])
+        sess.run(query_result)
+
+  def test_sum_float_norm_not_rounded(self):
+    """Test that the float L2 norm bound doesn't get rounded/cast to integers."""
+    with self.cached_session() as sess:
+      # A cast/rounded norm bound would be insufficient.
+      l2_norm_bound = 14.2
+      record = tf.constant([10, 10], dtype=tf.int32)
+      query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, [record])
+      result = sess.run(query_result)
+      expected = [10, 10]
+      self.assertAllEqual(result, expected)
+
+  @parameterized.named_parameters([('2_local_stddev_1_record', 2, 1),
+                                   ('10_local_stddev_4_records', 10, 4),
+                                   ('1000_local_stddev_1_record', 1000, 1),
+                                   ('1000_local_stddev_25_records', 1000, 25)])
+  def test_sum_local_noise_shares(self, local_stddev, num_records):
+    """Test the noise level of the sum of discrete Gaussians applied locally.
+
+    The sum of discrete Gaussians is not a discrete Gaussian, but it will be
+    extremely close for sigma >= 2. We will thus compare the aggregated noise
+    to a central discrete Gaussian noise with appropriately scaled stddev with
+    some reasonable tolerance.
+
+    Args:
+      local_stddev: The stddev of the local discrete Gaussian noise.
+      num_records: The number of records to be aggregated.
+    """
+    # Aggregated local noises.
+    num_trials = 1000
+    record = tf.zeros([num_trials], dtype=tf.int32)
+    sample = [record] * num_records
+    query = ddg_sum_query(l2_norm_bound=10.0, local_stddev=local_stddev)
+    query_result, _ = test_utils.run_query(query, sample)
+
+    # Central discrete Gaussian noise.
+    central_stddev = np.sqrt(num_records) * local_stddev
+    central_noise = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=tf.cast(tf.round(central_stddev), record.dtype),
+        shape=tf.shape(record),
+        dtype=record.dtype)
+
+    agg_noise, central_noise = self.evaluate([query_result, central_noise])
+
+    mean_stddev = central_stddev * np.sqrt(num_trials) / num_trials
+    atol = 3.5 * mean_stddev
+
+    # Use the atol for mean as a rough default atol for stddev/percentile.
+    self.assertAllClose(np.mean(agg_noise), np.mean(central_noise), atol=atol)
+    self.assertAllClose(np.std(agg_noise), np.std(central_noise), atol=atol)
+    self.assertAllClose(
+        np.percentile(agg_noise, [25, 50, 75]),
+        np.percentile(central_noise, [25, 50, 75]),
+        atol=atol)
+
+
+if __name__ == '__main__':
+  tf.test.main()