Adds discrete Gaussian (sampler and distributed DPQuery) to public TF Privacy.

PiperOrigin-RevId: 387232449
2021-07-27 17:17:53 -07:00 · 2021-07-27 17:17:53 -07:00 · e7e11d14d9
commit e7e11d14d9
parent 2f862eba9b
5 changed files with 697 additions and 0 deletions
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@ -43,6 +43,7 @@ else:
  # DPQuery classes
  from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery
  from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery
  from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery
  from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery
  from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery
  from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
--- a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py
+++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py
@ -0,0 +1,142 @@
 # Copyright 2021, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     https://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Util functions for drawing discrete Gaussian samples.
 The following functions implement a vectorized TF version of the sampling
 algorithm described in the paper:
 The Discrete Gaussian for Differential Privacy
 https://arxiv.org/pdf/2004.00010.pdf
 Note that the exact sampling implementation should use integer and fractional
 parameters only. Here, we relax this constraint a bit and use vectorized
 implementations of Bernoulli and discrete Laplace sampling that can take float
 parameters.
 """
 import tensorflow as tf
 import tensorflow_probability as tf_prob
 def _sample_discrete_laplace(t, shape):
  """Draws discrete Laplace samples with scale `t`.
  A discrete Laplace variable Z ~ Lap(t) can be written as Z = X - Y, where X
  and Y are independent geometric variables with success probability
  1 - exp(-1/t). Both draws come from tensorflow_probability's geometric
  sampler, which works in floating point and may therefore be inexact.
  Args:
    t: The scale of the discrete Laplace distribution.
    shape: The tensor shape of the tensors drawn.
  Returns:
    A tf.int64 tensor of the specified shape filled with random values.
  """
  scale_f64 = tf.cast(t, tf.float64)
  success_prob = 1.0 - tf.exp(-1.0 / scale_f64)
  geometric = tf_prob.distributions.Geometric(probs=success_prob)
  # Two independent geometric draws; their difference is the Laplace sample.
  first_draw = geometric.sample(shape)
  second_draw = geometric.sample(shape)
  return tf.cast(first_draw - second_draw, tf.int64)
 def _sample_bernoulli(p):
  """Draws tf.int64 Bernoulli samples using success probabilities `p`."""
  bernoulli = tf_prob.distributions.Bernoulli(probs=p, dtype=tf.int64)
  return bernoulli.sample()
 def _check_input_args(scale, shape, dtype):
  """Validates the arguments passed to the discrete Gaussian sampler.
  Args:
    scale: The requested noise scale; must be a non-negative integer.
    shape: The requested output shape (returned unchanged).
    dtype: The requested output dtype; must be tf.int32 or tf.int64.
  Returns:
    A `(scale, shape, dtype)` tuple in which `scale` is an identity tensor that
    carries the non-negativity and integer checks as control dependencies.
  Raises:
    ValueError: If `dtype` is neither tf.int32 nor tf.int64.
  """
  supported_dtypes = (tf.int32, tf.int64)
  if tf.as_dtype(dtype) not in supported_dtypes:
    raise ValueError(
        f'Only tf.int32 and tf.int64 are supported. Found dtype `{dtype}`.')
  # Build the checks in this order so a negative scale is reported first.
  non_negative_check = tf.compat.v1.assert_non_negative(scale)
  integer_check = tf.compat.v1.assert_integer(scale)
  with tf.control_dependencies([non_negative_check, integer_check]):
    checked_scale = tf.identity(scale)
  return checked_scale, shape, dtype
 def _int_square(value):
  """Avoids the TF op `Square(T=...)` for ints as sampling can happen on clients."""
  return (value - 1) * (value + 1) + 1
@tf.function
 def _sample_discrete_gaussian_helper(scale, shape, dtype):
  """Draw samples from discrete Gaussian, assuming scale >= 0.
  Vectorized rejection sampler: candidates are drawn from a discrete Laplace
  distribution with scale `scale`, and each candidate y is accepted with
  probability exp(-(|y| - scale)^2 / (2 * scale^2)). Batches of candidates are
  drawn until at least prod(shape) of them have been accepted.
  NOTE(review): the acceptance exponent divides by `2 * scale^2`, so this
  helper appears to need scale >= 1 in practice; confirm that callers handle
  scale == 0 separately.
  Args:
    scale: The integer scale of the discrete Gaussian (cast to tf.int64 here).
    shape: The shape of the output tensor.
    dtype: The dtype the accepted samples are cast to before returning.
  Returns:
    A tensor of shape `shape` and type `dtype` holding the accepted samples.
  """
  scale = tf.cast(scale, tf.int64)
  sq_scale = _int_square(scale)
  # Scale for discrete Laplace. The sampling algorithm should be correct
  # for any discrete Laplace scale, and the original paper uses
  # `dlap_scale = floor(scale) + 1`. Here we use `dlap_scale = scale` (where
  # input `scale` is restricted to integers >= 1) to simplify the fraction
  # below. It turns out that for integer scales >= 1, `dlap_scale = scale` gives
  # a good minimum success rate of ~70%, allowing a small oversampling factor.
  dlap_scale = scale
  oversample_factor = 1.5
  # Draw at least some samples in case we got unlucky with small input shape.
  min_n = 1000
  # Total number of samples requested (product of the output dimensions).
  target_n = tf.reduce_prod(tf.cast(shape, tf.int64))
  oversample_n = oversample_factor * tf.cast(target_n, tf.float32)
  draw_n = tf.maximum(min_n, tf.cast(oversample_n, tf.int32))
  accepted_n = tf.constant(0, dtype=target_n.dtype)
  # Flat buffer of accepted samples; grows by concatenation on every retry.
  result = tf.zeros((0,), dtype=tf.int64)
  while accepted_n < target_n:
    # Since the number of samples could be different in every retry, we need to
    # manually specify the shape info for TF.
    tf.autograph.experimental.set_loop_options(
        shape_invariants=[(result, tf.TensorShape([None]))])
    # Draw samples.
    samples = _sample_discrete_laplace(dlap_scale, shape=(draw_n,))
    # Acceptance probability exp(-(|y| - scale)^2 / (2 * scale^2)).
    z_numer = _int_square(tf.abs(samples) - scale)
    z_denom = 2 * sq_scale
    bern_probs = tf.exp(-1.0 * tf.divide(z_numer, z_denom))
    accept = _sample_bernoulli(bern_probs)
    # Keep successful samples and increment counter.
    accepted_samples = samples[tf.equal(accept, 1)]
    accepted_n += tf.cast(tf.size(accepted_samples), accepted_n.dtype)
    result = tf.concat([result, accepted_samples], axis=0)
    # Reduce the number of draws for any retries.
    draw_n = tf.cast(target_n - accepted_n, tf.float32) * oversample_factor
    draw_n = tf.maximum(min_n, tf.cast(draw_n, tf.int32))
  # Drop surplus accepted samples, then restore the requested shape and dtype.
  return tf.cast(tf.reshape(result[:target_n], shape), dtype)
 def sample_discrete_gaussian(scale, shape, dtype=tf.int32):
  """Returns (possibly inexact) discrete Gaussian samples.
  Some integer constraints of the exact algorithm are relaxed so that
  vectorized Bernoulli and discrete Laplace samplers can be used. Integer
  arithmetic is carried out in tf.int64 since TF has no direct support for
  fractions. A scale of zero yields an all-zero tensor.
  Args:
    scale: The scale of the discrete Gaussian distribution.
    shape: The shape of the output tensor.
    dtype: The type of the output.
  Returns:
    A tensor of the specified shape filled with random values.
  """
  scale, shape, dtype = _check_input_args(scale, shape, dtype)
  def _all_zeros():
    # Zero scale means no noise at all.
    return tf.zeros(shape, dtype)
  def _draw_samples():
    return _sample_discrete_gaussian_helper(scale, shape, dtype)
  is_zero_scale = tf.equal(scale, 0)
  return tf.cond(is_zero_scale, _all_zeros, _draw_samples)
--- a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py
+++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py
@ -0,0 +1,275 @@
 # Copyright 2021, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     https://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Tests for discrete_gaussian_utils."""
 import collections
 import fractions
 import math
 import random
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
 from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
 # Default seed for the pure-Python reference sampler `exact_sampler`, so that
 # the reference samples are reproducible across runs.
 EXACT_SAMPLER_SEED = 4242
 class DiscreteGaussianUtilsTest(tf.test.TestCase, parameterized.TestCase):
  """Tests for the vectorized discrete Gaussian sampler."""
  @parameterized.product(dtype=[tf.bool, tf.float32, tf.float64])
  def test_raise_on_bad_dtype(self, dtype):
    """Output dtypes other than tf.int32/tf.int64 raise a ValueError."""
    with self.assertRaises(ValueError):
      discrete_gaussian_utils.sample_discrete_gaussian(1, (1,), dtype)
  def test_raise_on_negative_scale(self):
    """A negative scale fails the non-negativity assertion."""
    with self.assertRaises(tf.errors.InvalidArgumentError):
      discrete_gaussian_utils.sample_discrete_gaussian(-10, (1,))
  def test_raise_on_float_scale(self):
    """A floating point scale fails the integer-type assertion."""
    with self.assertRaises(TypeError):
      discrete_gaussian_utils.sample_discrete_gaussian(3.14, (1,))
  @parameterized.product(shape=[(), (1,), (100,), (2, 2), (3, 3, 3),
                                (4, 1, 1, 1)])
  def test_shapes(self, shape):
    """The sampler output has exactly the requested shape."""
    drawn = self.evaluate(
        discrete_gaussian_utils.sample_discrete_gaussian(100, shape))
    self.assertAllEqual(drawn.shape, shape)
  @parameterized.product(dtype=[tf.int32, tf.int64])
  def test_dtypes(self, dtype):
    """The sampler output has exactly the requested dtype."""
    drawn = self.evaluate(
        discrete_gaussian_utils.sample_discrete_gaussian(1, (10,), dtype))
    # The evaluated result carries a numpy dtype; map it back to a TF dtype.
    self.assertEqual(tf.as_dtype(drawn.dtype), dtype)
  def test_zero_noise(self):
    """A scale of zero produces an all-zero tensor."""
    shape = (100,)
    dtype = tf.int32
    drawn = self.evaluate(
        discrete_gaussian_utils.sample_discrete_gaussian(
            0, shape, dtype=dtype))
    self.assertAllEqual(drawn, tf.zeros(shape, dtype=dtype))
  @parameterized.named_parameters([('small_scale_small_n', 10, 2000, 1, 2),
                                   ('small_scale_large_n', 10, 5000, 1, 1),
                                   ('large_scale_small_n', 50, 2000, 2, 5),
                                   ('large_scale_large_n', 50, 5000, 2, 3)])
  def test_match_exact_sampler(self, scale, num_samples, mean_std_atol,
                               percentile_atol):
    """Summary statistics agree with the exact reference sampler."""
    reference = exact_sampler(scale, num_samples)
    drawn = self.evaluate(
        discrete_gaussian_utils.sample_discrete_gaussian(
            scale=scale, shape=(num_samples,)))
    # Compare mean and std first, then a handful of percentiles.
    self.assertAllClose(
        np.mean(reference), np.mean(drawn), atol=mean_std_atol)
    self.assertAllClose(np.std(reference), np.std(drawn), atol=mean_std_atol)
    percentiles = [10, 30, 50, 70, 90]
    self.assertAllClose(
        np.percentile(reference, percentiles),
        np.percentile(drawn, percentiles),
        atol=percentile_atol)
  @parameterized.named_parameters([('n_1000', 1000, 5e-2),
                                   ('n_10000', 10000, 5e-3)])
  def test_kl_divergence(self, num_samples, kl_tolerance):
    """Computes KL divergence between the empirical and true distribution."""
    scale = 10
    sq_sigma = scale * scale
    drawn = self.evaluate(
        discrete_gaussian_utils.sample_discrete_gaussian(
            scale=scale, shape=(num_samples,)))
    norm_const = dgauss_normalizing_constant(sq_sigma)
    kl = 0
    for value, count in collections.Counter(drawn).items():
      # log(empirical / true) with the true log-density expanded in closed form.
      log_ratio = (
          math.log(count * norm_const / num_samples) + value * value /
          (2.0 * sq_sigma))
      kl += count * log_ratio
    kl /= num_samples
    self.assertLess(kl, kl_tolerance)
 def exact_sampler(scale, num_samples, seed=EXACT_SAMPLER_SEED):
  """Implementation of the exact discrete gaussian distribution sampler.
  Source: https://arxiv.org/pdf/2004.00010.pdf.
  All randomness comes from a single `random.Random(seed)` instance that is
  threaded through the nested helpers, so the output is reproducible for a
  fixed seed. The helpers only use integer and rational arithmetic.
  Args:
    scale: The scale of the discrete Gaussian.
    num_samples: The number of samples to generate.
    seed: The seed for the random number generator to reproduce samples.
  Returns:
    A numpy array of discrete Gaussian samples.
  """
  def randrange(a, rng):
    # Thin wrapper around `rng.randrange(a)`.
    return rng.randrange(a)
  def bern_em1(rng):
    """Sample from Bernoulli(exp(-1))."""
    k = 2
    while True:
      if randrange(k, rng) == 0:  # if Bernoulli(1/k)==1
        k = k + 1
      else:
        # The parity of the index at which the run of successes stops is the
        # sample.
        return k % 2
  def bern_emab1(a, b, rng):
    """Sample from Bernoulli(exp(-a/b)), assuming 0 <= a <= b."""
    assert isinstance(a, int)
    assert isinstance(b, int)
    assert 0 <= a <= b
    k = 1
    while True:
      # The second draw only happens when the first comparison succeeds
      # (short-circuit `and`), which affects how much randomness is consumed.
      if randrange(b, rng) < a and randrange(k, rng) == 0:  # if Bern(a/b/k)==1
        k = k + 1
      else:
        return k % 2
  def bern_emab(a, b, rng):
    """Sample from Bernoulli(exp(-a/b)), allowing a > b."""
    # Peel off one Bernoulli(exp(-1)) factor per unit of a/b above 1.
    while a > b:
      if bern_em1(rng) == 0:
        return 0
      a = a - b
    return bern_emab1(a, b, rng)
  def geometric(t, rng):
    """Sample from geometric(1-exp(-1/t))."""
    assert isinstance(t, int)
    assert t > 0
    while True:
      # Draw u in [0, t) and accept it with probability exp(-u/t).
      u = randrange(t, rng)
      if bern_emab1(u, t, rng) == 1:
        # Add t for every consecutive Bernoulli(exp(-1)) success.
        while bern_em1(rng) == 1:
          u = u + t
        return u
  def dlap(t, rng):
    """Sample from discrete Laplace with scale t.
    Pr[x] = exp(-|x|/t) * (exp(1/t)-1)/(exp(1/t)+1). Supported on integers.
    Args:
      t: The scale.
      rng: The random number generator.
    Returns:
      A discrete Laplace sample.
    """
    assert isinstance(t, int)
    assert t > 0
    while True:
      u = geometric(t, rng)
      # b picks the sign. The combination (b == 0, u == 0) is retried, so zero
      # is only ever returned through the b == 1 branch.
      b = randrange(2, rng)
      if b == 1:
        return u
      elif u > 0:
        return -u
  def floorsqrt(x):
    """Compute floor(sqrt(x)) exactly."""
    assert x >= 0
    a = 0  # maintain a^2<=x.
    b = 1  # maintain b^2>x.
    # Double b until it is a strict upper bound on sqrt(x).
    while b * b <= x:
      b = 2 * b
    # Do binary search.
    while a + 1 < b:
      c = (a + b) // 2
      if c * c <= x:
        a = c
      else:
        b = c
    return a
  def dgauss(ss, num, rng):
    """Sample from discrete Gaussian.
    Args:
      ss: Variance proxy, squared scale, sigma^2.
      num: The number of samples to generate.
      rng: The random number generator.
    Returns:
      A tuple `(results, t, trials)`: the list of accepted discrete Gaussian
      samples, the discrete Laplace scale that was used, and the total number
      of candidates drawn.
    """
    ss = fractions.Fraction(ss)  # cast to rational for exact arithmetic
    assert ss > 0
    # Discrete Laplace scale used for the proposal distribution.
    t = floorsqrt(ss) + 1
    results = []
    trials = 0
    while len(results) < num:
      trials = trials + 1
      y = dlap(t, rng)
      # Accept y with probability exp(-z); z is an exact Fraction.
      z = (abs(y) - ss / t)**2 / (2 * ss)
      if bern_emab(z.numerator, z.denominator, rng) == 1:
        results.append(y)
    return results, t, trials
  rng = random.Random(seed)
  # Only the list of samples (first tuple element) is returned to the caller.
  return np.array(dgauss(scale * scale, num_samples, rng)[0])
 def dgauss_normalizing_constant(sigma_sq):
  """Computes the normalizing constant of the discrete Gaussian.
  Source: https://arxiv.org/pdf/2004.00010.pdf.
  Two truncated series are used: the direct sum of exp(-x^2 / (2 sigma^2)) over
  the integers (when sigma^2 <= 1) and its Poisson-summation counterpart (when
  sigma^2 >= 0.01). Where both are computed they must agree to within a tight
  tolerance, and their average is returned.
  Args:
    sigma_sq: Variance proxy, squared scale, sigma^2.
  Returns:
    The normalizing constant.
  """
  num_terms = 1000
  direct_sum = None
  poisson_sum = None
  if sigma_sq <= 1:
    # Accumulate from the smallest terms upwards (x = 1000 down to 1).
    tail = 0
    for x in range(num_terms, 0, -1):
      tail += math.exp(-x * x / (2.0 * sigma_sq))
    direct_sum = 2 * tail + 1
  if sigma_sq * 100 >= 1:
    tail = 0
    for y in range(num_terms, 0, -1):
      tail += math.exp(-math.pi * math.pi * sigma_sq * 2 * y * y)
    poisson_sum = math.sqrt(2 * math.pi * sigma_sq) * (1 + 2 * tail)
  if poisson_sum is None:
    return direct_sum
  if direct_sum is None:
    return poisson_sum
  # Both series were evaluated: check that they agree before averaging.
  tolerance = 1e-15 * max(1, math.sqrt(2 * math.pi * sigma_sq))
  assert -tolerance <= direct_sum - poisson_sum <= tolerance
  return (direct_sum + poisson_sum) / 2
 # Run all test cases in this module when it is executed as a script.
 if __name__ == '__main__':
  tf.test.main()
--- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
@ -0,0 +1,114 @@
 # Copyright 2021, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     https://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Implements DPQuery interface for distributed discrete Gaussian mechanism."""
 import collections
 import tensorflow as tf
 from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
 from tensorflow_privacy.privacy.dp_query import dp_query
 class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
  """Implements DPQuery for discrete distributed Gaussian sum queries.
  For each local record, we check the L2 norm bound and add discrete Gaussian
  noise. In particular, this DPQuery does not perform L2 norm clipping and the
  norms of the input records are expected to be bounded.
  """
  # pylint: disable=invalid-name
  _GlobalState = collections.namedtuple('_GlobalState',
                                        ['l2_norm_bound', 'local_stddev'])
  # pylint: disable=invalid-name
  _SampleParams = collections.namedtuple('_SampleParams',
                                         ['l2_norm_bound', 'local_stddev'])
  def __init__(self, l2_norm_bound, local_stddev):
    """Initializes the DistributedDiscreteGaussianSumQuery.
    Args:
      l2_norm_bound: The L2 norm bound to verify for each record.
      local_stddev: The scale/stddev of the local discrete Gaussian noise.
    """
    self._l2_norm_bound = l2_norm_bound
    self._local_stddev = local_stddev
  def set_ledger(self, ledger):
    """Rejects ledgers, which this query does not support.
    Args:
      ledger: Unused.
    Raises:
      NotImplementedError: Always.
    """
    del ledger  # Unused.
    # The trailing space keeps the two adjacent string literals from running
    # together into "forDistributed...".
    raise NotImplementedError('Ledger has not yet been implemented for '
                              'DistributedDiscreteGaussianSumQuery!')
  def initial_global_state(self):
    """Returns the initial global state with both values cast to tf.float32."""
    return self._GlobalState(
        tf.cast(self._l2_norm_bound, tf.float32),
        tf.cast(self._local_stddev, tf.float32))
  def derive_sample_params(self, global_state):
    """Copies the norm bound and local stddev from the global state.
    Args:
      global_state: A `_GlobalState` as built by `initial_global_state`.
    Returns:
      A `_SampleParams` holding the same `l2_norm_bound` and `local_stddev`.
    """
    return self._SampleParams(global_state.l2_norm_bound,
                              global_state.local_stddev)
  def _add_local_noise(self, record, local_stddev, shares=1):
    """Add local discrete Gaussian noise to the record.
    Args:
      record: The record to which we generate and add local noise.
      local_stddev: The scale/stddev of the local discrete Gaussian noise.
      shares: Number of shares of local noise to generate. Should be 1 for each
        record. This can be useful when we want to generate multiple noise
        shares at once.
    Returns:
      The record with local noise added.
    """
    # Round up the noise as the TF discrete Gaussian sampler only takes
    # integer noise stddevs for now.
    ceil_local_stddev = tf.cast(tf.math.ceil(local_stddev), tf.int32)
    def add_noise(v):
      # Adds an extra dimension for `shares` number of draws.
      shape = tf.concat([[shares], tf.shape(v)], axis=0)
      dgauss_noise = discrete_gaussian_utils.sample_discrete_gaussian(
          scale=ceil_local_stddev, shape=shape, dtype=v.dtype)
      # Sum across the number of noise shares and add it.
      noised_v = v + tf.reduce_sum(dgauss_noise, axis=0)
      # Ensure shape as TF shape inference may fail due to custom noise sampler.
      noised_v.set_shape(v.shape.as_list())
      return noised_v
    return tf.nest.map_structure(add_noise, record)
  def preprocess_record(self, params, record):
    """Check record norm and add noise to the record.
    Args:
      params: A `_SampleParams` holding `l2_norm_bound` and `local_stddev`.
      record: A (possibly nested) structure of tf.int32 tensors.
    Returns:
      The record itself when `params.local_stddev` is zero, otherwise the
      record with local discrete Gaussian noise added. Evaluating the result
      also runs the assertion that the global L2 norm of the record does not
      exceed `params.l2_norm_bound`.
    """
    record_as_list = tf.nest.flatten(record)
    # The norm is computed in float32 so that it can be compared against a
    # fractional norm bound.
    record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list]
    # Every component of the record must be tf.int32.
    tf.nest.map_structure(lambda x: tf.compat.v1.assert_type(x, tf.int32),
                          record_as_list)
    dependencies = [
        tf.compat.v1.assert_less_equal(
            tf.linalg.global_norm(record_as_float_list),
            params.l2_norm_bound,
            message=f'Global L2 norm exceeds {params.l2_norm_bound}.')
    ]
    with tf.control_dependencies(dependencies):
      result = tf.cond(
          tf.equal(params.local_stddev, 0), lambda: record,
          lambda: self._add_local_noise(record, params.local_stddev))
      return result
  def get_noised_result(self, sample_state, global_state):
    """Returns the aggregate and the global state unchanged.
    Args:
      sample_state: The aggregated (already locally noised) records.
      global_state: The current global state.
    Returns:
      The tuple `(sample_state, global_state)`.
    """
    # Note that by directly returning the aggregate, this assumes that there
    # will not be missing local noise shares during execution.
    return sample_state, global_state
--- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
@ -0,0 +1,165 @@
 # Copyright 2021, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     https://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Tests for DistributedDiscreteGaussianQuery."""
 from absl.testing import parameterized
 import numpy as np
 import tensorflow as tf
 from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
 from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query
 from tensorflow_privacy.privacy.dp_query import test_utils
 # Short alias for the query class under test.
 ddg_sum_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery
 def silence_tf_error_messages(func):
  """Decorator that temporarily changes the TF logging levels.
  While `func` runs, the TF logging verbosity is set to FATAL. The previous
  verbosity is restored afterwards, including when `func` raises.
  Args:
    func: The callable to wrap.
  Returns:
    A wrapper that forwards all arguments to `func` and returns its result.
  """
  import functools  # pylint: disable=g-import-not-at-top
  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    cur_verbosity = tf.compat.v1.logging.get_verbosity()
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL)
    try:
      return func(*args, **kwargs)
    finally:
      # Reset verbosity even if `func` raised, so that one failing test does
      # not silence logging for the tests that run after it.
      tf.compat.v1.logging.set_verbosity(cur_verbosity)
  return wrapper
 class DistributedDiscreteGaussianQueryTest(tf.test.TestCase,
                                            parameterized.TestCase):
  """Tests for `DistributedDiscreteGaussianSumQuery`."""
  def test_sum_no_noise(self):
    """With zero local stddev the query returns the exact sum."""
    with self.cached_session() as sess:
      records = [
          tf.constant([2, 0], dtype=tf.int32),
          tf.constant([-1, 1], dtype=tf.int32),
      ]
      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
      summed, _ = test_utils.run_query(query, records)
      self.assertAllEqual(sess.run(summed), [1, 1])
  @parameterized.product(sample_size=[1, 3])
  def test_sum_multiple_shapes(self, sample_size):
    """Records that are lists of differently shaped tensors are summed."""
    with self.cached_session() as sess:
      record = [
          tf.constant([2, 0], dtype=tf.int32),
          tf.constant([-1, 1, 3], dtype=tf.int32),
          tf.constant([-2], dtype=tf.int32),
      ]
      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
      summed, _ = test_utils.run_query(query, [record] * sample_size)
      scaled = [sample_size * tensor for tensor in record]
      actual, wanted = sess.run([summed, scaled])
      # `assertAllClose` with atol=0 checks equality of nested structures.
      self.assertAllClose(actual, wanted, atol=0)
  @parameterized.product(sample_size=[1, 3])
  def test_sum_nested_record_structure(self, sample_size):
    """Records mixing lists, dicts and tuples are summed component-wise."""
    with self.cached_session() as sess:
      pair = tf.constant([1, 0], dtype=tf.int32)
      triple = tf.constant([1, 1, 1], dtype=tf.int32)
      single = tf.constant([1], dtype=tf.int32)
      matrix = tf.constant([[1, 1], [1, 1]], dtype=tf.int32)
      record = [pair, {'a': triple, 'b': [single, (matrix, pair)]}]
      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
      summed, _ = test_utils.run_query(query, [record] * sample_size)
      actual = sess.run(summed)
      n = sample_size
      wanted = [
          pair * n, {
              'a': triple * n,
              'b': [single * n, (matrix * n, pair * n)]
          }
      ]
      # `assertAllClose` with atol=0 checks equality of nested structures.
      self.assertAllClose(actual, wanted, atol=0)
  def test_sum_raise_on_float_inputs(self):
    """Float records are rejected with a TypeError."""
    with self.cached_session() as sess:
      float_records = [
          tf.constant([2, 0], dtype=tf.float32),
          tf.constant([-1, 1], dtype=tf.float32),
      ]
      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
      with self.assertRaises(TypeError):
        summed, _ = test_utils.run_query(query, float_records)
        sess.run(summed)
  @parameterized.product(l2_norm_bound=[0, 3, 10, 14.1])
  @silence_tf_error_messages
  def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound):
    """A record whose L2 norm exceeds the bound fails the norm assertion."""
    with self.cached_session() as sess:
      oversized = tf.constant([10, 10], dtype=tf.int32)
      query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
      with self.assertRaises(tf.errors.InvalidArgumentError):
        summed, _ = test_utils.run_query(query, [oversized])
        sess.run(summed)
  def test_sum_float_norm_not_rounded(self):
    """Test that the float L2 norm bound doesn't get rounded/casted to integers."""
    with self.cached_session() as sess:
      # The record only fits if the fractional bound 14.2 is kept as is; a
      # bound that was casted/rounded to an integer would be insufficient.
      record = tf.constant([10, 10], dtype=tf.int32)
      query = ddg_sum_query(l2_norm_bound=14.2, local_stddev=0.0)
      summed, _ = test_utils.run_query(query, [record])
      self.assertAllEqual(sess.run(summed), [10, 10])
  @parameterized.named_parameters([('2_local_stddev_1_record', 2, 1),
                                   ('10_local_stddev_4_records', 10, 4),
                                   ('1000_local_stddev_1_record', 1000, 1),
                                   ('1000_local_stddev_25_records', 1000, 25)])
  def test_sum_local_noise_shares(self, local_stddev, num_records):
    """Checks the noise level of the sum of locally applied discrete Gaussians.
    A sum of discrete Gaussians is not itself a discrete Gaussian, but it is
    extremely close for sigma >= 2. The aggregated noise is therefore compared,
    within a reasonable tolerance, against central discrete Gaussian noise
    whose stddev is scaled accordingly.
    Args:
      local_stddev: The stddev of the local discrete Gaussian noise.
      num_records: The number of records to be aggregated.
    """
    num_trials = 1000
    # Noise aggregated over `num_records` all-zero records.
    record = tf.zeros([num_trials], dtype=tf.int32)
    query = ddg_sum_query(l2_norm_bound=10.0, local_stddev=local_stddev)
    summed, _ = test_utils.run_query(query, [record] * num_records)
    # Noise drawn centrally with the equivalent stddev.
    central_stddev = np.sqrt(num_records) * local_stddev
    central = discrete_gaussian_utils.sample_discrete_gaussian(
        scale=tf.cast(tf.round(central_stddev), record.dtype),
        shape=tf.shape(record),
        dtype=record.dtype)
    aggregated, central = self.evaluate([summed, central])
    mean_stddev = central_stddev * np.sqrt(num_trials) / num_trials
    # The tolerance derived for the mean doubles as a rough default for the
    # stddev and percentile comparisons.
    atol = 3.5 * mean_stddev
    for statistic in (np.mean, np.std):
      self.assertAllClose(statistic(aggregated), statistic(central), atol=atol)
    quartiles = [25, 50, 75]
    self.assertAllClose(
        np.percentile(aggregated, quartiles),
        np.percentile(central, quartiles),
        atol=atol)
 # Run all test cases in this module when it is executed as a script.
 if __name__ == '__main__':
  tf.test.main()