Adds discrete Gaussian (sampler and distributed DPQuery) to public TF Privacy.

PiperOrigin-RevId: 387232449
2021-07-27 17:17:53 -07:00 · 2021-07-27 17:17:53 -07:00 · e7e11d14d9
commit e7e11d14d9
parent 2f862eba9b
5 changed files with 697 additions and 0 deletions
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -43,6 +43,7 @@ else:
  # DPQuery classes
  from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery
  from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery
+  from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery
  from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery
  from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery
  from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
--- a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py
+++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py
@@ -0,0 +1,142 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Util functions for drawing discrete Gaussian samples.
+
+The following functions implement a vectorized TF version of the sampling
+algorithm described in the paper:
+
+The Discrete Gaussian for Differential Privacy
+https://arxiv.org/pdf/2004.00010.pdf
+
+Note that the exact sampling implementation should use integer and fractional
+parameters only. Here, we relax this constraint a bit and use vectorized
+implementations of Bernoulli and discrete Laplace sampling that can take float
+parameters.
+"""
+
+import tensorflow as tf
+import tensorflow_probability as tf_prob
+
+
+def _sample_discrete_laplace(t, shape):
+  """Draws integer samples from the discrete Laplace distribution.
+
+  A discrete Laplace variable Z ~ Lap(t) is obtained as the difference X - Y of
+  two independent geometric variables X, Y ~ Geo(1 - exp(-1/t)).
+
+  The geometric sampler from tensorflow_probability relies on floating point
+  operations, so the resulting samples may be inexact.
+
+  Args:
+    t: The scale of the discrete Laplace distribution.
+    shape: The shape of the tensor of samples to draw.
+
+  Returns:
+    A tf.int64 tensor of the given shape filled with random values.
+  """
+  float_scale = tf.cast(t, tf.float64)
+  success_prob = 1.0 - tf.exp(-1.0 / float_scale)
+  geometric = tf_prob.distributions.Geometric(probs=success_prob)
+  first_draw = geometric.sample(shape)
+  second_draw = geometric.sample(shape)
+  return tf.cast(first_draw - second_draw, tf.int64)
+
+
+def _sample_bernoulli(p):
+  """Draws tf.int64 samples (0 or 1) from Bernoulli(p), one per entry of `p`."""
+  bernoulli = tf_prob.distributions.Bernoulli(probs=p, dtype=tf.int64)
+  return bernoulli.sample()
+
+
+def _check_input_args(scale, shape, dtype):
+  """Validates the arguments passed to the discrete Gaussian sampler.
+
+  Args:
+    scale: The requested scale; must be a non-negative integer.
+    shape: The requested output shape (returned unchanged).
+    dtype: The requested output type; must be tf.int32 or tf.int64.
+
+  Returns:
+    A tuple `(scale, shape, dtype)` where `scale` is gated on the assertions.
+
+  Raises:
+    ValueError: If `dtype` is neither tf.int32 nor tf.int64.
+  """
+  supported_dtypes = (tf.int32, tf.int64)
+  if tf.as_dtype(dtype) not in supported_dtypes:
+    raise ValueError(
+        f'Only tf.int32 and tf.int64 are supported. Found dtype `{dtype}`.')
+
+  non_negative_check = tf.compat.v1.assert_non_negative(scale)
+  integer_check = tf.compat.v1.assert_integer(scale)
+  # Tie the returned scale to the assertions so that they run in graph mode.
+  with tf.control_dependencies([non_negative_check, integer_check]):
+    checked_scale = tf.identity(scale)
+  return checked_scale, shape, dtype
+
+
+def _int_square(value):
+  """Squares `value` without the TF op `Square(T=...)`, as sampling can happen on clients.
+
+  Uses the identity (v - 1) * (v + 1) + 1 == v * v.
+  """
+  below = value - 1
+  above = value + 1
+  return below * above + 1
+
+
+@tf.function
+def _sample_discrete_gaussian_helper(scale, shape, dtype):
+  """Draw samples from discrete Gaussian, assuming scale >= 0.
+
+  Uses rejection sampling: candidates are drawn from a discrete Laplace
+  distribution and each is accepted with probability
+  exp(-(|y| - scale)^2 / (2 * scale^2)). Batches are drawn until enough
+  candidates have been accepted to fill `shape`.
+
+  Args:
+    scale: The scale of the discrete Gaussian; cast to tf.int64 internally.
+    shape: The shape of the output tensor.
+    dtype: The type of the output.
+
+  Returns:
+    A tensor of the given shape and dtype filled with accepted samples.
+  """
+  scale = tf.cast(scale, tf.int64)
+  sq_scale = _int_square(scale)
+
+  # Scale for discrete Laplace. The sampling algorithm should be correct
+  # for any discrete Laplace scale, and the original paper uses
+  # `dlap_scale = floor(scale) + 1`. Here we use `dlap_scale = scale` (where
+  # input `scale` is restricted to integers >= 1) to simplify the fraction
+  # below. It turns out that for integer scales >= 1, `dlap_scale = scale` gives
+  # a good minimum success rate of ~70%, allowing a small oversampling factor.
+  dlap_scale = scale
+  oversample_factor = 1.5
+
+  # Draw at least some samples in case we got unlucky with small input shape.
+  min_n = 1000
+  # Total number of samples needed to fill the requested shape.
+  target_n = tf.reduce_prod(tf.cast(shape, tf.int64))
+  oversample_n = oversample_factor * tf.cast(target_n, tf.float32)
+  draw_n = tf.maximum(min_n, tf.cast(oversample_n, tf.int32))
+
+  # Running count of accepted samples and the flat buffer that collects them.
+  accepted_n = tf.constant(0, dtype=target_n.dtype)
+  result = tf.zeros((0,), dtype=tf.int64)
+
+  while accepted_n < target_n:
+    # Since the number of samples could be different in every retry, we need to
+    # manually specify the shape info for TF.
+    tf.autograph.experimental.set_loop_options(
+        shape_invariants=[(result, tf.TensorShape([None]))])
+    # Draw samples.
+    samples = _sample_discrete_laplace(dlap_scale, shape=(draw_n,))
+    # Acceptance probability exp(-(|y| - scale)^2 / (2 * scale^2)).
+    z_numer = _int_square(tf.abs(samples) - scale)
+    z_denom = 2 * sq_scale
+    bern_probs = tf.exp(-1.0 * tf.divide(z_numer, z_denom))
+    accept = _sample_bernoulli(bern_probs)
+    # Keep successful samples and increment counter.
+    accepted_samples = samples[tf.equal(accept, 1)]
+    accepted_n += tf.cast(tf.size(accepted_samples), accepted_n.dtype)
+    result = tf.concat([result, accepted_samples], axis=0)
+    # Reduce the number of draws for any retries.
+    draw_n = tf.cast(target_n - accepted_n, tf.float32) * oversample_factor
+    draw_n = tf.maximum(min_n, tf.cast(draw_n, tf.int32))
+
+  # Drop any surplus accepted samples before reshaping to the requested shape.
+  return tf.cast(tf.reshape(result[:target_n], shape), dtype)
+
+
+def sample_discrete_gaussian(scale, shape, dtype=tf.int32):
+  """Draws (possibly inexact) samples from the discrete Gaussian distribution.
+
+  Some integer constraints of the exact algorithm are relaxed so that
+  vectorized Bernoulli and discrete Laplace samplers can be used. Integer
+  operations are carried out in tf.int64 as TF does not have direct support
+  for fractions.
+
+  Args:
+    scale: The scale of the discrete Gaussian distribution.
+    shape: The shape of the output tensor.
+    dtype: The type of the output.
+
+  Returns:
+    A tensor of the specified shape filled with random values.
+  """
+  scale, shape, dtype = _check_input_args(scale, shape, dtype)
+
+  def _no_noise():
+    # A zero scale yields an all-zero output.
+    return tf.zeros(shape, dtype)
+
+  def _draw_noise():
+    return _sample_discrete_gaussian_helper(scale, shape, dtype)
+
+  scale_is_zero = tf.equal(scale, 0)
+  return tf.cond(scale_is_zero, _no_noise, _draw_noise)
--- a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py
+++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py
@@ -0,0 +1,275 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for discrete_gaussian_utils."""
+
+import collections
+import fractions
+import math
+import random
+
+from absl.testing import parameterized
+import numpy as np
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+
+# Default seed for `exact_sampler` so that its reference samples are
+# reproducible.
+EXACT_SAMPLER_SEED = 4242
+
+
+class DiscreteGaussianUtilsTest(tf.test.TestCase, parameterized.TestCase):
+  """Tests for `discrete_gaussian_utils.sample_discrete_gaussian`."""
+
+  # Output dtypes other than tf.int32/tf.int64 are rejected.
+  @parameterized.product(dtype=[tf.bool, tf.float32, tf.float64])
+  def test_raise_on_bad_dtype(self, dtype):
+    with self.assertRaises(ValueError):
+      _ = discrete_gaussian_utils.sample_discrete_gaussian(1, (1,), dtype)
+
+  def test_raise_on_negative_scale(self):
+    with self.assertRaises(tf.errors.InvalidArgumentError):
+      _ = discrete_gaussian_utils.sample_discrete_gaussian(-10, (1,))
+
+  def test_raise_on_float_scale(self):
+    with self.assertRaises(TypeError):
+      _ = discrete_gaussian_utils.sample_discrete_gaussian(3.14, (1,))
+
+  # The output shape matches the requested shape, including the scalar shape.
+  @parameterized.product(shape=[(), (1,), (100,), (2, 2), (3, 3, 3),
+                                (4, 1, 1, 1)])
+  def test_shapes(self, shape):
+    samples = discrete_gaussian_utils.sample_discrete_gaussian(100, shape)
+    samples = self.evaluate(samples)
+    self.assertAllEqual(samples.shape, shape)
+
+  @parameterized.product(dtype=[tf.int32, tf.int64])
+  def test_dtypes(self, dtype):
+    samples = discrete_gaussian_utils.sample_discrete_gaussian(1, (10,), dtype)
+    samples = self.evaluate(samples)
+    # Convert output np dtypes to tf dtypes.
+    self.assertEqual(tf.as_dtype(samples.dtype), dtype)
+
+  # A scale of zero must produce exactly zero noise.
+  def test_zero_noise(self):
+    scale = 0
+    shape = (100,)
+    dtype = tf.int32
+    samples = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale, shape, dtype=dtype)
+    samples = self.evaluate(samples)
+    self.assertAllEqual(samples, tf.zeros(shape, dtype=dtype))
+
+  # Each case is (name, scale, num_samples, mean_std_atol, percentile_atol).
+  @parameterized.named_parameters([('small_scale_small_n', 10, 2000, 1, 2),
+                                   ('small_scale_large_n', 10, 5000, 1, 1),
+                                   ('large_scale_small_n', 50, 2000, 2, 5),
+                                   ('large_scale_large_n', 50, 5000, 2, 3)])
+  def test_match_exact_sampler(self, scale, num_samples, mean_std_atol,
+                               percentile_atol):
+    # Reference samples from the pure-Python exact sampler defined below.
+    true_samples = exact_sampler(scale, num_samples)
+    drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=scale, shape=(num_samples,))
+    drawn_samples = self.evaluate(drawn_samples)
+
+    # Check mean, std, and percentiles.
+    self.assertAllClose(
+        np.mean(true_samples), np.mean(drawn_samples), atol=mean_std_atol)
+    self.assertAllClose(
+        np.std(true_samples), np.std(drawn_samples), atol=mean_std_atol)
+    self.assertAllClose(
+        np.percentile(true_samples, [10, 30, 50, 70, 90]),
+        np.percentile(drawn_samples, [10, 30, 50, 70, 90]),
+        atol=percentile_atol)
+
+  # Each case is (name, num_samples, kl_tolerance).
+  @parameterized.named_parameters([('n_1000', 1000, 5e-2),
+                                   ('n_10000', 10000, 5e-3)])
+  def test_kl_divergence(self, num_samples, kl_tolerance):
+    """Compute KL divergence between empirical & true distribution."""
+    scale = 10
+    sq_sigma = scale * scale
+    drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=scale, shape=(num_samples,))
+    drawn_samples = self.evaluate(drawn_samples)
+    value_counts = collections.Counter(drawn_samples)
+
+    kl = 0
+    norm_const = dgauss_normalizing_constant(sq_sigma)
+
+    # Each term is count * log(p_empirical / p_true), where
+    # p_empirical = count / num_samples and
+    # p_true = exp(-value^2 / (2 * sq_sigma)) / norm_const.
+    for value, count in value_counts.items():
+      kl += count * (
+          math.log(count * norm_const / num_samples) + value * value /
+          (2.0 * sq_sigma))
+
+    kl /= num_samples
+    self.assertLess(kl, kl_tolerance)
+
+
+def exact_sampler(scale, num_samples, seed=EXACT_SAMPLER_SEED):
+  """Implementation of the exact discrete gaussian distribution sampler.
+
+  Source: https://arxiv.org/pdf/2004.00010.pdf.
+
+  All randomness is drawn through `randrange` on a single `random.Random`
+  instance seeded with `seed`, so the output is reproducible for a given seed.
+
+  Args:
+    scale: The scale of the discrete Gaussian.
+    num_samples: The number of samples to generate.
+    seed: The seed for the random number generator to reproduce samples.
+
+  Returns:
+    A numpy array of discrete Gaussian samples.
+  """
+
+  def randrange(a, rng):
+    # Uniform integer in [0, a), drawn from the supplied generator.
+    return rng.randrange(a)
+
+  def bern_em1(rng):
+    """Sample from Bernoulli(exp(-1))."""
+    k = 2
+    while True:
+      if randrange(k, rng) == 0:  # if Bernoulli(1/k)==1
+        k = k + 1
+      else:
+        return k % 2
+
+  def bern_emab1(a, b, rng):
+    """Sample from Bernoulli(exp(-a/b)), assuming 0 <= a <= b."""
+    assert isinstance(a, int)
+    assert isinstance(b, int)
+    assert 0 <= a <= b
+    k = 1
+    while True:
+      # The second draw only happens when the first one succeeds
+      # (short-circuit `and`), which affects how much randomness is consumed.
+      if randrange(b, rng) < a and randrange(k, rng) == 0:  # if Bern(a/b/k)==1
+        k = k + 1
+      else:
+        return k % 2
+
+  def bern_emab(a, b, rng):
+    """Sample from Bernoulli(exp(-a/b)), allowing a > b."""
+    # Peel off factors of exp(-1) until the remaining ratio is at most 1.
+    while a > b:
+      if bern_em1(rng) == 0:
+        return 0
+      a = a - b
+    return bern_emab1(a, b, rng)
+
+  def geometric(t, rng):
+    """Sample from geometric(1-exp(-1/t))."""
+    assert isinstance(t, int)
+    assert t > 0
+    while True:
+      u = randrange(t, rng)
+      if bern_emab1(u, t, rng) == 1:
+        # Add `t` for every successful Bernoulli(exp(-1)) trial.
+        while bern_em1(rng) == 1:
+          u = u + t
+        return u
+
+  def dlap(t, rng):
+    """Sample from discrete Laplace with scale t.
+
+    Pr[x] = exp(-|x|/t) * (exp(1/t)-1)/(exp(1/t)+1). Supported on integers.
+
+    Args:
+      t: The scale.
+      rng: The random number generator.
+
+    Returns:
+      A discrete Laplace sample.
+    """
+    assert isinstance(t, int)
+    assert t > 0
+    while True:
+      u = geometric(t, rng)
+      # Random sign bit; a "negative zero" is rejected and the draw is retried.
+      b = randrange(2, rng)
+      if b == 1:
+        return u
+      elif u > 0:
+        return -u
+
+  def floorsqrt(x):
+    """Compute floor(sqrt(x)) exactly."""
+    assert x >= 0
+    a = 0  # maintain a^2<=x.
+    b = 1  # maintain b^2>x.
+    while b * b <= x:
+      b = 2 * b
+    # Do binary search.
+    while a + 1 < b:
+      c = (a + b) // 2
+      if c * c <= x:
+        a = c
+      else:
+        b = c
+    return a
+
+  def dgauss(ss, num, rng):
+    """Sample from discrete Gaussian.
+
+    Args:
+      ss: Variance proxy, squared scale, sigma^2.
+      num: The number of samples to generate.
+      rng: The random number generator.
+
+    Returns:
+      A tuple `(results, t, trials)`: the list of discrete Gaussian samples,
+      the discrete Laplace scale that was used, and the number of candidate
+      draws made.
+    """
+    ss = fractions.Fraction(ss)  # cast to rational for exact arithmetic
+    assert ss > 0
+    # Discrete Laplace scale for the candidates: floor(sigma) + 1.
+    t = floorsqrt(ss) + 1
+    results = []
+    trials = 0
+    while len(results) < num:
+      trials = trials + 1
+      y = dlap(t, rng)
+      # Accept the candidate with probability exp(-z).
+      z = (abs(y) - ss / t)**2 / (2 * ss)
+      if bern_emab(z.numerator, z.denominator, rng) == 1:
+        results.append(y)
+    return results, t, trials
+
+  rng = random.Random(seed)
+  # Only the samples are returned; the scale and trial count are discarded.
+  return np.array(dgauss(scale * scale, num_samples, rng)[0])
+
+
+def dgauss_normalizing_constant(sigma_sq):
+  """Computes the normalizing constant of the discrete Gaussian.
+
+  Two truncated series are used: a direct sum when `sigma_sq <= 1` and a
+  second series (named `poisson` in the source) when `sigma_sq >= 0.01`. When
+  both apply, they are checked for agreement and averaged.
+
+  Source: https://arxiv.org/pdf/2004.00010.pdf.
+
+  Args:
+    sigma_sq: Variance proxy, squared scale, sigma^2.
+
+  Returns:
+    The normalizing constant.
+  """
+  num_terms = 1000
+
+  direct = None
+  if sigma_sq <= 1:
+    # Accumulate from the smallest terms upwards, i.e. x = 1000 down to 1.
+    tail = 0
+    for x in range(num_terms, 0, -1):
+      tail = tail + math.exp(-x * x / (2.0 * sigma_sq))
+    direct = 2 * tail + 1
+
+  dual = None
+  if sigma_sq * 100 >= 1:
+    tail = 0
+    for y in range(num_terms, 0, -1):
+      tail = tail + math.exp(-math.pi * math.pi * sigma_sq * 2 * y * y)
+    dual = math.sqrt(2 * math.pi * sigma_sq) * (1 + 2 * tail)
+
+  if dual is None:
+    return direct
+  if direct is None:
+    return dual
+
+  # Both series apply here: they must agree up to a scaled tolerance.
+  scale = max(1, math.sqrt(2 * math.pi * sigma_sq))
+  precision = 1e-15
+  assert -precision * scale <= direct - dual <= precision * scale
+  return (direct + dual) / 2
+
+
+if __name__ == '__main__':
+  tf.test.main()
--- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
@@ -0,0 +1,114 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implements DPQuery interface for distributed discrete Gaussian mechanism."""
+
+import collections
+
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+from tensorflow_privacy.privacy.dp_query import dp_query
+
+
+class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
+  """Implements DPQuery for discrete distributed Gaussian sum queries.
+
+  For each local record, we check the L2 norm bound and add discrete Gaussian
+  noise. In particular, this DPQuery does not perform L2 norm clipping and the
+  norms of the input records are expected to be bounded.
+  """
+
+  # pylint: disable=invalid-name
+  _GlobalState = collections.namedtuple('_GlobalState',
+                                        ['l2_norm_bound', 'local_stddev'])
+
+  # pylint: disable=invalid-name
+  _SampleParams = collections.namedtuple('_SampleParams',
+                                         ['l2_norm_bound', 'local_stddev'])
+
+  def __init__(self, l2_norm_bound, local_stddev):
+    """Initializes the DistributedDiscreteGaussianSumQuery.
+
+    Args:
+      l2_norm_bound: The L2 norm bound to verify for each record.
+      local_stddev: The scale/stddev of the local discrete Gaussian noise.
+    """
+    self._l2_norm_bound = l2_norm_bound
+    self._local_stddev = local_stddev
+
+  def set_ledger(self, ledger):
+    """Always raises, as ledgers are not supported by this query.
+
+    Args:
+      ledger: Unused.
+
+    Raises:
+      NotImplementedError: Always.
+    """
+    del ledger  # Unused.
+    # Note the trailing space: adjacent string literals are concatenated as-is.
+    raise NotImplementedError('Ledger has not yet been implemented for '
+                              'DistributedDiscreteGaussianSumQuery!')
+
+  def initial_global_state(self):
+    """Returns the global state holding both parameters as tf.float32."""
+    return self._GlobalState(
+        tf.cast(self._l2_norm_bound, tf.float32),
+        tf.cast(self._local_stddev, tf.float32))
+
+  def derive_sample_params(self, global_state):
+    """Returns sample params copied from the given global state."""
+    return self._SampleParams(global_state.l2_norm_bound,
+                              global_state.local_stddev)
+
+  def _add_local_noise(self, record, local_stddev, shares=1):
+    """Add local discrete Gaussian noise to the record.
+
+    Args:
+      record: The record to which we generate and add local noise.
+      local_stddev: The scale/stddev of the local discrete Gaussian noise.
+      shares: Number of shares of local noise to generate. Should be 1 for each
+        record. This can be useful when we want to generate multiple noise
+        shares at once.
+
+    Returns:
+      The record with local noise added.
+    """
+    # Round up the noise as the TF discrete Gaussian sampler only takes
+    # integer noise stddevs for now.
+    ceil_local_stddev = tf.cast(tf.math.ceil(local_stddev), tf.int32)
+
+    def add_noise(v):
+      # Adds an extra dimension for `shares` number of draws.
+      shape = tf.concat([[shares], tf.shape(v)], axis=0)
+      dgauss_noise = discrete_gaussian_utils.sample_discrete_gaussian(
+          scale=ceil_local_stddev, shape=shape, dtype=v.dtype)
+      # Sum across the number of noise shares and add it.
+      noised_v = v + tf.reduce_sum(dgauss_noise, axis=0)
+      # Ensure shape as TF shape inference may fail due to custom noise sampler.
+      noised_v.set_shape(v.shape.as_list())
+      return noised_v
+
+    return tf.nest.map_structure(add_noise, record)
+
+  def preprocess_record(self, params, record):
+    """Check record norm and add noise to the record.
+
+    Args:
+      params: The sample params providing `l2_norm_bound` and `local_stddev`.
+      record: The (possibly nested) record of tf.int32 tensors.
+
+    Returns:
+      The record unchanged if `local_stddev` is 0, otherwise the record with
+      local discrete Gaussian noise added. The result is gated on the global
+      L2 norm check.
+    """
+    record_as_list = tf.nest.flatten(record)
+    # Only tf.int32 records are accepted.
+    for x in record_as_list:
+      tf.compat.v1.assert_type(x, tf.int32)
+    # The norm is computed in float so that a fractional bound is honored.
+    record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list]
+    dependencies = [
+        tf.compat.v1.assert_less_equal(
+            tf.linalg.global_norm(record_as_float_list),
+            params.l2_norm_bound,
+            message=f'Global L2 norm exceeds {params.l2_norm_bound}.')
+    ]
+    with tf.control_dependencies(dependencies):
+      result = tf.cond(
+          tf.equal(params.local_stddev, 0), lambda: record,
+          lambda: self._add_local_noise(record, params.local_stddev))
+      return result
+
+  def get_noised_result(self, sample_state, global_state):
+    """Returns the aggregate as-is, since noise was already added locally."""
+    # Note that by directly returning the aggregate, this assumes that there
+    # will not be missing local noise shares during execution.
+    return sample_state, global_state
--- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
@@ -0,0 +1,165 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for DistributedDiscreteGaussianQuery."""
+
+import functools
+
+from absl.testing import parameterized
+import numpy as np
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query
+from tensorflow_privacy.privacy.dp_query import test_utils
+
+# Short alias for the query class under test.
+ddg_sum_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery
+
+
+def silence_tf_error_messages(func):
+  """Decorator that temporarily changes the TF logging levels.
+
+  While `func` runs, the TF v1 logging verbosity is set to FATAL. The previous
+  verbosity is restored afterwards, even if `func` raises.
+
+  Args:
+    func: The function to wrap.
+
+  Returns:
+    A wrapper that forwards all arguments to `func` and returns its result.
+  """
+
+  @functools.wraps(func)
+  def wrapper(*args, **kwargs):
+    cur_verbosity = tf.compat.v1.logging.get_verbosity()
+    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL)
+    try:
+      return func(*args, **kwargs)
+    finally:
+      # Reset verbosity even when `func` raises, so later tests log normally.
+      tf.compat.v1.logging.set_verbosity(cur_verbosity)
+
+  return wrapper
+
+
+class DistributedDiscreteGaussianQueryTest(tf.test.TestCase,
+                                           parameterized.TestCase):
+  """Tests for `DistributedDiscreteGaussianSumQuery`."""
+
+  # With zero local stddev the query result is the plain sum of the records.
+  def test_sum_no_noise(self):
+    with self.cached_session() as sess:
+      record1 = tf.constant([2, 0], dtype=tf.int32)
+      record2 = tf.constant([-1, 1], dtype=tf.int32)
+
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, [record1, record2])
+      result = sess.run(query_result)
+      expected = [1, 1]
+      self.assertAllEqual(result, expected)
+
+  # Records made of several tensors with different shapes.
+  @parameterized.product(sample_size=[1, 3])
+  def test_sum_multiple_shapes(self, sample_size):
+    with self.cached_session() as sess:
+      t1 = tf.constant([2, 0], dtype=tf.int32)
+      t2 = tf.constant([-1, 1, 3], dtype=tf.int32)
+      t3 = tf.constant([-2], dtype=tf.int32)
+      record = [t1, t2, t3]
+      sample = [record] * sample_size
+
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, sample)
+      expected = [sample_size * t1, sample_size * t2, sample_size * t3]
+      result, expected = sess.run([query_result, expected])
+      # Use `assertAllClose` for nested structures equality (with tolerance=0).
+      self.assertAllClose(result, expected, atol=0)
+
+  # Records nested through lists, dicts and tuples.
+  @parameterized.product(sample_size=[1, 3])
+  def test_sum_nested_record_structure(self, sample_size):
+    with self.cached_session() as sess:
+      t1 = tf.constant([1, 0], dtype=tf.int32)
+      t2 = tf.constant([1, 1, 1], dtype=tf.int32)
+      t3 = tf.constant([1], dtype=tf.int32)
+      t4 = tf.constant([[1, 1], [1, 1]], dtype=tf.int32)
+      record = [t1, dict(a=t2, b=[t3, (t4, t1)])]
+      sample = [record] * sample_size
+
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, sample)
+      result = sess.run(query_result)
+
+      s = sample_size
+      expected = [t1 * s, dict(a=t2 * s, b=[t3 * s, (t4 * s, t1 * s)])]
+      # Use `assertAllClose` for nested structures equality (with tolerance=0)
+      self.assertAllClose(result, expected, atol=0)
+
+  # Float records are rejected with a TypeError.
+  def test_sum_raise_on_float_inputs(self):
+    with self.cached_session() as sess:
+      record1 = tf.constant([2, 0], dtype=tf.float32)
+      record2 = tf.constant([-1, 1], dtype=tf.float32)
+      query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
+
+      with self.assertRaises(TypeError):
+        query_result, _ = test_utils.run_query(query, [record1, record2])
+        sess.run(query_result)
+
+  # The record [10, 10] has L2 norm ~14.14, which exceeds every bound below.
+  @parameterized.product(l2_norm_bound=[0, 3, 10, 14.1])
+  @silence_tf_error_messages
+  def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound):
+    with self.cached_session() as sess:
+      record = tf.constant([10, 10], dtype=tf.int32)
+      query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
+
+      with self.assertRaises(tf.errors.InvalidArgumentError):
+        query_result, _ = test_utils.run_query(query, [record])
+        sess.run(query_result)
+
+  def test_sum_float_norm_not_rounded(self):
+    """Test that the float L2 norm bound doesn't get rounded/cast to integers."""
+    with self.cached_session() as sess:
+      # A cast/rounded norm bound would be insufficient.
+      l2_norm_bound = 14.2
+      record = tf.constant([10, 10], dtype=tf.int32)
+      query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
+      query_result, _ = test_utils.run_query(query, [record])
+      result = sess.run(query_result)
+      expected = [10, 10]
+      self.assertAllEqual(result, expected)
+
+  # Each case is (name, local_stddev, num_records).
+  @parameterized.named_parameters([('2_local_stddev_1_record', 2, 1),
+                                   ('10_local_stddev_4_records', 10, 4),
+                                   ('1000_local_stddev_1_record', 1000, 1),
+                                   ('1000_local_stddev_25_records', 1000, 25)])
+  def test_sum_local_noise_shares(self, local_stddev, num_records):
+    """Test the noise level of the sum of discrete Gaussians applied locally.
+
+    The sum of discrete Gaussians is not a discrete Gaussian, but it will be
+    extremely close for sigma >= 2. We will thus compare the aggregated noise
+    to a central discrete Gaussian noise with appropriately scaled stddev with
+    some reasonable tolerance.
+
+    Args:
+      local_stddev: The stddev of the local discrete Gaussian noise.
+      num_records: The number of records to be aggregated.
+    """
+    # Aggregated local noises.
+    num_trials = 1000
+    # All-zero records, so the query result consists of noise only.
+    record = tf.zeros([num_trials], dtype=tf.int32)
+    sample = [record] * num_records
+    query = ddg_sum_query(l2_norm_bound=10.0, local_stddev=local_stddev)
+    query_result, _ = test_utils.run_query(query, sample)
+
+    # Central discrete Gaussian noise.
+    central_stddev = np.sqrt(num_records) * local_stddev
+    central_noise = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=tf.cast(tf.round(central_stddev), record.dtype),
+        shape=tf.shape(record),
+        dtype=record.dtype)
+
+    agg_noise, central_noise = self.evaluate([query_result, central_noise])
+
+    # Standard error of the mean over `num_trials` draws.
+    mean_stddev = central_stddev * np.sqrt(num_trials) / num_trials
+    atol = 3.5 * mean_stddev
+
+    # Use the atol for mean as a rough default atol for stddev/percentile.
+    self.assertAllClose(np.mean(agg_noise), np.mean(central_noise), atol=atol)
+    self.assertAllClose(np.std(agg_noise), np.std(central_noise), atol=atol)
+    self.assertAllClose(
+        np.percentile(agg_noise, [25, 50, 75]),
+        np.percentile(central_noise, [25, 50, 75]),
+        atol=atol)
+
+
+if __name__ == '__main__':
+  tf.test.main()