forked from 626_privacy/tensorflow_privacy
Adds discrete Gaussian (sampler and distributed DPQuery) to public TF Privacy.
PiperOrigin-RevId: 387232449
This commit is contained in:
parent
2f862eba9b
commit
e7e11d14d9
5 changed files with 697 additions and 0 deletions
|
@ -43,6 +43,7 @@ else:
|
||||||
# DPQuery classes
|
# DPQuery classes
|
||||||
from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery
|
from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery
|
||||||
from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery
|
from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery
|
||||||
|
from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery
|
||||||
from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery
|
from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery
|
||||||
from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery
|
from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery
|
||||||
from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
|
from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery
|
||||||
|
|
142
tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py
Normal file
142
tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
# Copyright 2021, The TensorFlow Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Util functions for drawing discrete Gaussian samples.
|
||||||
|
|
||||||
|
The following functions implement a vectorized TF version of the sampling
|
||||||
|
algorithm described in the paper:
|
||||||
|
|
||||||
|
The Discrete Gaussian for Differential Privacy
|
||||||
|
https://arxiv.org/pdf/2004.00010.pdf
|
||||||
|
|
||||||
|
Note that the exact sampling implementation should use integer and fractional
|
||||||
|
parameters only. Here, we relax this constraint a bit and use vectorized
|
||||||
|
implementations of Bernoulli and discrete Laplace sampling that can take float
|
||||||
|
parameters.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_probability as tf_prob
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_discrete_laplace(t, shape):
|
||||||
|
"""Sample from discrete Laplace with scale t.
|
||||||
|
|
||||||
|
This method is based on the observation that sampling from Z ~ Lap(t) is
|
||||||
|
equivalent to sampling X, Y independently from Geo(1 - exp(-1/t)) and take
|
||||||
|
Z = X - Y.
|
||||||
|
|
||||||
|
Note also that tensorflow_probability's geometric sampler is based on floating
|
||||||
|
operations and may possibly be inexact.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
t: The scale of the discrete Laplace distribution.
|
||||||
|
shape: The tensor shape of the tensors drawn.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A tensor of the specified shape filled with random values.
|
||||||
|
"""
|
||||||
|
geometric_probs = 1.0 - tf.exp(-1.0 / tf.cast(t, tf.float64))
|
||||||
|
sampler = tf_prob.distributions.Geometric(probs=geometric_probs)
|
||||||
|
return tf.cast(sampler.sample(shape) - sampler.sample(shape), tf.int64)
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_bernoulli(p):
|
||||||
|
"""Sample from Bernoulli(p)."""
|
||||||
|
return tf_prob.distributions.Bernoulli(probs=p, dtype=tf.int64).sample()
|
||||||
|
|
||||||
|
|
||||||
|
def _check_input_args(scale, shape, dtype):
|
||||||
|
"""Checks the input args to the discrete Gaussian sampler."""
|
||||||
|
if tf.as_dtype(dtype) not in (tf.int32, tf.int64):
|
||||||
|
raise ValueError(
|
||||||
|
f'Only tf.int32 and tf.int64 are supported. Found dtype `{dtype}`.')
|
||||||
|
|
||||||
|
checks = [
|
||||||
|
tf.compat.v1.assert_non_negative(scale),
|
||||||
|
tf.compat.v1.assert_integer(scale)
|
||||||
|
]
|
||||||
|
with tf.control_dependencies(checks):
|
||||||
|
return tf.identity(scale), shape, dtype
|
||||||
|
|
||||||
|
|
||||||
|
def _int_square(value):
|
||||||
|
"""Avoids the TF op `Square(T=...)` for ints as sampling can happen on clients."""
|
||||||
|
return (value - 1) * (value + 1) + 1
|
||||||
|
|
||||||
|
|
||||||
|
@tf.function
|
||||||
|
def _sample_discrete_gaussian_helper(scale, shape, dtype):
|
||||||
|
"""Draw samples from discrete Gaussian, assuming scale >= 0."""
|
||||||
|
scale = tf.cast(scale, tf.int64)
|
||||||
|
sq_scale = _int_square(scale)
|
||||||
|
|
||||||
|
# Scale for discrete Laplace. The sampling algorithm should be correct
|
||||||
|
# for any discrete Laplace scale, and the original paper uses
|
||||||
|
# `dlap_scale = floor(scale) + 1`. Here we use `dlap_scale = scale` (where
|
||||||
|
# input `scale` is restricted to integers >= 1) to simplify the fraction
|
||||||
|
# below. It turns out that for integer scales >= 1, `dlap_scale = scale` gives
|
||||||
|
# a good minimum success rate of ~70%, allowing a small oversampling factor.
|
||||||
|
dlap_scale = scale
|
||||||
|
oversample_factor = 1.5
|
||||||
|
|
||||||
|
# Draw at least some samples in case we got unlucky with small input shape.
|
||||||
|
min_n = 1000
|
||||||
|
target_n = tf.reduce_prod(tf.cast(shape, tf.int64))
|
||||||
|
oversample_n = oversample_factor * tf.cast(target_n, tf.float32)
|
||||||
|
draw_n = tf.maximum(min_n, tf.cast(oversample_n, tf.int32))
|
||||||
|
|
||||||
|
accepted_n = tf.constant(0, dtype=target_n.dtype)
|
||||||
|
result = tf.zeros((0,), dtype=tf.int64)
|
||||||
|
|
||||||
|
while accepted_n < target_n:
|
||||||
|
# Since the number of samples could be different in every retry, we need to
|
||||||
|
# manually specify the shape info for TF.
|
||||||
|
tf.autograph.experimental.set_loop_options(
|
||||||
|
shape_invariants=[(result, tf.TensorShape([None]))])
|
||||||
|
# Draw samples.
|
||||||
|
samples = _sample_discrete_laplace(dlap_scale, shape=(draw_n,))
|
||||||
|
z_numer = _int_square(tf.abs(samples) - scale)
|
||||||
|
z_denom = 2 * sq_scale
|
||||||
|
bern_probs = tf.exp(-1.0 * tf.divide(z_numer, z_denom))
|
||||||
|
accept = _sample_bernoulli(bern_probs)
|
||||||
|
# Keep successful samples and increment counter.
|
||||||
|
accepted_samples = samples[tf.equal(accept, 1)]
|
||||||
|
accepted_n += tf.cast(tf.size(accepted_samples), accepted_n.dtype)
|
||||||
|
result = tf.concat([result, accepted_samples], axis=0)
|
||||||
|
# Reduce the number of draws for any retries.
|
||||||
|
draw_n = tf.cast(target_n - accepted_n, tf.float32) * oversample_factor
|
||||||
|
draw_n = tf.maximum(min_n, tf.cast(draw_n, tf.int32))
|
||||||
|
|
||||||
|
return tf.cast(tf.reshape(result[:target_n], shape), dtype)
|
||||||
|
|
||||||
|
|
||||||
|
def sample_discrete_gaussian(scale, shape, dtype=tf.int32):
|
||||||
|
"""Draws (possibly inexact) samples from the discrete Gaussian distribution.
|
||||||
|
|
||||||
|
We relax some integer constraints to use vectorized implementations of
|
||||||
|
Bernoulli and discrete Laplace sampling. Integer operations are done in
|
||||||
|
tf.int64 as TF does not have direct support for fractions.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scale: The scale of the discrete Gaussian distribution.
|
||||||
|
shape: The shape of the output tensor.
|
||||||
|
dtype: The type of the output.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A tensor of the specified shape filled with random values.
|
||||||
|
"""
|
||||||
|
scale, shape, dtype = _check_input_args(scale, shape, dtype)
|
||||||
|
return tf.cond(
|
||||||
|
tf.equal(scale, 0), lambda: tf.zeros(shape, dtype),
|
||||||
|
lambda: _sample_discrete_gaussian_helper(scale, shape, dtype))
|
|
@ -0,0 +1,275 @@
|
||||||
|
# Copyright 2021, The TensorFlow Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Tests for discrete_gaussian_utils."""
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import fractions
|
||||||
|
import math
|
||||||
|
import random
|
||||||
|
|
||||||
|
from absl.testing import parameterized
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
|
||||||
|
|
||||||
|
EXACT_SAMPLER_SEED = 4242
|
||||||
|
|
||||||
|
|
||||||
|
class DiscreteGaussianUtilsTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
|
|
||||||
|
@parameterized.product(dtype=[tf.bool, tf.float32, tf.float64])
|
||||||
|
def test_raise_on_bad_dtype(self, dtype):
|
||||||
|
with self.assertRaises(ValueError):
|
||||||
|
_ = discrete_gaussian_utils.sample_discrete_gaussian(1, (1,), dtype)
|
||||||
|
|
||||||
|
def test_raise_on_negative_scale(self):
|
||||||
|
with self.assertRaises(tf.errors.InvalidArgumentError):
|
||||||
|
_ = discrete_gaussian_utils.sample_discrete_gaussian(-10, (1,))
|
||||||
|
|
||||||
|
def test_raise_on_float_scale(self):
|
||||||
|
with self.assertRaises(TypeError):
|
||||||
|
_ = discrete_gaussian_utils.sample_discrete_gaussian(3.14, (1,))
|
||||||
|
|
||||||
|
@parameterized.product(shape=[(), (1,), (100,), (2, 2), (3, 3, 3),
|
||||||
|
(4, 1, 1, 1)])
|
||||||
|
def test_shapes(self, shape):
|
||||||
|
samples = discrete_gaussian_utils.sample_discrete_gaussian(100, shape)
|
||||||
|
samples = self.evaluate(samples)
|
||||||
|
self.assertAllEqual(samples.shape, shape)
|
||||||
|
|
||||||
|
@parameterized.product(dtype=[tf.int32, tf.int64])
|
||||||
|
def test_dtypes(self, dtype):
|
||||||
|
samples = discrete_gaussian_utils.sample_discrete_gaussian(1, (10,), dtype)
|
||||||
|
samples = self.evaluate(samples)
|
||||||
|
# Convert output np dtypes to tf dtypes.
|
||||||
|
self.assertEqual(tf.as_dtype(samples.dtype), dtype)
|
||||||
|
|
||||||
|
def test_zero_noise(self):
|
||||||
|
scale = 0
|
||||||
|
shape = (100,)
|
||||||
|
dtype = tf.int32
|
||||||
|
samples = discrete_gaussian_utils.sample_discrete_gaussian(
|
||||||
|
scale, shape, dtype=dtype)
|
||||||
|
samples = self.evaluate(samples)
|
||||||
|
self.assertAllEqual(samples, tf.zeros(shape, dtype=dtype))
|
||||||
|
|
||||||
|
@parameterized.named_parameters([('small_scale_small_n', 10, 2000, 1, 2),
|
||||||
|
('small_scale_large_n', 10, 5000, 1, 1),
|
||||||
|
('large_scale_small_n', 50, 2000, 2, 5),
|
||||||
|
('large_scale_large_n', 50, 5000, 2, 3)])
|
||||||
|
def test_match_exact_sampler(self, scale, num_samples, mean_std_atol,
|
||||||
|
percentile_atol):
|
||||||
|
true_samples = exact_sampler(scale, num_samples)
|
||||||
|
drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian(
|
||||||
|
scale=scale, shape=(num_samples,))
|
||||||
|
drawn_samples = self.evaluate(drawn_samples)
|
||||||
|
|
||||||
|
# Check mean, std, and percentiles.
|
||||||
|
self.assertAllClose(
|
||||||
|
np.mean(true_samples), np.mean(drawn_samples), atol=mean_std_atol)
|
||||||
|
self.assertAllClose(
|
||||||
|
np.std(true_samples), np.std(drawn_samples), atol=mean_std_atol)
|
||||||
|
self.assertAllClose(
|
||||||
|
np.percentile(true_samples, [10, 30, 50, 70, 90]),
|
||||||
|
np.percentile(drawn_samples, [10, 30, 50, 70, 90]),
|
||||||
|
atol=percentile_atol)
|
||||||
|
|
||||||
|
@parameterized.named_parameters([('n_1000', 1000, 5e-2),
|
||||||
|
('n_10000', 10000, 5e-3)])
|
||||||
|
def test_kl_divergence(self, num_samples, kl_tolerance):
|
||||||
|
"""Compute KL divergence betwen empirical & true distribution."""
|
||||||
|
scale = 10
|
||||||
|
sq_sigma = scale * scale
|
||||||
|
drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian(
|
||||||
|
scale=scale, shape=(num_samples,))
|
||||||
|
drawn_samples = self.evaluate(drawn_samples)
|
||||||
|
value_counts = collections.Counter(drawn_samples)
|
||||||
|
|
||||||
|
kl = 0
|
||||||
|
norm_const = dgauss_normalizing_constant(sq_sigma)
|
||||||
|
|
||||||
|
for value, count in value_counts.items():
|
||||||
|
kl += count * (
|
||||||
|
math.log(count * norm_const / num_samples) + value * value /
|
||||||
|
(2.0 * sq_sigma))
|
||||||
|
|
||||||
|
kl /= num_samples
|
||||||
|
self.assertLess(kl, kl_tolerance)
|
||||||
|
|
||||||
|
|
||||||
|
def exact_sampler(scale, num_samples, seed=EXACT_SAMPLER_SEED):
|
||||||
|
"""Implementation of the exact discrete gaussian distribution sampler.
|
||||||
|
|
||||||
|
Source: https://arxiv.org/pdf/2004.00010.pdf.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scale: The scale of the discrete Gaussian.
|
||||||
|
num_samples: The number of samples to generate.
|
||||||
|
seed: The seed for the random number generator to reproduce samples.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A numpy array of discrete Gaussian samples.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def randrange(a, rng):
|
||||||
|
return rng.randrange(a)
|
||||||
|
|
||||||
|
def bern_em1(rng):
|
||||||
|
"""Sample from Bernoulli(exp(-1))."""
|
||||||
|
k = 2
|
||||||
|
while True:
|
||||||
|
if randrange(k, rng) == 0: # if Bernoulli(1/k)==1
|
||||||
|
k = k + 1
|
||||||
|
else:
|
||||||
|
return k % 2
|
||||||
|
|
||||||
|
def bern_emab1(a, b, rng):
|
||||||
|
"""Sample from Bernoulli(exp(-a/b)), assuming 0 <= a <= b."""
|
||||||
|
assert isinstance(a, int)
|
||||||
|
assert isinstance(b, int)
|
||||||
|
assert 0 <= a <= b
|
||||||
|
k = 1
|
||||||
|
while True:
|
||||||
|
if randrange(b, rng) < a and randrange(k, rng) == 0: # if Bern(a/b/k)==1
|
||||||
|
k = k + 1
|
||||||
|
else:
|
||||||
|
return k % 2
|
||||||
|
|
||||||
|
def bern_emab(a, b, rng):
|
||||||
|
"""Sample from Bernoulli(exp(-a/b)), allowing a > b."""
|
||||||
|
while a > b:
|
||||||
|
if bern_em1(rng) == 0:
|
||||||
|
return 0
|
||||||
|
a = a - b
|
||||||
|
return bern_emab1(a, b, rng)
|
||||||
|
|
||||||
|
def geometric(t, rng):
|
||||||
|
"""Sample from geometric(1-exp(-1/t))."""
|
||||||
|
assert isinstance(t, int)
|
||||||
|
assert t > 0
|
||||||
|
while True:
|
||||||
|
u = randrange(t, rng)
|
||||||
|
if bern_emab1(u, t, rng) == 1:
|
||||||
|
while bern_em1(rng) == 1:
|
||||||
|
u = u + t
|
||||||
|
return u
|
||||||
|
|
||||||
|
def dlap(t, rng):
|
||||||
|
"""Sample from discrete Laplace with scale t.
|
||||||
|
|
||||||
|
Pr[x] = exp(-|x|/t) * (exp(1/t)-1)/(exp(1/t)+1). Supported on integers.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
t: The scale.
|
||||||
|
rng: The random number generator.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A discrete Laplace sample.
|
||||||
|
"""
|
||||||
|
assert isinstance(t, int)
|
||||||
|
assert t > 0
|
||||||
|
while True:
|
||||||
|
u = geometric(t, rng)
|
||||||
|
b = randrange(2, rng)
|
||||||
|
if b == 1:
|
||||||
|
return u
|
||||||
|
elif u > 0:
|
||||||
|
return -u
|
||||||
|
|
||||||
|
def floorsqrt(x):
|
||||||
|
"""Compute floor(sqrt(x)) exactly."""
|
||||||
|
assert x >= 0
|
||||||
|
a = 0 # maintain a^2<=x.
|
||||||
|
b = 1 # maintain b^2>x.
|
||||||
|
while b * b <= x:
|
||||||
|
b = 2 * b
|
||||||
|
# Do binary search.
|
||||||
|
while a + 1 < b:
|
||||||
|
c = (a + b) // 2
|
||||||
|
if c * c <= x:
|
||||||
|
a = c
|
||||||
|
else:
|
||||||
|
b = c
|
||||||
|
return a
|
||||||
|
|
||||||
|
def dgauss(ss, num, rng):
|
||||||
|
"""Sample from discrete Gaussian.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ss: Variance proxy, squared scale, sigma^2.
|
||||||
|
num: The number of samples to generate.
|
||||||
|
rng: The random number generator.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of discrete Gaussian samples.
|
||||||
|
"""
|
||||||
|
ss = fractions.Fraction(ss) # cast to rational for exact arithmetic
|
||||||
|
assert ss > 0
|
||||||
|
t = floorsqrt(ss) + 1
|
||||||
|
results = []
|
||||||
|
trials = 0
|
||||||
|
while len(results) < num:
|
||||||
|
trials = trials + 1
|
||||||
|
y = dlap(t, rng)
|
||||||
|
z = (abs(y) - ss / t)**2 / (2 * ss)
|
||||||
|
if bern_emab(z.numerator, z.denominator, rng) == 1:
|
||||||
|
results.append(y)
|
||||||
|
return results, t, trials
|
||||||
|
|
||||||
|
rng = random.Random(seed)
|
||||||
|
return np.array(dgauss(scale * scale, num_samples, rng)[0])
|
||||||
|
|
||||||
|
|
||||||
|
def dgauss_normalizing_constant(sigma_sq):
|
||||||
|
"""Compute the normalizing constant of the discrete Gaussian.
|
||||||
|
|
||||||
|
Source: https://arxiv.org/pdf/2004.00010.pdf.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sigma_sq: Variance proxy, squared scale, sigma^2.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The normalizing constant.
|
||||||
|
"""
|
||||||
|
original = None
|
||||||
|
poisson = None
|
||||||
|
if sigma_sq <= 1:
|
||||||
|
original = 0
|
||||||
|
x = 1000
|
||||||
|
while x > 0:
|
||||||
|
original = original + math.exp(-x * x / (2.0 * sigma_sq))
|
||||||
|
x = x - 1
|
||||||
|
original = 2 * original + 1
|
||||||
|
|
||||||
|
if sigma_sq * 100 >= 1:
|
||||||
|
poisson = 0
|
||||||
|
y = 1000
|
||||||
|
while y > 0:
|
||||||
|
poisson = poisson + math.exp(-math.pi * math.pi * sigma_sq * 2 * y * y)
|
||||||
|
y = y - 1
|
||||||
|
poisson = math.sqrt(2 * math.pi * sigma_sq) * (1 + 2 * poisson)
|
||||||
|
|
||||||
|
if poisson is None:
|
||||||
|
return original
|
||||||
|
if original is None:
|
||||||
|
return poisson
|
||||||
|
|
||||||
|
scale = max(1, math.sqrt(2 * math.pi * sigma_sq))
|
||||||
|
precision = 1e-15
|
||||||
|
assert -precision * scale <= original - poisson <= precision * scale
|
||||||
|
return (original + poisson) / 2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
tf.test.main()
|
|
@ -0,0 +1,114 @@
|
||||||
|
# Copyright 2021, The TensorFlow Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Implements DPQuery interface for distributed discrete Gaussian mechanism."""
|
||||||
|
|
||||||
|
import collections
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
|
||||||
|
from tensorflow_privacy.privacy.dp_query import dp_query
|
||||||
|
|
||||||
|
|
||||||
|
class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
|
||||||
|
"""Implements DPQuery for discrete distributed Gaussian sum queries.
|
||||||
|
|
||||||
|
For each local record, we check the L2 norm bound and add discrete Gaussian
|
||||||
|
noise. In particular, this DPQuery does not perform L2 norm clipping and the
|
||||||
|
norms of the input records are expected to be bounded.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# pylint: disable=invalid-name
|
||||||
|
_GlobalState = collections.namedtuple('_GlobalState',
|
||||||
|
['l2_norm_bound', 'local_stddev'])
|
||||||
|
|
||||||
|
# pylint: disable=invalid-name
|
||||||
|
_SampleParams = collections.namedtuple('_SampleParams',
|
||||||
|
['l2_norm_bound', 'local_stddev'])
|
||||||
|
|
||||||
|
def __init__(self, l2_norm_bound, local_stddev):
|
||||||
|
"""Initializes the DistributedDiscreteGaussianSumQuery.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
l2_norm_bound: The L2 norm bound to verify for each record.
|
||||||
|
local_stddev: The scale/stddev of the local discrete Gaussian noise.
|
||||||
|
"""
|
||||||
|
self._l2_norm_bound = l2_norm_bound
|
||||||
|
self._local_stddev = local_stddev
|
||||||
|
|
||||||
|
def set_ledger(self, ledger):
|
||||||
|
del ledger # Unused.
|
||||||
|
raise NotImplementedError('Ledger has not yet been implemented for'
|
||||||
|
'DistributedDiscreteGaussianSumQuery!')
|
||||||
|
|
||||||
|
def initial_global_state(self):
|
||||||
|
return self._GlobalState(
|
||||||
|
tf.cast(self._l2_norm_bound, tf.float32),
|
||||||
|
tf.cast(self._local_stddev, tf.float32))
|
||||||
|
|
||||||
|
def derive_sample_params(self, global_state):
|
||||||
|
return self._SampleParams(global_state.l2_norm_bound,
|
||||||
|
global_state.local_stddev)
|
||||||
|
|
||||||
|
def _add_local_noise(self, record, local_stddev, shares=1):
|
||||||
|
"""Add local discrete Gaussian noise to the record.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
record: The record to which we generate and add local noise.
|
||||||
|
local_stddev: The scale/stddev of the local discrete Gaussian noise.
|
||||||
|
shares: Number of shares of local noise to generate. Should be 1 for each
|
||||||
|
record. This can be useful when we want to generate multiple noise
|
||||||
|
shares at once.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The record with local noise added.
|
||||||
|
"""
|
||||||
|
# Round up the noise as the TF discrete Gaussian sampler only takes
|
||||||
|
# integer noise stddevs for now.
|
||||||
|
ceil_local_stddev = tf.cast(tf.math.ceil(local_stddev), tf.int32)
|
||||||
|
|
||||||
|
def add_noise(v):
|
||||||
|
# Adds an extra dimension for `shares` number of draws.
|
||||||
|
shape = tf.concat([[shares], tf.shape(v)], axis=0)
|
||||||
|
dgauss_noise = discrete_gaussian_utils.sample_discrete_gaussian(
|
||||||
|
scale=ceil_local_stddev, shape=shape, dtype=v.dtype)
|
||||||
|
# Sum across the number of noise shares and add it.
|
||||||
|
noised_v = v + tf.reduce_sum(dgauss_noise, axis=0)
|
||||||
|
# Ensure shape as TF shape inference may fail due to custom noise sampler.
|
||||||
|
noised_v.set_shape(v.shape.as_list())
|
||||||
|
return noised_v
|
||||||
|
|
||||||
|
return tf.nest.map_structure(add_noise, record)
|
||||||
|
|
||||||
|
def preprocess_record(self, params, record):
|
||||||
|
"""Check record norm and add noise to the record."""
|
||||||
|
record_as_list = tf.nest.flatten(record)
|
||||||
|
record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list]
|
||||||
|
tf.nest.map_structure(lambda x: tf.compat.v1.assert_type(x, tf.int32),
|
||||||
|
record_as_list)
|
||||||
|
dependencies = [
|
||||||
|
tf.compat.v1.assert_less_equal(
|
||||||
|
tf.linalg.global_norm(record_as_float_list),
|
||||||
|
params.l2_norm_bound,
|
||||||
|
message=f'Global L2 norm exceeds {params.l2_norm_bound}.')
|
||||||
|
]
|
||||||
|
with tf.control_dependencies(dependencies):
|
||||||
|
result = tf.cond(
|
||||||
|
tf.equal(params.local_stddev, 0), lambda: record,
|
||||||
|
lambda: self._add_local_noise(record, params.local_stddev))
|
||||||
|
return result
|
||||||
|
|
||||||
|
def get_noised_result(self, sample_state, global_state):
|
||||||
|
# Note that by directly returning the aggregate, this assumes that there
|
||||||
|
# will not be missing local noise shares during execution.
|
||||||
|
return sample_state, global_state
|
|
@ -0,0 +1,165 @@
|
||||||
|
# Copyright 2021, The TensorFlow Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""Tests for DistributedDiscreteGaussianQuery."""
|
||||||
|
|
||||||
|
from absl.testing import parameterized
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
|
||||||
|
from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query
|
||||||
|
from tensorflow_privacy.privacy.dp_query import test_utils
|
||||||
|
|
||||||
|
ddg_sum_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery
|
||||||
|
|
||||||
|
|
||||||
|
def silence_tf_error_messages(func):
|
||||||
|
"""Decorator that temporarily changes the TF logging levels."""
|
||||||
|
|
||||||
|
def wrapper(*args, **kwargs):
|
||||||
|
cur_verbosity = tf.compat.v1.logging.get_verbosity()
|
||||||
|
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL)
|
||||||
|
func(*args, **kwargs)
|
||||||
|
tf.compat.v1.logging.set_verbosity(cur_verbosity) # Reset verbosity.
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
class DistributedDiscreteGaussianQueryTest(tf.test.TestCase,
|
||||||
|
parameterized.TestCase):
|
||||||
|
|
||||||
|
def test_sum_no_noise(self):
|
||||||
|
with self.cached_session() as sess:
|
||||||
|
record1 = tf.constant([2, 0], dtype=tf.int32)
|
||||||
|
record2 = tf.constant([-1, 1], dtype=tf.int32)
|
||||||
|
|
||||||
|
query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
|
||||||
|
query_result, _ = test_utils.run_query(query, [record1, record2])
|
||||||
|
result = sess.run(query_result)
|
||||||
|
expected = [1, 1]
|
||||||
|
self.assertAllEqual(result, expected)
|
||||||
|
|
||||||
|
@parameterized.product(sample_size=[1, 3])
|
||||||
|
def test_sum_multiple_shapes(self, sample_size):
|
||||||
|
with self.cached_session() as sess:
|
||||||
|
t1 = tf.constant([2, 0], dtype=tf.int32)
|
||||||
|
t2 = tf.constant([-1, 1, 3], dtype=tf.int32)
|
||||||
|
t3 = tf.constant([-2], dtype=tf.int32)
|
||||||
|
record = [t1, t2, t3]
|
||||||
|
sample = [record] * sample_size
|
||||||
|
|
||||||
|
query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
|
||||||
|
query_result, _ = test_utils.run_query(query, sample)
|
||||||
|
expected = [sample_size * t1, sample_size * t2, sample_size * t3]
|
||||||
|
result, expected = sess.run([query_result, expected])
|
||||||
|
# Use `assertAllClose` for nested structures equality (with tolerance=0).
|
||||||
|
self.assertAllClose(result, expected, atol=0)
|
||||||
|
|
||||||
|
@parameterized.product(sample_size=[1, 3])
|
||||||
|
def test_sum_nested_record_structure(self, sample_size):
|
||||||
|
with self.cached_session() as sess:
|
||||||
|
t1 = tf.constant([1, 0], dtype=tf.int32)
|
||||||
|
t2 = tf.constant([1, 1, 1], dtype=tf.int32)
|
||||||
|
t3 = tf.constant([1], dtype=tf.int32)
|
||||||
|
t4 = tf.constant([[1, 1], [1, 1]], dtype=tf.int32)
|
||||||
|
record = [t1, dict(a=t2, b=[t3, (t4, t1)])]
|
||||||
|
sample = [record] * sample_size
|
||||||
|
|
||||||
|
query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
|
||||||
|
query_result, _ = test_utils.run_query(query, sample)
|
||||||
|
result = sess.run(query_result)
|
||||||
|
|
||||||
|
s = sample_size
|
||||||
|
expected = [t1 * s, dict(a=t2 * s, b=[t3 * s, (t4 * s, t1 * s)])]
|
||||||
|
# Use `assertAllClose` for nested structures equality (with tolerance=0)
|
||||||
|
self.assertAllClose(result, expected, atol=0)
|
||||||
|
|
||||||
|
def test_sum_raise_on_float_inputs(self):
|
||||||
|
with self.cached_session() as sess:
|
||||||
|
record1 = tf.constant([2, 0], dtype=tf.float32)
|
||||||
|
record2 = tf.constant([-1, 1], dtype=tf.float32)
|
||||||
|
query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0)
|
||||||
|
|
||||||
|
with self.assertRaises(TypeError):
|
||||||
|
query_result, _ = test_utils.run_query(query, [record1, record2])
|
||||||
|
sess.run(query_result)
|
||||||
|
|
||||||
|
@parameterized.product(l2_norm_bound=[0, 3, 10, 14.1])
|
||||||
|
@silence_tf_error_messages
|
||||||
|
def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound):
|
||||||
|
with self.cached_session() as sess:
|
||||||
|
record = tf.constant([10, 10], dtype=tf.int32)
|
||||||
|
query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
|
||||||
|
|
||||||
|
with self.assertRaises(tf.errors.InvalidArgumentError):
|
||||||
|
query_result, _ = test_utils.run_query(query, [record])
|
||||||
|
sess.run(query_result)
|
||||||
|
|
||||||
|
def test_sum_float_norm_not_rounded(self):
|
||||||
|
"""Test that the float L2 norm bound doesn't get rounded/casted to integers."""
|
||||||
|
with self.cached_session() as sess:
|
||||||
|
# A casted/rounded norm bound would be insufficient.
|
||||||
|
l2_norm_bound = 14.2
|
||||||
|
record = tf.constant([10, 10], dtype=tf.int32)
|
||||||
|
query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0)
|
||||||
|
query_result, _ = test_utils.run_query(query, [record])
|
||||||
|
result = sess.run(query_result)
|
||||||
|
expected = [10, 10]
|
||||||
|
self.assertAllEqual(result, expected)
|
||||||
|
|
||||||
|
@parameterized.named_parameters([('2_local_stddev_1_record', 2, 1),
|
||||||
|
('10_local_stddev_4_records', 10, 4),
|
||||||
|
('1000_local_stddev_1_record', 1000, 1),
|
||||||
|
('1000_local_stddev_25_records', 1000, 25)])
|
||||||
|
def test_sum_local_noise_shares(self, local_stddev, num_records):
|
||||||
|
"""Test the noise level of the sum of discrete Gaussians applied locally.
|
||||||
|
|
||||||
|
The sum of discrete Gaussians is not a discrete Gaussian, but it will be
|
||||||
|
extremely close for sigma >= 2. We will thus compare the aggregated noise
|
||||||
|
to a central discrete Gaussian noise with appropriately scaled stddev with
|
||||||
|
some reasonable tolerance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
local_stddev: The stddev of the local discrete Gaussian noise.
|
||||||
|
num_records: The number of records to be aggregated.
|
||||||
|
"""
|
||||||
|
# Aggregated local noises.
|
||||||
|
num_trials = 1000
|
||||||
|
record = tf.zeros([num_trials], dtype=tf.int32)
|
||||||
|
sample = [record] * num_records
|
||||||
|
query = ddg_sum_query(l2_norm_bound=10.0, local_stddev=local_stddev)
|
||||||
|
query_result, _ = test_utils.run_query(query, sample)
|
||||||
|
|
||||||
|
# Central discrete Gaussian noise.
|
||||||
|
central_stddev = np.sqrt(num_records) * local_stddev
|
||||||
|
central_noise = discrete_gaussian_utils.sample_discrete_gaussian(
|
||||||
|
scale=tf.cast(tf.round(central_stddev), record.dtype),
|
||||||
|
shape=tf.shape(record),
|
||||||
|
dtype=record.dtype)
|
||||||
|
|
||||||
|
agg_noise, central_noise = self.evaluate([query_result, central_noise])
|
||||||
|
|
||||||
|
mean_stddev = central_stddev * np.sqrt(num_trials) / num_trials
|
||||||
|
atol = 3.5 * mean_stddev
|
||||||
|
|
||||||
|
# Use the atol for mean as a rough default atol for stddev/percentile.
|
||||||
|
self.assertAllClose(np.mean(agg_noise), np.mean(central_noise), atol=atol)
|
||||||
|
self.assertAllClose(np.std(agg_noise), np.std(central_noise), atol=atol)
|
||||||
|
self.assertAllClose(
|
||||||
|
np.percentile(agg_noise, [25, 50, 75]),
|
||||||
|
np.percentile(central_noise, [25, 50, 75]),
|
||||||
|
atol=atol)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
tf.test.main()
|
Loading…
Reference in a new issue