RDP for tree aggregation. See "Practical and Private (Deep) Learning without Sampling or Shuffling" (https://arxiv.org/abs/2103.00039) for more details. See the tests for example usage when calculating epsilon; a short usage sketch also follows the commit metadata below.

PiperOrigin-RevId: 394770205
Zheng Xu 2021-09-03 15:42:17 -07:00 committed by A. Unique TensorFlower
parent b7249e6ab2
commit a20cbf9578
2 changed files with 119 additions and 2 deletions
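As a quick orientation (not part of the commit itself), here is a minimal usage sketch mirroring the "eps20" case in the test added below. The import path `tensorflow_privacy.privacy.analysis.rdp_accountant` is an assumption and may differ depending on how the module is vendored; `compute_rdp_tree` and `get_privacy_spent` are the functions added/used in this commit.

from tensorflow_privacy.privacy.analysis import rdp_accountant  # assumed import path

# RDP orders to evaluate; the same grid is used in the tests below.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))

# One tree over 1600 steps, each sample participating at most once per tree,
# Gaussian noise multiplier 1.13 (the StackOverflow setting from the paper).
rdp = rdp_accountant.compute_rdp_tree(
    noise_multiplier=1.13, steps_list=[1600], max_participation=1,
    orders=orders)

# Convert RDP to (epsilon, delta)-DP at the target delta.
eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)[0]
print(eps)  # Expected to be below ~19.74, per the 'eps20' test case below.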


@@ -42,6 +42,7 @@ from __future__ import print_function
import math
import sys
from typing import Collection, Union
import numpy as np
from scipy import special
@@ -397,6 +398,64 @@ def compute_rdp(q, noise_multiplier, steps, orders):
  return rdp * steps


def _compute_rdp_tree(sigma, steps_list, max_participation, alpha):
  """Computes RDP of the Tree Aggregation Protocol at order alpha."""
  if np.isinf(alpha):
    return np.inf
  tree_depths = [
      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
  ]
  return alpha * max_participation * sum(tree_depths) / (2 * sigma**2)


def compute_rdp_tree(
    noise_multiplier: float, steps_list: Collection[float],
    max_participation: int,
    orders: Union[float, Collection[float]]) -> Collection[float]:
  """Computes RDP of the Tree Aggregation Protocol for the Gaussian Mechanism.

  Args:
    noise_multiplier: A non-negative float representing the ratio of the
      standard deviation of the Gaussian noise to the l2-sensitivity of the
      function to which it is added.
    steps_list: A list of non-negative integers representing the number of
      steps between tree restarts.
    max_participation: A positive integer representing the maximum number of
      times a sample may appear between tree restarts.
    orders: An array (or a scalar) of RDP orders.

  Returns:
    The RDPs at all orders. Can be `np.inf`.
  """
  if noise_multiplier < 0:
    raise ValueError(
        f"Noise multiplier must be non-negative, got {noise_multiplier}")
  elif noise_multiplier == 0:
    return np.inf

  if max_participation <= 0:
    raise ValueError(
        f"Max participation must be positive, got {max_participation}")

  if not steps_list:
    raise ValueError("List of steps must be non-empty.")
  for steps in steps_list:
    if steps < 0:
      raise ValueError(f"List of steps must be non-negative, got {steps_list}")

  if np.isscalar(orders):
    rdp = _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
                            orders)
  else:
    rdp = np.array([
        _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
                          alpha) for alpha in orders
    ])
  return rdp


def compute_rdp_sample_without_replacement(q, noise_multiplier, steps, orders):
  """Compute RDP of Gaussian Mechanism using sampling without replacement.


@@ -21,7 +21,6 @@ from __future__ import print_function
import math
import sys
-from absl.testing import absltest
from absl.testing import parameterized
from mpmath import exp
from mpmath import inf
@@ -265,5 +264,64 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
    self.assertLessEqual(delta, delta1 + 1e-300)


class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04))
  def test_compute_eps_tree(self, noise_multiplier, eps):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    # This test is based on the StackOverflow setting in "Practical and
    # Private (Deep) Learning without Sampling or Shuffling". The computed
    # epsilon could become smaller as the methods in this package improve.
    steps_list, target_delta, max_participation = [1600], 1e-6, 1
    rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                          max_participation, orders)
    new_eps = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta)[0]
    self.assertLess(new_eps, eps)

  @parameterized.named_parameters(
      ('restart4_max2', [400] * 4, 2),
      ('restart2_max1', [800] * 2, 1),
      ('adaptive_max4', [10, 400, 400, 400, 390], 4),
  )
  def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
    # Test that the privacy epsilon decreases as the noise multiplier
    # increases, keeping the other parameters the same.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    target_delta = 1e-6
    # With zero noise the RDP (and hence epsilon) is unbounded, so start the
    # comparison from infinity.
    prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
                                               orders)
    for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
      rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                            max_participation, orders)
      eps = rdp_accountant.get_privacy_spent(
          orders, rdp, target_delta=target_delta)[0]
      self.assertLess(eps, prev_eps)
      # Track the last epsilon so the decrease is checked between consecutive
      # noise levels.
      prev_eps = eps

  @parameterized.named_parameters(
      ('negative_noise', -1, [3], 2, 1),
      ('empty_steps', 1, [], 2, 1),
      ('negative_steps', 1, [-3], 2, 1),
      ('zero_participation', 1, [3], 0, 1),
      ('negative_participation', 1, [3], -1, 1),
  )
  def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
                                  max_participation, orders):
    with self.assertRaisesRegex(ValueError, 'must be'):
      rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                      max_participation, orders)

  @parameterized.named_parameters(
      ('t100n0.1', 100, 0.1),
      ('t1000n0.01', 1000, 0.01),
  )
  def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
                                               [1] * total_steps, 1, orders)
    rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
    self.assertAllClose(tree_rdp, rdp, rtol=1e-12)


if __name__ == '__main__':
-  absltest.main()
+  tf.test.main()