RDP for tree aggregation. See "Practical and Private (Deep) Learning without Sampling or Shuffling" (https://arxiv.org/abs/2103.00039) for details. See the tests for an example of calculating epsilon.

PiperOrigin-RevId: 394770205
Zheng Xu 2021-09-03 15:42:17 -07:00 committed by A. Unique TensorFlower
parent b7249e6ab2
commit a20cbf9578
2 changed files with 119 additions and 2 deletions
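
As the commit message notes, the tests show how to calculate epsilon with the new API. A minimal usage sketch along the same lines; the import path is an assumption and may differ in your checkout:

# Sketch only: mirrors the tests in this change; the import path below is a
# guess and may not match every version of the library.
from tensorflow_privacy.privacy.analysis import rdp_accountant

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
steps_list = [1600]      # steps between tree restarts
max_participation = 1    # max times a sample appears between restarts
noise_multiplier = 1.13
rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                      max_participation, orders)
eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)[0]
print(eps)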


@@ -42,6 +42,7 @@ from __future__ import print_function
import math
import sys
from typing import Collection, Union
import numpy as np
from scipy import special
@@ -397,6 +398,64 @@ def compute_rdp(q, noise_multiplier, steps, orders):
  return rdp * steps


def _compute_rdp_tree(sigma, steps_list, max_participation, alpha):
  """Computes RDP of the Tree Aggregation Protocol at order alpha."""
  if np.isinf(alpha):
    return np.inf
  tree_depths = [
      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
  ]
  return alpha * max_participation * sum(tree_depths) / (2 * sigma**2)
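
For intuition on the closed form above (an illustrative arithmetic check, not part of the change): a tree built over 1600 steps has depth floor(log2(1600)) + 1 = 11, so a sample participating once costs 11 * alpha / (2 * sigma**2) at order alpha.

import math

sigma, alpha, max_participation = 1.13, 2.0, 1
depth = math.floor(math.log2(1600)) + 1                    # 10 + 1 = 11
rdp = alpha * max_participation * depth / (2 * sigma**2)   # 22 / 2.5538 ~= 8.61
print(depth, rdp)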


def compute_rdp_tree(
    noise_multiplier: float, steps_list: Collection[float],
    max_participation: int,
    orders: Union[float, Collection[float]]) -> Collection[float]:
  """Computes RDP of the Tree Aggregation Protocol for the Gaussian Mechanism.

  Args:
    noise_multiplier: A non-negative float representing the ratio of the
      standard deviation of the Gaussian noise to the l2-sensitivity of the
      function to which it is added.
    steps_list: A list of non-negative integers representing the number of
      steps between tree restarts.
    max_participation: A positive integer representing the maximum number of
      times a sample may appear between tree restarts.
    orders: An array (or a scalar) of RDP orders.

  Returns:
    The RDPs at all orders. Can be `np.inf`.
  """
  if noise_multiplier < 0:
    raise ValueError(
        f"Noise multiplier must be non-negative, got {noise_multiplier}")
  elif noise_multiplier == 0:
    return np.inf
  if max_participation <= 0:
    raise ValueError(
        f"Max participation must be positive, got {max_participation}")
  if not steps_list:
    raise ValueError("List of steps must be non-empty.")
  for steps in steps_list:
    if steps < 0:
      raise ValueError(f"List of steps must be non-negative, got {steps_list}")
  if np.isscalar(orders):
    rdp = _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
                            orders)
  else:
    rdp = np.array([
        _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
                          alpha) for alpha in orders
    ])
  return rdp
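
A small arithmetic illustration of how the restart schedule and max_participation interact in the bound above; the helper and numbers below are hypothetical and only exercise the sum-of-depths term.

import math

def depth_cost(steps_list, max_participation):
  # Hypothetical helper: the max_participation * sum(tree_depths) term from
  # _compute_rdp_tree, without the common alpha / (2 * sigma**2) factor.
  depths = [math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0]
  return max_participation * sum(depths)

print(depth_cost([1600], max_participation=2))      # 2 * 11 = 22
print(depth_cost([800, 800], max_participation=1))  # 1 * (10 + 10) = 20

Restarting adds depth terms, but it can pay off when it lowers how many times any one sample appears within a single tree.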


def compute_rdp_sample_without_replacement(q, noise_multiplier, steps, orders):
  """Compute RDP of Gaussian Mechanism using sampling without replacement.


@@ -21,7 +21,6 @@ from __future__ import print_function
import math
import sys
-from absl.testing import absltest
from absl.testing import parameterized
from mpmath import exp
from mpmath import inf
@@ -265,5 +264,64 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
    self.assertLessEqual(delta, delta1 + 1e-300)


class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04))
  def test_compute_eps_tree(self, noise_multiplier, eps):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    # This test is based on the StackOverflow setting in "Practical and
    # Private (Deep) Learning without Sampling or Shuffling". The calculated
    # epsilon could improve as the accounting method in this package is
    # refined.
    steps_list, target_delta, max_participation = [1600], 1e-6, 1
    rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                          max_participation, orders)
    new_eps = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta)[0]
    self.assertLess(new_eps, eps)

  @parameterized.named_parameters(
      ('restart4_max2', [400] * 4, 2),
      ('restart2_max1', [800] * 2, 1),
      ('adaptive_max4', [10, 400, 400, 400, 390], 4),
  )
  def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
    # Test that privacy epsilon decreases as the noise multiplier increases,
    # keeping the other parameters the same.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    target_delta = 1e-6
    prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
                                               orders)
    for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
      rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                            max_participation, orders)
      eps = rdp_accountant.get_privacy_spent(
          orders, rdp, target_delta=target_delta)[0]
      self.assertLess(eps, prev_eps)
      # Compare against the epsilon from the previous, smaller noise multiplier.
      prev_eps = eps

  @parameterized.named_parameters(
      ('negative_noise', -1, [3], 2, 1),
      ('empty_steps', 1, [], 2, 1),
      ('negative_steps', 1, [-3], 2, 1),
      ('zero_participation', 1, [3], 0, 1),
      ('negative_participation', 1, [3], -1, 1),
  )
  def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
                                  max_participation, orders):
    with self.assertRaisesRegex(ValueError, 'must be'):
      rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                      max_participation, orders)

  @parameterized.named_parameters(
      ('t100n0.1', 100, 0.1),
      ('t1000n0.01', 1000, 0.01),
  )
  def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
                                               [1] * total_steps, 1, orders)
    rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
    self.assertAllClose(tree_rdp, rdp, rtol=1e-12)
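
The equality asserted above follows from the closed form: with steps_list = [1] * total_steps, every tree has depth floor(log2(1)) + 1 = 1, so the tree bound collapses to total_steps * alpha / (2 * sigma**2), which is the Gaussian-mechanism RDP accumulated over total_steps steps without subsampling. A standalone check with illustrative numbers:

import math

sigma, alpha, total_steps = 1.0, 2.0, 100
tree_rdp = alpha * 1 * sum([1] * total_steps) / (2 * sigma**2)  # depth 1 per tree
gaussian_rdp = total_steps * alpha / (2 * sigma**2)             # alpha / (2 sigma^2) per step
assert math.isclose(tree_rdp, gaussian_rdp)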


if __name__ == '__main__':
-  absltest.main()
  tf.test.main()