Update the tree aggregation RDP accounting to cover the restart setting. This prevents potentially inaccurate use of the previous implementation in the no-restart setting.

PiperOrigin-RevId: 406878834
This commit is contained in:
Zheng Xu 2021-11-01 11:39:10 -07:00 committed by A. Unique TensorFlower
parent c5cb687507
commit 9757e1bc87
3 changed files with 50 additions and 102 deletions

View file

@ -45,7 +45,7 @@ else:
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
# DPQuery classes

View file

@ -398,34 +398,34 @@ def compute_rdp(q, noise_multiplier, steps, orders):
return rdp * steps
def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
def _compute_rdp_tree_restart(sigma, steps_list, alpha):
"""Computes RDP of the Tree Aggregation Protocol at order alpha."""
if np.isinf(alpha):
return np.inf
tree_depths = [
math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
math.floor(math.log2(float(steps))) + 1
for steps in steps_list
if steps > 0
]
record_occurence = [
x * y for x, y in zip(max_participation_list, tree_depths)
]
return alpha * sum(record_occurence) / (2 * sigma**2)
return alpha * sum(tree_depths) / (2 * sigma**2)
def compute_rdp_tree(
noise_multiplier: float, steps_list: Union[float, Collection[float]],
max_participation_list: Union[int, Collection[int]],
orders: Union[float, Collection[float]]) -> Collection[float]:
def compute_rdp_tree_restart(
noise_multiplier: float, steps_list: Union[int, Collection[int]],
orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
"""Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
This function implements the accounting when the tree is restarted at every
epoch. See appendix of
"Practical and Private (Deep) Learning without Sampling or Shuffling"
https://arxiv.org/abs/2103.00039.
Args:
noise_multiplier: A non-negative float representing the ratio of the
standard deviation of the Gaussian noise to the l2-sensitivity of the
function to which it is added.
steps_list: A scalar or a list of non-negative integers representing the
number of steps between tree restarts.
max_participation_list: A scalar or a list of positive integers representing
maximum number of times a sample may appear between tree restarts. The
type (scalar/list) of `max_participation_list` should match `steps_list`.
number of steps per epoch (between two restarts).
orders: An array (or a scalar) of RDP orders.
Returns:
@ -442,46 +442,19 @@ def compute_rdp_tree(
"steps_list must be a non-empty list, or a non-zero scalar, got "
f"{steps_list}.")
if not max_participation_list:
raise ValueError(
"max_participation_list must be a non-empty list, or a non-zero scalar,"
f" got {max_participation_list}.")
if np.isscalar(steps_list) and np.isscalar(max_participation_list):
if np.isscalar(steps_list):
steps_list = [steps_list]
max_participation_list = [max_participation_list]
elif np.isscalar(steps_list):
raise ValueError(
"`steps_list` and `max_participation_list` must have the same type, got"
f"scalar of steps: {steps_list}, and list of max_participations with "
f"length {len(max_participation_list)}.")
elif np.isscalar(max_participation_list):
raise ValueError(
"`steps_list` and `max_participation_list` must have the same type, got"
f"scalar of max_participation: {max_participation_list}, and list of "
f"steps with length {len(steps_list)}.")
elif len(max_participation_list) != len(steps_list):
raise ValueError(
"`steps_list` and `max_participation_list` must have the same size, got"
f"steps length {len(steps_list)}, max_participations length "
f"{len(max_participation_list)}")
for max_participation in max_participation_list:
if max_participation <= 0:
raise ValueError(
f"Max participation must be positive, got {max_participation}")
for steps in steps_list:
if steps < 0:
raise ValueError(f"Steps must be non-negative, got {steps_list}")
if np.isscalar(orders):
rdp = _compute_rdp_tree(noise_multiplier, steps_list,
max_participation_list, orders)
rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
else:
rdp = np.array([
_compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
alpha) for alpha in orders
_compute_rdp_tree_restart(noise_multiplier, steps_list, alpha)
for alpha in orders
])
return rdp

View file

@ -272,81 +272,56 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
# This test is based on the StackOverflow setting in "Practical and
# Private (Deep) Learning without Sampling or Shuffling". The calculated
# epsilon could be better as the method in this package keeps improving.
steps_list, target_delta, max_participation = 1600, 1e-6, 1
rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
max_participation, orders)
steps_list, target_delta = 1600, 1e-6
rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
orders)
new_eps = rdp_accountant.get_privacy_spent(
orders, rdp, target_delta=target_delta)[0]
self.assertLess(new_eps, eps)
@parameterized.named_parameters(
('restart4_max2', [400] * 4, [2] * 4),
('restart2_max1', [800] * 2, [1] * 2),
('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3]))
def test_compose_tree_rdp(self, steps_list, max_participation_list):
('restart4', [400] * 4),
('restart2', [800] * 2),
('adaptive', [10, 400, 400, 400, 390]),
)
def test_compose_tree_rdp(self, steps_list):
noise_multiplier, orders = 0.1, 1
if np.isscalar(max_participation_list):
rdp_list = [
rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
max_participation_list, orders)
rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders)
for steps in steps_list
]
else:
rdp_list = [
rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
max_participation, orders) for steps,
max_participation in zip(steps_list, max_participation_list)
]
rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
max_participation_list,
orders)
rdp_composed = rdp_accountant.compute_rdp_tree_restart(
noise_multiplier, steps_list, orders)
self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
@parameterized.named_parameters(
('restart4_max2', [400] * 4, [2] * 4),
('restart2_max1', [800] * 2, [1] * 2),
('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
('restart4', [400] * 4),
('restart2', [800] * 2),
('adaptive', [10, 400, 400, 400, 390]),
)
def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
def test_compute_eps_tree_decreasing(self, steps_list):
# Test privacy epsilon decreases with noise multiplier increasing when
# keeping other parameters the same.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
target_delta = 1e-6
prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
orders)
prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders)
for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
max_participation, orders)
rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
steps_list, orders)
eps = rdp_accountant.get_privacy_spent(
orders, rdp, target_delta=target_delta)[0]
self.assertLess(eps, prev_eps)
@parameterized.named_parameters(
('negative_noise', -1, 3, 2, 1),
('empty_steps', 1, [], 2, 1),
('empty_part', 1, 1, [], 1),
('negative_steps', 1, -3, 2, 1),
('zero_participation', 1, 3, 0, 1),
('negative_participation', 1, 3, -1, 1),
('negative_noise', -1, 3, 1),
('empty_steps', 1, [], 1),
('negative_steps', 1, -3, 1),
)
def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
max_participation, orders):
def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list,
orders):
with self.assertRaisesRegex(ValueError, 'must be'):
rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
max_participation, orders)
@parameterized.named_parameters(
('list_scalar', [2], 1),
('scalar_list', 2, [1]),
('list_length', [2, 3], [1]),
('list_length2', [2, 3], [1, 2, 2]),
)
def test_compute_rdp_tree_raise_input_type(self, steps_list,
max_participation):
with self.assertRaisesRegex(ValueError, 'must have the same'):
rdp_accountant.compute_rdp_tree(
0.1, steps_list, max_participation, orders=1)
rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
orders)
@parameterized.named_parameters(
('t100n0.1', 100, 0.1),
@ -354,9 +329,9 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
)
def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
[1] * total_steps,
[1] * total_steps, orders)
orders)
rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
self.assertAllClose(tree_rdp, rdp, rtol=1e-12)