forked from 626_privacy/tensorflow_privacy
Update the tree aggregation RDP accounting for restarts. This prevents potentially inaccurate use of the previous implementation in the no-restart setting.
PiperOrigin-RevId: 406878834
This commit is contained in:
parent
c5cb687507
commit
9757e1bc87
3 changed files with 50 additions and 102 deletions
|
@ -45,7 +45,7 @@ else:
|
||||||
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
|
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart
|
||||||
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
|
from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
|
||||||
|
|
||||||
# DPQuery classes
|
# DPQuery classes
|
||||||
|
|
|
@ -398,34 +398,34 @@ def compute_rdp(q, noise_multiplier, steps, orders):
|
||||||
return rdp * steps
|
return rdp * steps
|
||||||
|
|
||||||
|
|
||||||
def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
|
def _compute_rdp_tree_restart(sigma, steps_list, alpha):
|
||||||
"""Computes RDP of the Tree Aggregation Protocol at order alpha."""
|
"""Computes RDP of the Tree Aggregation Protocol at order alpha."""
|
||||||
if np.isinf(alpha):
|
if np.isinf(alpha):
|
||||||
return np.inf
|
return np.inf
|
||||||
tree_depths = [
|
tree_depths = [
|
||||||
math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
|
math.floor(math.log2(float(steps))) + 1
|
||||||
|
for steps in steps_list
|
||||||
|
if steps > 0
|
||||||
]
|
]
|
||||||
record_occurence = [
|
return alpha * sum(tree_depths) / (2 * sigma**2)
|
||||||
x * y for x, y in zip(max_participation_list, tree_depths)
|
|
||||||
]
|
|
||||||
return alpha * sum(record_occurence) / (2 * sigma**2)
|
|
||||||
|
|
||||||
|
|
||||||
def compute_rdp_tree(
|
def compute_rdp_tree_restart(
|
||||||
noise_multiplier: float, steps_list: Union[float, Collection[float]],
|
noise_multiplier: float, steps_list: Union[int, Collection[int]],
|
||||||
max_participation_list: Union[int, Collection[int]],
|
orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
|
||||||
orders: Union[float, Collection[float]]) -> Collection[float]:
|
|
||||||
"""Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
|
"""Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
|
||||||
|
|
||||||
|
This function implements the accounting when the tree is restarted at every
|
||||||
|
epoch. See appendix of
|
||||||
|
"Practical and Private (Deep) Learning without Sampling or Shuffling"
|
||||||
|
https://arxiv.org/abs/2103.00039.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
noise_multiplier: A non-negative float representing the ratio of the
|
noise_multiplier: A non-negative float representing the ratio of the
|
||||||
standard deviation of the Gaussian noise to the l2-sensitivity of the
|
standard deviation of the Gaussian noise to the l2-sensitivity of the
|
||||||
function to which it is added.
|
function to which it is added.
|
||||||
steps_list: A scalar or a list of non-negative integers representing the
|
steps_list: A scalar or a list of non-negative integers representing the
|
||||||
number of steps between tree restarts.
|
number of steps per epoch (between two restarts).
|
||||||
max_participation_list: A scalar or a list of positive integers representing
|
|
||||||
maximum number of times a sample may appear between tree restarts. The
|
|
||||||
type (scalar/list) of `max_participation_list` should match `steps_list`.
|
|
||||||
orders: An array (or a scalar) of RDP orders.
|
orders: An array (or a scalar) of RDP orders.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
@ -442,46 +442,19 @@ def compute_rdp_tree(
|
||||||
"steps_list must be a non-empty list, or a non-zero scalar, got "
|
"steps_list must be a non-empty list, or a non-zero scalar, got "
|
||||||
f"{steps_list}.")
|
f"{steps_list}.")
|
||||||
|
|
||||||
if not max_participation_list:
|
if np.isscalar(steps_list):
|
||||||
raise ValueError(
|
|
||||||
"max_participation_list must be a non-empty list, or a non-zero scalar,"
|
|
||||||
f" got {max_participation_list}.")
|
|
||||||
|
|
||||||
if np.isscalar(steps_list) and np.isscalar(max_participation_list):
|
|
||||||
steps_list = [steps_list]
|
steps_list = [steps_list]
|
||||||
max_participation_list = [max_participation_list]
|
|
||||||
elif np.isscalar(steps_list):
|
|
||||||
raise ValueError(
|
|
||||||
"`steps_list` and `max_participation_list` must have the same type, got"
|
|
||||||
f"scalar of steps: {steps_list}, and list of max_participations with "
|
|
||||||
f"length {len(max_participation_list)}.")
|
|
||||||
elif np.isscalar(max_participation_list):
|
|
||||||
raise ValueError(
|
|
||||||
"`steps_list` and `max_participation_list` must have the same type, got"
|
|
||||||
f"scalar of max_participation: {max_participation_list}, and list of "
|
|
||||||
f"steps with length {len(steps_list)}.")
|
|
||||||
elif len(max_participation_list) != len(steps_list):
|
|
||||||
raise ValueError(
|
|
||||||
"`steps_list` and `max_participation_list` must have the same size, got"
|
|
||||||
f"steps length {len(steps_list)}, max_participations length "
|
|
||||||
f"{len(max_participation_list)}")
|
|
||||||
|
|
||||||
for max_participation in max_participation_list:
|
|
||||||
if max_participation <= 0:
|
|
||||||
raise ValueError(
|
|
||||||
f"Max participation must be positive, got {max_participation}")
|
|
||||||
|
|
||||||
for steps in steps_list:
|
for steps in steps_list:
|
||||||
if steps < 0:
|
if steps < 0:
|
||||||
raise ValueError(f"Steps must be non-negative, got {steps_list}")
|
raise ValueError(f"Steps must be non-negative, got {steps_list}")
|
||||||
|
|
||||||
if np.isscalar(orders):
|
if np.isscalar(orders):
|
||||||
rdp = _compute_rdp_tree(noise_multiplier, steps_list,
|
rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
|
||||||
max_participation_list, orders)
|
|
||||||
else:
|
else:
|
||||||
rdp = np.array([
|
rdp = np.array([
|
||||||
_compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
|
_compute_rdp_tree_restart(noise_multiplier, steps_list, alpha)
|
||||||
alpha) for alpha in orders
|
for alpha in orders
|
||||||
])
|
])
|
||||||
|
|
||||||
return rdp
|
return rdp
|
||||||
|
|
|
@ -272,81 +272,56 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
# This test is based on the StackOverflow setting in "Practical and
|
# This test is based on the StackOverflow setting in "Practical and
|
||||||
# Private (Deep) Learning without Sampling or Shuffling". The calculated
|
# Private (Deep) Learning without Sampling or Shuffling". The calculated
|
||||||
# epsilon could be better as the method in this package keeps improving.
|
# epsilon could be better as the method in this package keeps improving.
|
||||||
steps_list, target_delta, max_participation = 1600, 1e-6, 1
|
steps_list, target_delta = 1600, 1e-6
|
||||||
rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
|
rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
|
||||||
max_participation, orders)
|
orders)
|
||||||
new_eps = rdp_accountant.get_privacy_spent(
|
new_eps = rdp_accountant.get_privacy_spent(
|
||||||
orders, rdp, target_delta=target_delta)[0]
|
orders, rdp, target_delta=target_delta)[0]
|
||||||
self.assertLess(new_eps, eps)
|
self.assertLess(new_eps, eps)
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
@parameterized.named_parameters(
|
||||||
('restart4_max2', [400] * 4, [2] * 4),
|
('restart4', [400] * 4),
|
||||||
('restart2_max1', [800] * 2, [1] * 2),
|
('restart2', [800] * 2),
|
||||||
('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
|
('adaptive', [10, 400, 400, 400, 390]),
|
||||||
('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3]))
|
)
|
||||||
def test_compose_tree_rdp(self, steps_list, max_participation_list):
|
def test_compose_tree_rdp(self, steps_list):
|
||||||
noise_multiplier, orders = 0.1, 1
|
noise_multiplier, orders = 0.1, 1
|
||||||
if np.isscalar(max_participation_list):
|
rdp_list = [
|
||||||
rdp_list = [
|
rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders)
|
||||||
rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
|
for steps in steps_list
|
||||||
max_participation_list, orders)
|
]
|
||||||
for steps in steps_list
|
rdp_composed = rdp_accountant.compute_rdp_tree_restart(
|
||||||
]
|
noise_multiplier, steps_list, orders)
|
||||||
else:
|
|
||||||
rdp_list = [
|
|
||||||
rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
|
|
||||||
max_participation, orders) for steps,
|
|
||||||
max_participation in zip(steps_list, max_participation_list)
|
|
||||||
]
|
|
||||||
rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
|
|
||||||
max_participation_list,
|
|
||||||
orders)
|
|
||||||
self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
|
self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
@parameterized.named_parameters(
|
||||||
('restart4_max2', [400] * 4, [2] * 4),
|
('restart4', [400] * 4),
|
||||||
('restart2_max1', [800] * 2, [1] * 2),
|
('restart2', [800] * 2),
|
||||||
('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
|
('adaptive', [10, 400, 400, 400, 390]),
|
||||||
)
|
)
|
||||||
def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
|
def test_compute_eps_tree_decreasing(self, steps_list):
|
||||||
# Test privacy epsilon decreases with noise multiplier increasing when
|
# Test privacy epsilon decreases with noise multiplier increasing when
|
||||||
# keeping other parameters the same.
|
# keeping other parameters the same.
|
||||||
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
|
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
|
||||||
target_delta = 1e-6
|
target_delta = 1e-6
|
||||||
prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
|
prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders)
|
||||||
orders)
|
|
||||||
for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
|
for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
|
||||||
rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
|
rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
|
||||||
max_participation, orders)
|
steps_list, orders)
|
||||||
eps = rdp_accountant.get_privacy_spent(
|
eps = rdp_accountant.get_privacy_spent(
|
||||||
orders, rdp, target_delta=target_delta)[0]
|
orders, rdp, target_delta=target_delta)[0]
|
||||||
self.assertLess(eps, prev_eps)
|
self.assertLess(eps, prev_eps)
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
@parameterized.named_parameters(
|
||||||
('negative_noise', -1, 3, 2, 1),
|
('negative_noise', -1, 3, 1),
|
||||||
('empty_steps', 1, [], 2, 1),
|
('empty_steps', 1, [], 1),
|
||||||
('empty_part', 1, 1, [], 1),
|
('negative_steps', 1, -3, 1),
|
||||||
('negative_steps', 1, -3, 2, 1),
|
|
||||||
('zero_participation', 1, 3, 0, 1),
|
|
||||||
('negative_participation', 1, 3, -1, 1),
|
|
||||||
)
|
)
|
||||||
def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
|
def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list,
|
||||||
max_participation, orders):
|
orders):
|
||||||
with self.assertRaisesRegex(ValueError, 'must be'):
|
with self.assertRaisesRegex(ValueError, 'must be'):
|
||||||
rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
|
rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
|
||||||
max_participation, orders)
|
orders)
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
|
||||||
('list_scalar', [2], 1),
|
|
||||||
('scalar_list', 2, [1]),
|
|
||||||
('list_length', [2, 3], [1]),
|
|
||||||
('list_length2', [2, 3], [1, 2, 2]),
|
|
||||||
)
|
|
||||||
def test_compute_rdp_tree_raise_input_type(self, steps_list,
|
|
||||||
max_participation):
|
|
||||||
with self.assertRaisesRegex(ValueError, 'must have the same'):
|
|
||||||
rdp_accountant.compute_rdp_tree(
|
|
||||||
0.1, steps_list, max_participation, orders=1)
|
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
@parameterized.named_parameters(
|
||||||
('t100n0.1', 100, 0.1),
|
('t100n0.1', 100, 0.1),
|
||||||
|
@ -354,9 +329,9 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
)
|
)
|
||||||
def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
|
def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
|
||||||
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
|
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
|
||||||
tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
|
tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
|
||||||
[1] * total_steps,
|
[1] * total_steps,
|
||||||
[1] * total_steps, orders)
|
orders)
|
||||||
rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
|
rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
|
||||||
self.assertAllClose(tree_rdp, rdp, rtol=1e-12)
|
self.assertAllClose(tree_rdp, rdp, rtol=1e-12)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue