Update the tree aggregation RDP accounting for restarts. This prevents potentially inaccurate use of the previous implementation in the no-restart case.

PiperOrigin-RevId: 406878834
2021-11-01 11:39:10 -07:00 · 2021-11-01 11:39:10 -07:00 · 9757e1bc87
commit 9757e1bc87
parent c5cb687507
3 changed files with 50 additions and 102 deletions
--- a/tensorflow_privacy/init.py
+++ b/tensorflow_privacy/init.py
@ -45,7 +45,7 @@ else:
  from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
-  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
+  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart
  from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
  # DPQuery classes
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@ -398,34 +398,34 @@ def compute_rdp(q, noise_multiplier, steps, orders):
  return rdp * steps
-def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
+def _compute_rdp_tree_restart(sigma, steps_list, alpha):
  """Computes RDP of the Tree Aggregation Protocol at order alpha."""
  if np.isinf(alpha):
    return np.inf
  tree_depths = [
-      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
+      math.floor(math.log2(float(steps))) + 1
      for steps in steps_list
      if steps > 0
  ]
-  record_occurence = [
+  return alpha * sum(tree_depths) / (2 * sigma**2)
      x * y for x, y in zip(max_participation_list, tree_depths)
  ]
  return alpha * sum(record_occurence) / (2 * sigma**2)
-def compute_rdp_tree(
+def compute_rdp_tree_restart(
-    noise_multiplier: float, steps_list: Union[float, Collection[float]],
+    noise_multiplier: float, steps_list: Union[int, Collection[int]],
-    max_participation_list: Union[int, Collection[int]],
+    orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
    orders: Union[float, Collection[float]]) -> Collection[float]:
  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
  This function implements the accounting when the tree is restarted at every
  epoch. See appendix of
  "Practical and Private (Deep) Learning without Sampling or Shuffling"
  https://arxiv.org/abs/2103.00039.
  Args:
    noise_multiplier: A non-negative float representing the ratio of the
      standard deviation of the Gaussian noise to the l2-sensitivity of the
      function to which it is added.
    steps_list: A scalar or a list of non-negative integers representing the
-      number of steps between tree restarts.
+      number of steps per epoch (between two restarts).
    max_participation_list: A scalar or a list of positive integers representing
      maximum number of times a sample may appear between tree restarts. The
      type (scalar/list) of `max_participation_list` should match `steps_list`.
    orders: An array (or a scalar) of RDP orders.
  Returns:
@ -442,46 +442,19 @@ def compute_rdp_tree(
        "steps_list must be a non-empty list, or a non-zero scalar, got "
        f"{steps_list}.")
-  if not max_participation_list:
+  if np.isscalar(steps_list):
    raise ValueError(
        "max_participation_list must be a non-empty list, or a non-zero scalar,"
        f" got {max_participation_list}.")
  if np.isscalar(steps_list) and np.isscalar(max_participation_list):
    steps_list = [steps_list]
    max_participation_list = [max_participation_list]
  elif np.isscalar(steps_list):
    raise ValueError(
        "`steps_list` and `max_participation_list` must have the same type, got"
        f"scalar of steps: {steps_list}, and list of max_participations with "
        f"length {len(max_participation_list)}.")
  elif np.isscalar(max_participation_list):
    raise ValueError(
        "`steps_list` and `max_participation_list` must have the same type, got"
        f"scalar of max_participation: {max_participation_list}, and list of "
        f"steps with length {len(steps_list)}.")
  elif len(max_participation_list) != len(steps_list):
    raise ValueError(
        "`steps_list` and `max_participation_list` must have the same size, got"
        f"steps length {len(steps_list)}, max_participations length "
        f"{len(max_participation_list)}")
  for max_participation in max_participation_list:
    if max_participation <= 0:
      raise ValueError(
          f"Max participation must be positive, got {max_participation}")
  for steps in steps_list:
    if steps < 0:
      raise ValueError(f"Steps must be non-negative, got {steps_list}")
  if np.isscalar(orders):
-    rdp = _compute_rdp_tree(noise_multiplier, steps_list,
+    rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
                            max_participation_list, orders)
  else:
    rdp = np.array([
-        _compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
+        _compute_rdp_tree_restart(noise_multiplier, steps_list, alpha)
-                          alpha) for alpha in orders
+        for alpha in orders
    ])
  return rdp
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@ -272,81 +272,56 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
    # This test is based on the StackOverflow setting in "Practical and
    # Private (Deep) Learning without Sampling or Shuffling". The calculated
    # epsilon could be better as the method in this package keeps improving.
-    steps_list, target_delta, max_participation = 1600, 1e-6, 1
+    steps_list, target_delta = 1600, 1e-6
-    rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+    rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
-                                          max_participation, orders)
+                                                  orders)
    new_eps = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta)[0]
    self.assertLess(new_eps, eps)
  @parameterized.named_parameters(
-      ('restart4_max2', [400] * 4, [2] * 4),
+      ('restart4', [400] * 4),
-      ('restart2_max1', [800] * 2, [1] * 2),
+      ('restart2', [800] * 2),
-      ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
+      ('adaptive', [10, 400, 400, 400, 390]),
-      ('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3]))
+  )
-  def test_compose_tree_rdp(self, steps_list, max_participation_list):
+  def test_compose_tree_rdp(self, steps_list):
    noise_multiplier, orders = 0.1, 1
-    if np.isscalar(max_participation_list):
+    rdp_list = [
-      rdp_list = [
+        rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders)
-          rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
+        for steps in steps_list
-                                          max_participation_list, orders)
+    ]
-          for steps in steps_list
+    rdp_composed = rdp_accountant.compute_rdp_tree_restart(
-      ]
+        noise_multiplier, steps_list, orders)
    else:
      rdp_list = [
          rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
                                          max_participation, orders) for steps,
          max_participation in zip(steps_list, max_participation_list)
      ]
    rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                                   max_participation_list,
                                                   orders)
    self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
  @parameterized.named_parameters(
-      ('restart4_max2', [400] * 4, [2] * 4),
+      ('restart4', [400] * 4),
-      ('restart2_max1', [800] * 2, [1] * 2),
+      ('restart2', [800] * 2),
-      ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
+      ('adaptive', [10, 400, 400, 400, 390]),
  )
-  def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
+  def test_compute_eps_tree_decreasing(self, steps_list):
    # Test privacy epsilon decreases with noise multiplier increasing when
    # keeping other parameters the same.
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    target_delta = 1e-6
-    prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
+    prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders)
                                               orders)
    for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
-      rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+      rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
-                                            max_participation, orders)
+                                                    steps_list, orders)
      eps = rdp_accountant.get_privacy_spent(
          orders, rdp, target_delta=target_delta)[0]
      self.assertLess(eps, prev_eps)
  @parameterized.named_parameters(
-      ('negative_noise', -1, 3, 2, 1),
+      ('negative_noise', -1, 3, 1),
-      ('empty_steps', 1, [], 2, 1),
+      ('empty_steps', 1, [], 1),
-      ('empty_part', 1, 1, [], 1),
+      ('negative_steps', 1, -3, 1),
      ('negative_steps', 1, -3, 2, 1),
      ('zero_participation', 1, 3, 0, 1),
      ('negative_participation', 1, 3, -1, 1),
  )
-  def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
+  def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list,
-                                  max_participation, orders):
+                                          orders):
    with self.assertRaisesRegex(ValueError, 'must be'):
-      rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+      rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
-                                      max_participation, orders)
+                                              orders)
  @parameterized.named_parameters(
      ('list_scalar', [2], 1),
      ('scalar_list', 2, [1]),
      ('list_length', [2, 3], [1]),
      ('list_length2', [2, 3], [1, 2, 2]),
  )
  def test_compute_rdp_tree_raise_input_type(self, steps_list,
                                             max_participation):
    with self.assertRaisesRegex(ValueError, 'must have the same'):
      rdp_accountant.compute_rdp_tree(
          0.1, steps_list, max_participation, orders=1)
  @parameterized.named_parameters(
      ('t100n0.1', 100, 0.1),
@ -354,9 +329,9 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
  )
  def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
-    tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
+    tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
-                                               [1] * total_steps,
+                                                       [1] * total_steps,
-                                               [1] * total_steps, orders)
+                                                       orders)
    rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
    self.assertAllClose(tree_rdp, rdp, rtol=1e-12)