Update the tree aggregation RDP accountant to allow a different max_participation value for each tree restart.

PiperOrigin-RevId: 399510813
2021-09-28 13:03:57 -07:00 · 2021-09-28 13:03:57 -07:00 · 7426a4ec30
commit 7426a4ec30
parent 99c82a49d8
3 changed files with 96 additions and 26 deletions
--- a/tensorflow_privacy/init.py
+++ b/tensorflow_privacy/init.py
@ -45,6 +45,7 @@ else:
  from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
+  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
  from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent

  # DPQuery classes
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@ -398,19 +398,22 @@ def compute_rdp(q, noise_multiplier, steps, orders):
  return rdp * steps


-def _compute_rdp_tree(sigma, steps_list, max_participation, alpha):
+def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
  """Computes RDP of the Tree Aggregation Protocol at order alpha."""
  if np.isinf(alpha):
    return np.inf
  tree_depths = [
      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
  ]
-  return alpha * max_participation * sum(tree_depths) / (2 * sigma**2)
+  record_occurence = [
+      x * y for x, y in zip(max_participation_list, tree_depths)
+  ]
+  return alpha * sum(record_occurence) / (2 * sigma**2)


 def compute_rdp_tree(
-    noise_multiplier: float, steps_list: Collection[float],
-    max_participation: int,
+    noise_multiplier: float, steps_list: Union[float, Collection[float]],
+    max_participation_list: Union[int, Collection[int]],
    orders: Union[float, Collection[float]]) -> Collection[float]:
  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.

@ -418,10 +421,11 @@ def compute_rdp_tree(
    noise_multiplier: A non-negative float representing the ratio of the
      standard deviation of the Gaussian noise to the l2-sensitivity of the
      function to which it is added.
-    steps_list: A list of non-negative intergers representing the number of
-      steps between tree restarts.
-    max_participation: A positive integer representing maximum number of times a
-      sample may appear between tree restarts.
+    steps_list: A scalar or a list of non-negative integers representing the
+      number of steps between tree restarts.
+    max_participation_list: A scalar or a list of positive integers representing
+      maximum number of times a sample may appear between tree restarts. The
+      type (scalar/list) of `max_participation_list` should match `steps_list`.
    orders: An array (or a scalar) of RDP orders.

  Returns:
@ -433,23 +437,50 @@ def compute_rdp_tree(
  elif noise_multiplier == 0:
    return np.inf

+  if not steps_list:
+    raise ValueError(
+        "steps_list must be a non-empty list, or a non-zero scalar, got "
+        f"{steps_list}.")
+
+  if not max_participation_list:
+    raise ValueError(
+        "max_participation_list must be a non-empty list, or a non-zero scalar,"
+        f" got {max_participation_list}.")
+
+  if np.isscalar(steps_list) and np.isscalar(max_participation_list):
+    steps_list = [steps_list]
+    max_participation_list = [max_participation_list]
+  elif np.isscalar(steps_list):
+    raise ValueError(
+        "`steps_list` and `max_participation_list` must have the same type, got"
+        f"scalar of steps: {steps_list}, and list of max_participations with "
+        f"length {len(max_participation_list)}.")
+  elif np.isscalar(max_participation_list):
+    raise ValueError(
+        "`steps_list` and `max_participation_list` must have the same type, got"
+        f"scalar of max_participation: {max_participation_list}, and list of "
+        f"steps with length {len(steps_list)}.")
+  elif len(max_participation_list) != len(steps_list):
+    raise ValueError(
+        "`steps_list` and `max_participation_list` must have the same size, got"
+        f"steps length {len(steps_list)}, max_participations length "
+        f"{len(max_participation_list)}")
+
+  for max_participation in max_participation_list:
    if max_participation <= 0:
      raise ValueError(
          f"Max participation must be positive, got {max_participation}")

-  if not steps_list:
-    raise ValueError("List of steps must be non-empty.")
-
  for steps in steps_list:
    if steps < 0:
-      raise ValueError(f"List of steps must be non-negative, got {steps_list}")
+      raise ValueError(f"Steps must be non-negative, got {steps_list}")

  if np.isscalar(orders):
-    rdp = _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
-                            orders)
+    rdp = _compute_rdp_tree(noise_multiplier, steps_list,
+                            max_participation_list, orders)
  else:
    rdp = np.array([
-        _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
+        _compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
                          alpha) for alpha in orders
    ])

--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@ -272,7 +272,7 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
    # This tests is based on the StackOverflow setting in "Practical and
    # Private (Deep) Learning without Sampling or Shuffling". The calculated
    # epsilon could be better as the method in this package keeps improving.
-    steps_list, target_delta, max_participation = [1600], 1e-6, 1
+    steps_list, target_delta, max_participation = 1600, 1e-6, 1
    rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                          max_participation, orders)
    new_eps = rdp_accountant.get_privacy_spent(
@ -280,9 +280,33 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
    self.assertLess(new_eps, eps)

  @parameterized.named_parameters(
-      ('restart4_max2', [400] * 4, 2),
-      ('restart2_max1', [800] * 2, 1),
-      ('adaptive_max4', [10, 400, 400, 400, 390], 4),
+      ('restart4_max2', [400] * 4, [2] * 4),
+      ('restart2_max1', [800] * 2, [1] * 2),
+      ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
+      ('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3]))
+  def test_compose_tree_rdp(self, steps_list, max_participation_list):
+    noise_multiplier, orders = 0.1, 1
+    if np.isscalar(max_participation_list):
+      rdp_list = [
+          rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
+                                          max_participation_list, orders)
+          for steps in steps_list
+      ]
+    else:
+      rdp_list = [
+          rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
+                                          max_participation, orders) for steps,
+          max_participation in zip(steps_list, max_participation_list)
+      ]
+    rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+                                                   max_participation_list,
+                                                   orders)
+    self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
+
+  @parameterized.named_parameters(
+      ('restart4_max2', [400] * 4, [2] * 4),
+      ('restart2_max1', [800] * 2, [1] * 2),
+      ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
  )
  def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
    # Test privacy epsilon decreases with noise multiplier increasing when
@ -299,11 +323,12 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
      self.assertLess(eps, prev_eps)

  @parameterized.named_parameters(
-      ('negative_noise', -1, [3], 2, 1),
+      ('negative_noise', -1, 3, 2, 1),
      ('empty_steps', 1, [], 2, 1),
-      ('negative_steps', 1, [-3], 2, 1),
-      ('zero_participation', 1, [3], 0, 1),
-      ('negative_participation', 1, [3], -1, 1),
+      ('empty_part', 1, 1, [], 1),
+      ('negative_steps', 1, -3, 2, 1),
+      ('zero_participation', 1, 3, 0, 1),
+      ('negative_participation', 1, 3, -1, 1),
  )
  def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
                                  max_participation, orders):
@ -311,6 +336,18 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
      rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
                                      max_participation, orders)

+  @parameterized.named_parameters(
+      ('list_scalar', [2], 1),
+      ('scalar_list', 2, [1]),
+      ('list_length', [2, 3], [1]),
+      ('list_length2', [2, 3], [1, 2, 2]),
+  )
+  def test_compute_rdp_tree_raise_input_type(self, steps_list,
+                                             max_participation):
+    with self.assertRaisesRegex(ValueError, 'must have the same'):
+      rdp_accountant.compute_rdp_tree(
+          0.1, steps_list, max_participation, orders=1)
+
  @parameterized.named_parameters(
      ('t100n0.1', 100, 0.1),
      ('t1000n0.01', 1000, 0.01),
@ -318,7 +355,8 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
  def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
-                                               [1] * total_steps, 1, orders)
+                                               [1] * total_steps,
+                                               [1] * total_steps, orders)
    rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
    self.assertAllClose(tree_rdp, rdp, rtol=1e-12)