From 9757e1bc870bc65283abb5a8bc6fc69546b46b67 Mon Sep 17 00:00:00 2001
From: Zheng Xu <xuzheng@google.com>
Date: Mon, 1 Nov 2021 11:39:10 -0700
Subject: [PATCH] Update the tree aggregation RDP accounting for restarts. This
 prevents potentially inaccurate use of the previous implementation in the
 no-restart setting.

PiperOrigin-RevId: 406878834
---
 tensorflow_privacy/__init__.py                |  2 +-
 .../privacy/analysis/rdp_accountant.py        | 63 ++++----------
 .../privacy/analysis/rdp_accountant_test.py   | 87 +++++++------------
 3 files changed, 50 insertions(+), 102 deletions(-)

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index 5a90fe5..72cc746 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -45,7 +45,7 @@ else:
   from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
-  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
+  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart
   from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
 
   # DPQuery classes
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
index 500a16a..d50a7a8 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -398,34 +398,34 @@ def compute_rdp(q, noise_multiplier, steps, orders):
   return rdp * steps
 
 
-def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
+def _compute_rdp_tree_restart(sigma, steps_list, alpha):
   """Computes RDP of the Tree Aggregation Protocol at order alpha."""
   if np.isinf(alpha):
     return np.inf
   tree_depths = [
-      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
+      math.floor(math.log2(float(steps))) + 1
+      for steps in steps_list
+      if steps > 0
   ]
-  record_occurence = [
-      x * y for x, y in zip(max_participation_list, tree_depths)
-  ]
-  return alpha * sum(record_occurence) / (2 * sigma**2)
+  return alpha * sum(tree_depths) / (2 * sigma**2)
 
 
-def compute_rdp_tree(
-    noise_multiplier: float, steps_list: Union[float, Collection[float]],
-    max_participation_list: Union[int, Collection[int]],
-    orders: Union[float, Collection[float]]) -> Collection[float]:
+def compute_rdp_tree_restart(
+    noise_multiplier: float, steps_list: Union[int, Collection[int]],
+    orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
   """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
 
+  This function implements the accounting when the tree is restarted at every
+  epoch. See appendix of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
   Args:
     noise_multiplier: A non-negative float representing the ratio of the
       standard deviation of the Gaussian noise to the l2-sensitivity of the
       function to which it is added.
     steps_list: A scalar or a list of non-negative intergers representing the
-      number of steps between tree restarts.
-    max_participation_list: A scalar or a list of positive integers representing
-      maximum number of times a sample may appear between tree restarts. The
-      type (scalar/list) of `max_participation_list` should match `steps_list`.
+      number of steps per epoch (between two restarts).
     orders: An array (or a scalar) of RDP orders.
 
   Returns:
@@ -442,46 +442,19 @@ def compute_rdp_tree(
         "steps_list must be a non-empty list, or a non-zero scalar, got "
         f"{steps_list}.")
 
-  if not max_participation_list:
-    raise ValueError(
-        "max_participation_list must be a non-empty list, or a non-zero scalar,"
-        f" got {max_participation_list}.")
-
-  if np.isscalar(steps_list) and np.isscalar(max_participation_list):
+  if np.isscalar(steps_list):
     steps_list = [steps_list]
-    max_participation_list = [max_participation_list]
-  elif np.isscalar(steps_list):
-    raise ValueError(
-        "`steps_list` and `max_participation_list` must have the same type, got"
-        f"scalar of steps: {steps_list}, and list of max_participations with "
-        f"length {len(max_participation_list)}.")
-  elif np.isscalar(max_participation_list):
-    raise ValueError(
-        "`steps_list` and `max_participation_list` must have the same type, got"
-        f"scalar of max_participation: {max_participation_list}, and list of "
-        f"steps with length {len(steps_list)}.")
-  elif len(max_participation_list) != len(steps_list):
-    raise ValueError(
-        "`steps_list` and `max_participation_list` must have the same size, got"
-        f"steps length {len(steps_list)}, max_participations length "
-        f"{len(max_participation_list)}")
-
-  for max_participation in max_participation_list:
-    if max_participation <= 0:
-      raise ValueError(
-          f"Max participation must be positive, got {max_participation}")
 
   for steps in steps_list:
     if steps < 0:
       raise ValueError(f"Steps must be non-negative, got {steps_list}")
 
   if np.isscalar(orders):
-    rdp = _compute_rdp_tree(noise_multiplier, steps_list,
-                            max_participation_list, orders)
+    rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
   else:
     rdp = np.array([
-        _compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
-                          alpha) for alpha in orders
+        _compute_rdp_tree_restart(noise_multiplier, steps_list, alpha)
+        for alpha in orders
     ])
 
   return rdp
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
index 583fa3f..42e751c 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@@ -272,81 +272,56 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
     # This tests is based on the StackOverflow setting in "Practical and
     # Private (Deep) Learning without Sampling or Shuffling". The calculated
     # epsilon could be better as the method in this package keeps improving.
-    steps_list, target_delta, max_participation = 1600, 1e-6, 1
-    rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
-                                          max_participation, orders)
+    steps_list, target_delta = 1600, 1e-6
+    rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
+                                                  orders)
     new_eps = rdp_accountant.get_privacy_spent(
         orders, rdp, target_delta=target_delta)[0]
     self.assertLess(new_eps, eps)
 
   @parameterized.named_parameters(
-      ('restart4_max2', [400] * 4, [2] * 4),
-      ('restart2_max1', [800] * 2, [1] * 2),
-      ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
-      ('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3]))
-  def test_compose_tree_rdp(self, steps_list, max_participation_list):
+      ('restart4', [400] * 4),
+      ('restart2', [800] * 2),
+      ('adaptive', [10, 400, 400, 400, 390]),
+  )
+  def test_compose_tree_rdp(self, steps_list):
     noise_multiplier, orders = 0.1, 1
-    if np.isscalar(max_participation_list):
-      rdp_list = [
-          rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
-                                          max_participation_list, orders)
-          for steps in steps_list
-      ]
-    else:
-      rdp_list = [
-          rdp_accountant.compute_rdp_tree(noise_multiplier, steps,
-                                          max_participation, orders) for steps,
-          max_participation in zip(steps_list, max_participation_list)
-      ]
-    rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
-                                                   max_participation_list,
-                                                   orders)
+    rdp_list = [
+        rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders)
+        for steps in steps_list
+    ]
+    rdp_composed = rdp_accountant.compute_rdp_tree_restart(
+        noise_multiplier, steps_list, orders)
     self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
 
   @parameterized.named_parameters(
-      ('restart4_max2', [400] * 4, [2] * 4),
-      ('restart2_max1', [800] * 2, [1] * 2),
-      ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5),
+      ('restart4', [400] * 4),
+      ('restart2', [800] * 2),
+      ('adaptive', [10, 400, 400, 400, 390]),
   )
-  def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
+  def test_compute_eps_tree_decreasing(self, steps_list):
     # Test privacy epsilon decreases with noise multiplier increasing when
     # keeping other parameters the same.
     orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
     target_delta = 1e-6
-    prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
-                                               orders)
+    prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders)
     for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
-      rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
-                                            max_participation, orders)
+      rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
+                                                    steps_list, orders)
       eps = rdp_accountant.get_privacy_spent(
           orders, rdp, target_delta=target_delta)[0]
       self.assertLess(eps, prev_eps)
 
   @parameterized.named_parameters(
-      ('negative_noise', -1, 3, 2, 1),
-      ('empty_steps', 1, [], 2, 1),
-      ('empty_part', 1, 1, [], 1),
-      ('negative_steps', 1, -3, 2, 1),
-      ('zero_participation', 1, 3, 0, 1),
-      ('negative_participation', 1, 3, -1, 1),
+      ('negative_noise', -1, 3, 1),
+      ('empty_steps', 1, [], 1),
+      ('negative_steps', 1, -3, 1),
   )
-  def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
-                                  max_participation, orders):
+  def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list,
+                                          orders):
     with self.assertRaisesRegex(ValueError, 'must be'):
-      rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
-                                      max_participation, orders)
-
-  @parameterized.named_parameters(
-      ('list_scalar', [2], 1),
-      ('scalar_list', 2, [1]),
-      ('list_length', [2, 3], [1]),
-      ('list_length2', [2, 3], [1, 2, 2]),
-  )
-  def test_compute_rdp_tree_raise_input_type(self, steps_list,
-                                             max_participation):
-    with self.assertRaisesRegex(ValueError, 'must have the same'):
-      rdp_accountant.compute_rdp_tree(
-          0.1, steps_list, max_participation, orders=1)
+      rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list,
+                                              orders)
 
   @parameterized.named_parameters(
       ('t100n0.1', 100, 0.1),
@@ -354,9 +329,9 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
   )
   def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
     orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
-    tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
-                                               [1] * total_steps,
-                                               [1] * total_steps, orders)
+    tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier,
+                                                       [1] * total_steps,
+                                                       orders)
     rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
     self.assertAllClose(tree_rdp, rdp, rtol=1e-12)