From a20cbf9578972d90ac25c8624e11fbb2a43d3311 Mon Sep 17 00:00:00 2001
From: Zheng Xu <xuzheng@google.com>
Date: Fri, 3 Sep 2021 15:42:17 -0700
Subject: [PATCH] RDP for tree aggregation. See "Practical and Private (Deep)
 Learning without Sampling or Shuffling" https://arxiv.org/abs/2103.00039 for
 more details. See tests for example usage for calculating epsilon.

PiperOrigin-RevId: 394770205
---
 .../privacy/analysis/rdp_accountant.py        | 59 ++++++++++++++++++
 .../privacy/analysis/rdp_accountant_test.py   | 62 ++++++++++++++++++-
 2 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
index c872318..a328a63 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -42,6 +42,7 @@ from __future__ import print_function
 
 import math
 import sys
+from typing import Collection, Union
 
 import numpy as np
 from scipy import special
@@ -397,6 +398,64 @@ def compute_rdp(q, noise_multiplier, steps, orders):
   return rdp * steps
 
 
+def _compute_rdp_tree(sigma, steps_list, max_participation, alpha):
+  """Computes RDP of the Tree Aggregation Protocol at a single order alpha."""
+  if np.isinf(alpha):
+    return np.inf
+  tree_depths = [  # One depth per entry of steps_list; zeros are skipped.
+      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
+  ]
+  return alpha * max_participation * sum(tree_depths) / (2 * sigma**2)
+
+
+def compute_rdp_tree(
+    noise_multiplier: float, steps_list: Collection[float],
+    max_participation: int,
+    orders: Union[float, Collection[float]]) -> Collection[float]:
+  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
+
+  Args:
+    noise_multiplier: A non-negative float, the ratio of the Gaussian noise
+      standard deviation to the l2-sensitivity of the function it is added to.
+    steps_list: A list of non-negative integers representing the number of
+      steps between tree restarts.
+    max_participation: A positive integer representing maximum number of times a
+      sample may appear between tree restarts.
+    orders: An array (or a scalar) of RDP orders.
+
+  Returns:
+    The RDP at each order (a scalar for scalar `orders`, else a numpy array).
+    If `noise_multiplier` is 0, a scalar `np.inf` is returned instead.
+  """
+  if noise_multiplier < 0:
+    raise ValueError(
+        f"Noise multiplier must be non-negative, got {noise_multiplier}")
+  elif noise_multiplier == 0:
+    return np.inf  # Returned before the remaining arguments are validated.
+
+  if max_participation <= 0:
+    raise ValueError(
+        f"Max participation must be positive, got {max_participation}")
+
+  if not steps_list:
+    raise ValueError("List of steps must be non-empty.")
+
+  for steps in steps_list:
+    if steps < 0:
+      raise ValueError(f"List of steps must be non-negative, got {steps_list}")
+
+  if np.isscalar(orders):
+    rdp = _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
+                            orders)
+  else:
+    rdp = np.array([
+        _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
+                          alpha) for alpha in orders
+    ])
+
+  return rdp
+
+
 def compute_rdp_sample_without_replacement(q, noise_multiplier, steps, orders):
   """Compute RDP of Gaussian Mechanism using sampling without replacement.
 
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
index 3a5529f..33c51fd 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 import math
 import sys
 
-from absl.testing import absltest
 from absl.testing import parameterized
 from mpmath import exp
 from mpmath import inf
@@ -265,5 +264,64 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
       self.assertLessEqual(delta, delta1 + 1e-300)
 
 
+class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04))
+  def test_compute_eps_tree(self, noise_multiplier, eps):
+    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+    # This test is based on the StackOverflow setting in "Practical and
+    # Private (Deep) Learning without Sampling or Shuffling". The calculated
+    # epsilon could be better as the method in this package keeps improving.
+    steps_list, target_delta, max_participation = [1600], 1e-6, 1
+    rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+                                          max_participation, orders)
+    new_eps = rdp_accountant.get_privacy_spent(
+        orders, rdp, target_delta=target_delta)[0]
+    self.assertLess(new_eps, eps)
+
+  @parameterized.named_parameters(
+      ('restart4_max2', [400] * 4, 2),
+      ('restart2_max1', [800] * 2, 1),
+      ('adaptive_max4', [10, 400, 400, 400, 390], 4),
+  )
+  def test_compute_eps_tree_decreasing(self, steps_list, max_participation):
+    """Epsilon strictly decreases as noise grows, other parameters fixed."""
+    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+    target_delta = 1e-6
+    prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation,
+                                               orders)
+    for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
+      rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+                                            max_participation, orders)
+      eps = rdp_accountant.get_privacy_spent(
+          orders, rdp, target_delta=target_delta)[0]
+      self.assertLess(eps, prev_eps)
+      prev_eps = eps  # Otherwise every check is against the initial inf.
+
+  @parameterized.named_parameters(
+      ('negative_noise', -1, [3], 2, 1),
+      ('empty_steps', 1, [], 2, 1),
+      ('negative_steps', 1, [-3], 2, 1),
+      ('zero_participation', 1, [3], 0, 1),
+      ('negative_participation', 1, [3], -1, 1),
+  )
+  def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list,
+                                  max_participation, orders):
+    with self.assertRaisesRegex(ValueError, 'must be'):
+      rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list,
+                                      max_participation, orders)
+
+  @parameterized.named_parameters(
+      ('t100n0.1', 100, 0.1),
+      ('t1000n0.01', 1000, 0.01),
+  )
+  def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
+    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+    tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
+                                               [1] * total_steps, 1, orders)
+    rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
+    self.assertAllClose(tree_rdp, rdp, rtol=1e-12)
+
+
 if __name__ == '__main__':
-  absltest.main()
+  tf.test.main()