Remove dependence on six and use super() in rdp_privacy_accountant

PiperOrigin-RevId: 434668652
2022-03-14 23:24:06 -07:00 · 2022-03-14 23:24:06 -07:00 · adde2064dd
commit adde2064dd
parent 42df23eb79
3 changed files with 167 additions and 3 deletions
--- a/tensorflow_privacy/privacy/analysis/privacy_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/privacy_accountant.py
@ -24,6 +24,10 @@ class NeighboringRelation(enum.Enum):
  ADD_OR_REMOVE_ONE = 1
  REPLACE_ONE = 2

+  # A record is replaced with a special record, such as the "zero record". See
+  # https://arxiv.org/pdf/2103.00039.pdf, Definition 1.1.
+  REPLACE_SPECIAL = 3
+

 class UnsupportedEventError(Exception):
  """Exception to raise if _compose is called on unsupported event type."""
@ -91,7 +95,7 @@ class PrivacyAccountant(metaclass=abc.ABCMeta):
      raise TypeError(f'`event` must be `DpEvent`. Found {type(event)}.')

    if not self.supports(event):
-      raise UnsupportedEventError('Unsupported event: {event}.')
+      raise UnsupportedEventError(f'Unsupported event: {event}.')

    self._ledger.compose(event, count)
    self._compose(event, count)
--- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
@ -15,7 +15,7 @@
 """Privacy accountant that uses Renyi differential privacy."""

 import math
-from typing import Collection, Optional
+from typing import Collection, Optional, Union

 import numpy as np
 from scipy import special
@ -519,6 +519,49 @@ def _effective_gaussian_noise_multiplier(event: dp_event.DpEvent):
    return None


+def _compute_rdp_single_epoch_tree_aggregation(
+    noise_multiplier: float, step_counts: Union[int, Collection[int]],
+    orders: Collection[float]) -> Union[float, np.ndarray]:
+  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
+
+  This function implements the accounting when the tree is periodically
+  restarted and no record occurs twice across all trees. See appendix D of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    noise_multiplier: A non-negative float representing the ratio of the
+      standard deviation of the Gaussian noise to the l2-sensitivity of the
+      function to which it is added.
+    step_counts: A scalar or a list of non-negative integers representing the
+      number of steps per epoch (between two restarts).
+    orders: An array of RDP orders.
+
+  Returns:
+    The RDPs at all orders. Can be `np.inf`.
+  """
+  if noise_multiplier < 0:
+    raise ValueError(
+        f'noise_multiplier must be non-negative. Got {noise_multiplier}.')
+  if noise_multiplier == 0:
+    return np.inf
+
+  if not step_counts:
+    raise ValueError(
+        'steps_list must be a non-empty list, or a non-zero scalar. Got '
+        f'{step_counts}.')
+
+  if np.isscalar(step_counts):
+    step_counts = [step_counts]
+
+  for steps in step_counts:
+    if steps < 0:
+      raise ValueError(f'Steps must be non-negative. Got {step_counts}')
+
+  max_depth = max(math.ceil(math.log2(steps + 1)) for steps in step_counts)
+  return np.array([a * max_depth / (2 * noise_multiplier**2) for a in orders])
+
+
 class RdpAccountant(privacy_accountant.PrivacyAccountant):
  """Privacy accountant that uses Renyi differential privacy."""

@ -603,6 +646,13 @@ class RdpAccountant(privacy_accountant.PrivacyAccountant):
            noise_multiplier=gaussian_noise_multiplier,
            orders=self._orders)
      return True
+    elif isinstance(event, dp_event.SingleEpochTreeAggregationDpEvent):
+      if self._neighboring_relation is not NeighborRel.REPLACE_SPECIAL:
+        return False
+      if do_compose:
+        self._rdp += count * _compute_rdp_single_epoch_tree_aggregation(
+            event.noise_multiplier, event.step_counts, self._orders)
+      return True
    else:
      # Unsupported event (including `UnsupportedDpEvent`).
      return False
--- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py
@ -59,6 +59,26 @@ def _compute_a_mp(sigma, q, alpha):
  return a_alpha


+def _compose_trees(noise_multiplier, step_counts, orders):
+  accountant = rdp_privacy_accountant.RdpAccountant(
+      orders, privacy_accountant.NeighboringRelation.REPLACE_SPECIAL)
+  accountant.compose(
+      dp_event.ComposedDpEvent([
+          dp_event.SingleEpochTreeAggregationDpEvent(noise_multiplier,
+                                                     step_count)
+          for step_count in step_counts
+      ]))
+  return accountant
+
+
+def _compose_trees_single_epoch(noise_multiplier, step_counts, orders):
+  accountant = rdp_privacy_accountant.RdpAccountant(
+      orders, privacy_accountant.NeighboringRelation.REPLACE_SPECIAL)
+  accountant.compose(
+      dp_event.SingleEpochTreeAggregationDpEvent(noise_multiplier, step_counts))
+  return accountant
+
+
 class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
                               parameterized.TestCase):

@ -67,7 +87,9 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
        rdp_privacy_accountant.RdpAccountant(
            [2.0], privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE),
        rdp_privacy_accountant.RdpAccountant(
-            [2.0], privacy_accountant.NeighboringRelation.REPLACE_ONE)
+            [2.0], privacy_accountant.NeighboringRelation.REPLACE_ONE),
+        rdp_privacy_accountant.RdpAccountant(
+            [2.0], privacy_accountant.NeighboringRelation.REPLACE_SPECIAL)
    ]

  def test_supports(self):
@ -350,6 +372,94 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
          else:
            self.assertAlmostEqual(delta, delta2)

+  @parameterized.named_parameters(
+      ('add_remove', privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE),
+      ('replace', privacy_accountant.NeighboringRelation.REPLACE_ONE))
+  def test_tree_wrong_neighbor_rel(self, neighboring_relation):
+    event = dp_event.SingleEpochTreeAggregationDpEvent(1.0, 1)
+    accountant = rdp_privacy_accountant.RdpAccountant(
+        neighboring_relation=neighboring_relation)
+    self.assertFalse(accountant.supports(event))
+
+  @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04))
+  def test_compute_eps_tree(self, noise_multiplier, eps):
+    orders = [1 + x / 10 for x in range(1, 100)] + list(range(12, 64))
+    # This test is based on the StackOverflow setting in "Practical and
+    # Private (Deep) Learning without Sampling or Shuffling". The calculated
+    # epsilon could be better as the method in this package keeps improving.
+    step_counts, target_delta = 1600, 1e-6
+    new_eps = _compose_trees_single_epoch(noise_multiplier, step_counts,
+                                          orders).get_epsilon(target_delta)
+    self.assertLess(new_eps, eps)
+
+  @parameterized.named_parameters(
+      ('restart4', [400] * 4),
+      ('restart2', [800] * 2),
+      ('adaptive', [10, 400, 400, 400, 390]),
+  )
+  def test_compose_tree_rdp(self, step_counts):
+    noise_multiplier, orders = 0.1, [1]
+
+    def get_rdp(step_count):
+      return _compose_trees_single_epoch(noise_multiplier, [step_count],
+                                         orders)._rdp[0]
+
+    rdp_summed = sum(get_rdp(step_count) for step_count in step_counts)
+    rdp_composed = _compose_trees(noise_multiplier, step_counts, orders)._rdp[0]
+    self.assertTrue(np.allclose(rdp_composed, rdp_summed, rtol=1e-12))
+
+  def test_single_epoch_multi_tree_rdp(self):
+    noise_multiplier, orders = 0.1, [1]
+    step_counts = [10, 40, 30, 20]
+    single_rdp = _compose_trees_single_epoch(noise_multiplier, step_counts,
+                                             orders)._rdp[0]
+
+    max_rdp = max(
+        _compose_trees_single_epoch(noise_multiplier, step_count,
+                                    orders)._rdp[0]
+        for step_count in step_counts)
+
+    self.assertEqual(single_rdp, max_rdp)
+
+  @parameterized.named_parameters(
+      ('restart4', [400] * 4),
+      ('restart2', [800] * 2),
+      ('adaptive', [10, 400, 400, 400, 390]),
+  )
+  def test_compute_eps_tree_decreasing(self, step_counts):
+    # Test privacy epsilon decreases with noise multiplier increasing when
+    # keeping other parameters the same.
+    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+    target_delta = 1e-6
+    prev_eps = np.inf
+    for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]:
+      accountant = _compose_trees(noise_multiplier, step_counts, orders)
+      eps = accountant.get_epsilon(target_delta=target_delta)
+      self.assertLess(eps, prev_eps)
+      prev_eps = eps
+
+  @parameterized.named_parameters(
+      ('negative_noise', -1, [3]),
+      ('negative_steps', 1, [-3]),
+  )
+  def test_compute_rdp_tree_restart_raise(self, noise_multiplier, step_counts):
+    with self.assertRaisesRegex(ValueError, 'non-negative'):
+      _compose_trees(noise_multiplier, step_counts, orders=[1])
+
+  @parameterized.named_parameters(
+      ('t100n0.1', 100, 0.1),
+      ('t1000n0.01', 1000, 0.01),
+  )
+  def test_no_tree_no_sampling(self, total_steps, noise_multiplier):
+    orders = [1 + x / 10 for x in range(1, 100)] + list(range(12, 64))
+    tree_rdp = _compose_trees(noise_multiplier, [1] * total_steps, orders)._rdp
+    accountant = rdp_privacy_accountant.RdpAccountant(orders)
+    event = dp_event.SelfComposedDpEvent(
+        dp_event.GaussianDpEvent(noise_multiplier), total_steps)
+    accountant.compose(event)
+    base_rdp = accountant._rdp
+    self.assertTrue(np.allclose(tree_rdp, base_rdp, rtol=1e-12))
+

 if __name__ == '__main__':
  absltest.main()