From 6fde7b04807d62da0ad7f797671ade3adb98328c Mon Sep 17 00:00:00 2001
From: Zheng Xu <xuzheng@google.com>
Date: Tue, 1 Feb 2022 21:34:11 -0800
Subject: [PATCH] Add clearer connection of TFP tree aggregation query and
 accounting arguments.

PiperOrigin-RevId: 425790945
---
 .../analysis/tree_aggregation_accountant.py   | 21 ++++++++++++++++---
 .../dp_query/tree_aggregation_query.py        | 21 +++++++++++++++++--
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py
index 794dbb3..5b3325c 100644
--- a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py
@@ -29,6 +29,10 @@ mechanism. Its public interface consists of the following methods:
     min_separation: int,
     orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
     computes RDP for DP-FTRL-NoTreeRestart.
+  compute_zcdp_single_tree(
+    noise_multiplier: float, total_steps: int, max_participation: int,
+    min_separation: int) -> Union[float, Collection[float]]:
+    computes zCDP for DP-FTRL-NoTreeRestart.
 
 For RDP to (epsilon, delta)-DP conversion, use the following public function
 described in `rdp_accountant.py`:
@@ -36,6 +40,11 @@ described in `rdp_accountant.py`:
                                    (or eps) given RDP at multiple orders and
                                    a target value for eps (or delta).
 
+The `noise_multiplier` is usually the value passed to `TreeCumulativeSumQuery`
+or `TreeResidualSumQuery` in `dp_query.tree_aggregation_query`. The other
+inputs depend on the data streaming setting (single/multi-pass) and the restart
+strategy (see `restart_query`).
+
 Example use:
 
 (1) DP-FTRL-TreeRestart RDP:
@@ -91,7 +100,9 @@ def compute_rdp_tree_restart(
   Args:
     noise_multiplier: A non-negative float representing the ratio of the
       standard deviation of the Gaussian noise to the l2-sensitivity of the
-      function to which it is added.
+      function to which it is added, which is usually set in
+      `TreeCumulativeSumQuery` or `TreeResidualSumQuery` from
+      `dp_query.tree_aggregation_query`.
     steps_list: A scalar or a list of non-negative intergers representing the
       number of steps per epoch (between two restarts).
     orders: An array (or a scalar) of RDP orders.
@@ -282,7 +293,9 @@ def compute_rdp_single_tree(
   Args:
     noise_multiplier: A non-negative float representing the ratio of the
       standard deviation of the Gaussian noise to the l2-sensitivity of the
-      function to which it is added.
+      function to which it is added, which is usually set in
+      `TreeCumulativeSumQuery` or `TreeResidualSumQuery` from
+      `dp_query.tree_aggregation_query`.
     total_steps: Total number of steps (leaf nodes in tree aggregation).
     max_participation: The maximum number of times a sample can appear.
     min_separation: The minimum number of nodes between two appearance of a
@@ -342,7 +355,9 @@ def compute_zcdp_single_tree(
   Args:
     noise_multiplier: A non-negative float representing the ratio of the
       standard deviation of the Gaussian noise to the l2-sensitivity of the
-      function to which it is added.
+      function to which it is added, which is usually set in
+      `TreeCumulativeSumQuery` or `TreeResidualSumQuery` from
+      `dp_query.tree_aggregation_query`.
     total_steps: Total number of steps (leaf nodes in tree aggregation).
     max_participation: The maximum number of times a sample can appear.
     min_separation: The minimum number of nodes between two appearance of a
diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
index 70f9efa..eef67dc 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
@@ -18,6 +18,19 @@ online observation queries relying on `tree_aggregation`. 'Online' means that
 the leaf nodes of the tree arrive one by one as the time proceeds. The core
 logic of tree aggregation is implemented in `tree_aggregation.TreeAggregator`
 and `tree_aggregation.EfficientTreeAggregator`.
+
+Depending on the data streaming setting (single/multi-pass), the privacy
+accounting method ((epsilon,delta)-DP/RDP/zCDP), and the restart strategy (see
+`restart_query`), the DP bound can be computed by one of the public methods
+in `analysis.tree_aggregation_accountant`.
+
+For example, consider a single-pass algorithm where a sample may appear at most
+once in the querying process. If `get_noised_result` is called `steps` times,
+the epsilon for a given `target_delta` and `noise_multiplier` to achieve
+(epsilon,delta)-DP can be computed as:
+  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+  rdp = compute_rdp_tree_restart(noise_multiplier, [steps], orders)
+  eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta)[0]
 """
 
 import attr
@@ -210,7 +223,9 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery):
       clip_norm: Each record will be clipped so that it has L2 norm at most
         `clip_norm`.
       noise_multiplier: The effective noise multiplier for the sum of records.
-        Noise standard deviation is `clip_norm*noise_multiplier`.
+        Noise standard deviation is `clip_norm*noise_multiplier`. The value can
+        be used as the `noise_multiplier` input of the privacy accounting
+        functions in `analysis.tree_aggregation_accountant`.
       record_specs: A nested structure of `tf.TensorSpec`s specifying structure
         and shapes of records.
       noise_seed: Integer seed for the Gaussian noise generator. If `None`, a
@@ -461,7 +476,9 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery):
       clip_norm: Each record will be clipped so that it has L2 norm at most
         `clip_norm`.
       noise_multiplier: The effective noise multiplier for the sum of records.
-        Noise standard deviation is `clip_norm*noise_multiplier`.
+        Noise standard deviation is `clip_norm*noise_multiplier`. The value can
+        be used as the `noise_multiplier` input of the privacy accounting
+        functions in `analysis.tree_aggregation_accountant`.
       record_specs: A nested structure of `tf.TensorSpec`s specifying structure
         and shapes of records.
       noise_seed: Integer seed for the Gaussian noise generator. If `None`, a