From 6fde7b04807d62da0ad7f797671ade3adb98328c Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Tue, 1 Feb 2022 21:34:11 -0800 Subject: [PATCH] Add clearer connection of TFP tree aggregation query and accounting arguments. PiperOrigin-RevId: 425790945 --- .../analysis/tree_aggregation_accountant.py | 21 ++++++++++++++++--- .../dp_query/tree_aggregation_query.py | 21 +++++++++++++++++-- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py index 794dbb3..5b3325c 100644 --- a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py +++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py @@ -29,6 +29,10 @@ mechanism. Its public interface consists of the following methods: min_separation: int, orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: computes RDP for DP-FTRL-NoTreeRestart. + compute_zcdp_single_tree( + noise_multiplier: float, total_steps: int, max_participation: int, + min_separation: int) -> Union[float, Collection[float]]: + computes zCDP for DP-FTRL-NoTreeRestart. For RDP to (epsilon, delta)-DP conversion, use the following public function described in `rdp_accountant.py`: @@ -36,6 +40,11 @@ described in `rdp_accountant.py`: (or eps) given RDP at multiple orders and a target value for eps (or delta). +The `noise_multiplier` is usually from `TreeCumulativeSumQuery` and +`TreeResidualSumQuery` in `dp_query.tree_aggregation_query`. The other +inputs depend on the data streaming setting (single/multi-pass) and the restart +strategy (see `restart_query`). + Example use: (1) DP-FTRL-TreeRestart RDP: @@ -91,7 +100,9 @@ def compute_rdp_tree_restart( Args: noise_multiplier: A non-negative float representing the ratio of the standard deviation of the Gaussian noise to the l2-sensitivity of the - function to which it is added. 
+ function to which it is added, which is usually set in + `TreeCumulativeSumQuery` and `TreeResidualSumQuery` from + `dp_query.tree_aggregation_query`. steps_list: A scalar or a list of non-negative intergers representing the number of steps per epoch (between two restarts). orders: An array (or a scalar) of RDP orders. @@ -282,7 +293,9 @@ def compute_rdp_single_tree( Args: noise_multiplier: A non-negative float representing the ratio of the standard deviation of the Gaussian noise to the l2-sensitivity of the - function to which it is added. + function to which it is added, which is usually set in + `TreeCumulativeSumQuery` and `TreeResidualSumQuery` from + `dp_query.tree_aggregation_query`. total_steps: Total number of steps (leaf nodes in tree aggregation). max_participation: The maximum number of times a sample can appear. min_separation: The minimum number of nodes between two appearance of a @@ -342,7 +355,9 @@ def compute_zcdp_single_tree( Args: noise_multiplier: A non-negative float representing the ratio of the standard deviation of the Gaussian noise to the l2-sensitivity of the - function to which it is added. + function to which it is added, which is usually set in + `TreeCumulativeSumQuery` and `TreeResidualSumQuery` from + `dp_query.tree_aggregation_query`. total_steps: Total number of steps (leaf nodes in tree aggregation). max_participation: The maximum number of times a sample can appear. min_separation: The minimum number of nodes between two appearance of a diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 70f9efa..eef67dc 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -18,6 +18,19 @@ online observation queries relying on `tree_aggregation`. 'Online' means that the leaf nodes of the tree arrive one by one as the time proceeds. 
The core logic of tree aggregation is implemented in `tree_aggregation.TreeAggregator` and `tree_aggregation.EfficientTreeAggregator`. + +Depending on the data streaming setting (single/multi-pass), the privacy +accounting method ((epsilon,delta)-DP/RDP/zCDP), and the restart strategy (see +`restart_query`), the DP bound can be computed by one of the public methods +in `analysis.tree_aggregation_accountant`. + +For example, for a single-pass algorithm where a sample may appear at most once +in the querying process, if `get_noised_result` is called `steps` times, the +corresponding epsilon for a `target_delta` and `noise_multiplier` to achieve +(epsilon,delta)-DP can be computed as: + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + rdp = compute_rdp_tree_restart(noise_multiplier, [steps], orders) + eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta)[0] """ import attr @@ -210,7 +223,9 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): clip_norm: Each record will be clipped so that it has L2 norm at most `clip_norm`. noise_multiplier: The effective noise multiplier for the sum of records. - Noise standard deviation is `clip_norm*noise_multiplier`. + Noise standard deviation is `clip_norm*noise_multiplier`. The value can + be used as the input of the privacy accounting functions in + `analysis.tree_aggregation_accountant`. record_specs: A nested structure of `tf.TensorSpec`s specifying structure and shapes of records. noise_seed: Integer seed for the Gaussian noise generator. If `None`, a @@ -461,7 +476,9 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): clip_norm: Each record will be clipped so that it has L2 norm at most `clip_norm`. noise_multiplier: The effective noise multiplier for the sum of records. - Noise standard deviation is `clip_norm*noise_multiplier`. 
The value can + be used as the input of the privacy accounting functions in + `analysis.tree_aggregation_accountant`. record_specs: A nested structure of `tf.TensorSpec`s specifying structure and shapes of records. noise_seed: Integer seed for the Gaussian noise generator. If `None`, a