From 4326014a0e493e650e182f1bb708c733595c0702 Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Wed, 7 Jul 2021 13:45:19 -0700 Subject: [PATCH 01/71] Move TensorFlow v1 imports to their own __init__.py file in a new subdirectory. PiperOrigin-RevId: 383485268 --- tensorflow_privacy/__init__.py | 21 ++------ .../analysis/compute_dp_sgd_privacy.py | 5 -- .../analysis/compute_dp_sgd_privacy_lib.py | 4 -- .../analysis/compute_noise_from_budget.py | 5 -- .../analysis/compute_noise_from_budget_lib.py | 4 -- tensorflow_privacy/v1/__init__.py | 50 +++++++++++++++++++ 6 files changed, 53 insertions(+), 36 deletions(-) create mode 100644 tensorflow_privacy/v1/__init__.py diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 1e6e42d..bf0e8e4 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -26,6 +26,9 @@ from tensorflow_privacy.version import __version__ # pylint: disable=g-bad-impo if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: + # TensorFlow v1 imports + import tensorflow_privacy.v1 + # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.privacy_ledger import GaussianSumQueryEntry @@ -54,7 +57,6 @@ else: # Estimators from tensorflow_privacy.privacy.estimators.dnn import DNNClassifier - from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 # Keras Models from tensorflow_privacy.privacy.keras_models.dp_keras_model import DPModel @@ -62,14 +64,6 @@ else: from tensorflow_privacy.privacy.keras_models.dp_keras_model import make_dp_model_class # Optimizers - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class - from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdagradOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer @@ -80,15 +74,6 @@ else: from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer - - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class - 
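The `skip_tf_privacy_import` guard shown above is the mechanism this patch series keeps negotiating: standalone analysis scripts set the attribute so the package-level `__init__.py` skips its eager, TensorFlow-heavy imports. A minimal sketch of the pattern, assuming the guard in `tensorflow_privacy/__init__.py` as it appears in this hunk and the `compute_dp_sgd_privacy(n, batch_size, noise_multiplier, epochs, delta)` signature of this era of the library; the training parameters are hypothetical:

import sys

# Must be set before the first `tensorflow_privacy` import so that the
# package __init__.py takes the `pass` branch of the guard above and skips
# loading every sibling package.
sys.skip_tf_privacy_import = True

from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy

# Hypothetical DP-SGD run: 60k examples, batch size 256, noise multiplier 1.1.
eps, opt_order = compute_dp_sgd_privacy(
    n=60000, batch_size=256, noise_multiplier=1.1, epochs=60, delta=1e-5)
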
try: from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py index e4a0efb..e9d999d 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py @@ -32,16 +32,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys - from absl import app from absl import flags from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - FLAGS = flags.FLAGS flags.DEFINE_integer('N', None, 'Total number of examples') diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py index a69d7db..8d081c5 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py @@ -19,13 +19,9 @@ from __future__ import division from __future__ import print_function import math -import sys from absl import app -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py index bb23eea..731c9ac 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py @@ -34,16 +34,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys - from absl import app from absl import flags from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - FLAGS = flags.FLAGS flags.DEFINE_integer('N', None, 'Total number of examples') diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py index 1325cb2..ccf1f1c 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import math -import sys from absl import app from scipy.optimize import bisect @@ -27,9 +26,6 @@ from scipy.optimize import bisect from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent -# Opting out of loading all sibling packages and their dependencies. 
-sys.skip_tf_privacy_import = True - def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): """Compute and print results of DP-SGD analysis.""" diff --git a/tensorflow_privacy/v1/__init__.py b/tensorflow_privacy/v1/__init__.py new file mode 100644 index 0000000..7bce1a9 --- /dev/null +++ b/tensorflow_privacy/v1/__init__.py @@ -0,0 +1,50 @@ +# Copyright 2020, The TensorFlow Privacy Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorFlow Privacy library v1 imports. + +This module includes classes designed to be compatible with TF1, based on +`tf.compat.v1.train.Optimizer` and `tf.estimator.Estimator`. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +# pylint: disable=g-import-not-at-top + +if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. + pass +else: + # Estimators + from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 + + # Optimizers + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class + + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer + + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class From d6aa796684a44dd3d35e2791c28adf0a2f2fb8e7 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Wed, 7 Jul 2021 15:26:00 -0700 Subject: [PATCH 02/71] Automated rollback of commit 4326014a0e493e650e182f1bb708c733595c0702 PiperOrigin-RevId: 383505647 --- tensorflow_privacy/__init__.py | 21 ++++++-- .../analysis/compute_dp_sgd_privacy.py | 5 ++ .../analysis/compute_dp_sgd_privacy_lib.py | 4 ++ .../analysis/compute_noise_from_budget.py | 5 ++ .../analysis/compute_noise_from_budget_lib.py | 4 ++ tensorflow_privacy/v1/__init__.py | 50 ------------------- 6 files changed, 36 insertions(+), 53 deletions(-) delete mode 100644 tensorflow_privacy/v1/__init__.py diff --git a/tensorflow_privacy/__init__.py 
b/tensorflow_privacy/__init__.py index bf0e8e4..1e6e42d 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -26,9 +26,6 @@ from tensorflow_privacy.version import __version__ # pylint: disable=g-bad-impo if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: - # TensorFlow v1 imports - import tensorflow_privacy.v1 - # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.privacy_ledger import GaussianSumQueryEntry @@ -57,6 +54,7 @@ else: # Estimators from tensorflow_privacy.privacy.estimators.dnn import DNNClassifier + from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 # Keras Models from tensorflow_privacy.privacy.keras_models.dp_keras_model import DPModel @@ -64,6 +62,14 @@ else: from tensorflow_privacy.privacy.keras_models.dp_keras_model import make_dp_model_class # Optimizers + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class + from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdagradOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer @@ -74,6 +80,15 @@ else: from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer + + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class + try: from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py index e9d999d..e4a0efb 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py @@ -32,11 +32,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import sys + from absl import app from absl import flags from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import 
compute_dp_sgd_privacy +# Opting out of loading all sibling packages and their dependencies. +sys.skip_tf_privacy_import = True + FLAGS = flags.FLAGS flags.DEFINE_integer('N', None, 'Total number of examples') diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py index 8d081c5..a69d7db 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py @@ -19,9 +19,13 @@ from __future__ import division from __future__ import print_function import math +import sys from absl import app +# Opting out of loading all sibling packages and their dependencies. +sys.skip_tf_privacy_import = True + from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py index 731c9ac..bb23eea 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py @@ -34,11 +34,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import sys + from absl import app from absl import flags from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise +# Opting out of loading all sibling packages and their dependencies. +sys.skip_tf_privacy_import = True + FLAGS = flags.FLAGS flags.DEFINE_integer('N', None, 'Total number of examples') diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py index ccf1f1c..1325cb2 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import math +import sys from absl import app from scipy.optimize import bisect @@ -26,6 +27,9 @@ from scipy.optimize import bisect from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +# Opting out of loading all sibling packages and their dependencies. +sys.skip_tf_privacy_import = True + def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): """Compute and print results of DP-SGD analysis.""" diff --git a/tensorflow_privacy/v1/__init__.py b/tensorflow_privacy/v1/__init__.py deleted file mode 100644 index 7bce1a9..0000000 --- a/tensorflow_privacy/v1/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2020, The TensorFlow Privacy Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""TensorFlow Privacy library v1 imports. - -This module includes classes designed to be compatible with TF1, based on -`tf.compat.v1.train.Optimizer` and `tf.estimator.Estimator`. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys - -# pylint: disable=g-import-not-at-top - -if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. - pass -else: - # Estimators - from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 - - # Optimizers - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class - - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer - - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class From caf6f36b80b4f2dd4ff0ea7fcbd6c02054c3d71c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Jul 2021 15:54:53 -0700 Subject: [PATCH 03/71] (1) add `CentralTreeSumQuery` and `DistributedTreeSumQuery` to tree_aggregation_query.py. (2) move `build_tree_from_leaf` to tree_aggregation_query.py together with `CentralTreeSumQuery`. PiperOrigin-RevId: 383511025 --- .../privacy/dp_query/tree_aggregation.py | 80 ----- .../dp_query/tree_aggregation_query.py | 317 +++++++++++++++++- .../dp_query/tree_aggregation_query_test.py | 285 +++++++++++++++- .../privacy/dp_query/tree_aggregation_test.py | 34 -- 4 files changed, 594 insertions(+), 122 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py index 3561be3..ba8ea2f 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py @@ -17,10 +17,6 @@ based on tree aggregation. When using an appropriate noise function (e.g., Gaussian noise), it allows for efficient differentially private algorithms under continual observation, without prior subsampling or shuffling assumptions. - -`build_tree` constructs a tree given the leaf nodes by recursively summing the -children nodes to get the parent node. It allows for efficient range queries and -other statistics such as quantiles on the leaf nodes. 
""" import abc @@ -449,79 +445,3 @@ class EfficientTreeAggregator(): level_buffer_idx=new_level_buffer_idx, value_generator_state=value_generator_state) return cumsum, new_state - - -@tf.function -def build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: - """A function constructs a complete tree given all the leaf nodes. - - The function takes a 1-D array representing the leaf nodes of a tree and the - tree's arity, and constructs a complete tree by recursively summing the - adjacent children to get the parent until reaching the root node. Because we - assume a complete tree, if the number of leaf nodes does not divide arity, the - leaf nodes will be padded with zeros. - - Args: - leaf_nodes: A 1-D array storing the leaf nodes of the tree. - arity: A `int` for the branching factor of the tree, i.e. the number of - children for each internal node. - - Returns: - `tf.RaggedTensor` representing the tree. For example, if - `leaf_nodes=tf.Tensor([1, 2, 3, 4])` and `arity=2`, then the returned value - should be `tree=tf.RaggedTensor([[10],[3,7],[1,2,3,4]])`. In this way, - `tree[layer][index]` can be used to access the node indexed by (layer, - index) in the tree, - - Raises: - ValueError: if parameters don't meet expectations. There are two situations - where the error is raised: (1) the input tensor has length smaller than 1; - (2) The arity is less than 2. - """ - - if len(leaf_nodes) <= 0: - raise ValueError( - 'The number of leaf nodes should at least be 1.' - f'However, an array of length {len(leaf_nodes)} is detected') - - if arity <= 1: - raise ValueError('The branching factor should be at least 2.' - f'However, a branching factor of {arity} is detected.') - - def pad_zero(leaf_nodes, size): - paddings = [[0, size - len(leaf_nodes)]] - return tf.pad(leaf_nodes, paddings) - - leaf_nodes_size = tf.constant(len(leaf_nodes), dtype=tf.float32) - num_layers = tf.math.ceil( - tf.math.log(leaf_nodes_size) / - tf.math.log(tf.constant(arity, dtype=tf.float32))) + 1 - leaf_nodes = pad_zero(leaf_nodes, tf.math.pow(float(arity), num_layers - 1)) - - def _shrink_layer(layer: tf.Tensor, arity: int) -> tf.Tensor: - return tf.reduce_sum((tf.reshape(layer, (-1, arity))), 1) - - # The following `tf.while_loop` constructs the tree from bottom up by - # iteratively applying `_shrink_layer` to each layer of the tree. The reason - # for the choice of TF1.0-style `tf.while_loop` is that @tf.function does not - # support auto-translation from python loop to tf loop when loop variables - # contain a `RaggedTensor` whose shape changes across iterations. - - idx = tf.identity(num_layers) - loop_cond = lambda i, h: tf.less_equal(2.0, i) - - def _loop_body(i, h): - return [ - tf.add(i, -1.0), - tf.concat(([_shrink_layer(h[0], arity)], h), axis=0) - ] - - _, tree = tf.while_loop( - loop_cond, - _loop_body, [idx, tf.RaggedTensor.from_tensor([leaf_nodes])], - shape_invariants=[ - idx.get_shape(), - tf.RaggedTensorSpec(dtype=leaf_nodes.dtype, ragged_rank=1) - ]) - - return tree diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index fb7dc76..79bc243 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -11,9 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""DPQuery for continual observation queries relying on `tree_aggregation`.""" +"""`DPQuery`s for differentially private tree aggregation protocols. +`TreeCumulativeSumQuery` and `TreeResidualSumQuery` are `DPQuery`s for continual +online observation queries relying on `tree_aggregation`. 'Online' means that +the leaf nodes of the tree arrive one by one as the time proceeds. The leaves +are vector records as defined in `dp_query.DPQuery`. + +`CentralTreeSumQuery` and `DistributedTreeSumQuery` are `DPQuery`s for +central/distributed offline tree aggregation protocol. 'Offline' means all the +leaf nodes are ready before the protocol starts. Each record, different from +what is defined in `dp_query.DPQuery`, is a histogram (i.e. the leaf nodes). +""" +import distutils +import math import attr + import tensorflow as tf from tensorflow_privacy.privacy.dp_query import dp_query @@ -31,11 +44,11 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): Attributes: clip_fn: Callable that specifies clipping function. `clip_fn` receives two arguments: a flat list of vars in a record and a `clip_value` to clip the - corresponding record, e.g. clip_fn(flat_record, clip_value). + corresponding record, e.g. clip_fn(flat_record, clip_value). clip_value: float indicating the value at which to clip the record. record_specs: `Collection[tf.TensorSpec]` specifying shapes of records. - tree_aggregator: `tree_aggregation.TreeAggregator` initialized with - user defined `noise_generator`. `noise_generator` is a + tree_aggregator: `tree_aggregation.TreeAggregator` initialized with user + defined `noise_generator`. `noise_generator` is a `tree_aggregation.ValueGenerator` to generate the noise value for a tree node. Noise stdandard deviation is specified outside the `dp_query` by the user when defining `noise_fn` and should have order @@ -209,7 +222,7 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): Attributes: clip_fn: Callable that specifies clipping function. `clip_fn` receives two arguments: a flat list of vars in a record and a `clip_value` to clip the - corresponding record, e.g. clip_fn(flat_record, clip_value). + corresponding record, e.g. clip_fn(flat_record, clip_value). clip_value: float indicating the value at which to clip the record. record_specs: A nested structure of `tf.TensorSpec`s specifying structure and shapes of records. @@ -364,3 +377,297 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): record_specs=record_specs, noise_generator=gaussian_noise_generator, use_efficient=use_efficient) + + +@tf.function +def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: + """A function constructs a complete tree given all the leaf nodes. + + The function takes a 1-D array representing the leaf nodes of a tree and the + tree's arity, and constructs a complete tree by recursively summing the + adjacent children to get the parent until reaching the root node. Because we + assume a complete tree, if the number of leaf nodes does not divide arity, the + leaf nodes will be padded with zeros. + + Args: + leaf_nodes: A 1-D array storing the leaf nodes of the tree. + arity: A `int` for the branching factor of the tree, i.e. the number of + children for each internal node. + + Returns: + `tf.RaggedTensor` representing the tree. For example, if + `leaf_nodes=tf.Tensor([1, 2, 3, 4])` and `arity=2`, then the returned value + should be `tree=tf.RaggedTensor([[10],[3,7],[1,2,3,4]])`. 
In this way, + `tree[layer][index]` can be used to access the node indexed by (layer, + index) in the tree, + """ + + def pad_zero(leaf_nodes, size): + paddings = [[0, size - len(leaf_nodes)]] + return tf.pad(leaf_nodes, paddings) + + leaf_nodes_size = tf.constant(len(leaf_nodes), dtype=tf.float32) + num_layers = tf.math.ceil( + tf.math.log(leaf_nodes_size) / + tf.math.log(tf.cast(arity, dtype=tf.float32))) + 1 + leaf_nodes = pad_zero( + leaf_nodes, tf.math.pow(tf.cast(arity, dtype=tf.float32), num_layers - 1)) + + def _shrink_layer(layer: tf.Tensor, arity: int) -> tf.Tensor: + return tf.reduce_sum((tf.reshape(layer, (-1, arity))), 1) + + # The following `tf.while_loop` constructs the tree from bottom up by + # iteratively applying `_shrink_layer` to each layer of the tree. The reason + # for the choice of TF1.0-style `tf.while_loop` is that @tf.function does not + # support auto-translation from python loop to tf loop when loop variables + # contain a `RaggedTensor` whose shape changes across iterations. + + idx = tf.identity(num_layers) + loop_cond = lambda i, h: tf.less_equal(2.0, i) + + def _loop_body(i, h): + return [ + tf.add(i, -1.0), + tf.concat(([_shrink_layer(h[0], arity)], h), axis=0) + ] + + _, tree = tf.while_loop( + loop_cond, + _loop_body, [idx, tf.RaggedTensor.from_tensor([leaf_nodes])], + shape_invariants=[ + idx.get_shape(), + tf.RaggedTensorSpec(dtype=leaf_nodes.dtype, ragged_rank=1) + ]) + + return tree + + +def _get_add_noise(stddev): + """Utility function to decide which `add_noise` to use according to tf version.""" + if distutils.version.LooseVersion( + tf.__version__) < distutils.version.LooseVersion('2.0.0'): + + def add_noise(v): + return v + tf.random.normal( + tf.shape(input=v), stddev=stddev, dtype=v.dtype) + else: + random_normal = tf.random_normal_initializer(stddev=stddev) + + def add_noise(v): + return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) + + return add_noise + + +class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for differentially private tree aggregation protocol. + + Implements a central variant of the tree aggregation protocol from the paper + "'Is interaction necessary for distributed private learning?.' Adam Smith, + Abhradeep Thakurta, Jalaj Upadhyay" by replacing their local randomizer with + gaussian mechanism. The first step is to clip the clients' local updates (i.e. + a 1-D array containing the leaf nodes of the tree) by L1 norm to make sure it + does not exceed a prespecified upper bound. The second step is to construct + the tree on the clipped update. The third step is to add independent gaussian + noise to each node in the tree. The returned tree can support efficient and + accurate range queries with differential privacy. + """ + + @attr.s(frozen=True) + class GlobalState(object): + """Class defining global state for `CentralTreeSumQuery`. + + Attributes: + stddev: The stddev of the noise added to each node in the tree. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + """ + stddev = attr.ib() + arity = attr.ib() + l1_bound = attr.ib() + + def __init__(self, stddev: float, arity: int = 2, l1_bound: int = 10): + """Initializes the `CentralTreeSumQuery`. + + Args: + stddev: The stddev of the noise added to each internal node of the + constructed tree. + arity: The branching factor of the tree. 
+ l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + """ + self._stddev = stddev + self._arity = arity + self._l1_bound = l1_bound + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return CentralTreeSumQuery.GlobalState( + stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return global_state.l1_bound + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" + casted_record = tf.cast(record, tf.float32) + l1_norm = tf.norm(casted_record, ord=1) + + l1_bound = tf.cast(params, tf.float32) + + preprocessed_record, _ = tf.clip_by_global_norm([casted_record], + l1_bound, + use_norm=l1_norm) + + return preprocessed_record[0] + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`. + + Args: + sample_state: a frequency histogram. + global_state: hyper-parameters of the query. + + Returns: + a `tf.RaggedTensor` representing the tree built on top of `sample_state`. + The jth node on the ith layer of the tree can be accessed by tree[i][j] + where tree is the returned value. + """ + add_noise = _get_add_noise(self._stddev) + tree = _build_tree_from_leaf(sample_state, global_state.arity) + return tf.nest.map_structure( + add_noise, tree, expand_composites=True), global_state + + +class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for differentially private tree aggregation protocol. + + The difference from `CentralTreeSumQuery` is that the tree construction and + gaussian noise addition happen in `preprocess_records`. The difference only + takes effect when used together with + `tff.aggregators.DifferentiallyPrivateFactory`. In other cases, this class + should be treated as equal with `CentralTreeSumQuery`. + + Implements a distributed version of the tree aggregation protocol from. "Is + interaction necessary for distributed private learning?." by replacing their + local randomizer with gaussian mechanism. The first step is to check the L1 + norm of the clients' local updates (i.e. a 1-D array containing the leaf nodes + of the tree) to make sure it does not exceed a prespecified upper bound. The + second step is to construct the tree. The third step is to add independent + gaussian noise to each node in the tree. The returned tree can support + efficient and accurate range queries with differential privacy. + """ + + @attr.s(frozen=True) + class GlobalState(object): + """Class defining global state for DistributedTreeSumQuery. + + Attributes: + stddev: The stddev of the noise added to each internal node in the + constructed tree. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + """ + stddev = attr.ib() + arity = attr.ib() + l1_bound = attr.ib() + + def __init__(self, stddev: float, arity: int = 2, l1_bound: int = 10): + """Initializes the `DistributedTreeSumQuery`. + + Args: + stddev: The stddev of the noise added to each node in the tree. + arity: The branching factor of the tree. + l1_bound: An upper bound on the L1 norm of the input record. 
This is + needed to bound the sensitivity and deploy differential privacy. + """ + self._stddev = stddev + self._arity = arity + self._l1_bound = l1_bound + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return DistributedTreeSumQuery.GlobalState( + stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return (global_state.stddev, global_state.arity, global_state.l1_bound) + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`. + + This method clips the input record by L1 norm, constructs a tree on top of + it, and adds gaussian noise to each node of the tree for differential + privacy. Unlike `get_noised_result` in `CentralTreeSumQuery`, this function + flattens the `tf.RaggedTensor` before outputting it. This is useful when + used inside `tff.aggregators.DifferentiallyPrivateFactory` because it does + not accept ragged output tensor. + + Args: + params: hyper-parameters for preprocessing record, (stddev, aritry, + l1_bound) + record: leaf nodes for the tree. + + Returns: + `tf.Tensor` representing the flattened version of the tree. + """ + _, arity, l1_bound_ = params + l1_bound = tf.cast(l1_bound_, tf.float32) + + casted_record = tf.cast(record, tf.float32) + l1_norm = tf.norm(casted_record, ord=1) + + preprocessed_record, _ = tf.clip_by_global_norm([casted_record], + l1_bound, + use_norm=l1_norm) + preprocessed_record = preprocessed_record[0] + + add_noise = _get_add_noise(self._stddev) + tree = _build_tree_from_leaf(preprocessed_record, arity) + noisy_tree = tf.nest.map_structure(add_noise, tree, expand_composites=True) + + # The following codes reshape the output vector so the output shape of can + # be statically inferred. This is useful when used with + # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know + # the output shape of this function statically and explicitly. + flat_noisy_tree = noisy_tree.flat_values + flat_tree_shape = [ + (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - + 1) // (self._arity - 1) + ] + return tf.reshape(flat_noisy_tree, flat_tree_shape) + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`. + + This function re-constructs the `tf.RaggedTensor` from the flattened tree + output by `preprocess_records.` + + Args: + sample_state: `tf.Tensor` for the flattened tree. + global_state: hyper-parameters including noise multiplier, the branching + factor of the tree and the maximum records per user. + + Returns: + a `tf.RaggedTensor` for the tree. + """ + # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. + # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], + # row_splits=[0, 4, 4, 7, 8, 8])) + # + # This part is not written in tensorflow and will be executed on the server + # side instead of the client side if used with + # tff.aggregators.DifferentiallyPrivateFactory for federated learning. 
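    # Worked example (illustrative, not part of the original change): with
    # arity 2 and a flattened tree of 7 nodes, the layer sizes are 1, 2 and
    # 4, so the offsets computed below evaluate to row_splits = [0, 1, 3, 7]
    # and `from_row_splits` recovers the layers [[a], [b, c], [d, e, f, g]].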
+ row_splits = [0] + [ + (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range( + math.floor(math.log(sample_state.shape[0], self._arity)) + 1) + ] + tree = tf.RaggedTensor.from_row_splits( + values=sample_state, row_splits=row_splits) + return tree, global_state diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 8cf2157..34f2c9c 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -13,16 +13,15 @@ # limitations under the License. """Tests for `tree_aggregation_query`.""" -from absl.testing import parameterized +import math +from absl.testing import parameterized import numpy as np import tensorflow as tf - from tensorflow_privacy.privacy.dp_query import test_utils from tensorflow_privacy.privacy.dp_query import tree_aggregation from tensorflow_privacy.privacy.dp_query import tree_aggregation_query - STRUCT_RECORD = [ tf.constant([[2.0, 0.0], [0.0, 1.0]]), tf.constant([-1.0, 0.0]) @@ -55,6 +54,7 @@ def _get_noise_fn(specs, stddev=NOISE_STD, seed=1): def _get_no_noise_fn(specs): shape = tf.nest.map_structure(lambda spec: spec.shape, specs) + def no_noise_fn(): return tf.nest.map_structure(tf.zeros, shape) @@ -73,6 +73,7 @@ def _get_l2_clip_fn(): def _get_l_infty_clip_fn(): def l_infty_clip_fn(record_as_list, clip_value): + def clip(record): return tf.clip_by_value( record, clip_value_min=-clip_value, clip_value_max=clip_value) @@ -395,5 +396,283 @@ class TreeResidualQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertIsInstance(query._tree_aggregator, tree_class) +class BuildTreeTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.product( + leaf_nodes_size=[1, 2, 3, 4, 5], + arity=[2, 3], + dtype=[tf.int32, tf.float32], + ) + def test_build_tree_from_leaf(self, leaf_nodes_size, arity, dtype): + """Test whether `_build_tree_from_leaf` will output the correct tree.""" + + leaf_nodes = tf.cast(tf.range(leaf_nodes_size), dtype) + depth = math.ceil(math.log(leaf_nodes_size, arity)) + 1 + + tree = tree_aggregation_query._build_tree_from_leaf(leaf_nodes, arity) + + self.assertEqual(depth, tree.shape[0]) + + for layer in range(depth): + reverse_depth = tree.shape[0] - layer - 1 + span_size = arity**reverse_depth + for idx in range(arity**layer): + left = idx * span_size + right = (idx + 1) * span_size + expected_value = sum(leaf_nodes[left:right]) + self.assertEqual(tree[layer][idx], expected_value) + + +class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_initial_global_state_type(self): + + query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + self.assertIsInstance( + global_state, tree_aggregation_query.CentralTreeSumQuery.GlobalState) + + def test_derive_sample_params(self): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + self.assertAllClose(params, 10.) 
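The expectations in `BuildTreeTest` above can be sanity-checked against a pure-Python version of the same bottom-up construction. The helper below is illustrative only (it is not part of the library) and assumes at least one leaf node:

import math

def build_tree(leaf_nodes, arity=2):
  # Pad the leaves with zeros up to a power of `arity`, then repeatedly sum
  # adjacent groups of `arity` children to form each parent layer, bottom up.
  depth = math.ceil(math.log(len(leaf_nodes), arity)) + 1
  layer = list(leaf_nodes) + [0] * (arity**(depth - 1) - len(leaf_nodes))
  tree = [layer]
  while len(layer) > 1:
    layer = [sum(layer[i:i + arity]) for i in range(0, len(layer), arity)]
    tree.insert(0, layer)
  return tree

# Matches the docstring example: leaves [1, 2, 3, 4] with arity 2.
assert build_tree([1, 2, 3, 4]) == [[10], [3, 7], [1, 2, 3, 4]]
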
+ + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32)), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32)), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32)), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], + dtype=tf.float32)), + ) + def test_preprocess_record(self, arity, record): + query = tree_aggregation_query.CentralTreeSumQuery( + stddev=NOISE_STD, arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + + self.assertAllClose(preprocessed_record, record) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.constant([5, 5, 0, 0], dtype=tf.int32)), + ('binary_test_float', 2, tf.constant( + [10., 10., 0., 0.], + dtype=tf.float32), tf.constant([5., 5., 0., 0.], dtype=tf.float32)), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.constant([5, 5, 0, 0], dtype=tf.int32)), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.constant([5., 5., 0., 0.], dtype=tf.float32)), + ) + def test_preprocess_record_clipped(self, arity, record, + expected_clipped_value): + query = tree_aggregation_query.CentralTreeSumQuery( + stddev=NOISE_STD, arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_clipped_value) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result(self, arity, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + @parameterized.named_parameters( + ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ) + def test_get_noised_result_with_noise(self, stddev, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state_list = [] + for _ in range(1000): + sample_state, _ = query.get_noised_result(preprocessed_record, + global_state) + sample_state_list.append(sample_state.flat_values.numpy()) + expectation = np.mean(sample_state_list, axis=0) + variance = np.std(sample_state_list, axis=0) + + self.assertAllClose(expectation, expected_tree, rtol=3 * 
stddev, atol=1e-4) + self.assertAllClose( + variance, np.ones(len(variance)) * stddev, rtol=0.1, atol=1e-4) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + +class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_initial_global_state_type(self): + + query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + self.assertIsInstance( + global_state, + tree_aggregation_query.DistributedTreeSumQuery.GlobalState) + + def test_derive_sample_params(self): + query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + stddev, arity, l1_bound = query.derive_sample_params( + global_state) + self.assertAllClose(stddev, NOISE_STD) + self.assertAllClose(arity, 2) + self.assertAllClose(l1_bound, 10) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. + ])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. 
+ ])), + ) + def test_preprocess_record(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_tree) + + @parameterized.named_parameters( + ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ) + def test_preprocess_record_with_noise(self, stddev, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery(stddev=stddev) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + + preprocessed_record_list = [] + for _ in range(1000): + preprocessed_record = query.preprocess_record(params, record) + preprocessed_record_list.append(preprocessed_record.numpy()) + + expectation = np.mean(preprocessed_record_list, axis=0) + variance = np.std(preprocessed_record_list, axis=0) + + self.assertAllClose(expectation, expected_tree, rtol=3 * stddev, atol=1e-4) + self.assertAllClose( + variance, np.ones(len(variance)) * stddev, rtol=0.1, atol=1e-4) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant( + [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant( + [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), + ) + def test_preprocess_record_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_tree) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('binary_test_float', 2, 
tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py index 9a237ad..9a8be35 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py @@ -365,39 +365,5 @@ class GaussianNoiseGeneratorTest(tf.test.TestCase): self.assertAllEqual(gstate, gstate2) -class BuildTreeTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.product( - leaf_nodes_size=[1, 2, 3, 4, 5], - arity=[2, 3], - dtype=[tf.int32, tf.float32], - ) - def test_build_tree_from_leaf(self, leaf_nodes_size, arity, dtype): - """Test whether `build_tree_from_leaf` will output the correct tree.""" - - leaf_nodes = tf.cast(tf.range(leaf_nodes_size), dtype) - depth = math.ceil(math.log(leaf_nodes_size, arity)) + 1 - - tree = tree_aggregation.build_tree_from_leaf(leaf_nodes, arity) - - self.assertEqual(depth, tree.shape[0]) - - for layer in range(depth): - reverse_depth = tree.shape[0] - layer - 1 - span_size = arity**reverse_depth - for idx in range(arity**layer): - left = idx * span_size - right = (idx + 1) * span_size - expected_value = sum(leaf_nodes[left:right]) - self.assertEqual(tree[layer][idx], expected_value) - - @parameterized.named_parameters(('negative_arity', [1], -1), - ('empty_hist', [], 2)) - def test_value_error_raises(self, leaf_nodes, arity): - """Test whether `build_tree_from_leaf` will raise the correct error when the input is illegal.""" - with self.assertRaises(ValueError): - tree_aggregation.build_tree_from_leaf(leaf_nodes, arity) - - if __name__ == '__main__': tf.test.main() From 7f44b02456bb861044ea021b8f7d12fcbcd5ae86 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 13 Jul 2021 11:13:44 -0700 Subject: [PATCH 04/71] Increment version number. PiperOrigin-RevId: 384507585 --- setup.py | 2 +- tensorflow_privacy/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5c739be..d1979e4 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ from setuptools import setup setup( name='tensorflow_privacy', - version='0.6.1', + version='0.6.2', url='https://github.com/tensorflow/privacy', license='Apache-2.0', install_requires=[ diff --git a/tensorflow_privacy/version.py b/tensorflow_privacy/version.py index 6092b3b..05ecadf 100644 --- a/tensorflow_privacy/version.py +++ b/tensorflow_privacy/version.py @@ -13,4 +13,4 @@ # limitations under the License. 
"""TensorFlow Privacy version.""" -__version__ = '0.6.1' +__version__ = '0.6.2' From 2cafe28d8d668d8a561076f01edb410f11af4a06 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Jul 2021 16:13:50 -0700 Subject: [PATCH 05/71] The previous version uses `tf.nest.map_structure` to apply `add_noise` to a `tf.RaggedTensor`. This causes a bug when used in tensorflow federated because `tf.nest.map_structure` will also map `add_noise` to the tensor for shape information in `tf.RaggedTensor`. This causes failure when tff conducts automatic type conversion. Also use fixed random seed to avoid flaky timeouts and testing failures. PiperOrigin-RevId: 384573740 --- .../dp_query/tree_aggregation_query.py | 49 ++++++++++++------- .../dp_query/tree_aggregation_query_test.py | 32 ++++-------- 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 79bc243..5717e4f 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -25,10 +25,10 @@ what is defined in `dp_query.DPQuery`, is a histogram (i.e. the leaf nodes). """ import distutils import math +from typing import Optional + import attr - import tensorflow as tf - from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import tree_aggregation @@ -442,16 +442,20 @@ def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: return tree -def _get_add_noise(stddev): +def _get_add_noise(stddev, seed: int = None): """Utility function to decide which `add_noise` to use according to tf version.""" if distutils.version.LooseVersion( tf.__version__) < distutils.version.LooseVersion('2.0.0'): + # The seed should be only used for testing purpose. + if seed is not None: + tf.random.set_seed(seed) + def add_noise(v): return v + tf.random.normal( tf.shape(input=v), stddev=stddev, dtype=v.dtype) else: - random_normal = tf.random_normal_initializer(stddev=stddev) + random_normal = tf.random_normal_initializer(stddev=stddev, seed=seed) def add_noise(v): return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) @@ -478,17 +482,16 @@ class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): """Class defining global state for `CentralTreeSumQuery`. Attributes: - stddev: The stddev of the noise added to each node in the tree. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). l1_bound: An upper bound on the L1 norm of the input record. This is needed to bound the sensitivity and deploy differential privacy. """ - stddev = attr.ib() - arity = attr.ib() l1_bound = attr.ib() - def __init__(self, stddev: float, arity: int = 2, l1_bound: int = 10): + def __init__(self, + stddev: float, + arity: int = 2, + l1_bound: int = 10, + seed: Optional[int] = None): """Initializes the `CentralTreeSumQuery`. Args: @@ -497,15 +500,17 @@ class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): arity: The branching factor of the tree. l1_bound: An upper bound on the L1 norm of the input record. This is needed to bound the sensitivity and deploy differential privacy. + seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for + test purpose. 
""" self._stddev = stddev self._arity = arity self._l1_bound = l1_bound + self._seed = seed def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return CentralTreeSumQuery.GlobalState( - stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) + return CentralTreeSumQuery.GlobalState(l1_bound=self._l1_bound) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" @@ -536,10 +541,9 @@ class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): The jth node on the ith layer of the tree can be accessed by tree[i][j] where tree is the returned value. """ - add_noise = _get_add_noise(self._stddev) - tree = _build_tree_from_leaf(sample_state, global_state.arity) - return tf.nest.map_structure( - add_noise, tree, expand_composites=True), global_state + add_noise = _get_add_noise(self._stddev, self._seed) + tree = _build_tree_from_leaf(sample_state, self._arity) + return tf.map_fn(add_noise, tree), global_state class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): @@ -577,7 +581,11 @@ class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): arity = attr.ib() l1_bound = attr.ib() - def __init__(self, stddev: float, arity: int = 2, l1_bound: int = 10): + def __init__(self, + stddev: float, + arity: int = 2, + l1_bound: int = 10, + seed: Optional[int] = None): """Initializes the `DistributedTreeSumQuery`. Args: @@ -585,10 +593,13 @@ class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): arity: The branching factor of the tree. l1_bound: An upper bound on the L1 norm of the input record. This is needed to bound the sensitivity and deploy differential privacy. + seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for + test purpose. """ self._stddev = stddev self._arity = arity self._l1_bound = l1_bound + self._seed = seed def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" @@ -628,9 +639,9 @@ class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): use_norm=l1_norm) preprocessed_record = preprocessed_record[0] - add_noise = _get_add_noise(self._stddev) + add_noise = _get_add_noise(self._stddev, self._seed) tree = _build_tree_from_leaf(preprocessed_record, arity) - noisy_tree = tf.nest.map_structure(add_noise, tree, expand_composites=True) + noisy_tree = tf.map_fn(add_noise, tree) # The following codes reshape the output vector so the output shape of can # be statically inferred. 
This is useful when used with diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 34f2c9c..cc3a89a 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -502,21 +502,15 @@ class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), ) def test_get_noised_result_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev) + query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev, seed=0) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, record) - sample_state_list = [] - for _ in range(1000): - sample_state, _ = query.get_noised_result(preprocessed_record, - global_state) - sample_state_list.append(sample_state.flat_values.numpy()) - expectation = np.mean(sample_state_list, axis=0) - variance = np.std(sample_state_list, axis=0) - self.assertAllClose(expectation, expected_tree, rtol=3 * stddev, atol=1e-4) + sample_state, _ = query.get_noised_result(preprocessed_record, global_state) + self.assertAllClose( - variance, np.ones(len(variance)) * stddev, rtol=0.1, atol=1e-4) + sample_state.flat_values, expected_tree, atol=3 * stddev) @parameterized.named_parameters( ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), @@ -556,8 +550,7 @@ class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): def test_derive_sample_params(self): query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) global_state = query.initial_global_state() - stddev, arity, l1_bound = query.derive_sample_params( - global_state) + stddev, arity, l1_bound = query.derive_sample_params(global_state) self.assertAllClose(stddev, NOISE_STD) self.assertAllClose(arity, 2) self.assertAllClose(l1_bound, 10) @@ -587,21 +580,14 @@ class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), ) def test_preprocess_record_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=stddev) + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=stddev, seed=0) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) - preprocessed_record_list = [] - for _ in range(1000): - preprocessed_record = query.preprocess_record(params, record) - preprocessed_record_list.append(preprocessed_record.numpy()) + preprocessed_record = query.preprocess_record(params, record) - expectation = np.mean(preprocessed_record_list, axis=0) - variance = np.std(preprocessed_record_list, axis=0) - - self.assertAllClose(expectation, expected_tree, rtol=3 * stddev, atol=1e-4) - self.assertAllClose( - variance, np.ones(len(variance)) * stddev, rtol=0.1, atol=1e-4) + self.assertAllClose(preprocessed_record, expected_tree, atol=3 * stddev) @parameterized.named_parameters( ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), From 2f862eba9b00094ad844e93b81ea1d49538324fa Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Tue, 27 Jul 2021 11:28:15 -0700 Subject: [PATCH 06/71] Move TensorFlow v1 imports to their own __init__.py file in a new subdirectory. 
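For downstream users, the practical effect of this change is that the TF1-style symbols move from the package root to the new `tensorflow_privacy.v1` namespace. As a minimal sketch of the caller-side update, assuming the package is imported as `tfp` (the optimizer named below is one of the symbols moved by this patch, and the hyperparameter values are illustrative only):

    import tensorflow_privacy as tfp

    # Before this change, the TF1 optimizer was exported at the top level:
    #   opt = tfp.DPGradientDescentGaussianOptimizer(...)
    # After this change, it is reached through the v1 subpackage:
    opt = tfp.v1.DPGradientDescentGaussianOptimizer(
        l2_norm_clip=1.0,
        noise_multiplier=1.1,
        num_microbatches=1,
        learning_rate=0.15)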
PiperOrigin-RevId: 387156295 --- tensorflow_privacy/__init__.py | 21 ++------ .../analysis/compute_dp_sgd_privacy.py | 5 -- .../analysis/compute_dp_sgd_privacy_lib.py | 4 -- .../analysis/compute_noise_from_budget.py | 5 -- .../analysis/compute_noise_from_budget_lib.py | 4 -- tensorflow_privacy/v1/__init__.py | 50 +++++++++++++++++++ 6 files changed, 53 insertions(+), 36 deletions(-) create mode 100644 tensorflow_privacy/v1/__init__.py diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 1e6e42d..bf0e8e4 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -26,6 +26,9 @@ from tensorflow_privacy.version import __version__ # pylint: disable=g-bad-impo if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: + # TensorFlow v1 imports + import tensorflow_privacy.v1 + # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.privacy_ledger import GaussianSumQueryEntry @@ -54,7 +57,6 @@ else: # Estimators from tensorflow_privacy.privacy.estimators.dnn import DNNClassifier - from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 # Keras Models from tensorflow_privacy.privacy.keras_models.dp_keras_model import DPModel @@ -62,14 +64,6 @@ else: from tensorflow_privacy.privacy.keras_models.dp_keras_model import make_dp_model_class # Optimizers - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class - from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdagradOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer @@ -80,15 +74,6 @@ else: from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer - - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD - from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class - try: from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py 
b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py index e4a0efb..e9d999d 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py @@ -32,16 +32,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys - from absl import app from absl import flags from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - FLAGS = flags.FLAGS flags.DEFINE_integer('N', None, 'Total number of examples') diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py index a69d7db..8d081c5 100644 --- a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy_lib.py @@ -19,13 +19,9 @@ from __future__ import division from __future__ import print_function import math -import sys from absl import app -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py index bb23eea..731c9ac 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget.py @@ -34,16 +34,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import sys - from absl import app from absl import flags from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - FLAGS = flags.FLAGS flags.DEFINE_integer('N', None, 'Total number of examples') diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py index 1325cb2..ccf1f1c 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import math -import sys from absl import app from scipy.optimize import bisect @@ -27,9 +26,6 @@ from scipy.optimize import bisect from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent -# Opting out of loading all sibling packages and their dependencies. -sys.skip_tf_privacy_import = True - def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): """Compute and print results of DP-SGD analysis.""" diff --git a/tensorflow_privacy/v1/__init__.py b/tensorflow_privacy/v1/__init__.py new file mode 100644 index 0000000..7bce1a9 --- /dev/null +++ b/tensorflow_privacy/v1/__init__.py @@ -0,0 +1,50 @@ +# Copyright 2020, The TensorFlow Privacy Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorFlow Privacy library v1 imports. + +This module includes classes designed to be compatible with TF1, based on +`tf.compat.v1.train.Optimizer` and `tf.estimator.Estimator`. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +# pylint: disable=g-import-not-at-top + +if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. + pass +else: + # Estimators + from tensorflow_privacy.privacy.estimators.v1.dnn import DNNClassifier as DNNClassifierV1 + + # Optimizers + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import make_optimizer_class + + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGDOptimizer + + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD + from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import make_vectorized_optimizer_class From e7e11d14d9edde33805ce1270267742708105556 Mon Sep 17 00:00:00 2001 From: Ken Liu Date: Tue, 27 Jul 2021 17:17:53 -0700 Subject: [PATCH 07/71] Adds discrete Gaussian (sampler and distributed DPQuery) to public TF Privacy. 
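For context, a minimal sketch of how the two new pieces fit together, based on the files added below (the bound, stddev, and record values are illustrative only):

    import tensorflow as tf
    from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
    from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query

    # Draw integer-valued noise from the (possibly inexact) discrete Gaussian.
    noise = discrete_gaussian_utils.sample_discrete_gaussian(
        scale=10, shape=(100,), dtype=tf.int32)

    # Records must be integer-valued and already L2-bounded: the query checks
    # the norm bound and adds local discrete Gaussian noise per record, and
    # the server then simply sums the noised records.
    query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery(
        l2_norm_bound=10.0, local_stddev=1.0)
    global_state = query.initial_global_state()
    params = query.derive_sample_params(global_state)
    noised_record = query.preprocess_record(
        params, tf.constant([2, 0], dtype=tf.int32))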
PiperOrigin-RevId: 387232449 --- tensorflow_privacy/__init__.py | 1 + .../dp_query/discrete_gaussian_utils.py | 142 +++++++++ .../dp_query/discrete_gaussian_utils_test.py | 275 ++++++++++++++++++ .../distributed_discrete_gaussian_query.py | 114 ++++++++ ...istributed_discrete_gaussian_query_test.py | 165 +++++++++++ 5 files changed, 697 insertions(+) create mode 100644 tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py create mode 100644 tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index bf0e8e4..f775d80 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -43,6 +43,7 @@ else: # DPQuery classes from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery + from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py new file mode 100644 index 0000000..ea0a663 --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils.py @@ -0,0 +1,142 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Util functions for drawing discrete Gaussian samples. + +The following functions implement a vectorized TF version of the sampling +algorithm described in the paper: + +The Discrete Gaussian for Differential Privacy +https://arxiv.org/pdf/2004.00010.pdf + +Note that the exact sampling implementation should use integer and fractional +parameters only. Here, we relax this constraint a bit and use vectorized +implementations of Bernoulli and discrete Laplace sampling that can take float +parameters. +""" + +import tensorflow as tf +import tensorflow_probability as tf_prob + + +def _sample_discrete_laplace(t, shape): + """Sample from discrete Laplace with scale t. + + This method is based on the observation that sampling from Z ~ Lap(t) is + equivalent to sampling X, Y independently from Geo(1 - exp(-1/t)) and taking + Z = X - Y. + + Note also that tensorflow_probability's geometric sampler is based on floating-point + operations and may be inexact. + + Args: + t: The scale of the discrete Laplace distribution. + shape: The tensor shape of the tensors drawn. + + Returns: + A tensor of the specified shape filled with random values.
+ """ + geometric_probs = 1.0 - tf.exp(-1.0 / tf.cast(t, tf.float64)) + sampler = tf_prob.distributions.Geometric(probs=geometric_probs) + return tf.cast(sampler.sample(shape) - sampler.sample(shape), tf.int64) + + +def _sample_bernoulli(p): + """Sample from Bernoulli(p).""" + return tf_prob.distributions.Bernoulli(probs=p, dtype=tf.int64).sample() + + +def _check_input_args(scale, shape, dtype): + """Checks the input args to the discrete Gaussian sampler.""" + if tf.as_dtype(dtype) not in (tf.int32, tf.int64): + raise ValueError( + f'Only tf.int32 and tf.int64 are supported. Found dtype `{dtype}`.') + + checks = [ + tf.compat.v1.assert_non_negative(scale), + tf.compat.v1.assert_integer(scale) + ] + with tf.control_dependencies(checks): + return tf.identity(scale), shape, dtype + + +def _int_square(value): + """Avoids the TF op `Square(T=...)` for ints as sampling can happen on clients.""" + return (value - 1) * (value + 1) + 1 + + +@tf.function +def _sample_discrete_gaussian_helper(scale, shape, dtype): + """Draw samples from discrete Gaussian, assuming scale >= 0.""" + scale = tf.cast(scale, tf.int64) + sq_scale = _int_square(scale) + + # Scale for discrete Laplace. The sampling algorithm should be correct + # for any discrete Laplace scale, and the original paper uses + # `dlap_scale = floor(scale) + 1`. Here we use `dlap_scale = scale` (where + # input `scale` is restricted to integers >= 1) to simplify the fraction + # below. It turns out that for integer scales >= 1, `dlap_scale = scale` gives + # a good minimum success rate of ~70%, allowing a small oversampling factor. + dlap_scale = scale + oversample_factor = 1.5 + + # Draw at least some samples in case we got unlucky with small input shape. + min_n = 1000 + target_n = tf.reduce_prod(tf.cast(shape, tf.int64)) + oversample_n = oversample_factor * tf.cast(target_n, tf.float32) + draw_n = tf.maximum(min_n, tf.cast(oversample_n, tf.int32)) + + accepted_n = tf.constant(0, dtype=target_n.dtype) + result = tf.zeros((0,), dtype=tf.int64) + + while accepted_n < target_n: + # Since the number of samples could be different in every retry, we need to + # manually specify the shape info for TF. + tf.autograph.experimental.set_loop_options( + shape_invariants=[(result, tf.TensorShape([None]))]) + # Draw samples. + samples = _sample_discrete_laplace(dlap_scale, shape=(draw_n,)) + z_numer = _int_square(tf.abs(samples) - scale) + z_denom = 2 * sq_scale + bern_probs = tf.exp(-1.0 * tf.divide(z_numer, z_denom)) + accept = _sample_bernoulli(bern_probs) + # Keep successful samples and increment counter. + accepted_samples = samples[tf.equal(accept, 1)] + accepted_n += tf.cast(tf.size(accepted_samples), accepted_n.dtype) + result = tf.concat([result, accepted_samples], axis=0) + # Reduce the number of draws for any retries. + draw_n = tf.cast(target_n - accepted_n, tf.float32) * oversample_factor + draw_n = tf.maximum(min_n, tf.cast(draw_n, tf.int32)) + + return tf.cast(tf.reshape(result[:target_n], shape), dtype) + + +def sample_discrete_gaussian(scale, shape, dtype=tf.int32): + """Draws (possibly inexact) samples from the discrete Gaussian distribution. + + We relax some integer constraints to use vectorized implementations of + Bernoulli and discrete Laplace sampling. Integer operations are done in + tf.int64 as TF does not have direct support for fractions. + + Args: + scale: The scale of the discrete Gaussian distribution. + shape: The shape of the output tensor. + dtype: The type of the output. 
+ + Returns: + A tensor of the specified shape filled with random values. + """ + scale, shape, dtype = _check_input_args(scale, shape, dtype) + return tf.cond( + tf.equal(scale, 0), lambda: tf.zeros(shape, dtype), + lambda: _sample_discrete_gaussian_helper(scale, shape, dtype)) diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py new file mode 100644 index 0000000..185a649 --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_utils_test.py @@ -0,0 +1,275 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for discrete_gaussian_utils.""" + +import collections +import fractions +import math +import random + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils + +EXACT_SAMPLER_SEED = 4242 + + +class DiscreteGaussianUtilsTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.product(dtype=[tf.bool, tf.float32, tf.float64]) + def test_raise_on_bad_dtype(self, dtype): + with self.assertRaises(ValueError): + _ = discrete_gaussian_utils.sample_discrete_gaussian(1, (1,), dtype) + + def test_raise_on_negative_scale(self): + with self.assertRaises(tf.errors.InvalidArgumentError): + _ = discrete_gaussian_utils.sample_discrete_gaussian(-10, (1,)) + + def test_raise_on_float_scale(self): + with self.assertRaises(TypeError): + _ = discrete_gaussian_utils.sample_discrete_gaussian(3.14, (1,)) + + @parameterized.product(shape=[(), (1,), (100,), (2, 2), (3, 3, 3), + (4, 1, 1, 1)]) + def test_shapes(self, shape): + samples = discrete_gaussian_utils.sample_discrete_gaussian(100, shape) + samples = self.evaluate(samples) + self.assertAllEqual(samples.shape, shape) + + @parameterized.product(dtype=[tf.int32, tf.int64]) + def test_dtypes(self, dtype): + samples = discrete_gaussian_utils.sample_discrete_gaussian(1, (10,), dtype) + samples = self.evaluate(samples) + # Convert output np dtypes to tf dtypes. + self.assertEqual(tf.as_dtype(samples.dtype), dtype) + + def test_zero_noise(self): + scale = 0 + shape = (100,) + dtype = tf.int32 + samples = discrete_gaussian_utils.sample_discrete_gaussian( + scale, shape, dtype=dtype) + samples = self.evaluate(samples) + self.assertAllEqual(samples, tf.zeros(shape, dtype=dtype)) + + @parameterized.named_parameters([('small_scale_small_n', 10, 2000, 1, 2), + ('small_scale_large_n', 10, 5000, 1, 1), + ('large_scale_small_n', 50, 2000, 2, 5), + ('large_scale_large_n', 50, 5000, 2, 3)]) + def test_match_exact_sampler(self, scale, num_samples, mean_std_atol, + percentile_atol): + true_samples = exact_sampler(scale, num_samples) + drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian( + scale=scale, shape=(num_samples,)) + drawn_samples = self.evaluate(drawn_samples) + + # Check mean, std, and percentiles. 
+ self.assertAllClose( + np.mean(true_samples), np.mean(drawn_samples), atol=mean_std_atol) + self.assertAllClose( + np.std(true_samples), np.std(drawn_samples), atol=mean_std_atol) + self.assertAllClose( + np.percentile(true_samples, [10, 30, 50, 70, 90]), + np.percentile(drawn_samples, [10, 30, 50, 70, 90]), + atol=percentile_atol) + + @parameterized.named_parameters([('n_1000', 1000, 5e-2), + ('n_10000', 10000, 5e-3)]) + def test_kl_divergence(self, num_samples, kl_tolerance): + """Compute KL divergence between empirical & true distribution.""" + scale = 10 + sq_sigma = scale * scale + drawn_samples = discrete_gaussian_utils.sample_discrete_gaussian( + scale=scale, shape=(num_samples,)) + drawn_samples = self.evaluate(drawn_samples) + value_counts = collections.Counter(drawn_samples) + + kl = 0 + norm_const = dgauss_normalizing_constant(sq_sigma) + + for value, count in value_counts.items(): + kl += count * ( + math.log(count * norm_const / num_samples) + value * value / + (2.0 * sq_sigma)) + + kl /= num_samples + self.assertLess(kl, kl_tolerance) + + +def exact_sampler(scale, num_samples, seed=EXACT_SAMPLER_SEED): + """Implementation of the exact discrete Gaussian distribution sampler. + + Source: https://arxiv.org/pdf/2004.00010.pdf. + + Args: + scale: The scale of the discrete Gaussian. + num_samples: The number of samples to generate. + seed: The seed for the random number generator to reproduce samples. + + Returns: + A numpy array of discrete Gaussian samples. + """ + + def randrange(a, rng): + return rng.randrange(a) + + def bern_em1(rng): + """Sample from Bernoulli(exp(-1)).""" + k = 2 + while True: + if randrange(k, rng) == 0: # if Bernoulli(1/k)==1 + k = k + 1 + else: + return k % 2 + + def bern_emab1(a, b, rng): + """Sample from Bernoulli(exp(-a/b)), assuming 0 <= a <= b.""" + assert isinstance(a, int) + assert isinstance(b, int) + assert 0 <= a <= b + k = 1 + while True: + if randrange(b, rng) < a and randrange(k, rng) == 0: # if Bern(a/b/k)==1 + k = k + 1 + else: + return k % 2 + + def bern_emab(a, b, rng): + """Sample from Bernoulli(exp(-a/b)), allowing a > b.""" + while a > b: + if bern_em1(rng) == 0: + return 0 + a = a - b + return bern_emab1(a, b, rng) + + def geometric(t, rng): + """Sample from geometric(1-exp(-1/t)).""" + assert isinstance(t, int) + assert t > 0 + while True: + u = randrange(t, rng) + if bern_emab1(u, t, rng) == 1: + while bern_em1(rng) == 1: + u = u + t + return u + + def dlap(t, rng): + """Sample from discrete Laplace with scale t. + + Pr[x] = exp(-|x|/t) * (exp(1/t)-1)/(exp(1/t)+1). Supported on integers. + + Args: + t: The scale. + rng: The random number generator. + + Returns: + A discrete Laplace sample. + """ + assert isinstance(t, int) + assert t > 0 + while True: + u = geometric(t, rng) + b = randrange(2, rng) + if b == 1: + return u + elif u > 0: + return -u + + def floorsqrt(x): + """Compute floor(sqrt(x)) exactly.""" + assert x >= 0 + a = 0 # maintain a^2<=x. + b = 1 # maintain b^2>x. + while b * b <= x: + b = 2 * b + # Do binary search. + while a + 1 < b: + c = (a + b) // 2 + if c * c <= x: + a = c + else: + b = c + return a + + def dgauss(ss, num, rng): + """Sample from discrete Gaussian. + + Args: + ss: Variance proxy, squared scale, sigma^2. + num: The number of samples to generate. + rng: The random number generator. + + Returns: + A list of discrete Gaussian samples.
+ """ + ss = fractions.Fraction(ss) # cast to rational for exact arithmetic + assert ss > 0 + t = floorsqrt(ss) + 1 + results = [] + trials = 0 + while len(results) < num: + trials = trials + 1 + y = dlap(t, rng) + z = (abs(y) - ss / t)**2 / (2 * ss) + if bern_emab(z.numerator, z.denominator, rng) == 1: + results.append(y) + return results, t, trials + + rng = random.Random(seed) + return np.array(dgauss(scale * scale, num_samples, rng)[0]) + + +def dgauss_normalizing_constant(sigma_sq): + """Compute the normalizing constant of the discrete Gaussian. + + Source: https://arxiv.org/pdf/2004.00010.pdf. + + Args: + sigma_sq: Variance proxy, squared scale, sigma^2. + + Returns: + The normalizing constant. + """ + original = None + poisson = None + if sigma_sq <= 1: + original = 0 + x = 1000 + while x > 0: + original = original + math.exp(-x * x / (2.0 * sigma_sq)) + x = x - 1 + original = 2 * original + 1 + + if sigma_sq * 100 >= 1: + poisson = 0 + y = 1000 + while y > 0: + poisson = poisson + math.exp(-math.pi * math.pi * sigma_sq * 2 * y * y) + y = y - 1 + poisson = math.sqrt(2 * math.pi * sigma_sq) * (1 + 2 * poisson) + + if poisson is None: + return original + if original is None: + return poisson + + scale = max(1, math.sqrt(2 * math.pi * sigma_sq)) + precision = 1e-15 + assert -precision * scale <= original - poisson <= precision * scale + return (original + poisson) / 2 + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py new file mode 100644 index 0000000..5b450ee --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py @@ -0,0 +1,114 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implements DPQuery interface for distributed discrete Gaussian mechanism.""" + +import collections + +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils +from tensorflow_privacy.privacy.dp_query import dp_query + + +class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): + """Implements DPQuery for discrete distributed Gaussian sum queries. + + For each local record, we check the L2 norm bound and add discrete Gaussian + noise. In particular, this DPQuery does not perform L2 norm clipping and the + norms of the input records are expected to be bounded. + """ + + # pylint: disable=invalid-name + _GlobalState = collections.namedtuple('_GlobalState', + ['l2_norm_bound', 'local_stddev']) + + # pylint: disable=invalid-name + _SampleParams = collections.namedtuple('_SampleParams', + ['l2_norm_bound', 'local_stddev']) + + def __init__(self, l2_norm_bound, local_stddev): + """Initializes the DistributedDiscreteGaussianSumQuery. + + Args: + l2_norm_bound: The L2 norm bound to verify for each record. + local_stddev: The scale/stddev of the local discrete Gaussian noise. 
+ """ + self._l2_norm_bound = l2_norm_bound + self._local_stddev = local_stddev + + def set_ledger(self, ledger): + del ledger # Unused. + raise NotImplementedError('Ledger has not yet been implemented for' + 'DistributedDiscreteGaussianSumQuery!') + + def initial_global_state(self): + return self._GlobalState( + tf.cast(self._l2_norm_bound, tf.float32), + tf.cast(self._local_stddev, tf.float32)) + + def derive_sample_params(self, global_state): + return self._SampleParams(global_state.l2_norm_bound, + global_state.local_stddev) + + def _add_local_noise(self, record, local_stddev, shares=1): + """Add local discrete Gaussian noise to the record. + + Args: + record: The record to which we generate and add local noise. + local_stddev: The scale/stddev of the local discrete Gaussian noise. + shares: Number of shares of local noise to generate. Should be 1 for each + record. This can be useful when we want to generate multiple noise + shares at once. + + Returns: + The record with local noise added. + """ + # Round up the noise as the TF discrete Gaussian sampler only takes + # integer noise stddevs for now. + ceil_local_stddev = tf.cast(tf.math.ceil(local_stddev), tf.int32) + + def add_noise(v): + # Adds an extra dimension for `shares` number of draws. + shape = tf.concat([[shares], tf.shape(v)], axis=0) + dgauss_noise = discrete_gaussian_utils.sample_discrete_gaussian( + scale=ceil_local_stddev, shape=shape, dtype=v.dtype) + # Sum across the number of noise shares and add it. + noised_v = v + tf.reduce_sum(dgauss_noise, axis=0) + # Ensure shape as TF shape inference may fail due to custom noise sampler. + noised_v.set_shape(v.shape.as_list()) + return noised_v + + return tf.nest.map_structure(add_noise, record) + + def preprocess_record(self, params, record): + """Check record norm and add noise to the record.""" + record_as_list = tf.nest.flatten(record) + record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list] + tf.nest.map_structure(lambda x: tf.compat.v1.assert_type(x, tf.int32), + record_as_list) + dependencies = [ + tf.compat.v1.assert_less_equal( + tf.linalg.global_norm(record_as_float_list), + params.l2_norm_bound, + message=f'Global L2 norm exceeds {params.l2_norm_bound}.') + ] + with tf.control_dependencies(dependencies): + result = tf.cond( + tf.equal(params.local_stddev, 0), lambda: record, + lambda: self._add_local_noise(record, params.local_stddev)) + return result + + def get_noised_result(self, sample_state, global_state): + # Note that by directly returning the aggregate, this assumes that there + # will not be missing local noise shares during execution. + return sample_state, global_state diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py new file mode 100644 index 0000000..b2f6051 --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py @@ -0,0 +1,165 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for DistributedDiscreteGaussianQuery.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils +from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query +from tensorflow_privacy.privacy.dp_query import test_utils + +ddg_sum_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery + + +def silence_tf_error_messages(func): + """Decorator that temporarily changes the TF logging levels.""" + + def wrapper(*args, **kwargs): + cur_verbosity = tf.compat.v1.logging.get_verbosity() + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) + func(*args, **kwargs) + tf.compat.v1.logging.set_verbosity(cur_verbosity) # Reset verbosity. + + return wrapper + + +class DistributedDiscreteGaussianQueryTest(tf.test.TestCase, + parameterized.TestCase): + + def test_sum_no_noise(self): + with self.cached_session() as sess: + record1 = tf.constant([2, 0], dtype=tf.int32) + record2 = tf.constant([-1, 1], dtype=tf.int32) + + query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0) + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = sess.run(query_result) + expected = [1, 1] + self.assertAllEqual(result, expected) + + @parameterized.product(sample_size=[1, 3]) + def test_sum_multiple_shapes(self, sample_size): + with self.cached_session() as sess: + t1 = tf.constant([2, 0], dtype=tf.int32) + t2 = tf.constant([-1, 1, 3], dtype=tf.int32) + t3 = tf.constant([-2], dtype=tf.int32) + record = [t1, t2, t3] + sample = [record] * sample_size + + query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0) + query_result, _ = test_utils.run_query(query, sample) + expected = [sample_size * t1, sample_size * t2, sample_size * t3] + result, expected = sess.run([query_result, expected]) + # Use `assertAllClose` for nested structures equality (with tolerance=0). 
+ self.assertAllClose(result, expected, atol=0) + + @parameterized.product(sample_size=[1, 3]) + def test_sum_nested_record_structure(self, sample_size): + with self.cached_session() as sess: + t1 = tf.constant([1, 0], dtype=tf.int32) + t2 = tf.constant([1, 1, 1], dtype=tf.int32) + t3 = tf.constant([1], dtype=tf.int32) + t4 = tf.constant([[1, 1], [1, 1]], dtype=tf.int32) + record = [t1, dict(a=t2, b=[t3, (t4, t1)])] + sample = [record] * sample_size + + query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0) + query_result, _ = test_utils.run_query(query, sample) + result = sess.run(query_result) + + s = sample_size + expected = [t1 * s, dict(a=t2 * s, b=[t3 * s, (t4 * s, t1 * s)])] + # Use `assertAllClose` for nested structures equality (with tolerance=0) + self.assertAllClose(result, expected, atol=0) + + def test_sum_raise_on_float_inputs(self): + with self.cached_session() as sess: + record1 = tf.constant([2, 0], dtype=tf.float32) + record2 = tf.constant([-1, 1], dtype=tf.float32) + query = ddg_sum_query(l2_norm_bound=10, local_stddev=0.0) + + with self.assertRaises(TypeError): + query_result, _ = test_utils.run_query(query, [record1, record2]) + sess.run(query_result) + + @parameterized.product(l2_norm_bound=[0, 3, 10, 14.1]) + @silence_tf_error_messages + def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound): + with self.cached_session() as sess: + record = tf.constant([10, 10], dtype=tf.int32) + query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0) + + with self.assertRaises(tf.errors.InvalidArgumentError): + query_result, _ = test_utils.run_query(query, [record]) + sess.run(query_result) + + def test_sum_float_norm_not_rounded(self): + """Test that the float L2 norm bound doesn't get rounded/casted to integers.""" + with self.cached_session() as sess: + # A casted/rounded norm bound would be insufficient. + l2_norm_bound = 14.2 + record = tf.constant([10, 10], dtype=tf.int32) + query = ddg_sum_query(l2_norm_bound=l2_norm_bound, local_stddev=0.0) + query_result, _ = test_utils.run_query(query, [record]) + result = sess.run(query_result) + expected = [10, 10] + self.assertAllEqual(result, expected) + + @parameterized.named_parameters([('2_local_stddev_1_record', 2, 1), + ('10_local_stddev_4_records', 10, 4), + ('1000_local_stddev_1_record', 1000, 1), + ('1000_local_stddev_25_records', 1000, 25)]) + def test_sum_local_noise_shares(self, local_stddev, num_records): + """Test the noise level of the sum of discrete Gaussians applied locally. + + The sum of discrete Gaussians is not a discrete Gaussian, but it will be + extremely close for sigma >= 2. We will thus compare the aggregated noise + to a central discrete Gaussian noise with appropriately scaled stddev with + some reasonable tolerance. + + Args: + local_stddev: The stddev of the local discrete Gaussian noise. + num_records: The number of records to be aggregated. + """ + # Aggregated local noises. + num_trials = 1000 + record = tf.zeros([num_trials], dtype=tf.int32) + sample = [record] * num_records + query = ddg_sum_query(l2_norm_bound=10.0, local_stddev=local_stddev) + query_result, _ = test_utils.run_query(query, sample) + + # Central discrete Gaussian noise. 
+ central_stddev = np.sqrt(num_records) * local_stddev + central_noise = discrete_gaussian_utils.sample_discrete_gaussian( + scale=tf.cast(tf.round(central_stddev), record.dtype), + shape=tf.shape(record), + dtype=record.dtype) + + agg_noise, central_noise = self.evaluate([query_result, central_noise]) + + mean_stddev = central_stddev * np.sqrt(num_trials) / num_trials + atol = 3.5 * mean_stddev + + # Use the atol for mean as a rough default atol for stddev/percentile. + self.assertAllClose(np.mean(agg_noise), np.mean(central_noise), atol=atol) + self.assertAllClose(np.std(agg_noise), np.std(central_noise), atol=atol) + self.assertAllClose( + np.percentile(agg_noise, [25, 50, 75]), + np.percentile(central_noise, [25, 50, 75]), + atol=atol) + + +if __name__ == '__main__': + tf.test.main() From 4d335d1b69206712b6325626d7df1063b9815ade Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Jul 2021 17:42:18 -0700 Subject: [PATCH 08/71] (1) Merge `CentralTreeSumQuery` and `DistributedTreeSumQuery` into one DPQuery to modularize things. The new query takes in an `inner_query` argument. Depending on the behavior of the inner query, the query will follow central DP or distributed DP. (2) Remove the hard-coded L1 clipping and replace it with norm-bound checking in the inner query. This design allows us to use whatever clipping factory we want outside the DPQuery. PiperOrigin-RevId: 387236482 --- .../dp_query/tree_aggregation_query.py | 286 ++++++---------- .../dp_query/tree_aggregation_query_test.py | 308 ++++++------------ 2 files changed, 204 insertions(+), 390 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 5717e4f..bd6ff3c 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -15,21 +15,18 @@ `TreeCumulativeSumQuery` and `TreeResidualSumQuery` are `DPQuery`s for continual online observation queries relying on `tree_aggregation`. 'Online' means that -the leaf nodes of the tree arrive one by one as the time proceeds. The leaves -are vector records as defined in `dp_query.DPQuery`. +the leaf nodes of the tree arrive one by one as time proceeds. -`CentralTreeSumQuery` and `DistributedTreeSumQuery` are `DPQuery`s for -central/distributed offline tree aggregation protocol. 'Offline' means all the -leaf nodes are ready before the protocol starts. Each record, different from -what is defined in `dp_query.DPQuery`, is a histogram (i.e. the leaf nodes). +`TreeRangeSumQuery` is a `DPQuery` for the offline tree aggregation protocol. +'Offline' means all the leaf nodes are ready before the protocol starts.
""" -import distutils import math -from typing import Optional import attr import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query from tensorflow_privacy.privacy.dp_query import dp_query +from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import tree_aggregation @@ -442,217 +439,84 @@ def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: return tree -def _get_add_noise(stddev, seed: int = None): - """Utility function to decide which `add_noise` to use according to tf version.""" - if distutils.version.LooseVersion( - tf.__version__) < distutils.version.LooseVersion('2.0.0'): +class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for accurate range queries using tree aggregation. - # The seed should be only used for testing purpose. - if seed is not None: - tf.random.set_seed(seed) - - def add_noise(v): - return v + tf.random.normal( - tf.shape(input=v), stddev=stddev, dtype=v.dtype) - else: - random_normal = tf.random_normal_initializer(stddev=stddev, seed=seed) - - def add_noise(v): - return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) - - return add_noise - - -class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for differentially private tree aggregation protocol. - - Implements a central variant of the tree aggregation protocol from the paper - "'Is interaction necessary for distributed private learning?.' Adam Smith, - Abhradeep Thakurta, Jalaj Upadhyay" by replacing their local randomizer with - gaussian mechanism. The first step is to clip the clients' local updates (i.e. - a 1-D array containing the leaf nodes of the tree) by L1 norm to make sure it - does not exceed a prespecified upper bound. The second step is to construct - the tree on the clipped update. The third step is to add independent gaussian - noise to each node in the tree. The returned tree can support efficient and - accurate range queries with differential privacy. + Implements a variant of the tree aggregation protocol from. "Is interaction + necessary for distributed private learning?. Adam Smith, Abhradeep Thakurta, + Jalaj Upadhyay." Builds a tree on top of the input record and adds noise to + the tree for differential privacy. Any range query can be decomposed into the + sum of O(log(n)) nodes in the tree compared to O(n) when using a histogram. + Improves efficiency and reduces noise scale. """ @attr.s(frozen=True) class GlobalState(object): - """Class defining global state for `CentralTreeSumQuery`. + """Class defining global state for TreeRangeSumQuery. Attributes: - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - """ - l1_bound = attr.ib() - - def __init__(self, - stddev: float, - arity: int = 2, - l1_bound: int = 10, - seed: Optional[int] = None): - """Initializes the `CentralTreeSumQuery`. - - Args: - stddev: The stddev of the noise added to each internal node of the - constructed tree. - arity: The branching factor of the tree. - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for - test purpose. 
- """ - self._stddev = stddev - self._arity = arity - self._l1_bound = l1_bound - self._seed = seed - - def initial_global_state(self): - """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return CentralTreeSumQuery.GlobalState(l1_bound=self._l1_bound) - - def derive_sample_params(self, global_state): - """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return global_state.l1_bound - - def preprocess_record(self, params, record): - """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" - casted_record = tf.cast(record, tf.float32) - l1_norm = tf.norm(casted_record, ord=1) - - l1_bound = tf.cast(params, tf.float32) - - preprocessed_record, _ = tf.clip_by_global_norm([casted_record], - l1_bound, - use_norm=l1_norm) - - return preprocessed_record[0] - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`. - - Args: - sample_state: a frequency histogram. - global_state: hyper-parameters of the query. - - Returns: - a `tf.RaggedTensor` representing the tree built on top of `sample_state`. - The jth node on the ith layer of the tree can be accessed by tree[i][j] - where tree is the returned value. - """ - add_noise = _get_add_noise(self._stddev, self._seed) - tree = _build_tree_from_leaf(sample_state, self._arity) - return tf.map_fn(add_noise, tree), global_state - - -class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for differentially private tree aggregation protocol. - - The difference from `CentralTreeSumQuery` is that the tree construction and - gaussian noise addition happen in `preprocess_records`. The difference only - takes effect when used together with - `tff.aggregators.DifferentiallyPrivateFactory`. In other cases, this class - should be treated as equal with `CentralTreeSumQuery`. - - Implements a distributed version of the tree aggregation protocol from. "Is - interaction necessary for distributed private learning?." by replacing their - local randomizer with gaussian mechanism. The first step is to check the L1 - norm of the clients' local updates (i.e. a 1-D array containing the leaf nodes - of the tree) to make sure it does not exceed a prespecified upper bound. The - second step is to construct the tree. The third step is to add independent - gaussian noise to each node in the tree. The returned tree can support - efficient and accurate range queries with differential privacy. - """ - - @attr.s(frozen=True) - class GlobalState(object): - """Class defining global state for DistributedTreeSumQuery. - - Attributes: - stddev: The stddev of the noise added to each internal node in the - constructed tree. arity: The branching factor of the tree (i.e. the number of children each internal node has). - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. + inner_query_state: The global state of the inner query. """ - stddev = attr.ib() arity = attr.ib() - l1_bound = attr.ib() + inner_query_state = attr.ib() def __init__(self, - stddev: float, - arity: int = 2, - l1_bound: int = 10, - seed: Optional[int] = None): - """Initializes the `DistributedTreeSumQuery`. + inner_query: dp_query.SumAggregationDPQuery, + arity: int = 2): + """Initializes the `TreeRangeSumQuery`. Args: - stddev: The stddev of the noise added to each node in the tree. - arity: The branching factor of the tree. - l1_bound: An upper bound on the L1 norm of the input record. 
This is - needed to bound the sensitivity and deploy differential privacy. - seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for - testing purposes. + inner_query: The inner `DPQuery` that adds noise to the tree. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). Defaults to 2. """ - self._stddev = stddev + self._inner_query = inner_query self._arity = arity - self._l1_bound = l1_bound - self._seed = seed + + if self._arity < 2: + raise ValueError(f'Invalid arity={arity} smaller than 2.') def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return DistributedTreeSumQuery.GlobalState( - stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) + return TreeRangeSumQuery.GlobalState( + arity=self._arity, + inner_query_state=self._inner_query.initial_global_state()) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return (global_state.stddev, global_state.arity, global_state.l1_bound) + return (global_state.arity, + self._inner_query.derive_sample_params( + global_state.inner_query_state)) def preprocess_record(self, params, record): """Implements `tensorflow_privacy.DPQuery.preprocess_record`. - This method clips the input record by L1 norm, constructs a tree on top of - it, and adds gaussian noise to each node of the tree for differential - privacy. Unlike `get_noised_result` in `CentralTreeSumQuery`, this function - flattens the `tf.RaggedTensor` before outputting it. This is useful when - used inside `tff.aggregators.DifferentiallyPrivateFactory` because it does - not accept ragged output tensor. + This method builds the tree, flattens it and applies + `inner_query.preprocess_record` to the flattened tree. Args: - params: hyper-parameters for preprocessing record, (stddev, aritry, - l1_bound) - record: leaf nodes for the tree. + params: Hyper-parameters for preprocessing record. + record: A histogram representing the leaf nodes of the tree. Returns: - `tf.Tensor` representing the flattened version of the tree. + A `tf.Tensor` representing the flattened version of the preprocessed tree. """ - _, arity, l1_bound_ = params - l1_bound = tf.cast(l1_bound_, tf.float32) - - casted_record = tf.cast(record, tf.float32) - l1_norm = tf.norm(casted_record, ord=1) - - preprocessed_record, _ = tf.clip_by_global_norm([casted_record], - l1_bound, - use_norm=l1_norm) - preprocessed_record = preprocessed_record[0] - - add_noise = _get_add_noise(self._stddev, self._seed) - tree = _build_tree_from_leaf(preprocessed_record, arity) - noisy_tree = tf.map_fn(add_noise, tree) + arity, inner_query_params = params + preprocessed_record = _build_tree_from_leaf(record, arity).flat_values + preprocessed_record = self._inner_query.preprocess_record( + inner_query_params, preprocessed_record) # The following code reshapes the output vector so that the output shape can # be statically inferred. This is useful when used with # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know # the output shape of this function statically and explicitly.
- flat_noisy_tree = noisy_tree.flat_values - flat_tree_shape = [ + preprocessed_record_shape = [ (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - 1) // (self._arity - 1) ] - return tf.reshape(flat_noisy_tree, flat_tree_shape) + return tf.reshape(preprocessed_record, preprocessed_record_shape) def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`. @@ -661,12 +525,11 @@ class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): output by `preprocess_record`. Args: - sample_state: `tf.Tensor` for the flattened tree. - global_state: hyper-parameters including noise multiplier, the branching - factor of the tree and the maximum records per user. + sample_state: A `tf.Tensor` for the flattened tree. + global_state: The global state of the protocol. Returns: - a `tf.RaggedTensor` for the tree. + A `tf.RaggedTensor` representing the tree. """ # The [0] is needed because of how tf.RaggedTensor.from_row_splits works. # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], @@ -682,3 +545,60 @@ class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): tree = tf.RaggedTensor.from_row_splits( values=sample_state, row_splits=row_splits) return tree, global_state + + @classmethod + def build_central_gaussian_query(cls, + l2_norm_clip: float, + stddev: float, + arity: int = 2): + """Returns `TreeRangeSumQuery` with central Gaussian noise. + + Args: + l2_norm_clip: Each record should be clipped so that it has L2 norm at most + `l2_norm_clip`. + stddev: Stddev of the central Gaussian noise. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). Defaults to 2. + """ + if l2_norm_clip <= 0: + raise ValueError(f'`l2_norm_clip` must be positive, got {l2_norm_clip}.') + + if stddev < 0: + raise ValueError(f'`stddev` must be non-negative, got {stddev}.') + + if arity < 2: + raise ValueError(f'`arity` must be at least 2, got {arity}.') + + inner_query = gaussian_query.GaussianSumQuery(l2_norm_clip, stddev) + + return cls(arity=arity, inner_query=inner_query) + + @classmethod + def build_distributed_discrete_gaussian_query(cls, + l2_norm_bound: float, + local_stddev: float, + arity: int = 2): + """Returns `TreeRangeSumQuery` with distributed discrete Gaussian noise. + + Args: + l2_norm_bound: Each record should be clipped so that it has L2 norm at + most `l2_norm_bound`. + local_stddev: Scale/stddev of the local discrete Gaussian noise. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). Defaults to 2.
+ """ + if l2_norm_bound <= 0: + raise ValueError( + f'`l2_clip_bound` must be positive, got {l2_norm_bound}.') + + if local_stddev < 0: + raise ValueError( + f'`local_stddev` must be non-negative, got {local_stddev}.') + + if arity < 2: + raise ValueError(f'`arity` must be at least 2, got {arity}.') + + inner_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery( + l2_norm_bound, local_stddev) + + return cls(arity=arity, inner_query=inner_query) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index cc3a89a..3713b5d 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -423,111 +423,115 @@ class BuildTreeTest(tf.test.TestCase, parameterized.TestCase): self.assertEqual(tree[layer][idx], expected_value) -class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): +class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - def test_initial_global_state_type(self): - - query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - self.assertIsInstance( - global_state, tree_aggregation_query.CentralTreeSumQuery.GlobalState) - - def test_derive_sample_params(self): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - self.assertAllClose(params, 10.) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32)), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32)), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32)), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], - dtype=tf.float32)), + @parameterized.product( + inner_query=['central', 'distributed'], + params=[(0., 1., 2), (1., -1., 2), (1., 1., 1)], ) - def test_preprocess_record(self, arity, record): - query = tree_aggregation_query.CentralTreeSumQuery( - stddev=NOISE_STD, arity=arity) + def test_raises_error(self, inner_query, params): + clip_norm, stddev, arity = params + with self.assertRaises(ValueError): + if inner_query == 'central': + tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( + clip_norm, stddev, arity) + elif inner_query == 'distributed': + tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( + clip_norm, stddev, arity) + + @parameterized.product( + inner_query=['central', 'distributed'], + clip_norm=[0.1, 1.0, 10.0], + stddev=[0.1, 1.0, 10.0]) + def test_initial_global_state_type(self, inner_query, clip_norm, stddev): + + if inner_query == 'central': + query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( + clip_norm, stddev) + elif inner_query == 'distributed': + query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( + clip_norm, stddev) + global_state = query.initial_global_state() + self.assertIsInstance(global_state, + tree_aggregation_query.TreeRangeSumQuery.GlobalState) + + @parameterized.product( + inner_query=['central', 'distributed'], + clip_norm=[0.1, 1.0, 10.0], + stddev=[0.1, 1.0, 10.0], + arity=[2, 3, 4]) + def test_derive_sample_params(self, inner_query, clip_norm, stddev, arity): + if inner_query == 'central': + query = 
tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( + clip_norm, stddev, arity) + elif inner_query == 'distributed': + query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( + clip_norm, stddev, arity) + global_state = query.initial_global_state() + derived_arity, inner_query_state = query.derive_sample_params(global_state) + self.assertAllClose(derived_arity, arity) + if inner_query == 'central': + self.assertAllClose(inner_query_state, clip_norm) + elif inner_query == 'distributed': + self.assertAllClose(inner_query_state.l2_norm_bound, clip_norm) + self.assertAllClose(inner_query_state.local_stddev, stddev) + + @parameterized.product( + (dict(arity=2, expected_tree=[1, 1, 0, 1, 0, 0, 0]), + dict(arity=3, expected_tree=[1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])), + inner_query=['central', 'distributed'], + ) + def test_preprocess_record(self, inner_query, arity, expected_tree): + if inner_query == 'central': + query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( + 10., 0., arity) + record = tf.constant([1, 0, 0, 0], dtype=tf.float32) + expected_tree = tf.cast(expected_tree, tf.float32) + elif inner_query == 'distributed': + query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( + 10., 0., arity) + record = tf.constant([1, 0, 0, 0], dtype=tf.int32) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose(preprocessed_record, record) + self.assertAllClose(preprocessed_record, expected_tree) @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.constant([5, 5, 0, 0], dtype=tf.int32)), - ('binary_test_float', 2, tf.constant( - [10., 10., 0., 0.], - dtype=tf.float32), tf.constant([5., 5., 0., 0.], dtype=tf.float32)), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.constant([5, 5, 0, 0], dtype=tf.int32)), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.constant([5., 5., 0., 0.], dtype=tf.float32)), + ('stddev_1', 1, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]), + ('stddev_0_1', 4, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]), ) - def test_preprocess_record_clipped(self, arity, record, - expected_clipped_value): - query = tree_aggregation_query.CentralTreeSumQuery( - stddev=NOISE_STD, arity=arity) + def test_distributed_preprocess_record_with_noise(self, local_stddev, record, + expected_tree): + query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( + 10., local_stddev) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_clipped_value) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ) - 
def test_get_noised_result(self, arity, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - @parameterized.named_parameters( - ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ) - def test_get_noised_result_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev, seed=0) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - - sample_state, _ = query.get_noised_result(preprocessed_record, global_state) self.assertAllClose( - sample_state.flat_values, expected_tree, atol=3 * stddev) + preprocessed_record, expected_tree, atol=10 * local_stddev) - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + @parameterized.product( + (dict( + arity=2, + expected_tree=tf.ragged.constant([[1], [1, 0], [1, 0, 0, 0]])), + dict( + arity=3, + expected_tree=tf.ragged.constant([[1], [1, 0, 0], + [1, 0, 0, 0, 0, 0, 0, 0, 0]]))), + inner_query=['central', 'distributed'], ) - def test_get_noised_result_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) + def test_get_noised_result(self, inner_query, arity, expected_tree): + if inner_query == 'central': + query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( + 10., 0., arity) + record = tf.constant([1, 0, 0, 0], dtype=tf.float32) + expected_tree = tf.cast(expected_tree, tf.float32) + elif inner_query == 'distributed': + query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( + 10., 0., arity) + record = tf.constant([1, 0, 0, 0], dtype=tf.int32) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, record) @@ -536,128 +540,18 @@ class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertAllClose(sample_state, expected_tree) - -class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - - def test_initial_global_state_type(self): - - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - self.assertIsInstance( - global_state, - tree_aggregation_query.DistributedTreeSumQuery.GlobalState) - - def test_derive_sample_params(self): - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - 
stddev, arity, l1_bound = query.derive_sample_params(global_state) - self.assertAllClose(stddev, NOISE_STD) - self.assertAllClose(arity, 2) - self.assertAllClose(l1_bound, 10) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. - ])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. - ])), - ) - def test_preprocess_record(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) + @parameterized.product(stddev=[0.1, 1.0, 10.0]) + def test_central_get_noised_result_with_noise(self, stddev): + query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( + 10., stddev) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ) - def test_preprocess_record_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=stddev, seed=0) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - - preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose(preprocessed_record, expected_tree, atol=3 * stddev) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant( - [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant( - [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), - ) - def test_preprocess_record_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ) 
- def test_get_noised_result(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) + preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.])) sample_state, global_state = query.get_noised_result( preprocessed_record, global_state) - self.assertAllClose(sample_state, expected_tree) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) + self.assertAllClose( + sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev) if __name__ == '__main__': From eef5810d94d56428717757574ed39bb0430cf246 Mon Sep 17 00:00:00 2001 From: Keith Rush Date: Tue, 27 Jul 2021 20:03:58 -0700 Subject: [PATCH 09/71] Automated rollback of commit 4d335d1b69206712b6325626d7df1063b9815ade PiperOrigin-RevId: 387254617 --- .../dp_query/tree_aggregation_query.py | 292 +++++++++++------ .../dp_query/tree_aggregation_query_test.py | 308 ++++++++++++------ 2 files changed, 393 insertions(+), 207 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index bd6ff3c..5717e4f 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -15,18 +15,21 @@ `TreeCumulativeSumQuery` and `TreeResidualSumQuery` are `DPQuery`s for continual online observation queries relying on `tree_aggregation`. 'Online' means that -the leaf nodes of the tree arrive one by one as the time proceeds. +the leaf nodes of the tree arrive one by one as the time proceeds. The leaves +are vector records as defined in `dp_query.DPQuery`. -`TreeRangeSumQuery` is a `DPQuery`s for offline tree aggregation protocol. -'Offline' means all the leaf nodes are ready before the protocol starts. +`CentralTreeSumQuery` and `DistributedTreeSumQuery` are `DPQuery`s for +central/distributed offline tree aggregation protocol. 'Offline' means all the +leaf nodes are ready before the protocol starts. Each record, different from +what is defined in `dp_query.DPQuery`, is a histogram (i.e. the leaf nodes). 
""" +import distutils import math +from typing import Optional import attr import tensorflow as tf -from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query from tensorflow_privacy.privacy.dp_query import dp_query -from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import tree_aggregation @@ -439,84 +442,217 @@ def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: return tree -class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for accurate range queries using tree aggregation. +def _get_add_noise(stddev, seed: int = None): + """Utility function to decide which `add_noise` to use according to tf version.""" + if distutils.version.LooseVersion( + tf.__version__) < distutils.version.LooseVersion('2.0.0'): - Implements a variant of the tree aggregation protocol from. "Is interaction - necessary for distributed private learning?. Adam Smith, Abhradeep Thakurta, - Jalaj Upadhyay." Builds a tree on top of the input record and adds noise to - the tree for differential privacy. Any range query can be decomposed into the - sum of O(log(n)) nodes in the tree compared to O(n) when using a histogram. - Improves efficiency and reduces noise scale. + # The seed should be only used for testing purpose. + if seed is not None: + tf.random.set_seed(seed) + + def add_noise(v): + return v + tf.random.normal( + tf.shape(input=v), stddev=stddev, dtype=v.dtype) + else: + random_normal = tf.random_normal_initializer(stddev=stddev, seed=seed) + + def add_noise(v): + return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) + + return add_noise + + +class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for differentially private tree aggregation protocol. + + Implements a central variant of the tree aggregation protocol from the paper + "'Is interaction necessary for distributed private learning?.' Adam Smith, + Abhradeep Thakurta, Jalaj Upadhyay" by replacing their local randomizer with + gaussian mechanism. The first step is to clip the clients' local updates (i.e. + a 1-D array containing the leaf nodes of the tree) by L1 norm to make sure it + does not exceed a prespecified upper bound. The second step is to construct + the tree on the clipped update. The third step is to add independent gaussian + noise to each node in the tree. The returned tree can support efficient and + accurate range queries with differential privacy. """ @attr.s(frozen=True) class GlobalState(object): - """Class defining global state for TreeRangeSumQuery. + """Class defining global state for `CentralTreeSumQuery`. Attributes: - arity: The branching factor of the tree (i.e. the number of children each - internal node has). - inner_query_state: The global state of the inner query. + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. """ - arity = attr.ib() - inner_query_state = attr.ib() + l1_bound = attr.ib() def __init__(self, - inner_query: dp_query.SumAggregationDPQuery, - arity: int = 2): - """Initializes the `TreeRangeSumQuery`. + stddev: float, + arity: int = 2, + l1_bound: int = 10, + seed: Optional[int] = None): + """Initializes the `CentralTreeSumQuery`. Args: - inner_query: The inner `DPQuery` that adds noise to the tree. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). Defaults to 2. 
+ stddev: The stddev of the noise added to each internal node of the + constructed tree. + arity: The branching factor of the tree. + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for + test purpose. """ - self._inner_query = inner_query + self._stddev = stddev self._arity = arity - - if self._arity < 1: - raise ValueError(f'Invalid arity={arity} smaller than 2.') + self._l1_bound = l1_bound + self._seed = seed def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return TreeRangeSumQuery.GlobalState( - arity=self._arity, - inner_query_state=self._inner_query.initial_global_state()) + return CentralTreeSumQuery.GlobalState(l1_bound=self._l1_bound) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return (global_state.arity, - self._inner_query.derive_sample_params( - global_state.inner_query_state)) + return global_state.l1_bound + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" + casted_record = tf.cast(record, tf.float32) + l1_norm = tf.norm(casted_record, ord=1) + + l1_bound = tf.cast(params, tf.float32) + + preprocessed_record, _ = tf.clip_by_global_norm([casted_record], + l1_bound, + use_norm=l1_norm) + + return preprocessed_record[0] + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`. + + Args: + sample_state: a frequency histogram. + global_state: hyper-parameters of the query. + + Returns: + a `tf.RaggedTensor` representing the tree built on top of `sample_state`. + The jth node on the ith layer of the tree can be accessed by tree[i][j] + where tree is the returned value. + """ + add_noise = _get_add_noise(self._stddev, self._seed) + tree = _build_tree_from_leaf(sample_state, self._arity) + return tf.map_fn(add_noise, tree), global_state + + +class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for differentially private tree aggregation protocol. + + The difference from `CentralTreeSumQuery` is that the tree construction and + gaussian noise addition happen in `preprocess_records`. The difference only + takes effect when used together with + `tff.aggregators.DifferentiallyPrivateFactory`. In other cases, this class + should be treated as equal with `CentralTreeSumQuery`. + + Implements a distributed version of the tree aggregation protocol from. "Is + interaction necessary for distributed private learning?." by replacing their + local randomizer with gaussian mechanism. The first step is to check the L1 + norm of the clients' local updates (i.e. a 1-D array containing the leaf nodes + of the tree) to make sure it does not exceed a prespecified upper bound. The + second step is to construct the tree. The third step is to add independent + gaussian noise to each node in the tree. The returned tree can support + efficient and accurate range queries with differential privacy. + """ + + @attr.s(frozen=True) + class GlobalState(object): + """Class defining global state for DistributedTreeSumQuery. + + Attributes: + stddev: The stddev of the noise added to each internal node in the + constructed tree. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). 
+ l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + """ + stddev = attr.ib() + arity = attr.ib() + l1_bound = attr.ib() + + def __init__(self, + stddev: float, + arity: int = 2, + l1_bound: int = 10, + seed: Optional[int] = None): + """Initializes the `DistributedTreeSumQuery`. + + Args: + stddev: The stddev of the noise added to each node in the tree. + arity: The branching factor of the tree. + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for + test purpose. + """ + self._stddev = stddev + self._arity = arity + self._l1_bound = l1_bound + self._seed = seed + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return DistributedTreeSumQuery.GlobalState( + stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return (global_state.stddev, global_state.arity, global_state.l1_bound) def preprocess_record(self, params, record): """Implements `tensorflow_privacy.DPQuery.preprocess_record`. - This method builds the tree, flattens it and applies - `inner_query.preprocess_record` to the flattened tree. + This method clips the input record by L1 norm, constructs a tree on top of + it, and adds gaussian noise to each node of the tree for differential + privacy. Unlike `get_noised_result` in `CentralTreeSumQuery`, this function + flattens the `tf.RaggedTensor` before outputting it. This is useful when + used inside `tff.aggregators.DifferentiallyPrivateFactory` because it does + not accept ragged output tensor. Args: - params: Hyper-parameters for preprocessing record. - record: A histogram representing the leaf nodes of the tree. + params: hyper-parameters for preprocessing record, (stddev, aritry, + l1_bound) + record: leaf nodes for the tree. Returns: - A `tf.Tensor` representing the flattened version of the preprocessed tree. + `tf.Tensor` representing the flattened version of the tree. """ - arity, inner_query_params = params - preprocessed_record = _build_tree_from_leaf(record, arity).flat_values - preprocessed_record = self._inner_query.preprocess_record( - inner_query_params, preprocessed_record) + _, arity, l1_bound_ = params + l1_bound = tf.cast(l1_bound_, tf.float32) + + casted_record = tf.cast(record, tf.float32) + l1_norm = tf.norm(casted_record, ord=1) + + preprocessed_record, _ = tf.clip_by_global_norm([casted_record], + l1_bound, + use_norm=l1_norm) + preprocessed_record = preprocessed_record[0] + + add_noise = _get_add_noise(self._stddev, self._seed) + tree = _build_tree_from_leaf(preprocessed_record, arity) + noisy_tree = tf.map_fn(add_noise, tree) # The following codes reshape the output vector so the output shape of can # be statically inferred. This is useful when used with # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know # the output shape of this function statically and explicitly. 
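As a side note on the clipping step above: passing the L1 norm as `use_norm` makes `tf.clip_by_global_norm` rescale the record so that its L1 norm is at most `l1_bound`. A small self-contained illustration, assuming TF 2.x eager execution (not part of the patch; the flattening and reshape continue below):

    import tensorflow as tf

    record = tf.constant([10., 10., 0., 0.])   # L1 norm is 20
    l1_bound = 10.
    l1_norm = tf.norm(record, ord=1)
    clipped, _ = tf.clip_by_global_norm([record], l1_bound, use_norm=l1_norm)
    print(clipped[0].numpy())                  # [5. 5. 0. 0.], L1 norm now 10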
- preprocessed_record_shape = [ + flat_noisy_tree = noisy_tree.flat_values + flat_tree_shape = [ (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - 1) // (self._arity - 1) ] - return tf.reshape(preprocessed_record, preprocessed_record_shape) + return tf.reshape(flat_noisy_tree, flat_tree_shape) def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`. @@ -525,11 +661,12 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): output by `preprocess_records.` Args: - sample_state: A `tf.Tensor` for the flattened tree. - global_state: The global state of the protocol. + sample_state: `tf.Tensor` for the flattened tree. + global_state: hyper-parameters including noise multiplier, the branching + factor of the tree and the maximum records per user. Returns: - A `tf.RaggedTensor` representing the tree. + a `tf.RaggedTensor` for the tree. """ # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], @@ -545,60 +682,3 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): tree = tf.RaggedTensor.from_row_splits( values=sample_state, row_splits=row_splits) return tree, global_state - - @classmethod - def build_central_gaussian_query(cls, - l2_norm_clip: float, - stddev: float, - arity: int = 2): - """Returns `TreeRangeSumQuery` with central Gaussian noise. - - Args: - l2_norm_clip: Each record should be clipped so that it has L2 norm at most - `l2_norm_clip`. - stddev: Stddev of the central Gaussian noise. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). Defaults to 2. - """ - if l2_norm_clip <= 0: - raise ValueError(f'`l2_norm_clip` must be positive, got {l2_norm_clip}.') - - if stddev < 0: - raise ValueError(f'`stddev` must be non-negative, got {stddev}.') - - if arity < 2: - raise ValueError(f'`arity` must be at least 2, got {arity}.') - - inner_query = gaussian_query.GaussianSumQuery(l2_norm_clip, stddev) - - return cls(arity=arity, inner_query=inner_query) - - @classmethod - def build_distributed_discrete_gaussian_query(cls, - l2_norm_bound: float, - local_stddev: float, - arity: int = 2): - """Returns `TreeRangeSumQuery` with central Gaussian noise. - - Args: - l2_norm_bound: Each record should be clipped so that it has L2 norm at - most `l2_norm_bound`. - local_stddev: Scale/stddev of the local discrete Gaussian noise. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). Defaults to 2. 
- """ - if l2_norm_bound <= 0: - raise ValueError( - f'`l2_clip_bound` must be positive, got {l2_norm_bound}.') - - if local_stddev < 0: - raise ValueError( - f'`local_stddev` must be non-negative, got {local_stddev}.') - - if arity < 2: - raise ValueError(f'`arity` must be at least 2, got {arity}.') - - inner_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery( - l2_norm_bound, local_stddev) - - return cls(arity=arity, inner_query=inner_query) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 3713b5d..cc3a89a 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -423,115 +423,72 @@ class BuildTreeTest(tf.test.TestCase, parameterized.TestCase): self.assertEqual(tree[layer][idx], expected_value) -class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): +class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.product( - inner_query=['central', 'distributed'], - params=[(0., 1., 2), (1., -1., 2), (1., 1., 1)], - ) - def test_raises_error(self, inner_query, params): - clip_norm, stddev, arity = params - with self.assertRaises(ValueError): - if inner_query == 'central': - tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - clip_norm, stddev, arity) - elif inner_query == 'distributed': - tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - clip_norm, stddev, arity) + def test_initial_global_state_type(self): - @parameterized.product( - inner_query=['central', 'distributed'], - clip_norm=[0.1, 1.0, 10.0], - stddev=[0.1, 1.0, 10.0]) - def test_initial_global_state_type(self, inner_query, clip_norm, stddev): - - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - clip_norm, stddev) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - clip_norm, stddev) + query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) global_state = query.initial_global_state() - self.assertIsInstance(global_state, - tree_aggregation_query.TreeRangeSumQuery.GlobalState) + self.assertIsInstance( + global_state, tree_aggregation_query.CentralTreeSumQuery.GlobalState) - @parameterized.product( - inner_query=['central', 'distributed'], - clip_norm=[0.1, 1.0, 10.0], - stddev=[0.1, 1.0, 10.0], - arity=[2, 3, 4]) - def test_derive_sample_params(self, inner_query, clip_norm, stddev, arity): - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - clip_norm, stddev, arity) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - clip_norm, stddev, arity) - global_state = query.initial_global_state() - derived_arity, inner_query_state = query.derive_sample_params(global_state) - self.assertAllClose(derived_arity, arity) - if inner_query == 'central': - self.assertAllClose(inner_query_state, clip_norm) - elif inner_query == 'distributed': - self.assertAllClose(inner_query_state.l2_norm_bound, clip_norm) - self.assertAllClose(inner_query_state.local_stddev, stddev) - - @parameterized.product( - (dict(arity=2, expected_tree=[1, 1, 0, 1, 0, 0, 0]), - dict(arity=3, expected_tree=[1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])), - 
inner_query=['central', 'distributed'], - ) - def test_preprocess_record(self, inner_query, arity, expected_tree): - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.float32) - expected_tree = tf.cast(expected_tree, tf.float32) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.int32) + def test_derive_sample_params(self): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) + self.assertAllClose(params, 10.) @parameterized.named_parameters( - ('stddev_1', 1, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]), - ('stddev_0_1', 4, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]), + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32)), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32)), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32)), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], + dtype=tf.float32)), ) - def test_distributed_preprocess_record_with_noise(self, local_stddev, record, - expected_tree): - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - 10., local_stddev) + def test_preprocess_record(self, arity, record): + query = tree_aggregation_query.CentralTreeSumQuery( + stddev=NOISE_STD, arity=arity) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose( - preprocessed_record, expected_tree, atol=10 * local_stddev) + self.assertAllClose(preprocessed_record, record) - @parameterized.product( - (dict( - arity=2, - expected_tree=tf.ragged.constant([[1], [1, 0], [1, 0, 0, 0]])), - dict( - arity=3, - expected_tree=tf.ragged.constant([[1], [1, 0, 0], - [1, 0, 0, 0, 0, 0, 0, 0, 0]]))), - inner_query=['central', 'distributed'], + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.constant([5, 5, 0, 0], dtype=tf.int32)), + ('binary_test_float', 2, tf.constant( + [10., 10., 0., 0.], + dtype=tf.float32), tf.constant([5., 5., 0., 0.], dtype=tf.float32)), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.constant([5, 5, 0, 0], dtype=tf.int32)), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.constant([5., 5., 0., 0.], dtype=tf.float32)), ) - def test_get_noised_result(self, inner_query, arity, expected_tree): - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.float32) - expected_tree = tf.cast(expected_tree, tf.float32) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.int32) + def test_preprocess_record_clipped(self, arity, record, + expected_clipped_value): + query = tree_aggregation_query.CentralTreeSumQuery( + stddev=NOISE_STD, arity=arity) + global_state = query.initial_global_state() + 
params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_clipped_value) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result(self, arity, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, record) @@ -540,18 +497,167 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertAllClose(sample_state, expected_tree) - @parameterized.product(stddev=[0.1, 1.0, 10.0]) - def test_central_get_noised_result_with_noise(self, stddev): - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - 10., stddev) + @parameterized.named_parameters( + ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ) + def test_get_noised_result_with_noise(self, stddev, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev, seed=0) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.])) + preprocessed_record = query.preprocess_record(params, record) + + sample_state, _ = query.get_noised_result(preprocessed_record, global_state) + + self.assertAllClose( + sample_state.flat_values, expected_tree, atol=3 * stddev) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) sample_state, global_state = query.get_noised_result( preprocessed_record, global_state) - self.assertAllClose( - sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev) + self.assertAllClose(sample_state, expected_tree) + + +class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_initial_global_state_type(self): + + query = 
tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + self.assertIsInstance( + global_state, + tree_aggregation_query.DistributedTreeSumQuery.GlobalState) + + def test_derive_sample_params(self): + query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + stddev, arity, l1_bound = query.derive_sample_params(global_state) + self.assertAllClose(stddev, NOISE_STD) + self.assertAllClose(arity, 2) + self.assertAllClose(l1_bound, 10) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. + ])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. + ])), + ) + def test_preprocess_record(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_tree) + + @parameterized.named_parameters( + ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ) + def test_preprocess_record_with_noise(self, stddev, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=stddev, seed=0) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + + preprocessed_record = query.preprocess_record(params, record) + + self.assertAllClose(preprocessed_record, expected_tree, atol=3 * stddev) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant( + [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant( + [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), + ) + def test_preprocess_record_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_tree) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 
0.]])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) if __name__ == '__main__': From 26725594719fd9890e5f1d6c8556350902a08ef7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Jul 2021 11:39:48 -0700 Subject: [PATCH 10/71] (1) Merge `CentralTreeSumQuery` and `DistributedTreeSumQuery` into one DPQuery to modularize things. The new query takes in an `inner_query` argument. Depending on the behavior of inner query, the query will follow central DP or distributed DP. (2) Remove the hard-coded L1 clipping and replace with norm bound checking in the inner query. This design allows us to use whatever clipping factory we want outside the DPQuery. PiperOrigin-RevId: 387398741 --- .../dp_query/tree_aggregation_query.py | 176 +++++++++++++++++- .../dp_query/tree_aggregation_query_test.py | 131 +++++++++++++ 2 files changed, 301 insertions(+), 6 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 5717e4f..990391b 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -15,13 +15,10 @@ `TreeCumulativeSumQuery` and `TreeResidualSumQuery` are `DPQuery`s for continual online observation queries relying on `tree_aggregation`. 'Online' means that -the leaf nodes of the tree arrive one by one as the time proceeds. The leaves -are vector records as defined in `dp_query.DPQuery`. +the leaf nodes of the tree arrive one by one as the time proceeds. -`CentralTreeSumQuery` and `DistributedTreeSumQuery` are `DPQuery`s for -central/distributed offline tree aggregation protocol. 'Offline' means all the -leaf nodes are ready before the protocol starts. Each record, different from -what is defined in `dp_query.DPQuery`, is a histogram (i.e. 
the leaf nodes).
+`TreeRangeSumQuery` is a `DPQuery` for an offline tree aggregation protocol.
+'Offline' means all the leaf nodes are ready before the protocol starts.
 """
 import distutils
 import math
@@ -29,7 +26,9 @@ from typing import Optional
 
 import attr
 import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query
 from tensorflow_privacy.privacy.dp_query import dp_query
+from tensorflow_privacy.privacy.dp_query import gaussian_query
 from tensorflow_privacy.privacy.dp_query import tree_aggregation
 
 
@@ -442,6 +441,171 @@ def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor:
   return tree
 
 
+class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
+  """Implements dp_query for accurate range queries using tree aggregation.
+
+  Implements a variant of the tree aggregation protocol from "Is Interaction
+  Necessary for Distributed Private Learning?" by Adam Smith, Abhradeep
+  Thakurta, and Jalaj Upadhyay. Builds a tree on top of the input record and
+  adds noise to the tree for differential privacy. Any range query can be
+  decomposed into the sum of O(log(n)) nodes in the tree compared to O(n) for
+  a histogram, which improves efficiency and reduces the noise scale.
+  """
+
+  @attr.s(frozen=True)
+  class GlobalState(object):
+    """Class defining global state for TreeRangeSumQuery.
+
+    Attributes:
+      arity: The branching factor of the tree (i.e. the number of children each
+        internal node has).
+      inner_query_state: The global state of the inner query.
+    """
+    arity = attr.ib()
+    inner_query_state = attr.ib()
+
+  def __init__(self,
+               inner_query: dp_query.SumAggregationDPQuery,
+               arity: int = 2):
+    """Initializes the `TreeRangeSumQuery`.
+
+    Args:
+      inner_query: The inner `DPQuery` that adds noise to the tree.
+      arity: The branching factor of the tree (i.e. the number of children each
+        internal node has). Defaults to 2.
+    """
+    self._inner_query = inner_query
+    self._arity = arity
+
+    if self._arity < 2:
+      raise ValueError(f'`arity` must be at least 2, got {arity}.')
+
+  def initial_global_state(self):
+    """Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
+    return TreeRangeSumQuery.GlobalState(
+        arity=self._arity,
+        inner_query_state=self._inner_query.initial_global_state())
+
+  def derive_sample_params(self, global_state):
+    """Implements `tensorflow_privacy.DPQuery.derive_sample_params`."""
+    return (global_state.arity,
+            self._inner_query.derive_sample_params(
+                global_state.inner_query_state))
+
+  def preprocess_record(self, params, record):
+    """Implements `tensorflow_privacy.DPQuery.preprocess_record`.
+
+    This method builds the tree, flattens it and applies
+    `inner_query.preprocess_record` to the flattened tree.
+
+    Args:
+      params: Hyper-parameters for preprocessing record.
+      record: A histogram representing the leaf nodes of the tree.
+
+    Returns:
+      A `tf.Tensor` representing the flattened version of the preprocessed tree.
+    """
+    arity, inner_query_params = params
+    preprocessed_record = _build_tree_from_leaf(record, arity).flat_values
+    preprocessed_record = self._inner_query.preprocess_record(
+        inner_query_params, preprocessed_record)
+
+    # The following code reshapes the output vector so the output shape can
+    # be statically inferred. This is useful when used with
+    # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know
+    # the output shape of this function statically and explicitly.
+    preprocessed_record_shape = [
+        (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) -
+         1) // (self._arity - 1)
+    ]
+    return tf.reshape(preprocessed_record, preprocessed_record_shape)
+
+  def get_noised_result(self, sample_state, global_state):
+    """Implements `tensorflow_privacy.DPQuery.get_noised_result`.
+
+    This function reconstructs the `tf.RaggedTensor` from the flattened tree
+    output by `preprocess_record`.
+
+    Args:
+      sample_state: A `tf.Tensor` for the flattened tree.
+      global_state: The global state of the protocol.
+
+    Returns:
+      A `tf.RaggedTensor` representing the tree.
+    """
+    # The [0] is needed because of how tf.RaggedTensor.from_row_splits works.
+    # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6],
+    #                                       row_splits=[0, 4, 4, 7, 8, 8]))
+    #
+    # This part is not written in tensorflow and will be executed on the server
+    # side instead of the client side if used with
+    # tff.aggregators.DifferentiallyPrivateFactory for federated learning.
+    row_splits = [0] + [
+        (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range(
+            math.floor(math.log(sample_state.shape[0], self._arity)) + 1)
+    ]
+    tree = tf.RaggedTensor.from_row_splits(
+        values=sample_state, row_splits=row_splits)
+    return tree, global_state
+
+  @classmethod
+  def build_central_gaussian_query(cls,
+                                   l2_norm_clip: float,
+                                   stddev: float,
+                                   arity: int = 2):
+    """Returns `TreeRangeSumQuery` with central Gaussian noise.
+
+    Args:
+      l2_norm_clip: Each record should be clipped so that it has L2 norm at most
+        `l2_norm_clip`.
+      stddev: Stddev of the central Gaussian noise.
+      arity: The branching factor of the tree (i.e. the number of children each
+        internal node has). Defaults to 2.
+    """
+    if l2_norm_clip <= 0:
+      raise ValueError(f'`l2_norm_clip` must be positive, got {l2_norm_clip}.')
+
+    if stddev < 0:
+      raise ValueError(f'`stddev` must be non-negative, got {stddev}.')
+
+    if arity < 2:
+      raise ValueError(f'`arity` must be at least 2, got {arity}.')
+
+    inner_query = gaussian_query.GaussianSumQuery(l2_norm_clip, stddev)
+
+    return cls(arity=arity, inner_query=inner_query)
+
+  @classmethod
+  def build_distributed_discrete_gaussian_query(cls,
+                                                l2_norm_bound: float,
+                                                local_stddev: float,
+                                                arity: int = 2):
+    """Returns `TreeRangeSumQuery` with distributed discrete Gaussian noise.
+
+    Args:
+      l2_norm_bound: Each record should be clipped so that it has L2 norm at
+        most `l2_norm_bound`.
+      local_stddev: Scale/stddev of the local discrete Gaussian noise.
+      arity: The branching factor of the tree (i.e. the number of children each
+        internal node has). Defaults to 2.
+    """
+    if l2_norm_bound <= 0:
+      raise ValueError(
+          f'`l2_norm_bound` must be positive, got {l2_norm_bound}.')
+
+    if local_stddev < 0:
+      raise ValueError(
+          f'`local_stddev` must be non-negative, got {local_stddev}.')
+
+    if arity < 2:
+      raise ValueError(f'`arity` must be at least 2, got {arity}.')
+
+    inner_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery(
+        l2_norm_bound, local_stddev)
+
+    return cls(arity=arity, inner_query=inner_query)
+
+
 def _get_add_noise(stddev, seed: int = None):
   """Utility function to decide which `add_noise` to use according to tf version."""
   if distutils.version.LooseVersion(
diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py
index cc3a89a..a958f26 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py
@@ -423,6 +423,137 @@ class BuildTreeTest(tf.test.TestCase, parameterized.TestCase):
     self.assertEqual(tree[layer][idx], expected_value)
 
 
+class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.product(
+      inner_query=['central', 'distributed'],
+      params=[(0., 1., 2), (1., -1., 2), (1., 1., 1)],
+  )
+  def test_raises_error(self, inner_query, params):
+    clip_norm, stddev, arity = params
+    with self.assertRaises(ValueError):
+      if inner_query == 'central':
+        tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query(
+            clip_norm, stddev, arity)
+      elif inner_query == 'distributed':
+        tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+            clip_norm, stddev, arity)
+
+  @parameterized.product(
+      inner_query=['central', 'distributed'],
+      clip_norm=[0.1, 1.0, 10.0],
+      stddev=[0.1, 1.0, 10.0])
+  def test_initial_global_state_type(self, inner_query, clip_norm, stddev):
+
+    if inner_query == 'central':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query(
+          clip_norm, stddev)
+    elif inner_query == 'distributed':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          clip_norm, stddev)
+    global_state = query.initial_global_state()
+    self.assertIsInstance(global_state,
+                          tree_aggregation_query.TreeRangeSumQuery.GlobalState)
+
+  @parameterized.product(
+      inner_query=['central', 'distributed'],
+      clip_norm=[0.1, 1.0, 10.0],
+      stddev=[0.1, 1.0, 10.0],
+      arity=[2, 3, 4])
+  def test_derive_sample_params(self, inner_query, clip_norm, stddev, arity):
+    if inner_query == 'central':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query(
+          clip_norm, stddev, arity)
+    elif inner_query == 'distributed':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          clip_norm, stddev, arity)
+    global_state = query.initial_global_state()
+    derived_arity, inner_query_state = query.derive_sample_params(global_state)
+    self.assertAllClose(derived_arity, arity)
+    if inner_query == 'central':
+      self.assertAllClose(inner_query_state, clip_norm)
+    elif inner_query == 'distributed':
+      self.assertAllClose(inner_query_state.l2_norm_bound, clip_norm)
+      self.assertAllClose(inner_query_state.local_stddev, stddev)
+
+  @parameterized.product(
+      (dict(arity=2, expected_tree=[1, 1, 0, 1, 0, 0, 0]),
+       dict(arity=3, expected_tree=[1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])),
+      inner_query=['central', 'distributed'],
+  )
+  def test_preprocess_record(self, inner_query, arity, expected_tree):
+    if inner_query == 'central':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.float32)
+      expected_tree = tf.cast(expected_tree, tf.float32)
+    elif inner_query == 'distributed':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.int32)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    preprocessed_record = query.preprocess_record(params, record)
+    self.assertAllClose(preprocessed_record, expected_tree)
+
+  @parameterized.named_parameters(
+      ('stddev_1', 1, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]),
+      ('stddev_4', 4, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]),
+  )
+  def test_distributed_preprocess_record_with_noise(self, local_stddev, record,
+                                                    expected_tree):
+    query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+        10., local_stddev)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+
+    preprocessed_record = query.preprocess_record(params, record)
+
+    self.assertAllClose(
+        preprocessed_record, expected_tree, atol=10 * local_stddev)
+
+  @parameterized.product(
+      (dict(
+          arity=2,
+          expected_tree=tf.ragged.constant([[1], [1, 0], [1, 0, 0, 0]])),
+       dict(
+           arity=3,
+           expected_tree=tf.ragged.constant([[1], [1, 0, 0],
+                                             [1, 0, 0, 0, 0, 0, 0, 0, 0]]))),
+      inner_query=['central', 'distributed'],
+  )
+  def test_get_noised_result(self, inner_query, arity, expected_tree):
+    if inner_query == 'central':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.float32)
+      expected_tree = tf.cast(expected_tree, tf.float32)
+    elif inner_query == 'distributed':
+      query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.int32)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    preprocessed_record = query.preprocess_record(params, record)
+    sample_state, global_state = query.get_noised_result(
+        preprocessed_record, global_state)
+
+    self.assertAllClose(sample_state, expected_tree)
+
+  @parameterized.product(stddev=[0.1, 1.0, 10.0])
+  def test_central_get_noised_result_with_noise(self, stddev):
+    query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query(
+        10., stddev)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.]))
+    sample_state, global_state = query.get_noised_result(
+        preprocessed_record, global_state)
+
+    self.assertAllClose(
+        sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev)
+
+
 class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase):
 
   def test_initial_global_state_type(self):

From 11900acf9ba4c70d876e480e787671d93f9952fe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Sun, 1 Aug 2021 23:13:00 -0700
Subject: [PATCH 11/71] Fixed a bug where `get_noised_result` did not apply the
 inner query's `get_noised_result` to the input record or update
 `global_state`.
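
In effect, the corrected flow first delegates to the inner query and then
rebuilds the ragged tree (a sketch using the names from the diff below):

    sample_state, inner_query_state = self._inner_query.get_noised_result(
        sample_state, global_state.inner_query_state)
    new_global_state = TreeRangeSumQuery.GlobalState(
        arity=global_state.arity, inner_query_state=inner_query_state)
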
PiperOrigin-RevId: 388153296
---
 .../privacy/dp_query/tree_aggregation_query.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
index 990391b..59ea0dc 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
@@ -540,13 +540,19 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
     # This part is not written in tensorflow and will be executed on the server
     # side instead of the client side if used with
     # tff.aggregators.DifferentiallyPrivateFactory for federated learning.
+    sample_state, inner_query_state = self._inner_query.get_noised_result(
+        sample_state, global_state.inner_query_state)
+    new_global_state = TreeRangeSumQuery.GlobalState(
+        arity=global_state.arity,
+        inner_query_state=inner_query_state)
+
     row_splits = [0] + [
         (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range(
             math.floor(math.log(sample_state.shape[0], self._arity)) + 1)
     ]
     tree = tf.RaggedTensor.from_row_splits(
         values=sample_state, row_splits=row_splits)
-    return tree, global_state
+    return tree, new_global_state

From aa3f841893b15e856f93fe33ab2c33ef7a3f4442 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Sat, 7 Aug 2021 11:21:13 -0700
Subject: [PATCH 12/71] In `TreeRangeSumQuery.preprocess_record`, move the
 reshaping operation before applying `inner_query.preprocess_record`. The
 change is due to the newly checked-in `DistributedDiscreteGaussianSumQuery`,
 whose `preprocess_record` requires explicit shape information during tracing.

PiperOrigin-RevId: 389392878
---
 .../privacy/dp_query/tree_aggregation_query.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
index 59ea0dc..082bf01 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
@@ -507,9 +507,6 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
     """
     arity, inner_query_params = params
     preprocessed_record = _build_tree_from_leaf(record, arity).flat_values
-    preprocessed_record = self._inner_query.preprocess_record(
-        inner_query_params, preprocessed_record)
-
     # The following code reshapes the output vector so that the output shape
     # can be statically inferred. This is useful when used with
     # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know
     # the output shape in advance.
     preprocessed_record_shape = [
         (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1)
          - 1) // (self._arity - 1)
     ]
-    return tf.reshape(preprocessed_record, preprocessed_record_shape)
+    preprocessed_record = tf.reshape(preprocessed_record,
+                                     preprocessed_record_shape)
+    preprocessed_record = self._inner_query.preprocess_record(
+        inner_query_params, preprocessed_record)
+
+    return preprocessed_record
 
   def get_noised_result(self, sample_state, global_state):
     """Implements `tensorflow_privacy.DPQuery.get_noised_result`.
@@ -543,8 +545,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
     sample_state, inner_query_state = self._inner_query.get_noised_result(
         sample_state, global_state.inner_query_state)
     new_global_state = TreeRangeSumQuery.GlobalState(
-        arity=global_state.arity,
-        inner_query_state=inner_query_state)
+        arity=global_state.arity, inner_query_state=inner_query_state)
 
     row_splits = [0] + [
         (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range(

From f3af24b00ebe9a598337a592ff9096299c23b6e6 Mon Sep 17 00:00:00 2001
From: Ken Liu
Date: Sun, 8 Aug 2021 03:43:01 -0700
Subject: [PATCH 13/71] Adds central discrete Gaussian DPQuery.

PiperOrigin-RevId: 389467360
---
 tensorflow_privacy/__init__.py                |   1 +
 .../dp_query/discrete_gaussian_query.py       |  89 +++++++++++
 .../dp_query/discrete_gaussian_query_test.py  | 148 ++++++++++++++++++
 .../distributed_discrete_gaussian_query.py    |   6 +-
 ...istributed_discrete_gaussian_query_test.py |   2 +-
 5 files changed, 242 insertions(+), 4 deletions(-)
 create mode 100644 tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py
 create mode 100644 tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index f775d80..cfd5344 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -43,6 +43,7 @@ else:
   # DPQuery classes
   from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery
   from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery
+  from tensorflow_privacy.privacy.dp_query.discrete_gaussian_query import DiscreteGaussianSumQuery
   from tensorflow_privacy.privacy.dp_query.distributed_discrete_gaussian_query import DistributedDiscreteGaussianSumQuery
   from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery
   from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery

diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py
new file mode 100644
index 0000000..444489b
--- /dev/null
+++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py
@@ -0,0 +1,89 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Implements the DPQuery interface for the discrete Gaussian mechanism."""
+
+import collections
+
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils
+from tensorflow_privacy.privacy.dp_query import dp_query
+
+
+class DiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
+  """Implements DPQuery for discrete Gaussian sum queries.
+
+  For each local record, we check the L2 norm bound; the discrete Gaussian
+  noise is added centrally to the aggregated sum rather than to each record.
+  In particular, this DPQuery does not perform L2 norm clipping, and the
+  norms of the input records are expected to be bounded.
+ """ + + # pylint: disable=invalid-name + _GlobalState = collections.namedtuple('_GlobalState', + ['l2_norm_bound', 'stddev']) + + # pylint: disable=invalid-name + _SampleParams = collections.namedtuple('_SampleParams', + ['l2_norm_bound', 'stddev']) + + def __init__(self, l2_norm_bound, stddev): + """Initializes the DiscreteGaussianSumQuery. + + Args: + l2_norm_bound: The L2 norm bound to verify for each record. + stddev: The stddev of the discrete Gaussian noise added to the sum. + """ + self._l2_norm_bound = l2_norm_bound + self._stddev = stddev + + def set_ledger(self, ledger): + del ledger # Unused. + raise NotImplementedError('Ledger has not yet been implemented for' + 'DiscreteGaussianSumQuery!') + + def initial_global_state(self): + return self._GlobalState( + tf.cast(self._l2_norm_bound, tf.float32), + tf.cast(self._stddev, tf.float32)) + + def derive_sample_params(self, global_state): + return self._SampleParams(global_state.l2_norm_bound, global_state.stddev) + + def preprocess_record(self, params, record): + """Check record norm and add noise to the record.""" + record_as_list = tf.nest.flatten(record) + record_as_float_list = [tf.cast(x, tf.float32) for x in record_as_list] + tf.nest.map_structure(lambda x: tf.compat.v1.assert_type(x, tf.int32), + record_as_list) + dependencies = [ + tf.compat.v1.assert_less_equal( + tf.linalg.global_norm(record_as_float_list), + params.l2_norm_bound, + message=f'Global L2 norm exceeds {params.l2_norm_bound}.') + ] + with tf.control_dependencies(dependencies): + return tf.nest.map_structure(tf.identity, record) + + def get_noised_result(self, sample_state, global_state): + """Adds discrete Gaussian noise to the aggregate.""" + # Round up the noise as the TF discrete Gaussian sampler only takes + # integer noise stddevs for now. + ceil_stddev = tf.cast(tf.math.ceil(global_state.stddev), tf.int32) + + def add_noise(v): + noised_v = v + discrete_gaussian_utils.sample_discrete_gaussian( + scale=ceil_stddev, shape=tf.shape(v), dtype=v.dtype) + # Ensure shape as TF shape inference may fail due to custom noise sampler. + return tf.ensure_shape(noised_v, v.shape) + + return tf.nest.map_structure(add_noise, sample_state), global_state diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py new file mode 100644 index 0000000..fc14e7c --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query_test.py @@ -0,0 +1,148 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for DiscreteGaussianSumQuery.""" + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_query +from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils +from tensorflow_privacy.privacy.dp_query import test_utils + +dg_sum_query = discrete_gaussian_query.DiscreteGaussianSumQuery + + +def silence_tf_error_messages(func): + """Decorator that temporarily changes the TF logging levels.""" + + def wrapper(*args, **kwargs): + cur_verbosity = tf.compat.v1.logging.get_verbosity() + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL) + func(*args, **kwargs) + tf.compat.v1.logging.set_verbosity(cur_verbosity) # Reset verbosity. + + return wrapper + + +class DiscreteGaussianQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_sum_no_noise(self): + with self.cached_session() as sess: + record1 = tf.constant([2, 0], dtype=tf.int32) + record2 = tf.constant([-1, 1], dtype=tf.int32) + + query = dg_sum_query(l2_norm_bound=10, stddev=0.0) + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = sess.run(query_result) + expected = [1, 1] + self.assertAllEqual(result, expected) + + @parameterized.product(sample_size=[1, 3]) + def test_sum_multiple_shapes(self, sample_size): + with self.cached_session() as sess: + t1 = tf.constant([2, 0], dtype=tf.int32) + t2 = tf.constant([-1, 1, 3], dtype=tf.int32) + t3 = tf.constant([-2], dtype=tf.int32) + record = [t1, t2, t3] + sample = [record] * sample_size + + query = dg_sum_query(l2_norm_bound=10, stddev=0.0) + query_result, _ = test_utils.run_query(query, sample) + expected = [sample_size * t1, sample_size * t2, sample_size * t3] + result, expected = sess.run([query_result, expected]) + # Use `assertAllClose` for nested structures equality (with tolerance=0). 
+      self.assertAllClose(result, expected, atol=0)
+
+  @parameterized.product(sample_size=[1, 3])
+  def test_sum_nested_record_structure(self, sample_size):
+    with self.cached_session() as sess:
+      t1 = tf.constant([1, 0], dtype=tf.int32)
+      t2 = tf.constant([1, 1, 1], dtype=tf.int32)
+      t3 = tf.constant([1], dtype=tf.int32)
+      t4 = tf.constant([[1, 1], [1, 1]], dtype=tf.int32)
+      record = [t1, dict(a=t2, b=[t3, (t4, t1)])]
+      sample = [record] * sample_size
+
+      query = dg_sum_query(l2_norm_bound=10, stddev=0.0)
+      query_result, _ = test_utils.run_query(query, sample)
+      result = sess.run(query_result)
+
+      s = sample_size
+      expected = [t1 * s, dict(a=t2 * s, b=[t3 * s, (t4 * s, t1 * s)])]
+      # Use `assertAllClose` for nested-structure equality (with tolerance=0).
+      self.assertAllClose(result, expected, atol=0)
+
+  def test_sum_raise_on_float_inputs(self):
+    with self.cached_session() as sess:
+      record1 = tf.constant([2, 0], dtype=tf.float32)
+      record2 = tf.constant([-1, 1], dtype=tf.float32)
+      query = dg_sum_query(l2_norm_bound=10, stddev=0.0)
+
+      with self.assertRaises(TypeError):
+        query_result, _ = test_utils.run_query(query, [record1, record2])
+        sess.run(query_result)
+
+  @parameterized.product(l2_norm_bound=[0, 3, 10, 14.1])
+  @silence_tf_error_messages
+  def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound):
+    with self.cached_session() as sess:
+      record = tf.constant([10, 10], dtype=tf.int32)
+      query = dg_sum_query(l2_norm_bound=l2_norm_bound, stddev=0.0)
+
+      with self.assertRaises(tf.errors.InvalidArgumentError):
+        query_result, _ = test_utils.run_query(query, [record])
+        sess.run(query_result)
+
+  def test_sum_float_norm_not_rounded(self):
+    """Test that the float L2 norm bound doesn't get rounded/cast to integers."""
+    with self.cached_session() as sess:
+      # A cast/rounded norm bound would be insufficient.
+      l2_norm_bound = 14.2
+      record = tf.constant([10, 10], dtype=tf.int32)
+      query = dg_sum_query(l2_norm_bound=l2_norm_bound, stddev=0.0)
+      query_result, _ = test_utils.run_query(query, [record])
+      result = sess.run(query_result)
+      expected = [10, 10]
+      self.assertAllEqual(result, expected)
+
+  @parameterized.product(stddev=[10, 100, 1000])
+  def test_noisy_sum(self, stddev):
+    num_trials = 1000
+    record_1 = tf.zeros([num_trials], dtype=tf.int32)
+    record_2 = tf.ones([num_trials], dtype=tf.int32)
+    sample = [record_1, record_2]
+    query = dg_sum_query(l2_norm_bound=num_trials, stddev=stddev)
+    result, _ = test_utils.run_query(query, sample)
+
+    sampled_noise = discrete_gaussian_utils.sample_discrete_gaussian(
+        scale=tf.cast(stddev, tf.int32), shape=[num_trials], dtype=tf.int32)
+
+    result, sampled_noise = self.evaluate([result, sampled_noise])
+
+    # The standard error of the stddev should be roughly sigma / sqrt(2N - 2)
+    # (https://stats.stackexchange.com/questions/156518), so set an rtol that
+    # gives a < 0.01% chance of failure (within ~4 standard errors).
+    rtol = 4 / np.sqrt(2 * num_trials - 2)
+    self.assertAllClose(np.std(result), stddev, rtol=rtol)
+
+    # Use standard error of the mean to compare percentiles.
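+    # (Sample percentiles of n i.i.d. draws fluctuate on the same
+    # sigma / sqrt(n) scale as the sample mean, so 4 * stderr below is a
+    # loose, low-flake tolerance for comparing the two sets of quartiles.)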
+    stderr = stddev / np.sqrt(num_trials)
+    self.assertAllClose(
+        np.percentile(result, [25, 50, 75]),
+        np.percentile(sampled_noise, [25, 50, 75]),
+        atol=4 * stderr)
+
+
+if __name__ == '__main__':
+  tf.test.main()

diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
index 5b450ee..8dd4dba 100644
--- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py
@@ -41,7 +41,7 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
 
     Args:
       l2_norm_bound: The L2 norm bound to verify for each record.
-      local_stddev: The scale/stddev of the local discrete Gaussian noise.
+      local_stddev: The stddev of the local discrete Gaussian noise.
     """
     self._l2_norm_bound = l2_norm_bound
     self._local_stddev = local_stddev
@@ -65,7 +65,7 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
 
     Args:
       record: The record to which we generate and add local noise.
-      local_stddev: The scale/stddev of the local discrete Gaussian noise.
+      local_stddev: The stddev of the local discrete Gaussian noise.
       shares: Number of shares of local noise to generate. Should be 1 for each
         record. This can be useful when we want to generate multiple noise
         shares at once.
@@ -84,7 +84,7 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery):
           scale=ceil_local_stddev, shape=shape, dtype=v.dtype)
       # Sum across the number of noise shares and add it.
       noised_v = v + tf.reduce_sum(dgauss_noise, axis=0)
-      # Ensure shape as TF shape inference may fail due to custom noise sampler.
+      # Set shape as TF shape inference may fail due to custom noise sampler.
       noised_v.set_shape(v.shape.as_list())
       return noised_v

diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
index b2f6051..1c1a461 100644
--- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
+++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query_test.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for DistributedDiscreteGaussianQuery."""
+"""Tests for DistributedDiscreteGaussianSumQuery."""
 
 from absl.testing import parameterized
 import numpy as np

From b19e0b197a01286fd552f6fffac13339691453e2 Mon Sep 17 00:00:00 2001
From: Mark Daoust
Date: Mon, 9 Aug 2021 15:38:17 -0700
Subject: [PATCH 14/71] Implement the membership inference attack using a
 keras-callback.
PiperOrigin-RevId: 389741018 --- g3doc/tutorials/privacy_report.ipynb | 166 ++++++++++++++++----------- 1 file changed, 102 insertions(+), 64 deletions(-) diff --git a/g3doc/tutorials/privacy_report.ipynb b/g3doc/tutorials/privacy_report.ipynb index 1e89828..e6ff428 100644 --- a/g3doc/tutorials/privacy_report.ipynb +++ b/g3doc/tutorials/privacy_report.ipynb @@ -95,7 +95,6 @@ "from sklearn import metrics\n", "\n", "import tensorflow as tf\n", - "tf.compat.v1.disable_v2_behavior()\n", "\n", "import tensorflow_datasets as tfds\n", "\n", @@ -137,14 +136,25 @@ }, "outputs": [], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec\n", - "from tensorflow_privacy.privacy.membership_inference_attack import privacy_report" + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsCollection\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyMetric\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import privacy_report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VpOdtnbPbPXE" + }, + "outputs": [], + "source": [ + "import tensorflow_privacy" ] }, { @@ -171,13 +181,13 @@ "dataset = 'cifar10'\n", "num_classes = 10\n", "activation = 'relu'\n", - "lr = 0.02\n", - "momentum = 0.9\n", - "batch_size = 250\n", - "epochs_per_report = 5\n", - "num_reports = 10\n", - "# Privacy risks are especially visible with lots of epochs.\n", - "total_epochs = epochs_per_report*num_reports " + "num_conv = 3\n", + "\n", + "batch_size=50\n", + "epochs_per_report = 2\n", + "total_epochs = 50\n", + "\n", + "lr = 0.001" ] }, { @@ -197,7 +207,7 @@ }, "outputs": [], "source": [ - "#@title Load the data\n", + "#@title\n", "print('Loading the dataset.')\n", "train_ds = tfds.as_numpy(\n", " tfds.load(dataset, split=tfds.Split.TRAIN, batch_size=-1))\n", @@ -212,7 +222,9 @@ "y_train = tf.keras.utils.to_categorical(y_train_indices, num_classes)\n", "y_test = tf.keras.utils.to_categorical(y_test_indices, num_classes)\n", "\n", - "input_shape = x_train.shape[1:]" + "input_shape = x_train.shape[1:]\n", + "\n", + "assert x_train.shape[0] % batch_size == 0, \"The 
tensorflow_privacy optimizer doesn't handle partial batches\""
   ]
  },
  {
@@ -232,7 +244,7 @@
   },
   "outputs": [],
   "source": [
-    "#@title Define the models\n",
+    "#@title\n",
     "def small_cnn(input_shape: Tuple[int],\n",
     "              num_classes: int,\n",
     "              num_conv: int,\n",
@@ -259,7 +271,13 @@
     "  model.add(tf.keras.layers.Flatten())\n",
     "  model.add(tf.keras.layers.Dense(64, activation=activation))\n",
     "  model.add(tf.keras.layers.Dense(num_classes))\n",
-    "  return model\n"
+    "  \n",
+    "  model.compile(\n",
+    "      loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),\n",
+    "      optimizer=tf.keras.optimizers.Adam(learning_rate=lr),\n",
+    "      metrics=['accuracy'])\n",
+    "\n",
+    "  return model"
   ]
  },
  {
@@ -268,7 +286,9 @@
    "id": "hs0Smn24Dty-"
   },
   "source": [
-    "Build two-layer and a three-layer CNN models using that function. Again there's nothing provacy specific about this code. It uses standard models, layers, losses, and optimizers."
+    "Build two CNN models using that function.\n",
+    "\n",
+    "Configure the first to use a basic SGD optimizer, and the second to use a differentially private optimizer (`tf_privacy.DPKerasAdamOptimizer`), so you can compare the results."
   ]
  },
  {
@@ -279,16 +299,10 @@
   },
   "outputs": [],
   "source": [
-    "optimizer = tf.keras.optimizers.SGD(lr=lr, momentum=momentum)\n",
-    "loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n",
-    "\n",
-    "three_layer_model = small_cnn(\n",
-    "    input_shape, num_classes, num_conv=3, activation=activation)\n",
-    "three_layer_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])\n",
-    "\n",
-    "two_layer_model = small_cnn(\n",
+    "model_2layers = small_cnn(\n",
     "    input_shape, num_classes, num_conv=2, activation=activation)\n",
-    "two_layer_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])"
+    "model_3layers = small_cnn(\n",
+    "    input_shape, num_classes, num_conv=3, activation=activation)"
   ]
  },
  {
@@ -318,42 +332,42 @@
    "  def __init__(self, epochs_per_report, model_name):\n",
    "    self.epochs_per_report = epochs_per_report\n",
    "    self.model_name = model_name\n",
-   "    self.epochs = []\n",
-   "    self.attack_results = [] \n",
+   "    self.attack_results = []\n",
+   "\n",
+   "  def on_epoch_end(self, epoch, logs=None):\n",
+   "    epoch = epoch+1\n",
    "\n",
-   "  def on_epoch_end(self, n, logs=None):\n",
-   "    epoch = n + 1\n",
    "    if epoch % self.epochs_per_report != 0:\n",
    "      return\n",
-   "    \n",
-   "    print(f\"\\nRunning privacy report for epoch: {epoch}\")\n",
-   "    self.epochs.append(epoch)\n",
    "\n",
-   "    logits_train = model.predict(x_train, batch_size=batch_size)\n",
-   "    logits_test = model.predict(x_test, batch_size=batch_size)\n",
+   "    print(f'\\nRunning privacy report for epoch: {epoch}\\n')\n",
+   "\n",
+   "    logits_train = self.model.predict(x_train, batch_size=batch_size)\n",
+   "    logits_test = self.model.predict(x_test, batch_size=batch_size)\n",
    "\n",
    "    prob_train = special.softmax(logits_train, axis=1)\n",
    "    prob_test = special.softmax(logits_test, axis=1)\n",
    "\n",
    "    # Add metadata to generate a privacy report.\n",
    "    privacy_report_metadata = PrivacyReportMetadata(\n",
-   "        accuracy_train=metrics.accuracy_score(y_train_indices,\n",
-   "                                              np.argmax(prob_train, axis=1)),\n",
-   "        accuracy_test=metrics.accuracy_score(y_test_indices,\n",
-   "                                             np.argmax(prob_test, axis=1)),\n",
+   "        # Show the validation accuracy on the plot.\n",
+   "        # It's the value passed to `accuracy_train` that gets plotted.\n",
+   "        accuracy_train=logs['val_accuracy'], \n",
+   "        accuracy_test=logs['val_accuracy'],\n",
    "        epoch_num=epoch,\n",
    "
model_variant_label=self.model_name)\n",
    "\n",
    "    attack_results = mia.run_attacks(\n",
    "        AttackInputData(\n",
-   "            labels_train=np.asarray([x[0] for x in y_train_indices]),\n",
-   "            labels_test=np.asarray([x[0] for x in y_test_indices]),\n",
+   "            labels_train=y_train_indices[:, 0],\n",
+   "            labels_test=y_test_indices[:, 0],\n",
    "            probs_train=prob_train,\n",
    "            probs_test=prob_test),\n",
    "        SlicingSpec(entire_dataset=True, by_class=True),\n",
    "        attack_types=(AttackType.THRESHOLD_ATTACK,\n",
    "                      AttackType.LOGISTIC_REGRESSION),\n",
    "        privacy_report_metadata=privacy_report_metadata)\n",
+   "\n",
    "    self.attack_results.append(attack_results)\n"
   ]
  },
@@ -365,7 +379,18 @@
   "source": [
    "### Train the models\n",
    "\n",
-   "The next code block trains the two models. The `all_reports` list is used to collect all the results from all the models' training runs. The individual reports are tagged witht the `model_name`, so there's no confusion about which model generated which report. "
+   "The next code block trains the two models. The `all_reports` list is used to collect all the results from all the models' training runs. The individual reports are tagged with the `model_name`, so there's no confusion about which model generated which report."
   ]
  },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {
   "id": "o3U76c2Y4irD"
  },
  "outputs": [],
  "source": [
   "all_reports = []"
  ]
 },
 {
@@ -376,19 +401,8 @@
   },
   "outputs": [],
   "source": [
-   "all_reports = []\n",
-   "\n",
-   "models = {\n",
-   "    'two layer model': two_layer_model,\n",
-   "    'three layer model': three_layer_model,\n",
-   "}\n",
-   "\n",
-   "for model_name, model in models.items():\n",
-   "  print(f\"\\n\\n\\nFitting {model_name}\\n\")\n",
-   "  callback = PrivacyMetrics(epochs_per_report, \n",
-   "                            model_name)\n",
-   "\n",
-   "  model.fit(\n",
+   "callback = PrivacyMetrics(epochs_per_report, \"2 Layers\")\n",
+   "history = model_2layers.fit(\n",
    "      x_train,\n",
    "      y_train,\n",
    "      batch_size=batch_size,\n",
    "      epochs=total_epochs,\n",
    "      validation_data=(x_test, y_test),\n",
    "      callbacks=[callback],\n",
    "      shuffle=True)\n",
-   "  \n",
-   "  all_reports.extend(callback.attack_results)\n"
+   "\n",
+   "all_reports.extend(callback.attack_results)"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {
   "id": "27qLElOR4y_i"
  },
  "outputs": [],
  "source": [
   "callback = PrivacyMetrics(epochs_per_report, \"3 Layers\")\n",
   "history = model_3layers.fit(\n",
   "      x_train,\n",
   "      y_train,\n",
   "      batch_size=batch_size,\n",
   "      epochs=total_epochs,\n",
   "      validation_data=(x_test, y_test),\n",
   "      callbacks=[callback],\n",
   "      shuffle=True)\n",
   "\n",
   "all_reports.extend(callback.attack_results)"
  ]
 },
 {
@@ -470,7 +505,10 @@
   "source": [
    "privacy_metrics = (PrivacyMetric.AUC, PrivacyMetric.ATTACKER_ADVANTAGE)\n",
    "utility_privacy_plot = privacy_report.plot_privacy_vs_accuracy(\n",
-   "    results, privacy_metrics=privacy_metrics)"
+   "    results, privacy_metrics=privacy_metrics)\n",
+   "\n",
+   "for axis in utility_privacy_plot.axes:\n",
+   "  axis.set_xlabel('Validation accuracy')"
  ]
 },
 {
@@ -490,8 +528,7 @@
   "id": "7u3BAg87v3qv"
  },
  "source": [
-   "This is the end of the colab!\n",
-   "Feel free to analyze your own results."
+   "This is the end of the tutorial. Feel free to analyze your own results."
  ]
 }
],
@@ -500,6 +537,7 @@
 "colab": {
   "collapsed_sections": [],
   "name": "privacy_report.ipynb",
+  "provenance": [],
   "toc_visible": true
 },
 "kernelspec": {

From c447a1a3c20f391f123f2896c7a7448a1c7427a1 Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Tue, 10 Aug 2021 13:07:56 -0700
Subject: [PATCH 15/71] Bump version number.

PiperOrigin-RevId: 389959093
---
 setup.py                      | 2 +-
 tensorflow_privacy/version.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index d1979e4..155802d 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@ from setuptools import setup
 
 setup(
     name='tensorflow_privacy',
-    version='0.6.2',
+    version='0.7.0',
     url='https://github.com/tensorflow/privacy',
     license='Apache-2.0',
     install_requires=[

diff --git a/tensorflow_privacy/version.py b/tensorflow_privacy/version.py
index 05ecadf..da4254b 100644
--- a/tensorflow_privacy/version.py
+++ b/tensorflow_privacy/version.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 """TensorFlow Privacy version."""
 
-__version__ = '0.6.2'
+__version__ = '0.7.0'

From 26f3d8368f3330d9c1eb719001a65499e45e5e63 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 10 Aug 2021 13:17:06 -0700
Subject: [PATCH 16/71] Add the getting started page to the external TensorFlow
 Responsible AI Guide.

PiperOrigin-RevId: 389961144
---
 g3doc/guide/get_started.md                 |  90 ++++++++++++++++++++-
 g3doc/guide/images/getting-started-img.png | Bin 0 -> 112061 bytes
 2 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 g3doc/guide/images/getting-started-img.png

diff --git a/g3doc/guide/get_started.md b/g3doc/guide/get_started.md
index 903cdd6..143a513 100644
--- a/g3doc/guide/get_started.md
+++ b/g3doc/guide/get_started.md
@@ -1,3 +1,91 @@
 # Get Started
-## Tips
+Using TF Privacy
+
+This document assumes you are already familiar with differential privacy, and
+have determined that you would like to implement TF Privacy to achieve
+differential privacy guarantees in your model(s). If you’re not familiar with
+differential privacy, please review
+[the overview page](https://tensorflow.org/responsible_ai/privacy/guide). After
+installing TF Privacy, get started by following these steps:
+
+## 1. Choose a differentially private version of an existing Optimizer
+
+If you’re currently using a TensorFlow
+[optimizer](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers), you
+will most likely want to select an Optimizer with the name `DPKeras*Optimizer`,
+such as `tf_privacy.DPKerasAdamOptimizer`.
+
+Optionally, you may try vectorized optimizers like
+`tf_privacy.VectorizedDPKerasAdamOptimizer` for a possible speed improvement
+(in terms of global steps per second). The use of vectorized optimizers has been
+found to provide inconsistent speedups in experiments, but is not yet well
+understood. As before, you will most likely want to use an optimizer analogous
+to the one you're using now. These vectorized optimizers use TensorFlow's
+`vectorized_map` operator, which may not work with some other TensorFlow
+operators. If this is the case for you, please
+[open an issue on the TF Privacy GitHub repository](https://github.com/tensorflow/privacy/issues).
+
+## 2. Compute loss for your input minibatch
+
+When computing the loss for your input minibatch, make sure it is a vector with
+one entry per example, instead of aggregating it into a scalar. This is
+necessary since DP-SGD must be able to compute the loss for individual
+microbatches.
+
+## 3. 
Train your model
+
+Train your model using the DP Optimizer (step 1) and vectorized loss (step 2).
+There are two options for doing this:
+
+-   Pass the optimizer and loss as arguments to `Model.compile` before calling
+    `Model.fit`.
+-   When writing a custom training loop, use `Optimizer.minimize()` on the
+    vectorized loss.
+
+Once this is done, it’s recommended that you tune your hyperparameters. For a
+complete walkthrough, see the
+[classification privacy tutorial](../tutorials/classification_privacy.ipynb).
+
+## 4. Tune the DP-SGD hyperparameters
+
+All `tf_privacy` optimizers take three additional hyperparameters:
+
+*   `l2_norm_clip` or $C$ - Clipping norm (the maximum Euclidean (L2) norm of
+    each individual gradient computed per minibatch).
+*   `noise_multiplier` or $σ$ - Ratio of the standard deviation to the clipping
+    norm.
+*   `num_microbatches` or $B$ - Number of microbatches into which each
+    minibatch is split.
+
+Generally, the lower the effective standard deviation $σC / B$, the better the
+performance of the trained model on its evaluation metrics.
+
+The three new DP-SGD hyperparameters have the following effects and tradeoffs:
+
+1.  The number of microbatches $B$: Generally, increasing this will improve
+    utility because it lowers the standard deviation of the noise. However, it
+    will slow down training in terms of time.
+2.  The clipping norm $C$: Since the standard deviation of the noise scales
+    with $C$, it is probably best to set $C$ to be some quantile (e.g. median,
+    75th percentile, 90th percentile) of the gradient norms. Having too large a
+    value of $C$ adds unnecessarily large amounts of noise.
+3.  The noise multiplier $σ$: Of the three hyperparameters, the amount of
+    privacy depends only on the noise multiplier. The larger the noise
+    multiplier, the more privacy is obtained; however, this also comes with a
+    loss of utility.
+
+These tradeoffs between utility, privacy, and speed in terms of steps/second are
+summarized here:
+
+![tradeoffs](./images/getting-started-img.png)
+
+Follow these suggestions to find the optimal hyperparameters (a code sketch
+follows the list):
+
+*   Set $C$ to a quantile as recommended above. A value of 1.00 often works
+    well.
+*   Set $B = 1$ for maximum training speed.
+*   Experiment to find the largest value of $σ$ that still gives acceptable
+    utility. Generally, values of 0.01 or lower have been observed to work
+    well.
+*   Once a suitable value of $σ$ is found, scale both $B$ and $σ$ by a constant
+    to achieve a reasonable level of privacy.
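+
+As a minimal sketch of steps 1-3 together (the hyperparameter values are
+illustrative only, and the toy model and random data are placeholders, not part
+of the library):
+
+```python
+import numpy as np
+import tensorflow as tf
+import tensorflow_privacy as tf_privacy
+
+# Step 1: a DP optimizer with the three DP-SGD hyperparameters.
+optimizer = tf_privacy.DPKerasSGDOptimizer(
+    l2_norm_clip=1.0,        # C
+    noise_multiplier=1.1,    # sigma
+    num_microbatches=1,      # B (must evenly divide the batch size)
+    learning_rate=0.1)
+
+# Step 2: keep one loss value per example (no reduction to a scalar).
+loss = tf.keras.losses.CategoricalCrossentropy(
+    from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
+
+# A toy model and random data, just to make the sketch runnable.
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(16, activation='relu', input_shape=(8,)),
+    tf.keras.layers.Dense(4),
+])
+x = np.random.normal(size=(100, 8)).astype(np.float32)
+y = tf.keras.utils.to_categorical(np.random.randint(4, size=(100,)), 4)
+
+# Step 3: compile with the DP optimizer and per-example loss, then train.
+model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
+model.fit(x, y, batch_size=25, epochs=1)
+```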
diff --git a/g3doc/guide/images/getting-started-img.png b/g3doc/guide/images/getting-started-img.png
new file mode 100644
index 0000000000000000000000000000000000000000..716bdd7886a22ff02c4dca3a4941b54ef8510e9c
GIT binary patch
literal 112061
[base85-encoded binary payload for getting-started-img.png (112061 bytes) omitted]
zg`^@QUA;5=`eRG$#rGX8#FJa5vK?H+IxYx3>%)wP6C2=W#aS=UNfEkRl0%83!Bx5X zjX^dxwP=6@-&6dvP)GXAUOc@u?2iA zl0b_8Y|8pyuvl8XNW71glywsg>wv2}d3 z*`o|e9$tQ4@qZEgKf3-)yIHzQgB>3!-6j9a>iBg#VJM|6elsgarS) z$p6;!@08+P|ET`oO!04a{ulJo(2_XfT>tsfByr+speXb9gEw%03A-z0!$b0D5R)#FPP+TccUm0IMF^_hLgl+6 zUq+qk)a`~=%)|8(7opD;84Wo`@)Oy-8CS?l0MQ5izXAg2pR&&Y96J+k zGq{BQKXCuy#!O@bZLoU%|HbwnR6U%3a0_YsHi`ZxPePtou8;mdK}-q8AkTATc~adh z^S@E*e^DFD#(w&Lr~UteSV$X$Wn+l*&Vh5{|6s8HBmvH62>hQc`yYs=EDCwR`IO*i znackYtNmkM{Qn8!|NrLgL*-&1(@uQP=F40X8 zUWWAtt*8&+_A9HiYx~-gS6u2N%eAGANZ)*efG;mgv~`TycjxR6b`QKBUD6>5>$YRf zv+I3*L?#$ktYc7ZUw_QHR7!1sUf$xa-+bWPnH|`10vFTX2pr)^eYj|#xjn+W7h2mW zMwGp({NT3PmhuqLY7o%8QA#aw^I=Kn;C*?W=8UZ5vKYA2sQvzW-%f4wPyHMppZXI0 zns(_+-Ca$N&RZOsTU?GcFMgU@YZ&v%T;bj??bKx5;ub449X+%DWtQQ#yK_TJuwk5b z!w_p}xpDbqyL=JqXxb1gH#1sc)qd@&Uu&d$sN*qwW7rF`d2?iz0umuShIW?Mu7sq- zT=?IVH@XIlmRN~FT#Y;b4(y36EHm@w)waM9CQCIJhStB1X-+NRUXKg1+&;eb5(RiP z9py#!&@1k1)gYI;{q3dB#~q11Gv91ZcQvk5w|e^-C)vu@0KNbY;0Yg-8=wC!uIzdH zv@w_cK5RE;=&(L3YJ{uPyx#RfyEK`XiOouqM5t_Rj?X0GZZ^YyOZ|e{{zLV2By9SA z>C1tU=WqL3E1gO!xX$imy?}Lf%x>)^Vn^$jC~Iew{{lxRUt4dP?vKGA{(Nrj9})ib zIoheC7ib{|o6e^)sKh&AzVnmPsUjV=vO3MVsM3m#&B}w8#pT+srFUJ_7o!8mBjw`; zH*A$3J@z_$b2{C8Qn@(M1NGP{VL1~9x_{$Z%4-bj?c#E9%Ze>Q#4t8NB5&P)<_0f@ zip}1UNDSkgbHB6ES@}|1E;ZJ?nN}aL9G0}}_H?W6o#5;zr0y&3uFqS!6owM#D^!T2 zt=w{}Hc^StEmlNmro;P#s=-x+Ay05ohj6v8->K2nV$&kTxBfb#z4U(1QPZ)0twVbZ zdADVRYCIzxI57kg>x8f3Y1}$SH%drTW)4y}wUb}0{~X$HMhoYSZXoNh7BC$r(T)@F zC1o-*8@sSt7q>j-L!7)A)J@FsDcSA3A3(HF;Fahc%-u)>tvL7715E-4pnR!gF@a^i z>-P&K0yh;}X9t~c5#aV1T)=hfDKh$;Np4ABb=Vg}j*zlOwf?^ATXsiJnXnvJJ9jnt z$+o|KEt`)2w6PRQstP)g9ngIBIAbk;#n?Amsc{}T?zG6eguG0@*vRV?wtXkCIy|w_ zmUBOOg87D&e=jFbf39GPH#w!Q&9${0MS=V#AN<+= z{E$>aoVGhy09i{NZ~_xS)NSRKQV)wPG4O6rRTj=Xj?|ey)Mn{7OOFhmiG6bLSf z#%_n6v#pj=*zOs^tHYynGr6wT0o2IJvrbQ;d@%*d$wbOKl@1!!PXFdwj9msdw*bO z?oc=USNT$Q4_v1&AneLvx^sh(Cg=)&b9D313UK3}*1t)W(t^P_(A4};VP8^&-XIdb z3^NrsEui$wUgrV-#jwMxB&yD-+lw1|AXZj8&u#@pNh#O$djVSb&OoN4p<@Tk31Qc1K`=L} zYf|FWbXtdYA32X|8LLJd84(_SaOP6??}0)#D^ zt4gZ#Au`nd)5yH6C5orFr@O5(`_mU|Modg;r@z-OnBraR1$O3&NFY%g=ulJd%9>qr zOlQNuURcQ)7D4eej%oFq{l)XBMA&C0nv#m^gNFARueA1P($qYDB0zyktBOm{KLvibpPM zuX{)I+kBw{Ymsgpx)Rqb2OD0QmYj{V0jT4X;WPym#eRAv3AgjR{h2LB z@%PXneIVAd&)z{#j|I4qtazzZ(!SiVoRr&kCBFOwb%vJ4wXniHz5KHk{F5589lM)e zL|xv1w0*G4?Uj0ex`b*)nk;^`513KN<9o^vGPEYL3+Pl)S}B%Cl{+=vrULs9nXv?V zgTD2IA3AVbk(Dq79@-;3A(T3jhez^TQV(0pC2~klSpom~D8w!gzsxAqHPfNNsDA4$ zO?gH8r8Po;^nrh%#}HBS4H?CJv0txKe+cbHaJtOjoG#?mE_8xB2M5+ZMzkP(mqZZq z_ba7{SP0dbp;^5IdDv=-IJ(tJov@Vs`3`Bm;~80uy$*!R&$WI|M~qbdt$To(^zY;e z?&AiwQZaDL&v_q|1rDgOn^8LIeQOnpUkVcVMuUvbWyq=X{C0Qr;l`le_IrV$Q=?b? 
z-C>xvJ;tYdBXYV8>-!^n#P*Fmamd3^V#kLAuir(Y{+Rn@4js6GXEcak69R@#o$B># zIr5`!A4JcVf)vUy>klApE;u<&38^=8@Ka<(1kAH5HY0OOP9cS-6DJK9J=D0DXfhDb%^abVz~4lFLEVAs0fJvNd4@qwaT!aMmYh8OKOB?E45~M>qI=JWC!~Ll;gO|XeJr37aGz~ z5-WbCjF?JilA>{e%^}FEffpjDYlytrh-^#~fTeEsMelCK^HfHSTj%=a)!ee!o8Klc zbu6j8{nk4SOT!M^S@LFazpymwR=}(TP@vORa7&ZuTG)ezW0Lip`GQ30L2Up8)0;Qo z_JAwd9(~1ges9R`0$oZofS|4AQujey)qizv{|lmoUh=r#t`=hiueg_&->GGH6s_){ zY3qIekAqjOj<&JWlG$2DK8J)QY+RRS^%vbVxS;01`nR*Tj~>#Cc)yPx9)Lc1iy7r0 zBbMUwr5doVaWiDX2)uD;4fk%I-3417SQHQ}Z*-O^q^uPbWJu9}$FO219#-rseyO@OclQWg%4@MBgZBI@6zCM!w& zu;;v01O^D=XNiO!A21_AM?c!zm znYerMMMOt;ByjZ3-p&{} zV$ZkXHaK?{G*l$CR8e79nOoV(;*?0-u9O!Z7Jtx#!d5sunRlX;iJaYyRDNvbQt7V$ zFrE?JBV=Xq>9hwX#qt`=KlqOztVxgTl4pHG&_@qRj&NQa-J#7_be@tTRCVu5Fx-R) z*&LVYrSzeKK(jrBFYxE-VXKo!ln`Z`Y;p5uJ1J(J*|3E7#aZER95I!upUWTR&ikqc zbNoJi68N)Qi!%gSnaEXC`~oX2#IIA_(FtB{Q4TaFa~#Y*PBlxSNxGYB^kto+BOGvg z$&4lEMBK0hjcH_NAfMfH=x+-+`!%9fGCURwYyFrSsLtjwzby=<2)(N@FsXWs*Ul@z z6^&*83f+>^oscI+ig{EvZr(I^)eosdr?H3<`!fCA3HE#Qa=JZlqt;R)F*GwVhwR^op^T;#Y2jG#*DY$-sK<)yk*l%x+ zuP>#&baJnb1GOPk*VW57`3DX5W2Nw${!U^h5XNPbR)^*+n({7yAGCm7WtwtC5)JWq z_@SQpQB@89o3>_?Pkq6!)9{n))@i(dVom4C%(y$a5~#fL(e2cZX*%EUi50!24`kCV_tZ-JQcGsb&Pmf>nc z0e*Jg4Ot{t9{$OG^cp8d6x0_dXhQr*+R@M=gD8jQ@eN%!Sd;6jO#8_T; zZ!0|&F-eC%K5bJbhlWx`!yi9Ku21{zU3^4404-#>?}`4MGEwFCZ3H;BZ&3|pM?ha2 z(ID{<#fiD)0atgGI%5)dGWBN!H#!Rko;CvPSjpWWZ>vQMgh%Qvd)N&J0vYBfE_W4x z=pySCSnBjUYP=i_dur1v@O;HB;2LH=q}aN#f2LMc!9ayS-ca!MdPR~+TX8q}w6;aV zwLMU>spt}R`1>)DlYW~44!mTjVh1{cu#0O57kwN(4#)26iGL1eN$y`tm0*4GDJVU{ zVf*PX5crGT=-jtM=%9a?q5u5XNnm`>zC_8}r&Uy5zf9gR*G*uPgY2>Ag-5X|udo?p zI|G$8*|~dmC+-$~q|U2vaB+7gFs2KhIc3(VzwKfmyoaeu#~71-`;Bn9dO0h5GGZTO zttS^iJN0(!BueceZnpv8m}YGFV{=Y4owVkmE2yE_b*8MVwjRE!GZlQzZcIELntphT z#cv%PwPb*bBE+#lKaFWS`-K*(sxDoduYT{UXmYlnntiqejgn%nM{At*tSs2Nw*>}x zARK8d$FTYR1f$_%PEA!cUX79MlH$cv3#b5_Sr_~IK)>`WFZ4PNKCPv;ZOxR&xs}5* zNHF#0F22hTWJuRVb2Yl09DG&Uj++8wv6WFtLmBUx zQHn4>-eSg%BmDoqdNZ-H1L;otUDaoX<+SrU(z^ci+J?U)d1 z&u8irNosVWea`;ap%*VQ2bvPH%*Rq!(jL}v9~ehfGgX-i1@wSx_J+NIlbWb6udaLQbMbLcMIDtM%_*NDK+@y z)`^w#z*1Dd1UXTCD-Z?&0bw6aR=H&_+K8aa2?>j1MwBI;pLh)M4FHTb^v-+rnL*C5J;5G$Hgg@~;7& zP<9*tbXtlCUs$xh?Gp)2T{VNMt8*cyg1TJwRV0s@w1Saw;EGit)m$X z)8Z@!Wx}n$ryg@fEnV}u$sXsENn3nzamhYii`;&?W_4v$Q6ewG$9P9+({%0if|dUni`$9a>uF*_4{e3PU7z6Izpz9y#UdR|moJKKQl z(|xe-_(hRygZukN?i7!uM71Ku=kkeJ2QCl!Q`?B!@8T4o(2D(R0i|3oLN_fhN$I;0 zmWkrc3`uH!hZ{|yA+agaLuITbORii5%1p}CS1Ugjh#iTkNc)LfEmQO>c&BGLWNQc6K*tA(vK?R=JhrdYTGtapx@ zbY=ynVajF~9clHS%F)|fHmSBpTr0LDkma>fG~*;b)ZWB-j$6EgXD6PwhHLc49lR@_ zlk|ZcpkCuJKXzY~Il*IQ^VCgb3Ohx_ z|9CZF!!N;A?2Ssr7wlgK{RbWi#Ni*_^=2TQ6FEjGl~fUV!4l@EpOuy|>5#qQtNpC3 z#x2)aL&Af6==UplL75|DIKT~mZzjh`;Km(RcHrUm;}m|q$94UFW4ShgCiALn;Qy<3^h2uImU ze5E2Sb`dCI%~o`y!-6I-Z`(&@yklZ+A8<`IE4*4fQRt)4Ia_YOxrJ*X0S~`#Jj_w9 zI1#m*g3``^^2Y&OQ-4ad|H7BA9EnP{4e_z%2qzqMwNJ5sIB+*;EI$tpg2X(SBk zn_DAw7%3#uS?*U#KY-e0BmGP4n2oOp!87Sr-4>Jbm-)F<%pNw1Eo{6> z0J5$;9-cbHXd0!3@q1%lJ)VoL@d`21DLYfVOb#!F>^Na_fJ|{clqqyv-ZJ39xZshclX7OdAi>61R;l)DP)Z2*9!*c&#tx*5IKj%#Q~636Jm- zNqO`f19y%@MfV3ow75RmMgKL#X-bj_GrvT&EW046u>a9cP3(Lj2v$(qI7D)Wqc z1R*$Lq=)k)x0sgM;oDuRjVuJB8q%o^!$4cIB%ih#0znlosaWo?XUxV$t=!Qfm(yHF z>)i=L#PKsXK)RPQ#$s3j55J%N(OB}j(e9s{mtvvM`qT2P&c|p`Zysbg1_;bYwKIsW zUmthim`7ExiJKG^&Wu?Q91_M?&P7)A-d{A9F)nyPi<*Oj;)1-;so!Wn&^Y%ekP}ZgoodUyI0%Tj zi?J|@F#K9>7g4F^bDAL{RG=5Mz4qzSU5jt!14SWbwYz^u5Bi`Ml%cRjWV>kqXlhumnjlpq@aiO_+Gps2v_T!UjK2m z%qZRXcv#1tHk6?0whfhR@R3p@=xH}F)<3S`p;+2|HX`sN)Z5okcaX_VN))YX>yBM* z2K_@NRajIkE9NS_g{}L*X0J%VZ-e&hjl<+rprVP`MhHq$h zn_sE11zmST@fMort+vo7yS=;_6P`9{7N%qsF?`{Z<@~w*6y=dGfQ)-l&(fd8W 
z#=AH!n6V&%;gX0svhet+jv`goSMy_ls_(M?ot54@BlLF*C!2n=M-uso4J@?g?P_9> z&nLafo1OW%8u z5ge~=uGY5`Hahm%-m=6n&x%y6`GOi^%m;;Q4NUt*Px>rlMx$YvdEQ4Ad&;2aM_?r8 zkRMIgoRsOfpcy~#cbNw)jZ=M>;gw>7IGrl^nsBzjF96wp5=UanSKhLc`d}>TMAFK7 zKCCmW!_80dtPA5)&4%Rt75GD@75;cS;J&cmMr`wUr3?zj8AQ-k+=UJaCNO4LVQ4o8 zJEYfs!S6MexF~w~h~@*=@__F;6-A-V=R6(qxN2Tz1102*ydAtVdy1oX1IJ_eea>L& zaVBsr)7hUAepe++PBXzEWc0W6>sQs>($`H@fFM)K@f4#0V+m%wE}+PX?il5SNZ0U5 zk9hqw7Pvzi{;QyFto(&FzEJ|>^I+f8yXQ1_Q)ts(J1KJ3dc{5!*Ja=9mVLxnR`f~N zUd(a;o3i@P+gwSeH)2xncwJk{p>}9Q{x|r{K{nlbH3iPTYYX(CKu9b_xekMPEql zC;fQ!P9g!e9D_mfuLN4ObA|~=E0LZ7h{Ad;T^LAbMCZBtA-o}C{>%iNNMuQ8sYX!b z;Xt75@FeuLPPhUQNV<<~8>>r0VsWof_%)Xrxk)r#C{LGJ zdyHVNbsL)ZVboe)9$#-*?s@zm0zugENTptuVl1Zgq_}I7@@dkG z=OON;2N&5hwo$-Ov5%=dS&uCkDd~^2Z90fTDwffXd!aY-a_$oeb6&tU&WEHxc}y z9@m~^0iY+s02MTS7XGJQxNq7DR&Uib;TE2<5}pROsL4u zBiy-Eq8VN(Q`qz>VKUBho!>RE47Hto4EvdFIcSP)ZjwnJw>zHZMQ{Zi^kY18k$YKX zLMnQL8`5=s?0?e_$Wwjd4SW8_27jz&-e=h&lFvkprsWq9bDO)2kX@bNjNHD$Xmh4D z>a*dD|Fl8pJUwRnPW2(sNw}Tsb1}qBvw+(iABf%SJG}}ce+>pr4vh)`p1SEuu&E5? z=*K~Ip?Cm-+N6B=(}o;R1#9tRSFSx%rRm)m-P;=I0#}$LP@z6Ygt{wdrWUKJd|MR$ zLf#kk`#_)L#OZ-dH|wsm81t$;B>*Q;Gbwk!iuHgEucBA&HnFFsYaBT4YARAe7VM5m zYbLjTJ^5Qv7Ta6W-foZToQ&jbZ6KXwtBc1j;WFHmAJ=Hq`LtU@!4jxAUXg{4Zxy-y_`<-> zP_sXv#p4Cw2o9Bxz3I0OD8kUIBFQ#771STkt-!3}uLM!-{mS@mvJ)7~*{lxXY2kw8 zQcuFjs)U4rF$}NZs68$*juT%Gpa!A%?$pa8Bc4D}S4AvndToH^uCvScFj+aV{Qz%$ zaF+f_fEozj2(;5H;)881;WOQ`lm6F?J=AFbcGeP=UwYsv^+wdTtIcD6&aI6!50=ht zHriWQbt_E|X1zvI9<(i2BcjGsnOAlYr1w8ZBN(walLB_Wb{UT$?UM!m$(yC--my}# z@4R7wv?wk}M;^3tYOEZ^-P)SHVu?j%@4da>|57^1qN#{)ntt;BV4k$++3z!*75v`c zegFiZe}tp+Bao$hY`FRrfFM}8rBhTFPb!}ecCbXx$>z4Ub^eYp&wsN^3@91YN4D2> znLTN{blrB)a^j|gGy-JMt6kSZX%~pQ1_gFk-2fIUth5hm5NiGNC!R?_;mN1x&1JX{ z^{04v>=091G{-L!DPH}8jYzY4;Z(@eT`M>OIX}E!w2vje~smD+sYOpRjK&RLdT$fxqamH_MeDmD7J|iG2Xs zX3_3!F6(riY7UI1_Hpx&c5k^bHK9!8cW8;YAD}~r<|NdIW;fmhF!ebh+p=DD1dPE_ zbsCMmoIu~Pf4#!2o97l}quSE{{kflv5fV~SmlL$)J{ala)KvhbhsaB8+#I$sUR!ED zEig4~A<%F_P`5|6%UraRQ6#hrej0H{#js<#8O*4cIBtGghozFz-p~DwOTKfwt5%)u z1+CTGyLwJugwSUiX)ZKdPk_t)|A==HldfRIo9WgX%?7#)>v%tP9Sx`-ia30g15k+@ znol=1?ZHJ`_Po2=U@r}MSFx6rxCDxu=sb<*JIu3?TmU%A`od@)*Bh>XHf-AbGL|{> ziEOavWo$0Qj|w!~>Nge}2>!@MU`3%Tbic)b+t9;(5;CCE1jOS%{HK0gCY+=O%T6Om8rUf_!z z{T%CSTB66?gpjNO_TI|8pdet2sqXeJqtb|e@?I3{jTJ!t_$cwdfQ(8jG1vnPrR;== z=KV4o417K2<9xW3FD#+_6@IKjDKIhl&rUDp^d2?_@l}d>&*i3yOP8=MR8g&Co*VlO zq8}Bci$|gMCq_5RI7_OGWB|PEfyQ3`m3SEGzXmO9__@O(WkQs9Fv<5I)(;trsFtYb zW*$-~dG~F3Fb$i?d~+#x^I7z3Td(P1WSIZ=MQND?dcTuoYhvp>-43o6mW*GDpPbRy z0XW|~K#d>{Onbf*Jy^RSf#!fWDpCN7BFy}Vzj5z>iu8q;NtnGg{gkc+ z=dyZ(j%*>L@j+7>#!_gJwd*#uB2(Y;vdc96tXB%4c`*h|C~!*^nX*NjbcWgX2L z>Hml3)=`mTLg4hiYzcBgeGzunbG1o)HZ0}_6`qgYuT~e=K$3=+HJw8NLr{aAF}=jj z%R`g?ZwvLrOGps{h7h9h^ApsqteJMWcp`oNi*R&2Pyd1TJwCzm=e#;?-XjI!WO#10 z&#Rs;DBwJ?9u%9;uk3%qF-B@}5&;+o2&oi3j@l_YWrs?JR|4akBHVNhxF=8cifg-H z_cgSNjmdjm5(qw7G1K`zBxD@mYZd~e1lZgZDKFXqbM1Yk&`TG(m38LPfV>Gc;BqJ>ph0Wnl>~;*VQB^L`Un>G&%I!LY21#$Sk)KY&y&B*E5BR9vFWj!E19*J%XPBT2Q#$VXd1Ed+aFO|<^ zlTta`yLa`v%+J8B1qoL=boJv-ca` z6Cgd&-^C)Shr~_Ags}^(4Bg25I2z`cWN9X1PDwxu#+t;GH{k&u2e5|4!jI^tYPx{F z;p~bPXG9BHl#*}SsY>Ts+08!uhBEQ4&SmF;YOu`KC_4dfRmce{u!|akE3F?7Gbq&ZT%@<8=m8jh<(sLk zM|X6;3JA;9v>P=$Y9MSmDZe#dK{q^3$m)ff08@oaoAeg>gOA1{aZTA-s=~)PO9FMIvx#=evdEJ*j0( zLbY4J!DP9yX<>nJt>X>DP=+bKzel(2GAD?j>cdGjK0T9fcV6oLN$YnDE}WZ0D$aO6 z1qY3{+@|p$x)qYyty^*aPQ!NHW%Jf%EDG#3Trq7of@mK033H1u0V{l<9_)a%v_c0> zbh>5}KRxFPD=&r$pWTzg`i&8eLNAU>)o4if;yfm+4-t8Ve0G{;Eu{0J9zc>i(aku z(|T?@Zf^MH>deSD?O8m{=jL_fl>wtx2jEtb0ToOPmcq^jeA19-Dz#59jC zegM?%J{4=b_Q`joksP5%(GNfK43tYwdOE<-70lPFo*}K!tjYB`(a$hPRTKxCSkK#e 
zlGqn>+jRw!3MaT{z?>&5z#Hr~8{N-LdNqKJh1KxH$AXS`pX{Wcb8XOnFBYG@XETg`{Lf=t3x%JMU-za=*>EcAW_H!DUTfnnZ%htaBay?7$|G9_oYTr$IoI!}*4?JrT zwDm-iN}D~FB|eUSu;8y@QND_nA?HjotR zy5s{Srd0Iyi(9p#%%iby!|E)!r$N6tJBJ2 zpU{i5%`QMjuiQ0#mU{PnbV*KAqlY&c4SOiNaRBX!sivYA)^0P_mQN=)bSMvNzTn=} zWzt2@23lS*QJ#bE_V~9Owz!bEw+xn8v-$Snh6w7Li2f?hW9FlNKSCe$V}xHF~)f9TH{q)IsD5cbQo^ zpUS3JIN=>Z(yY&NepGBexIkROeN5G|5?x?v1Ogl_>?@YH%C+EM_L*XM1d0}#f>R%! zHURj>eo=ojRG0fRNmx@;fjYDf2L!hp(fTZh|$MaU~v|npWxY@VR z96=hfMPBu}2P|7vd*Y-dLl(uPUPjZ5SZN=4_Y_4tR(*hPW=xJx9;4S0(fI@DixHIA zO>ak!AcPhW^t)tD?QUc@{zZqz;nJMf-ZJSk@s^c3)V->Hsa^De$M~>i9GtKY#tgqs z0G{w0@7B0pj1Ie|#|bHwFg_!p|IjE$Ad#nbP{i=+@i@n(8idl@TrmQbYHdqX3Cq&& z^6nzcvI+sux{4l+^PFZWU94!gi?-nBk zJZ7FipV{DdZ_&b}@Mjm4+>i{`x-iDGkNey~tN?ndyXrCjcJjJ@f@;~8qNckCVu13I z>B4sU;~KGrz?t9!l-Km~U5J)VMob0zn5LD`2->WzT>^m*vlUQNpt$YtU#g$z zh-lNaVA1$~TEQ-#IQu;^sR)@F1aR^tQKui$8r+K@rv6lC)m*9ddJBb6QO^eGP)Icw z&Di2p@d(UuyxMh|*qMxt{AcIeU@<%CiiMQtnixq^c6l+Jx|#9#Ef%wpnyr|AOaB*H zXZ_an`~LlHl#nHWKpF%F2q@hkFgm5X1O_OebayC9mja{VyZ7ff z?w{^IVEbX)>$=YK8IMiVu;+W3gejnGV(oeF?-Cq@tU_HHp4T0u;kh$P7~s9h)qQBc zj=Rsw>IeuEo;N*9!G1Gnk7P6h6@-&ZB%{@8yi(h}2e<_ah@B5B>x6J-yG|n>D@{xu zEc9EcTHVu7E7ry;DH?7^QT$?K>*TM$sGg=z^H11i*J3#t9`IDaL`q4`P8HcgHP|Nr zuE^-}D^vqlXgoAqL^OUhS-zg8Z-Ftq-*6hTf=VP^l)k$OF~AIncC7n*!(fa(Us#jV z{EKUFcOjtcPUZcKv}#aMF;lsA_^y{cj8E{f;NlWNyfF6co|UP2&`-_i#%hg(M^xKt zPE7eRr0o_?ycAnHIZbF_6&AltQlO*@4QU~g_-A%XccecH(T1?aeXWEe6A#|In+s~R z+W=M6b{0XfsaeouLQDaVlaE6V6%jSK`cSS~pz7oB*ws3#XR}ywPI-py4kW;5&*vjb zg-+`1tH6k=t3%IN;eA&zG}}wsQL!AIQ>BM>&@2VAfz2l*QGNXY`8gdzcg1hPT0Z}+ zcUtMy-mggy?3UQgPN&>YIwmB$I8*h4{LlHWM!MnWRmQL-uvn4xhKY_89`=IN7(d(C zInf50J+&tuwD)7GMu&eU%T^oX?{OgRZ;&9OgRmrs_{I-dSw$1csc0%sy};aOG7qpi zsh|{rptkvhh6J7E?_}>4A9Qoxu~?i32>M8f?2WPA zjhY2+pl~H2k%EdP8h2^vVH$eML3NvT<7KN(X@dR3zW`FviAWqzaK`-LZuKTxx(L*!76z zAMpqdm1Q|)c<@obQW_uBWa@qKG#8@)4&&wQm`{DKkRF0}kH@2CM-MW4%n{Wd`VEl= zjZ!9Z5=u5XT?H*M@;#q6BG%fJ$vNz(CjcB)F@+QXqUqsdUjl*-$ARwCUvaY&YPPi8 zXcyft_u*ZhXEky6DY-l{(#*0)N;r2WQhgCOp}K(8y-D)Y+#S=Mmjo!3GNw zA~HDN?YUt!9g2#Z)uc@ok8M}O+f?SUXm`wspl2>w6{Z2`2kaMMr1a;MwU;I4Gw(WB z12Mgj!lw8<^`%1o1w4kE#X=rIo?Iof3Q~-7pGh{gnhHh{7i0@I3XqhW8BSsb1GmE* zjkK)-GY*XICyRW)t3LRqaXJ);OX5PQm`BhA926(41JbVrkN3wMRpn7@MgJ{Mr!#Bz zM(FRSch9hxI;G}WQ%Q#Gs#jG)O1|Xgy}siMA`|4^_+JP}NcRvENde%6nabXb8Gua9 zbPTnfhT)E_SgvS5`q<0CcQvpcqWx&|=GvjXBScwQ(6QU_sp|5!);z-9etep^X;O&Z zH8i>-*WoTGpAnY<0T@mng)+f;kD4x$;O|_G(YM7r*;$iYSGeq!4fcDbM2i92{`NYl zPENRET+S_QMXswWd8SqM`}}vWfu2Af#dl{heD2=@@iM6vfW!dfk?dw%diUV?$5Mlm z=|NihvztZC4zpX8 zqiXV}`>YoNZ-i6O{1-8NrGN)^0+}b#dIB_G#@;1d>jHa2e|`PS-bK-$G?tNzvBjY&lFf`bn%M3&<5|FRtYah`73lo^DysQS z?(Lf%>?LCktzlwT&rn{UtOGL95TViHv&zG=?-VzKpA-zLlRW3L$e+yRlopo{m<#q` z9xRsQ02Mtq_=j}bL;z4%L^d!uCmWlDyH;j=|)|V zW>LqemR>3^pJck0n|LF)AJ~>(%_;A;C4Cn*>p*>21p7a`4ktOSq)h4Rj@ok=4i3p> z&c*%X`Gz}l9%9_6TOK;#<&{zG<3IDb0`pshof&)_A+1(_{e6I`k(2tW?N^;isKWnl ziI{Zt(`;KqOlmT_iDcvnw^`v0k_fp29^sQK47s{=!*3FD(R}#1P92GNqFN2kR~ghM zo&{%S8bY6u@-tJ4rI%;QCF}gqdVv{lEeNZ9`cdbI=T57D*WjGOXhIY3={25-0YPm; zy=~5BCo#qbZ;yZI=teE6s$l;sF;GvwK(U>Nl`&hmphLc!Na{ryU{}b z4vJ_^9VL^IW$|m|TQU3PcQ{=85_OjRmw!T@G8~p#2{Pq~6b1DkSV7k*InNC$9_*N` z>4AJVUq2j46pP9FUBp#nd%}~p196t*=Vk#ZrNEIR7bQ&hW~7ZfPW+k=(4?+rTmccs zBqi2llr_`;4Nlfs0P|pp60y-eOb-=3{NCl@ACR?KX!guzk?j4$R^_;XEZu4^%eAf3 zse6O>neGmgvqA?eIupW=S8_zB5O?iAH7!LPDa#Q%qd|JFdHYwkOuILW9iRnM)M+xU zZcKSN*^e-qInr&@Cu-u(mzG4#aEF`;01b(GP%ziao18Ai+P6M#N^mEXG{Aj3D1j(Z zK>F-(NaX^hzlo(r>|9;7duw7Q(VT=$q+pY$3Mab-GuD;iS^->9CN8|!=|v(gk# zyHxEHN&B?3ZxC+^K$YJudNO2+YB}oY18o~upN0h}x4x)&#klpXh5}k8Cz>vixn);_ zw-#IxalhO(u42py$c~rUA`j7K0Hf;DFJAS`{EULK62*+*rLP++XB$Y7WMX6mkGjzI 
ztKr!ytCj@Cj2LPhfuoo@;jEH31Y6hYgKr}_ALmea{8@G zR|ViqmNK20S(6|-!7&>+C>=kt_{I0J`ADb6+4n+3%Xvzlvyx&DJK@u+*N&c?L0iYF z(l4D&s8D7VmEHpnLDHy=Qg%`D>~^>8HC)S^oSxmt*7Y&Qyw2k_KR(3IDgzaAfsVbNDFlf{YKHJcS3bhLF$ZR2@P?<*GQExo5i} zGB0sl+AWVG(kild{?zvis;WK<35z)3yU?-L=vZTiZz&^jY_Jxca7CCWjXukic^Os( zf5P$hcxX|~InIO9jTkW}GSmB|mq|{#>XZ93fZCIJK?&I#i&GkR}@&8x=0l8+#HP7ICfyZDMBM?{EI?OM0 zsG9bTw91n*esi^*inDj3E;5vywe~r?S(GBj#;le?Tr}kLWIY#SQ(c$? znWG`f_N~96|2!UVqSJfZGJOrAeYfV6OFS<<9*22pnHP5`TCl$&ZbMSf21G)LQm_WLTYtfq#qL? zP5(j0xigU1KL)&f_3O&8@2Fbezh=wD!Fb+BZuIYPg}ai54jd6{3FX`+;Wbx`IZu0esS z*lYN2*}ZqO&fcsSz32q4<<>i7&i3qi4dAJNUVwYR_sg)n2a|~t4|g2&pE@UOlL1a-da!i-Wm*1UTs3sT-8PS7Phplgmc zkuySp>84M13~y~Z+mNocw0=ml=XCoN=qhZEq%!j`TO{b#G*J59q$owaaeY=HK+!(I zW0(l_&(Eo;#fZ>vFH=joWbej%97@W!Y3iR=WyJRzy;ii`#!NsgP`X4?E@LcLSRG|O zUJG~Xn#_>tv;9$nJZ(hYHV6QByvl=-gju5{(^=-GuNTQz`^t?ZP&2`Q=6Zxif$gxK z8QgDuFD~B~f8{)xInoli9JP3m2gu{^?aE%>fXqMlKWAGGM^-eRt_`LF%%mpx{Os?6Psg`I z=2Y%v9S;CGyYTU)%`x5*k@)+|3<69B_xBeE9L^3!%^=a(FJ{!4^PopqJEyXbjO(Ki z%``=1P1w2kmR5Ig(fanGCw}ECp^++hxD<>0FQ4-|LFtiq3s}As6ATM2ZeCQlR0H;^ z<9dHq@ZL7I&+WA98%LzkB2ump}!*Xu~#F`Tnsl`SwI;2>?U? zvwh6UHPmAaFzdLSz!qDOdrr@PKO2JkC8TOWRgojeH+$zAqHd_(>j{&=$~F(uNB=3U zc-d0^Qgcd3B?K^Gku5ECk(c(6YXSmI&_!qQ*Xy>?Y#E(~qvDZkq%KbqHcBhpaP^1fCbx6iB6KN#tU zGTfp(nMqT6)6PJ^s|9`p7${9r!n%PobnEV`q%)L6DpLO+iJ} zSO+hWdp&RoqEc~ntNh)$4pV+$1Ov%5JL$X|sHr_K ztW5U$LSVa&!3?Y!l(L3Gnaw=;nN6-Ac4RSCT8EIRVZ%u0ay|A6OQt>%Bv*4SdH7}gt?Z9jPdarBT+dk|K zL|ouIU3g)eK78?8JId}P2KB4l^uRaQNX6}2`J$m??^BSHvjc_IN z#Qhf;w9%0>@=&@H-JbztwN1if3P8H0o);JEtm|`N4w7y1%PQ5PYYDKc7kGoQvG@qE z5d4YOfn0QcX5;sGQ1ueT*;{+_cIbC>olUi(TBPyXltHT~m5rd53yMd&^Q7KBukZ|34u6&})kCfM&50`J1of}$J z6?MM(>2v=@3tET^drMG}D}4q6GCN)q`LmQPlxNSfe4!3GJJy#SZIp_acdL9UoC zzA-$Nfm-g%rs0bTvBDY2zYJ#N>%tT1S4-Y`oA}GtN=|-FB#v%5_7wp+I?vP0+v~hm zt^-B)lZr+ibSHAzhCT!^S3IH*KiYj7^BrMZZ|78BVPVTqQC-6S!8jx*h1NmGH$vNp z&Qpg&#tim+grVif*E;goFrnYz(2^UAT*~m*e^1$gOY#Xqk|iXw>co4#|1OdNt^lrc z-C|~eDj!<0DIG_AcZ@5@WBDrz$Kf=X@Ni#eSS8?SI;m2_7q3jNI&95`ZIni53n5YB z%t6#(j&+LVx~!LXJMBid%Jz2L0h1W*%!K{Zxxp6nE?5 zysxU7Jrbl15kU;|h;Pm%q+!I~^j1iNYB%JXh?I7b`th9HIq)gdtQz zZvaYAW#m7q?UmV62cjLn81?rRZ4UW|)$P#b2c8PUptnKlz7*8QF6Eo{UFE|fQl|!3 zu9*lMQ4w!War&lNP)32TL};UZw`rC{tfs5DbF4-`ZLeWr20&yBx-<)fms>YCm$3@C z|GWioMSNp%HAaeg|Dq7VlbPq;hzP2^v;QHZuO z_-&v|KKv#iH1$DMI5M8wX04jRq0z1Q<{9Ng>jNeM9XFzC56$-1Ax9g?UPko+re9_d zPnRncVc)z)ZJvmzFA47kj>Xf`x61}-(O`{g0Sq!x=+=(>y(p(8#K((~HpS@CaPH3> zzhC6@0n<){|JDMRas^%vW5W1A?E#J_=wZR*`kA_9w9IdE_9t&0VXt;oH!pP_J}CJ^8a%Tv;#O$>%0CKdiAEEhqtK075d!$qH zhs`FHb#!V^H)d|7?n&-BPCp4z8{XG}TuJZ*3-biAaX1sMzUBq)P08;T0%oO!^FV3{ z=9ZXH?`TRM=+i&s;Q@Do&3ku_B-22ES?^a^h6Ir-1`%}e3Rtle4_sQKUf%m9+>;U{ zRWal2=~I(O>j4|DXYC!s`J z^c*RbI-QvIGlo9GC$@-r8g4*!lM!K^hE+rp`l`+Uzr9ImSJAS_{ zKydo1W87Tw35{6{ZQ(5}-AntQ|3P|Ge_hAs00F*IpZ4Wf>TV;5B_E2*nmOuEs7g)e zlTx4pfO)&?`4X7CvH!W{yU!+tm5hocYn@{|8&Y~RD-s7@Ng>(GLXPXe? 
z*LKs^iv!htK*zoifArx;@390pDz-aov1l$q0jFR|cdsR_Cf!JUA-tr`KgU@c_AQdq zGrd9Z=%aRoEehCIe!H{XZ4Mw_cbxSNwVTDv8Br7(9fS1tzfW_L9Z_wfN*6{JY?Qm zQW}%SqCy`m3lJM$G>BmiI*+ZA-TN2zlmoc=Sf{`gFKsy$@|KSsug;A`*(}#E&2m_l zm(9VWCQ6?-TL2zXK^F}nn$S>Khv26=0?|sqrDq4_jed{E`I_St8M37f9z19l^vKRZ zxA>kNrANEbccysLJe0_v`lF_T_zBg5ue*~+MHx2NWi`MR*h;L%7OAcy>w#pq`P$r* z5hB2s9~YYw1fkqVIjk*+!Nv~vzr)R+DY;d@L3ZdXEM9!ds07vQe~m7AZ%cr3H-~#I zucKE~%sL~Jir?(SmGHE7yoV_U5uaGC@4x4B$SjS=tTL1ybACvxe#ub#6HNa7nwSH>#n&=;SBR)ExFJvXSy&Lj9RKx zGs9x+4r}vq%4t0HhVP<{dME?Gq1I^$}Ut<0-?)^+a&uuDW18;v4*O}1hWrue&Y?yKo; zD7QmoJ#YNHHkM|~?hn?jGyAd?^6S_Z*}%{Mwe*S z2JdBW?ALj(qt~W!)*wfIan}Vq&p$mn@Gb$*#1v&Hl)g;b)@sz-D+By`Mlk7|8B{x( zWt98T6_DzqU~N=+^N~9tsUkS1riLNPck*d}r^8tB8#AmX?^M|7eSYBbp1mo#J`RfW@Xx!2bwQ zKGu6_bzi|Z(LI&(y&uUJZ5F(6n?6~`a(I6G_;n7P{w9U4l*-^n+KNWs6XerQYq9aFm?&Jnti0n+H&S#Q9J}#(l6EE{;q$4lS~p zIOoCfu4W-_qdEsGIa#-l8GAfk&I1nbEK0E3fL@#h$66ts*9G!jABrCG0WlY)U?HEU zcuECc%>g2~8Bq`O;@T?7Zfe>s`nEtrzq73$lIJQ029Ii}IjhEVSAaYa~o1 z)W(@3@KhZ-1vz{ZYMeCrs+>V)jAcedtfB}(kOq-lAEQ5a>_^s303Lnr1(pNW%x5Wi z19#J1d`GqHr>kv2c~g{bc5dB?!g$M3H9K@=RX*n4kT+>8nakcHs4%Y{V+*z5>k4de zx3*2^t1%9Zd*VO_(yn>$Tp1RV8;d8ZW}P3&2Xwnsseh$owHHI_EOB-8@xJS&>`5>? z>mPP>DC3zu9oqaNDC<>{DiX0hk6Hf}Aq4zy{1mRQx|eIYE4F!R7MewH7LHFescW&cKL@hZZ0!rFf z1(s!x*FkvGMXF;eUC&3K?OX;~7adYtHV2u)I^ys17XTe(Qk-4l(l~9VduEDkH~(d4 zr`Qn_Y+F7IW*dY*y7JVg`H7Se$0`hd91jEc%XU$Bs*g#wm4m3t<0@$=%>+=i7raave7J6Dp}xe}@o8 z$cH0+9j&01*_44Lp7S?CB}_pZ=ms~1VqHX|qLv(63;M#B@A?D1Ag}0oKGUKsEJrBP zo1sg*86-}mqgD4fkZi;B80%mDcnv|(eNAzl>gz!(%tnX)&{;i^m zoP1b1FtDCrG%74+AUN|(9^@Kf#FG1(u*qfIqB8Z^8x_Dj|7Yc)H{-qw@pO3mY7fg_ zd$`o6&rQYIX9UpfhVgk_rv|$z@SN|i7&c7b%VBFJ63?@Z&_p(A=r&{Bb^@9lxAKRC zaj6HXq*PwI5tm3-M_mUNj!y{@#5{9+J_QNQL1H|EkAk3a40I{&6$)nH6O&Xtbo&-g zH+$M=sK*vl?@088zDRYu(>0|-YV(iRA3{0&N33_2!{C1~;ZfDQpaI6R4GgwP;M+n; z;`14qWPT5N1f{-lMaT6mQ`%5NphvvV+C}ESTRq1z3b zO(6uJta~gBO$sI~>z`KlbSIqd*ebN2jyT8#yruO~`r8+LEhw~fz@jlxWW;G-29uD=ai5kZBQqCc5O-xC){brr(&>qYDJ^emHHn;ob8w8@+9%!G;RWx_#EX3QbjNq2E|^BI+PJ|&6yz~ z@K-@y{&?-1j(-L?YQ0>^V`Y*tcLj~&i0@kC+CdV8Z#UA9j)SgidjP0c8TiFOB;tKo z5~S9eftv_0vnx@YZAG_A#nGK=f@YF(Tb}vt%v5p{h+uuIUp6bb>*Du-XwiOb9>*1G z+#AnkwOQV*KTlenH)>Q(P!=hG&bliLuJSd=edW3yMD|p8{M^*qd)0kWAo^XJ$dljh zHS_(D%rrINADqizVs(_$J_ zYJvj?=RD6E7F+Tyr8Rm2Jc+=MCEn#VwA0~`H9`JI2qdkVB4i$8i%ek&sF0(uHV5jH zdTJD1D?n+Y=u5GpLW{(nj)B-zo(S!4oR@o@evT_# z$H^xmf5nD%dae~a0*Avaw_H<=#a4gSJukJxIpM8PhMOo6m3LO6KeCicc-ceg!k;V* zILu=Mo(OyGuc~UhIu`KMu0)Sn2U|++L2iB(+pExKi?-Y`Mx{6L;BNXWduy{jPWkAe zv6kbxiwl!|{f6c*J!^%rq4wU=ha=zJx#|vdUtVhA?H;QTwj{Rg-FQ&sqQ3#(q2!4U zOiICw40(zmJEks*!-BgT&In=IrEU{s!?vZz+J_`0K|dK|Hft{HXPi;0n1$AZk*=We zoXAg7IJRz5wVkH#%gum8i>xH8z|-of7nY=N!Nd}K(*#sc$ElY?#Ryrw6D1IHtU>9j zxHCw~E>9Emof%M5fqJ!JoP|liIp3MSh(@R?)J1$C$p(RCL}@dp+)^`MwsV~icH38|r4IU2ROhw+rFNeNW4TI$bd{RBSUW(CMy-;g z!p6w6Vr}#Xjn5#BFR57ynuC8K0wwslyw39V=vB~=V-7Q=O8*nlL9aS}dpE6)P<-O@ zKX$VfFGK@Jn0Z}W_= zv22ydakGMf_Mxf9@ud=Ym^63hVCf=wp7Eqnjw7gSJ;AYyL&s=OQS#kE5`8@4J~zC#L_Z&&j zExSYG-k!84V@)RD>$j5bDh@&P@Yv&(diyx0kQ>a1WgR)4hTxNn6BrGc`J#KFZJ>G} zwYj*^qC#)VB=*IAw$8pwuyEeotsiEbkINF2&U@VUv+!-?fA}o;(Dd%Yau#+)ty2@A zSX@>3*}>o_=(%$5w_q0%z@mK^6i`e0$$2F3V)=!SKudEZvnV-HjB3Z12A%^9Xt}*? zL9gST>fc%U_e9TLgNqKw7S#7Rvq>&|N~feDKGq+whOm*j%8;&Ohn<4QFXtDQ!FHP| zH2FCI{gN!l@WO<}&5i+Ufb}d~ZND`7?{^Zwd*zT--cV5tOk(Me<3K@0)Rm1d(TbPf z!cE0X>g9o;`e3AI>mR}leZItExg%Vo)43}Kpq=Phq2?kUyI}Ga)$erk9|QU0g5sh& z(*=~|jqs+ll7-tzCCmE%rq6HDWmkV1o`w44tF!1i`s^W_6(Q7|bP8$2#*jWCE&0@7 z^8C00=C=U`PU;nAg>&f;HWXcCshnQwM{De_Q%9yTUdTHMrW_emeE3zQm(LW;+=&B? 
z7tsCY?b_jNhQ+gH*Z_ufUY#{2k1x){7)!*8)aiXk%%0WGA5`dEQ?PNK7}11%%*Cca zQKTBWDAX<+{+?W#tD=JSv#?Hv3yhFHR!l=mPH9lnTLFyieV2+kb>rm5>%Yl=5laoU ztPC(9RAA<`^su4_BJqX3fSV1;9Bv3_{mm3NpYncIpu2j0cC6`?BB>3|V3`$&T;Gtu z>Y6j=9QjCzfBHULpzD>Z*y-n(UnsBWKw%^)r6;u{9&|^EJqVyfw1GFI6{0Y&%s}o9 zv-$9RC|={e?}pv7&D}Y-8qc`9YOG`+4!9qjfjj)sQc#8=GP&JMdW^eB?9Mz+u+pdh)WNg22Mo)w`Em?3VSGi~e&!}> z&-n4Q;Z?40OjR}}|#_E^FBc)G5jGd_w7qHu|?Q zv%^8~4p^h~z2QOVkylvI4YIfyS01PC3#L3rO3C~&rRDF$;g7ZRm18Q0>CCX{IDa9L zc)}B%Nl$h74%c|tr>7+1dKw8x4mo|Es7D!MZYQDYK^p>$KF*Q zF>Fuo5IXQQb8e#cePZ|kHVvg-2{TY^IVheq;ZF?uxZ(w!*!w$c&5)-WKCK7hRS}pE zVV{zUFumZ+4ZzNkaK%mzHCh*m$>%|6WBeM2+1UUpu>xV6>1yLu^%&xW&2k+EG38@K zH9A_@rc65FS;UX;r(+WP_?ad=HQ}SP0vhFKuCYsVM&3Hgwr_(OkTaFlMJYQ1mTMTh zXhEGkm)^Q2-DVx={`k8 zFdLc^TCRv9RF_!5Gwbvb&^R2yj`rf>6BWl|P9n0j8wZeuztu?h z?Ax1AP0{O!-asQvAh}P63 zR-!s#oi22J=cIqi6|CB0FT<&&h2@${`oT^bG;WLK#|fb9fxkO@kIFnBZXq|ElU!T+ zN&YC_!uyCs^>A~yK2G%C0&hTz+W`?;+THNNNUZC^RtN6)rfFB7Z_9KJ>3JS+^Hf51 zMvux1Ye}+yL~41j9+L+h3uvoKa)mkTa^r40BCVeB^ce+w^IsQGQ+lbjyPTzI>p>mF zDcpZL63@qehxwWzHFaA(bdPyWF|67a_jVCkvjO%KFJh(mFuAEvZcu3pmHvD(AvAj^ zStM;usELMP+Iq-i2rOTyPuS5>$M;0_Oz&@y_7~)ZOsPC8jdLNaI*Zg3pH9OpvR>%k zbP$m5(X#$SdPVVlW5|LMBAdnJ07ghYzUfY_{_ahmwPfNk&!mqUdwkh3UAOZNnS?{* zAvt`xMnYpLL6spVlh}#aYYcae5s<*vOPW%%uxQz4X(>$>mGhHb8YR6NBH8?AYpJMk z9+jE?w^dI_$vJUU`ba&MR=9TN&&Q-L1K6uMrO;pO3<&CGX@#gy+d(gSvKV4hA+!N< zPBcOiGz}&ZQz4eD2nIYXXPaD{o5+_JKzam4fekuv@UW=r(B#zf1~TkSyg2O|7!X3AZw?-SIKrJ-v4s#7b2-b#5r*I zbaC0x08KWiwnWBuUM}$vuG5M=?WP>rT#Z7fkR*&OcO#nybt>aUr>T69j&O??Y0{4q zy)BTF@)Pq!nX?{T=&_`ptIxtJDWKQ)t?_@~eO2r+gxP)3yupM;he9T{q$u(kH$eyt z`jC!Co(l2d<;Ndx^Z=iJ^%l$-|NFdjZ@HYnx zi|2gDM#D;t(S46bgSSr@awYMe%@)l}ee8y%Xequ9Tr~&Xa_71NOmL)T-shZQnACz& z3^oFpO5?BGFsT~AHbslYT~HvU;D?`&1i9iK{WE*p`&qXq!IVShPJV4SehQwZQ78b( z)yt)(EB4ku%;|`S7urfU?>&|_`ax|u1G*0lb;@og*`U@s>g{|p3N9Fj{R^*MO z_==(#%o)d7es%J;;SLsa*LwGpc5htSo{{D zl`B|OY3+-m&<w5@O_vuDWvBJ!y?uE9g)Ab)sAXPK-#(9c5T`XZ%x+*642#2DfTOSk;>@C# z{XU-4u-(*3{@b3u+`QDEMkf;IQhE+HKg))u`ZOh{BKAqiH6ti zDJP_lQfpHq$(n<3-xqI-()KpZu>=n8*dvpqazk^Mg_V%vZ|~1_gCH5G;*i6L49w_i^79zi#BwCJTnxVgutVj ze$*7~BtRLL?Y#%5o_;jL*86R^rO+FBM@c{WJ9ROp-2IDBnz%B0iDg^EKa{!t;S#yYriG=_lwxVZb(%%Ww_s_eCIqq zIh0d*qu4>BJ+pGjBblGTO_tLQ3IJ`ypFbMmje7*!juh%xi_9J@ zU+It#nM3{>K?&iO`j!)2`WOM?75=niSPM_I{0yf)?#;MFewNIj0+s)J2U%K-0a!Dz z%&VWJ4dT6>X-g{r0LUD2V%V5}@jqsoayuD}76JOVq zEY0ph4%fv6pf{K&LR*=K49HXy?go8*)3u4Y5dg}tUe19;bZjOQ*tPI#E~7mOVoNe- z`TFJ@@ehU!{c&#}H~PqM0;Cm~+6?krmAIioz@%Q+!1-3_N*9~B$zwd?wI!MN%l`Lf zfqPeg2*q!Ywe}OISER=p^LBr?Dnom{$rBwURaVCJ?PZao-tVvc|~%vubU=X93na_FrtIBN;(g=S|7Ta zhB5d!Hq4l%3JC z%_y@elkk)eR;Cn-AD+u+d@^bSwB?lD%`Z>$jC>i)01*dHGT3lx5 z`aSZNpGeJK`C-86OfsLDir>s&cma~c$BdlzcYBx;Lh-gBET04_Ww}?rALw(Q`Vj`E z8IUclmu3}pJDI>9W8Fr3C_2Gc|D!a7Pz$;&ppi9$0kV$(z>ldjiJ#pBXy$SVareVw zL&LI2fn3g(t#>ptOo99_r?oHE3|&mOSx@Fp3Be*&Gs>OPRj=anneE-Dx2G+#3(j-u@YdJq2Z6ioIPSyJM4`|? 
z?b1h`vAhOj>e@cIJNBG2+PB~{u(Oe; zJC{xpw@FV7Bl$ag9xgK)6gvHw1|e+2vj}?9a)q^uXO-mOKIP^JZkE@szhy8(9N^Ww zRI;~O;CPXJesL?Le%0CQKg^^zX&cN zj3i1whG(*Gmsewf(Y`3Rj}QD$nI9 zMb$qnz1$tF3wzRw6OZ5At2P_=a(80=6pxWVm7rTm&h|}WXd}lb`S@x+biEny$8o@1!ZXxnC6P{bRQ8szUa7^3OqC1z{ha zv1h&br$2SXgNo5CqdWlp@5kE&~^_nP;$XYdD^>EYMtOvj%ne~@nA zwiUBTy+McM5|Z-Eh84hh^uDFuhzeP+q1SVA0x%|0aH-pHvrOGU zwU%I3uQNllABxa?+Jd{oa#bb#S-X@>&8G?-ISo-+q!*XAjs`=2QZXxtK+@}!0dg`G zS$DCDb!vJ?cWiguEj?;uBmxrato^nXG-tEk^Xi`cWHmo{0`z;n8kTutp&&S7zby3L zsAZXwPPuV_RhS*RA2L>LIubZQEFWo@n`E1n9WSY217|viTr{Zcpjh8# z+HdlC9H(>imvQh)n=11>#ePu*!}TGwi`X;gOe2oyJ9FsoVBzj;$e{^$z~d**ZFYEy z-bLA}$A(VoV;K!@+_j(LLv7e~^>p~V!LPTex8wo=UL%zDl-5U7xo!q*u(H?pV1nQf}5WfD@^ zY+EJirosX)F*A+2ex8Em2lwtfiDA;0;(3X)KEz{wsIxh6io0)pbA9Ld1>1a@<`O%Q zz2;5Uvu8|8A$Q1*PMI;*2{>4A6!nSK*RPCEUQ2 zB;Xd)p;fYix<81#X=Az38f)--z2v2>j92CF+NuNqW!s!<2kdcS^x1dRVu5QU_h`*s z6us7Po{aDx=3g%Ck7E6!$lzy)hFiM*j2L}5f$$F(0q_!i@4#E~44kdGNZ%VfeyY%@ zrO(F$dyu!m7?bDfIki6{kwwdcZf`C^1bPfBn@7f|lh_hYF%4uLrJ~qlnh9!(SFgL; z9m&@RF%B=c1zUnGa;LhgOL{{l8P6;ah+6oZe{*JpTUXcI!`F!%aP`N@V+#2@X;RBj z$1hmcGB=h?DBtMO{w1LI>g8c>4Kf$H49H~B-`nPcJDtJxC)f$geVY=SSd*pTYAgO& zCC~E+TN;^04058uW(&ZEx+2YiJqnplP#Glxp6ynSiy$$ML3Rq$1d=r6^>wrqe< zZ{X_9-Z_Nw!qSx0?CoJHHK3q<@7}-O2C#$gPP5ZKxfZi{(;QA=W)+zsV0Off$Y(t` zJ>yJPPLh|3G%MSQ3f|l_>`b)UgQV3Z+R++;&{`sb^UGrcUnLX@7Y}X~Do$oc;B@rB zu8vfdI&xT_gs`?q#8;rDYf!SqkZex}>A!r)Qy*_ab9OB*C%6B~woDaom5$X;?9|BN z!CnBT?fI@AIyIs-HEQ(`Iom0V9PP#WYcLz*v?bXE^Cr!OYHG% zzpWz}I&^=6E1m=qg09ZFVwwL=JB*7ln@&sun2HvPH!1w5nJ zHY>^HG9V|nbQ4FJ6rfa!UOiP~Zqlipl9A46RPEOg40K-lph%Eb>~8s!&ESD;;;_c6Xbm4V9aJN*xVPdv+eo-nh4;Q zoWORqKUrJBW?I7)7@2BL`|134?^4T)8n%BIZ?S_nxDT@#WjHW*|9;PB{rfY?LPqe* zVaZm>lRC;!J(+`R=#}e}F#tqJ9%(yRL3?aRgIK4o1dRokyTr;VW)9R6kjwCmx$J(Dw+v+QI9Ef_5XlbzIUsqdo;l!zP_W18xm>@+ zOWI)3F%Kg4s}b*n=;_$h5?Ma+1aR5;??C>qKU6J{03|0epk1pIX8v0pJUgnd#^IPo4Y=DLT+oLNBjp?0+ERzxoQ20SL|kUz|k#r%wJ6 z!a+o!C$s8d!@R}bw59((3*bL92ec}l0|tn<2=w;)cVPbm2>*}6^#7sxZ$bVq>Hi-I zdV(hV3MZM*Q%1MJRLyF++S+Ofrg@??#>EM|(FIP-PR-gJNloNkEYq->w5zpRKH~dF z3P}VJhZ0$pPeoHRbsLsm)qmSR{YSFaX^mkll;B6Ze-+Nzzf+G*EaQ{%EfwaBXBE7G zWp_PYG!*E!KD=`sHFt84HaM^S`GumMEnl_^_F?Wn&Ls@TZ}LV6d}L()a`xWO=clQtW+;W%;ck z&8Ot!mDH1Z`vnJf)|e?yrbmA&&??tuvh_I)=5`+a1-ucI%pNzgbDM$ak^3D?{)1*j zr!JCR$93x2VUhNv0Fe)eZN2SsJ=+yMptB?xh_oSM2@mJd%lUZ90&L`7S0|XCmNJW; zX!|os!|rlc6yw>t-{LB`a~3XoU~(q_e5F9_C&LzLXbLM2QByvaUVAQk3?B~ed6P5+9o5ZpD@tY2EwWl zV8j{Rf#-xfLQg*#<6Ho$hH;#mXa7QVYza_xVX1m*9wBZ|4N%o|(Ot^{Kza$DAMn<#=70NOf9m6S(|uRQ7!!E(VtkSS zyQgm($uB)3JA(lYx9#RQrT^E7W)nU+M|7$i9GUFQCs`OJ{WJL#E^Yyzlsdw_C27b0 zJi-5`;U^yN6SVQjK(h)TDZhvPee!nhM;+Rqkp{s4KO0SZhPB`s_9q^Q4SgE_1$$-90ea*i0etXF0i^)<(72x*?TkW2|WU|wrS{-RUsweeJeR}_i|JvZ+ zkyagg!op14o1az{`{@y7%v%9yXqNzJ^&G83#SV!C37|C0I8F57YOT&@C$7%x`fr`p zudpeYbm>%$=H671TJBYeRamK3`P*2U zukyOCag(7s(ylc8zJ$$VHO0?kx4_S1`2F(gB`1}o=(jp8PSxvj z-=^N)yveej$dh+2jF^syF3;Oc@S5UvP|;tnUXgthpne>y)-KmiNn|l9 znNG}4WYOz)7uf29_a}dSWYl{?eDt@YSjZytchjA?Udtd;1c#swobAFcPvwf@+D9Zyz+7`@XG(>7Ujy=cVH9knQbLG-}-&jDx@G(^*rKMkiR5;7;J zwKyLoc$_b3b-{G%k%wW@=o8znOLB^d%nIeY_40Zbb9B!8Q}rONjVB2=Mk;v{#$Q*a zl_RMfEJ~TeYRU=Mmdz1A&oM!~@k2`Uu5xM4+wr_dGx>Ao@SCV0^#1} z2G;Eqr1jULb`x&=>;{CVom`#C zNtADWK_e~hg{(QX4R+t<6S9VZdmBD0r}Cb6bGshOa+Z+hA2cir)Z0#r@>ne}49BNK zqBHNuBltfZl{cykb2lb%b`kx|N(1h3Cfhokb=<3KJ~K6a8HI{X``@Nz{%Z7(O`t^< zr`d-a(K7K(ztk0`a0?FTht+3L9m=$I+pM^JhD09v4e=55W@plYX2+ z*WD+U5o7|iPKQf+wn-OLwZ(gcB6)B--O27NZ?ms}=5o%gx4njWin@&shaQ#3``N=+ z`U`#TFMoPYw{*~Gh^7jNMEYDtp6!o?bb+h7qwY0~cFKsoFkl*l;XP^GCS~y2zGBbO zCF<5MRP1yERXztBTa#DzM>2%{_Vih49#H zJ*U=4?)WGrUYCm!>2rI2wvu-l;6z^MR+(xSvptn-ytXXkjPa%zd)8VSgm4NDdC?A7V2}{)&6t 
zMkY-TnCr)nw65>g$+s$%V)n|;UX8m}V)#u;@eK(O873f;d)WNXpvk02OZcq)B9gQ} zeEs?K(32bo%x}HClQoD6m@l6BgTmm2ti$&6Bg>zypJgC28u>xE!)mjtGU9njAMWfL zbo!lostfZ2u!VS?_r`_lEhiglqTk){(UdPIjNjbVMi&BC)QO%iduKUy6>o)1%$b~y zB>43CGXG;+m)oN$Z|!b2qC3EW+Il-BN&462NY$FZ#0$i;MIL{C+zIZu7d%&;8bENs z!WKMBhC< zdy7aSH5OU$>;z?EQbja5Sma!eg!?MlX_lzt+nn8;C{!q&AX>yNWaQpW@P1)v5&?xW zp<&4-WUN~L<36zad!?^3~XfRDPMd0%|?`A{uA>T=opV=9NO zLY}`=jjv?3Ji^V@$Nfmnu-&8N2Eu`hl#eRf$AfBUhL(7oxpw9T!sdF+l7BPQDhrGr z_w~RVt)mDb@}II|ZVqZ>Pu|ubnSXdv!Y7Mo<6o$%iX`WnRyh?36dg(r9(xx*+C2|2 zowr^N$fYxpP&o`BVgbhAs)45&LRvq4A#lu|?_ifm!sKt8K^=YcmtbhpDPIdruB0nn zS$@-V@A5NrE09BRN>2%SonMX0Ey{@O!gTlh@P%b|#3<{cM?piBMJZ~a*(n7O9@qZL zVy;t#%r|j-&Y#|h@LY}cRPP@#m`vF+05k6Cc;G=6nb<^Z3OqQLK#oaDuC#V-Sf>X=Uv#q*f6M15GKpxl;`N4+|~=Nl#1wIB*r zy}vpV;CEt{yqqB(7sVmDz|N*jBHxEJnuUwvBJ;XTi>8GD`!e^E0EiWFAB&oeUWp3> z*R}`2p+519AXq6~ly52EnY2qK- zZ!{Jgo*)Aj);8eJToIqf$JqGsj{BWL1q5G?3CT;LZ74(Slymsu8}o5dk`iV$$H3+| z@aON>>X3Gk`<~}&l?|Ue3fQY4b50G$_OXm?Qm@PT~og(TJ4@i9V zgo(g>;}g{;t}Xuc{a-#`++LrZnIth$tyd(@QA<9j8)sEk_HE~ViZt_yd0b+**L9aZ zA#nIUY3B0XsOE*x&QhNBSaGx*|HyP6swD|4mElXnjT=57Fl;t@ryd9OYO0d5V;F z(%?-FLafV-8aavB=pGH(y240`I!brDwB{_73qY|8RX*m&L zW+PfX+6!g0x;s?coYZVST$BPrfa2*W=K)H z*adkIcr?7x#LeX$wLv@!nwk2Std4q{@i&{%TaFKR0w;n8OFMo2*U-OzxULNfleO(a z-UQyny;g+v5!2_%zbMwo(1d3pwqF|qIoH$yw|AsL3)NbZ6=X<_l<7S6TEsR0{oC^U;Ve`*f*$bO&>mEP4=X z-TNbxykpGg8J*&5y zlVTrP6pKHuRuG}?N#BdxrU)Y)O&8TFlpyy_Cg zic=b~MDWpbOsf>t%I(=4?84$*E2a3pb`*+C7m6xZ_#c7V*F;+*m??^C$MzqWqPn_p zoYPznX#}bg)-@DkZHCqaCX_jmQsi@1f_8mw+O4>$N99ajzX+d-ypw1_;aro-uf|oI z$kEuE$_Z$nItcsW9htuBBP+w27-7_4qtn(`t=C{fFj_ehwwfD2Y*~Bl5bDUR)5dQ~ z)t_9r_0wVar9l6dkW+lybZvgXdLP{`r(7dv3$`+u7#-|%ZqHp@BXJOR_WpLY7oGM6 zk&b^Gy+`m?X8+Ulu)G(h?^9w6E6)u}npBDxtYGW7ifG5i{(e}rHVU0mqZu398?zik zAJd_}Yn-j9JmWa*kDgYn%b{o1rxFqWYZ8@8kv-awBpP8CTyb%zSDD}Xj@~_N9Ut7y z|LS7qwi8Ms6BWq%+PlKzPRC|D(pf)GeLlus08ZK-3WJ3-w2)u(^ zBCwadHH5!K3tiV*V(0rGQjvb}4j5G}LDmbgA50-CJaCzLT_1zlsPBR7Lb1U!##DuB z36)H8w&Bmb+$K4wvj3HnOtNkJYq;(@wSc*KD{!4YE^71x*GDEPD{3J(F$2|D0XVy< zteK133yCJqGhzkbF>YiOMobERWEajFq}FeCe-kjHVtK;F3%f?TqhwWV3Uw2_u&yr) zm+z>`e=q3gmUH4;KYj*G1LY*9=m%Ro+jZDzGRN!hIarEpMGaU6a;`I$;&T;8tOyA{BnEif9n%)qH zC|O7psPPFS=s0G}em6d}#Fs;1aWA+)eyv|&C52ED^da&ihHB1%!Q}#*^U;!CueJ09 z^qt9=Bt=ko0XB}E-Wz(#0E_L9mvR!R_(wVK?K&PjgQ|MmJWlJ0%v34B$m5`L-*kLR zkZd5M3@-An-Qet&7igT2v#-W4w)t&W8VECwcqK zHK1;uq^j;cPW=}>)JP|J4fu&iCC@gPXhVu0WJ_m4M#)@drii}sf3$O2>AK~VCO4tt z0!LxAI8F$J_-EdJt(GgsqKWBDYB8q_&GyuEmJJrWeZMb3|Lcw0(V|+l$`t#G34%aS z%vQ^c9?!T|xqbrO?hh%CzCin2Da#Ev;`#Z@1InX$_v!D?DA$)!SL+)u9Q z*ZjVY8Rk#BJ)`ciw5ytA_1sOJF+>&)nikV4FGAh1CD5d-K!ihPz1UCgsRr0b)U2>4 z&UsbCSi%_hX&ejKPtqzQp&#~%d(nxQ$Gl9Z8jq-$Z)DKO!q~CdF&=HITrU?+M%nip zr_ZtEW$izPwK%dk@f0PV-pXX-09oG<~fh7Ap zz#Qxlv_+VA2gK_5mtqcI{g{7Cy@EPnoF$p2Ydg`9GD&;c6Mu)Jn1UH=g)Zhr(!oqx ztkJqk(PB>KyKgo;<_}!Ll$X;6Ph!PXb=5qmnW+(66Zpt<7QV;TZ~8W*`|yoxDYqI_ z|DL0p`!WH0t|l8dgITi}(4&-D1A!x^ugzGh?t!OrtvPj_h3 zGGc|;CP)Ue@~vLy*!$*A|4cfqQkrjwvz&j^Yf~7Lz%R1=@;nt|N4nS>g(peiW~%9t zYms5z-O0cyDdgz35v`$vU?e2t_K3X2_JRU9wB{*)u;?RcoI5H^1;hOQ0RN~Q32kUE zsfXTc>g0JN;lmTtI1aj-_-+Sye~kP<2>AvUve;g`d?TKtG-o?{pF57YPIkfGf{s~O zrP3J7Ow4c_UgvQ3djEAh@xmq3dXFs1!0xO%CDBCG^c=hyWo6{Vb=LN%h+cmB;%DCm zF^&Vbsfhutk(;iWm|;Y-I1((ne%ACG)&0&$TeuCE%}v<8e?4PWdiYnuv$7;ZFJ0b0 zq%_3+LAkY&XH@%ego%KvtVbZbJJ4YB35I#RXAC5Wp2w65NdsmDQpcjpZN$TRD9>l~ z*gxf|#9;%^xZ7sh?2EjQz(5UDj5gHEA2qz&0R8Ai|I*Lm`|a|vTU8jE>HmzT1i>L} zn_3P7)+^|Gn8;w`yFOkRG|d)X*SQCM=JB`MQ#vyOx=jZ|5VTo(O%n^_y4yuh;*=YCGz6Jx+2z@Q!z@V(&>Pv+(LI}lGGtt9A& zBC$QaJHyTXm`Z(@a=fflA&+FGS_}t+A;<|z0TqX{2jcb6SZ7kNnlH7@N~-L 
z3avs-Gp>R~?k!r5Mto($5FNXieh|IFStkx&)7dh5>G@+x?530a&DgCjaWak{;eLR$3?CgA3f;s0Y}UXN73s?5U{B zbTkez@KI5r{j!jE(U>%I(4XCD_a$v}cuSPI_jcsB({__sVP!fbr=O0PT2RlGLZIw> zt&?`(>;8af>(%Iv8=sh3$TLnh*ew;Bd&cSseU^`jpz^qgzU=iNr}cS}(38|+?5RlA z zi3lIGUm`es*iX3SebkqH%Br>w_n^vB^@>&AYc7<|yyS0hinhN1JIiRF>ysnhqNC_Q zf_Z7t)uU-eB#nC(6@V?q zI`YkWOUp)5Pk+tm&`K!19(n~dIhP(SwF}%}7;d4#5gX<)3o*AcjJY-}HnP1(Pi>Zgr z_mp6T*3$~l{BY6B$g32F0AIt~A>-dC>#?bFGhV>qZZx|Q_+;>+MuPI7mHMHI=aa?e zhOnYqQFY^%o|BzbZilPM9QYs28JY2C#0qU8X%v2(CU>6W2cgjV=UAd1O0LPzyWzA* zkJRoUqZp2hsCaT%9*%L~HSgz&I=cMCLyqIl_e8~&b#}5lA*$;lka@7*SNj(`GRf?Q z8gM40jh7{QND^=kj-TwvpOX+!$E5}>IuJZ%ULmbhHI)+@f+XPujjw(}|MIVHYjoAd zz5EJVNUMC5iV4i40ILQ6DR2`|(_BnwA9!FTJmGWsS>#kH#|f=Zt@HzDGgK zI-6ahp_K=}_K02k6X|}Rumaw3{TiW8Ln`0=A%YMLveReE!^D$Wl%<|oBw$3*;ggMZ zj;S#2${gD;?+C!c)@M(qw~A(>Va^&m|LZWPZ2g)uvO8@Q8yM9_aeyJ474U=Ht$d(rpIxkZU!1?1`;V9Qb>HCOoLj1Xbt~VScuRaR(dd|fz zESG6d;c!F$d&_tCXX;f%@{hhUB4oXp}2CvMaZ4}JXm-p453x(`j z=xCDE#@IS`TTRMNn-@6lnyJA%vJhs6+Zr|>`gJbO$&XKYt=}KiSjB$vj>}l#>0sl` zS1N9;n5Reaql|5gg+tqn)L9G;>AA{YfF(`(y^4N$SD9F#~UUClL7EwmOb+?y?W9jjZ}$w8&0+ zl?j(QX$k@)Mx=O?g&$!oV=T5Rmj6hfC;rAb(p&l^sbC-WI~TBy@JuJtySGUPx;bn2 zv^GNi#2|hNwvXg9ux&wpI#k%7nf^>CJGj)oq77%$GdUl!w3$R5y7>Y$^*_#tkT1MA zRfs3DpOQiw($QgG#ZoPpsG4|0oJ7ZETG07O&YM97danxxljMm_GQiYr{z4Ld%N1j3`VB7}N-Ah_kM1&K;O>dDB|+1+`Np z;9oyp3ux+M2ozr- z&W-oyKC8a<2R5GjpNL(TDN3evF+GcwiNu=7&5SdVaV;Gs^YnlSV$67V6uI&$?<7ta zmtyXOITZ`M-sSL9KPo=Cyun0jwr5>u}R_en<@UpP^mA1$LQ{OA7s z^bm687a(!`X+U-8!z_F_;z#h5Mc9`5PZ;UHp4|Gw*(Xd@gb%vpOr*!nu;A!$x6Lo7 z=N)(h?ZRZVS>NvT%crl%VmXALkH)31x>x6`_c`=?yDk-(uTf4z*s$NiErf@gO+Oso z4xO9-lW+v`d^sJfAWx;&LWXWGa{T7@goacGF`)NWC9yis+>J;fq=KNoX!%&Q=Rmmk zIrd?2HwNn>Wlb4dmUiU5lL^f)yS76<2{<_wJvn0;|DI&1LG5`AYhJF#;PZ*&!OA-o zer2V>q{Ht(GWzg>kQCwjno`0h^_ORh&*|GK&Q|v6X((Wr>J*6NG@;B?oQ_rC+GpQv z`hZNnnl+BKd6uhLd;)bXh?GdgWG-HQCjmT~pZ0j&U&SGZ&})~6gzVVzSmB#5WJYOH zimT&!acu-OM%j<3ip<*+$6a!~J(t-0SW6?djTr4kjc1YU6mL$zqar|Qr5>jVc}b}* zx>FYMJONjD6kJV*@*E3Z>qL?Mf}aY+DZ%=n*C4!ebe}&&_T@M>XEIPU-~EH9=uPK= z$wzR8qH1tx!@d>6zAI3Q(c35^J4!zX+jeTqB#@Cce53e)ARoR`AcZjSdd`i~k}Xd{ z^kR%0eB-fcUI~f|WMGy_#F9e-qjinbXOic=*ok4^emRc36R0XcXDIdtN%fNjPKCl0 z$x7Q4Nx=Tqey@>w{0z0)-3@{?DUQ8t9g)cye$}fM2ChE#V34)9%eGW(BL8LvA1&HJ z98(ARi!Y%)UoAXcwrO`XgL-fqJhx-%rQz)B)c#R^(_i6i7?u4LZ3YwpKd1villcrj ziMSAAw!I&Fautsw&oAEIGLl`vKaPTOpvOT|v@c>|*FyVErLc;d9)TKPc9PJzs{^)n zykM$KC7Nq!#7AHk%{YEWtCPx>p_gW!rt{ts6I^uZxer-@huxpP+agjKr99?WDVH3X zp1UhJV>pg6zDfEg2RQ3(pKT%Yl&_Orfzb1c(mQOKOmd*O2#Sr!! zk{pZmA{b->Z4kTrdmDyfO$h$9^(H7;<^^%7@8i&D!h@%ZFdU2jiXrw(l1v^2Hm!_H zMBxU3kJhR8N;5tFxC=yZZDHvAkH%CQ#KLsGQ~+b)Xbh^K$NK}l zI~bCOx!$R!5G^99=Bw|juQ4kQ-daJ^2|0N?EK1>3_R>L+cJ5eNV|nLFV??FUssbnN zPw#oKIa4b|&DxFU{XYc>OMadQJl+oATgm@&~ldUmaJEbCpB! 
zUDpupspi{o$ivJ35H=MTP6g zT6*c!X3lxkJK(H93#3+=C;!K!S3(@0|WrX0V)KU(R~c*bgb79*2C z7m#{0BhqM)b$E4IAN$bD^@1}_9f@KO$XMDM2oJ6_0$Gr;Mq++(4*o3FKsoG$^`^U$ zqkIaQIZlzU%wcoXb5W=v6dnjzGe!?rq$?4x1cUoHP!#Fdq5RZ%2;@X5(t`w8om}f< zcaRZyj=Vd5gnh&Z8r_kOGo~zl)HU&k!u6+Qbfca^6;k9H{4LG#SC3Kt<7?6z&Zo{Jj9it?#G@j7CAKe_M6WZ+}M9<`;e0=;gly^7W#Eo0qiAc>>6Egw< z&W(iQ+*JeR71f}QS&A;=mgA%Gz4Bi$eoLScZT?K%zbFt0J_`)R6d0k4U8+OShI}-k z{KS`h8_GgQ;H=?$l)uO?0>1bLE+uuGPaY%3v0jaRZ-f)y>IrU-eT20W2Q@Kqy^|>w zXQ^+d8r(*7U3#q@;`;RyVl!lZmPo%?SBge6{%^7#5~YdsDPoJRDC>vVBGw35tGEqM zd@w(V7bG45Y*iVNott#{g+NOZj@T5F1kx9#h6MX@7!jKsg}1e6#gfE`I#sQEag!QD zc8h4{ie5sTdbQi{eIXn773&Ah85pf>KTE&Zk$2~!z~nse zAL^i>AeF0$u%O-b|K6=3>4-qVIZK6xsb5Uq-^SLyss~n+x#!+3H4E4#jL>b9Dck89 z^%iN9477eczkDP`dbPfQ@E2yPU~!?-&NY3qwxsrIc+%te&Y*tM1yemb1Z`zV6Ktzr z>gL71D=#MUxLTSUg@)Z5!TwW(=+zGv&bZmagY|3bb96Q>nL5lQS+Wn`-tk2^Mz2Et zi2+_GS*#iQz3-ZQnN#T=A|9;D6zhqS;?kr(kAf+xHD2?bHo?CUQ2IP5Zt*XTShA^s zpU7A^6Kz@AC7QrI*2|B%b%)XyB>nAOus1=DM+ps7{zz7a=1~nq-3@(DJCX9CEuLEZ zByZ&PqNEN=O=6S}Jm&3h$+K8Q&+y%dcFw=EU*PG?A;@au@Gpa?cQ=L07%B2d%xG_V z{ZCO8xGjOTXIPURwNwg&9vPkQ)y5_MFTPjnXE`PH?h6B3$ly`O1YCs6<@-vvOdMNT zB#iget|GssJ0bHeZt>lpNu}yACmkR9fWiWhn-x74iPu|x-XG;N&;}c};tdD{zCX*A zXH8xXt)~o}Oqot%d~>lR^jU8D?xEl0PaMo=%HBNq8{chPM-l}vOW8G78CR3;L(U< zl@~jr32lL^5X_6PDRX~gL{-mbn8AU7xwbjL2=}`&z+S|K( zwsz7R+*#yp4l*WR@jn{MPNVttJ3k*{kiv zl~K^5Urw-@Cf#da<{a7}G?{O7D}lH%4fx zMfMBUT`H*3zqtT1iV%@w1*w7FhTR+$Pc5LYE9D@QLe_L)2P#%*_UdlBp>#c^v6vL1 z+;6GWqdi_}dtG--`l;J+Bs$W5D3U#!UcQ&3SO_DrA$l_$ZXN9MD<+PiKSsyRYj)_7 znA1nTbR)IrdEmIv6?83NCWyE^%;Mt#-~GMo-$9H}o>&^iA7jasfs#wfuV^zp82b(1#UghX!uPY=`Y;x_mz)`jcN47 zZc?4P_HxAgQD9T$i^ueJ%2=~z#$SNywF-9{n{{n33K~Gi59)F> zON9#1gxcFhXuYEx1(Is-UCnxY9|VRDfY@=PVU3`8zC#^<=5|*ydZFo^4k%^^a1Xtq zoKf~gi*6ZE-W^`Q@)vt47OT>#F@xWm=X62 z)ephH{MdwB)v0kM;&O3oXWKbFbOyufzzEp!Wmgk?KDdj4V#(aB{_-HxPF^~PEKYJ9 z*K1lEeY2N1CMwq$HLW+r={Q&&Mt9rRIa|gmhs@1>BZ*>TMp~7+auZx%RMh}}Rv_fHjC7YiMQFa|Ai9LACG@FWJYPpkdt^jA-0L9K&^CXl4 zSr$S%V1BSkep&}-9K&Vj!kO63VS+^PI-YGCc5gvfvYTq)_)j@X!An@F{|^1xMT^+u zyI1oy#xp8nTaIcS;iIu5$zWUjRxu48x0^SSaqJ6se&&DF@~s_eyUTnx=Eb79$yx?Y zsN&FOG{_AEi)+czbXc3ek)u0=V>uA(rU%SfxKYaG|9m|s`nYk#X!>{>`z>0Vh7?d& zoL?fhUE03Hivsd|@Q9)P?_RPNOZgrjVzlo~seI&3i7^pzAdu}?EpC&AWcCNDHg>cm zbb3QJcZs^{M^d1JW^lNbESR~Xg^ZHN*zW=M~QP!H-BRNP&(@m zDXDLC8;9|1jWxgX#iqv4Q83qz7!e`xc5TNOhVn}0iBZql-D%|M;abX|5n{*Lh{uEL zawr2alRe84jEpn zbbG5~*&KnI1JglGquW^1*#ZR=BhqqBg7LoQm-t}W3Y>Epv^RacqtvFa$V)jOZi%YO zf40{~)kXN#Ip4&KJt#-20J$w8N^fqFi8{Got10iOKQo10sfWiCUJB<@%~6f8d-*?U9C?Mv zD~)Oo9s%W3gROekb4{OpPisHR7rF_p_nDP=zhA5+^p4-gQW=@*flT zD%8ai_@_7BCt3%55z}&Zcb!!-P2I)(2BhG7a6-P4di8{k4>Swje|IpwpALR1+HaX> z+<$FDUCdvcat3X%{gqu27s0k)uY(o`Ox?7V7NEgfTQW67tK}0*GU?lJ)H`j#+$?K9rPx|{TTB}vLZ7MlBX%aJ?KO@63 zq!`m56;yI{{pj^NmE5>eeW-!+_Kgr$!{J@CW9}jMqHJgveT=_@9CtD7@I0ygWNmD# zX4G_f+&$)_*!>ZkbPVc7)Z03tU$pagEJg2Ef= ztH+zxM@;pgDqW1ox3f>Ka|$?HDm=C!**WK@0omdTNH563Q$7}=%>;|o6>$n#Uzn<& zu}l&%L$ak`wK;uc#}XiX(Zt2}0-44@-4iY(4l!aKcM0k`AARS9r1qj2Sw(c*<=3fXLrlx2$?Ndo1-q6lL|70UCAE0R$$WR#K z%kfVrGm%zDmol{}LB{`VkR8ZoD6)_&C4cHvc z_fxcp&_LX<#tCBTajwG_x_8EPqFzqCl_DJC8un5K?%Da9wqVb1-3=P#(CLS+rdf72W^0(dGJRCU&OUkx$cb_0y{+?8FbR3uF{7883n)i-@}Iu0 zNteBj2eLk-K?E6Ma^54La#$@lLQspC5l21(7`@yB{Cw*PT@l(<2O=5WwePMAILP2d z{tg95nxyWs`(6U{6R>2zg89CH=7+|#FLZsJ8 zL;yj_5Z+;S4a8uJXecxGJxotwbHgbY-^I@mo<+f^ynSc=aW~|f|M8r{xK&vSoJUuB z(&{CG><|Mi%u5C;`U(qM@3|#8P;|cKx-1_9!(OHU#`j{yT z;XcTbszy`@>&jXdj_v&=TJJKjN~n9{4HRV5iU6j?V&rfA(H+f1g{W;$PYYz8nr6=D zZ!wg2g7=tkQU~AsnL)Jt8brqMvM@`UJmLQJ?d3O{pSy+Nop1?{Bs5U;)V(s2p$cQs zluS-IM@>V5DT~--@W+w9_lV42K!~R-CsLJ8JL8j=;G55t{R^I(^AdH5sJe}8})x6Bv5&?tZL%{XI4F=6N%C(3vO*4!WCTWIO9Y>hR 
zORJ9S%x5+G1)@RtwvT6>34YWa5f!i`Mn5cdDQ6n*fB?8LY`0iQl{$ipPO|yLhK=s| z`iK&NIFKF_gyCdTAhua*eDm4$dx*q{OetV9yT4^zd6sQ*#6jq$r(zcr*dJY^A}?k! zB2j8t8PjdMM5QItX&BS*);#l7HI+15w>~C3e9u_;PL3WN^I%bcq0*fY3mW^DJd8mH zdE+vng0ODPWRKzb{piN1vwY{ES)XD^cdi>&R6$~pmq;&VU5nZubeXN*#4O|-VtpH* z*bodFazd8m+3+o=nao1rv2K{2ZUhbr`pGm4{GYbn6NE$?h@6eEsK8G9qn@&|R5#Ae z?Q|Y`6J6?@kn0aaO5}o4oJi-Gi*(Zq`o?o}{C@@w{cxl{TA3fyj(cw&_Pktmw3 zO|ry_p0;~D#>cU4_dnc;z#(=jjzo!5@tqR9BgY@d4VUXLO(H(v0Zs-kRzd5eaYCotL3XI;ayE_(SY>oZmk=U6+K*G8+ zcY1Zq6o$N}dx3q{G1+s#;i^a%32+s^d)vO~1p?8~WgBkMZ=oVln2C=hw7^lDzqG=f z+lhLW_z#qO_@{tZ0rZqzTGJ(1(yed#W-p2}fw)NGlHEW}jk%7s#2cZ-_dadzw~W>2 z1fp1*PUiLNARI*OR)&ywlmpYT_tn%rfnR;9d&tyh>5>t+4? zKUO3|o0wtBZf0gxf-H|_?pNQoN{sW3Ph1qo<(04Xknho3h)#noaR}aJxJ>{5lq#-k+ z|Hw|{FU;2BGykcC1lQC9F&~OHoI1*&_-@=8`C4xt!qRe6lwa3N2IHGUQ<0&8UM%io zIow*G)mf#l5QC_-t0TaAA!t|i&>N<$qS7oJ(^4oWuL+FWHyyenWhw~ejy93#pD_E8 z<7lQ~3X;rUc>rrf=i>woORP%5;?o-#C*)GE2Eu`S>EM7^wwejPmyz?|)Y#O^mQji% zzeH;H>C1iD&V~u7$gpV65hd3)sejKDw(*r_QoD6&Nqn?rn7&;Kc1PHhxKmIylhqnW z@_e3y&nLfna(|M?(Ru7or-Cz7Tb!e(Y?C&KDpuv*9ahr{4|?E07$V1x5-@UTKINuy z+bgal==4sF?y6Axv*`o2_n(>+* zlRlx&_r@=&GK)g|*LYxQi3T(bF7tAv+3`cFKLG-)1)3rao5nP~3E@eR`1@hO#{!3c z*lACk`6bWI(75(D$@J=9FYD<_yj)~YlF6e<6XDq{n{&jUL4z}u{!a~gA|M3uM7-%w z4-&=ibtQBJT)FZ^gOW$yCH`my74%&FIPfwjbDmkYqIB&o+LJwX4jgK-l6w-lZGRJn z&G?r?gJ(a8HB?W+;AK1AFuXYff7)ZtN+O%q#wkv`brCP|NC4jRG>bk=Du>(+brAQG zz>N6wWl`%_m7FAe6&l11b^MD>r-jV?^dcTkHhT4Ow~F?4Gf-vwZN|Lnwq>|A8K(E4 z%DjWG>iBm>O}v0@^`x6_B{o_omB5{%{6`k zpqLo~eg;r>)t>io3;0%>Qu@Tv6mz;Q-j=*@C6k^sdQS-Oo@exPW8_J>Fdrw1y{ci>9kTpuF3 zwQ`ca1ilbsV{*60aSwl=0iOcqF!}ti0G;&~_+pxzLMwEyM8LdU(C(n-_3s-e-O_|6 zUL0E!h0xkYWG{Z9>ExFGTb+pUz$!r9y6`*Pol1$ppBl$6ln6$caYQL4zkv)X^M$_` zn-U=rtxy?D|1#?#7-0$0-uVa|^Bv^ZTwr^L^h~H3+&il-WUG+#qDj)6ho91?1vf*&MO5|QM^Q@gQD zY>EUq6;~qnX?#(MRiAK_W|%Egr#}C*c5eJt^o=X) z8A!hh)uzP4Q3W^Kf2gC5CSu7fw()Ei;K3?F)}VC=^MjuektC;}?|+RIOAB#2+uC{o zHe<(cOTI^N@o{%m@-YlKpfC%JNua6ahil$p>$dCwS7pv*7lf)G@60&^h$)d+(AY~~ z`yfrS#G;t4SQcK2pW`J+9SR``Pa+|OtYLFPs6pmskpxK(fj-BbyPBfOo5fT#5Az9q zpnHtLH+I)Sqn#v^#TA{v#IVYn3%)h+EO}*_&Ck#xF+g6Lu10^zTcdeVwd|L&0$hnn zS&7w>uvF)0_oB+Q!2YyS-h3xAFMmu&&p2s&YGI9+hd!Zk=Vnbe%>#t9CitL}klBei zNPK(R#hwkT)ZJIIBA#U+CxoVO*Z~hy8ze&2N0GRl;8lga4^7!IRX?TRJDD1!RF;iJ z&PQ6Ml3W*`QK5ksPuEc})fOnWLTO)k8?%3Om{8r*5RE2r+X(qs5pp5iWsu{Df_I zVq{r?uUK{@n3p}qYvp&N!{!DFMWh3?#nPx8Vd2nL;4^5Nh~eBYxF|0zbDpr!SDIyK z=^}DbWs zc+K^uu9HD4q%U6G+1A9A@Zg%~9m+#)?K*hnq`=kn&l}qC88QdUr=={3LQxLYyHpap ztK`kAs@4cxS8iRW)HyX4y1XL|ag$qEXeu8&}Z= z9Q^?WJ_*y|e5_TRQ21|_acPP5D-@4|3R(Q4K;ZZ#GToB@W<&6@>+!5rX%0rW-6Ie` zeNdv}Ra83vj`u7J0UXj9Z4ibM%no`#4RxgQ&xa_O{BIuX|?0ZDXlU_RcmgX;r-^ayAlz>GW zDqO1f*I!;{QcEFEpH?oW-CLE~d=x&Y2bmG2MFx#=#I$%~trtCTYoH|75}qhp7G)?y zU4ksp+D0-Kr(YU@7|t;^s7t5dEuV4>(fc@j)pTzBbjuadQtemm z%^shY&<|;EG|`Vc?t~>Cc}1q**lp)=8b}Z~qJ7Awenq_e3dEBr4%(V>!SANmxciVjby$Z#2V zD-+lN7Qnx^;nDgm z`+T02I&U2Oh)D9%A{{J-ZI~=$dg7>Ln4a^WB84A3q50`efA6_p@UrI9801X?W_leF zwmzIvZ)GP^NR;HZ)!NI5q!$8@K$>J0&I!@g2R80h75}Y@zWiJZ-RQG9ezP+aGx7$M zi7A{^rLYYHC*@(@L?o*i=5(EawKLTx1RCJ@EQ_FqjnkC{TBiOj3qMH%8R)ok8+vcu zAi8=|HspcAblhzr8PHMs3+*@t$mPtxBuMkcHI5SSfM#e?onPZ&(Sgo?jGlT^%Y1%Vj2e{tB$@Lf#qdFbZZ}pBbRzDWx*bX%M;?rd zMHwj}$<%#pG}>YnLHBEb9+8T9CT0=F#IpGn9X>-ir;rjiufX@3du+$@w>4ZiR^N=e zse;JzbKf*Jb0u3wg>x~QaS-N4YCF$k^!CZ1?IcP@sY~1X;p1*t?F+he2ImhTtx5hs zMxuhl$_xD<7M_HJXz(L(UJrqKgw{$yLY!(&_@(en`X{9duDC|<&fd`r_L|t@qHD4V zj?|~%;6?nkz}}I*fU5jo;?r{YeCuKRX_oE#Ii7k228$ej*y-iMDM)%dtHQZ%acn~w z3r$wPH}wTaxf9nV`qGGDNs{8mHlS7^TECvMOSM64KgUe4|GJ^ijbyLq;6g;^^Yda; zknYAg^^mb$V5{Xil)?aS-Ct=@FN*5YBiE@TZ)pO!urH%6pu=#;Kis@tA*hl%4q#T+ 
ze`5Rs61F&33ck=m;#G#|(GXvrSYPNpg}G!tk`To!Vtvjj?5l@V@WNu^%qTGOc0a|4 zj$R>pin8FEzE_zXGESeI!bgf@zv1RiLCy$g$KOm~6`!@x(QhHZ!PTiJ%Dd}e`^eXlbickAGJE=2VS@U8f0Cm&a=}1x zqxsa7peRi)w(a91GL!pUX%lQpJHaNsdL0J@N-l*`i(X2%J_F7DL$a z$x|C&@m(){$d&(E-BC0G{dD7f|LqSmr)~s;@6#?3jb#Xm#t_?KHqN;SBRP%Iu+L`p z&vo%96XjNrHNyQOx0 ze#8daizN#w_Iz}DBLXq0Hy-`^TqrMrJrdi05w;!#U)OaAybtjdfvJ|~|4t_fcwsRa zrT0{lzU()`29qg$D<+uE3%6jilc`O-2JV*SrY@nnnz{0pz|6dZhBT6M>o?U^#3D7* zwD;6JN@n+Z?+;5*CAQX4>mbK8_&vqVC21*l!C*Us_&2>H+T=C`2u*_6>K_dZRk4@r z3`zvrSgjZcuD2}q4wjhoN3qlROD7f{ct9r~N05?3k6 zn`xBUV&CtnBOj)-Djc8fRRi4A@gXXrOS{YpWJ%%3Vty?x5qcRPeXZm5+e5jr10^Mh zT5ctqH6L_n zNjiQu=NueBOZGk9cu1kWjle3IThccf$lu0HOhgvtT9U6~Ii$G|`Ex9(B`rs%;`6$G zIhPBqw4%V%5}7%ubbL)kOv$kbFrtz_!!Ayv?}zf#KSq}pI{hQ9bgtm zj8!P(98YbP+0R+Ko^mAQ(6gEML>Zd@`vhC8fZkPVAdVF8K#pisG^m)3TeZSmy3B(p zl;-Zkw1J1CY_SC6XtMPBw=eR7ILTb9h2ifCePAe-eFk5h6`Qahl zD}$ODr%>TvCMlM+PET`ZVYOA#;F>MJDsA37y%q3@Mo!aF`b4L9lkpAU$>?i+@DPG4<~vY#UyN26mrqtaTW-;`bGTq@chZXDIvJM%B-3Jwi7RY z&Fp(ZEV;VI1;x$3cADy_^h{>%h(F&TM&|P|_A#L?4$@^170i&+%t%DYn&@YC z1a}epj{w0n6ZSXZQigM~0=otq@|Vav+;NDsYc3;(gtes^*ZWKO9|4N&z$FlzK(Gv3 z!o8!S9Lr5YegI!ALjIIKhoSrS>g>E{a(Wt>MP!{j)xmetOFZ8&HmRk%rQr-_CLUUZ z&sa-T{2Qc#OsXo<%qhuZ0rNBcJVao|agv%lM);5_Y0qq5{CRNJ!v6m~@g@!qov@6T z6lkEvZ2fKC9h@p3qV6U_GjEAC784t&s={q!ckyz}{#jz0{P0>zMY{K#I$cp6FC=<% za((m;2y~Ii;L&e0)&J++4pm;A59V&z1Mln4_XDc$fvY%P4=p+9G@<8)A9QkE=n&AS zzj*M34~#@A?0p%#r;7grDncF|{p2&>l zo{h5)5jxbS>G!*d@J5(FWKizt>>&L@S|OSi<>z-_pb)olx8TPZOQlopLvmHDh`GnV zkLBjl@?T$^KAjGRug8B9aoeT9Y8mg~cA#jaDGnv;RhBB+V~)rql72O9CD8ZBmV4Mb zR)5*sQ+R)GX-H@XwyqF0mgw~Buw^EV-CLy+#8&M)VYXL4Vz#1uMqX`vxc-KYGaP?O zM#TGm;(fL(S=3~s7N1oWd=T#rCK-ndr}%`*R>0{*{Evbk9_lf`_G#G#T!_}*~NO8pv!aFmz#MAAem=OXZap+m%`(rdx9#{hcQe0lIV&V2O z%bisfcTru7=@V1$_59}D@y+U_GuizGpF3Qj{^HI%%7+RD)JsT_nxZVCEv+wk6d~xY{uG<@86_&!Tyh-Xi$QSe@ZH)s-0_oc>QYf%5Mzk8% zmrGT;uz9t*p@ZN4UY7$kU~L_^BEi%#3}rpz#R_d!lza{h%i$nJKp#;;Upr1|bgD*6 zHyg>LPk{kb(ju852a5h&E+$7Q0yedNcTXBznh~T36oQT-@joE2e`CYZ_8@XfDH@p= zeo&rJ{=*g+WmFz5Q*nGn`oKNIgcF{b)DnogV^Fo(mN)qB`mv=bwcKsyFix_;s>*0q z8?K|sdEj;NG~7|8?U`p*11pLwg$NnZClstn|89J*N+CaEXwt=NJ<<^scH#hTlKZr^ z?H{o1c}FN_8k0mj4DhOnr$ZXUZ-nYCH^ow3HFF-Ct71jYxwrD<&v~iG0fTV(ktxmH zQR%8(O4#V!C+xWQ`?zc5JOWm1qYj@Kz#p1Fu;t`6P}m4I=xv#ncbXjCh-Z%|?{g8M=p4Cs$ zzF*{$CdW+Xec7UN{k?anrE!&nt@B$pkTM}gyIK>QQ@#E1F?}_vzf+E8uT)g}aNf}W zrSHRttbD$ksTQa91p_NI?P<_&U+AiyScBdHNIFP3N%3w!j7`d7z4JcZzxO^Nqm51W zPPg`-Na@Ql$DGgZb|}Gt_6I?&&u7-u)xThul^Q&!k)Qh7xY7f;G)G*K?;tFt_87=N z=nt3DkRUZ~g+jjn8}&G1EX9*;`G#FT>t~VTk7S`t4x#kh^UQ+fUf42TNUhqUT$b`~ zz+`CEw8Tm%&E6e1K7ve6adQ9T;ivF1k;MYHw>Rc-s>Ld;<7Z0_?uD)`sV3sfvyk-S z2rxvf?lew~0PqC$7sT?!z-_&^?QISvF=z48XPOz?wZ@;6w?iR9@NOI2yD15iQemQeq^cV+UT-QtFfl5a zTCV$GRIA_dkDxq!FAgg+mq-4ic4p8D17|K5M@I#=^UcR2LZHj8QSNrxF??qEb4F*O zHvah>vzpfC?iK0TWXWLFK6orzTPzP&Bt1&DBulz<04iBn;3>uq5!?QO_wg|_HO3OK#VW{;p972t<-To zw@CYZeul)Dt%ej=pOMrW3H{Z*GWoX$MLQX@bU#4V9;cCCl|OkeR2)qINJ{=}J?wh6 z%ugb?6e34|VXui%;4Mz-tw$g$}&78)$V;<>J#=_E)cW1J2XrEkbLqMxc|~%{?Vf zG6Wf-%Bhd+pH-aoTQ@6a*w_9G*V=!#etbVU#`TlyeP@b?RDU$k@RxP+$?<=T*Y?*Mlgb@}&esm<&cQ!su^g1; zS=d@{p!JVtnCZ4+!wDw^0~EjKgEC3^fL|J6@rM7!EDnN>BUQ~qu2ilfw4P$eM^mT+ zf(3dRX@n<$NUOX973C7+^7QDmWA@=1F zv@?J>P@N$_(4?G0nPUX%ImEZm83TGr17+?)$#;M#WKdB$MY93wEHJ+6&`~VoKXOgM z1ImwBHFanL?E=L!s^Ot@PbksU73JO)+8JOTQGh~daOGIm2#5GUJ+5d?O6GtT;6SGZ zg{ud^RZ2n)gJy$DY4Tr(%Af+zXWg{^qaN-ytQeF7?E=Ne7XyJG^U!oGzL3AI(9Qt! 
zhyYZI7&wp23Fiu(IKFQy^ne=XqClqwA>$V#APV$4Wr9O|vBkd*X+THiaKcM}Hv}l} z1hK0ISkbUR@x3GH^!$JI2S(-pss1s*Eq;Gry|s-0o`ctc5GkWS_2M&Ol+jAa6i(`g zGmYMMluFZ4Ma!t#DTby zM#G73txi{4N?kKFOU;f5-?qxJjzYDmhA@zTS+hv;il~@SA)^RfsZ;6OBmA$r1*)mv z-2NT25*e(>|IyVFX9VEb@PXsGgL{Uk^ZZ*wA143Y{k^Xk@MkWHz~US~zNhEAE~PLE zcM-_Gk#vKd07xhZ=xl#%cSvp0j3BpQrrX&%b5`ribH9-OB%sfMULxI`kV9CPoEd0$ zj1W{oQY)maf+z>pj} z1KeP49@IhSo#pO3AfQ15?Ou~(_J6&J7I_QxSqS(q!zdgd0K?#{!MJh(JkJvJu?9Pi z5TXAC)`r^07W87$*V8BqGXyTE?U4qhZrA{AGeQj--)-9F1Kc;Lhcmvonf{LpGZF;a z(8c=jT>n2ANQ6Mmjl$G6!`UoRUC3G}QG# zjhc!7!1Z-tFH}^=rH7&aa%0eeD1#YfB@RS_E?7u)D%nNA%rrvD(mKI2$ZT+Lp_L}C z>97Ls4QPB7+T!Wc#c{NV0<>DZzu-y&lKVm?~dTWDra_WD1UEqNmV1cxg z?x3JmeuVz7?|Hne63KgUDjmnUDNzW4rnJa93}-!Xyc(X<0J7G>S+q0%a7`YarGpMtpJ>> zTmt~DW~vVD#xny&0wGJ|~hBSP(1~x{h z6?L~wsth{#s@76kW9yINc{SXh7XBc(o=feG`wn&d)MDQp>6nXOP{#Ytv{1@P1po(njS?0%v=I~lth8DbVdX-+oh0urUzU2m(4?6w{sw zzs1Gn_4Jn6z5nyhow))@u_g^Ux7GB!Mr)+|Kg05_&%r=25VlIs0rx+Q=YPhMfixn} zaR8D+e?U(<(|P=&!@*poZ79iEZtH?})4rojdD?ZRU%d{#70>)cHJRO#M4zRA$C7J1 zsWnW=VH_Z$_vfMRpnpXd>(7cZ033Ksjp>7G13T z)&4jJsM4r1y2uWEMDNsS(joOm6h@Imm@CHfE~dfbqL;(5tLG~Q`(P-W<~zge zfoMPWj+b@*uauZez8ejL>&QjsG#p2KU$AGeoLnE~uo^dfSA@3_2Lr{k$!kr!3wr*DQ7~9+y8;{L!{9pC|(+q=D!(S*An4 z)~S52jx)dgAV5U$I8uQOFPIK){x6K0d z7a3L|?l&{AkG31ES)S_zR@J_kRcxwO>#NHLIg8J19UqQ|!`=AK63b_QHe-3tipMGv2Dn6|q`bmLd{z!$A z-JfY8@@1sClBup374b`$zE42}`=ZGAw?s`9V8Y{VF1q9wf3zDxhLqFK;>E3H ztoztWwC+SY6v87@l6%iL91KwU=hJHkz`xUN;TzX&r4hVe*tz~$GW<1E8UE5Wkn?k) z0>Su#UlL)de|2oBVbg7?!=jsOwjA&!2}2ZwWrc9~8n9m1SItUVjBv@(Lr6OiRY|wW z40a_$^qC+}bhRdywt{uvGPh%koICgb{wgVHFnDoXsNk^L8=Gr$PMfRM=TBL;(k={^ zv*9M0us)2j)NOTIE1O>#!HN&E8ho2r)I4&kq;>gHiSw}Y>8%iqg!Eyb;n ze2@ez%ohJ;Gj zZx525JA#~+zx8X@c<0Qg)m7Guti)p~=*??Ncd#>ChvZg*n8;7*E4qivMNlA$26lZc zlx=YRlKtR%$27aO=Cv8Hxwe~mRbQk2LtBUU?Fji-Wjork<@kgr+n>#OgX>0r!fW6R z^Wdm0AJHWFVki<*(=0OB+1_!|FM@tDBe|V2Xvr)+A8_#X)9)%XVKnUS?|O6<^z6ri zsr%m7TuDXOOC7dpruwn~*O7KtwMt_;uLhiN2jrFHYQLLhwca*loc-zKw4YghteY>z zd+S|Ea_j9Vv8#~5nbB&OZ>QG)RTvY$!5xD3gmhRTov~1-}C2F`&HLX z@vuv~pagadVvTjrjL2ot%2}tkz+c)*w$-g?*=TKyt-T#5(GCjG#RvpHF);;Tcw5zW z+K;Rnlh&iL&{hxOemR8H{N%A&E2-j4O=$p0p+K0*W(e9S#p2M=&}=8O1zU#&9e+-s z!j@HX7dKkZUoPIzK7ahXnOp%K(cxJ)$fwqlnxV@px1|RSFMqIoR3Es(jyN?t-K@?1 zWWjr|-o<}<>V2nMw7%!npV__UKKRmAG+<_&jtr;0Ct2P#oV2uXx!;iZ8g*pOn|D}}(VTWpj`eUW6z8%Y8Q)&-=m$$%f3l}ITBy`w>==*T z{~MZH*^jC+Mn1{Q^LbKXqjqJ_egd4Ld-YDxA`Rn=&|yu7qmHK(QkF&gQliI6g_Ugg z5U{~^{U`ZY?9o&sUAGPoanGwU9DG|#u-Hcj%&-lo`l%cGO{mN5@p5W<`e8=`4U4`) zI!Ep8$%<|iAC`^xRV&HY->lq78$N>1jQU#X zTV;M9k9;EJUyJfbqckiGt`S9*F)&tYNX!rAa^C8n$p9%>FZ@{KHw&3-^6M8+C)W;x}tne21&MLg+Nn8?T^gOv?pDYtoojIz2SODxeXN#Hak& z(;Qgqm+bVnUa3)uMrQfRWnY%hFMu3NAd{R8o<9oJiQa6KE#~U)oGcpGEfLnqWX258 zA0$D>qB!R@wJ`gg)K512$K7L`CtF8OGiF*qrn!LY`q$O~x{*f7^(dL(elVlMlFQAx z`1o9*u0vm{Y~8zGDf{H(nq*_;+Jmu?5hsa?aypDRpS1FyviQ-j2W;YA1aJ1*YxLhm zju8;(yP3C&a|o^hhqS7JIx+V-Pc4bD_owQYEJt63OY_Snsh8YL9oQ$(jWH`QaOG6G zYLUL<9ffUg9r5PqzTu?|fi)^L_0GgwpoPWpOxLx08oQ5Hi{slH_rWZ*mWwI48S{~; z+g&O(Os=9oRG2Zx&__l9$9N~ z7U&mU@kC`B^guRsGYHw@gP%Dw6Mo>g?&r0GaI7Cn>D?6)*txIX<%S74+KN70h$he< zijx2jcEdPVRE*cfrYsUKXctu_w6DIyGRs?Ma6IA;zuo836z)mqwDv_syJ~(SbJlDH z-tio~=g+kX#Ucgy#0<8AxIIo#4D+OQEV!uXa#PqdB0;8T;Zx(+d^wPjwb66hE(3^ z?k`OxJe~Z#*wn&}|}Zo9W<@taYPCwsq<%=^O0@}6&iuPMA~ z0Ap_d&(&jpbwlmBsD%U~um&W+cszfXw>@mD=Cid~F6)Tt`ismCbA{&vd^MwQ&^jy`;fdb=YjM9%gj z?mN^<$bAf>Rd^KPm>UC(NbZ5}{?9kCJy(c5U&w7Mw7E7ktF%qalhO|N{F{PXH6iws z>%q1Gs*JS)NaJg?qNE>wub(}wx)Gf?P1V>PA$1Tl;O~^#obb8e*dChcx86D#X+ac6 zV;)6&!Z^t9ijo`U4l|ErvtVd#c}0G>k>io;xM@w65AixytnT>p9l?*vY|x?Lo-wfn zENJ&>Lnd{1R?EobGAP@^Oe*MRKo}1m5>-;x@v3O5t{@V|V#9`?o081TOn{ 
z?jy_hyM*KH3^LQNdKLV=+*@0^eBC<52ooC2ge#GSS2y4F=5KQQnV;%1_T*v2lT|;! znnnx27FO!23h<^~Ei}D&5-x&L0_=QW+$Tl@;UCFQjal_OoFWzxKJ-O7sH?+Q=43z2 zj!t64hr43_RG8idf=gg`QcaX{FSE`srSfy5Iw1sjzooPdgWo@MF_=3#p#5Zp>+xZ= zWDxdXNw|k3;Ad63|D*wY2i6zzo}FJ_Oc;>&8gW0P-nXsC`9DESSHNb$x~-&~k%A3K zoLYP6-ONwuA)fFtJQFuIi7b-FIC|y%aI>#q$Q|RImko|^m_Mw*YQHz&(cSj}vrJQ3 zkKAxdbdd179h?TFIj_r6Wt+gk;icDnIx=;={8D*?9~SSO41=(0y#}toQDKmbO0^@y zbXs#h@xuwteY2u__MEpjHGU+a-(*D^Pz8i(o=&zo&-*%Bum)A~d#`!nu+vM0oT7-v zE}Rotx>2>OscRObF>bQo!~amtEAmmYhAKoQ~pBHe!G(|zAtf< zxU{0vcu?|m_2BNdL)STT$52-A z1uUJ#&%6TUu>p(es}(-FY!2UcP4^B3?+wZ;3smqOc$YmF>J32{LR}jNjwPEP)dQj7 z23lr_TsquAzAi!Z2ZwPT-8SDgeO}8NNMl1ox|b5(s)NSNT<@A@Ei~Mqt^694%)u_? zF(|5H@0N-|#y2yd@0Hc}`k@gqUtlV=kaWE<=xqT597TZZn)mHh#^O+a>=zWy72WD* zwGY(&NIBdPW9|Ja*-lcC(U<3&uBRAb?#^x;BQ)-ZRb}|Z-?WI`R%=!D@Pzn$!*F(E zSa0^;+^=OP~dR*_$IvR(ixzSWyHOjSw&D)`)xbbyty zB}_TLN`$h*>3l;R`CbH98D80OdA@0>KW*dp2Q-bv=>h6O#q{xKk93h`G0~Fws#|fD zAq)Z-M1t2fRX^07IuJQ<3w3N+ex))_HF@o-`k#{Ykieit=Ko$ASSZq`m&ZQ0o~?@N zUUC#S)>>bxv+C$Lzs?ax{-)rQ!!RsYAh1171QUIQa2>5=x&5L32fj^a(pa>5;h_4R zbCcTs>q7i^EbS5AW16X-GHb2FsjN?*tg>dwSM|S++hOLzc)ZG;b*jwYmEZe(IfIDl z6j0DjI_KKsWj*swao;T*czlQ|U_oKuCJyT9#RLUeb zgqq^H&Z;>*ok+EA(y{kw-?Zx-V?}mB;&L*Ai}R6QG=rm!HM%2&bFLAB4-6XD);)(F zPe-Ckl-a`WMU~0v7?O1RsLczSi-fsJ26p89?nDhMCpLe#vFN>@R8T-mTx( zN)Dhq{$GdX`Zsxzgwc&@+ z*JHLS&9FNFwQxz>kWpbs{L&Hc7-@xzU!2l$sI;pwfN2?Hh1r4Gkwh1oPXEsOCqCa< zZE?T*9yBo*xqVi&$g+?LKNG&#HIz0I5yfV_tDO@nY z@+4qjlX&yTtke_iV*>7vJBjj89FHjV?&G@|)GKM)xC8gFj7({D&)R-vndeONHg zXeo*O{12glZt+A5pT5P&(b(S2itP#=B8}~6!|wr%N{to5hb%)XchOGdH^5tD>t0xg zUt3eD1gg-gfH;?_`Im;J`=bkGjr`fyRWYV1&Gu@E5wIocYGG3OYVRGF#&1)NOLbtu z#R+hH^u=mPG~cwSKgKIm1eub2FbzrO-`0oi!}w@aA~1!dr`u{;rQmmD7^Gx#GO$8X z16!Btv`vNzfw2fkEPj60)k4#EoSqM|Ga;inM^6tu?s{H+YCU*Qm}^}%>y)~(27?hm zZ-hd~O-1wpxw>O54|bmy7r4hKwh8~L z4~!H5V+q@)%^28A(>dlIXMyNGX zu46xE6TfXA3Gz7;uE^gj2mj*$bWXy-NqWC)3Gog@$CmR%8{3NGb~5kWA!eJ(FJa=- z*9*)_E2-m0jak8b?)tt$C9W`d#0RBd73*SO?>pI++3^XD^Zu5;K|Tq%cosk@HC7(x znf1n(8a&&}qe`sA%E6eVfl9j=$yQh|dxMxN%dGfF@)Y~5a=tJvrQ$7MeOJWn^Serf zSmk+*G_?2}*!R4A*_?d-OOzS?Z?2 z%1MKVA2O~NqB<49#rC*k@-EsamX~(&u1!)#IDf+JyvCZD?SB52CtHAt^nW>`Y}w_N z0vmh5&P}!SVh;1>6KWay7yDyTW!qw!MAlfF5punr z3jKM`#br??Bx|*~ndGo#%Q(qt%RbJT@YYR%tyg;Nn{W85Vr>}8_uF|X<@EVRQ&Eiy zWuH)fo$#LvAotE?b;guz@(^0XQz2`ADMk8XB55X^QTThc)hd`7CaEBk)P1;d0r9bA z!372alP@COl#iwZr@~7w83bAC=iShoWobmRZ#hC%Wxt*6JS2O-*IRrnF2id~FFt9Q zGSN=T#F{`ST);TeY~f2jnV9y88TvUaAoPyD!#atsvxNKcvwfD-S_I>23T?4M#g_{{=MF}%#4t6BcUQ`t)ojzMc9$L+ZPgd|7T(`FyQWk8 z+`g5s{S5No$RDhh8Fl7qYHR)p;jVPpcaL}!aJ^pAyfE4reB^A1rEoy(+Ycis`3qf6 zFDH9`2*4kM_}z~i=0_KF&2_*+$Cbu*{Ec;ra5kkGvn={OD4!2_7h1*TkJ4RskV7q; z)_P5ZJAr>b`E%&bC%`ZLWZ7h9v0`Dfc%O0HNj~S##U*vtb41xgr5I*{5x1G4k*g}Y zqQ7w;QLX~RIVpG_&g{_~*eThOu56q~(I=&okc;t+OfYA_z^BswZ@cc^;{|O39XnfoRgRetpgBbKURvNxwk5qTCfhAd}mo^?A&(L@vn6 zd0BeU&?~sMOKQ3n)1oF9&NcP@!<_dS&WK~7_07#IuX)tB;C!f?5A>Zz6qj$ zs&aMJlHK#0S!ufwe%8^6huYx03qA)e1Mje0+vM^c7`r*Rlug8-OoogAJ_dZ)uCEuU zm^Oxg_H#P4UI*MoqHf@}%i;3yA<)3~mVmX(&pNIW9r$ILN_Ed3@(#0d?FcAgAtZ;* zDzW-M`A%2+CXpRc3dufT>9QTl&J~ZL=7zFimEXuu{_W4B`DNK1fAy4(p`N6jQ5ymW z&ucESJyffO@(|;;OW3Gg=qza{r2e&}_w@0ZwZeTPPiL{wbs}Hy2lq+G;@V!jHsNe7 zk>NHKx+U^YF`U$-Bx)J{4Flf`)WFjEilo?0w0V5TCq&B(!L{_GC2!P7r%ITn&uR>t z%6*Fd{-Dal!&jWY6q@`eHLNb+tj0W_ASt)k+g(>SwM7eCZ?)ErJKn$^FhFi88NR-y z8|)@25}Nz6G<1Qw4Tn4>}@UEAx4|^WR15 zHMx2FstWdun3`ZE^2C$NYLjq<_WMGOfx;1$2O+uZ&~3%8#p6v5sN=0B!>Fa-CdEpv zca8%{O%MKvWKsFBNlNo6vwq}wpYZrd!XHC)C8El*xuXV-z%UTILBnUfPmrQYIg zh9h~L+HEGT(^{ESXXkq8Mha<+digrvxxOE^Jys=B7zA$XdQ@{r&1USMopU$*`VAc| z1s$QNnOKq~1hEPTTW^K!Qyt)LF^G3DMq0F+jirc#6OT7k5%Oqg$ggI~?Z1A*pb8ZU 
z5Qo`vSa*IkbxtY#n6LW*mVasTS@1?7gZojuDH?;+GXW!|5i5f%9wQf#0bT%9<9&9L zw)%z3fKbtXO4l;y`WP;|gPJozcXzJIW@>7>DiXb*tRzA?U171)!1MX*=L?87>rI@P zewf$k2TSi20$!5={kH_;cAdV-T-tsC;tU-lq+>UY{!i8L#4v9)wE|u}P1eNpDnVdi zC|YDCMAgqedb|QR_aC})KjvFu>MOUy>MI|?Ccdujx-L)!V8SbmBImm9FGDFo&vAyE z!myU&SrR#o8K)lIQn4*#lW)re_5uX|;EDD$8Tv2enHE=Y7@DiK(JRj?pk}f@wp&|4R4W2$NS*UNSn8b>ICVag$z0>q3=5*%i0m;dm4o z$vD9)xCU5%9n{lx#})TA1Cso8mq!lw5D_9q(;U5{#!yCB{PKd}!V%vW?F^VAH^S!nG zwM~3hN(9l=BEvO;t%)Kl5msRn0xH+%4GqGt5fJ%5?0schUBS*MTC7OX;_gt|QZ`cD z-QC@t0>$0k-J!U{#=W?^dvSM%yEyNCo_qeoeZOsntV|}!N+vS1xKMB{RG4aMn@`n_ zj?Z#*xVPq^=p>KZy$elV@1jtS*#01}u52UnW}8-ls+)3C^G<2ls?%#T5ik*W@pa>f zECqsebo1&?;hEr}$g1}3wV;>52hZ9b+pC^jb_~l#xCI}WIA4nB$)1ou?>_YdM{U0} zYP)!-Q?;BAk8|(xAr&#%G`~Qq^nF6S?4FKld_>KbAa1VSu2f#KCs!$whtsCU9?ddR z!!_e(GWTywnUAmhE?aCRht^$V<#H=$UxFo3u3Rh3Vxia&I9uYm?|%6G*FuYmIY?I{ zM$vM%G{dU8CLG0dT@VufhOwQGkZ@XMaXZ9K5=eP#GWIYAU=@|Ud%+qyFXJ=ksCdg1 zMDETn%|-$xEY{ctqZp~p_T^3)0w=1!B`3%9J(3$F&S>+>n^U=Qx{V9F_JJEMEj5od`&x=%+kLHVyr0;;Hr73y{#I zUxN`9IZU?$KSy^WKN!-cu1ynLf3Ggqs?1c(H@2^REPGTL#me5=eM$W3r8~ds-a$MN z0*=(j&97W6hhn&%MEo$vOENF=mif;%1pn@nycDuxk2^oQuaA`dlm^oRK>#v?(px1l z5|O6;4r6n11=R%T3~3_Jd@u6J>lj;X#vlgT5mk0?a{X;4L8k2q)$uok>Vs^yQDWJz zS{~2!X)@r7U16217<4t)^Twq7s@y`Hp~kc4{SHkb9B*(G401KTHa(kD7^aMl3B-gq zsn^pq6M?;LrmSs3Jt5KXw@-o>`)5EDofAm!^aW={K&qcjE*sg+q+H$cavS(T~9$O9)$*t^7NE}AKvLSj%94TeA()7f%{S3`3Hu+Aim{a zD!=u_3UmmgSw7R0d+VIZxQ@P-IS4mGZKA1$R)53jfvdPz~dZ< zeRCl7LGa-bwlA2x6zdgJnWU_L*E(Gnz5`?jt+p$!`%J&as+jg-zUtW0*RneQ*$x}`H~>K)NMZhukA+7)uw%X z%E960^_jq8>0DMK4-K608df0g4RVcwoQ7(_XPp~$-1aKQyL86VEMZDB7V-gEJNfX; zul%aiSn;Lw7Po9WPG*K((snsL1MRoUK}o3mp3*gZ`|7G8<@uyjVcmUtJHZ5D#VU~@ zznUmM=ns5>0TubQ<88vGtUPsczFQdko2(AxF_GMOwPR_L2K|jiv*I$ubKA?K!)=IV z>cXgZOwpleDnn^hY7h2J7k>dP&3%{GtMb#)ma?F09i%e^#R6Nkxn~9+wDaSymy=DM zX1t=XOD(DZB{gX8CPmQSj^|Ai>l~OrYYej_B|?26P*&b5m@=x|2T52Tc{NfM zRrAZaDFc-FkDMwh@aXmhI_h+FKfRhh8lEA6Bs`!iJ8qUsMzt!U#aF?_l0x{|$kCUZ zbWC@_`m;fL52_pK)Z_RSS zgL)`%AQi9R21k>)i`%!dEDKq^a-8cXcv;egLHAQZq8}Jn;aRoflVNXaxc!gs*S<~q z1u(~4W%c}TU-52grK^-KZ8|%+#FuuEnH-^iLn@}w^BDBN_T~IDbffty7cb`^?2?VV z5rTYQ>Gj-n=O%E2Fi_yEalj{q@U!dB2ovM+-hv7AgzyqIBD3Xa5iEIv{&n?oe9f4) z$EKZ1P9#nv(N-Hi+yi@8_oyTP;YqfG$OFeldIC`sloN~*gh2HhRJ|Pi+)Vw$Yre{H z25XkeTZiNgZ`md>g{}^z6n4GdL|%-?xq8s!&C&DaDs>T?i2|5$HQm4h%X&h%IA;|- zm$Af{rId67n|-CbweYXK5zA_N?X_rAdwZpFHcmB&C0 z;L5X4fU(yQQe>kKJ* zud6Cj%0!(IpolJbIe?TNu)4&TMk#&0fi+B5#Fly zQlbani7JmR2NC1@`6Dm>5Kh@ICax#epLfqlv$coj4MBE(1Rr^qF>m@(8FyyL_$uJM zE~*5+E7Gl;LoZ>Er4``c^}X7tB9ZUmTEdB51Vg~FPxC1*0N$%-HtH`?CRBiqKA$( z=14=?$6UyiNDze3yNhU>^z0FybeeDQ+Ip$@{S3SZGaV_qSaT8#^L#p{$nSG&R1E6y4|{xybjhrgy6dx!~BaLXEV=;;B6h#ly=~mK4SFsDl!_ zA11P1Wi9qXPzPiamAI~qo9$8DEY&QcJ*EXBq7l3q4oX>9Up>~TZ~lIfl$r+m9{#wZ z?9xys9Qo%@=m&g(z(5@}h=cBJuhEW_=jqN#MFO(QeUBz>oEQ6*S~tB*{0KPtFi#(b z9#H4zAnG6sY&!0E2`U-$)BsUZGh?)7-_M7&zr9k^Y^Tqs)IBJuO)=B>n zWOf-tQG_|#LZE)k(NW^hjx$}1{b3ePmA03A@y~FeYjqY2Hy+ST)d<3QI3Zy#=|;Kq zNi)iduS?=x0*Sv_>9l1CU|BX6qa&rPB=4}5D~%mXY28e}Qs~Da-ejM|JN}SRPHm!9 ztMFYe+`F^HwMwECo26k>X~RrM%Jll)9+-V%sK~Nntm+e5{+`DJubEt~!E@6vESLfH zZB=3OKX-Z-tK41aZjRh>!{|hAlVK(Ran-+B;N@s-b79G z;)U`m`vhT6c9L3{>YL^T3c}B^F^Hk6W_6!QhrszmB*JFThPZYha3IT2Q>&{ib z@+xqA(IY{|?&n~J5uP~n8t|M3Dr7#fdDcmR>9T=>QE@&KA%tsTyd?orNUnd#547aJ zQ|c=77#p$8?t~M<>%tI=apDNV$Qo6D1H5lcT=FzVH_}J z$I4NZPra^I6Qw*~s9v65c05;`RaP+na2JEvGxZeq>=JeR({4~%gJJ@#AYLyQVh3tB z@%2pcVqfUgOB=)RSmj<#HbTnxUrwx_UJ{38{UhSbqM&Z(ryj&-_|H?i?+&OC<0>IH z;3{Y9zwcN=%kNTJ8%{K)taf5q#KW1+*VEeIZmOt84R$^jyHF)ja^8x+N6vIP`-Pof z4tl{7fAbGs0BX+Ce1jRJ=x2R39*CG_@(<8j~&GHnd;vU3vGB z0&b2yXvK_6d9Xu*xL`l*%LO*WHpbKIItfOBx?tk<yeF3$py4kRog2W1JI 
zsz3u1)RL&I6%`H3DLU^mattcZ?w#rfA9-(rW5>2OlT}KmoLJH*5CpZV$U&G;%`V{` z_r6C9V4z{F{J&{BGiRSXGyRdrHiZtn{>hge(Qfu*W_)xIo2E$_?ZU*UZ*P?1+*1%9mZh8G2V>Yr82^!;PnkD#nl8<^dcy9)qhRD-8D5h$zwi#7xS@Et> z#!tL|fCj58OhsOeY||Dw543|)kqwapmS4Q*`|D6rG)wyW_&;HCD^wE>nm|%wHDxkW zK4K5yVKygpjJpCItn$p&A3L6(*R>vmy}ma@dQu8${Hb_&s4$)Ws~O>xS=Q-m(y=FBop8L-lcO@I!MCQipq zV84p25%&QNjU&?{pNL7h?=?4LkXAH1 zx2KHQY)?&!JT^3wH7%u!6^*-2jlqF$K{oET+G4hXUA{`_Xs&*WYJ3}ii&2)emGlpT ze7~p7V_Cab(|;ws;$ zKO!0_*^KK?xai59obdXpdEU^S3@k@^L{{ELw{ots_}v2D(+l5Q5c^^d{e;ra(X@%% z5lo*{&oN5B=%&rAwX{EBG^o!j5Z6!n9r-A#>f+jlw}+*Bc?wVVu}Hx~A99-KuQ!N4 zuNs&&kIA{la-@}6q@v>WAu7j3i~sJe=EZ+mL=3xIq`#84R8sfp{ZRze`pk{VIeyS{ z^)h+ReZPj8iefcf()0meuVl>e73XccQmSqjsavzZY;{pTq{&V?#Uc?{bC>bLzWa47 z*?-Kxaf*78?41`bVd)EC#wX8a(2rPu402EqSwGo*NTTMv7JuLL(iNX8xi_pFU68Ut zMQIkqSdR|Xu$Rs)#7596XJ2pMCz~EE=lCHXuMBeZ^{ZUuArv~dVinnEN5F=1P^z(J zVd%K^frf5B(6If0vAKR*K7HWP$&WOK&n}nLHngF?uo*2eGVk*Wn|keQe5AUha$HR{ ze3IlEl--;VDgKUS#hY+=cmuPsKUvz5I^4j%#R;`&3iH;Aw>XVSBtgXmmK}5_8xVf` zIwix@eC&D#H>XWod~?Jay6^$#p3kRz5p>eV`m&q3rnA6dpr4kBHt`{NQwWCsG#*Hj zHh$N?rd&e`GrmH>;LiFKC9&KtG}n)NJA&a=$XByVi@cP-%TF0tb5_gF ztR)uIi|&ZtHtOr6AJ=Yn{hZ2ipwz&p-z3x|tjp_?ZB^--mE#WO{<2;DX2;9^IjlIF z$h>9>y77lZb})NR6%;RqF+|<%SD2fuw5P#e_*|!mXCe#YT*z`eqPffV6?jA{njA-?)C`T~L=fn_jNthA^J@tRQ$NNJ;-f2Zg-+VB2SR1bj%Ncj zQ*u!5Ntz!RO9F>YGM%EH0oy_`i|gs5y@tuZ*N4ojP*;>$yllI=Y~5B&Ga60Ek)xZ( zX_L;Txnc>KhA;||Lf(l4 zEYcfU8Lz63{kzY%AI5mywo`_HT{eChT*xdyEk05cfdnUiz29If#p8bY{SLHUzH#v2 zZkY!wB$UWBkf>GF^ZoJ)b`i8Bt|rPn)Bls3sxR=@fj)VhYjerTbs7I}MLyUP0#hWa zKr%Qq2*t6B(#MmdG9LwTE7$7F}q(Yq&s>H$4i52f2WSaV};(|sD1(~sDlom*Jm zEkhFd;V{N~6d9~(c;gL;_Y(4rJE*~JKjQVzG!lb?!LV-ZFVCgspWKa` z_3}pROs+3gF^z1~(i3rTh9dJ0p>csI?4*o5*hVe!}F86wLV zPL==Pl}`p=Zn8f@m`Dx!%W>+}Ycbr5RkD<&dB74NfJ_2Uc`#TNCL5lEtO|#=+ z)6mCzKi675KhvP^GcDO9^ScL2q&b*qgjo~H2$kzse;4ssEQmf>$;;)u;&EDr!&yy) z6H{Wp!22xIbbC)bn7lk8!D_+rY0d3f$e))_v%^uWJ+)>1G__cOZ_KNcr<}4c8!njG zQkAoSJawhh-SD+jX^BX17*o%@T{|jS{?#DA_L7m+XoVj@Oea;Az{Gup(_#GZI&^d@ zD}VeusF$SxN~m7={%=V_Si;}OaANwB>gBUID4Dj?$axO&ELO_{|E%m&fqI1N)65y2 zeh-?|v@8AzM8s)U9Zz{(_n=+)7)_6-C}y6IzfL_?P}6f$>O`(<4``Q&KJNAq>D`GkZ@n zi_xFpwrzSmTc#NLB*ElI%GIIzB=!aty!eOn{1N`o_nIIfa6;t_qMuD0vJ9sxfa@P%fOU zQk_NNXexWJX}eB-SjvwPIeALG;vQ^K;G-=k>@_zruFd*`?zF*u&7r3I=XPryhd_yU zw&o6!pZbs!Qou}EhN+B54BFiq_JJL&*4Dk>5;TOkVed2by|V2<`|S?mYvy5gL=}|o zu!67dHMHet!flI>SU5u<2A}14Hujs?|VBK)=C4t8EeERaVZMJuvQQZilNtL zZg-Y>3bw;b^eDS>KYo9(|x>~WC zH&3FK3#a)Fku9~rK%X#A8CsHZ=ur~2;|i7(7vC_fZ;4%mPU?{?%r^lh71bWdCbuWH#*-Tw(S;{b`B7|(7GvPcuN!U@b&x{Mf+x7PTBv{7@%kp@< zsFM74jF1Cm#<&Qr<=4rHvSmdB?9!XN7BsJzE0H~S6{(&3 z+xSuiMcwh<()+-|jd52+-XJTsdH^mShn5J)%?wK$hxL z=Z8vCtLQRHh=V0*7i;2qUHA8(-dYIS_9`jorLXo)@z*=D81jPz&PoQZd(6@|5Ea!X z0#|Ll2DzUU&699zy!4=lKXn$77f4W;=H20#$I+g(9-9*%Thp{~{5sw}B>0Eh$#SfD zO#_XRO;#*90oGmXPpsrtIr-^yvwKlwdq|%Taf8@NC7@hG2>VczfKt~Eg;aE9j%0g& z1{Y6)n4N7i^L-KdfHi0eIT#&(dF1wB;rEvf-|e!zH4Rk`NNz?LrhPbPpvwBv3>@Sw zuj-$qMiQOMH*4M1?C&C0*0PM+OV=9Zpr%3~3qMDLX-F+6$^LwF|ymgUaf>H;ZQYV@-b`_`R#XTUuxn|mF zLxLl6@$tn9XoOlQZfj~C+r9r@hduF{|G-r3JlVY{#|)ehXFJa(rnf~t7ng-Rg692* z*)GYo_ND0%TSe-d%yG(sDWp<jH%Pc_A! 
zdBru~wkxjFtB`r*%H*DHYj%V!e1!t?r)nL2>F*Jj!zwdyw>)Ht zFc8csvy2HSrS)2S=xS3u?MuJVPPGy9oGB*{TYKo_yJ4Mb9S&&;dVw30neG-U%=Lx|;L#6-m7#md3*)BLr0#LHL$aRK0Z#W^PD# ztMhC2ZZhOZ)$fK2MWr5!Dk_6J zz!~=bxtki{{_h^Q`qO>qe=pFniCk`4SJEXHtE?lAm)nFQ375!qUUs7l7t5WI`s}tA z_!hwz9_&uPD`)0rhep`LcJ!xk7h5c9XJ+RP*$#zVZ(@^^f8+k3kh5+-@x)~}j)e*G z4-E?HpXH{br_Zlzlm%6IX@G4WWaYo;MAuB|7gx?76(q6G_xFngek9IwNT;7RVT|n( z5HX)_lf=iu+PNGd_A=fjz(^wrORS8gWL_5-#G5hdfX{#Gw56E7%JQ zY`&!PFv`4L6vk#HF;?5P-Ms#mq>@hYxIwQP9F$IaaRU$WWIg#^6Mq%lWoQ(mlFVeh zXnWG)sw}G$bUIrgX=(5(BgQwJZHpQV{Yex*Q3XFJDH9{`1Rhq_>gn$0blI4sHX6A% zLoYuH$SWz4zqGG4yaPuWy};RsODi{H;$WpdA?Ac`*+$LG&Zar`-$xQHF~YjA%PS~YNnf3xV-geV zw0Q{+BqJyojXkFsR;~4GYp)qI($NiF6Fu{uHA%Ui5Vdts0Vl0!mO&mggGpm)Pw37e zm02AW6xVUZwb(<9uEbkJ{SK&bnMhbWMwer0Oaqal#Nz5pM#VF3h~wld2JP*y=-8hq zsJq+BI`$B@TS_(7v786OROk)h|_S_ndtw zn+;!-px^+lmH9QPz9}dsGZoV&{X1dJWM#PEO#bfC-lTGoF2Iva@5y7S+MpggmKk3* zqIjj*<9^|OHMIxo<5YGTPj8vwT(3fj+|w`?F41=8pKE&yjC_m6w8*--am?c65o~pA zgM#uix@|QwekK==V7*H-2xs)*PSmFP8Ea2L-=P>fjCbmTpIM1?lyii!f5x~;`ue@u zIXOHSDP{md=VE%#)S2Mz>FR7?@+EC2#UwN4V7kz`A8mmeA=*0*=Jlr1YY;<*lQhcx zgh=gXV^kX4H!S82Bii{06G_NM`}XA5F(lO^GVFZTJp2f~3lIEHQ#w0|UPvRbopfAQ z4RZ4n#=J-%b)*vQMj?9VQ6NS?{(WvP)Rl036^RU|qVh-lZ+*f3eK-k4zv%|XW5VkU zuc&@9u`ahx-YA5?rC{Q(_lrh4m_$mLiN(Zx{zhj=MrI(%{M*1&upl4_OpJ>~|DyPp zw}K6&byJu)nh_^fT1?Yj=U~;@8!?y$3&}HbI^0%NX)>ui7Iq5I=HjW!H?*r=VtxuO zTwP1H$6Mljn`R^m#+z6({2G{CS~`xgI^x1yX#OEmF@g+ThWdII;eUoke}LQds=xbE zbf>uy6Uz99qSRrOJ46MC#$*dml_(di4I02H+8g3kPZqB85)SK%T%EjzS9{a$TJ*(b zQ1?x)C){Q1Yq zK4+~WqwJC_@$!&>A>40a#+|VW5j69Pcd@F znlY2rfs{*j!Rlom!b_AH8a*!v!k=O$V@qm4BFlfdPK(F;J3C1TO>~&Y?)}^8{M;)L z=|qP)uzGRn%1FDr5OW)vo0mr5Ui47VbkTmP4dwD?wM17!!=cXCn?<)bMwKe17W~nS zulpA^M7O@u5gpYd9L_S1Dp{fltCcgxB;n#qo5>R$$Hvd!J$m1Td53fA$O%W%f1ciV zf9?i(!txb$qMs*WP8C0RGfb>-^aY9Z_`c2C$#z}0A^`T`F@0_?6`WEzZm@Eu8{*LI z7`cyg#bPkAPWx!UHR3hGrKT=#>FNA08+j-E3GS-j;}a%9$$X!R?Xf=SH|S&IfsxiLmykvh zxiK8Y#10J2QZvD~pCpjzRi7b4L1B#WK>|?N6dUrBfuhNH?lUvVtf#?-J!#sl2$lz( z_-?IsPJ$=AnGa@vrKX`Lx$BplLfR3&42(0M@LLNC3gQp?w3A5KqWEJAqEKL?gcoa) zTYklU{&@MZyUUJk&33*>l2;J9LI^)Ib4`Wv!&t8>DdbmW_#y}_?C20vob~yA{3pDZ z@M>P;4HK-L9gbc8E0$ah{_6HZIhQCla0vNO4`a)D)Bt| zv6UTfjt>0`RbQeT(7@cyfg9VK4`x45cXdw{xKQ`Dxcyq8R5YX9;G*V_+IG)ybaPWk zBvOe?;U?wLWTR~|>Cn2m1p__Eb_wP;G_M10u4bxv+g*;h{3l;}bU$k=1|YnJT`hAN4>IYMM1VGA7`Dm zDL=rF%ZwQ3TSormOD9^db$+ho866tAjui3KKY!2VKLC7UH5~uSJ344Un1VF-!1`A< z6X$${`>fq;@#mhlpmpfYPjby`+Vjs}n&f^)cxmUwf=j^KXCn71f(tBO-ryh+CXA-t zf~<0T**Y~NN;VYqeKyEYaAfUv0tGMBEd}qS@wnr5kyvK}j3(XEdEY`S;f&1htW~y} z{Ccr%sgfp5nZIeveKs@rI$_+lU)HL&J6(P|oUpA=MsBfnVEX`qC8Ym|V7Tc^Y^+ z@rbahLL-UlE_a*L-CCw)UaRm3^9~s1t&fz$BTowH)jMS=#ee zYol7riB0TFd^(ZUtS8H5@O{Ph#d?mm);$gd2Ti7hj4!EIRa2@qFaJ#YFL0w+xh2!) 
z&#iwGH8^?wTry3j21od+^h{0>5bUk&*P6lhusR<_j6LQPYt75Nu3v_HagWa(4X9eg zLiqIw;O`fxVPn9LF-Umlj@C%@_{4Y{!8a<_sTt1^z#~LMAVl*vU}T#f-BNBqh<2Jm zVOmL(8$tsD0g(d&sfFFP70bqlm#uSz{^cYF1&mXW@K5%&c*7ja00>_#25KW9$HmXm40xTut{A z8w|Q_EPyT!!Ue$YeFBUwS`V`6qT8|q7Ac9`Bbo!1#(fEK#7t}17A5+9ULZ0`3}t*?(q z-;Kt8<8s*v1}$v}y;~ApYAD=dwHn74YurzS)JT1i_0tpV%IGAJIG^^hFbRNq1lhuI z;D69>C_pJGCCKZjP`|#uZXFyv96&#Z0k5LnN6!KkoHJCZvI?tt%bzj=M~JqmAVaNI+2UgivQj z=LiI;J0=t00Ok@2#_4#0REFoJZpCd`QLQ~XhLACPO8_99%fIu$;o!0%2{f2r?C{SA zj%c5>cIyX#jUx`iq0W&?3M}*=D?DKgH8MG8tjHMyIE~Jjyp#U7cJcPdCdx!YYB=F{ zkPIQ+r%dL{f-_EPdKE5`j^~|9j#4$S02IZ9A7Fri!|eYQ-x=H~{cgzX)CuSjegNL0 z+~vzNEkNTqCGX^%!+at$xz89G@KcRq5#@h8QOmorN5YYzf|NkdHm&_vV=D_xrzHwJ zi?mr|hQ3b0Uq@1|0YFH4Fu2_CHg{>LZaIO4jzQ#W6VmSYXFcA(&j|Tc!xd) zphw3_`>W%4p#g|B1b|(q!ob+7a{3je&n>_q3m#@|zW*DMfzR9VkCUGhxPis5wT7!z+g2U z4LPh)OgI6+Rwlj7|Cc8*02bXf-gCFwWcReK%lS%qIIT zpnWv}#&yos9t5l|ca&>BqZZgx01Mt7D;;l&<6VB&y$FnPvUlmA?;8SGGCszS#_+Pf zE76Dj<5)nqGyqy0={=pKp}>HojKhq!NZx75-g}q5i^ULb6EVQ=JK(nTWSErz`hDzD6QAV*A*TH%ScEFR$g9_hJ|I;_;04RHaa#oX;+utR)Nl)yx81| z4-UCeLGLaHlMV63%>@>*4v&MmlvG4~Qo>SXBE)UziQi+bO}Xn#~2sZb=|)&*_6tYAOe9Wl?N;tA24&Sq#UEgodtnr9aGrgy3iGVOJ+%R^V;7 z1}w%&py@rZQS77!9%cAVPcl^2+$>(hJWWYWFJSNRy&#)JMLZPgrqY?k_;0W=5(08k zaIk4e$Qh7)L?(_PZzn0VeSPK9pkyW&(l>CESceqI*Lmyu6On%Wk=oN!*>^>tNX{I`UiBcY}Ytnlcc47Dh_-^0L~D0gwFn zvE_}q!X?)M&Jpbkf%dm@li0_5j5$8s=%;7oa(tfX`_r!1g-0gaYh3bmO?N0Yu82P}i;50n%b&3CiV+qpbN_*W$d@y|)x{9yZP+Ri}Ns(eW-@@4tOX8EL9 zj9m41vn7I*Zzk0vnGKpcviHf1*=&`{d0jyI@}HO2?b*^8b_$yjm_fB>%6;P^FX(n4 z%j=GumYz1T(VzRWGfyYU4x@0JRDvKF+>-oD`1SngO>tA1gxQ!W)+Nj)p&Fx%Sy;l` zzLrlnZD8m5JA0}ejmb0;C1a&z7Jj|GkdCQWje}xCB*-%Eln{%;Z(G2~yhHQ=pJ<`V zacT6onoBdLm9jQrWCY9-VA9I1(tjsj!HlJkN00uL9&{rnj^_%By=JKpt~FC^VgK=) zi>6cN@h&1(BS|`XyeyTrdXkiCX!Id-5eh*Qh>1?riJl{2L^oW4_kEuE0GvgVglD)E zlQCNT~0WWLtN$7ABjv02TLi4hoOd;<|OcbsuRp;-#ywK;vS75!qEBx-M6 zy!hqtU{NBMMW1nCUgNoyD_=PPpKQ%uoW7v>k9O;?qkY)~tDbFsBSX#Pdsmi@(dHxJ zu|yj|V6DQ2_YUY)sDO!jDofB3QCCwUPg1Zqx*Ais>}zq$FUsdv$oW0E|F<0-14}=5 z@q2A31tp`(7=_XPIKNSMGNk={xq{Ku0wh&Qgv;`A^Wz)=t56!td_W! 
zfB%UK;MHO&4P zijp|MtAj9yv!?Voe*(pIXP+r7XpALcZk^));Xz#g?5kR8a(2_Qd$_y9#rdMvy9ZA7 znS%E8_sSa@9+pyMv9|(oM=M}9@m~1ob94UR2-x%n_a9E;U>_m%zg0)Loe1aKA(#Y{ z+Cty`C&#h%*JgN`O0mBwIPCG} zIt9rnSGXu58;zp0N-^xPPE+4Sm%IqDTMJbbKoHVL7Dw^HDM zU1N0TYdj}=dMwl4G@_JJ^+GT&?m8>~2a!XM%~G2+9$NQoNGti$nsRvuFroA`fmhNc zMTe@xbHKy!)?`~j6qPs%sxBWg&X-l>mR!>oQalYFZ=4Kjnauc=6-|Y2Ef`4@K;NGX@I<4g%HDw1*c3%09Y4(T)T zinVjRbOlbqj%iVYf#v;1<6RUFE6_;)z(yEGxuNWs5*vKLwiPfJwW3Q686_ojw2X`i z4trx_o}RCc7+pu{z#dv$S2z3q{@kPA(R*bB4$}%-)$p?&{5MNnkAt(0B*|6u&}R#v z45;{LOfwr6;zQa=j#N|_e^_SU7P8<}3Qb6GNOdXUJ;s_gJTa+-^dwk?M_ z^k88nsOSQz3J&Gyn-*jeV}CL#$-~C_cLbUstJRtn$isbBN)7NoKq??Xewn9f?St!6 zDpgXh_Q0O>@O*_7277Syq0d4mYRE@ z_uY+lSik&Oy=c31V!sJ*)hSFpA@~{z1%bKg%-p=>*Kx1<0m}?#)Yn;UxB6D(PjUf{ z>d3Wqb)r1tBlT-T7+Y1Ur32VlJNPNVCDpI5bl$(xH#Q4;zoeE@gt3UEeWvcyP*yGr zHy@3Mir0}9-KTcAVUY)4Xp$;_YbYr}tR=+NM7(lve5xK&w!(NOcjV$qAb+Htb>tu8 zQEceNH>qNrTJfP$$>t^^!24rSNxX#87gnC3imEse05c~%;(`AOtNde6#NV4nKF5{5 zZOcc;giHEq0?V`?u_vPTnej?*A`GBxvSmT?vC21VsB8TplRoKmMsaJj9%@{3;gxveE~ zR+SZtl+KmuU+9(U7*o0A`33IJ+Wjzie!geYTmqeJa%yVi!?3UiV#q}(=;@D|4>laf z8tt}6h_OB`_Vv8H$1J*|r2aLvQ{fD5yD~f~DVQ{Q{n9Z{M6WgBct6EN7Fq>6Ct`w- zFU_y|&xVl3U#0uPv&aIJ&9a0ob{dl@yHFKOae$?fz0; zCqP069kelv!RqD5)JIeT9sg5B{g{K}qab^94Lcd4pEM$MG{RO|i=Mo50e64n@MkuUPvl%8ay zCMFV}L@Z$W-t3hh;wuquL?I})-4c67^?Y>9H*eRy9 z)wR&j5WD2-RZ<#uWZd93oEGcFcaJu2tGw~H8ll(wW04yEFtRdL4moQzPVH`%)C@R; zU)VqhHb!Q)?gWJHElw49?C(=FOvpqC1Ghl6s(`H?dDmR7U=>Dm$9&Ty14dt=1HrZh z$)-O9KN}S_a=7g#(>V5NqLO#$l46sa^a^qR4*1zGrD8-BL7AKHA}+Ja(tJ1int zX%IjmUNA%a%v+Z+&rKSJW~MBsY@Gk8FNEM%UZ1q`g z5BTL#QCXZ194zYRpW1Mq_+j%8 zI^rky-xqA@%mUq&-wXVWenEf%|#Bc)uHR@q-VG%_QOhk8bd0 zq#X$qN{l~EbhTUm2l5(%fUlfC7I_RAPRYaJ9E9}32d8)O_<~J&2%^)Abls$vy~_li z7IMQ`eAte7Y>RdeG{`oG(C=pEBF^wp3>4e7*t~{GT;?W#AVM)R6(Np_Wqo zN1TCzde^UrAHKI}|2OlQ(6BY@71GRp?=8WegSg@NjwXAWItiBgKi$jd&$z2UHPP!E zyCSfD382o_-{2vukPoI-mWy|w%;zFZrdBdaUe#Ufd%j269Urv5K|Mc3XJnLjP}7Zc z$VEJL>NxuSfQ10d-ye2B|JN2NUPW>6Ax!NtwOIX4^oY{KO+@^c;kjea*cf9X;nUbq z+SzFW3-NK1$Y~mV15*~mG&ORJ?#o@)e{1B(DdQ@4YEsauHrBip1qlhSGonyxhTe#n z8zE7{JtPqMCgN`C#|h{2_30wf&{?cCANpA2D~JRxs?W+6`A*pJMW;V1KIa51ce%FvHdT2{SF2J)h+ z0Nb}r#u~fT^LPJ*Hc@Vv{X@A z-onNSroXgG#vXF|+pXu|96FuG6H(xcHL+xD+AmpMZhCo+R{9RW)3SrZgP3(8rhKq} zEMytfmT>w)iukV~<**CNzEai2xnHDP&OdFhHfa9{kE#8VBjZKhT898M)6CNFw>B{{ zV;Lu{xfgzvASi9c7S)& zB!WSJQr&W0C|*Ba7)3vgmO`&tm{Z^FXa@I6>Iu!VQH6xMI-p*4c|g9s(J5QG_hp1! 
zM|l&Z_Cz>=USmNdfOjL1nq+k%7a>YtEYG*g*dTdKE%0lYCKT_yiWP`v+$xDhWd5hd z(}@TpE>8c?QvOfb!@2kIirbtb@yn2b#K_!a7qot_41L@c$$zCCuQdTZ##Yr31#ULe z`Huc?z-P7QU*gZ5Ot?&0Qpin=xy?@;^h;}=FT_-#2rkVZJ`zzmV6QM>6pn!86wvq= z#2;RlD+eI4*k186zIt6Cy4+vA8H3^YXV`%6#5r0}eCJTEg0(vsb+-67gGSzmc4Mp7 zpl`i+p1CL-a+2!2`qua*rAYmf9Vd3{nKeKaTp4Xy@@ zQw~8SX+;D!w^w_$_ATvc9)CT5_tHc>6w5!Fk(AV{ zm6OQ@P2Gf+d9vQ}UWHf$KcI;Hz|yW3njy`Yn*K7p`SLUTdRSHue4N-~IM-?9DLkx}nwB>lo1(JyC#s6;5yuxeo|P(pf-LWKI>pR0%*fErQN~EKx_T zh_m2~=-EB|!)2a?Va~3})!o&%O48!>*509pVuBRV0mAONCLB_}m$XW5 z3ov%(++j`;f$LkIjJTm<`r!QBIL zF*Ejn-~VCnEd!!zzqRoPL;;a5r5glE3F(lOZe$3N8e%}YK|s1Y1szg4hM`00W=NSK zq+6vM-aXInJ?Fe1{-6J!&ew^(_kG`Mt-a#9)=CH*C3BH+5**+i;8O1H9X1?10{uxc z8r2W?cYgIj(lx#^@br(X+VOWrv4gMU9%``mo11(=Dn5h8&aq_iKYnU%?Qu=n&IFhH zV+Hh!L#=+-ii_DT%9~{i#%+#io2upfGQYbhkem;}+xuS5_Xh#)7-y?;P!Vg-sl_^8~G{h#@~UVxp@OVG*eaFw_da6I|2cR+}96e%Rh@VzF^b8FUv^NT07_FPgV+|c|c zCMw%)gb?IHtMnk^&E?*oAWrA@Gp}dm+BT6yCO%u;HhUX~%zhhL(zid>?Q{5Tqn@#E zE}VSCS!ndDpRa0lLn*m7Dk}0AwN2tot|^_HlS6=9=Voo~(1V9$RPlr{t8dn{A7lQkoIXgm;M6m_bO2;1 z3AQR;J4~0Fo1Rha*tDE?V+768DW^;E%L>T3)pOHOacPe0OnyRX|fGpRrXt*A_H9Sbkbv=A?Ea`1J;*BR}1u2Bqc#r1k1gzt&SyA zfut#+sgXtume_v^1^Zy#pFf%ui)PdD|Jaj{-m>lP-Hiv%D940Df@vM*-y$B#hD8?- zw4U`=-n3*Pv9palK2e!*WN=bV6N$eeVOnq9fzPeyw z;83$%1>J6x2<`6M=@z+`IYr&icarP_sJP|cYY}; z_^8*N7Y+A%#M{*mamR-9gYGSL|FO8L2xFVHA3C{I=M_nME3Q`unc=RYmiASmyVy^m zMT$jVTC}o52htbZx7dAtQMI_~N!uOoK>w>?NXJMlH%cXg%bv*Vk2S>c$=jr(U%?U~o zcDf_2+$>q@a{(aO7FKP^1O_-OQ`;$Btrl0vo;{^!D!5V{s0c=oJR4xy<^EAW7z(7N z-*OP7U>O5RtyxSOscB$-li+Immr}K*@gVM* z9x$8-^uWt06FU0YFyDD<9Lz^{8% z9Mevw{mn{OquR_OlkLClf*pE^7#xTo;oM&n0(QjNe1dd|ej0lp{S3TZ47#V+?RD$R zEvf_WK(edL|CB)0kH1J>R?_FRSuz?Ik*zuSw%ztg!`^k(v++y2M$01K@j!?n1N0Fl zmR??LBH9Q~8Yn)> zvX<@kicq+hkV<6Q2*zl*_<6`%rD6>LcP-EZVun%{bi@tVZSCaK4&?~{^@;(> zpi5vUPyJY=;v*R3lfSk?A#FG_tD@)q+fHPghD%tu$YW}m9uH$r@u^=KDcRsvEI>3xt| z5)3*ip(YQVOr<;)~nb^fc^hKQ5Q5 z$-0C-&whX(b;J0H2Bf6@;n%l6w{pjBi=$FfI9#fZOFDqzytC z2*Fkwo9%GOR3*F-P9|)V4U7DN7vKF1F-wFnI8>1bl|6|jOiK#rwuURh z6kz;N?)oMjfj68zp#D$MMFR($ML{c@7Wlx+bk+zvJ_IY55jcP+l4dAvOi>@!I+tr) zA5eL0$LW&PWgn%}BzM7-Ri zQF`#??H1My2XU~V!*79|a$+TJS|Ykg6Y;N?jWuzEpxYWrQV=8KLM|~R(x_}I1d9lg zAFv7%i3D>A2Hf8T-A&2EMyGEq@G_PHP35Uw+qkD7v89eXJ3a-G@J`&vuhnuVJ0jr# zVSx$(ABu$E&8^sLz!>>?K9RUKl9y$ML}rV6eH%QTg<(X#lD~({M#8<8uS#^GkQISQ zI0(yUmgXTP)j2jI+&`dODS#7+&TO>`LJw{m&1X6KWsOx%xY$Z)pp)5jY58OyWe&qL z8YUiaW-x^~1xJCe^vunA}X^9;qvt4nN?DK$$3#WPy=bEy;7(L-Sa65pVqGciOl z^yu54Z(!Z1T|~K?JSc8x;>Y7rM0zUopWmG?4z+^?Ox$4Hbad>9%5`x0lzmYe)u^XL zyG(oug>s@|*J;d?R9xTo{JB%;In_Tmeb#8b3eToZlF2~pl>p4K=^^IM+pxOL+gqEx z?S4?gFz@~G%cBX?dRBQncAvxLONxQRuVZL?`;2V<5$=iOztPX~)s@)b;1JO|hTmp_ zv^^N5SnnFwWZZb>aoJscUwv_nJyhzFvca`{5j<$P320h; zb+iDvxvuRnsrhYSJ%~2R|9*Q#^5}# z!hPF$IXzHYJb3H#agQR_z|X*> zx)<8_lLv0?+wEfffh#(|-tzR0jwydp1wsuETmxa`<6u}UrqAu7$&R$NidsN#nDt%S-b`u@;3G9EL3#rbVJ>Gv4J) z(95AWs_Qe9_l}$y1ka>~l&kfzF0&C@M4;^4KGFfs*7vE+iv0w}k6<3nbp1-GC}XI&`WCCgRy!-&iKlkpacwUs(VT z0=$N7zM_8J%&)PL?ELDWrLsn9I=uxeJ%?+6@#$;9`T!?c#nKv&3uvCK&5~S?lTzx&Kv%mxa&7)EwJ<7KjmW6;=j#DwF+_WDZWwF`QQ8#q{U}*Qe)5 z00?%FBPA$0viZV@2MDCnjpJtb$|B)FBC3+jxKtDw=m71F#>0cjZJE~H%=)puVAAo| zCzEUX`m=ij&sHB(%3%{n^gMA>*{uj18Y|(o*(7mXl3#wIMymH#fmKV7uh+mr9>?C4 zoB0K=*{dln+>>8UD(W5Y3V)eYibdx-CVF_1K;la_2S=FZU3pY>>S7w?DmS0d&@guk z)m55%f4T7SqjGt~YpmINoG9RP_m5QDc z{mlXz?$HtWYoCQ2YDgjeaMZP8((qaCuFOy7QAtdcJMZh~*IMbuD{B|hcuK?Jza z=;Pp^fsT%qYk_PYjZ}@UIXE?24(D9*hCEH5is- zVQlWmchcm3&ZCv&rtOyAZY7__3y-t!9N5^bT*Y>Ci4BLaFr-4KHK5;CxyM-o&QwgsM%f^kOM$y@zl_<(cSywl1baxf@|`cqmeb;&roJHHtuA@*C&n z#NIGZUxhtXE~HSDNi^10h_kX=3AMBfbLB6WuZtNf(zHrsIm693%+BFvoL+W(ZV_U& 
zcQaW4t{*f0z``=24H!9E*8aU*#zb=?Ha~*gjAP1r7OqBRicvb&HN|to`92!R*oTb< z2mmLa71of5!CJ@yJh?D~&f*rNUKcvz-2Yj(6qzy<{h0aNs^DQjDqFD2vfo9j}s!3b>b*gHCr9a?4g*p_rcw; ztF888q`l0$FK60IML^Kz@oXnUXwsqD(WKG1Keybgo1A$;uA6FIU)C)CPC{v}NcqUM z4nPO>>b+CY|JiiW!AV=q%`$P~S*Dd0A>mnyi9}Qu46sLV|A=T{=wAAqW3`p;LGle1 zf(Yuwc&(=WkWW*!-HM6k@L4XqMpn^#_b8d!)kA>ih}41U@0wStmf5d&ZAge_88GaG zk*`*3+t+fAF~RDL600+o3Yf@eJah_)9kY7LL!yg_jsLMIRqVh|AWV6s9v^$`Z4?9S zal>qA0b_7H8oq`jWVZGq_+$jr$L&CB-s>V{2(ET{6KZ@K_}wTf{s{%W^PS(H7v^Tm z0)Wj+zx=BW*gPlNhIPq*RKk=6)=k8rpnqyji9tfuU~7pvlhNjLxKnqpfk{n%CCavB7p^(dx^vhleJf|sdFHjqBVmyBLd5L|af(%M zPfOW%o@YQW>2=j)WnCCuqS)%=3og3&CTV3xkAm&9_&1*=c+RPknbi6swU|GQ*f0L@{bEXFWBMS(59`KH;gS1>98pVXW}a`(lED$3UP{1 zlwlO${;Da>T*OfP!K$ouzR{tcSdhqbjZLGIQz|+MAAyfY4uH9P_+%7g zshm0C+1S*#TQ~Eht~Uc_*4hqbo*H?=d|_;Gxo~>Ihf=t ziZ{R`kCHccmb;}@Fx#=+|F`NrPvhe0Tik@$7BK@Qkh=8T>~!XKClF6%4@lPh0ZM>l zUWtzpdnuQ%*S~@>pp@4REXL5WL0tv*9tx&sc!IlQ#LeNA6+fLo1r8M)qYJgBWD?&W z2#l5^Za;3zMh>SAtB_PQFWQW}b*%a>K}`?L-(#EnJAL?iwK^#b02c6Tr!^HHx}2S8 zZ@4N7OzFg0eg5im!O-$|igAfOafuodqqu~0R>K~-x2C3cuEB2NYx zKlf=VBGYz^H0xd|*LIS|x679zp(<{!cL#Ao%NSCrfYMQU zi;gR7m%T4?`^9>mTP$%h?)xW~D@22oP>4`J28su-K^hi;Be)(DY7+8EK2LPL@pTTz zt6+}$_mZ6&^xYRVMdT>=xz6*J{huvM@{a;n2JiWR67>@rW3dU_&yS_nE4W^aybR&e z+WHts6gGP@Fje+7%(p6E@n%J$pJKLni@eM`%siiP7n2vgjPQu@0&|2 z*OEVBDr*bTb$86oVrF8drcbm?OnCaT2V(Rz`t^lrKiuf}#Dd$!spbs~oYoP?N{#Gk z$?{YDBNVAHAe@UrJU}>{8tapRRNRL3Vfs75_Hk7`D_a}S8CxU;1+?bW!$4EDnJE|a zs%VJdwCe}H|Ejq3#pJSRRcasjWOp{e)XeL*9YUJ6wQ%w9$($)(e@K48>x>7yj^J9W zqAb7AVDlFpcSJ&o4Q*&@vZ8`VvG1BN;z@w``Z7f+ODFj5Y)-*N90re#Jk~4Wk=-t$@e?Dr-?g zyHj2t6}`FneMV`vc2$b2X~@%frSGvvUEu1>^~oE<%NJjx;p5ppB6H@t6p5d~q-&=W zopr)yp~YKrLl&PAJYx>=+*PuEn|TxC?>;>^w!dg1#%|B}ARRC%HCxquC?Y>DwBXe= zN7I&_hw9RAeCcri)ZuvYF#0X?Nz5MYdHJ&s*!OFR5Rah-GwGB2&56xk(tynCm8sGe z0?+Q@{~;g%>2rV%_-JG^hjrpF@^Q5tl^GsG^r)_Mc87u8+55XhtXwq3jzz{y0~4)P zqxclf%({-8$c`$&A2NVQW|KlxVfJ=%`FA1fcLuMBb{-+Z7px;6=K%bFwV4otl@)Ea z5Ik3$uqVl5pOX`$y{FcrVrFqVTBVMgM)Epyc{eL+B56V^0TBJX@8v6R5R$iiy0*RrqnlId;od>yjJ<3#Cz0>Z&p<8murrw+8eJi}W~ z1}a}W!UzmbBvtaUmx!`siD?A)SCEw8kNLhDU_$4K^i{F zn;{Z*m;4zWN!dgHxA3;2Rdnt-xqutnEZ<^W0 zlnUqGFP=p~A3!7%-Ra4BG=YvKwwkmrLEK9AGumVtVB?LtN{Hu~5B{a3y?WRvv)FQ> z8#}9hFDQ*d8a%+bWdXflnB_aOh_X~?$zfJ9u}uK52p?Mk-F0gu-}3iEN5y$fWx1pP zlSdVJEMsVQKd@VXyu_W$)?;b;kdSL6Z7&TE_v;NhYKFXB7b&m&Cfqy@C|_CL!002D znLN*IS`+rWi2-45nU9xDn$(*nIywN+z}Q1C2JcH92j{1&3-FA3W%kaRWgMjXbL2<$ zB$-BZhD-QlKNCwV4sbb2zFMJ4O~OEW(k3Nrl@B1PqNy3o{+ zR=ElvA3u=87aAb{K?EGGq!ptmej}RpF*`gynDP;XCYlUeZxyP1F$GqG42xk$^GpBv zFCf&Qr}7sx2i1e#?@SFKIQ@5MiXo0RXH)hr6=*Kf#D5c?7Se%3LGehk1Lud9AV@|Y zDSNOmnsW1!Gw(k{*NH6PR$?h^D5l`wzzSw17v{Nu?L0@XQR-|z3u_;pJ>}u*bx8)E zMTQU^J9Sf}PPwQ8D{yMw&lSXN5&|@)(JW9S4+{xEyDQZ*y-EHB0Dm)P?&<#zo2TUyaI6{ctP6nt0l<^|n>hn$%jk#2jOk)b z{MZW|@LT`jR}%m7bG`w8_T3$FHTw;$e)O|JjvK)J`3JcB5;kz>N?<1);)v+~rCO0} zlK>Yq4M~ej{-_FEq?t@a9AI+Qpnr|`0y9DelLDZR*&IGx{R;@P@|P0yR(<0d_Gu;s ze&J;d8B4=b{s*|9H$9r`j3ylj$@%A^2}}Qz+k1n_gRWKREbPYu#5YXf*Dq>tXfSb& z(5$b2F$w^$C^!#Td}T@1i>(bbHEzMKLDUdf4G^KN1)zn93ec@toI@A%3ISpoS~OUV z<~eYifLdmbs!RP#;0rw9*Y#aYA8=USxd6+eFlQi0_?J7yiB`;(y4(!3V7iseY?=TY z`>$I5?`HphZg#NQ^6hu$Dwrq9|CJK}7{pd%u}Gz&D#d=j9Mt)Q=2cC7j;$XcP&^DZ zLHB_^mm3+@y^}5i&WcbxUn|OgHNPDzna6stb9O(ucCC2mpmBMrJ=Ee4@Krj?#PHNq z40JSEwOcj`t;pkST0uo-^3uLO-pahU^n}21UK2ODZbVPd&BZA322ku_qdz10fQ@<4 z9x=yDclG^W>S8v@_GxJTeHgUzIeDPwe^`4PShMZtEi9kKU#J?6W_;>!F7OwXtE ze|h@A?Y&q&(m+~0JAfkZ93_j(0-u+BmuSkz16)Uc=hmA)gm+B$Rj3lFR&dh6R`-8w zjsw=&XG!+upp}Qs$KNB?Gdi#I9ZHVTax8*&DZ*pMp8w0*CkHI)TMP16k4QeCn*g}o zcnblyM8!33y|)XrEWHPXPR*VT-+XdRIPg#KWyw_c@- z7Ol%`Wo|%@5Lg@rS{aP62!j6~FNWnE+Ni-n0tA^rBOky{{L5zqlsNGp;FgC{!1e=n 
zx&EHY^$~E51GJfyGLy)`S^xk8IpS4fTY$v}tOJ^K5v=fwNV!V|xSR&&`UdbR9NizG zm38f-O4kM;UMBImv}l9+4=&L2fcnpa|9xnHIr_gl{r`h_JRERva43YqjOqk*SS3&E zfbyKOw><2G{*~U%%?;2Au4RP-z+|$Duuu8x8Ke#aDJ51W0Y!iR+mkwKbFc14IX+#n zTnEWOD>_wWaMKeyF&%$@{~Xa2>0ORsB+~f%73*j5=j0njBtSh=`-+&3*h4+V+BY8v zt=|BI4j~7vuXAP{-Ue@?V+uQ}#ul*oUD~Ug;RVY;bF?==3Vz+j_Kl)%9e}S-@xH~u zd6LRyScP+?(c!KE6eU=MI}+p-dzFnY8rFV;m)pkL(Mo-OC2kW`boZ}WjEIOhIoi$* znxlanyUhXWy64;X`{x6*py@sAc(W^M!aJuA{iv=ww_s`^H}2ZI5hoF{m7ml{F+PGWmJ1PrHe*Tsp;tQ48Y#T zSkEpL30NGBIPi1=?5uSw+kuG{OLu&xYN_<3^cTGMYYDEur}@8?Ri#Eq-{ZfnvD%pF z|9pwbbkLI&P-nU4`}TZkukK{C)#@_ZuSl~qSf?a}@AaS7IkTIX+*fdO3T_bVlnqa! z3#sj^Db{LM3?N?;CxVGHJv;rzLqF~gV-VEV){fI_1K1)*_&ecePNt^qdV3B;gtr9( zQf+M#;s=By!m?c!#+iU^3!{v9&5RKPxDsyPPB#6Nb?&xn0-%;{KT3`N1xo4cR(8L_9w|Q z;#@h<48Gul=Kg&xD1oJ_9xa&PgEVNRS%CftM?EP3$86-V>196G(D3@-@1{WAx1rAS zRMmC(fN%h)H_k>5GlnR)!KPJ@ek=zv_Y)lOV>+P$tz_a;?7&8$7nyMVr41%m;G)XP z{!;Tz^c$g;Mjh$0!n(FM%r;zc;MX*$kuY7W534fgA16a?R1mYF6jO4#&#TLlZ<0xx zb3Zd%Q|(9xPRK+(#@&Iq;M!bIwh}r+ct9{r-9Rmjl2O?9<@L!{u^+ucAZ3;Fc zW=wQ>d7~ZPAHu+>xJaN{O(pTB?rh zqmguaW9vRDynyyN{TBWS^XX0goW(_{VV+!Z;ux35lirq#d7PA!R+3bIx=O#zo*;et zWyG`({I+$G{NF|jezA>BZ6*SEkFCzL>XVc5d1LzGQ@#W4CVxQ3=`R4@qpEuwsM4(l3-m$ zUEQh}K%tZFK10;iksJ@VdF5k}d~Kc-TLY`WR`TS>UadQEaX-Ifkp>lFMSSv5oOA3R zMVpX*e60e|Yp!8VAxf)SvQ2W8;6M`zueW3QEi5GopI=^Pyk9L?59VEBo!ZiLCFGC< zrvtGen4eoP&3IG^^sQQTyr^jSGWN+JH&1#nKY!i3eWSx!Na4WFKqyuVP+b9eB{gbv zvfaZwl!it|rq6Ay=kQ;=dS&$9(z17MZVsn$PvaM%uLR(@P`+>h&^eU_${+u19B}t4 zgb7$9CEofDn`R%|xAdI<4q7zdL2@GBO)FG^5n&|H4WB)D8=CLq>us-d&2UJFBN#?0 zadKUZ%~Z(Xn&?ikxwtI-H=B}AStriK)Y`iDO1rGtz05tADbWQZ_%9-KM*v4v)xTjI z3%-AK`8D!W#Pkl{E3$7K&v^s~?--wHXJS_TMHmSR-!wpxC)$;lh#U724n+$RFA+}V z;A1VLfBXSTNpyc5S6PiSOGTl`D}`FF~1=zGVGhY|Ba=+l&lD)Qgo zlcAW;=RDimJ|sN+_7hKNZ`ON39cB0XS}bm$9O0s`H>aWx`8>UC(k3CEt%_}SwER@S-Zensh+Dv= z&!`;#^O_yHs|DA`iyB=lhMzjFZW~&Y4-a*kg#{U9AMNJMW!CTo(Dnrtd=43o5&us+ z58sh6{j0$0(o$uO+*gdEqM~!7Ts%A)it#khiJTH45(_|ItsK1BwpuW#0Q9#{Ez?GR z@UZ)uMT-@)%wOu_=bZHc)t6fu=XJs!jc*2jC4zo`vf71@2#gj`lBY1VUlXJTik0*c zgA#2^&!UEMfu@XNn^?M+ICeH19bq0P&jP4nf*CCXG zlKOj25faX##c%B3W)DQ4l29=|fDJ)_(r>JLsJ}Z6ca(5VxwgSJwYE>GH=giQ!?kW@ z#k7x2_met2+1G^UEbG?GcdthzL*A4pg4GH2X1FJAK0YO7Ve47Q(p6&NzJwZ>F92n!U2JTkn)}LG$9Wiog$yjXfBpUm`*fTIqT1HklbxR&$8oO$DM+2VnT*I)>UNw zUGD-Yj3%K99f5c_a?jTdoii;8XMaN}ojZwh3{pM+^g3~%;-Txvep?b0>G|(7K{Sk%Up20|MQkc%>wgbt9B|$G|FhgrR&Cj9(nmKdjEH;5B>c4!= zk4i=FrY-NOX6_Wi*^li`C6KLD0o<}y9m}n$B9yIFAGwBd$Yd_EvkhLe6 zk|WDV9?Rsnwk@Gn!6x3O|B?&K-ZP5hQJcOL9H8YTb@99#!97^}NP`IZ$BlBqst^qR z+2Bfyup|NjopZJFa@4;*AZy*83#zf0&jr7h1^_1E``UN58g%?5q#=(b9+jI=@qsg= z9Al;-Pyp3o$dzs;m}CufP8QHk{N?_8gg*sU*;FG_Ft9sHqh-Q)dE_HYh6}}Y$$!^& z$hqC;P`Dt|KF;u0t^lt`C7;Cb!%Z8~@bAk%^*sY^(l0EX=@x_5 z2!1aX)1$19PMQ^A+(3^K9AX?Iw00}wI#!2Tp_)&bFSW`e+ zII&Vxm0|W7co~(G{oU$?lvhpKGuk+*Yf5K!s6Fe?6slbm%E~qW#JvjX`cCovM~K8R zrtQhabL5%2G48PjX_nQes1ClCOn2jghO>dqgk_;L!`clJ$UAEGqqvm^&=2dGQ38-n zxhyfa;FB2~iIW+lzIVAIVs}T^^uxucf52Aqsb*`8GcD$HCN-bQ6xeEnND)SnE!2X7 z+BRcvIaR0z;pTXRGR>KlwVbg;4Ue7+0RS-o@nAz9fBsali0S}tyQMf^XHog4R%8Ps zP8d|X%NR8kLJ@boYeAR!=lV|@Q>2NB&m42eDNTXnJ8z(v1HfsL49#xp_mO8wNuiNE zp#&j+U!TlJYOkqbgp`&xQ5(>Pl^Pddis&+5e~{dag5+(8{ozY<_-UL}jre5Ul14`C z4PX-_l?P4@y&tN7KZA2dG`)KYib{ur1X!&Nx{n7|mT! 
zDo0SXsj6d8OAL|iSR(mVf`Xp9SvL&rxYA|>>XlD-6MxZm#E`-tZ(eEdwnmgLk zqpM*}>a2$d0n0HGQjD-l=ubr)#3O`Hq#bE!_}UF9E}JGBvQ?HBTPC6 zTp+h7fO{zjnU$+=CltWZeuokX;)slV-648%JFup?BizgPIbl;pSN7p|_|;5jjtfJX zU0!thC#hQ?)^XyIS2FK*p2 zCO*T{yo(@CPxLfpMM?pXQYGQQ6ZqpUT~1Yp^wk1p&d_#*^*XFCeAfw!H_3Mul44AhY-oe!#v0sxyH;w(P9 za7FhePBgZG*go?gc@*l3W?+0+Xgrj|lwQ7r75p8$ravJ1n%uRj=wJ|xCpw?(s=DEB z^OdHf6qM_+XKNLi9~)1Y(nJ(=czDEjiF6$27zB z-7?N-@VN?|2yD2(0bM}CH!N>3PSoYtd7kC!{m13@bzEm`HlJL9iHB-iXSJ2>`Wp?K zymf2JZ8(B>vNb4oCVc!5hQ>6K%lJ1<;Y|NZaJf>_BUA1&m>da_8=a@%#cPnkGs|+)Za_mDcDH z0J5!nThJR%Kkp2$l$%x4(tf1Op0 za6h+ExT#Lfk|1@+&1{xLS;-8=7v0n9S-C6;g20#GJ8ZcwdP=CU(p#}XNr>NOxZf4d zXSbbIX6XG0-)onY1psG>PC|5+zg;5XGu5}Ile)Y)H?H*_w@Q1Sh>n&G3QRJDOOx&4 z+-~~a`#1?6a;i}{^K&H)upr1+w)sgkZo10@s1>--36Eeh*B7bSu-P=sTb>?gE3odU zn6#duH+0kEoh8Mhcq6nNldo9ID4mlxR1gCM#vc5?oSZs}TNF8ANDj`}VzaRcnsoh1 zKba7M^**v76|O8BkjSIwAqgW6kM_`r=;&UHM*#V6>1Jt>K?POI5-t{ zQ}XQ6_qJJSnN$S!s5p9e(~r=EcF|8J>O2Vg6#D6Sx-<~rgggV10b-#{D4_bK4y528 z?xAwQIn_SMK09|pxM1GYjXCe_Z6nZXOIsKAE8dST1@${n-ki)k`_mana2Jvj$l!~w zZ4ep$x+f!Q8riIZ?moes08rXqvL)kPHd5i?I>xC1lR65f?tt-m;)EAvL#+w-&s2Ug zTCl|O{U9MqWE~*GBG%3|QkpS{nT@V*Kz39bxAudU8{y6~1Py7k_S)s83{VCJPVs>y z=3qV&pv%j5@1Ra>o~2Kps2R@J*O!(r{pI9-i*EVk{c$7tA#!hFMudcjvYCHWln@PXE<)W7cvsbl7KvG{3I@v)!!^njZP zZ944mhz{ZJcjQ_fhRzooQu)7Yt5l$LW0hgwc<+|X0xm_TS@$f+xaS!36Hk{p-CiNi z`lJ)-^AF?~1D1Ure~aF}z(ovL-Y=GWVVmI-qkIVO78*Jbci}_aQ z1^|jTnemf%%DtBFAF%snI=$7Er)K`u5qR#E+oSH3=6WRh>k1Be`v*#0(tc#iu2S9i za@MPh&mnZ6ww@SLfqyQV*BR7)Q&BGZio&k@!GuBvG1xl4Q=f|xC2i*YXq0cZdFu*8 zS?#e!-j+Nyl(_ZX)K#d%;$rj%0s!SJl{Hy1-mgbAUvIJI6XWRpa+NRbsbu};hjLp3kZX+9l8Qzy!%!eZ6=tnX)?VBqVuU6Lm z=<5*~W^p*s*^3+M4MkY3+a}Pp2T4&tj5#Iq%(}mkrSPM$k}s3Ej{wyRVPOhl-G{#e z*4S{X6lugYJA_X~K_Orf&-=EO^w!Tt7V7oCG(P{9Hjdl>TsPe3C;T=~lN1dH-= zmKI-NOcADC$G>v0NJ$S@oiau>U;Z`uQ{q&7Kt! zW_Y&>y*&Fm|429GnaaRG<4bOu7VIRu9K_A9wV7>uke1aJGc8OLCBK*Czw4$KnXT<` zwvLR3Q~*;hD4DV$e1#@&bloKt+J$MCh!Vla;q`L`HD6VnJn7y%C`9d&?`etDMX@&6 z*?lt=H%j?588-Z&E^aY(k70=WVi~)7tMYIx{-Zo<-epREOOauM!Q!PA%3e^pG}K(9 zG1mG0gBJJNi*Wo!-kqZfTq*X>E+bUs+myc9M~nCFkuv(g9m^j_w?yE`3u8nH6P}&B zL*s~*)y^$HbI*$B9DfHTQhbYiLS?c4Vz5+LnE74pWgRgjRX5d&OkPdo_Ut@xN1mLT zAXW0|OvA--!sm%AtKZk2QwO>X$SOH3q)#dSnL#s@n~?Uk?YQ^UC}iVk+~n(JIQY{9 z+>~A3-^2a5F!4Pnm|3}2Cgp+RbN-Ouy!Yp-*O=eJUWAXkPv}KC~B7BBK>^L9q2x@@j_pXQ_Ew2lWIK?7`oxSZJyAsQH zg|}lvM46BjRg2cQI-s9npN)+pZ15}jso1vvhJTdL5y@?*3DX{W=o8_H&4Plfiyy+f zAI0rEw_kaAb;3%+PPl246l9>>b!YX(m26zR<3h?`F5ac0{^m(mn&P<=I8PG^E!05_ z4d8yu{Lcq{;fD2T+3?Q5&(-od=3a`&BpCN-k$cB07bRRgX7y(|W}vqDOki^FsZZ+1 zu_Te8TPd7atBl{x@5V-S-AbXHk~Vi3-vJMJa3B=3yh7n5?qrclrk&JahUheJ2{8N zI_7k`R3FF`Qf(j$@pQRG@4O#;s+%K&rlWQxMI*t(bqe65d0M!BaQmwftb4F1yyPk^9<_RY!iUV!SI4>_54%N=j8g3TFYG9;jU?ri1b5L5re- z7{baNa-p9*c6PvtiFVJn-yrLOA(0p>a9n&+qv5z8mDo}uHG79_@4o&idny0x1d9c! 
zF|Je{{SoKO31Zdbl7qaAITs5!`S{UIj2y-~quBg!+2TpN-p}pLYH=j?n)uZd;vR%J zz-ps@RCqBWwY=hJrJt7={mdh&*iUK*N~o0Spe0pkN|@)}0PcLZBYPgWGfd5}ny2)%ck3BvPan|OL^Sbnh`TieSy ztb9$FZWNEF4LmIx11~1(Ui3Rt2q=0**+BA6XUVS^MiBhe=H;hPJvL`zyY<>Ia>lx2 z=-2Ht-}|7nRpTYeq3eZJ+UtYvhE|C47^7;G^E(lIjfTzAW(t!W?hmFI_HqH?V0ap; z-VuLE@eI6S57VBgeHKFCjH3+m>7I|#?r`+4S@d*NZ|uIHa!5;q>=(xCLQH|Wi@}n%45E_+*}{G?-BUT;e?X-6$q0A zVymh#7bSD@uku%OF(Y5$=k*)EpJJd}^06%A!pRR29E}I_$>8>EVtt2>&IpCRdq1b4 zXe;T@i1?>l_Jy}Q=hAMp67y$AX4CiuMrHfNCTh~tLS&Ho@4-nr zjv%D8j5M>6kt=N`^DAku5y%5$bt0VKbW z8^=S8pyxb1uHzHVfme|z%Hr7g_8Z^52(rogD6k)WvKIlAQQG`fzXHxuz%n6GSf2AO>^{_!(HicIS{ZN##)f^O(r2Z6t#90W_m7Ez<} zL)(uy$oTf{1$I?o&3PI+qmI0z5ktwI?kx^zv`) z2&tvN+}l;p#V~2T1rpW_$lFHE1h7|1C9uI9Th|E|2p*pKVh`=PzgwV-)Gr2!e zAcA6#Wy3_7RUr3M9|38iYgULN?)T5y^@(?_?g$C?m(A>EG4QxO!?$?*z=$TG9R@@B zZUz(FYn;b0Ck~&d4#Ay{2#Wo*zfiRGEJpZ^n4uxjD3O%+xyRmoYvahU~f}lb&@7!*6{4M)1OSu}Q&PBtD1LI<(!Fa+p_c@m|d^t{vh-%zJ?p zFQk#Qfs+n`p1d#mytdk%S~r%4+`_(S=9cArQP)v&64hW+jitb#hHo9YM`g8dS0$D1 zlrw#7|4e`27hk=wLCf!Yb}y?@C5 z!{Yh_iVV_}g}>GKp86k_J4m&#w)=EWfetiegp7$?T?4O#ZU`R?Z7BZ5))Mm7FBJ(LhC8;KpCJ z;gRk^ua{XX9q;_S|xt$9c|mra!1tKfL5 zK?jMs-%HDrCn^4@OA~~DGBK8`GLv$mgY9CDjWcrhhaWMo!=W;ipdhS}=s!wQ&i=Q6 zmkpTIFD)&P>y^8Ru<Zd!=?R!Xe=2_zlc(-i^Y$3Q?0@$E%;ZOcoDk!@IE8w?@kvW+azRM=LRukh)1X z#zBpbC+d^1kU#mwWxuC{C3fAt$7X~0zA4@Eok0jp9dZ2JT9s&C9ctQ^o!=Yf!c%|P zXiJDPggzRDoD-rNhUmz zxn~xMSN$>7rFqrmg-5c+(*VAM`6)FMTCPE}Hw{XBH)!W@zsIxm%^Dx*oY)Mr4G#3x zTw>$MHosQqddvLH%Lq_=F52&V=tD$ovhQII2<+~05zhk7S|0|_M{KD|ObH_z*aZa% zyzaSJHts2##anyMdaT^PKqRXFxsHy7-K_;9zy2J1o?0a^rvbGV5!{yEeQ|I3%UxKq zp^DH$t4CKbH@ye4zXkr;(?_-cpr2|EW|w<#*;jo{V9Am0lDUzQBTlONwGeJpHOR|f zAU50KoO5E5po4w$U}x(=A#AdMm)mNzz3ul&)dPH2@pYg6ceSNqbOK$ya@q|2er(*I zTs8TrlqzXnC#sBXx77siIXg8BUgUVY(`>xF?0j{Jy=);1_0+=2|JlvX0v5@J`%6Zx zbKeUDQ*a_5M{pbKWy6CSsW?`g)|b;=8{El)xbQ*FTIM-rs=r5;=(E2ngSky1e4d*a z?BhT+&LS-Y+hP1c|FY9tvO>cNhEjE-8_dUD{GA3+OiI9K^=2RKQ@w!e&F_lhm&tW6 zEGxQfO)VC<8mwY9w>-%x(*4(7A{Xcl*=lyH5mTA)c&-@fmUElBAI4M7E`Uy` zqwT?P^jLLfRf|~^L&KR`JO&P%CAh4h#1U;+owSAyF!_`3cL@W_09U}AEH}_JJvVTk zv%SH2Dwk%e#y0x?Xp55Z=%PW{-f;hWQw75I3I`|zeg$r z&t9D)4dY!-wXYPHO*D8l9i=F&v6{wLN~8J(w>pW2JGEsls9D0?*~G5o|H68}6b9 zd+qVXuVY+=rrXBA_m1CR`?EKxV`kysTXBt-ry*bv{TSYoEOstl5hm!_ZiBIFIw+zq zajGqjcGh^`TJ$*Jubjc_ZYY>1*xzCw5`S%^>y$tECdz zCBFfMJ>yflL96PW?t_`A+9w^9@V@~ntLH!0__*L>Eoo)%@7TmiOl5XFozN~15eqvY z8?JU5o+&((UpI24zk+I9yZEd{`TcthvUu8BPxp}S!wtl=W7{z?dr-V~p zRT*c%@A1yaPYAP0jV79Ds|Li{)cJFL3!KBRX)?N6tuT(wdTv2;dQY9pecp>7Dd`QT zwm4?#(7LSi)7s+F#wJpt)~c#SdshSn@(1iN%nl6?>?$9IJ*LKKS|>_VwaX0ae(x9_ zTyy`^;R=b4OAGL*%SmEn1aQ_#399 z)u*Y@q;*o|sH_7-vwKS=%?Jq>OL;tpPJJ-s_CIQML+-wYiS%eJ*NctrVN-4ti_GbH z?iEb@8_Gl3TIJ`hUbe)fe2}6O}ii%6q#OBpHU-+;lMHo<*Ck94KDl@ zb~$s-U)6zg9iueRJ1O#lFV*;;e&FZU7EgMizub2Z`z6o&BlyDbs_Lf%k;-L}7YX;G z4>~O3Q@^FMYouS3z31qHzw6yv^}g%GU3UtN%UXB7rVgPExNOrL)ZRjit}@K4>s8t* z^y8#v*N|q5MRoUY3BOM6jKjWPS6vmgk!bfQ)wzS*3N6BRPV=ASrS(Ry#h!9nN!6fS zbj4@d(6@h3J5z3VNRKKlT1^ISZjO$$JDDTC@D#Pq&TxO=@Z-E{xd>i;zQ67k9iJwE zy?DIh$nGUp$Kl1|+tVxf3!>QTP4Af~`YY*q1Dp<<1CeC_N3>`S(KzW^{J}FMfgSxZ z*1Ak3IW_r>YhTvmY2B^inTqLJ)fqVr=6l8YMIYgmSG=Uww?*Y@{lR+s~09MRP+HX|sGw z*y9?JThnBCGCq@LXdDjlmE%sv_2wHZ85Ju`jn4?z{F#L!LKiz;K4NK=M6OHcQ+$p)g%ApzgtyJ#Q*j z*EV>Xn@JGu=xYQoJL-=F3#PK~hALomJfLrsA9v@2)7jJq4tNjuOs>Jwu5h^)e2|Xy zdKNNyRfJIl9#+MU7-m-U2`#CX4?S3suowa{eAlC8m9O^ibP7XTj*0rNXpm z+-VYSETh9|&ELlVAnJCw7(Qds81ZxVEKhw&mhuxzWxdg?Xz9%$y~tSK>)COqhpvv{ z8#df1#Scab38HC%tz{nCO*4XIhcYuER25;T0cvL>c-PhDt8}PHblMvM|I(N9bVd|7 zJi>gKNso@A|9gnGibjl~i4kWdCw_E9^SzHs@Ll#$IAwtEQuwb~=jqF*s_hK^3`hn& zt<_=ejt|+E*BQdIQ)BbUdh|cY^9w}NYJ94by54oOC&{OPL{$BFd^+{>s?wj8+yRv* 
[git binary patch payload omitted]

From dad4ff0a589b74c378320f2134c6efde65818ab9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 10 Aug 2021 13:18:03 -0700
Subject: [PATCH 17/71] Add the measure privacy page to the external
 TensorFlow Responsible AI Guide.

PiperOrigin-RevId: 389961385
---
 g3doc/guide/measure_privacy.md | 47 ++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/g3doc/guide/measure_privacy.md b/g3doc/guide/measure_privacy.md
index 0cda83e..d6c2a73 100644
--- a/g3doc/guide/measure_privacy.md
+++ b/g3doc/guide/measure_privacy.md
@@ -1,5 +1,48 @@
 # Measure Privacy

-[TOC]
+Differential privacy is a framework for measuring the privacy guarantees
+provided by an algorithm, and can be expressed using the values ε (epsilon)
+and δ (delta). Of the two, ε is the more important and the more sensitive to
+the choice of hyperparameters. Roughly speaking, they mean the following:

-## Tips
+* ε gives a ceiling on how much the probability of a particular output can
+  increase by including (or removing) a single training example. You usually
+  want it to be a small constant (less than 10, or, for more stringent privacy
+  guarantees, less than 1). However, this is only an upper bound, and a large
+  value of epsilon may still mean good practical privacy.
+* δ bounds the probability of an arbitrary change in model behavior. You can
+  usually set this to a very small number (1e-7 or so) without compromising
+  utility. A rule of thumb is to set it to be less than the inverse of the
+  training data size.
+
+The relationship between training hyperparameters and the resulting privacy in
+terms of (ε, δ) is complicated and tricky to state explicitly. Our current
+recommended approach is at the bottom of the
+[Get Started page](get_started.md), and involves finding the maximum noise
+multiplier one can use while still having reasonable utility, and then scaling
+the noise multiplier and the number of microbatches. TensorFlow Privacy
+provides a tool, `compute_dp_sgd_privacy`, to compute (ε, δ) based on the
+noise multiplier σ, the number of training steps taken, and the fraction of
+input data consumed at each step. The amount of privacy increases with the
+noise multiplier σ, and decreases the more times the data is used in
+training. Generally, in order to achieve an epsilon of at most 10.0, we need
+to set the noise multiplier to around 0.3 to 0.5, depending on the dataset
+size and the number of epochs. See the
+[classification privacy tutorial](../tutorials/classification_privacy.ipynb)
+for the approach in action.
+
+For more detail, see
+[the original DP-SGD paper](https://arxiv.org/pdf/1607.00133.pdf).
+
+You can use `compute_dp_sgd_privacy` to find the epsilon for your model given
+a fixed delta value (see the
+[classification privacy tutorial](../tutorials/classification_privacy.ipynb)).
+The computation takes the following inputs:
+
+* `q`: the sampling ratio, i.e. the probability of an individual training
+  point being included in a mini batch (`batch_size/number_of_examples`).
+* `noise_multiplier`: a float that governs the amount of noise added during
+  training. Generally, more noise results in better privacy and lower utility.
+* `steps`: the number of global steps taken.
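+
+As a minimal sketch of the Python API (the helper derives `q` and `steps`
+from `n`, `batch_size`, and `epochs`; the hyperparameter values below are
+purely illustrative, not recommendations):
+
+```python
+from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
+
+# 60000 training examples, batch size 256 (so q = 256 / 60000), noise
+# multiplier 1.1, 60 epochs, and delta = 1e-5 < 1 / 60000 per the rule of
+# thumb above.
+eps, opt_order = compute_dp_sgd_privacy(
+    n=60000, batch_size=256, noise_multiplier=1.1, epochs=60, delta=1e-5)
+print(f'epsilon = {eps:.2f} (optimal RDP order {opt_order})')
+```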
+
+A detailed writeup of the theory behind the computation of epsilon and delta
+is available at
+[Differential Privacy of the Sampled Gaussian Mechanism](https://arxiv.org/abs/1908.10530).

From fbaa55cc43cf2a0ca7c168f34039ee3c04fae1b5 Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Tue, 10 Aug 2021 13:23:10 -0700
Subject: [PATCH 18/71] Add dependency on tensorflow probability.

PiperOrigin-RevId: 389962756
---
 requirements.txt | 1 +
 setup.py         | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 955d819..7994d55 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ tensorflow-estimator>=2.3.0
 attrs>=21.2.0
 mpmath
 dm-tree~=0.1.1
+tensorflow-probability>=0.13.0

diff --git a/setup.py b/setup.py
index 155802d..4ca4064 100644
--- a/setup.py
+++ b/setup.py
@@ -26,6 +26,7 @@ setup(
         'attrs>=21.2.0',  # for tree_aggregation_query.py.
         'mpmath',  # used in tests only
         'dm-tree~=0.1.1',  # used in tests only
+        'tensorflow-probability>=0.13.0',  # For discrete Gaussian.
     ],
     # Explicit dependence on TensorFlow is not supported.
     # See https://github.com/tensorflow/tensorflow/issues/7166

From f44dcb8760af62879cae2d2b5b102ccf524b83d2 Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Wed, 11 Aug 2021 10:53:59 -0700
Subject: [PATCH 19/71] Add tensorflow and tensorflow-datasets to
 setup/requirements.
PiperOrigin-RevId: 390171562
---
 requirements.txt              | 1 +
 setup.py                      | 4 +++-
 tensorflow_privacy/version.py | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7994d55..304b0cd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ attrs>=21.2.0
 mpmath
 dm-tree~=0.1.1
 tensorflow-probability>=0.13.0
+tensorflow-datasets>=4.4.0

diff --git a/setup.py b/setup.py
index 4ca4064..9a698d1 100644
--- a/setup.py
+++ b/setup.py
@@ -17,16 +17,18 @@ from setuptools import setup

 setup(
     name='tensorflow_privacy',
-    version='0.7.0',
+    version='0.7.1',
     url='https://github.com/tensorflow/privacy',
     license='Apache-2.0',
     install_requires=[
+        'tensorflow>=1.14',
         'scipy>=0.17',
         'tensorflow-estimator>=2.3.0',  # for DP versions of estimator.
         'attrs>=21.2.0',  # for tree_aggregation_query.py.
         'mpmath',  # used in tests only
         'dm-tree~=0.1.1',  # used in tests only
         'tensorflow-probability>=0.13.0',  # For discrete Gaussian.
+        'tensorflow-datasets>=4.4.0'
     ],
     # Explicit dependence on TensorFlow is not supported.
     # See https://github.com/tensorflow/tensorflow/issues/7166

diff --git a/tensorflow_privacy/version.py b/tensorflow_privacy/version.py
index da4254b..878eae5 100644
--- a/tensorflow_privacy/version.py
+++ b/tensorflow_privacy/version.py
@@ -13,4 +13,4 @@
 #  limitations under the License.
 """TensorFlow Privacy version."""

-__version__ = '0.7.0'
+__version__ = '0.7.1'

From b4c04093cfa4250d44579204c2c1c196646e482e Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Wed, 11 Aug 2021 16:25:32 -0700
Subject: [PATCH 20/71] Restart the tree state in tree-related DPQuery for
 streaming data: a general abstract class and an instance that restarts every
 few rounds.

PiperOrigin-RevId: 390244330
---
 .../privacy/dp_query/tree_aggregation.py      | 183 +++++++++++++---
 .../dp_query/tree_aggregation_query.py        | 168 ++++++++++----
 .../dp_query/tree_aggregation_query_test.py   |  94 ++++++++-
 .../privacy/dp_query/tree_aggregation_test.py |  21 ++
 4 files changed, 399 insertions(+), 67 deletions(-)

diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py
index ba8ea2f..6015545 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py
@@ -16,7 +16,10 @@
 `TreeAggregator` and `EfficientTreeAggregator` compute cumulative sums of noise
 based on tree aggregation. When using an appropriate noise function (e.g.,
 Gaussian noise), it allows for efficient differentially private algorithms under
-continual observation, without prior subsampling or shuffling assumptions.
+continual observation, without prior subsampling or shuffling assumptions. This
+module implements the core logic of tree aggregation in TensorFlow, and serves
+as a set of helper functions for `tree_aggregation_query`. The module and its
+helper functions are publicly accessible.
 """

 import abc
@@ -26,6 +29,10 @@ import attr

 import tensorflow as tf


+# TODO(b/192464750): find a proper place for the helper functions, privatize
+# the tree aggregation logic, and encourage users to use the DPQuery API.
+
+
 class ValueGenerator(metaclass=abc.ABCMeta):
   """Base class establishing interface for stateful value generation.

@@ -40,6 +47,7 @@ class ValueGenerator(metaclass=abc.ABCMeta):
     Returns:
       An initial state.
""" + raise NotImplementedError @abc.abstractmethod def next(self, state): @@ -52,6 +60,7 @@ class ValueGenerator(metaclass=abc.ABCMeta): A pair (value, new_state) where value is the next value and new_state is the advanced state. """ + raise NotImplementedError class GaussianNoiseGenerator(ValueGenerator): @@ -148,6 +157,78 @@ class StatelessValueGenerator(ValueGenerator): return self.value_fn(), state +# TODO(b/192464750): define `RestartQuery` and move `RestartIndicator` to be +# in the same module. + + +class RestartIndicator(metaclass=abc.ABCMeta): + """Base class establishing interface for restarting the tree state. + + A `RestartIndicator` maintains a state, and each time `next` is called, a bool + value is generated to indicate whether to restart, and the indicator state is + advanced. + """ + + @abc.abstractmethod + def initialize(self): + """Makes an initialized state for `RestartIndicator`. + + Returns: + An initial state. + """ + raise NotImplementedError + + @abc.abstractmethod + def next(self, state): + """Gets next bool indicator and advances the `RestartIndicator` state. + + Args: + state: The current state. + + Returns: + A pair (value, new_state) where value is bool indicator and new_state + is the advanced state. + """ + raise NotImplementedError + + +class PeriodicRoundRestartIndicator(RestartIndicator): + """Indicator for resetting the tree state after every a few number of queries. + + The indicator will maintain an internal counter as state. + """ + + def __init__(self, frequency: int): + """Construct the `PeriodicRoundRestartIndicator`. + + Args: + frequency: The `next` function will return `True` every `frequency` number + of `next` calls. + """ + if frequency < 1: + raise ValueError('Restart frequency should be equal or larger than 1 ' + f'got {frequency}') + self.frequency = tf.constant(frequency, tf.int32) + + def initialize(self): + """Returns initialized state of 0 for `PeriodicRoundRestartIndicator`.""" + return tf.constant(0, tf.int32) + + def next(self, state): + """Gets next bool indicator and advances the state. + + Args: + state: The current state. + + Returns: + A pair (value, new_state) where value is the bool indicator and new_state + of `state+1`. + """ + state = state + tf.constant(1, tf.int32) + flag = state % self.frequency == 0 + return flag, state + + @attr.s(eq=False, frozen=True, slots=True) class TreeState(object): """Class defining state of the tree. @@ -166,6 +247,7 @@ class TreeState(object): value_generator_state = attr.ib(type=Any) +# TODO(b/192464750): move `get_step_idx` to be a property of `TreeState`. @tf.function def get_step_idx(state: TreeState) -> tf.Tensor: """Returns the current leaf node index based on `TreeState.level_buffer_idx`.""" @@ -188,6 +270,14 @@ class TreeAggregator(): https://dl.acm.org/doi/pdf/10.1145/1806689.1806787. A buffer at the scale of tree depth is maintained and updated when a new conceptual leaf node arrives. + Example usage: + random_generator = GaussianNoiseGenerator(...) + tree_aggregator = TreeAggregator(random_generator) + state = tree_aggregator.init_state() + for leaf_node_idx in range(total_steps): + assert leaf_node_idx == get_step_idx(state)) + noise, state = tree_aggregator.get_cumsum_and_update(state) + Attributes: value_generator: A `ValueGenerator` or a no-arg function to generate a noise value for each tree node. 
+  """
+
+  def __init__(self, frequency: int):
+    """Constructs the `PeriodicRoundRestartIndicator`.
+
+    Args:
+      frequency: The `next` function will return `True` once every `frequency`
+        calls.
+    """
+    if frequency < 1:
+      raise ValueError('Restart frequency should be equal or larger than 1, '
+                       f'got {frequency}')
+    self.frequency = tf.constant(frequency, tf.int32)
+
+  def initialize(self):
+    """Returns an initialized state of 0 for `PeriodicRoundRestartIndicator`."""
+    return tf.constant(0, tf.int32)
+
+  def next(self, state):
+    """Gets the next bool indicator and advances the state.
+
+    Args:
+      state: The current state.
+
+    Returns:
+      A pair (value, new_state) where value is the bool indicator and
+      new_state is `state + 1`.
+    """
+    state = state + tf.constant(1, tf.int32)
+    flag = state % self.frequency == 0
+    return flag, state
+
+
 @attr.s(eq=False, frozen=True, slots=True)
 class TreeState(object):
   """Class defining state of the tree.
@@ -166,6 +247,7 @@ class TreeState(object):
   value_generator_state = attr.ib(type=Any)


+# TODO(b/192464750): move `get_step_idx` to be a property of `TreeState`.
 @tf.function
 def get_step_idx(state: TreeState) -> tf.Tensor:
   """Returns the current leaf node index based on `TreeState.level_buffer_idx`."""
@@ -188,6 +270,14 @@ class TreeAggregator():
   https://dl.acm.org/doi/pdf/10.1145/1806689.1806787. A buffer at the scale of
   tree depth is maintained and updated when a new conceptual leaf node arrives.

+  Example usage:
+    random_generator = GaussianNoiseGenerator(...)
+    tree_aggregator = TreeAggregator(random_generator)
+    state = tree_aggregator.init_state()
+    for leaf_node_idx in range(total_steps):
+      assert leaf_node_idx == get_step_idx(state)
+      noise, state = tree_aggregator.get_cumsum_and_update(state)
+
   Attributes:
     value_generator: A `ValueGenerator` or a no-arg function to generate a
      noise value for each tree node.
@@ -205,14 +295,8 @@ class TreeAggregator():
     else:
       self.value_generator = StatelessValueGenerator(value_generator)

-  def init_state(self) -> TreeState:
-    """Returns initial `TreeState`.
-
-    Initializes `TreeState` for a tree of a single leaf node: the respective
-    initial node value in `TreeState.level_buffer` is generated by the value
-    generator function, and the node index is 0.
-    """
-    value_generator_state = self.value_generator.initialize()
+  def _get_init_state(self, value_generator_state) -> TreeState:
+    """Returns initial `TreeState` given `value_generator_state`."""
     level_buffer_idx = tf.TensorArray(dtype=tf.int32, size=1, dynamic_size=True)
     level_buffer_idx = level_buffer_idx.write(0, tf.constant(
         0, dtype=tf.int32)).stack()
@@ -224,12 +308,28 @@ class TreeAggregator():
         new_val)
     level_buffer = tf.nest.map_structure(lambda x, y: x.write(0, y).stack(),
                                          level_buffer_structure, new_val)
-
     return TreeState(
         level_buffer=level_buffer,
         level_buffer_idx=level_buffer_idx,
         value_generator_state=value_generator_state)

+  def init_state(self) -> TreeState:
+    """Returns initial `TreeState`.
+
+    Initializes `TreeState` for a tree of a single leaf node: the respective
+    initial node value in `TreeState.level_buffer` is generated by the value
+    generator function, and the node index is 0.
+
+    Returns:
+      An initialized `TreeState`.
+    """
+    value_generator_state = self.value_generator.initialize()
+    return self._get_init_state(value_generator_state)
+
+  def reset_state(self, state: TreeState) -> TreeState:
+    """Returns a reset `TreeState` for starting a new tree."""
+    return self._get_init_state(state.value_generator_state)
+
   @tf.function
   def _get_cumsum(self, level_buffer: Collection[tf.Tensor]) -> tf.Tensor:
     return tf.nest.map_structure(lambda x: tf.reduce_sum(x, axis=0),
@@ -238,7 +338,7 @@ class TreeAggregator():
   @tf.function
   def get_cumsum_and_update(self,
                             state: TreeState) -> Tuple[tf.Tensor, TreeState]:
-    """Returns tree aggregated value and updated `TreeState` for one step.
+    """Returns tree aggregated noise and updates `TreeState` for the next step.

     `TreeState` is updated to prepare for accepting the *next* leaf node. Note
     that `get_step_idx` can be called to get the current index of the leaf node
@@ -249,10 +349,20 @@ class TreeAggregator():
     Args:
       state: `TreeState` for the current leaf node, index can be queried by
         `tree_aggregation.get_step_idx(state.level_buffer_idx)`.
+
+    Returns:
+      Tuple of (noise, state) where `noise` is generated by the tree
+      aggregation protocol for the cumulative sum of streaming data, and
+      `state` is the updated `TreeState`.
     """
     level_buffer_idx, level_buffer, value_generator_state = (
         state.level_buffer_idx, state.level_buffer,
         state.value_generator_state)
+    # We only publicize a combined function for updating the state and
+    # returning noised results because this DPQuery is designed for streaming
+    # data, and we only maintain a dynamic memory buffer of max size log(T).
+    # Only the most recent noised results can be queried, and queries are
+    # expected to happen for every step in the streaming setting.
     cumsum = self._get_cumsum(level_buffer)

     new_level_buffer = tf.nest.map_structure(
@@ -311,6 +421,14 @@ class EfficientTreeAggregator():
   `sigma * sqrt(2^{d-1}/(2^d-1))`, which becomes `sigma / sqrt(2)` when
   the tree is very tall.

+  Example usage:
+    random_generator = GaussianNoiseGenerator(...)
+    tree_aggregator = EfficientTreeAggregator(random_generator)
+    state = tree_aggregator.init_state()
+    for leaf_node_idx in range(total_steps):
+      assert leaf_node_idx == get_step_idx(state)
+      noise, state = tree_aggregator.get_cumsum_and_update(state)
+
   Attributes:
     value_generator: A `ValueGenerator` or a no-arg function to generate a
       noise value for each tree node.
@@ -328,17 +446,8 @@ class EfficientTreeAggregator():
     else:
       self.value_generator = StatelessValueGenerator(value_generator)

-  def init_state(self) -> TreeState:
-    """Returns initial `TreeState`.
-
-    Initializes `TreeState` for a tree of a single leaf node: the respective
-    initial node value in `TreeState.level_buffer` is generated by the value
-    generator function, and the node index is 0.
-
-    Returns:
-      An initialized `TreeState`.
-    """
-    value_generator_state = self.value_generator.initialize()
+  def _get_init_state(self, value_generator_state):
+    """Returns the initial buffer for `TreeState`."""
     level_buffer_idx = tf.TensorArray(dtype=tf.int32, size=1, dynamic_size=True)
     level_buffer_idx = level_buffer_idx.write(0, tf.constant(
         0, dtype=tf.int32)).stack()
@@ -350,12 +459,28 @@ class EfficientTreeAggregator():
         new_val)
     level_buffer = tf.nest.map_structure(lambda x, y: x.write(0, y).stack(),
                                          level_buffer_structure, new_val)
-
     return TreeState(
         level_buffer=level_buffer,
         level_buffer_idx=level_buffer_idx,
         value_generator_state=value_generator_state)

+  def init_state(self) -> TreeState:
+    """Returns initial `TreeState`.
+
+    Initializes `TreeState` for a tree of a single leaf node: the respective
+    initial node value in `TreeState.level_buffer` is generated by the value
+    generator function, and the node index is 0.
+
+    Returns:
+      An initialized `TreeState`.
+    """
+    value_generator_state = self.value_generator.initialize()
+    return self._get_init_state(value_generator_state)
+
+  def reset_state(self, state: TreeState) -> TreeState:
+    """Returns a reset `TreeState` for starting a new tree."""
+    return self._get_init_state(state.value_generator_state)
+
   @tf.function
   def _get_cumsum(self, state: TreeState) -> tf.Tensor:
     """Returns weighted cumulative sum of noise based on `TreeState`."""
@@ -377,7 +502,7 @@ class EfficientTreeAggregator():
   @tf.function
   def get_cumsum_and_update(self,
                             state: TreeState) -> Tuple[tf.Tensor, TreeState]:
-    """Returns tree aggregated value and updated `TreeState` for one step.
+    """Returns tree aggregated noise and updates `TreeState` for the next step.

     `TreeState` is updated to prepare for accepting the *next* leaf node. Note
     that `get_step_idx` can be called to get the current index of the leaf node
@@ -390,7 +515,17 @@ class EfficientTreeAggregator():
     Args:
       state: `TreeState` for the current leaf node, index can be queried by
         `tree_aggregation.get_step_idx(state.level_buffer_idx)`.
+
+    Returns:
+      Tuple of (noise, state) where `noise` is generated by the tree
+      aggregation protocol for the cumulative sum of streaming data, and
+      `state` is the updated `TreeState`.
     """
+    # We only publicize a combined function for updating the state and
+    # returning noised results because this DPQuery is designed for streaming
+    # data, and we only maintain a dynamic memory buffer of max size log(T).
+    # Only the most recent noised results can be queried, and queries are
+    # expected to happen for every step in the streaming setting.
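+    # Note: the noise for the current leaf is computed from the buffered
+    # per-level values before the buffer is advanced to the next leaf below.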
     cumsum = self._get_cumsum(state)

     level_buffer_idx, level_buffer, value_generator_state = (

diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
index 082bf01..943cf9f 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
@@ -32,13 +32,35 @@ from tensorflow_privacy.privacy.dp_query import gaussian_query
 from tensorflow_privacy.privacy.dp_query import tree_aggregation


-class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery):
-  """Implements dp_query for adding correlated noise through tree structure.
+# TODO(b/192464750): define `RestartQuery` and move `RestartIndicator` to be
+# in the same module.

-  First clips and sums records in current sample, returns cumulative sum of
-  samples over time (instead of only current sample) with added noise for
-  cumulative sum proportional to log(T), T being the number of times the query
-  is called.
+
+class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery):
+  """Returns private cumulative sums by clipping and adding correlated noise.
+
+  Consider calling `get_noised_result` T times, where each (x_i, i=0,1,...,T-1)
+  is the private value returned by `accumulate_record`, i.e. x_i =
+  sum_{j=0}^{n-1} x_{i,j}, where each x_{i,j} is a private record in the
+  database. This class is intended to make multiple queries, which release
+  privatized values of the cumulative sums s_i = sum_{k=0}^{i} x_k, for
+  i=0,...,T-1. Each call to `get_noised_result` releases the next cumulative
+  sum s_i, in contrast to the GaussianSumQuery, which releases x_i. Noise for
+  the cumulative sums is added using the tree aggregation logic in
+  `tree_aggregation` and is proportional to log(T).
+
+  Example usage:
+    query = TreeCumulativeSumQuery(...)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    for i, samples in enumerate(streaming_samples):
+      sample_state = query.initial_sample_state(samples[0])
+      # Compute x_i = sum_{j=0}^{n-1} x_{i,j}
+      for j, sample in enumerate(samples):
+        sample_state = query.accumulate_record(params, sample_state, sample)
+      # noised_cumsum is the privatized estimate of s_i
+      noised_cumsum, global_state = query.get_noised_result(
+          sample_state, global_state)

   Attributes:
     clip_fn: Callable that specifies clipping function. `clip_fn` receives two
@@ -52,6 +74,8 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery):
       node. Noise standard deviation is specified outside the `dp_query` by the
       user when defining `noise_fn` and should have order
       O(clip_norm*log(T)/eps) to guarantee eps-DP.
+    restart_indicator: `tree_aggregation.RestartIndicator` to generate the
+      boolean indicator for resetting the tree state.
   """

   @attr.s(frozen=True)
   class GlobalState(object):
     """Class defining global state for Tree sum queries.

     Attributes:
       tree_state: Current state of noise tree keeping track of current leaf
         and each level state.
       clip_value: The clipping value to be passed to clip_fn.
       samples_cumulative_sum: Noiseless cumulative sum of samples over time.
+      restarter_state: Current state of the restarter to indicate whether
+        the tree state will be reset.
     """
     tree_state = attr.ib()
     clip_value = attr.ib()
     samples_cumulative_sum = attr.ib()
+    restarter_state = attr.ib()

   def __init__(self,
                record_specs,
                noise_generator,
                clip_fn,
                clip_value,
-               use_efficient=True):
+               use_efficient=True,
+               restart_indicator=None):
     """Initializes the `TreeCumulativeSumQuery`.
Consider using `build_l2_gaussian_query` for the construction of a @@ -91,6 +119,8 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". + restart_indicator: `tree_aggregation.RestartIndicator` to generate the + boolean indicator for resetting the tree state. """ self._clip_fn = clip_fn self._clip_value = clip_value @@ -100,17 +130,21 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): noise_generator) else: self._tree_aggregator = tree_aggregation.TreeAggregator(noise_generator) + self._restart_indicator = restart_indicator def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" initial_tree_state = self._tree_aggregator.init_state() initial_samples_cumulative_sum = tf.nest.map_structure( lambda spec: tf.zeros(spec.shape), self._record_specs) - initial_state = TreeCumulativeSumQuery.GlobalState( + restarter_state = None + if self._restart_indicator is not None: + restarter_state = self._restart_indicator.initialize() + return TreeCumulativeSumQuery.GlobalState( tree_state=initial_tree_state, clip_value=tf.constant(self._clip_value, tf.float32), - samples_cumulative_sum=initial_samples_cumulative_sum) - return initial_state + samples_cumulative_sum=initial_samples_cumulative_sum, + restarter_state=restarter_state) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" @@ -151,13 +185,21 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): tf.add, global_state.samples_cumulative_sum, sample_state) cumulative_sum_noise, new_tree_state = self._tree_aggregator.get_cumsum_and_update( global_state.tree_state) + noised_cumulative_sum = tf.nest.map_structure(tf.add, new_cumulative_sum, + cumulative_sum_noise) + restarter_state = global_state.restarter_state + if self._restart_indicator is not None: + restart_flag, restarter_state = self._restart_indicator.next( + restarter_state) + if restart_flag: + new_cumulative_sum = noised_cumulative_sum + new_tree_state = self._tree_aggregator.reset_state(new_tree_state) new_global_state = attr.evolve( global_state, samples_cumulative_sum=new_cumulative_sum, - tree_state=new_tree_state) - noised_cum_sum = tf.nest.map_structure(tf.add, new_cumulative_sum, - cumulative_sum_noise) - return noised_cum_sum, new_global_state + tree_state=new_tree_state, + restarter_state=restarter_state) + return noised_cumulative_sum, new_global_state @classmethod def build_l2_gaussian_query(cls, @@ -165,7 +207,8 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): noise_multiplier, record_specs, noise_seed=None, - use_efficient=True): + use_efficient=True, + restart_indicator=None): """Returns a query instance with L2 norm clipping and Gaussian noise. Args: @@ -180,6 +223,8 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". + restart_indicator: `tree_aggregation.RestartIndicator` to generate the + boolean indicator for resetting the tree state. 
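+
+    Returns:
+      A `TreeCumulativeSumQuery` with L2 norm clipping and Gaussian noise.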
""" if clip_norm <= 0: raise ValueError(f'`clip_norm` must be positive, got {clip_norm}.') @@ -202,22 +247,48 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): clip_value=clip_norm, record_specs=record_specs, noise_generator=gaussian_noise_generator, - use_efficient=use_efficient) + use_efficient=use_efficient, + restart_indicator=restart_indicator) class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for adding correlated noise through tree structure. + """Implements DPQuery for adding correlated noise through tree structure. - Clips and sums records in current sample; returns the current sample adding - the noise residual from tree aggregation. The returned value is conceptually - equivalent to the following: calculates cumulative sum of samples over time - (instead of only current sample) with added noise for cumulative sum - proportional to log(T), T being the number of times the query is called; - returns the residual between the current noised cumsum and the previous one - when the query is called. Combining this query with a SGD optimizer can be - used to implement the DP-FTRL algorithm in + Clips and sums records in current sample x_i = sum_{j=0}^{n-1} x_{i,j}; + returns the current sample adding the noise residual from tree aggregation. + The returned value is conceptually equivalent to the following: calculates + cumulative sum of samples over time s_i = sum_{k=0}^i x_i (instead of only + current sample) with added noise by tree aggregation protocol that is + proportional to log(T), T being the number of times the query is called; r + eturns the residual between the current noised cumsum noised(s_i) and the + previous one noised(s_{i-1}) when the query is called. + + This can be used as a drop-in replacement for `GaussianSumQuery`, and can + offer stronger utility/privacy tradeoffs when aplification-via-sampling is not + possible, or when privacy epsilon is relativly large. This may result in + more noise by a log(T) factor in each individual estimate of x_i, but if the + x_i are used in the underlying code to compute cumulative sums, the noise in + those sums can be less. That is, this allows us to adapt code that was written + to use a regular `SumQuery` to benefit from the tree aggregation protocol. + + Combining this query with a SGD optimizer can be used to implement the + DP-FTRL algorithm in "Practical and Private (Deep) Learning without Sampling or Shuffling". + Example usage: + query = TreeResidualSumQuery(...) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + for i, samples in enumerate(streaming_samples): + sample_state = query.initial_sample_state(samples[0]) + # Compute x_i = sum_{j=0}^{n-1} x_{i,j} + for j,sample in enumerate(samples): + sample_state = query.accumulate_record(params, sample_state, sample) + # noised_sum is privatized estimate of x_i by conceptually postprocessing + # noised cumulative sum s_i + noised_sum, global_state = query.get_noised_result( + sample_state, global_state) + Attributes: clip_fn: Callable that specifies clipping function. `clip_fn` receives two arguments: a flat list of vars in a record and a `clip_value` to clip the @@ -231,6 +302,8 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): node. Noise stdandard deviation is specified outside the `dp_query` by the user when defining `noise_fn` and should have order O(clip_norm*log(T)/eps) to guarantee eps-DP. 
+    restart_indicator: `tree_aggregation.RestartIndicator` to generate the
+      boolean indicator for resetting the tree state.
   """

   @attr.s(frozen=True)
   class GlobalState(object):
     """Class defining global state for Tree sum queries.

     Attributes:
       tree_state: Current state of noise tree keeping track of current leaf
         and each level state.
       clip_value: The clipping value to be passed to clip_fn.
       previous_tree_noise: Cumulative noise by tree aggregation from the
         previous time the query is called on a sample.
+      restarter_state: Current state of the restarter to indicate whether
+        the tree state will be reset.
     """
     tree_state = attr.ib()
     clip_value = attr.ib()
     previous_tree_noise = attr.ib()
+    restarter_state = attr.ib()

   def __init__(self,
                record_specs,
                noise_generator,
                clip_fn,
                clip_value,
-               use_efficient=True):
+               use_efficient=True,
+               restart_indicator=None):
     """Initializes the `TreeResidualSumQuery`.

     Consider using `build_l2_gaussian_query` for the construction of a
     `TreeResidualSumQuery` with L2 norm clipping and Gaussian noise.

     Args:
       record_specs: A nested structure of `tf.TensorSpec`s specifying structure
         and shapes of records.
       noise_generator: `tree_aggregation.ValueGenerator` to generate noise for
         each tree node.
       clip_fn: Callable that specifies clipping function. Input to clip is a
         flat list of vars in a record.
       clip_value: Float indicating the value at which to clip the record.
       use_efficient: Boolean indicating the usage of the efficient tree
         aggregation algorithm based on the paper "Efficient Use of
         Differentially Private Binary Trees".
+      restart_indicator: `tree_aggregation.RestartIndicator` to generate the
+        boolean indicator for resetting the tree state.
     """
     self._clip_fn = clip_fn
     self._clip_value = clip_value
@@ -280,16 +359,23 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery):
           noise_generator)
     else:
       self._tree_aggregator = tree_aggregation.TreeAggregator(noise_generator)
+    self._restart_indicator = restart_indicator
+
+  def _zero_initial_noise(self):
+    return tf.nest.map_structure(lambda spec: tf.zeros(spec.shape),
+                                 self._record_specs)

   def initial_global_state(self):
     """Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
     initial_tree_state = self._tree_aggregator.init_state()
-    initial_noise = tf.nest.map_structure(lambda spec: tf.zeros(spec.shape),
-                                          self._record_specs)
+    restarter_state = None
+    if self._restart_indicator is not None:
+      restarter_state = self._restart_indicator.initialize()
     return TreeResidualSumQuery.GlobalState(
         tree_state=initial_tree_state,
         clip_value=tf.constant(self._clip_value, tf.float32),
-        previous_tree_noise=initial_noise)
+        previous_tree_noise=self._zero_initial_noise(),
+        restarter_state=restarter_state)

   def derive_sample_params(self, global_state):
     """Implements `tensorflow_privacy.DPQuery.derive_sample_params`."""
@@ -328,8 +414,18 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery):
     noised_sample = tf.nest.map_structure(lambda a, b, c: a + b - c,
                                           sample_state, tree_noise,
                                           global_state.previous_tree_noise)
+    restarter_state = global_state.restarter_state
+    if self._restart_indicator is not None:
+      restart_flag, restarter_state = self._restart_indicator.next(
+          restarter_state)
+      if restart_flag:
+        tree_noise = self._zero_initial_noise()
+        new_tree_state = self._tree_aggregator.reset_state(new_tree_state)
     new_global_state = attr.evolve(
-        global_state, previous_tree_noise=tree_noise, tree_state=new_tree_state)
+        global_state,
+        previous_tree_noise=tree_noise,
+        tree_state=new_tree_state,
+        restarter_state=restarter_state)
     return noised_sample, new_global_state

   @classmethod
   def build_l2_gaussian_query(cls,
                               clip_norm,
                               noise_multiplier,
                               record_specs,
                               noise_seed=None,
- use_efficient=True): + use_efficient=True, + restart_indicator=None): """Returns `TreeResidualSumQuery` with L2 norm clipping and Gaussian noise. Args: @@ -353,6 +450,8 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". + restart_indicator: `tree_aggregation.RestartIndicator` to generate the + boolean indicator for resetting the tree state. """ if clip_norm <= 0: raise ValueError(f'`clip_norm` must be positive, got {clip_norm}.') @@ -375,7 +474,8 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): clip_value=clip_norm, record_specs=record_specs, noise_generator=gaussian_noise_generator, - use_efficient=use_efficient) + use_efficient=use_efficient, + restart_indicator=restart_indicator) @tf.function diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index a958f26..1bfaa21 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -263,13 +263,13 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertEqual(query_result, expected_sum) @parameterized.named_parameters( - ('s0t1step8', 0., 1., [1., 1., 2., 1., 2., 2., 3., 1.]), - ('s1t1step8', 1., 1., [2., 3., 5., 5., 7., 8., 10., 9.]), - ('s1t2step8', 1., 2., [3., 4., 7., 6., 9., 10., 13., 10.]), + ('s0t1', 0., 1.), + ('s1t1', 1., 1.), + ('s1t2', 1., 2.), ) def test_partial_sum_scalar_tree_aggregation(self, scalar_value, - tree_node_value, - expected_values): + tree_node_value): + total_steps = 8 query = tree_aggregation_query.TreeCumulativeSumQuery( clip_fn=_get_l2_clip_fn(), clip_value=scalar_value + 1., # no clip @@ -279,14 +279,54 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): ) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) - for val in expected_values: - # For each streaming step i , the expected value is roughly - # `scalar_value*i + tree_aggregation(tree_node_value, i)` + for i in range(total_steps): sample_state = query.initial_sample_state(scalar_value) sample_state = query.accumulate_record(params, sample_state, scalar_value) query_result, global_state = query.get_noised_result( sample_state, global_state) - self.assertEqual(query_result, val) + # For each streaming step i , the expected value is roughly + # `scalar_value*(i+1) + tree_aggregation(tree_node_value, i)`. + # The tree aggregation value can be inferred from the binary + # representation of the current step. 
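+      # (The number of tree nodes contributing noise at step i + 1 equals the
+      # number of ones in the binary representation of i + 1, e.g. three
+      # nodes at step 7 = 0b111.)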
+      self.assertEqual(
+          query_result,
+          scalar_value * (i + 1) + tree_node_value * bin(i + 1)[2:].count('1'))

+  @parameterized.named_parameters(
+      ('s0t1f1', 0., 1., 1),
+      ('s0t1f2', 0., 1., 2),
+      ('s0t1f5', 0., 1., 5),
+      ('s1t1f5', 1., 1., 5),
+      ('s1t2f2', 1., 2., 2),
+      ('s1t5f6', 1., 5., 6),
+  )
+  def test_sum_scalar_tree_aggregation_reset(self, scalar_value,
+                                             tree_node_value, frequency):
+    total_steps = 20
+    indicator = tree_aggregation.PeriodicRoundRestartIndicator(frequency)
+    query = tree_aggregation_query.TreeCumulativeSumQuery(
+        clip_fn=_get_l2_clip_fn(),
+        clip_value=scalar_value + 1.,  # no clip
+        noise_generator=lambda: tree_node_value,
+        record_specs=tf.TensorSpec([]),
+        use_efficient=False,
+        restart_indicator=indicator,
+    )
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    for i in range(total_steps):
+      sample_state = query.initial_sample_state(scalar_value)
+      sample_state = query.accumulate_record(params, sample_state, scalar_value)
+      query_result, global_state = query.get_noised_result(
+          sample_state, global_state)
+      # The expected value combines the cumulative sum of the signal, the sum
+      # of the trees that have already been reset, and the current tree sum.
+      # The tree aggregation value can be inferred from the binary
+      # representation of the current step.
+      expected = (
+          scalar_value * (i + 1) +
+          i // frequency * tree_node_value * bin(frequency)[2:].count('1') +
+          tree_node_value * bin(i % frequency + 1)[2:].count('1'))
+      self.assertEqual(query_result, expected)

   @parameterized.named_parameters(
       ('efficient', True, tree_aggregation.EfficientTreeAggregator),
@@ -395,6 +435,42 @@ class TreeResidualQueryTest(tf.test.TestCase, parameterized.TestCase):
     )
     self.assertIsInstance(query._tree_aggregator, tree_class)

+  @parameterized.named_parameters(
+      ('s0t1f1', 0., 1., 1),
+      ('s0t1f2', 0., 1., 2),
+      ('s0t1f5', 0., 1., 5),
+      ('s1t1f5', 1., 1., 5),
+      ('s1t2f2', 1., 2., 2),
+      ('s1t5f6', 1., 5., 6),
+  )
+  def test_scalar_tree_aggregation_reset(self, scalar_value, tree_node_value,
+                                         frequency):
+    total_steps = 20
+    indicator = tree_aggregation.PeriodicRoundRestartIndicator(frequency)
+    query = tree_aggregation_query.TreeResidualSumQuery(
+        clip_fn=_get_l2_clip_fn(),
+        clip_value=scalar_value + 1.,  # no clip
+        noise_generator=lambda: tree_node_value,
+        record_specs=tf.TensorSpec([]),
+        use_efficient=False,
+        restart_indicator=indicator,
+    )
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    for i in range(total_steps):
+      sample_state = query.initial_sample_state(scalar_value)
+      sample_state = query.accumulate_record(params, sample_state, scalar_value)
+      query_result, global_state = query.get_noised_result(
+          sample_state, global_state)
+      # The expected value is the signal of the current round plus the
+      # residual of two consecutive tree aggregation values. The tree
+      # aggregation value can be inferred from the binary representation of
+      # the current step.
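+      # (For example, moving from step 3 = 0b11 to step 4 = 0b100 drops the
+      # count of contributing nodes from two to one, so the noise residual is
+      # minus one node value.)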
+      expected = scalar_value + tree_node_value * (
+          bin(i % frequency + 1)[2:].count('1') -
+          bin(i % frequency)[2:].count('1'))
+      self.assertEqual(query_result, expected)
+

 class BuildTreeTest(tf.test.TestCase, parameterized.TestCase):

diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py
index 9a8be35..fc5e6cc 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py
@@ -365,5 +365,26 @@ class GaussianNoiseGeneratorTest(tf.test.TestCase):
     self.assertAllEqual(gstate, gstate2)


+class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(('zero', 0), ('negative', -1))
+  def test_round_raise(self, frequency):
+    with self.assertRaisesRegex(
+        ValueError, 'Restart frequency should be equal or larger than 1'):
+      tree_aggregation.PeriodicRoundRestartIndicator(frequency)
+
+  @parameterized.named_parameters(('f1', 1), ('f2', 2), ('f4', 4), ('f5', 5))
+  def test_round_indicator(self, frequency):
+    total_steps = 20
+    indicator = tree_aggregation.PeriodicRoundRestartIndicator(frequency)
+    state = indicator.initialize()
+    for i in range(total_steps):
+      flag, state = indicator.next(state)
+      if i % frequency == frequency - 1:
+        self.assertTrue(flag)
+      else:
+        self.assertFalse(flag)
+
+
 if __name__ == '__main__':
   tf.test.main()

From b8c1ba72cdb195be51faea4b9958661cc6737151 Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Wed, 11 Aug 2021 20:20:15 -0700
Subject: [PATCH 21/71] Change the default restarter state in
 tree_aggregation_query to an empty tuple, as the None type is not compatible
 with TFF.

PiperOrigin-RevId: 390278173
---
 tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
index 943cf9f..7ef73a1 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
@@ -137,7 +137,7 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery):
     initial_tree_state = self._tree_aggregator.init_state()
     initial_samples_cumulative_sum = tf.nest.map_structure(
         lambda spec: tf.zeros(spec.shape), self._record_specs)
-    restarter_state = None
+    restarter_state = ()
     if self._restart_indicator is not None:
       restarter_state = self._restart_indicator.initialize()
     return TreeCumulativeSumQuery.GlobalState(
@@ -368,7 +368,7 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery):
   def initial_global_state(self):
     """Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
     initial_tree_state = self._tree_aggregator.init_state()
-    restarter_state = None
+    restarter_state = ()
     if self._restart_indicator is not None:
       restarter_state = self._restart_indicator.initialize()
     return TreeResidualSumQuery.GlobalState(

From 50673fec409d7c228771c6117040e4f2d008dc75 Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Thu, 12 Aug 2021 11:02:46 -0700
Subject: [PATCH 22/71] Minor fix for the nondeterministic seed of
 tree_aggregation.GaussianNoiseGenerator. Previously logged results will not
 change much, and one seed is probably good enough.
PiperOrigin-RevId: 390412713 --- tensorflow_privacy/privacy/dp_query/tree_aggregation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py index 6015545..c0a02fc 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py @@ -94,10 +94,12 @@ class GaussianNoiseGenerator(ValueGenerator): An initial state. """ if self.seed is None: + time_now = tf.timestamp() + residual = time_now - tf.math.floor(time_now) return tf.cast( tf.stack([ tf.math.floor(tf.timestamp() * 1e6), - tf.math.floor(tf.math.log(tf.timestamp() * 1e6)) + tf.math.floor(residual * 1e9) ]), dtype=tf.int64) else: From d9a75968155e510b816b71926f2306665adb60a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 12 Aug 2021 13:38:26 -0700 Subject: [PATCH 23/71] Remove deprecated `CentralTreeSumQuery` and `DistributedTreeSumQuery`. They are replaced by `TreeRangeSumQuery` PiperOrigin-RevId: 390449215 --- .../dp_query/tree_aggregation_query.py | 222 ---------------- .../dp_query/tree_aggregation_query_test.py | 237 ------------------ 2 files changed, 459 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 7ef73a1..4e19a49 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -22,7 +22,6 @@ the leaf nodes of the tree arrive one by one as the time proceeds. """ import distutils import math -from typing import Optional import attr import tensorflow as tf @@ -732,224 +731,3 @@ def _get_add_noise(stddev, seed: int = None): return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) return add_noise - - -class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for differentially private tree aggregation protocol. - - Implements a central variant of the tree aggregation protocol from the paper - "'Is interaction necessary for distributed private learning?.' Adam Smith, - Abhradeep Thakurta, Jalaj Upadhyay" by replacing their local randomizer with - gaussian mechanism. The first step is to clip the clients' local updates (i.e. - a 1-D array containing the leaf nodes of the tree) by L1 norm to make sure it - does not exceed a prespecified upper bound. The second step is to construct - the tree on the clipped update. The third step is to add independent gaussian - noise to each node in the tree. The returned tree can support efficient and - accurate range queries with differential privacy. - """ - - @attr.s(frozen=True) - class GlobalState(object): - """Class defining global state for `CentralTreeSumQuery`. - - Attributes: - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - """ - l1_bound = attr.ib() - - def __init__(self, - stddev: float, - arity: int = 2, - l1_bound: int = 10, - seed: Optional[int] = None): - """Initializes the `CentralTreeSumQuery`. - - Args: - stddev: The stddev of the noise added to each internal node of the - constructed tree. - arity: The branching factor of the tree. - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - seed: Random seed to generate Gaussian noise. Defaults to `None`. 
Only for - test purpose. - """ - self._stddev = stddev - self._arity = arity - self._l1_bound = l1_bound - self._seed = seed - - def initial_global_state(self): - """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return CentralTreeSumQuery.GlobalState(l1_bound=self._l1_bound) - - def derive_sample_params(self, global_state): - """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return global_state.l1_bound - - def preprocess_record(self, params, record): - """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" - casted_record = tf.cast(record, tf.float32) - l1_norm = tf.norm(casted_record, ord=1) - - l1_bound = tf.cast(params, tf.float32) - - preprocessed_record, _ = tf.clip_by_global_norm([casted_record], - l1_bound, - use_norm=l1_norm) - - return preprocessed_record[0] - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`. - - Args: - sample_state: a frequency histogram. - global_state: hyper-parameters of the query. - - Returns: - a `tf.RaggedTensor` representing the tree built on top of `sample_state`. - The jth node on the ith layer of the tree can be accessed by tree[i][j] - where tree is the returned value. - """ - add_noise = _get_add_noise(self._stddev, self._seed) - tree = _build_tree_from_leaf(sample_state, self._arity) - return tf.map_fn(add_noise, tree), global_state - - -class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for differentially private tree aggregation protocol. - - The difference from `CentralTreeSumQuery` is that the tree construction and - gaussian noise addition happen in `preprocess_records`. The difference only - takes effect when used together with - `tff.aggregators.DifferentiallyPrivateFactory`. In other cases, this class - should be treated as equal with `CentralTreeSumQuery`. - - Implements a distributed version of the tree aggregation protocol from. "Is - interaction necessary for distributed private learning?." by replacing their - local randomizer with gaussian mechanism. The first step is to check the L1 - norm of the clients' local updates (i.e. a 1-D array containing the leaf nodes - of the tree) to make sure it does not exceed a prespecified upper bound. The - second step is to construct the tree. The third step is to add independent - gaussian noise to each node in the tree. The returned tree can support - efficient and accurate range queries with differential privacy. - """ - - @attr.s(frozen=True) - class GlobalState(object): - """Class defining global state for DistributedTreeSumQuery. - - Attributes: - stddev: The stddev of the noise added to each internal node in the - constructed tree. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - """ - stddev = attr.ib() - arity = attr.ib() - l1_bound = attr.ib() - - def __init__(self, - stddev: float, - arity: int = 2, - l1_bound: int = 10, - seed: Optional[int] = None): - """Initializes the `DistributedTreeSumQuery`. - - Args: - stddev: The stddev of the noise added to each node in the tree. - arity: The branching factor of the tree. - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for - test purpose. 
- """ - self._stddev = stddev - self._arity = arity - self._l1_bound = l1_bound - self._seed = seed - - def initial_global_state(self): - """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return DistributedTreeSumQuery.GlobalState( - stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) - - def derive_sample_params(self, global_state): - """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return (global_state.stddev, global_state.arity, global_state.l1_bound) - - def preprocess_record(self, params, record): - """Implements `tensorflow_privacy.DPQuery.preprocess_record`. - - This method clips the input record by L1 norm, constructs a tree on top of - it, and adds gaussian noise to each node of the tree for differential - privacy. Unlike `get_noised_result` in `CentralTreeSumQuery`, this function - flattens the `tf.RaggedTensor` before outputting it. This is useful when - used inside `tff.aggregators.DifferentiallyPrivateFactory` because it does - not accept ragged output tensor. - - Args: - params: hyper-parameters for preprocessing record, (stddev, aritry, - l1_bound) - record: leaf nodes for the tree. - - Returns: - `tf.Tensor` representing the flattened version of the tree. - """ - _, arity, l1_bound_ = params - l1_bound = tf.cast(l1_bound_, tf.float32) - - casted_record = tf.cast(record, tf.float32) - l1_norm = tf.norm(casted_record, ord=1) - - preprocessed_record, _ = tf.clip_by_global_norm([casted_record], - l1_bound, - use_norm=l1_norm) - preprocessed_record = preprocessed_record[0] - - add_noise = _get_add_noise(self._stddev, self._seed) - tree = _build_tree_from_leaf(preprocessed_record, arity) - noisy_tree = tf.map_fn(add_noise, tree) - - # The following codes reshape the output vector so the output shape of can - # be statically inferred. This is useful when used with - # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know - # the output shape of this function statically and explicitly. - flat_noisy_tree = noisy_tree.flat_values - flat_tree_shape = [ - (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - - 1) // (self._arity - 1) - ] - return tf.reshape(flat_noisy_tree, flat_tree_shape) - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`. - - This function re-constructs the `tf.RaggedTensor` from the flattened tree - output by `preprocess_records.` - - Args: - sample_state: `tf.Tensor` for the flattened tree. - global_state: hyper-parameters including noise multiplier, the branching - factor of the tree and the maximum records per user. - - Returns: - a `tf.RaggedTensor` for the tree. - """ - # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. - # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], - # row_splits=[0, 4, 4, 7, 8, 8])) - # - # This part is not written in tensorflow and will be executed on the server - # side instead of the client side if used with - # tff.aggregators.DifferentiallyPrivateFactory for federated learning. 
- row_splits = [0] + [ - (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range( - math.floor(math.log(sample_state.shape[0], self._arity)) + 1) - ] - tree = tf.RaggedTensor.from_row_splits( - values=sample_state, row_splits=row_splits) - return tree, global_state diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 1bfaa21..f88ed90 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -630,242 +630,5 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev) -class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - - def test_initial_global_state_type(self): - - query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - self.assertIsInstance( - global_state, tree_aggregation_query.CentralTreeSumQuery.GlobalState) - - def test_derive_sample_params(self): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - self.assertAllClose(params, 10.) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32)), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32)), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32)), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], - dtype=tf.float32)), - ) - def test_preprocess_record(self, arity, record): - query = tree_aggregation_query.CentralTreeSumQuery( - stddev=NOISE_STD, arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose(preprocessed_record, record) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.constant([5, 5, 0, 0], dtype=tf.int32)), - ('binary_test_float', 2, tf.constant( - [10., 10., 0., 0.], - dtype=tf.float32), tf.constant([5., 5., 0., 0.], dtype=tf.float32)), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.constant([5, 5, 0, 0], dtype=tf.int32)), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.constant([5., 5., 0., 0.], dtype=tf.float32)), - ) - def test_preprocess_record_clipped(self, arity, record, - expected_clipped_value): - query = tree_aggregation_query.CentralTreeSumQuery( - stddev=NOISE_STD, arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_clipped_value) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 
0., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result(self, arity, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - @parameterized.named_parameters( - ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ) - def test_get_noised_result_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev, seed=0) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - - sample_state, _ = query.get_noised_result(preprocessed_record, global_state) - - self.assertAllClose( - sample_state.flat_values, expected_tree, atol=3 * stddev) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - -class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - - def test_initial_global_state_type(self): - - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - self.assertIsInstance( - global_state, - tree_aggregation_query.DistributedTreeSumQuery.GlobalState) - - def test_derive_sample_params(self): - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - stddev, arity, l1_bound = query.derive_sample_params(global_state) - self.assertAllClose(stddev, NOISE_STD) - self.assertAllClose(arity, 2) - self.assertAllClose(l1_bound, 10) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. - ])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. 
- ])), - ) - def test_preprocess_record(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ) - def test_preprocess_record_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=stddev, seed=0) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - - preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose(preprocessed_record, expected_tree, atol=3 * stddev) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant( - [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant( - [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), - ) - def test_preprocess_record_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, 
tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - if __name__ == '__main__': tf.test.main() From 0600fa26a200e0322e63cc60634bc544e2afa3e3 Mon Sep 17 00:00:00 2001 From: Michael Reneer Date: Thu, 19 Aug 2021 17:56:36 -0700 Subject: [PATCH 24/71] Automated rollback of commit d9a75968155e510b816b71926f2306665adb60a0 PiperOrigin-RevId: 391885401 --- .../dp_query/tree_aggregation_query.py | 222 ++++++++++++++++ .../dp_query/tree_aggregation_query_test.py | 237 ++++++++++++++++++ 2 files changed, 459 insertions(+) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 4e19a49..7ef73a1 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -22,6 +22,7 @@ the leaf nodes of the tree arrive one by one as the time proceeds. """ import distutils import math +from typing import Optional import attr import tensorflow as tf @@ -731,3 +732,224 @@ def _get_add_noise(stddev, seed: int = None): return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) return add_noise + + +class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for differentially private tree aggregation protocol. + + Implements a central variant of the tree aggregation protocol from the paper + "'Is interaction necessary for distributed private learning?.' Adam Smith, + Abhradeep Thakurta, Jalaj Upadhyay" by replacing their local randomizer with + gaussian mechanism. The first step is to clip the clients' local updates (i.e. + a 1-D array containing the leaf nodes of the tree) by L1 norm to make sure it + does not exceed a prespecified upper bound. The second step is to construct + the tree on the clipped update. The third step is to add independent gaussian + noise to each node in the tree. The returned tree can support efficient and + accurate range queries with differential privacy. + """ + + @attr.s(frozen=True) + class GlobalState(object): + """Class defining global state for `CentralTreeSumQuery`. + + Attributes: + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + """ + l1_bound = attr.ib() + + def __init__(self, + stddev: float, + arity: int = 2, + l1_bound: int = 10, + seed: Optional[int] = None): + """Initializes the `CentralTreeSumQuery`. + + Args: + stddev: The stddev of the noise added to each internal node of the + constructed tree. + arity: The branching factor of the tree. + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for + test purpose. 
+ """ + self._stddev = stddev + self._arity = arity + self._l1_bound = l1_bound + self._seed = seed + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return CentralTreeSumQuery.GlobalState(l1_bound=self._l1_bound) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return global_state.l1_bound + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" + casted_record = tf.cast(record, tf.float32) + l1_norm = tf.norm(casted_record, ord=1) + + l1_bound = tf.cast(params, tf.float32) + + preprocessed_record, _ = tf.clip_by_global_norm([casted_record], + l1_bound, + use_norm=l1_norm) + + return preprocessed_record[0] + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`. + + Args: + sample_state: a frequency histogram. + global_state: hyper-parameters of the query. + + Returns: + a `tf.RaggedTensor` representing the tree built on top of `sample_state`. + The jth node on the ith layer of the tree can be accessed by tree[i][j] + where tree is the returned value. + """ + add_noise = _get_add_noise(self._stddev, self._seed) + tree = _build_tree_from_leaf(sample_state, self._arity) + return tf.map_fn(add_noise, tree), global_state + + +class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): + """Implements dp_query for differentially private tree aggregation protocol. + + The difference from `CentralTreeSumQuery` is that the tree construction and + gaussian noise addition happen in `preprocess_records`. The difference only + takes effect when used together with + `tff.aggregators.DifferentiallyPrivateFactory`. In other cases, this class + should be treated as equal with `CentralTreeSumQuery`. + + Implements a distributed version of the tree aggregation protocol from. "Is + interaction necessary for distributed private learning?." by replacing their + local randomizer with gaussian mechanism. The first step is to check the L1 + norm of the clients' local updates (i.e. a 1-D array containing the leaf nodes + of the tree) to make sure it does not exceed a prespecified upper bound. The + second step is to construct the tree. The third step is to add independent + gaussian noise to each node in the tree. The returned tree can support + efficient and accurate range queries with differential privacy. + """ + + @attr.s(frozen=True) + class GlobalState(object): + """Class defining global state for DistributedTreeSumQuery. + + Attributes: + stddev: The stddev of the noise added to each internal node in the + constructed tree. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + """ + stddev = attr.ib() + arity = attr.ib() + l1_bound = attr.ib() + + def __init__(self, + stddev: float, + arity: int = 2, + l1_bound: int = 10, + seed: Optional[int] = None): + """Initializes the `DistributedTreeSumQuery`. + + Args: + stddev: The stddev of the noise added to each node in the tree. + arity: The branching factor of the tree. + l1_bound: An upper bound on the L1 norm of the input record. This is + needed to bound the sensitivity and deploy differential privacy. + seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for + test purpose. 
+ """ + self._stddev = stddev + self._arity = arity + self._l1_bound = l1_bound + self._seed = seed + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return DistributedTreeSumQuery.GlobalState( + stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return (global_state.stddev, global_state.arity, global_state.l1_bound) + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`. + + This method clips the input record by L1 norm, constructs a tree on top of + it, and adds gaussian noise to each node of the tree for differential + privacy. Unlike `get_noised_result` in `CentralTreeSumQuery`, this function + flattens the `tf.RaggedTensor` before outputting it. This is useful when + used inside `tff.aggregators.DifferentiallyPrivateFactory` because it does + not accept ragged output tensor. + + Args: + params: hyper-parameters for preprocessing record, (stddev, aritry, + l1_bound) + record: leaf nodes for the tree. + + Returns: + `tf.Tensor` representing the flattened version of the tree. + """ + _, arity, l1_bound_ = params + l1_bound = tf.cast(l1_bound_, tf.float32) + + casted_record = tf.cast(record, tf.float32) + l1_norm = tf.norm(casted_record, ord=1) + + preprocessed_record, _ = tf.clip_by_global_norm([casted_record], + l1_bound, + use_norm=l1_norm) + preprocessed_record = preprocessed_record[0] + + add_noise = _get_add_noise(self._stddev, self._seed) + tree = _build_tree_from_leaf(preprocessed_record, arity) + noisy_tree = tf.map_fn(add_noise, tree) + + # The following codes reshape the output vector so the output shape of can + # be statically inferred. This is useful when used with + # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know + # the output shape of this function statically and explicitly. + flat_noisy_tree = noisy_tree.flat_values + flat_tree_shape = [ + (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - + 1) // (self._arity - 1) + ] + return tf.reshape(flat_noisy_tree, flat_tree_shape) + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`. + + This function re-constructs the `tf.RaggedTensor` from the flattened tree + output by `preprocess_records.` + + Args: + sample_state: `tf.Tensor` for the flattened tree. + global_state: hyper-parameters including noise multiplier, the branching + factor of the tree and the maximum records per user. + + Returns: + a `tf.RaggedTensor` for the tree. + """ + # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. + # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], + # row_splits=[0, 4, 4, 7, 8, 8])) + # + # This part is not written in tensorflow and will be executed on the server + # side instead of the client side if used with + # tff.aggregators.DifferentiallyPrivateFactory for federated learning. 
+ row_splits = [0] + [ + (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range( + math.floor(math.log(sample_state.shape[0], self._arity)) + 1) + ] + tree = tf.RaggedTensor.from_row_splits( + values=sample_state, row_splits=row_splits) + return tree, global_state diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index f88ed90..1bfaa21 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -630,5 +630,242 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev) +class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_initial_global_state_type(self): + + query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + self.assertIsInstance( + global_state, tree_aggregation_query.CentralTreeSumQuery.GlobalState) + + def test_derive_sample_params(self): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + self.assertAllClose(params, 10.) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32)), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32)), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32)), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], + dtype=tf.float32)), + ) + def test_preprocess_record(self, arity, record): + query = tree_aggregation_query.CentralTreeSumQuery( + stddev=NOISE_STD, arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + + self.assertAllClose(preprocessed_record, record) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.constant([5, 5, 0, 0], dtype=tf.int32)), + ('binary_test_float', 2, tf.constant( + [10., 10., 0., 0.], + dtype=tf.float32), tf.constant([5., 5., 0., 0.], dtype=tf.float32)), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.constant([5, 5, 0, 0], dtype=tf.int32)), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.constant([5., 5., 0., 0.], dtype=tf.float32)), + ) + def test_preprocess_record_clipped(self, arity, record, + expected_clipped_value): + query = tree_aggregation_query.CentralTreeSumQuery( + stddev=NOISE_STD, arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_clipped_value) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 
0., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result(self, arity, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + @parameterized.named_parameters( + ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ) + def test_get_noised_result_with_noise(self, stddev, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev, seed=0) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + + sample_state, _ = query.get_noised_result(preprocessed_record, global_state) + + self.assertAllClose( + sample_state.flat_values, expected_tree, atol=3 * stddev) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + +class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): + + def test_initial_global_state_type(self): + + query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + self.assertIsInstance( + global_state, + tree_aggregation_query.DistributedTreeSumQuery.GlobalState) + + def test_derive_sample_params(self): + query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) + global_state = query.initial_global_state() + stddev, arity, l1_bound = query.derive_sample_params(global_state) + self.assertAllClose(stddev, NOISE_STD) + self.assertAllClose(arity, 2) + self.assertAllClose(l1_bound, 10) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. + ])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. 
+ ])), + ) + def test_preprocess_record(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_tree) + + @parameterized.named_parameters( + ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), + ) + def test_preprocess_record_with_noise(self, stddev, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=stddev, seed=0) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + + preprocessed_record = query.preprocess_record(params, record) + + self.assertAllClose(preprocessed_record, expected_tree, atol=3 * stddev) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant( + [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), + ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant( + [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), + ) + def test_preprocess_record_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + self.assertAllClose(preprocessed_record, expected_tree) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), + tf.ragged.constant([[1.], [1., 0., 0.], + [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + @parameterized.named_parameters( + ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), + ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ('ternary_test_float', 3, 
tf.constant([10., 10., 0., 0.], + dtype=tf.float32), + tf.ragged.constant([[10.], [10., 0., 0.], + [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), + ) + def test_get_noised_result_clipped(self, arity, record, expected_tree): + query = tree_aggregation_query.DistributedTreeSumQuery( + stddev=0., arity=arity) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + preprocessed_record = query.preprocess_record(params, record) + sample_state, global_state = query.get_noised_result( + preprocessed_record, global_state) + + self.assertAllClose(sample_state, expected_tree) + + if __name__ == '__main__': tf.test.main() From ef83391ce67ba9aa6aa02d3d7237420147d1831f Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Thu, 19 Aug 2021 23:56:16 -0700 Subject: [PATCH 25/71] Use tree aggregation noise for quantile estimation. PiperOrigin-RevId: 391928297 --- .../dp_query/quantile_estimator_query.py | 28 +++++ .../dp_query/quantile_estimator_query_test.py | 118 +++++++++++------- 2 files changed, 98 insertions(+), 48 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py index 4358a95..e23b83d 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py @@ -24,6 +24,7 @@ from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import no_privacy_query from tensorflow_privacy.privacy.dp_query import normalized_query +from tensorflow_privacy.privacy.dp_query import tree_aggregation_query class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): @@ -209,3 +210,30 @@ class NoPrivacyQuantileEstimatorQuery(QuantileEstimatorQuery): del below_estimate_stddev del expected_num_records return no_privacy_query.NoPrivacyAverageQuery() + + +class TreeAggregationQuantileEstimatorQuery(QuantileEstimatorQuery): + """Iterative process to estimate target quantile of a univariate distribution. + + Unlike the base class, this uses a `TreeResidualSumQuery` to estimate the + fraction below estimate with an exact denominator. This assumes that below + estimate value is used in a SGD-like update and we want to privatize the + cumsum of the below estimate. + + See "Practical and Private (Deep) Learning without Sampling or Shuffling" + (https://arxiv.org/abs/2103.00039) for tree aggregation and privacy + accounting, and "Differentially Private Learning with Adaptive Clipping" + (https://arxiv.org/abs/1905.03871) for how below estimate is used in a + SGD-like algorithm. + """ + + def _construct_below_estimate_query(self, below_estimate_stddev, + expected_num_records): + # See comments in `QuantileEstimatorQuery._construct_below_estimate_query` + # for why clip norm 0.5 is used for the query. 
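A quick check on the calibration in the construction that follows (my reading of the parameters, not spelled out in the patch): in the base class each preprocessed record is an indicator shifted to {-0.5, +0.5}, so an L2 clip norm of 0.5 leaves it intact, and `build_l2_gaussian_query` appears to add noise of stddev clip_norm * noise_multiplier, i.e. 0.5 * (2 * below_estimate_stddev) = below_estimate_stddev, recovering exactly the noise scale the constructor was asked for.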
+ sum_query = tree_aggregation_query.TreeResidualSumQuery.build_l2_gaussian_query( + clip_norm=0.5, + noise_multiplier=2 * below_estimate_stddev, + record_specs=tf.TensorSpec([])) + return normalized_query.NormalizedQuery( + sum_query, denominator=expected_num_records) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py index 679e525..d349f56 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py @@ -29,22 +29,26 @@ from tensorflow_privacy.privacy.dp_query import test_utils tf.enable_eager_execution() -def _make_quantile_estimator_query( - initial_estimate, - target_quantile, - learning_rate, - below_estimate_stddev, - expected_num_records, - geometric_update): +def _make_quantile_estimator_query(initial_estimate, + target_quantile, + learning_rate, + below_estimate_stddev, + expected_num_records, + geometric_update, + tree_aggregation=False): if expected_num_records is not None: - return quantile_estimator_query.QuantileEstimatorQuery( - initial_estimate, - target_quantile, - learning_rate, - below_estimate_stddev, - expected_num_records, - geometric_update) + if tree_aggregation: + return quantile_estimator_query.TreeAggregationQuantileEstimatorQuery( + initial_estimate, target_quantile, learning_rate, + below_estimate_stddev, expected_num_records, geometric_update) + else: + return quantile_estimator_query.QuantileEstimatorQuery( + initial_estimate, target_quantile, learning_rate, + below_estimate_stddev, expected_num_records, geometric_update) else: + if tree_aggregation: + raise ValueError( + 'Cannot set expected_num_records to None for tree aggregation.') return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery( initial_estimate, target_quantile, @@ -54,8 +58,9 @@ def _make_quantile_estimator_query( class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(('exact', True), ('fixed', False)) - def test_target_zero(self, exact): + @parameterized.named_parameters( + ('exact', True, False), ('fixed', False, False), ('tree', False, True)) + def test_target_zero(self, exact, tree): record1 = tf.constant(8.5) record2 = tf.constant(7.25) @@ -65,7 +70,8 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): learning_rate=1.0, below_estimate_stddev=0.0, expected_num_records=(None if exact else 2.0), - geometric_update=False) + geometric_update=False, + tree_aggregation=tree) global_state = query.initial_global_state() @@ -84,18 +90,20 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertAllClose(actual_estimate.numpy(), expected_estimate) - @parameterized.named_parameters(('exact', True), ('fixed', False)) - def test_target_zero_geometric(self, exact): + @parameterized.named_parameters( + ('exact', True, False), ('fixed', False, False), ('tree', False, True)) + def test_target_zero_geometric(self, exact, tree): record1 = tf.constant(5.0) record2 = tf.constant(2.5) query = _make_quantile_estimator_query( initial_estimate=16.0, target_quantile=0.0, - learning_rate=np.log(2.0), # Geometric steps in powers of 2. + learning_rate=np.log(2.0), # Geometric steps in powers of 2. 
below_estimate_stddev=0.0, expected_num_records=(None if exact else 2.0), - geometric_update=True) + geometric_update=True, + tree_aggregation=tree) global_state = query.initial_global_state() @@ -116,8 +124,9 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertAllClose(actual_estimate.numpy(), expected_estimate) - @parameterized.named_parameters(('exact', True), ('fixed', False)) - def test_target_one(self, exact): + @parameterized.named_parameters( + ('exact', True, False), ('fixed', False, False), ('tree', False, True)) + def test_target_one(self, exact, tree): record1 = tf.constant(1.5) record2 = tf.constant(2.75) @@ -127,7 +136,8 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): learning_rate=1.0, below_estimate_stddev=0.0, expected_num_records=(None if exact else 2.0), - geometric_update=False) + geometric_update=False, + tree_aggregation=tree) global_state = query.initial_global_state() @@ -146,18 +156,20 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertAllClose(actual_estimate.numpy(), expected_estimate) - @parameterized.named_parameters(('exact', True), ('fixed', False)) - def test_target_one_geometric(self, exact): + @parameterized.named_parameters( + ('exact', True, False), ('fixed', False, False), ('tree', False, True)) + def test_target_one_geometric(self, exact, tree): record1 = tf.constant(1.5) record2 = tf.constant(3.0) query = _make_quantile_estimator_query( initial_estimate=0.5, target_quantile=1.0, - learning_rate=np.log(2.0), # Geometric steps in powers of 2. + learning_rate=np.log(2.0), # Geometric steps in powers of 2. below_estimate_stddev=0.0, expected_num_records=(None if exact else 2.0), - geometric_update=True) + geometric_update=True, + tree_aggregation=tree) global_state = query.initial_global_state() @@ -179,15 +191,19 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertAllClose(actual_estimate.numpy(), expected_estimate) @parameterized.named_parameters( - ('start_low_geometric_exact', True, True, True), - ('start_low_arithmetic_exact', True, True, False), - ('start_high_geometric_exact', True, False, True), - ('start_high_arithmetic_exact', True, False, False), - ('start_low_geometric_noised', False, True, True), - ('start_low_arithmetic_noised', False, True, False), - ('start_high_geometric_noised', False, False, True), - ('start_high_arithmetic_noised', False, False, False)) - def test_linspace(self, exact, start_low, geometric): + ('start_low_geometric_exact', True, True, True, False), + ('start_low_arithmetic_exact', True, True, False, False), + ('start_high_geometric_exact', True, False, True, False), + ('start_high_arithmetic_exact', True, False, False, False), + ('start_low_geometric_noised', False, True, True, False), + ('start_low_arithmetic_noised', False, True, False, False), + ('start_high_geometric_noised', False, False, True, False), + ('start_high_arithmetic_noised', False, False, False, False), + ('start_low_geometric_tree', False, True, True, True), + ('start_low_arithmetic_tree', False, True, False, True), + ('start_high_geometric_tree', False, False, True, True), + ('start_high_arithmetic_tree', False, False, False, True)) + def test_linspace(self, exact, start_low, geometric, tree): # 100 records equally spaced from 0 to 10 in 0.1 increments. # Test that we converge to the correct median value and bounce around it. 
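As background for these convergence tests, a numeric sketch of the update rule they exercise, reconstructed from the adaptive-clipping paper cited in the class docstring (`geometric_step` is an illustrative helper, not library code):

import numpy as np

def geometric_step(estimate, below_fraction, target, lr=np.log(2.0)):
  # Multiplicative analogue of estimate -= lr * (below_fraction - target);
  # with lr = ln(2) the estimate halves or doubles on a miss of +/-1.
  return estimate * np.exp(-lr * (below_fraction - target))

estimate = 16.0
for _ in range(2):
  # Both records (5.0 and 2.5) sit below the current estimate, so the
  # measured below-fraction is 1.0 while the target quantile is 0.0.
  estimate = geometric_step(estimate, below_fraction=1.0, target=0.0)
print(estimate)  # 16.0 -> 8.0 -> 4.0, stepping down in powers of 2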
num_records = 21 @@ -200,7 +216,8 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): learning_rate=1.0, below_estimate_stddev=(0.0 if exact else 1e-2), expected_num_records=(None if exact else num_records), - geometric_update=geometric) + geometric_update=geometric, + tree_aggregation=tree) global_state = query.initial_global_state() @@ -213,15 +230,19 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertNear(actual_estimate, 5.0, 0.25) @parameterized.named_parameters( - ('start_low_geometric_exact', True, True, True), - ('start_low_arithmetic_exact', True, True, False), - ('start_high_geometric_exact', True, False, True), - ('start_high_arithmetic_exact', True, False, False), - ('start_low_geometric_noised', False, True, True), - ('start_low_arithmetic_noised', False, True, False), - ('start_high_geometric_noised', False, False, True), - ('start_high_arithmetic_noised', False, False, False)) - def test_all_equal(self, exact, start_low, geometric): + ('start_low_geometric_exact', True, True, True, False), + ('start_low_arithmetic_exact', True, True, False, False), + ('start_high_geometric_exact', True, False, True, False), + ('start_high_arithmetic_exact', True, False, False, False), + ('start_low_geometric_noised', False, True, True, False), + ('start_low_arithmetic_noised', False, True, False, False), + ('start_high_geometric_noised', False, False, True, False), + ('start_high_arithmetic_noised', False, False, False, False), + ('start_low_geometric_tree', False, True, True, True), + ('start_low_arithmetic_tree', False, True, False, True), + ('start_high_geometric_tree', False, False, True, True), + ('start_high_arithmetic_tree', False, False, False, True)) + def test_all_equal(self, exact, start_low, geometric, tree): # 20 equal records. Test that we converge to that record and bounce around # it. Unlike the linspace test, the quantile-matching objective is very # sharp at the optimum so a decaying learning rate is necessary. @@ -236,7 +257,8 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): learning_rate=learning_rate, below_estimate_stddev=(0.0 if exact else 1e-2), expected_num_records=(None if exact else num_records), - geometric_update=geometric) + geometric_update=geometric, + tree_aggregation=tree) global_state = query.initial_global_state() From b9e4cf1a20edce72a3d0e387f004d48678fa87bc Mon Sep 17 00:00:00 2001 From: Wennan Zhu Date: Fri, 20 Aug 2021 21:35:18 -0700 Subject: [PATCH 26/71] Automated rollback of commit 0600fa26a200e0322e63cc60634bc544e2afa3e3 PiperOrigin-RevId: 392126244 --- .../dp_query/tree_aggregation_query.py | 222 ---------------- .../dp_query/tree_aggregation_query_test.py | 237 ------------------ 2 files changed, 459 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 7ef73a1..4e19a49 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -22,7 +22,6 @@ the leaf nodes of the tree arrive one by one as the time proceeds. 
""" import distutils import math -from typing import Optional import attr import tensorflow as tf @@ -732,224 +731,3 @@ def _get_add_noise(stddev, seed: int = None): return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) return add_noise - - -class CentralTreeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for differentially private tree aggregation protocol. - - Implements a central variant of the tree aggregation protocol from the paper - "'Is interaction necessary for distributed private learning?.' Adam Smith, - Abhradeep Thakurta, Jalaj Upadhyay" by replacing their local randomizer with - gaussian mechanism. The first step is to clip the clients' local updates (i.e. - a 1-D array containing the leaf nodes of the tree) by L1 norm to make sure it - does not exceed a prespecified upper bound. The second step is to construct - the tree on the clipped update. The third step is to add independent gaussian - noise to each node in the tree. The returned tree can support efficient and - accurate range queries with differential privacy. - """ - - @attr.s(frozen=True) - class GlobalState(object): - """Class defining global state for `CentralTreeSumQuery`. - - Attributes: - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - """ - l1_bound = attr.ib() - - def __init__(self, - stddev: float, - arity: int = 2, - l1_bound: int = 10, - seed: Optional[int] = None): - """Initializes the `CentralTreeSumQuery`. - - Args: - stddev: The stddev of the noise added to each internal node of the - constructed tree. - arity: The branching factor of the tree. - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for - test purpose. - """ - self._stddev = stddev - self._arity = arity - self._l1_bound = l1_bound - self._seed = seed - - def initial_global_state(self): - """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return CentralTreeSumQuery.GlobalState(l1_bound=self._l1_bound) - - def derive_sample_params(self, global_state): - """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return global_state.l1_bound - - def preprocess_record(self, params, record): - """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" - casted_record = tf.cast(record, tf.float32) - l1_norm = tf.norm(casted_record, ord=1) - - l1_bound = tf.cast(params, tf.float32) - - preprocessed_record, _ = tf.clip_by_global_norm([casted_record], - l1_bound, - use_norm=l1_norm) - - return preprocessed_record[0] - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`. - - Args: - sample_state: a frequency histogram. - global_state: hyper-parameters of the query. - - Returns: - a `tf.RaggedTensor` representing the tree built on top of `sample_state`. - The jth node on the ith layer of the tree can be accessed by tree[i][j] - where tree is the returned value. - """ - add_noise = _get_add_noise(self._stddev, self._seed) - tree = _build_tree_from_leaf(sample_state, self._arity) - return tf.map_fn(add_noise, tree), global_state - - -class DistributedTreeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for differentially private tree aggregation protocol. 
- - The difference from `CentralTreeSumQuery` is that the tree construction and - gaussian noise addition happen in `preprocess_records`. The difference only - takes effect when used together with - `tff.aggregators.DifferentiallyPrivateFactory`. In other cases, this class - should be treated as equal with `CentralTreeSumQuery`. - - Implements a distributed version of the tree aggregation protocol from. "Is - interaction necessary for distributed private learning?." by replacing their - local randomizer with gaussian mechanism. The first step is to check the L1 - norm of the clients' local updates (i.e. a 1-D array containing the leaf nodes - of the tree) to make sure it does not exceed a prespecified upper bound. The - second step is to construct the tree. The third step is to add independent - gaussian noise to each node in the tree. The returned tree can support - efficient and accurate range queries with differential privacy. - """ - - @attr.s(frozen=True) - class GlobalState(object): - """Class defining global state for DistributedTreeSumQuery. - - Attributes: - stddev: The stddev of the noise added to each internal node in the - constructed tree. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - """ - stddev = attr.ib() - arity = attr.ib() - l1_bound = attr.ib() - - def __init__(self, - stddev: float, - arity: int = 2, - l1_bound: int = 10, - seed: Optional[int] = None): - """Initializes the `DistributedTreeSumQuery`. - - Args: - stddev: The stddev of the noise added to each node in the tree. - arity: The branching factor of the tree. - l1_bound: An upper bound on the L1 norm of the input record. This is - needed to bound the sensitivity and deploy differential privacy. - seed: Random seed to generate Gaussian noise. Defaults to `None`. Only for - test purpose. - """ - self._stddev = stddev - self._arity = arity - self._l1_bound = l1_bound - self._seed = seed - - def initial_global_state(self): - """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return DistributedTreeSumQuery.GlobalState( - stddev=self._stddev, arity=self._arity, l1_bound=self._l1_bound) - - def derive_sample_params(self, global_state): - """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return (global_state.stddev, global_state.arity, global_state.l1_bound) - - def preprocess_record(self, params, record): - """Implements `tensorflow_privacy.DPQuery.preprocess_record`. - - This method clips the input record by L1 norm, constructs a tree on top of - it, and adds gaussian noise to each node of the tree for differential - privacy. Unlike `get_noised_result` in `CentralTreeSumQuery`, this function - flattens the `tf.RaggedTensor` before outputting it. This is useful when - used inside `tff.aggregators.DifferentiallyPrivateFactory` because it does - not accept ragged output tensor. - - Args: - params: hyper-parameters for preprocessing record, (stddev, aritry, - l1_bound) - record: leaf nodes for the tree. - - Returns: - `tf.Tensor` representing the flattened version of the tree. 
- """ - _, arity, l1_bound_ = params - l1_bound = tf.cast(l1_bound_, tf.float32) - - casted_record = tf.cast(record, tf.float32) - l1_norm = tf.norm(casted_record, ord=1) - - preprocessed_record, _ = tf.clip_by_global_norm([casted_record], - l1_bound, - use_norm=l1_norm) - preprocessed_record = preprocessed_record[0] - - add_noise = _get_add_noise(self._stddev, self._seed) - tree = _build_tree_from_leaf(preprocessed_record, arity) - noisy_tree = tf.map_fn(add_noise, tree) - - # The following codes reshape the output vector so the output shape of can - # be statically inferred. This is useful when used with - # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know - # the output shape of this function statically and explicitly. - flat_noisy_tree = noisy_tree.flat_values - flat_tree_shape = [ - (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - - 1) // (self._arity - 1) - ] - return tf.reshape(flat_noisy_tree, flat_tree_shape) - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`. - - This function re-constructs the `tf.RaggedTensor` from the flattened tree - output by `preprocess_records.` - - Args: - sample_state: `tf.Tensor` for the flattened tree. - global_state: hyper-parameters including noise multiplier, the branching - factor of the tree and the maximum records per user. - - Returns: - a `tf.RaggedTensor` for the tree. - """ - # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. - # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], - # row_splits=[0, 4, 4, 7, 8, 8])) - # - # This part is not written in tensorflow and will be executed on the server - # side instead of the client side if used with - # tff.aggregators.DifferentiallyPrivateFactory for federated learning. - row_splits = [0] + [ - (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range( - math.floor(math.log(sample_state.shape[0], self._arity)) + 1) - ] - tree = tf.RaggedTensor.from_row_splits( - values=sample_state, row_splits=row_splits) - return tree, global_state diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 1bfaa21..f88ed90 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -630,242 +630,5 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev) -class CentralTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - - def test_initial_global_state_type(self): - - query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - self.assertIsInstance( - global_state, tree_aggregation_query.CentralTreeSumQuery.GlobalState) - - def test_derive_sample_params(self): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - self.assertAllClose(params, 10.) 
- - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32)), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32)), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32)), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], - dtype=tf.float32)), - ) - def test_preprocess_record(self, arity, record): - query = tree_aggregation_query.CentralTreeSumQuery( - stddev=NOISE_STD, arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose(preprocessed_record, record) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.constant([5, 5, 0, 0], dtype=tf.int32)), - ('binary_test_float', 2, tf.constant( - [10., 10., 0., 0.], - dtype=tf.float32), tf.constant([5., 5., 0., 0.], dtype=tf.float32)), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.constant([5, 5, 0, 0], dtype=tf.int32)), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.constant([5., 5., 0., 0.], dtype=tf.float32)), - ) - def test_preprocess_record_clipped(self, arity, record, - expected_clipped_value): - query = tree_aggregation_query.CentralTreeSumQuery( - stddev=NOISE_STD, arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_clipped_value) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result(self, arity, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - @parameterized.named_parameters( - ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ) - def test_get_noised_result_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=stddev, seed=0) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - - sample_state, _ = query.get_noised_result(preprocessed_record, global_state) - - self.assertAllClose( - sample_state.flat_values, expected_tree, atol=3 * stddev) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - 
('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.CentralTreeSumQuery(stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - -class DistributedTreeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - - def test_initial_global_state_type(self): - - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - self.assertIsInstance( - global_state, - tree_aggregation_query.DistributedTreeSumQuery.GlobalState) - - def test_derive_sample_params(self): - query = tree_aggregation_query.DistributedTreeSumQuery(stddev=NOISE_STD) - global_state = query.initial_global_state() - stddev, arity, l1_bound = query.derive_sample_params(global_state) - self.assertAllClose(stddev, NOISE_STD) - self.assertAllClose(arity, 2) - self.assertAllClose(l1_bound, 10) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([1., 1., 0., 1., 0., 0., 0.])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. - ])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0. 
- ])), - ) - def test_preprocess_record(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('stddev_0_01', 0.01, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ('stddev_0_1', 0.1, tf.constant([1, 0], dtype=tf.int32), [1., 1., 0.]), - ) - def test_preprocess_record_with_noise(self, stddev, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=stddev, seed=0) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - - preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose(preprocessed_record, expected_tree, atol=3 * stddev) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([10., 10., 0., 5., 5., 0., 0.])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant( - [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), - ('ternary_test_float', 3, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant( - [10., 10., 0., 0., 5., 5., 0., 0., 0., 0., 0., 0., 0.])), - ) - def test_preprocess_record_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('binary_test_float', 2, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0.], [1., 0., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([1, 0, 0, 0], dtype=tf.int32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, tf.constant([1., 0., 0., 0.], dtype=tf.float32), - tf.ragged.constant([[1.], [1., 0., 0.], - [1., 0., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - @parameterized.named_parameters( - ('binary_test_int', 2, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('binary_test_float', 2, tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0.], [5., 5., 0., 0.]])), - ('ternary_test_int', 3, tf.constant([10, 10, 0, 0], dtype=tf.int32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ('ternary_test_float', 3, 
tf.constant([10., 10., 0., 0.], - dtype=tf.float32), - tf.ragged.constant([[10.], [10., 0., 0.], - [5., 5., 0., 0., 0., 0., 0., 0., 0.]])), - ) - def test_get_noised_result_clipped(self, arity, record, expected_tree): - query = tree_aggregation_query.DistributedTreeSumQuery( - stddev=0., arity=arity) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - if __name__ == '__main__': tf.test.main() From ce9e002529f21f9d2bd397770794916adbc4242e Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 24 Aug 2021 05:55:49 -0700 Subject: [PATCH 27/71] Remove --gen_report flag. This will be the default behavior. PiperOrigin-RevId: 392643474 --- g3doc/build_docs.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/g3doc/build_docs.py b/g3doc/build_docs.py index 598de71..9e2ebda 100644 --- a/g3doc/build_docs.py +++ b/g3doc/build_docs.py @@ -37,9 +37,6 @@ flags.DEFINE_string('site_path', 'responsible_ai/privacy/api_docs/python/', 'The location of the doc setin the site.') flags.DEFINE_bool('search_hints', True, 'Include metadata search hints in the generated files.') -flags.DEFINE_bool('gen_report', False, - ('Generate an API report containing the health of the' - 'docstrings of the public API.')) FLAGS = flags.FLAGS @@ -85,8 +82,6 @@ def gen_api_docs(): code_url_prefix=FLAGS.code_url_prefix, site_path=FLAGS.site_path, search_hints=FLAGS.search_hints, - private_map={}, - gen_report=FLAGS.gen_report, # This callback cleans up a lot of aliases caused by internal imports. callbacks=[public_api.explicit_package_contents_filter]) From 477b5b289925fc166d8f000ae32d412034c7d8ac Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 24 Aug 2021 09:37:57 -0700 Subject: [PATCH 28/71] Remove declaration of dependency on tensorflow. PiperOrigin-RevId: 392683668 --- setup.py | 3 +-- tensorflow_privacy/version.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 9a698d1..98674e1 100644 --- a/setup.py +++ b/setup.py @@ -17,11 +17,10 @@ from setuptools import setup setup( name='tensorflow_privacy', - version='0.7.1', + version='0.7.2', url='https://github.com/tensorflow/privacy', license='Apache-2.0', install_requires=[ - 'tensorflow>=1.14', 'scipy>=0.17', 'tensorflow-estimator>=2.3.0', # for DP versions of estimator. 'attrs>=21.2.0', # for tree_aggregation_query.py. diff --git a/tensorflow_privacy/version.py b/tensorflow_privacy/version.py index 878eae5..8647bf9 100644 --- a/tensorflow_privacy/version.py +++ b/tensorflow_privacy/version.py @@ -13,4 +13,4 @@ # limitations under the License. """TensorFlow Privacy version.""" -__version__ = '0.7.1' +__version__ = '0.7.2' From 853b18929d8bf65dedd76c7f42e1181d7b78f343 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Tue, 24 Aug 2021 16:49:58 -0700 Subject: [PATCH 29/71] Move `TreeRangeSumQuery` to its own module. This is the first step, will remove the function in the old module after a TFP release. 
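For downstream code, the practical effect of this move is a new canonical import path. A sketch of what resolves once this patch lands, per the `__init__.py` hunk below (the old in-module definition is slated for removal after the next TFP release):

from tensorflow_privacy.privacy.dp_query.tree_range_query import TreeRangeSumQuery

# The top-level export added to tensorflow_privacy/__init__.py also works:
import tensorflow_privacy
query_class = tensorflow_privacy.TreeRangeSumQuery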
PiperOrigin-RevId: 392776774
---
 tensorflow_privacy/__init__.py                |   1 +
 .../dp_query/tree_aggregation_query.py        |  13 +-
 .../privacy/dp_query/tree_range_query.py      | 281 ++++++++++++++++++
 .../privacy/dp_query/tree_range_query_test.py | 182 ++++++++++++
 4 files changed, 472 insertions(+), 5 deletions(-)
 create mode 100644 tensorflow_privacy/privacy/dp_query/tree_range_query.py
 create mode 100644 tensorflow_privacy/privacy/dp_query/tree_range_query_test.py

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index cfd5344..1b3adab 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -56,6 +56,7 @@ else:
   from tensorflow_privacy.privacy.dp_query import tree_aggregation
   from tensorflow_privacy.privacy.dp_query.tree_aggregation_query import TreeCumulativeSumQuery
   from tensorflow_privacy.privacy.dp_query.tree_aggregation_query import TreeResidualSumQuery
+  from tensorflow_privacy.privacy.dp_query.tree_range_query import TreeRangeSumQuery
 
   # Estimators
   from tensorflow_privacy.privacy.estimators.dnn import DNNClassifier
diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
index 4e19a49..3120eea 100644
--- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
+++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py
@@ -15,10 +15,9 @@
 `TreeCumulativeSumQuery` and `TreeResidualSumQuery` are `DPQuery`s for continual
 online observation queries relying on `tree_aggregation`. 'Online' means that
-the leaf nodes of the tree arrive one by one as the time proceeds.
-
-`TreeRangeSumQuery` is a `DPQuery`s for offline tree aggregation protocol.
-'Offline' means all the leaf nodes are ready before the protocol starts.
+the leaf nodes of the tree arrive one by one as time proceeds. The core
+logic of tree aggregation is implemented in `tree_aggregation.TreeAggregator`
+and `tree_aggregation.EfficientTreeAggregator`.
 """
 import distutils
 import math
@@ -31,7 +30,7 @@ from tensorflow_privacy.privacy.dp_query import gaussian_query
 from tensorflow_privacy.privacy.dp_query import tree_aggregation
 
-# TODO(b/192464750): define `RestartQuery` and move `RestartIndicator` to be
+# TODO(b/193679963): define `RestartQuery` and move `RestartIndicator` to be
 # in the same module.
 
 
@@ -477,6 +476,10 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery):
         restart_indicator=restart_indicator)
 
 
+# TODO(b/197596864): Remove `TreeRangeSumQuery` from this file after the next
+# TFP release.
+
+
 @tf.function
 def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor:
   """A function constructs a complete tree given all the leaf nodes.
diff --git a/tensorflow_privacy/privacy/dp_query/tree_range_query.py b/tensorflow_privacy/privacy/dp_query/tree_range_query.py
new file mode 100644
index 0000000..1b47071
--- /dev/null
+++ b/tensorflow_privacy/privacy/dp_query/tree_range_query.py
@@ -0,0 +1,281 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""`DPQuery`s for offline differentially private tree aggregation protocols.
+
+'Offline' means all the leaf nodes are ready before the protocol starts.
+"""
+import distutils
+import math
+
+import attr
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query
+from tensorflow_privacy.privacy.dp_query import dp_query
+from tensorflow_privacy.privacy.dp_query import gaussian_query
+
+
+@tf.function
+def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor:
+  """Constructs a complete tree given all the leaf nodes.
+
+  The function takes a 1-D array representing the leaf nodes of a tree and the
+  tree's arity, and constructs a complete tree by recursively summing the
+  adjacent children to get the parent until reaching the root node. Because we
+  assume a complete tree, if the number of leaf nodes is not a power of arity,
+  the leaf nodes will be padded with zeros.
+
+  Args:
+    leaf_nodes: A 1-D array storing the leaf nodes of the tree.
+    arity: An `int` for the branching factor of the tree, i.e. the number of
+      children for each internal node.
+
+  Returns:
+    `tf.RaggedTensor` representing the tree. For example, if
+    `leaf_nodes=tf.Tensor([1, 2, 3, 4])` and `arity=2`, then the returned value
+    should be `tree=tf.RaggedTensor([[10],[3,7],[1,2,3,4]])`. In this way,
+    `tree[layer][index]` can be used to access the node indexed by (layer,
+    index) in the tree.
+  """
+
+  def pad_zero(leaf_nodes, size):
+    paddings = [[0, size - len(leaf_nodes)]]
+    return tf.pad(leaf_nodes, paddings)
+
+  leaf_nodes_size = tf.constant(len(leaf_nodes), dtype=tf.float32)
+  num_layers = tf.math.ceil(
+      tf.math.log(leaf_nodes_size) /
+      tf.math.log(tf.cast(arity, dtype=tf.float32))) + 1
+  leaf_nodes = pad_zero(
+      leaf_nodes, tf.math.pow(tf.cast(arity, dtype=tf.float32), num_layers - 1))
+
+  def _shrink_layer(layer: tf.Tensor, arity: int) -> tf.Tensor:
+    return tf.reduce_sum((tf.reshape(layer, (-1, arity))), 1)
+
+  # The following `tf.while_loop` constructs the tree from bottom up by
+  # iteratively applying `_shrink_layer` to each layer of the tree. The reason
+  # for the choice of TF1.0-style `tf.while_loop` is that @tf.function does not
+  # support auto-translation from python loop to tf loop when loop variables
+  # contain a `RaggedTensor` whose shape changes across iterations.
+
+  idx = tf.identity(num_layers)
+  loop_cond = lambda i, h: tf.less_equal(2.0, i)
+
+  def _loop_body(i, h):
+    return [
+        tf.add(i, -1.0),
+        tf.concat(([_shrink_layer(h[0], arity)], h), axis=0)
+    ]
+
+  _, tree = tf.while_loop(
+      loop_cond,
+      _loop_body, [idx, tf.RaggedTensor.from_tensor([leaf_nodes])],
+      shape_invariants=[
+          idx.get_shape(),
+          tf.RaggedTensorSpec(dtype=leaf_nodes.dtype, ragged_rank=1)
+      ])
+
+  return tree
+
+
+class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
+  """Implements dp_query for accurate range queries using tree aggregation.
+
+  Implements a variant of the tree aggregation protocol from "Is interaction
+  necessary for distributed private learning?" by Adam Smith, Abhradeep
+  Thakurta, Jalaj Upadhyay. Builds a tree on top of the input record and adds
+  noise to the tree for differential privacy. Any range query can be decomposed
+  into the sum of O(log(n)) nodes in the tree compared to O(n) when using a
+  histogram, which improves efficiency and reduces the noise scale.
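+
+  For example, with `arity=2` over a histogram of 8 leaves, the sum over the
+  range [2, 7) can be read off three tree nodes (covering leaves {2, 3},
+  {4, 5} and {6}) instead of five individual leaves.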
+ """ + + @attr.s(frozen=True) + class GlobalState(object): + """Class defining global state for TreeRangeSumQuery. + + Attributes: + arity: The branching factor of the tree (i.e. the number of children each + internal node has). + inner_query_state: The global state of the inner query. + """ + arity = attr.ib() + inner_query_state = attr.ib() + + def __init__(self, + inner_query: dp_query.SumAggregationDPQuery, + arity: int = 2): + """Initializes the `TreeRangeSumQuery`. + + Args: + inner_query: The inner `DPQuery` that adds noise to the tree. + arity: The branching factor of the tree (i.e. the number of children each + internal node has). Defaults to 2. + """ + self._inner_query = inner_query + self._arity = arity + + if self._arity < 1: + raise ValueError(f'Invalid arity={arity} smaller than 2.') + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return TreeRangeSumQuery.GlobalState( + arity=self._arity, + inner_query_state=self._inner_query.initial_global_state()) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return (global_state.arity, + self._inner_query.derive_sample_params( + global_state.inner_query_state)) + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`. + + This method builds the tree, flattens it and applies + `inner_query.preprocess_record` to the flattened tree. + + Args: + params: Hyper-parameters for preprocessing record. + record: A histogram representing the leaf nodes of the tree. + + Returns: + A `tf.Tensor` representing the flattened version of the preprocessed tree. + """ + arity, inner_query_params = params + preprocessed_record = _build_tree_from_leaf(record, arity).flat_values + # The following codes reshape the output vector so the output shape of can + # be statically inferred. This is useful when used with + # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know + # the output shape of this function statically and explicitly. + preprocessed_record_shape = [ + (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - + 1) // (self._arity - 1) + ] + preprocessed_record = tf.reshape(preprocessed_record, + preprocessed_record_shape) + preprocessed_record = self._inner_query.preprocess_record( + inner_query_params, preprocessed_record) + + return preprocessed_record + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`. + + This function re-constructs the `tf.RaggedTensor` from the flattened tree + output by `preprocess_records.` + + Args: + sample_state: A `tf.Tensor` for the flattened tree. + global_state: The global state of the protocol. + + Returns: + A `tf.RaggedTensor` representing the tree. + """ + # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. + # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], + # row_splits=[0, 4, 4, 7, 8, 8])) + # + # This part is not written in tensorflow and will be executed on the server + # side instead of the client side if used with + # tff.aggregators.DifferentiallyPrivateFactory for federated learning. 
+    sample_state, inner_query_state = self._inner_query.get_noised_result(
+        sample_state, global_state.inner_query_state)
+    new_global_state = TreeRangeSumQuery.GlobalState(
+        arity=global_state.arity, inner_query_state=inner_query_state)
+
+    row_splits = [0] + [
+        (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range(
+            math.floor(math.log(sample_state.shape[0], self._arity)) + 1)
+    ]
+    tree = tf.RaggedTensor.from_row_splits(
+        values=sample_state, row_splits=row_splits)
+    return tree, new_global_state
+
+  @classmethod
+  def build_central_gaussian_query(cls,
+                                   l2_norm_clip: float,
+                                   stddev: float,
+                                   arity: int = 2):
+    """Returns `TreeRangeSumQuery` with central Gaussian noise.
+
+    Args:
+      l2_norm_clip: Each record should be clipped so that it has L2 norm at
+        most `l2_norm_clip`.
+      stddev: Stddev of the central Gaussian noise.
+      arity: The branching factor of the tree (i.e. the number of children each
+        internal node has). Defaults to 2.
+    """
+    if l2_norm_clip <= 0:
+      raise ValueError(f'`l2_norm_clip` must be positive, got {l2_norm_clip}.')
+
+    if stddev < 0:
+      raise ValueError(f'`stddev` must be non-negative, got {stddev}.')
+
+    if arity < 2:
+      raise ValueError(f'`arity` must be at least 2, got {arity}.')
+
+    inner_query = gaussian_query.GaussianSumQuery(l2_norm_clip, stddev)
+
+    return cls(arity=arity, inner_query=inner_query)
+
+  @classmethod
+  def build_distributed_discrete_gaussian_query(cls,
+                                                l2_norm_bound: float,
+                                                local_stddev: float,
+                                                arity: int = 2):
+    """Returns `TreeRangeSumQuery` with distributed discrete Gaussian noise.
+
+    Args:
+      l2_norm_bound: Each record should be clipped so that it has L2 norm at
+        most `l2_norm_bound`.
+      local_stddev: Scale/stddev of the local discrete Gaussian noise.
+      arity: The branching factor of the tree (i.e. the number of children each
+        internal node has). Defaults to 2.
+    """
+    if l2_norm_bound <= 0:
+      raise ValueError(
+          f'`l2_norm_bound` must be positive, got {l2_norm_bound}.')
+
+    if local_stddev < 0:
+      raise ValueError(
+          f'`local_stddev` must be non-negative, got {local_stddev}.')
+
+    if arity < 2:
+      raise ValueError(f'`arity` must be at least 2, got {arity}.')
+
+    inner_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery(
+        l2_norm_bound, local_stddev)
+
+    return cls(arity=arity, inner_query=inner_query)
+
+
+def _get_add_noise(stddev, seed: int = None):
+  """Utility function to decide which `add_noise` to use according to tf version."""
+  if distutils.version.LooseVersion(
+      tf.__version__) < distutils.version.LooseVersion('2.0.0'):
+
+    # The seed should only be used for testing purposes.
+    if seed is not None:
+      tf.random.set_seed(seed)
+
+    def add_noise(v):
+      return v + tf.random.normal(
+          tf.shape(input=v), stddev=stddev, dtype=v.dtype)
+  else:
+    random_normal = tf.random_normal_initializer(stddev=stddev, seed=seed)
+
+    def add_noise(v):
+      return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype)
+
+  return add_noise
diff --git a/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py
new file mode 100644
index 0000000..e3f1156
--- /dev/null
+++ b/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py
@@ -0,0 +1,182 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for `tree_range_query`."""
+
+import math
+
+from absl.testing import parameterized
+import tensorflow as tf
+from tensorflow_privacy.privacy.dp_query import tree_range_query
+
+
+class BuildTreeTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.product(
+      leaf_nodes_size=[1, 2, 3, 4, 5],
+      arity=[2, 3],
+      dtype=[tf.int32, tf.float32],
+  )
+  def test_build_tree_from_leaf(self, leaf_nodes_size, arity, dtype):
+    """Test whether `_build_tree_from_leaf` will output the correct tree."""
+
+    leaf_nodes = tf.cast(tf.range(leaf_nodes_size), dtype)
+    depth = math.ceil(math.log(leaf_nodes_size, arity)) + 1
+
+    tree = tree_range_query._build_tree_from_leaf(leaf_nodes, arity)
+
+    self.assertEqual(depth, tree.shape[0])
+
+    for layer in range(depth):
+      reverse_depth = tree.shape[0] - layer - 1
+      span_size = arity**reverse_depth
+      for idx in range(arity**layer):
+        left = idx * span_size
+        right = (idx + 1) * span_size
+        expected_value = sum(leaf_nodes[left:right])
+        self.assertEqual(tree[layer][idx], expected_value)
+
+
+class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.product(
+      inner_query=['central', 'distributed'],
+      params=[(0., 1., 2), (1., -1., 2), (1., 1., 1)],
+  )
+  def test_raises_error(self, inner_query, params):
+    clip_norm, stddev, arity = params
+    with self.assertRaises(ValueError):
+      if inner_query == 'central':
+        tree_range_query.TreeRangeSumQuery.build_central_gaussian_query(
+            clip_norm, stddev, arity)
+      elif inner_query == 'distributed':
+        tree_range_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+            clip_norm, stddev, arity)
+
+  @parameterized.product(
+      inner_query=['central', 'distributed'],
+      clip_norm=[0.1, 1.0, 10.0],
+      stddev=[0.1, 1.0, 10.0])
+  def test_initial_global_state_type(self, inner_query, clip_norm, stddev):
+
+    if inner_query == 'central':
+      query = tree_range_query.TreeRangeSumQuery.build_central_gaussian_query(
+          clip_norm, stddev)
+    elif inner_query == 'distributed':
+      query = tree_range_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          clip_norm, stddev)
+    global_state = query.initial_global_state()
+    self.assertIsInstance(global_state,
+                          tree_range_query.TreeRangeSumQuery.GlobalState)
+
+  @parameterized.product(
+      inner_query=['central', 'distributed'],
+      clip_norm=[0.1, 1.0, 10.0],
+      stddev=[0.1, 1.0, 10.0],
+      arity=[2, 3, 4])
+  def test_derive_sample_params(self, inner_query, clip_norm, stddev, arity):
+    if inner_query == 'central':
+      query = tree_range_query.TreeRangeSumQuery.build_central_gaussian_query(
+          clip_norm, stddev, arity)
+    elif inner_query == 'distributed':
+      query = tree_range_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          clip_norm, stddev, arity)
+    global_state = query.initial_global_state()
+    derived_arity, inner_query_state = query.derive_sample_params(global_state)
+    self.assertAllClose(derived_arity, arity)
+    if inner_query == 'central':
+      self.assertAllClose(inner_query_state, clip_norm)
+    elif inner_query == 'distributed':
+      self.assertAllClose(inner_query_state.l2_norm_bound, clip_norm)
+      self.assertAllClose(inner_query_state.local_stddev, stddev)
+
+  @parameterized.product(
+      (dict(arity=2, expected_tree=[1, 1, 0, 1, 0, 0, 0]),
+       dict(arity=3, expected_tree=[1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])),
+      inner_query=['central', 'distributed'],
+  )
+  def test_preprocess_record(self, inner_query, arity, expected_tree):
+    if inner_query == 'central':
+      query = tree_range_query.TreeRangeSumQuery.build_central_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.float32)
+      expected_tree = tf.cast(expected_tree, tf.float32)
+    elif inner_query == 'distributed':
+      query = tree_range_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.int32)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    preprocessed_record = query.preprocess_record(params, record)
+    self.assertAllClose(preprocessed_record, expected_tree)
+
+  @parameterized.named_parameters(
+      ('stddev_1', 1, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]),
+      ('stddev_4', 4, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]),
+  )
+  def test_distributed_preprocess_record_with_noise(self, local_stddev, record,
+                                                    expected_tree):
+    query = tree_range_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+        10., local_stddev)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+
+    preprocessed_record = query.preprocess_record(params, record)
+
+    self.assertAllClose(
+        preprocessed_record, expected_tree, atol=10 * local_stddev)
+
+  @parameterized.product(
+      (dict(
+          arity=2,
+          expected_tree=tf.ragged.constant([[1], [1, 0], [1, 0, 0, 0]])),
+       dict(
+           arity=3,
+           expected_tree=tf.ragged.constant([[1], [1, 0, 0],
+                                             [1, 0, 0, 0, 0, 0, 0, 0, 0]]))),
+      inner_query=['central', 'distributed'],
+  )
+  def test_get_noised_result(self, inner_query, arity, expected_tree):
+    if inner_query == 'central':
+      query = tree_range_query.TreeRangeSumQuery.build_central_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.float32)
+      expected_tree = tf.cast(expected_tree, tf.float32)
+    elif inner_query == 'distributed':
+      query = tree_range_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query(
+          10., 0., arity)
+      record = tf.constant([1, 0, 0, 0], dtype=tf.int32)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    preprocessed_record = query.preprocess_record(params, record)
+    sample_state, global_state = query.get_noised_result(
+        preprocessed_record, global_state)
+
+    self.assertAllClose(sample_state, expected_tree)
+
+  @parameterized.product(stddev=[0.1, 1.0, 10.0])
+  def test_central_get_noised_result_with_noise(self, stddev):
+    query = tree_range_query.TreeRangeSumQuery.build_central_gaussian_query(
+        10., stddev)
+    global_state = query.initial_global_state()
+    params = query.derive_sample_params(global_state)
+    preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.]))
+    sample_state, global_state = query.get_noised_result(
+        preprocessed_record, global_state)
+
+    self.assertAllClose(
+        sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev)
+
+
+if __name__ == '__main__':
+  tf.test.main()

From 433b66b31655cc2f37313fad32bec51403013bb0 Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Wed, 25 Aug 2021 14:15:39 -0700
Subject: [PATCH 30/71] New DpEvent/PrivacyAccountant libraries.
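For context, a minimal sketch of the intended sequential-accounting usage,
using only the classes added in this change (values are arbitrary):

    from tensorflow_privacy.privacy.analysis import dp_event
    from tensorflow_privacy.privacy.analysis import dp_event_builder

    builder = dp_event_builder.DpEventBuilder()
    # One DP-SGD step: the Gaussian mechanism with noise multiplier 1.1
    # applied to a Poisson subsample with sampling probability 0.01.
    step = dp_event.PoissonSampledDpEvent(0.01, dp_event.GaussianDpEvent(1.1))
    builder.compose(step, count=1000)  # 1000 such steps.
    ledger = builder.build()  # SelfComposedDpEvent(step, 1000)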
PiperOrigin-RevId: 392977699
---
 .../privacy/analysis/dp_event.py              |  84 +++++++++++++++
 .../privacy/analysis/dp_event_builder.py      |  78 ++++++++++++++
 .../privacy/analysis/dp_event_builder_test.py |  73 +++++++++++++
 .../privacy/analysis/privacy_accountant.py    | 101 ++++++++++++++++++
 4 files changed, 336 insertions(+)
 create mode 100644 tensorflow_privacy/privacy/analysis/dp_event.py
 create mode 100644 tensorflow_privacy/privacy/analysis/dp_event_builder.py
 create mode 100644 tensorflow_privacy/privacy/analysis/dp_event_builder_test.py
 create mode 100644 tensorflow_privacy/privacy/analysis/privacy_accountant.py

diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py
new file mode 100644
index 0000000..fbec1d1
--- /dev/null
+++ b/tensorflow_privacy/privacy/analysis/dp_event.py
@@ -0,0 +1,84 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Standard DpEvent classes."""
+
+from typing import List
+
+import attr
+
+
+class DpEvent(object):
+  """Base class for `DpEvent`s.
+
+  A `DpEvent` describes a differentially private mechanism sufficiently for
+  computing the associated privacy losses, both in isolation and in combination
+  with other `DpEvent`s.
+  """
+
+
+@attr.s(frozen=True)
+class NoOpDpEvent(DpEvent):
+  """A `DpEvent` to represent operations with no privacy impact.
+
+  A `NoOpDpEvent` is generally never required, but it can be useful as a
+  placeholder where a `DpEvent` is expected, such as in tests or some live
+  accounting pipelines.
+  """
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class GaussianDpEvent(DpEvent):
+  """The Gaussian mechanism."""
+  noise_multiplier: float
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class SelfComposedDpEvent(DpEvent):
+  """A mechanism composed with itself multiple times."""
+  event: DpEvent
+  count: int
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class ComposedDpEvent(DpEvent):
+  """A series of composed mechanisms."""
+  events: List[SelfComposedDpEvent]
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class PoissonSampledDpEvent(DpEvent):
+  """An application of Poisson subsampling."""
+  sampling_probability: float
+  event: DpEvent
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class EqualBatchSampledDpEvent(DpEvent):
+  """An application of sampling exactly `batch_size` records."""
+  dataset_size: int
+  batch_size: int
+  event: DpEvent
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class ShuffledDatasetDpEvent(DpEvent):
+  """Shuffling a dataset and applying a mechanism to each partition."""
+  partition_events: ComposedDpEvent
+
+
+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class TreeAggregationDpEvent(DpEvent):
+  """Applying a series of mechanisms with tree aggregation."""
+  round_events: ComposedDpEvent
+  max_record_occurences_across_all_rounds: int
diff --git a/tensorflow_privacy/privacy/analysis/dp_event_builder.py b/tensorflow_privacy/privacy/analysis/dp_event_builder.py
new file mode 100644
index 0000000..a0a9435
--- /dev/null
+++ b/tensorflow_privacy/privacy/analysis/dp_event_builder.py
@@ -0,0 +1,78 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Builder class for ComposedDpEvent."""
+
+import collections
+
+from tensorflow_privacy.privacy.analysis import dp_event
+
+
+class DpEventBuilder(object):
+  """Constructs a `DpEvent` representing the composition of a series of events.
+
+  Two common use cases of the `DpEventBuilder` are 1) for producing and
+  tracking a ledger of `DpEvent`s during sequential accounting using a
+  `PrivacyAccountant`, and 2) for building up a description of a composite
+  mechanism for subsequent batch accounting.
+  """
+
+  def __init__(self):
+    self._events = collections.OrderedDict()
+    self._composed_event = None
+
+  def compose(self, event: dp_event.DpEvent, count: int = 1):
+    """Composes new event into event represented by builder.
+
+    Args:
+      event: The new event to compose.
+      count: The number of times to compose the event.
+    """
+    if not isinstance(event, dp_event.DpEvent):
+      raise TypeError('`event` must be a subclass of `DpEvent`. '
+                      f'Found {type(event)}.')
+    if not isinstance(count, int):
+      raise TypeError(f'`count` must be an integer. Found {type(count)}.')
+    if count < 1:
+      raise ValueError(f'`count` must be positive. Found {count}.')
+
+    if isinstance(event, dp_event.ComposedDpEvent):
+      for composed_event in event.events:
+        self.compose(composed_event, count)
+    elif isinstance(event, dp_event.SelfComposedDpEvent):
+      self.compose(event.event, count * event.count)
+    elif isinstance(event, dp_event.NoOpDpEvent):
+      return
+    else:
+      current_count = self._events.get(event, 0)
+      self._events[event] = current_count + count
+      self._composed_event = None
+
+  def build(self) -> dp_event.DpEvent:
+    """Builds and returns the composed DpEvent represented by the builder."""
+    if not self._composed_event:
+      self_composed_events = []
+      for event, num_self_compositions in self._events.items():
+        if num_self_compositions == 1:
+          self_composed_events.append(event)
+        else:
+          self_composed_events.append(
+              dp_event.SelfComposedDpEvent(event, num_self_compositions))
+      if not self_composed_events:
+        return dp_event.NoOpDpEvent()
+      elif len(self_composed_events) == 1:
+        self._composed_event = self_composed_events[0]
+      else:
+        self._composed_event = dp_event.ComposedDpEvent(self_composed_events)
+
+    return self._composed_event
diff --git a/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py b/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py
new file mode 100644
index 0000000..a10d4bb
--- /dev/null
+++ b/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py
@@ -0,0 +1,73 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for DpEventBuilder."""
+
+from absl.testing import absltest
+from tensorflow_privacy.privacy.analysis import dp_event
+from tensorflow_privacy.privacy.analysis import dp_event_builder
+
+_gaussian_event = dp_event.GaussianDpEvent(1.0)
+_poisson_event = dp_event.PoissonSampledDpEvent(0.1, _gaussian_event)
+_self_composed_event = dp_event.SelfComposedDpEvent(_gaussian_event, 3)
+_composed_event = dp_event.ComposedDpEvent(
+    [_self_composed_event, _poisson_event])
+
+
+class DpEventBuilderTest(absltest.TestCase):
+
+  def test_no_op(self):
+    builder = dp_event_builder.DpEventBuilder()
+    self.assertEqual(dp_event.NoOpDpEvent(), builder.build())
+
+  def test_single(self):
+    builder = dp_event_builder.DpEventBuilder()
+    builder.compose(_gaussian_event)
+    self.assertEqual(_gaussian_event, builder.build())
+
+  def test_compose_no_op(self):
+    builder = dp_event_builder.DpEventBuilder()
+    builder.compose(dp_event.NoOpDpEvent())
+    builder.compose(_gaussian_event)
+    builder.compose(dp_event.NoOpDpEvent())
+    self.assertEqual(_gaussian_event, builder.build())
+
+  def test_compose_self(self):
+    builder = dp_event_builder.DpEventBuilder()
+    builder.compose(_gaussian_event)
+    builder.compose(_gaussian_event, 2)
+    self.assertEqual(_self_composed_event, builder.build())
+
+  def test_compose_heterogenous(self):
+    builder = dp_event_builder.DpEventBuilder()
+    builder.compose(_gaussian_event)
+    builder.compose(_poisson_event)
+    builder.compose(_gaussian_event, 2)
+    self.assertEqual(_composed_event, builder.build())
+
+  def test_compose_complex(self):
+    builder = dp_event_builder.DpEventBuilder()
+    builder.compose(_gaussian_event, 2)
+    builder.compose(_composed_event)
+    builder.compose(_poisson_event)
+    builder.compose(_composed_event, 2)
+
+    expected_event = dp_event.ComposedDpEvent([
+        dp_event.SelfComposedDpEvent(_gaussian_event, 11),
+        dp_event.SelfComposedDpEvent(_poisson_event, 4)
+    ])
+    self.assertEqual(expected_event, builder.build())
+
+
+if __name__ == '__main__':
+  absltest.main()
diff --git a/tensorflow_privacy/privacy/analysis/privacy_accountant.py b/tensorflow_privacy/privacy/analysis/privacy_accountant.py
new file mode 100644
index 0000000..2f1265f
--- /dev/null
+++ b/tensorflow_privacy/privacy/analysis/privacy_accountant.py
@@ -0,0 +1,101 @@
+# Copyright 2021, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PrivacyAccountant abstract base class."""
+
+import abc
+
+from tensorflow_privacy.privacy.analysis import dp_event
+from tensorflow_privacy.privacy.analysis import dp_event_builder
+
+
+class PrivacyAccountant(metaclass=abc.ABCMeta):
+  """Abstract base class for privacy accountants."""
+
+  def __init__(self):
+    self._ledger = dp_event_builder.DpEventBuilder()
+
+  @abc.abstractmethod
+  def is_supported(self, event: dp_event.DpEvent) -> bool:
+    """Checks whether the `DpEvent` can be processed by this accountant.
+
+    In general this will require recursively checking the structure of the
+    `DpEvent`. In particular `ComposedDpEvent` and `SelfComposedDpEvent` should
+    be recursively examined.
+
+    Args:
+      event: The `DpEvent` to check.
+
+    Returns:
+      True iff this accountant supports processing `event`.
+    """
+
+  @abc.abstractmethod
+  def _compose(self, event: dp_event.DpEvent, count: int = 1):
+    """Update internal state to account for application of a `DpEvent`.
+
+    Calls to `get_epsilon` or `get_delta` after calling `_compose` will return
+    values that account for this `DpEvent`.
+
+    Args:
+      event: A `DpEvent` to process.
+      count: The number of times to compose the event.
+    """
+
+  def compose(self, event: dp_event.DpEvent, count: int = 1):
+    """Update internal state to account for application of a `DpEvent`.
+
+    Calls to `get_epsilon` or `get_delta` after calling `compose` will return
+    values that account for this `DpEvent`.
+
+    Args:
+      event: A `DpEvent` to process.
+      count: The number of times to compose the event.
+
+    Raises:
+      TypeError: `event` is not supported by this `PrivacyAccountant`.
+    """
+    if not self.is_supported(event):
+      raise TypeError(f'`DpEvent` {event} is of unsupported type.')
+    self._ledger.compose(event, count)
+    self._compose(event, count)
+
+  @property
+  def ledger(self) -> dp_event.DpEvent:
+    """Returns the (composed) `DpEvent` processed so far by this accountant."""
+    return self._ledger.build()
+
+  @abc.abstractmethod
+  def get_epsilon(self, target_delta: float) -> float:
+    """Gets the current epsilon.
+
+    Args:
+      target_delta: The target delta.
+
+    Returns:
+      The current epsilon, accounting for all composed `DpEvent`s.
+    """
+
+  def get_delta(self, target_epsilon: float) -> float:
+    """Gets the current delta.
+
+    An implementer of `PrivacyAccountant` may choose not to override this, in
+    which case `NotImplementedError` will be raised.
+
+    Args:
+      target_epsilon: The target epsilon.
+
+    Returns:
+      The current delta, accounting for all composed `DpEvent`s.
+    """
+    raise NotImplementedError()

From 9b48c81b6af9a2d516fa426a62880a2b5fa049ab Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Wed, 25 Aug 2021 14:34:23 -0700
Subject: [PATCH 31/71] Minor cleanup.

PiperOrigin-RevId: 392982022
---
 tensorflow_privacy/privacy/analysis/dp_event_builder.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/dp_event_builder.py b/tensorflow_privacy/privacy/analysis/dp_event_builder.py
index a0a9435..722a1e4 100644
--- a/tensorflow_privacy/privacy/analysis/dp_event_builder.py
+++ b/tensorflow_privacy/privacy/analysis/dp_event_builder.py
@@ -62,14 +62,14 @@ class DpEventBuilder(object):
     """Builds and returns the composed DpEvent represented by the builder."""
     if not self._composed_event:
       self_composed_events = []
-      for event, num_self_compositions in self._events.items():
-        if num_self_compositions == 1:
+      for event, count in self._events.items():
+        if count == 1:
           self_composed_events.append(event)
         else:
           self_composed_events.append(
-              dp_event.SelfComposedDpEvent(event, num_self_compositions))
+              dp_event.SelfComposedDpEvent(event, count))
       if not self_composed_events:
-        return dp_event.NoOpDpEvent()
+        self._composed_event = dp_event.NoOpDpEvent()
       elif len(self_composed_events) == 1:
         self._composed_event = self_composed_events[0]
       else:

From 0e04e1baebb41dd2f9e0db5144af1a26256d247d Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Wed, 25 Aug 2021 19:06:51 -0700
Subject: [PATCH 32/71] Adding NonPrivateDpEvent and UnsupportedDpEvent.
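Sketch of the intended accountant behavior for the new events (illustrative
helper only, not part of this change):

    import math

    from tensorflow_privacy.privacy.analysis import dp_event

    def epsilon_or_inf(event, computed_epsilon):
      """Returns infinite epsilon for non-private events."""
      if isinstance(event, dp_event.NonPrivateDpEvent):
        return math.inf
      return computed_epsilon

    # UnsupportedDpEvent, by contrast, should simply make an accountant's
    # is_supported() return False rather than produce any epsilon/delta.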
PiperOrigin-RevId: 393028308
---
 .../privacy/analysis/dp_event.py              | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py
index fbec1d1..da027e9 100644
--- a/tensorflow_privacy/privacy/analysis/dp_event.py
+++ b/tensorflow_privacy/privacy/analysis/dp_event.py
@@ -37,6 +37,27 @@ class NoOpDpEvent(DpEvent):
   """
 
 
+@attr.s(frozen=True)
+class NonPrivateDpEvent(DpEvent):
+  """A `DpEvent` to represent non-private operations.
+
+  This `DpEvent` should be used when an operation is performed that does not
+  satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite
+  epsilon/delta when encountering a `NonPrivateDpEvent`.
+  """
+
+
+@attr.s(frozen=True)
+class UnsupportedDpEvent(DpEvent):
+  """A `DpEvent` to represent as-yet unsupported operations.
+
+  This `DpEvent` should be used when an operation is performed that does not
+  yet have any associated DP description, or if the description is temporarily
+  inaccessible, for example, during development. All `PrivacyAccountant`s
+  should return `False` from `is_supported(event)` for `UnsupportedDpEvent`.
+  """
+
+
 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class GaussianDpEvent(DpEvent):
   """The Gaussian mechanism."""

From d9236d5619ce308aeb8e67c4791fc893605a24fd Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Thu, 26 Aug 2021 09:59:45 -0700
Subject: [PATCH 33/71] Remove PrivacyLedger which will soon be replaced by
 DpEvent and PrivacyAccountant.

PiperOrigin-RevId: 393147667
---
 tensorflow_privacy/__init__.py                |   4 -
 .../privacy/analysis/privacy_ledger.py        | 299 ------------------
 .../privacy/analysis/privacy_ledger_test.py   | 133 --------
 .../privacy/analysis/rdp_accountant.py        |  52 +--
 .../privacy/analysis/rdp_accountant_test.py   | 104 +++---
 .../dp_query/discrete_gaussian_query.py       |   5 -
 .../distributed_discrete_gaussian_query.py    |   5 -
 .../privacy/dp_query/dp_query.py              |  53 ++--
 .../privacy/dp_query/gaussian_query.py        |   4 -
 .../privacy/dp_query/nested_query.py          |  35 +-
 .../privacy/dp_query/no_privacy_query.py      |  28 +-
 .../privacy/dp_query/normalized_query.py      |  17 +-
 .../quantile_adaptive_clip_sum_query.py       |   5 -
 .../quantile_adaptive_clip_sum_query_test.py  |  48 ---
 .../dp_query/quantile_estimator_query.py      |   4 -
 .../privacy/optimizers/dp_optimizer.py        |  44 +--
 .../optimizers/dp_optimizer_eager_test.py     |   9 +-
 .../privacy/optimizers/dp_optimizer_test.py   |  50 +--
 tutorials/lm_dpsgd_tutorial.py                |  49 ++-
 19 files changed, 172 insertions(+), 776 deletions(-)
 delete mode 100644 tensorflow_privacy/privacy/analysis/privacy_ledger.py
 delete mode 100644 tensorflow_privacy/privacy/analysis/privacy_ledger_test.py

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index 1b3adab..815cfe3 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -31,10 +31,6 @@ else:
 
   # Analysis
   from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
-  from tensorflow_privacy.privacy.analysis.privacy_ledger import GaussianSumQueryEntry
-  from tensorflow_privacy.privacy.analysis.privacy_ledger import PrivacyLedger
-  from tensorflow_privacy.privacy.analysis.privacy_ledger import QueryWithLedger
-  from tensorflow_privacy.privacy.analysis.privacy_ledger import SampleEntry
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogenous_rdp
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_from_ledger
diff --git a/tensorflow_privacy/privacy/analysis/privacy_ledger.py b/tensorflow_privacy/privacy/analysis/privacy_ledger.py
deleted file mode 100644
index 08dee5d..0000000
--- a/tensorflow_privacy/privacy/analysis/privacy_ledger.py
+++ /dev/null
@@ -1,299 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""PrivacyLedger class for keeping a record of private queries."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-import numpy as np
-import tensorflow.compat.v1 as tf
-
-from tensorflow_privacy.privacy.analysis import tensor_buffer
-from tensorflow_privacy.privacy.dp_query import dp_query
-
-SampleEntry = collections.namedtuple(  # pylint: disable=invalid-name
-    'SampleEntry', ['population_size', 'selection_probability', 'queries'])
-
-GaussianSumQueryEntry = collections.namedtuple(  # pylint: disable=invalid-name
-    'GaussianSumQueryEntry', ['l2_norm_bound', 'noise_stddev'])
-
-
-def format_ledger(sample_array, query_array):
-  """Converts array representation into a list of SampleEntries."""
-  samples = []
-  query_pos = 0
-  sample_pos = 0
-  for sample in sample_array:
-    population_size, selection_probability, num_queries = sample
-    queries = []
-    for _ in range(int(num_queries)):
-      query = query_array[query_pos]
-      assert int(query[0]) == sample_pos
-      queries.append(GaussianSumQueryEntry(*query[1:]))
-      query_pos += 1
-    samples.append(SampleEntry(population_size, selection_probability, queries))
-    sample_pos += 1
-  return samples
-
-
-class PrivacyLedger(object):
-  """Class for keeping a record of private queries.
-
-  The PrivacyLedger keeps a record of all queries executed over a given dataset
-  for the purpose of computing privacy guarantees. To use it, it must be
-  associated with a `DPQuery` object via a `QueryWithLedger`.
-
-  The current implementation works only with DPQueries that consist of composing
-  Gaussian sum mechanism with Poisson subsampling.
-
-  Example usage:
-
-  ```
-  import tensorflow_privacy as tfp
-
-  dp_query = tfp.QueryWithLedger(
-    tensorflow_privacy.GaussianSumQuery(
-      l2_norm_clip=1.0, stddev=1.0),
-    population_size=10000,
-    selection_probability=0.01)
-
-  # Use dp_query here in training loop.
-
-  formatted_ledger = dp_query.ledger.get_formatted_ledger_eager()
-  orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] +
-            list(range(5, 64)) + [128, 256, 512])
-  total_rdp = tfp.compute_rdp_from_ledger(formatted_ledger, orders)
-  epsilon = tfp.get_privacy_spent(orders, total_rdp, target_delta=1e-5)
-  ```
-  """
-
-  def __init__(self,
-               population_size,
-               selection_probability):
-    """Initializes the PrivacyLedger.
-
-    Args:
-      population_size: An integer (may be variable) specifying the size of the
-        population, i.e. size of the training data used in each epoch.
-      selection_probability: A floating point value (may be variable) specifying
-        the probability each record is included in a sample.
-
-    Raises:
-      ValueError: If `selection_probability` is 0.
-    """
-    self._population_size = population_size
-    self._selection_probability = selection_probability
-
-    if tf.executing_eagerly():
-      if tf.equal(selection_probability, 0):
-        raise ValueError('Selection probability cannot be 0.')
-      init_capacity = tf.cast(tf.math.ceil(1 / selection_probability), tf.int32)
-    else:
-      if selection_probability == 0:
-        raise ValueError('Selection probability cannot be 0.')
-      init_capacity = np.int(np.ceil(1 / selection_probability))
-
-    # The query buffer stores rows corresponding to GaussianSumQueryEntries.
-    self._query_buffer = tensor_buffer.TensorBuffer(
-        init_capacity, [3], tf.float32, 'query')
-    self._sample_var = tf.Variable(
-        initial_value=tf.zeros([3]), trainable=False, name='sample')
-
-    # The sample buffer stores rows corresponding to SampleEntries.
-    self._sample_buffer = tensor_buffer.TensorBuffer(
-        init_capacity, [3], tf.float32, 'sample')
-    self._sample_count = tf.Variable(
-        initial_value=0.0, trainable=False, name='sample_count')
-    self._query_count = tf.Variable(
-        initial_value=0.0, trainable=False, name='query_count')
-    self._cs = tf.CriticalSection()
-
-  def record_sum_query(self, l2_norm_bound, noise_stddev):
-    """Records that a query was issued.
-
-    Args:
-      l2_norm_bound: The maximum l2 norm of the tensor group in the query.
-      noise_stddev: The standard deviation of the noise applied to the sum.
-
-    Returns:
-      An operation recording the sum query to the ledger. This should be called
-      for every Gaussian sum query that is issued on a sample.
-    """
-
-    def _do_record_query():
-      with tf.control_dependencies(
-          [tf.assign(self._query_count, self._query_count + 1)]):
-        return self._query_buffer.append(
-            [self._sample_count, l2_norm_bound, noise_stddev])
-
-    return self._cs.execute(_do_record_query)
-
-  def finalize_sample(self):
-    """Finalizes sample and records sample ledger entry.
-
-    This should be called once per application of the mechanism on a sample,
-    after all sum queries have been recorded.
-
-    Returns:
-      An operation recording the complete mechanism (sampling and sum
-      estimation) to the ledger.
-    """
-    with tf.control_dependencies([
-        tf.assign(self._sample_var, [
-            self._population_size, self._selection_probability,
-            self._query_count
-        ])
-    ]):
-      with tf.control_dependencies([
-          tf.assign(self._sample_count, self._sample_count + 1),
-          tf.assign(self._query_count, 0)
-      ]):
-        return self._sample_buffer.append(self._sample_var)
-
-  def get_unformatted_ledger(self):
-    """Returns the raw sample and query values."""
-    return self._sample_buffer.values, self._query_buffer.values
-
-  def get_formatted_ledger(self, sess):
-    """Gets the formatted query ledger.
-
-    Args:
-      sess: The tensorflow session in which the ledger was created.
-
-    Returns:
-      The query ledger as a list of `SampleEntry` instances.
-    """
-    sample_array = sess.run(self._sample_buffer.values)
-    query_array = sess.run(self._query_buffer.values)
-
-    return format_ledger(sample_array, query_array)
-
-  def get_formatted_ledger_eager(self):
-    """Gets the formatted query ledger.
-
-    Returns:
-      The query ledger as a list of `SampleEntry` instances.
-    """
-    sample_array = self._sample_buffer.values.numpy()
-    query_array = self._query_buffer.values.numpy()
-
-    return format_ledger(sample_array, query_array)
-
-
-class QueryWithLedger(dp_query.DPQuery):
-  """A class for DP queries that record events to a `PrivacyLedger`.
-
-  `QueryWithLedger` should be the top-level query in a structure of queries that
-  may include sum queries, nested queries, etc. It should simply wrap another
-  query and contain a reference to the ledger. Any contained queries (including
-  those contained in the leaves of a nested query) should also contain a
-  reference to the same ledger object.
-
-  Only composed Gaussian sum queries with Poisson subsampling are supported.
-  This includes `GaussianSumQuery`, `QuantileEstimatorQuery`, and
-  `QuantileAdaptiveClipSumQuery`, as well as `NestedQuery` or `NormalizedQuery`
-  objects that contain the previous mentioned query types.
-  """
-
-  def __init__(self, query,
-               population_size=None, selection_probability=None,
-               ledger=None):
-    """Initializes the `QueryWithLedger`.
-
-    Args:
-      query: The query whose events should be recorded to the ledger. Any
-        subqueries (including those in the leaves of a nested query) should also
-        contain a reference to the same ledger given here.
-      population_size: An integer (may be variable) specifying the size of the
-        population, i.e. size of the training data used in each epoch. May be
-        `None` if `ledger` is specified.
-      selection_probability: A floating point value (may be variable) specifying
-        the probability each record is included in a sample under Poisson
-        subsampling. May be `None` if `ledger` is specified.
-      ledger: A `PrivacyLedger` to use. Must be specified if either of
-        `population_size` or `selection_probability` is `None`.
-    """
-    self._query = query
-    if population_size is not None and selection_probability is not None:
-      self.set_ledger(PrivacyLedger(population_size, selection_probability))
-    elif ledger is not None:
-      self.set_ledger(ledger)
-    else:
-      raise ValueError('One of (population_size, selection_probability) or '
-                       'ledger must be specified.')
-
-  @property
-  def ledger(self):
-    """Gets the ledger that all inner queries record to."""
-    return self._ledger
-
-  def set_ledger(self, ledger):
-    """Sets a new ledger."""
-    self._ledger = ledger
-    self._query.set_ledger(ledger)
-
-  def initial_global_state(self):
-    """Implements `tensorflow_privacy.DPQuery.initial_global_state`."""
-    return self._query.initial_global_state()
-
-  def derive_sample_params(self, global_state):
-    """Implements `tensorflow_privacy.DPQuery.derive_sample_params`."""
-    return self._query.derive_sample_params(global_state)
-
-  def initial_sample_state(self, template):
-    """Implements `tensorflow_privacy.DPQuery.initial_sample_state`."""
-    return self._query.initial_sample_state(template)
-
-  def preprocess_record(self, params, record):
-    """Implements `tensorflow_privacy.DPQuery.preprocess_record`."""
-    return self._query.preprocess_record(params, record)
-
-  def accumulate_preprocessed_record(self, sample_state, preprocessed_record):
-    """Implements `tensorflow_privacy.DPQuery.accumulate_preprocessed_record`."""
-    return self._query.accumulate_preprocessed_record(
-        sample_state, preprocessed_record)
-
-  def merge_sample_states(self, sample_state_1, sample_state_2):
-    """Implements `tensorflow_privacy.DPQuery.merge_sample_states`."""
-    return self._query.merge_sample_states(sample_state_1, sample_state_2)
-
-  def get_noised_result(self, sample_state, global_state):
-    """Implements `tensorflow_privacy.DPQuery.derive_metrics`.
-
-    Besides noising and returning the result of the inner query, ensures that
-    the sample is recorded to the ledger.
-
-    Args:
-      sample_state: The sample state after all records have been accumulated.
-      global_state: The global state, storing long-term privacy bookkeeping.
-
-    Returns:
-      A tuple (result, new_global_state) where "result" is the result of the
-      query and "new_global_state" is the updated global state.
-    """
-    # Ensure sample_state is fully aggregated before calling get_noised_result.
-    with tf.control_dependencies(tf.nest.flatten(sample_state)):
-      result, new_global_state = self._query.get_noised_result(
-          sample_state, global_state)
-
-    # Ensure inner queries have recorded before finalizing.
-    with tf.control_dependencies(tf.nest.flatten(result)):
-      finalize = self._ledger.finalize_sample()
-
-    # Ensure finalizing happens.
-    with tf.control_dependencies([finalize]):
-      return tf.nest.map_structure(tf.identity, result), new_global_state
diff --git a/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py b/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py
deleted file mode 100644
index 3d07eb6..0000000
--- a/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Tests for PrivacyLedger."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow.compat.v1 as tf
-
-from tensorflow_privacy.privacy.analysis import privacy_ledger
-from tensorflow_privacy.privacy.dp_query import gaussian_query
-from tensorflow_privacy.privacy.dp_query import nested_query
-from tensorflow_privacy.privacy.dp_query import test_utils
-
-tf.enable_eager_execution()
-
-
-class PrivacyLedgerTest(tf.test.TestCase):
-
-  def test_fail_on_probability_zero(self):
-    with self.assertRaisesRegexp(ValueError,
-                                 'Selection probability cannot be 0.'):
-      privacy_ledger.PrivacyLedger(10, 0)
-
-  def test_basic(self):
-    ledger = privacy_ledger.PrivacyLedger(10, 0.1)
-    ledger.record_sum_query(5.0, 1.0)
-    ledger.record_sum_query(2.0, 0.5)
-
-    ledger.finalize_sample()
-
-    expected_queries = [[5.0, 1.0], [2.0, 0.5]]
-    formatted = ledger.get_formatted_ledger_eager()
-
-    sample = formatted[0]
-    self.assertAllClose(sample.population_size, 10.0)
-    self.assertAllClose(sample.selection_probability, 0.1)
-    self.assertAllClose(sorted(sample.queries), sorted(expected_queries))
-
-  def test_sum_query(self):
-    record1 = tf.constant([2.0, 0.0])
-    record2 = tf.constant([-1.0, 1.0])
-
-    population_size = tf.Variable(0)
-    selection_probability = tf.Variable(1.0)
-
-    query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
-    query = privacy_ledger.QueryWithLedger(query, population_size,
-                                           selection_probability)
-
-    # First sample.
-    tf.assign(population_size, 10)
-    tf.assign(selection_probability, 0.1)
-    test_utils.run_query(query, [record1, record2])
-
-    expected_queries = [[10.0, 0.0]]
-    formatted = query.ledger.get_formatted_ledger_eager()
-    sample_1 = formatted[0]
-    self.assertAllClose(sample_1.population_size, 10.0)
-    self.assertAllClose(sample_1.selection_probability, 0.1)
-    self.assertAllClose(sample_1.queries, expected_queries)
-
-    # Second sample.
-    tf.assign(population_size, 20)
-    tf.assign(selection_probability, 0.2)
-    test_utils.run_query(query, [record1, record2])
-
-    formatted = query.ledger.get_formatted_ledger_eager()
-    sample_1, sample_2 = formatted
-    self.assertAllClose(sample_1.population_size, 10.0)
-    self.assertAllClose(sample_1.selection_probability, 0.1)
-    self.assertAllClose(sample_1.queries, expected_queries)
-
-    self.assertAllClose(sample_2.population_size, 20.0)
-    self.assertAllClose(sample_2.selection_probability, 0.2)
-    self.assertAllClose(sample_2.queries, expected_queries)
-
-  def test_nested_query(self):
-    population_size = tf.Variable(0)
-    selection_probability = tf.Variable(1.0)
-
-    query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=4.0, stddev=2.0)
-    query2 = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=1.0)
-
-    query = nested_query.NestedQuery([query1, query2])
-    query = privacy_ledger.QueryWithLedger(query, population_size,
-                                           selection_probability)
-
-    record1 = [1.0, [12.0, 9.0]]
-    record2 = [5.0, [1.0, 2.0]]
-
-    # First sample.
-    tf.assign(population_size, 10)
-    tf.assign(selection_probability, 0.1)
-    test_utils.run_query(query, [record1, record2])
-
-    expected_queries = [[4.0, 2.0], [5.0, 1.0]]
-    formatted = query.ledger.get_formatted_ledger_eager()
-    sample_1 = formatted[0]
-    self.assertAllClose(sample_1.population_size, 10.0)
-    self.assertAllClose(sample_1.selection_probability, 0.1)
-    self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
-
-    # Second sample.
-    tf.assign(population_size, 20)
-    tf.assign(selection_probability, 0.2)
-    test_utils.run_query(query, [record1, record2])
-
-    formatted = query.ledger.get_formatted_ledger_eager()
-    sample_1, sample_2 = formatted
-    self.assertAllClose(sample_1.population_size, 10.0)
-    self.assertAllClose(sample_1.selection_probability, 0.1)
-    self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
-
-    self.assertAllClose(sample_2.population_size, 20.0)
-    self.assertAllClose(sample_2.selection_probability, 0.2)
-    self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
-
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
index 00798e7..ef4dcba 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -47,7 +47,6 @@ import numpy as np
 from scipy import special
 import six
 
-
 ########################
 # LOG-SPACE ARITHMETIC #
 ########################
@@ -102,8 +101,8 @@ def _log_print(logx):
 
 
 def _log_comb(n, k):
-  return (special.gammaln(n + 1) -
-          special.gammaln(k + 1) - special.gammaln(n - k + 1))
+  return (special.gammaln(n + 1) - special.gammaln(k + 1) -
+          special.gammaln(n - k + 1))
 
 
 def _compute_log_a_int(q, sigma, alpha):
@@ -215,17 +214,19 @@ def _compute_delta(orders, rdp, eps):
   # Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4):
   logdeltas = []  # work in log space to avoid overflows
   for (a, r) in zip(orders_vec, rdp_vec):
-    if a < 1: raise ValueError("Renyi divergence order must be >=1.")
-    if r < 0: raise ValueError("Renyi divergence must be >=0.")
+    if a < 1:
+      raise ValueError("Renyi divergence order must be >=1.")
+    if r < 0:
+      raise ValueError("Renyi divergence must be >=0.")
     # For small alpha, we are better of with bound via KL divergence:
     # delta <= sqrt(1-exp(-KL)).
     # Take a min of the two bounds.
-    logdelta = 0.5*math.log1p(-math.exp(-r))
+    logdelta = 0.5 * math.log1p(-math.exp(-r))
     if a > 1.01:
       # This bound is not numerically stable as alpha->1.
       # Thus we have a min value for alpha.
       # The bound is also not useful for small alpha, so doesn't matter.
-      rdp_bound = (a - 1) * (r - eps + math.log1p(-1/a)) - math.log(a)
+      rdp_bound = (a - 1) * (r - eps + math.log1p(-1 / a)) - math.log(a)
       logdelta = min(logdelta, rdp_bound)
 
     logdeltas.append(logdelta)
@@ -264,8 +265,10 @@ def _compute_eps(orders, rdp, delta):
   # Also appears in https://arxiv.org/abs/2001.05990 Equation 20 (in v1).
   eps_vec = []
   for (a, r) in zip(orders_vec, rdp_vec):
-    if a < 1: raise ValueError("Renyi divergence order must be >=1.")
-    if r < 0: raise ValueError("Renyi divergence must be >=0.")
+    if a < 1:
+      raise ValueError("Renyi divergence order must be >=1.")
+    if r < 0:
+      raise ValueError("Renyi divergence must be >=0.")
 
     if delta**2 + math.expm1(-r) >= 0:
       # In this case, we can simply bound via KL divergence:
@@ -378,7 +381,7 @@ def compute_rdp(q, noise_multiplier, steps, orders):
   Args:
     q: The sampling rate.
     noise_multiplier: The ratio of the standard deviation of the Gaussian noise
-        to the l2-sensitivity of the function to which it is added.
+      to the l2-sensitivity of the function to which it is added.
     steps: The number of steps.
     orders: An array (or a scalar) of RDP orders.
@@ -388,8 +391,8 @@ def compute_rdp(q, noise_multiplier, steps, orders):
   if np.isscalar(orders):
     rdp = _compute_rdp(q, noise_multiplier, orders)
   else:
-    rdp = np.array([_compute_rdp(q, noise_multiplier, order)
-                    for order in orders])
+    rdp = np.array(
+        [_compute_rdp(q, noise_multiplier, order) for order in orders])
 
   return rdp * steps
 
@@ -572,8 +575,8 @@ def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
     target_eps: If not `None`, the epsilon for which we compute the
       corresponding delta.
     target_delta: If not `None`, the delta for which we compute the
-      corresponding epsilon. Exactly one of `target_eps` and `target_delta`
-      must be `None`.
+      corresponding epsilon. Exactly one of `target_eps` and `target_delta` must
+      be `None`.
 
   Returns:
     A tuple of epsilon, delta, and the optimal order.
@@ -595,24 +598,3 @@ def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
   else:
     eps, opt_order = _compute_eps(orders, rdp, target_delta)
   return eps, target_delta, opt_order
-
-
-def compute_rdp_from_ledger(ledger, orders):
-  """Computes RDP of Sampled Gaussian Mechanism from ledger.
-
-  Args:
-    ledger: A formatted privacy ledger.
-    orders: An array (or a scalar) of RDP orders.
-
-  Returns:
-    RDP at all orders. Can be `np.inf`.
-  """
-  total_rdp = np.zeros_like(orders, dtype=float)
-  for sample in ledger:
-    # Compute equivalent z from l2_clip_bounds and noise stddevs in sample.
-    # See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula.
-    effective_z = sum([
-        (q.noise_stddev / q.l2_norm_bound)**-2 for q in sample.queries])**-0.5
-    total_rdp += compute_rdp(
-        sample.selection_probability, effective_z, 1, orders)
-  return total_rdp
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
index c7dadf4..5c0353e 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@@ -31,7 +31,6 @@ from mpmath import quad
 import numpy as np
 import tensorflow as tf
 
-from tensorflow_privacy.privacy.analysis import privacy_ledger
 from tensorflow_privacy.privacy.analysis import rdp_accountant
 
 
@@ -121,16 +120,47 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
         [6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02, 1.6742e+02, np.inf],
         rtol=1e-4)
 
-  params = ({'q': 1e-7, 'sigma': .1, 'order': 1.01},
-            {'q': 1e-6, 'sigma': .1, 'order': 256},
-            {'q': 1e-5, 'sigma': .1, 'order': 256.1},
-            {'q': 1e-6, 'sigma': 1, 'order': 27},
-            {'q': 1e-4, 'sigma': 1., 'order': 1.5},
-            {'q': 1e-3, 'sigma': 1., 'order': 2},
-            {'q': .01, 'sigma': 10, 'order': 20},
-            {'q': .1, 'sigma': 100, 'order': 20.5},
-            {'q': .99, 'sigma': .1, 'order': 256},
-            {'q': .999, 'sigma': 100, 'order': 256.1})
+  params = ({
+      'q': 1e-7,
+      'sigma': .1,
+      'order': 1.01
+  }, {
+      'q': 1e-6,
+      'sigma': .1,
+      'order': 256
+  }, {
+      'q': 1e-5,
+      'sigma': .1,
+      'order': 256.1
+  }, {
+      'q': 1e-6,
+      'sigma': 1,
+      'order': 27
+  }, {
+      'q': 1e-4,
+      'sigma': 1.,
+      'order': 1.5
+  }, {
+      'q': 1e-3,
+      'sigma': 1.,
+      'order': 2
+  }, {
+      'q': .01,
+      'sigma': 10,
+      'order': 20
+  }, {
+      'q': .1,
+      'sigma': 100,
+      'order': 20.5
+  }, {
+      'q': .99,
+      'sigma': .1,
+      'order': 256
+  }, {
+      'q': .999,
+      'sigma': 100,
+      'order': 256.1
+  })
 
   # pylint:disable=undefined-variable
   @parameterized.parameters(p for p in params)
@@ -152,7 +182,8 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
     self.assertAlmostEqual(eps, 1.32783806176)
 
     # Second test for Gaussian noise (with no subsampling):
-    orders = [0.001*i for i in range(1000, 100000)]  # Pick fine set of orders.
+    orders = [0.001 * i for i in range(1000, 100000)
+             ]  # Pick fine set of orders.
     rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
     # Scale is chosen to obtain exactly (1,1e-6)-DP.
     eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)
@@ -168,7 +199,7 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
     self.assertAlmostEqual(delta, 1e-5)
 
     # Second test for Gaussian noise (with no subsampling):
-    orders = [0.001*i for i in range(1000, 100000)]  # Pick fine set of order.
+    orders = [0.001 * i for i in range(1000, 100000)]  # Pick fine set of order.
     rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
     # Scale is chosen to obtain exactly (1,1e-6)-DP.
     _, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_eps=1)
@@ -178,17 +209,13 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
 
     orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14.,
               16., 20., 24., 28., 32., 64., 256.)
 
-    rdp = rdp_accountant.compute_rdp(q=1e-4,
-                                     noise_multiplier=.4,
-                                     steps=40000,
-                                     orders=orders)
+    rdp = rdp_accountant.compute_rdp(
+        q=1e-4, noise_multiplier=.4, steps=40000, orders=orders)
 
     eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)
 
-    rdp += rdp_accountant.compute_rdp(q=0.1,
-                                      noise_multiplier=2,
-                                      steps=100,
-                                      orders=orders)
+    rdp += rdp_accountant.compute_rdp(
+        q=0.1, noise_multiplier=2, steps=100, orders=orders)
 
     eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-5)
     # These tests use the old RDP -> approx DP conversion
     # self.assertAlmostEqual(eps, 8.509656, places=5)
@@ -217,42 +244,25 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase):
 
   def test_get_privacy_spent_gaussian(self):
     # Compare the optimal bound for Gaussian with the one derived from RDP.
     # Also compare the RDP upper bound with the "standard" upper bound.
-    orders = [0.1*x for x in range(10, 505)]
-    eps_vec = [0.1*x for x in range(500)]
+    orders = [0.1 * x for x in range(10, 505)]
+    eps_vec = [0.1 * x for x in range(500)]
     rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
     for eps in eps_vec:
-      _, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp,
-                                                     target_eps=eps)
+      _, delta, _ = rdp_accountant.get_privacy_spent(
+          orders, rdp, target_eps=eps)
 
       # For comparison, we compute the optimal guarantee for Gaussian
      # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
-      delta0 = math.erfc((eps-.5)/math.sqrt(2))/2
-      delta0 = delta0 - math.exp(eps)*math.erfc((eps+.5)/math.sqrt(2))/2
-      self.assertLessEqual(delta0, delta+1e-300)  # need tolerance 10^-300
+      delta0 = math.erfc((eps - .5) / math.sqrt(2)) / 2
+      delta0 = delta0 - math.exp(eps) * math.erfc((eps + .5) / math.sqrt(2)) / 2
+      self.assertLessEqual(delta0, delta + 1e-300)  # need tolerance 10^-300
 
       # Compute the "standard" upper bound, which should be an upper bound.
       # Note, if orders is too sparse, this will NOT be an upper bound.
if eps >= 0.5: - delta1 = math.exp(-0.5*(eps-0.5)**2) + delta1 = math.exp(-0.5 * (eps - 0.5)**2) else: delta1 = 1 - self.assertLessEqual(delta, delta1+1e-300) - - def test_compute_rdp_from_ledger(self): - orders = range(2, 33) - q = 0.1 - n = 1000 - l2_norm_clip = 3.14159 - noise_stddev = 2.71828 - steps = 3 - - query_entry = privacy_ledger.GaussianSumQueryEntry( - l2_norm_clip, noise_stddev) - ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps - - z = noise_stddev / l2_norm_clip - rdp = rdp_accountant.compute_rdp(q, z, steps, orders) - rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders) - self.assertSequenceAlmostEqual(rdp, rdp_from_ledger) + self.assertLessEqual(delta, delta1 + 1e-300) if __name__ == '__main__': diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py index 444489b..c79b31a 100644 --- a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py @@ -46,11 +46,6 @@ class DiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): self._l2_norm_bound = l2_norm_bound self._stddev = stddev - def set_ledger(self, ledger): - del ledger # Unused. - raise NotImplementedError('Ledger has not yet been implemented for' - 'DiscreteGaussianSumQuery!') - def initial_global_state(self): return self._GlobalState( tf.cast(self._l2_norm_bound, tf.float32), diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py index 8dd4dba..ace95e3 100644 --- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py @@ -46,11 +46,6 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): self._l2_norm_bound = l2_norm_bound self._local_stddev = local_stddev - def set_ledger(self, ledger): - del ledger # Unused. - raise NotImplementedError('Ledger has not yet been implemented for' - 'DistributedDiscreteGaussianSumQuery!') - def initial_global_state(self): return self._GlobalState( tf.cast(self._l2_norm_bound, tf.float32), diff --git a/tensorflow_privacy/privacy/dp_query/dp_query.py b/tensorflow_privacy/privacy/dp_query/dp_query.py index d7f8e18..627e38e 100644 --- a/tensorflow_privacy/privacy/dp_query/dp_query.py +++ b/tensorflow_privacy/privacy/dp_query/dp_query.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """An interface for differentially private query mechanisms. The DPQuery class abstracts the differential privacy mechanism needed by DP-SGD. @@ -100,18 +99,6 @@ class DPQuery(object): __metaclass__ = abc.ABCMeta - def set_ledger(self, ledger): - """Supplies privacy ledger to which the query can record privacy events. - - The ledger should be updated with each call to get_noised_result. - - Args: - ledger: A `PrivacyLedger`. - """ - del ledger - raise TypeError( - 'DPQuery type %s does not support set_ledger.' % type(self).__name__) - def initial_global_state(self): """Returns the initial global state for the DPQuery. @@ -155,7 +142,6 @@ class DPQuery(object): as a template to create the initial sample state. It is assumed that the leaves of the structure are python scalars or some type that has properties `shape` and `dtype`. 
- Returns: An initial sample state. """ pass @@ -171,12 +157,12 @@ class DPQuery(object): variables that are stored in self. Args: - params: The parameters for the sample. In standard DP-SGD training, - the clipping norm for the sample's microbatch gradients (i.e., - a maximum norm magnitude to which each gradient is clipped) - record: The record to be processed. In standard DP-SGD training, - the gradient computed for the examples in one microbatch, which - may be the gradient for just one example (for size 1 microbatches). + params: The parameters for the sample. In standard DP-SGD training, the + clipping norm for the sample's microbatch gradients (i.e., a maximum + norm magnitude to which each gradient is clipped) + record: The record to be processed. In standard DP-SGD training, the + gradient computed for the examples in one microbatch, which may be the + gradient for just one example (for size 1 microbatches). Returns: A structure of tensors to be aggregated. @@ -185,8 +171,7 @@ class DPQuery(object): return record @abc.abstractmethod - def accumulate_preprocessed_record( - self, sample_state, preprocessed_record): + def accumulate_preprocessed_record(self, sample_state, preprocessed_record): """Accumulates a single preprocessed record into the sample state. This method is intended to only do simple aggregation, typically just a sum. @@ -194,8 +179,8 @@ class DPQuery(object): declaratively specify the type of aggregation required. Args: - sample_state: The current sample state. In standard DP-SGD training, - the accumulated sum of previous clipped microbatch gradients. + sample_state: The current sample state. In standard DP-SGD training, the + accumulated sum of previous clipped microbatch gradients. preprocessed_record: The preprocessed record to accumulate. Returns: @@ -211,22 +196,22 @@ class DPQuery(object): functions run on a single device. Typically this will be a simple sum. Args: - params: The parameters for the sample. In standard DP-SGD training, - the clipping norm for the sample's microbatch gradients (i.e., - a maximum norm magnitude to which each gradient is clipped) - sample_state: The current sample state. In standard DP-SGD training, - the accumulated sum of previous clipped microbatch gradients. - record: The record to accumulate. In standard DP-SGD training, - the gradient computed for the examples in one microbatch, which - may be the gradient for just one example (for size 1 microbatches). + params: The parameters for the sample. In standard DP-SGD training, the + clipping norm for the sample's microbatch gradients (i.e., a maximum + norm magnitude to which each gradient is clipped) + sample_state: The current sample state. In standard DP-SGD training, the + accumulated sum of previous clipped microbatch gradients. + record: The record to accumulate. In standard DP-SGD training, the + gradient computed for the examples in one microbatch, which may be the + gradient for just one example (for size 1 microbatches). Returns: The updated sample state. In standard DP-SGD training, the set of previous microbatch gradients with the addition of the record argument. 
""" preprocessed_record = self.preprocess_record(params, record) - return self.accumulate_preprocessed_record( - sample_state, preprocessed_record) + return self.accumulate_preprocessed_record(sample_state, + preprocessed_record) @abc.abstractmethod def merge_sample_states(self, sample_state_1, sample_state_2): diff --git a/tensorflow_privacy/privacy/dp_query/gaussian_query.py b/tensorflow_privacy/privacy/dp_query/gaussian_query.py index bc0888c..548427e 100644 --- a/tensorflow_privacy/privacy/dp_query/gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/gaussian_query.py @@ -47,10 +47,6 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): self._stddev = stddev self._ledger = None - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - self._ledger = ledger - def make_global_state(self, l2_norm_clip, stddev): """Creates a global state from the given parameters.""" return self._GlobalState( diff --git a/tensorflow_privacy/privacy/dp_query/nested_query.py b/tensorflow_privacy/privacy/dp_query/nested_query.py index 783485e..e42fdb8 100644 --- a/tensorflow_privacy/privacy/dp_query/nested_query.py +++ b/tensorflow_privacy/privacy/dp_query/nested_query.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""Implements DPQuery interface for queries over nested structures. -""" +"""Implements DPQuery interface for queries over nested structures.""" from __future__ import absolute_import from __future__ import division @@ -60,16 +58,13 @@ class NestedQuery(dp_query.DPQuery): def _map_to_queries(self, fn, *inputs, **kwargs): """Maps DPQuery methods to the subqueries.""" + def caller(query, *args): return getattr(query, fn)(*args, **kwargs) return tree.map_structure_up_to(self._queries, caller, self._queries, *inputs) - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - self._map_to_queries('set_ledger', ledger=ledger) - def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" return self._map_to_queries('initial_global_state') @@ -89,18 +84,15 @@ class NestedQuery(dp_query.DPQuery): """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" return self._map_to_queries('preprocess_record', params, record) - def accumulate_preprocessed_record( - self, sample_state, preprocessed_record): + def accumulate_preprocessed_record(self, sample_state, preprocessed_record): """Implements `tensorflow_privacy.DPQuery.accumulate_preprocessed_record`.""" - return self._map_to_queries( - 'accumulate_preprocessed_record', - sample_state, - preprocessed_record) + return self._map_to_queries('accumulate_preprocessed_record', sample_state, + preprocessed_record) def merge_sample_states(self, sample_state_1, sample_state_2): """Implements `tensorflow_privacy.DPQuery.merge_sample_states`.""" - return self._map_to_queries( - 'merge_sample_states', sample_state_1, sample_state_2) + return self._map_to_queries('merge_sample_states', sample_state_1, + sample_state_2) def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" @@ -118,12 +110,12 @@ class NestedQuery(dp_query.DPQuery): def add_metrics(tuple_path, subquery, subquery_global_state): metrics.update({ - '/'.join(str(s) for s in tuple_path + (name,)): metric - for name, metric - in 
subquery.derive_metrics(subquery_global_state).items()}) + '/'.join(str(s) for s in tuple_path + (name,)): metric for name, + metric in subquery.derive_metrics(subquery_global_state).items() + }) - tree.map_structure_with_path_up_to( - self._queries, add_metrics, self._queries, global_state) + tree.map_structure_with_path_up_to(self._queries, add_metrics, + self._queries, global_state) return metrics @@ -137,12 +129,13 @@ class NestedSumQuery(NestedQuery, dp_query.SumAggregationDPQuery): Args: queries: A nested structure of queries that must all be SumAggregationDPQueries. - Raises: TypeError if any of the subqueries are not SumAggregationDPQueries. """ + def check(query): if not isinstance(query, dp_query.SumAggregationDPQuery): raise TypeError('All subqueries must be SumAggregationDPQueries.') + tree.map_structure(check, queries) super(NestedSumQuery, self).__init__(queries) diff --git a/tensorflow_privacy/privacy/dp_query/no_privacy_query.py b/tensorflow_privacy/privacy/dp_query/no_privacy_query.py index bee419c..a970010 100644 --- a/tensorflow_privacy/privacy/dp_query/no_privacy_query.py +++ b/tensorflow_privacy/privacy/dp_query/no_privacy_query.py @@ -17,8 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import warnings - import tensorflow.compat.v1 as tf from tensorflow_privacy.privacy.dp_query import dp_query @@ -33,20 +31,11 @@ class NoPrivacySumQuery(dp_query.SumAggregationDPQuery): def __init__(self): self._ledger = None - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - warnings.warn( - 'Attempt to use NoPrivacySumQuery with privacy ledger. Privacy ' - 'guarantees will be vacuous.') - self._ledger = ledger - def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" if self._ledger: - dependencies = [ - self._ledger.record_sum_query(float('inf'), 0.0) - ] + dependencies = [self._ledger.record_sum_query(float('inf'), 0.0)] else: dependencies = [] @@ -71,17 +60,10 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery): """Initializes the NoPrivacyAverageQuery.""" self._ledger = None - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - warnings.warn( - 'Attempt to use NoPrivacyAverageQuery with privacy ledger. Privacy ' - 'guarantees will be vacuous.') - self._ledger = ledger - def initial_sample_state(self, template): """Implements `tensorflow_privacy.DPQuery.initial_sample_state`.""" - return (super(NoPrivacyAverageQuery, self).initial_sample_state(template), - tf.constant(0.0)) + return (super(NoPrivacyAverageQuery, + self).initial_sample_state(template), tf.constant(0.0)) def preprocess_record(self, params, record, weight=1): """Implements `tensorflow_privacy.DPQuery.preprocess_record`. @@ -122,9 +104,7 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery): sum_state, denominator = sample_state if self._ledger: - dependencies = [ - self._ledger.record_sum_query(float('inf'), 0.0) - ] + dependencies = [self._ledger.record_sum_query(float('inf'), 0.0)] else: dependencies = [] diff --git a/tensorflow_privacy/privacy/dp_query/normalized_query.py b/tensorflow_privacy/privacy/dp_query/normalized_query.py index 2b9cdfc..35bc79f 100644 --- a/tensorflow_privacy/privacy/dp_query/normalized_query.py +++ b/tensorflow_privacy/privacy/dp_query/normalized_query.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - -"""Implements DPQuery interface for normalized queries. -""" +"""Implements DPQuery interface for normalized queries.""" from __future__ import absolute_import from __future__ import division @@ -38,8 +36,8 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery): """ # pylint: disable=invalid-name - _GlobalState = collections.namedtuple( - '_GlobalState', ['numerator_state', 'denominator']) + _GlobalState = collections.namedtuple('_GlobalState', + ['numerator_state', 'denominator']) def __init__(self, numerator_query, denominator): """Initializes the NormalizedQuery. @@ -55,15 +53,11 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery): assert isinstance(self._numerator, dp_query.SumAggregationDPQuery) - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - self._numerator.set_ledger(ledger) - def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" denominator = tf.cast(self._denominator, tf.float32) - return self._GlobalState( - self._numerator.initial_global_state(), denominator) + return self._GlobalState(self._numerator.initial_global_state(), + denominator) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" @@ -82,6 +76,7 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" noised_sum, new_sum_global_state = self._numerator.get_noised_result( sample_state, global_state.numerator_state) + def normalize(v): return tf.truediv(v, global_state.denominator) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py index 4d3cd2a..2799336 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py @@ -91,11 +91,6 @@ class QuantileAdaptiveClipSumQuery(dp_query.SumAggregationDPQuery): assert isinstance(self._quantile_estimator_query, dp_query.SumAggregationDPQuery) - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - self._sum_query.set_ledger(ledger) - self._quantile_estimator_query.set_ledger(ledger) - def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" return self._GlobalState( diff --git a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py index 17e7232..51da202 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py @@ -22,7 +22,6 @@ from absl.testing import parameterized import numpy as np import tensorflow.compat.v1 as tf -from tensorflow_privacy.privacy.analysis import privacy_ledger from tensorflow_privacy.privacy.dp_query import quantile_adaptive_clip_sum_query from tensorflow_privacy.privacy.dp_query import test_utils @@ -291,53 +290,6 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase, if t > 40: self.assertNear(actual_clip, 5.0, 0.5) - def test_ledger(self): - record1 = tf.constant([8.5]) - record2 = tf.constant([-7.25]) - - population_size = tf.Variable(0) - selection_probability = tf.Variable(1.0) - - query = 
quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( - initial_l2_norm_clip=10.0, - noise_multiplier=1.0, - target_unclipped_quantile=0.0, - learning_rate=1.0, - clipped_count_stddev=0.0, - expected_num_records=2.0, - geometric_update=False) - - query = privacy_ledger.QueryWithLedger(query, population_size, - selection_probability) - - # First sample. - tf.assign(population_size, 10) - tf.assign(selection_probability, 0.1) - _, global_state = test_utils.run_query(query, [record1, record2]) - - expected_queries = [[10.0, 10.0], [0.5, 0.0]] - formatted = query.ledger.get_formatted_ledger_eager() - sample_1 = formatted[0] - self.assertAllClose(sample_1.population_size, 10.0) - self.assertAllClose(sample_1.selection_probability, 0.1) - self.assertAllClose(sample_1.queries, expected_queries) - - # Second sample. - tf.assign(population_size, 20) - tf.assign(selection_probability, 0.2) - test_utils.run_query(query, [record1, record2], global_state) - - formatted = query.ledger.get_formatted_ledger_eager() - sample_1, sample_2 = formatted - self.assertAllClose(sample_1.population_size, 10.0) - self.assertAllClose(sample_1.selection_probability, 0.1) - self.assertAllClose(sample_1.queries, expected_queries) - - expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]] - self.assertAllClose(sample_2.population_size, 20.0) - self.assertAllClose(sample_2.selection_probability, 0.2) - self.assertAllClose(sample_2.queries, expected_queries_2) - if __name__ == '__main__': tf.test.main() diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py index e23b83d..69e02af 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py @@ -101,10 +101,6 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): l2_norm_clip=0.5, stddev=below_estimate_stddev), denominator=expected_num_records) - def set_ledger(self, ledger): - """Implements `tensorflow_privacy.DPQuery.set_ledger`.""" - self._below_estimate_query.set_ledger(ledger) - def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" return self._GlobalState( diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py index 75159be..5334b63 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py @@ -21,7 +21,6 @@ from absl import logging import tensorflow.compat.v1 as tf -from tensorflow_privacy.privacy.analysis import privacy_ledger from tensorflow_privacy.privacy.dp_query import gaussian_query @@ -166,8 +165,8 @@ def make_optimizer_class(cls): sample_state = process_microbatch(idx, sample_state) grad_sums, self._global_state = ( - self._dp_sum_query.get_noised_result( - sample_state, self._global_state)) + self._dp_sum_query.get_noised_result(sample_state, + self._global_state)) def normalize(v): return v / tf.cast(self._num_microbatches, tf.float32) @@ -197,8 +196,8 @@ def make_optimizer_class(cls): """Process one microbatch (record) with privacy helper.""" self_super = super(DPOptimizerClass, self) - mean_loss = tf.reduce_mean(input_tensor=tf.gather( - microbatches_losses, [i])) + mean_loss = tf.reduce_mean( + input_tensor=tf.gather(microbatches_losses, [i])) if hasattr(self_super, 'compute_gradients'): # This case covers optimizers in tf.train. 
@@ -208,8 +207,8 @@ def make_optimizer_class(cls): compute_gradients_fn = self_super._compute_gradients # pylint: disable=protected-access grads, _ = zip(*compute_gradients_fn( - mean_loss, var_list, gate_gradients, - aggregation_method, colocate_gradients_with_ops, grad_loss)) + mean_loss, var_list, gate_gradients, aggregation_method, + colocate_gradients_with_ops, grad_loss)) grads_list = list(grads) sample_state = self._dp_sum_query.accumulate_record( @@ -218,8 +217,8 @@ def make_optimizer_class(cls): if var_list is None: var_list = ( - tf.trainable_variables() + tf.get_collection( - tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) + tf.trainable_variables() + + tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) sample_state = self._dp_sum_query.initial_sample_state(var_list) @@ -237,8 +236,8 @@ def make_optimizer_class(cls): cond=cond_fn, body=body_fn, loop_vars=[idx, sample_state]) grad_sums, self._global_state = ( - self._dp_sum_query.get_noised_result( - sample_state, self._global_state)) + self._dp_sum_query.get_noised_result(sample_state, + self._global_state)) def normalize(v): try: @@ -307,9 +306,7 @@ def make_gaussian_optimizer_class(cls): ``` """).format( - 'tf.compat.v1.train.' + cls.__name__, - cls.__name__, - cls.__name__, + 'tf.compat.v1.train.' + cls.__name__, cls.__name__, cls.__name__, 'DP' + cls.__name__.replace('Optimizer', 'GaussianOptimizer')) def __init__( @@ -317,7 +314,6 @@ def make_gaussian_optimizer_class(cls): l2_norm_clip, noise_multiplier, num_microbatches=None, - ledger=None, unroll_microbatches=False, *args, # pylint: disable=keyword-arg-before-vararg **kwargs): @@ -329,7 +325,6 @@ def make_gaussian_optimizer_class(cls): num_microbatches: Number of microbatches into which each minibatch is split. If `None`, will default to the size of the minibatch, and per-example gradients will be computed. - ledger: Defaults to `None`. An instance of `tf_privacy.PrivacyLedger`. unroll_microbatches: If true, processes microbatches within a Python loop instead of a `tf.while_loop`. Can be used if using a `tf.while_loop` raises an exception. @@ -344,16 +339,9 @@ def make_gaussian_optimizer_class(cls): dp_sum_query = gaussian_query.GaussianSumQuery( l2_norm_clip, l2_norm_clip * noise_multiplier) - if ledger: - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, - ledger=ledger) - - super(DPGaussianOptimizerClass, self).__init__( - dp_sum_query, - num_microbatches, - unroll_microbatches, - *args, - **kwargs) + super(DPGaussianOptimizerClass, + self).__init__(dp_sum_query, num_microbatches, unroll_microbatches, + *args, **kwargs) def get_config(self): """Creates configuration for Keras serialization. 
@@ -370,7 +358,8 @@ def make_gaussian_optimizer_class(cls): config.update({ 'l2_norm_clip': self._l2_norm_clip, 'noise_multiplier': self._noise_multiplier, - 'num_microbatches': self._num_microbatches}) + 'num_microbatches': self._num_microbatches + }) return config @@ -380,6 +369,7 @@ def make_gaussian_optimizer_class(cls): return DPGaussianOptimizerClass + AdagradOptimizer = tf.train.AdagradOptimizer AdamOptimizer = tf.train.AdamOptimizer GradientDescentOptimizer = tf.train.GradientDescentOptimizer diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py index f64e6e3..7b4ccd1 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py @@ -22,7 +22,6 @@ import numpy as np from six.moves import range import tensorflow.compat.v1 as tf -from tensorflow_privacy.privacy.analysis import privacy_ledger from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.optimizers import dp_optimizer @@ -56,13 +55,9 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger( - dp_sum_query, 1e6, num_microbatches / 1e6) opt = cls( - dp_sum_query, - num_microbatches=num_microbatches, - learning_rate=2.0) + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -85,7 +80,6 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) @@ -109,7 +103,6 @@ class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[0.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py index 5876b75..5d30085 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py @@ -24,7 +24,6 @@ import numpy as np from six.moves import range import tensorflow.compat.v1 as tf -from tensorflow_privacy.privacy.analysis import privacy_ledger from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.optimizers import dp_optimizer @@ -51,9 +50,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]), ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5]), ('DPRMSPropOptimizer 1', dp_optimizer.DPRMSPropOptimizer, 1, - [-2.5, -2.5]), - ('DPRMSPropOptimizer 2', dp_optimizer.DPRMSPropOptimizer, 2, - [-2.5, -2.5]), + [-2.5, -2.5]), ('DPRMSPropOptimizer 2', dp_optimizer.DPRMSPropOptimizer, + 2, [-2.5, -2.5]), ('DPRMSPropOptimizer 4', dp_optimizer.DPRMSPropOptimizer, 4, [-2.5, -2.5]) ) def testBaseline(self, cls, num_microbatches, expected_answer): @@ -62,13 +60,9 @@ class 
DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger( - dp_sum_query, 1e6, num_microbatches / 1e6) opt = cls( - dp_sum_query, - num_microbatches=num_microbatches, - learning_rate=2.0) + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -91,7 +85,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) @@ -115,7 +108,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): data0 = tf.Variable([[0.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) @@ -157,11 +149,8 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): vector_loss = tf.math.squared_difference(labels, preds) scalar_loss = tf.reduce_mean(input_tensor=vector_loss) dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) optimizer = dp_optimizer.DPGradientDescentOptimizer( - dp_sum_query, - num_microbatches=1, - learning_rate=1.0) + dp_sum_query, num_microbatches=1, learning_rate=1.0) global_step = tf.train.get_global_step() train_op = optimizer.minimize(loss=vector_loss, global_step=global_step) return tf.estimator.EstimatorSpec( @@ -201,8 +190,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): num_microbatches = 4 dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger( - dp_sum_query, 1e6, num_microbatches / 1e6) opt = cls( dp_sum_query, @@ -283,8 +270,6 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): extra_variable = tf.Variable('foo', trainable=True, dtype=tf.string) dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) - dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, - num_microbatches / 1e6) opt = cls( dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) @@ -298,27 +283,26 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): sess.run(minimize_op) def _testWriteOutAndReload(self, optimizer_cls): - optimizer = optimizer_cls(l2_norm_clip=1.0, - noise_multiplier=0.01, - num_microbatches=1) + optimizer = optimizer_cls( + l2_norm_clip=1.0, noise_multiplier=0.01, num_microbatches=1) test_dir = self.get_temp_dir() model_path = os.path.join(test_dir, 'model') - model = tf.keras.Sequential([tf.keras.layers.InputLayer(input_shape=(1, 1)), - tf.keras.layers.Dense(units=1, - activation='softmax')]) - model.compile(optimizer=optimizer, - loss=tf.keras.losses.SparseCategoricalCrossentropy( - from_logits=True)) + model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(1, 1)), + tf.keras.layers.Dense(units=1, activation='softmax') + ]) + model.compile( + optimizer=optimizer, + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)) - tf.keras.models.save_model(model, filepath=model_path, - include_optimizer=True) + tf.keras.models.save_model( + model, filepath=model_path, 
include_optimizer=True) optimizer_cls_str = optimizer_cls.__name__ - tf.keras.models.load_model(model_path, - custom_objects={ - optimizer_cls_str: optimizer_cls}) + tf.keras.models.load_model( + model_path, custom_objects={optimizer_cls_str: optimizer_cls}) return diff --git a/tutorials/lm_dpsgd_tutorial.py b/tutorials/lm_dpsgd_tutorial.py index 8f72ff6..e1e2765 100644 --- a/tutorials/lm_dpsgd_tutorial.py +++ b/tutorials/lm_dpsgd_tutorial.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Training a language model (recurrent neural network) with DP-SGD optimizer. This tutorial uses a corpus of text from TensorFlow datasets unless a @@ -44,7 +43,6 @@ import numpy as np import tensorflow.compat.v1 as tf import tensorflow_datasets as tfds -from tensorflow_privacy.privacy.analysis import privacy_ledger from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent from tensorflow_privacy.privacy.optimizers import dp_optimizer @@ -92,27 +90,20 @@ def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument if mode == tf.estimator.ModeKeys.TRAIN: if FLAGS.dpsgd: - ledger = privacy_ledger.PrivacyLedger( - population_size=NB_TRAIN, - selection_probability=(FLAGS.batch_size / NB_TRAIN)) - optimizer = dp_optimizer.DPAdamGaussianOptimizer( l2_norm_clip=FLAGS.l2_norm_clip, noise_multiplier=FLAGS.noise_multiplier, num_microbatches=FLAGS.microbatches, - ledger=ledger, learning_rate=FLAGS.learning_rate, unroll_microbatches=True) opt_loss = vector_loss else: - optimizer = tf.train.AdamOptimizer( - learning_rate=FLAGS.learning_rate) + optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) opt_loss = scalar_loss global_step = tf.train.get_global_step() train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - train_op=train_op) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, train_op=train_op) # Add evaluation metrics (for EVAL mode). 
elif mode == tf.estimator.ModeKeys.EVAL: @@ -122,9 +113,8 @@ def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument labels=tf.cast(x[:, 1:], dtype=tf.int32), predictions=tf.argmax(input=logits, axis=2)) } - return tf.estimator.EstimatorSpec(mode=mode, - loss=scalar_loss, - eval_metric_ops=eval_metric_ops) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops) def load_data(): @@ -132,13 +122,13 @@ def load_data(): if not FLAGS.data_dir: print('FLAGS.data_dir containing train.txt and test.txt was not specified, ' 'using a substitute dataset from the tensorflow_datasets module.') - train_dataset = tfds.load(name='lm1b/subwords8k', - split=tfds.Split.TRAIN, - batch_size=NB_TRAIN, - shuffle_files=True) - test_dataset = tfds.load(name='lm1b/subwords8k', - split=tfds.Split.TEST, - batch_size=10000) + train_dataset = tfds.load( + name='lm1b/subwords8k', + split=tfds.Split.TRAIN, + batch_size=NB_TRAIN, + shuffle_files=True) + test_dataset = tfds.load( + name='lm1b/subwords8k', split=tfds.Split.TEST, batch_size=10000) train_data = next(iter(tfds.as_numpy(train_dataset))) test_data = next(iter(tfds.as_numpy(test_dataset))) train_data = train_data['text'].flatten() @@ -162,10 +152,11 @@ def compute_epsilon(steps): return float('inf') orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) sampling_probability = FLAGS.batch_size / NB_TRAIN - rdp = compute_rdp(q=sampling_probability, - noise_multiplier=FLAGS.noise_multiplier, - steps=steps, - orders=orders) + rdp = compute_rdp( + q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) # Delta is set to 1e-5 because Penn TreeBank has 60000 training points. return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] @@ -180,9 +171,8 @@ def main(unused_argv): # Instantiate the tf.Estimator. conf = tf.estimator.RunConfig(save_summary_steps=1000) - lm_classifier = tf.estimator.Estimator(model_fn=rnn_model_fn, - model_dir=FLAGS.model_dir, - config=conf) + lm_classifier = tf.estimator.Estimator( + model_fn=rnn_model_fn, model_dir=FLAGS.model_dir, config=conf) # Create tf.Estimator input functions for the training and test data. batch_len = FLAGS.batch_size * SEQ_LEN @@ -221,5 +211,6 @@ def main(unused_argv): else: print('Trained with vanilla non-private SGD optimizer') + if __name__ == '__main__': app.run(main) From 48e4836a36edb2cd653cfd9058108f67668703a7 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Thu, 26 Aug 2021 10:58:24 -0700 Subject: [PATCH 34/71] Remove deleted compute_rdp_from_ledger from public symbols. 
PiperOrigin-RevId: 393161824 --- tensorflow_privacy/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 815cfe3..f855eba 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -33,7 +33,6 @@ else: from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogenous_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_from_ledger from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent # DPQuery classes From 07c248d86800938a6591fee85a0d852a93f77c11 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Fri, 27 Aug 2021 17:33:11 -0700 Subject: [PATCH 35/71] Adds NeighboringRelation to Accountant and clarifies FixedBatchSample events to be with or without replacement. PiperOrigin-RevId: 393459878 --- .../privacy/analysis/dp_event.py | 12 ++++++++++-- .../privacy/analysis/privacy_accountant.py | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py index da027e9..ac45a64 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event.py +++ b/tensorflow_privacy/privacy/analysis/dp_event.py @@ -85,8 +85,16 @@ class PoissonSampledDpEvent(DpEvent): @attr.s(frozen=True, slots=True, auto_attribs=True) -class EqualBatchSampledDpEvent(DpEvent): - """An application of sampling exactly `batch_size` records.""" +class FixedBatchSampledWrDpEvent(DpEvent): + """Sampling exactly `batch_size` records with replacement.""" + dataset_size: int + batch_size: int + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class FixedBatchSampledWorDpEvent(DpEvent): + """Sampling exactly `batch_size` records without replacement.""" dataset_size: int batch_size: int event: DpEvent diff --git a/tensorflow_privacy/privacy/analysis/privacy_accountant.py b/tensorflow_privacy/privacy/analysis/privacy_accountant.py index 2f1265f..9235156 100644 --- a/tensorflow_privacy/privacy/analysis/privacy_accountant.py +++ b/tensorflow_privacy/privacy/analysis/privacy_accountant.py @@ -14,17 +14,34 @@ """PrivacyAccountant abstract base class.""" import abc +import enum from tensorflow_privacy.privacy.dp_event import dp_event from tensorflow_privacy.privacy.dp_event import dp_event_builder +class NeighboringRelation(enum.Enum): + ADD_OR_REMOVE_ONE = 1 + REPLACE_ONE = 2 + + class PrivacyAccountant(metaclass=abc.ABCMeta): """Abstract base class for privacy accountants.""" - def __init__(self): + def __init__(self, neighboring_relation: NeighboringRelation): + self._neighboring_relation = neighboring_relation self._ledger = dp_event_builder.DpEventBuilder() + @property + def neighboring_relation(self) -> NeighboringRelation: + """The neighboring relation used by the accountant. + + The neighboring relation is expected to remain constant after + initialization. Subclasses should not override this property or change the + value of the private attribute. + """ + return self._neighboring_relation + @abc.abstractmethod def is_supported(self, event: dp_event.DpEvent) -> bool: """Checks whether the `DpEvent` can be processed by this accountant. 
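As an illustration of how the event types introduced in this patch compose (a sketch for orientation only, not part of the patch; the `GaussianDpEvent` and `SelfComposedDpEvent` field names are taken from the same `dp_event.py` module and assumed unchanged):

    from tensorflow_privacy.privacy.analysis import dp_event

    # One DP-SGD step: draw exactly `batch_size` records without replacement,
    # then release a Gaussian-noised sum of the clipped per-example gradients.
    step = dp_event.FixedBatchSampledWorDpEvent(
        dataset_size=60000,
        batch_size=256,
        event=dp_event.GaussianDpEvent(noise_multiplier=1.1))

    # A full training run is that step self-composed once per iteration.
    training_run = dp_event.SelfComposedDpEvent(step, count=10000)

A `PrivacyAccountant` subclass constructed with `NeighboringRelation.ADD_OR_REMOVE_ONE` would then report through `is_supported` whether it can process such an event.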
From 5edea5863cd7e28d7ac85222c695bab1dc3caea1 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Mon, 30 Aug 2021 14:17:04 -0700 Subject: [PATCH 36/71] Add STDDEV to the state of random noise generator, which will be used to enable adaptive clipping norm in tree aggregation queries. PiperOrigin-RevId: 393851743 --- .../privacy/dp_query/tree_aggregation.py | 56 +++++++++++-------- .../privacy/dp_query/tree_aggregation_test.py | 41 ++++++++++++-- 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py index c0a02fc..e4cc35f 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py @@ -21,8 +21,8 @@ module implements the core logic of tree aggregation in Tensorflow, which serves as helper functions for `tree_aggregation_query`. This module and helper functions are publicly accessible. """ - import abc +import collections from typing import Any, Callable, Collection, Optional, Tuple, Union import attr @@ -70,6 +70,9 @@ class GaussianNoiseGenerator(ValueGenerator): nested structure of `tf.TensorSpec`s. """ + # pylint: disable=invalid-name + _GlobalState = collections.namedtuple('_GlobalState', ['seeds', 'stddev']) + def __init__(self, noise_std: float, specs: Collection[tf.TensorSpec], @@ -83,48 +86,57 @@ class GaussianNoiseGenerator(ValueGenerator): seed: An optional integer seed. If None, generator is seeded from the clock. """ - self.noise_std = noise_std - self.specs = specs - self.seed = seed + self._noise_std = noise_std + self._specs = specs + self._seed = seed def initialize(self): """Makes an initial state for the GaussianNoiseGenerator. Returns: - An initial state. + A named tuple of (seeds, stddev). """ - if self.seed is None: + if self._seed is None: time_now = tf.timestamp() residual = time_now - tf.math.floor(time_now) - return tf.cast( - tf.stack([ - tf.math.floor(tf.timestamp() * 1e6), - tf.math.floor(residual * 1e9) - ]), - dtype=tf.int64) + return self._GlobalState( + tf.cast( + tf.stack([ + tf.math.floor(tf.timestamp() * 1e6), + tf.math.floor(residual * 1e9) + ]), + dtype=tf.int64), tf.constant(self._noise_std, dtype=tf.float32)) else: - return tf.constant(self.seed, dtype=tf.int64, shape=(2,)) + return self._GlobalState( + tf.constant(self._seed, dtype=tf.int64, shape=(2,)), + tf.constant(self._noise_std, dtype=tf.float32)) def next(self, state): """Gets next value and advances the GaussianNoiseGenerator. Args: - state: The current state. + state: The current state (seed, noise_std). Returns: - A pair (sample, new_state) where sample is a new sample and new_state - is the advanced state. + A tuple of (sample, new_state) where sample is a new sample and new_state + is the advanced state (seed+1, noise_std). 
""" - flat_structure = tf.nest.flatten(self.specs) - flat_seeds = [state + i for i in range(len(flat_structure))] - nest_seeds = tf.nest.pack_sequence_as(self.specs, flat_seeds) + flat_structure = tf.nest.flatten(self._specs) + flat_seeds = [state.seeds + i for i in range(len(flat_structure))] + nest_seeds = tf.nest.pack_sequence_as(self._specs, flat_seeds) def _get_noise(spec, seed): return tf.random.stateless_normal( - shape=spec.shape, seed=seed, stddev=self.noise_std) + shape=spec.shape, seed=seed, stddev=state.stddev) - nest_noise = tf.nest.map_structure(_get_noise, self.specs, nest_seeds) - return nest_noise, flat_seeds[-1] + 1 + nest_noise = tf.nest.map_structure(_get_noise, self._specs, nest_seeds) + return nest_noise, self._GlobalState(flat_seeds[-1] + 1, state.stddev) + + def make_state(self, seeds: tf.Tensor, stddev: tf.Tensor): + """Returns a new named tuple of (seeds, stddev).""" + seeds = tf.ensure_shape(seeds, shape=(2,)) + return self._GlobalState( + tf.cast(seeds, dtype=tf.int64), tf.cast(stddev, dtype=tf.float32)) class StatelessValueGenerator(ValueGenerator): diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py index fc5e6cc..47be880 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py @@ -13,6 +13,7 @@ # limitations under the License. """Tests for `tree_aggregation`.""" import math +import random from absl.testing import parameterized import tensorflow as tf @@ -297,7 +298,11 @@ class EfficientTreeAggregatorTest(tf.test.TestCase, parameterized.TestCase): tf.nest.map_structure(self.assertAllClose, val, expected_result) -class GaussianNoiseGeneratorTest(tf.test.TestCase): +class GaussianNoiseGeneratorTest(tf.test.TestCase, parameterized.TestCase): + + def assertStateEqual(self, state1, state2): + for s1, s2 in zip(tf.nest.flatten(state1), tf.nest.flatten(state2)): + self.assertAllEqual(s1, s2) def test_random_generator_tf(self, noise_mean=1.0, @@ -330,12 +335,12 @@ class GaussianNoiseGeneratorTest(tf.test.TestCase): g2 = tree_aggregation.GaussianNoiseGenerator( noise_std=noise_std, specs=tf.TensorSpec([]), seed=seed) gstate2 = g.initialize() - self.assertAllEqual(gstate, gstate2) + self.assertStateEqual(gstate, gstate2) for _ in range(steps): value, gstate = g.next(gstate) value2, gstate2 = g2.next(gstate2) self.assertAllEqual(value, value2) - self.assertAllEqual(gstate, gstate2) + self.assertStateEqual(gstate, gstate2) def test_seed_state_nondeterministic(self, steps=32, noise_std=0.1): g = tree_aggregation.GaussianNoiseGenerator( @@ -344,11 +349,12 @@ class GaussianNoiseGeneratorTest(tf.test.TestCase): g2 = tree_aggregation.GaussianNoiseGenerator( noise_std=noise_std, specs=tf.TensorSpec([])) gstate2 = g2.initialize() + self.assertNotAllEqual(gstate.seeds, gstate2.seeds) for _ in range(steps): value, gstate = g.next(gstate) value2, gstate2 = g2.next(gstate2) self.assertNotAllEqual(value, value2) - self.assertNotAllEqual(gstate, gstate2) + self.assertNotAllEqual(gstate.seeds, gstate2.seeds) def test_seed_state_structure(self, seed=1, steps=32, noise_std=0.1): specs = [tf.TensorSpec([]), tf.TensorSpec([1]), tf.TensorSpec([2, 2])] @@ -358,11 +364,36 @@ class GaussianNoiseGeneratorTest(tf.test.TestCase): g2 = tree_aggregation.GaussianNoiseGenerator( noise_std=noise_std, specs=specs, seed=seed) gstate2 = g2.initialize() + self.assertStateEqual(gstate, gstate2) for _ in range(steps): value, gstate = 
g.next(gstate) value2, gstate2 = g2.next(gstate2) self.assertAllClose(value, value2) - self.assertAllEqual(gstate, gstate2) + self.assertStateEqual(gstate, gstate2) + + @parameterized.named_parameters( + ('increase', range(10), 1), + ('decrease', range(30, 20, -2), 2), + ('flat', [3.0] * 5, 1), + ('small', [0.1**x for x in range(4)], 4), + ('random', [random.uniform(1, 10) for _ in range(5)], 4), + ) + def test_adaptive_stddev(self, stddev_list, reset_frequency): + # The stddev estimation follows a chi distribution. The confidence for + # `sample_num` samples should be high, and we use a relatively large + # tolerance to guard the numerical stability for small stddev values. + sample_num, tolerance = 10000, 0.05 + g = tree_aggregation.GaussianNoiseGenerator( + noise_std=1., specs=tf.TensorSpec([sample_num]), seed=2021) + gstate = g.initialize() + for stddev in stddev_list: + gstate = g.make_state(gstate.seeds, tf.constant(stddev, dtype=tf.float32)) + for _ in range(reset_frequency): + prev_gstate = gstate + value, gstate = g.next(gstate) + print(tf.math.reduce_std(value), stddev) + self.assertAllClose(tf.math.reduce_std(value), stddev, rtol=tolerance) + self.assertNotAllEqual(gstate.seeds, prev_gstate.seeds) class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): From 54f1887b10fdbe518be1dc5d164cbda979d904d7 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 30 Aug 2021 15:27:51 -0700 Subject: [PATCH 37/71] Use `from tensorflow_privacy import v1` so v1 is visible to the api-generator. PiperOrigin-RevId: 393866702 --- tensorflow_privacy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index f855eba..84839e4 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -27,7 +27,7 @@ if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. pass else: # TensorFlow v1 imports - import tensorflow_privacy.v1 + from tensorflow_privacy import v1 # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy From 3059fbae67498cda61f06e3a2605eb7364b47d26 Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Mon, 30 Aug 2021 16:45:40 -0700 Subject: [PATCH 38/71] Fix heterogeneous spellings of "heterogeneous". 
PiperOrigin-RevId: 393881235 --- tensorflow_privacy/__init__.py | 2 +- tensorflow_privacy/privacy/analysis/rdp_accountant.py | 4 ++-- tensorflow_privacy/privacy/analysis/rdp_accountant_test.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 84839e4..43426b6 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -31,7 +31,7 @@ else: # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogenous_rdp + from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py index ef4dcba..c872318 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py @@ -540,8 +540,8 @@ def _compute_rdp_sample_without_replacement_int(q, sigma, alpha): return log_a -def compute_heterogenous_rdp(sampling_probabilities, noise_multipliers, - steps_list, orders): +def compute_heterogeneous_rdp(sampling_probabilities, noise_multipliers, + steps_list, orders): """Computes RDP of Heteregoneous Applications of Sampled Gaussian Mechanisms. Args: diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py index 5c0353e..3a5529f 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py @@ -86,9 +86,9 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase): steps_list = [1, 1] orders = 20 self.assertEqual( - rdp_accountant.compute_heterogenous_rdp(sampling_probabilities, - noise_multipliers, steps_list, - orders), 0.1) + rdp_accountant.compute_heterogeneous_rdp(sampling_probabilities, + noise_multipliers, steps_list, + orders), 0.1) def test_compute_rdp_no_data(self): # q = 0 From 789a05df63db4a2f31bd2821aefb954ab7fa45f9 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 31 Aug 2021 08:41:23 -0700 Subject: [PATCH 39/71] Add DpEvent to TFP public symbols. 
PiperOrigin-RevId: 394010603 --- tensorflow_privacy/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 43426b6..a2e25d1 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -29,6 +29,20 @@ else: # TensorFlow v1 imports from tensorflow_privacy import v1 + # DpEvents + from tensorflow_privacy.privacy.analysis.dp_event import DpEvent + from tensorflow_privacy.privacy.analysis.dp_event import NoOpDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import NonPrivateDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import UnsupportedDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import GaussianDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import SelfComposedDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import ComposedDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import PoissonSampledDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWrDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWorDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import ShuffledDatasetDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import TreeAggregationDpEvent + # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp From 6ac4bc8d010ac11f2d0c8c91a4e8b70cb99dea6f Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Tue, 31 Aug 2021 16:05:57 -0700 Subject: [PATCH 40/71] Define `RestartQuery` for easy composition to restart tree in tree aggregation queries. PiperOrigin-RevId: 394106175 --- tensorflow_privacy/__init__.py | 2 + .../privacy/dp_query/restart_query.py | 148 ++++++++++++++++++ .../privacy/dp_query/restart_query_test.py | 126 +++++++++++++++ .../privacy/dp_query/tree_aggregation.py | 72 --------- .../dp_query/tree_aggregation_query.py | 116 +++++++------- .../dp_query/tree_aggregation_query_test.py | 14 +- .../privacy/dp_query/tree_aggregation_test.py | 21 --- 7 files changed, 336 insertions(+), 163 deletions(-) create mode 100644 tensorflow_privacy/privacy/dp_query/restart_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/restart_query_test.py diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index a2e25d1..29b64ff 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -62,7 +62,9 @@ else: from tensorflow_privacy.privacy.dp_query.quantile_estimator_query import QuantileEstimatorQuery from tensorflow_privacy.privacy.dp_query.quantile_estimator_query import NoPrivacyQuantileEstimatorQuery from tensorflow_privacy.privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipSumQuery + from tensorflow_privacy.privacy.dp_query import restart_query from tensorflow_privacy.privacy.dp_query import tree_aggregation + from tensorflow_privacy.privacy.dp_query.restart_query import RestartQuery from tensorflow_privacy.privacy.dp_query.tree_aggregation_query import TreeCumulativeSumQuery from tensorflow_privacy.privacy.dp_query.tree_aggregation_query import TreeResidualSumQuery from tensorflow_privacy.privacy.dp_query.tree_range_query import TreeRangeSumQuery diff --git a/tensorflow_privacy/privacy/dp_query/restart_query.py b/tensorflow_privacy/privacy/dp_query/restart_query.py new file mode 100644 index 0000000..b3994cc --- 
/dev/null +++ b/tensorflow_privacy/privacy/dp_query/restart_query.py @@ -0,0 +1,148 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implements DPQuery interface for restarting the states of another query. + +This query is used to compose with a DPQuery that has a `reset_state` function. +""" +import abc +import collections + +import tensorflow as tf + +from tensorflow_privacy.privacy.dp_query import dp_query + + +class RestartIndicator(metaclass=abc.ABCMeta): + """Base class establishing interface for restarting the tree state. + + A `RestartIndicator` maintains a state, and each time `next` is called, a bool + value is generated to indicate whether to restart, and the indicator state is + advanced. + """ + + @abc.abstractmethod + def initialize(self): + """Makes an initialized state for `RestartIndicator`. + + Returns: + An initial state. + """ + raise NotImplementedError + + @abc.abstractmethod + def next(self, state): + """Gets next bool indicator and advances the `RestartIndicator` state. + + Args: + state: The current state. + + Returns: + A pair (value, new_state) where value is bool indicator and new_state + is the advanced state. + """ + raise NotImplementedError + + +class PeriodicRoundRestartIndicator(RestartIndicator): + """Indicator for resetting the tree state after a fixed number of queries. + + The indicator will maintain an internal counter as state. + """ + + def __init__(self, frequency: int): + """Construct the `PeriodicRoundRestartIndicator`. + + Args: + frequency: The `next` function will return `True` every `frequency` number + of `next` calls. + """ + if frequency < 1: + raise ValueError('Restart frequency should be equal or larger than 1 ' + f'got {frequency}') + self.frequency = tf.constant(frequency, tf.int32) + + def initialize(self): + """Returns initialized state of 0 for `PeriodicRoundRestartIndicator`.""" + return tf.constant(0, tf.int32) + + def next(self, state): + """Gets next bool indicator and advances the state. + + Args: + state: The current state. + + Returns: + A pair (value, new_state) where value is the bool indicator and new_state + is `state+1`. + """ + state = state + tf.constant(1, tf.int32) + flag = state % self.frequency == 0 + return flag, state + + +class RestartQuery(dp_query.SumAggregationDPQuery): + """`DPQuery` for `SumAggregationDPQuery` with a `reset_state` function.""" + + # pylint: disable=invalid-name + _GlobalState = collections.namedtuple( + '_GlobalState', ['inner_query_state', 'indicator_state']) + + def __init__(self, inner_query: dp_query.SumAggregationDPQuery, + restart_indicator: RestartIndicator): + """Initializes `RestartQuery`. + + Args: + inner_query: A `SumAggregationDPQuery` that has a `reset_state` attribute. + restart_indicator: A `RestartIndicator` to generate the boolean indicator + for resetting the state.
+ """ + if not hasattr(inner_query, 'reset_state'): + raise ValueError(f'{type(inner_query)} must define `reset_state` to be ' + 'composed with `RestartQuery`.') + self._inner_query = inner_query + self._restart_indicator = restart_indicator + + def initial_global_state(self): + """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" + return self._GlobalState( + inner_query_state=self._inner_query.initial_global_state(), + indicator_state=self._restart_indicator.initialize()) + + def derive_sample_params(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" + return self._inner_query.derive_sample_params( + global_state.inner_query_state) + + def initial_sample_state(self, template): + """Implements `tensorflow_privacy.DPQuery.initial_sample_state`.""" + return self._inner_query.initial_sample_state(template) + + def preprocess_record(self, params, record): + """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" + return self._inner_query.preprocess_record(params, record) + + def get_noised_result(self, sample_state, global_state): + """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" + noised_results, inner_query_state = self._inner_query.get_noised_result( + sample_state, global_state.inner_query_state) + restart_flag, indicator_state = self._restart_indicator.next( + global_state.indicator_state) + if restart_flag: + inner_query_state = self._inner_query.reset_state(noised_results, + inner_query_state) + return noised_results, self._GlobalState(inner_query_state, indicator_state) + + def derive_metrics(self, global_state): + """Implements `tensorflow_privacy.DPQuery.derive_metrics`.""" + return self._inner_query.derive_metrics(global_state.inner_query_state) diff --git a/tensorflow_privacy/privacy/dp_query/restart_query_test.py b/tensorflow_privacy/privacy/dp_query/restart_query_test.py new file mode 100644 index 0000000..ef57a2b --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/restart_query_test.py @@ -0,0 +1,126 @@ +# Copyright 2021, Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for `restart_query`.""" +from absl.testing import parameterized + +import tensorflow as tf +from tensorflow_privacy.privacy.dp_query import restart_query +from tensorflow_privacy.privacy.dp_query import tree_aggregation_query + + +class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(('zero', 0), ('negative', -1)) + def test_round_raise(self, frequency): + with self.assertRaisesRegex( + ValueError, 'Restart frequency should be equal or larger than 1'): + restart_query.PeriodicRoundRestartIndicator(frequency) + + @parameterized.named_parameters(('f1', 1), ('f2', 2), ('f4', 4), ('f5', 5)) + def test_round_indicator(self, frequency): + total_steps = 20 + indicator = restart_query.PeriodicRoundRestartIndicator(frequency) + state = indicator.initialize() + for i in range(total_steps): + flag, state = indicator.next(state) + if i % frequency == frequency - 1: + self.assertTrue(flag) + else: + self.assertFalse(flag) + + +def _get_l2_clip_fn(): + + def l2_clip_fn(record_as_list, clip_value): + clipped_record, _ = tf.clip_by_global_norm(record_as_list, clip_value) + return clipped_record + + return l2_clip_fn + + +class RestartQueryTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ('s0t1f1', 0., 1., 1), + ('s0t1f2', 0., 1., 2), + ('s0t1f5', 0., 1., 5), + ('s1t1f5', 1., 1., 5), + ('s1t2f2', 1., 2., 2), + ('s1t5f6', 1., 5., 6), + ) + def test_sum_scalar_tree_aggregation_reset(self, scalar_value, + tree_node_value, frequency): + total_steps = 20 + indicator = restart_query.PeriodicRoundRestartIndicator(frequency) + query = tree_aggregation_query.TreeCumulativeSumQuery( + clip_fn=_get_l2_clip_fn(), + clip_value=scalar_value + 1., # no clip + noise_generator=lambda: tree_node_value, + record_specs=tf.TensorSpec([]), + use_efficient=False) + query = restart_query.RestartQuery(query, indicator) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + for i in range(total_steps): + sample_state = query.initial_sample_state(scalar_value) + sample_state = query.accumulate_record(params, sample_state, scalar_value) + query_result, global_state = query.get_noised_result( + sample_state, global_state) + # Expected value is the combination of cumsum of signal; sum of trees + # that have been reset; current tree sum. The tree aggregation value can + # be inferred from the binary representation of the current step. 
+ expected = ( + scalar_value * (i + 1) + + i // frequency * tree_node_value * bin(frequency)[2:].count('1') + + tree_node_value * bin(i % frequency + 1)[2:].count('1')) + self.assertEqual(query_result, expected) + + @parameterized.named_parameters( + ('s0t1f1', 0., 1., 1), + ('s0t1f2', 0., 1., 2), + ('s0t1f5', 0., 1., 5), + ('s1t1f5', 1., 1., 5), + ('s1t2f2', 1., 2., 2), + ('s1t5f6', 1., 5., 6), + ) + def test_scalar_tree_aggregation_reset(self, scalar_value, tree_node_value, + frequency): + total_steps = 20 + indicator = restart_query.PeriodicRoundRestartIndicator(frequency) + query = tree_aggregation_query.TreeResidualSumQuery( + clip_fn=_get_l2_clip_fn(), + clip_value=scalar_value + 1., # no clip + noise_generator=lambda: tree_node_value, + record_specs=tf.TensorSpec([]), + use_efficient=False) + query = restart_query.RestartQuery(query, indicator) + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + for i in range(total_steps): + sample_state = query.initial_sample_state(scalar_value) + sample_state = query.accumulate_record(params, sample_state, scalar_value) + query_result, global_state = query.get_noised_result( + sample_state, global_state) + # Expected value is the signal of the current round plus the residual of + # two continuous tree aggregation values. The tree aggregation value can + # be inferred from the binary representation of the current step. + expected = scalar_value + tree_node_value * ( + bin(i % frequency + 1)[2:].count('1') - + bin(i % frequency)[2:].count('1')) + print(i, query_result, expected) + self.assertEqual(query_result, expected) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py index e4cc35f..0842975 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation.py @@ -171,78 +171,6 @@ class StatelessValueGenerator(ValueGenerator): return self.value_fn(), state -# TODO(b/192464750): define `RestartQuery` and move `RestartIndicator` to be -# in the same module. - - -class RestartIndicator(metaclass=abc.ABCMeta): - """Base class establishing interface for restarting the tree state. - - A `RestartIndicator` maintains a state, and each time `next` is called, a bool - value is generated to indicate whether to restart, and the indicator state is - advanced. - """ - - @abc.abstractmethod - def initialize(self): - """Makes an initialized state for `RestartIndicator`. - - Returns: - An initial state. - """ - raise NotImplementedError - - @abc.abstractmethod - def next(self, state): - """Gets next bool indicator and advances the `RestartIndicator` state. - - Args: - state: The current state. - - Returns: - A pair (value, new_state) where value is bool indicator and new_state - is the advanced state. - """ - raise NotImplementedError - - -class PeriodicRoundRestartIndicator(RestartIndicator): - """Indicator for resetting the tree state after every a few number of queries. - - The indicator will maintain an internal counter as state. - """ - - def __init__(self, frequency: int): - """Construct the `PeriodicRoundRestartIndicator`. - - Args: - frequency: The `next` function will return `True` every `frequency` number - of `next` calls.
- """ - if frequency < 1: - raise ValueError('Restart frequency should be equal or larger than 1 ' - f'got {frequency}') - self.frequency = tf.constant(frequency, tf.int32) - - def initialize(self): - """Returns initialized state of 0 for `PeriodicRoundRestartIndicator`.""" - return tf.constant(0, tf.int32) - - def next(self, state): - """Gets next bool indicator and advances the state. - - Args: - state: The current state. - - Returns: - A pair (value, new_state) where value is the bool indicator and new_state - of `state+1`. - """ - state = state + tf.constant(1, tf.int32) - flag = state % self.frequency == 0 - return flag, state - - @attr.s(eq=False, frozen=True, slots=True) class TreeState(object): """Class defining state of the tree. diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 3120eea..4907585 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -72,8 +72,6 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): node. Noise stdandard deviation is specified outside the `dp_query` by the user when defining `noise_fn` and should have order O(clip_norm*log(T)/eps) to guarantee eps-DP. - restart_indicator: `tree_aggregation.RestartIndicator` to generate the - boolean indicator for resetting the tree state. """ @attr.s(frozen=True) @@ -85,21 +83,17 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): each level state. clip_value: The clipping value to be passed to clip_fn. samples_cumulative_sum: Noiseless cumulative sum of samples over time. - restarter_state: Current state of the restarter to indicate whether - the tree state will be reset. """ tree_state = attr.ib() clip_value = attr.ib() samples_cumulative_sum = attr.ib() - restarter_state = attr.ib() def __init__(self, record_specs, noise_generator, clip_fn, clip_value, - use_efficient=True, - restart_indicator=None): + use_efficient=True): """Initializes the `TreeCumulativeSumQuery`. Consider using `build_l2_gaussian_query` for the construction of a @@ -117,8 +111,6 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". - restart_indicator: `tree_aggregation.RestartIndicator` to generate the - boolean indicator for resetting the tree state. 
""" self._clip_fn = clip_fn self._clip_value = clip_value @@ -128,21 +120,16 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): noise_generator) else: self._tree_aggregator = tree_aggregation.TreeAggregator(noise_generator) - self._restart_indicator = restart_indicator def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" initial_tree_state = self._tree_aggregator.init_state() initial_samples_cumulative_sum = tf.nest.map_structure( lambda spec: tf.zeros(spec.shape), self._record_specs) - restarter_state = () - if self._restart_indicator is not None: - restarter_state = self._restart_indicator.initialize() return TreeCumulativeSumQuery.GlobalState( tree_state=initial_tree_state, clip_value=tf.constant(self._clip_value, tf.float32), - samples_cumulative_sum=initial_samples_cumulative_sum, - restarter_state=restarter_state) + samples_cumulative_sum=initial_samples_cumulative_sum) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" @@ -185,28 +172,41 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): global_state.tree_state) noised_cumulative_sum = tf.nest.map_structure(tf.add, new_cumulative_sum, cumulative_sum_noise) - restarter_state = global_state.restarter_state - if self._restart_indicator is not None: - restart_flag, restarter_state = self._restart_indicator.next( - restarter_state) - if restart_flag: - new_cumulative_sum = noised_cumulative_sum - new_tree_state = self._tree_aggregator.reset_state(new_tree_state) new_global_state = attr.evolve( global_state, samples_cumulative_sum=new_cumulative_sum, - tree_state=new_tree_state, - restarter_state=restarter_state) + tree_state=new_tree_state) return noised_cumulative_sum, new_global_state + def reset_state(self, noised_results, global_state): + """Returns state after resetting the tree. + + This function will be used in `restart_query.RestartQuery` after calling + `get_noised_result` when the restarting condition is met. + + Args: + noised_results: Noised cumulative sum returned by `get_noised_result`. + global_state: Updated global state returned by `get_noised_result`, which + has current sample's cumulative sum and tree state for the next + cumulative sum. + + Returns: + New global state with current noised cumulative sum and restarted tree + state for the next cumulative sum. + """ + new_tree_state = self._tree_aggregator.reset_state(global_state.tree_state) + return attr.evolve( + global_state, + samples_cumulative_sum=noised_results, + tree_state=new_tree_state) + @classmethod def build_l2_gaussian_query(cls, clip_norm, noise_multiplier, record_specs, noise_seed=None, - use_efficient=True, - restart_indicator=None): + use_efficient=True): """Returns a query instance with L2 norm clipping and Gaussian noise. Args: @@ -221,8 +221,6 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". - restart_indicator: `tree_aggregation.RestartIndicator` to generate the - boolean indicator for resetting the tree state. 
""" if clip_norm <= 0: raise ValueError(f'`clip_norm` must be positive, got {clip_norm}.') @@ -245,8 +243,7 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): clip_value=clip_norm, record_specs=record_specs, noise_generator=gaussian_noise_generator, - use_efficient=use_efficient, - restart_indicator=restart_indicator) + use_efficient=use_efficient) class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): @@ -300,8 +297,6 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): node. Noise stdandard deviation is specified outside the `dp_query` by the user when defining `noise_fn` and should have order O(clip_norm*log(T)/eps) to guarantee eps-DP. - restart_indicator: `tree_aggregation.RestartIndicator` to generate the - boolean indicator for resetting the tree state. """ @attr.s(frozen=True) @@ -314,21 +309,17 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): clip_value: The clipping value to be passed to clip_fn. previous_tree_noise: Cumulative noise by tree aggregation from the previous time the query is called on a sample. - restarter_state: Current state of the restarter to indicate whether - the tree state will be reset. """ tree_state = attr.ib() clip_value = attr.ib() previous_tree_noise = attr.ib() - restarter_state = attr.ib() def __init__(self, record_specs, noise_generator, clip_fn, clip_value, - use_efficient=True, - restart_indicator=None): + use_efficient=True): """Initializes the `TreeCumulativeSumQuery`. Consider using `build_l2_gaussian_query` for the construction of a @@ -346,8 +337,6 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". - restart_indicator: `tree_aggregation.RestartIndicator` to generate the - boolean indicator for resetting the tree state. 
""" self._clip_fn = clip_fn self._clip_value = clip_value @@ -357,7 +346,6 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): noise_generator) else: self._tree_aggregator = tree_aggregation.TreeAggregator(noise_generator) - self._restart_indicator = restart_indicator def _zero_initial_noise(self): return tf.nest.map_structure(lambda spec: tf.zeros(spec.shape), @@ -366,14 +354,10 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): def initial_global_state(self): """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" initial_tree_state = self._tree_aggregator.init_state() - restarter_state = () - if self._restart_indicator is not None: - restarter_state = self._restart_indicator.initialize() return TreeResidualSumQuery.GlobalState( tree_state=initial_tree_state, clip_value=tf.constant(self._clip_value, tf.float32), - previous_tree_noise=self._zero_initial_noise(), - restarter_state=restarter_state) + previous_tree_noise=self._zero_initial_noise()) def derive_sample_params(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" @@ -412,28 +396,39 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): noised_sample = tf.nest.map_structure(lambda a, b, c: a + b - c, sample_state, tree_noise, global_state.previous_tree_noise) - restarter_state = global_state.restarter_state - if self._restart_indicator is not None: - restart_flag, restarter_state = self._restart_indicator.next( - restarter_state) - if restart_flag: - tree_noise = self._zero_initial_noise() - new_tree_state = self._tree_aggregator.reset_state(new_tree_state) new_global_state = attr.evolve( - global_state, - previous_tree_noise=tree_noise, - tree_state=new_tree_state, - restarter_state=restarter_state) + global_state, previous_tree_noise=tree_noise, tree_state=new_tree_state) return noised_sample, new_global_state + def reset_state(self, noised_results, global_state): + """Returns state after resetting the tree. + + This function will be used in `restart_query.RestartQuery` after calling + `get_noised_result` when the restarting condition is met. + + Args: + noised_results: Noised cumulative sum returned by `get_noised_result`. + global_state: Updated global state returned by `get_noised_result`, which + records noise for the conceptual cumulative sum of the current leaf + node, and tree state for the next conceptual cumulative sum. + + Returns: + New global state with zero noise and restarted tree state. + """ + del noised_results + new_tree_state = self._tree_aggregator.reset_state(global_state.tree_state) + return attr.evolve( + global_state, + previous_tree_noise=self._zero_initial_noise(), + tree_state=new_tree_state) + @classmethod def build_l2_gaussian_query(cls, clip_norm, noise_multiplier, record_specs, noise_seed=None, - use_efficient=True, - restart_indicator=None): + use_efficient=True): """Returns `TreeResidualSumQuery` with L2 norm clipping and Gaussian noise. Args: @@ -448,8 +443,6 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): use_efficient: Boolean indicating the usage of the efficient tree aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". - restart_indicator: `tree_aggregation.RestartIndicator` to generate the - boolean indicator for resetting the tree state. 
""" if clip_norm <= 0: raise ValueError(f'`clip_norm` must be positive, got {clip_norm}.') @@ -472,8 +465,7 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): clip_value=clip_norm, record_specs=record_specs, noise_generator=gaussian_noise_generator, - use_efficient=use_efficient, - restart_indicator=restart_indicator) + use_efficient=use_efficient) # TODO(b/197596864): Remove `TreeRangeSumQuery` from this file after the next diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index f88ed90..65ab076 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -303,15 +303,12 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): def test_sum_scalar_tree_aggregation_reset(self, scalar_value, tree_node_value, frequency): total_steps = 20 - indicator = tree_aggregation.PeriodicRoundRestartIndicator(frequency) query = tree_aggregation_query.TreeCumulativeSumQuery( clip_fn=_get_l2_clip_fn(), clip_value=scalar_value + 1., # no clip noise_generator=lambda: tree_node_value, record_specs=tf.TensorSpec([]), - use_efficient=False, - restart_indicator=indicator, - ) + use_efficient=False) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) for i in range(total_steps): @@ -319,6 +316,8 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): sample_state = query.accumulate_record(params, sample_state, scalar_value) query_result, global_state = query.get_noised_result( sample_state, global_state) + if i % frequency == frequency - 1: + global_state = query.reset_state(query_result, global_state) # Expected value is the combination of cumsum of signal; sum of trees # that have been reset; current tree sum. The tree aggregation value can # be inferred from the binary representation of the current step. @@ -446,15 +445,12 @@ class TreeResidualQueryTest(tf.test.TestCase, parameterized.TestCase): def test_scalar_tree_aggregation_reset(self, scalar_value, tree_node_value, frequency): total_steps = 20 - indicator = tree_aggregation.PeriodicRoundRestartIndicator(frequency) query = tree_aggregation_query.TreeResidualSumQuery( clip_fn=_get_l2_clip_fn(), clip_value=scalar_value + 1., # no clip noise_generator=lambda: tree_node_value, record_specs=tf.TensorSpec([]), - use_efficient=False, - restart_indicator=indicator, - ) + use_efficient=False) global_state = query.initial_global_state() params = query.derive_sample_params(global_state) for i in range(total_steps): @@ -462,6 +458,8 @@ class TreeResidualQueryTest(tf.test.TestCase, parameterized.TestCase): sample_state = query.accumulate_record(params, sample_state, scalar_value) query_result, global_state = query.get_noised_result( sample_state, global_state) + if i % frequency == frequency - 1: + global_state = query.reset_state(query_result, global_state) # Expected value is the signal of the current round plus the residual of # two continous tree aggregation values. The tree aggregation value can # be inferred from the binary representation of the current step. 
diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py index 47be880..2f6ad82 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_test.py @@ -396,26 +396,5 @@ class GaussianNoiseGeneratorTest(tf.test.TestCase, parameterized.TestCase): self.assertNotAllEqual(gstate.seeds, prev_gstate.seeds) -class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters(('zero', 0), ('negative', -1)) - def test_round_raise(self, frequency): - with self.assertRaisesRegex( - ValueError, 'Restart frequency should be equal or larger than 1'): - tree_aggregation.PeriodicRoundRestartIndicator(frequency) - - @parameterized.named_parameters(('f1', 1), ('f2', 2), ('f4', 4), ('f5', 5)) - def test_round_indicator(self, frequency): - total_steps = 20 - indicator = tree_aggregation.PeriodicRoundRestartIndicator(frequency) - state = indicator.initialize() - for i in range(total_steps): - flag, state = indicator.next(state) - if i % frequency == frequency - 1: - self.assertTrue(flag) - else: - self.assertFalse(flag) - - if __name__ == '__main__': tf.test.main() From 7e7736ea911d02f0523f097a668bc465e9bbd3c7 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 31 Aug 2021 19:26:51 -0700 Subject: [PATCH 41/71] Add DpEvent to return value of get_noised_result. For most DPQueries, the default UnsupportedDpEvent is returned, pending further development. PiperOrigin-RevId: 394137614 --- .../dp_query/discrete_gaussian_query.py | 5 +++- .../distributed_discrete_gaussian_query.py | 4 ++- .../privacy/dp_query/dp_query.py | 17 ++++++------ .../privacy/dp_query/gaussian_query.py | 16 +++++------ .../privacy/dp_query/nested_query.py | 14 ++++++---- .../privacy/dp_query/no_privacy_query.py | 27 +++---------------- .../privacy/dp_query/normalized_query.py | 6 +++-- .../quantile_adaptive_clip_sum_query.py | 8 +++--- .../dp_query/quantile_estimator_query.py | 4 +-- .../privacy/dp_query/restart_query.py | 8 +++--- .../privacy/dp_query/restart_query_test.py | 4 +-- .../privacy/dp_query/test_utils.py | 7 ++--- .../dp_query/tree_aggregation_query.py | 17 +++++++----- .../dp_query/tree_aggregation_query_test.py | 12 ++++----- .../privacy/dp_query/tree_range_query.py | 6 +++-- .../privacy/dp_query/tree_range_query_test.py | 4 +-- .../privacy/optimizers/dp_optimizer.py | 8 ++---- .../privacy/optimizers/dp_optimizer_keras.py | 13 ++++----- 18 files changed, 86 insertions(+), 94 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py index c79b31a..6797b6a 100644 --- a/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/discrete_gaussian_query.py @@ -16,6 +16,7 @@ import collections import tensorflow as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils from tensorflow_privacy.privacy.dp_query import dp_query @@ -81,4 +82,6 @@ class DiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): # Ensure shape as TF shape inference may fail due to custom noise sampler. 
return tf.ensure_shape(noised_v, v.shape) - return tf.nest.map_structure(add_noise, sample_state), global_state + result = tf.nest.map_structure(add_noise, sample_state) + event = dp_event.UnsupportedDpEvent() + return result, global_state, event diff --git a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py index ace95e3..30031f6 100644 --- a/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/distributed_discrete_gaussian_query.py @@ -16,6 +16,7 @@ import collections import tensorflow as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import discrete_gaussian_utils from tensorflow_privacy.privacy.dp_query import dp_query @@ -106,4 +107,5 @@ class DistributedDiscreteGaussianSumQuery(dp_query.SumAggregationDPQuery): def get_noised_result(self, sample_state, global_state): # Note that by directly returning the aggregate, this assumes that there # will not be missing local noise shares during execution. - return sample_state, global_state + event = dp_event.UnsupportedDpEvent() + return sample_state, global_state, event diff --git a/tensorflow_privacy/privacy/dp_query/dp_query.py b/tensorflow_privacy/privacy/dp_query/dp_query.py index 627e38e..298b45f 100644 --- a/tensorflow_privacy/privacy/dp_query/dp_query.py +++ b/tensorflow_privacy/privacy/dp_query/dp_query.py @@ -246,11 +246,14 @@ class DPQuery(object): global_state: The global state, storing long-term privacy bookkeeping. Returns: - A tuple (result, new_global_state) where "result" is the result of the - query and "new_global_state" is the updated global state. In standard - DP-SGD training, the result is a gradient update comprising a noised - average of the clipped gradients in the sample state---with the noise and - averaging performed in a manner that guarantees differential privacy. + A tuple `(result, new_global_state, event)` where: + * `result` is the result of the query, + * `new_global_state` is the updated global state, and + * `event` is the `DpEvent` that occurred. + In standard DP-SGD training, the result is a gradient update comprising a + noised average of the clipped gradients in the sample state---with the + noise and averaging performed in a manner that guarantees differential + privacy. 
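# A runnable sketch of the new three-tuple contract, using the Gaussian sum
# query updated below in this patch; the clip, stddev, and record values here
# are illustrative assumptions.
import tensorflow as tf
from tensorflow_privacy.privacy.dp_query import gaussian_query

query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.0)
global_state = query.initial_global_state()
params = query.derive_sample_params(global_state)
sample_state = query.initial_sample_state(tf.constant(0.))
sample_state = query.accumulate_record(params, sample_state, tf.constant(0.5))
result, global_state, event = query.get_noised_result(sample_state, global_state)
# `event` is a GaussianDpEvent with noise multiplier stddev / l2_norm_clip.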
""" pass @@ -297,7 +300,3 @@ class SumAggregationDPQuery(DPQuery): def merge_sample_states(self, sample_state_1, sample_state_2): """Implements `tensorflow_privacy.DPQuery.merge_sample_states`.""" return tf.nest.map_structure(tf.add, sample_state_1, sample_state_2) - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - return sample_state, global_state diff --git a/tensorflow_privacy/privacy/dp_query/gaussian_query.py b/tensorflow_privacy/privacy/dp_query/gaussian_query.py index 548427e..3edd2fb 100644 --- a/tensorflow_privacy/privacy/dp_query/gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/gaussian_query.py @@ -22,6 +22,7 @@ import distutils import tensorflow.compat.v1 as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import dp_query @@ -45,7 +46,6 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): """ self._l2_norm_clip = l2_norm_clip self._stddev = stddev - self._ledger = None def make_global_state(self, l2_norm_clip, stddev): """Creates a global state from the given parameters.""" @@ -96,12 +96,8 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): def add_noise(v): return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) - if self._ledger: - dependencies = [ - self._ledger.record_sum_query(global_state.l2_norm_clip, - global_state.stddev) - ] - else: - dependencies = [] - with tf.control_dependencies(dependencies): - return tf.nest.map_structure(add_noise, sample_state), global_state + result = tf.nest.map_structure(add_noise, sample_state) + noise_multiplier = global_state.stddev / global_state.l2_norm_clip + event = dp_event.GaussianDpEvent(noise_multiplier) + + return result, global_state, event diff --git a/tensorflow_privacy/privacy/dp_query/nested_query.py b/tensorflow_privacy/privacy/dp_query/nested_query.py index e42fdb8..b4e2247 100644 --- a/tensorflow_privacy/privacy/dp_query/nested_query.py +++ b/tensorflow_privacy/privacy/dp_query/nested_query.py @@ -20,6 +20,8 @@ from __future__ import print_function import collections import tensorflow.compat.v1 as tf + +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import dp_query import tree @@ -96,13 +98,15 @@ class NestedQuery(dp_query.DPQuery): def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - estimates_and_new_global_states = self._map_to_queries( - 'get_noised_result', sample_state, global_state) + mapped_query_results = self._map_to_queries('get_noised_result', + sample_state, global_state) + + flat_estimates, flat_new_global_states, flat_events = zip( + *tree.flatten_up_to(self._queries, mapped_query_results)) - flat_estimates, flat_new_global_states = zip( - *tree.flatten_up_to(self._queries, estimates_and_new_global_states)) return (tf.nest.pack_sequence_as(self._queries, flat_estimates), - tf.nest.pack_sequence_as(self._queries, flat_new_global_states)) + tf.nest.pack_sequence_as(self._queries, flat_new_global_states), + dp_event.ComposedDpEvent(events=flat_events)) def derive_metrics(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_metrics`.""" diff --git a/tensorflow_privacy/privacy/dp_query/no_privacy_query.py b/tensorflow_privacy/privacy/dp_query/no_privacy_query.py index a970010..3840003 100644 --- a/tensorflow_privacy/privacy/dp_query/no_privacy_query.py +++ 
b/tensorflow_privacy/privacy/dp_query/no_privacy_query.py @@ -19,6 +19,7 @@ from __future__ import print_function import tensorflow.compat.v1 as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import dp_query @@ -28,19 +29,9 @@ class NoPrivacySumQuery(dp_query.SumAggregationDPQuery): Accumulates vectors without clipping or adding noise. """ - def __init__(self): - self._ledger = None - def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - - if self._ledger: - dependencies = [self._ledger.record_sum_query(float('inf'), 0.0)] - else: - dependencies = [] - - with tf.control_dependencies(dependencies): - return sample_state, global_state + return sample_state, global_state, dp_event.NonPrivateDpEvent() class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery): @@ -56,10 +47,6 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery): privatized. """ - def __init__(self): - """Initializes the NoPrivacyAverageQuery.""" - self._ledger = None - def initial_sample_state(self, template): """Implements `tensorflow_privacy.DPQuery.initial_sample_state`.""" return (super(NoPrivacyAverageQuery, @@ -103,11 +90,5 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" sum_state, denominator = sample_state - if self._ledger: - dependencies = [self._ledger.record_sum_query(float('inf'), 0.0)] - else: - dependencies = [] - - with tf.control_dependencies(dependencies): - return (tf.nest.map_structure(lambda t: t / denominator, - sum_state), global_state) + result = tf.nest.map_structure(lambda t: t / denominator, sum_state) + return result, global_state, dp_event.NonPrivateDpEvent() diff --git a/tensorflow_privacy/privacy/dp_query/normalized_query.py b/tensorflow_privacy/privacy/dp_query/normalized_query.py index 35bc79f..929f526 100644 --- a/tensorflow_privacy/privacy/dp_query/normalized_query.py +++ b/tensorflow_privacy/privacy/dp_query/normalized_query.py @@ -74,14 +74,16 @@ class NormalizedQuery(dp_query.SumAggregationDPQuery): def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - noised_sum, new_sum_global_state = self._numerator.get_noised_result( + noised_sum, new_sum_global_state, event = self._numerator.get_noised_result( sample_state, global_state.numerator_state) def normalize(v): return tf.truediv(v, global_state.denominator) + # The denominator is constant so the privacy cost comes from the numerator. 
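# A sketch of the pass-through described above: a `NormalizedQuery` around a
# Gaussian sum emits the numerator's event unchanged, since dividing by the
# public denominator is free post-processing. Values are illustrative.
from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.dp_query import normalized_query

query = normalized_query.NormalizedQuery(
    gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=1.0), 16.0)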
return (tf.nest.map_structure(normalize, noised_sum), - self._GlobalState(new_sum_global_state, global_state.denominator)) + self._GlobalState(new_sum_global_state, + global_state.denominator), event) def derive_metrics(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_metrics`.""" diff --git a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py index 2799336..15a8b36 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py @@ -21,6 +21,7 @@ import collections import tensorflow.compat.v1 as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import quantile_estimator_query @@ -123,11 +124,11 @@ class QuantileAdaptiveClipSumQuery(dp_query.SumAggregationDPQuery): def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - noised_vectors, sum_state = self._sum_query.get_noised_result( + noised_vectors, sum_state, sum_event = self._sum_query.get_noised_result( sample_state.sum_state, global_state.sum_state) del sum_state # To be set explicitly later when we know the new clip. - new_l2_norm_clip, new_quantile_estimator_state = ( + new_l2_norm_clip, new_quantile_estimator_state, quantile_event = ( self._quantile_estimator_query.get_noised_result( sample_state.quantile_estimator_state, global_state.quantile_estimator_state)) @@ -141,7 +142,8 @@ class QuantileAdaptiveClipSumQuery(dp_query.SumAggregationDPQuery): new_sum_query_state, new_quantile_estimator_state) - return noised_vectors, new_global_state + event = dp_event.ComposedDpEvent(events=[sum_event, quantile_event]) + return noised_vectors, new_global_state, event def derive_metrics(self, global_state): """Returns the current clipping norm as a metric.""" diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py index 69e02af..9c90a03 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py @@ -135,7 +135,7 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - below_estimate_result, new_below_estimate_state = ( + below_estimate_result, new_below_estimate_state, below_estimate_event = ( self._below_estimate_query.get_noised_result( sample_state, global_state.below_estimate_state)) @@ -159,7 +159,7 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): current_estimate=new_estimate, below_estimate_state=new_below_estimate_state) - return new_estimate, new_global_state + return new_estimate, new_global_state, below_estimate_event def derive_metrics(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_metrics`.""" diff --git a/tensorflow_privacy/privacy/dp_query/restart_query.py b/tensorflow_privacy/privacy/dp_query/restart_query.py index b3994cc..5716b0b 100644 --- a/tensorflow_privacy/privacy/dp_query/restart_query.py +++ b/tensorflow_privacy/privacy/dp_query/restart_query.py @@ -134,14 +134,14 @@ class RestartQuery(dp_query.SumAggregationDPQuery): def 
get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" - noised_results, inner_query_state = self._inner_query.get_noised_result( + noised_results, inner_state, event = self._inner_query.get_noised_result( sample_state, global_state.inner_query_state) restart_flag, indicator_state = self._restart_indicator.next( global_state.indicator_state) if restart_flag: - inner_query_state = self._inner_query.reset_state(noised_results, - inner_query_state) + inner_state = self._inner_query.reset_state(noised_results, inner_state) - return noised_results, self._GlobalState(inner_query_state, indicator_state) + return (noised_results, self._GlobalState(inner_state, + indicator_state), event) def derive_metrics(self, global_state): """Implements `tensorflow_privacy.DPQuery.derive_metrics`.""" diff --git a/tensorflow_privacy/privacy/dp_query/restart_query_test.py b/tensorflow_privacy/privacy/dp_query/restart_query_test.py index ef57a2b..f3a0276 100644 --- a/tensorflow_privacy/privacy/dp_query/restart_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/restart_query_test.py @@ -75,7 +75,7 @@ class RestartQueryTest(tf.test.TestCase, parameterized.TestCase): for i in range(total_steps): sample_state = query.initial_sample_state(scalar_value) sample_state = query.accumulate_record(params, sample_state, scalar_value) - query_result, global_state = query.get_noised_result( + query_result, global_state, _ = query.get_noised_result( sample_state, global_state) # Expected value is the combination of cumsum of signal; sum of trees # that have been reset; current tree sum. The tree aggregation value can @@ -110,7 +110,7 @@ class RestartQueryTest(tf.test.TestCase, parameterized.TestCase): for i in range(total_steps): sample_state = query.initial_sample_state(scalar_value) sample_state = query.accumulate_record(params, sample_state, scalar_value) - query_result, global_state = query.get_noised_result( + query_result, global_state, _ = query.get_noised_result( sample_state, global_state) # Expected value is the signal of the current round plus the residual of # two continuous tree aggregation values.
The tree aggregation value can diff --git a/tensorflow_privacy/privacy/dp_query/test_utils.py b/tensorflow_privacy/privacy/dp_query/test_utils.py index 18456b3..81134f9 100644 --- a/tensorflow_privacy/privacy/dp_query/test_utils.py +++ b/tensorflow_privacy/privacy/dp_query/test_utils.py @@ -44,6 +44,7 @@ def run_query(query, records, global_state=None, weights=None): sample_state = query.accumulate_record(params, sample_state, record) else: for weight, record in zip(weights, records): - sample_state = query.accumulate_record( - params, sample_state, record, weight) - return query.get_noised_result(sample_state, global_state) + sample_state = query.accumulate_record(params, sample_state, record, + weight) + result, global_state, _ = query.get_noised_result(sample_state, global_state) + return result, global_state diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 4907585..1219651 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -24,12 +24,12 @@ import math import attr import tensorflow as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import tree_aggregation - # TODO(b/193679963): define `RestartQuery` and move `RestartIndicator` to be # in the same module. @@ -57,7 +57,7 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): for j,sample in enumerate(samples): sample_state = query.accumulate_record(params, sample_state, sample) # noised_cumsum is privatized estimate of s_i - noised_cumsum, global_state = query.get_noised_result( + noised_cumsum, global_state, event = query.get_noised_result( sample_state, global_state) Attributes: @@ -176,7 +176,8 @@ class TreeCumulativeSumQuery(dp_query.SumAggregationDPQuery): global_state, samples_cumulative_sum=new_cumulative_sum, tree_state=new_tree_state) - return noised_cumulative_sum, new_global_state + event = dp_event.UnsupportedDpEvent() + return noised_cumulative_sum, new_global_state, event def reset_state(self, noised_results, global_state): """Returns state after resetting the tree. @@ -281,7 +282,7 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): sample_state = query.accumulate_record(params, sample_state, sample) # noised_sum is privatized estimate of x_i by conceptually postprocessing # noised cumulative sum s_i - noised_sum, global_state = query.get_noised_result( + noised_sum, global_state, event = query.get_noised_result( sample_state, global_state) Attributes: @@ -398,7 +399,8 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): global_state.previous_tree_noise) new_global_state = attr.evolve( global_state, previous_tree_noise=tree_noise, tree_state=new_tree_state) - return noised_sample, new_global_state + event = dp_event.UnsupportedDpEvent() + return noised_sample, new_global_state, event def reset_state(self, noised_results, global_state): """Returns state after resetting the tree. @@ -636,7 +638,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): # This part is not written in tensorflow and will be executed on the server # side instead of the client side if used with # tff.aggregators.DifferentiallyPrivateFactory for federated learning. 
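# Note on the `test_utils.run_query` change above: the helper now discards
# the returned event internally, so existing callers keep the two-tuple
# interface. A minimal sketch with a noiseless Gaussian sum (values are
# illustrative):
import tensorflow as tf
from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.dp_query import test_utils

query = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)
result, _ = test_utils.run_query(query, [tf.constant(0.5), tf.constant(0.25)])
# With zero stddev the result is exactly the clipped sum, 0.75.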
- sample_state, inner_query_state = self._inner_query.get_noised_result( + sample_state, inner_query_state, _ = self._inner_query.get_noised_result( sample_state, global_state.inner_query_state) new_global_state = TreeRangeSumQuery.GlobalState( arity=global_state.arity, inner_query_state=inner_query_state) @@ -647,7 +649,8 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): ] tree = tf.RaggedTensor.from_row_splits( values=sample_state, row_splits=row_splits) - return tree, new_global_state + event = dp_event.UnsupportedDpEvent() + return tree, new_global_state, event @classmethod def build_central_gaussian_query(cls, diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 65ab076..56118ce 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -258,7 +258,7 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): for scalar, expected_sum in zip(streaming_scalars, partial_sum): sample_state = query.initial_sample_state(scalar) sample_state = query.accumulate_record(params, sample_state, scalar) - query_result, global_state = query.get_noised_result( + query_result, global_state, _ = query.get_noised_result( sample_state, global_state) self.assertEqual(query_result, expected_sum) @@ -282,7 +282,7 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): for i in range(total_steps): sample_state = query.initial_sample_state(scalar_value) sample_state = query.accumulate_record(params, sample_state, scalar_value) - query_result, global_state = query.get_noised_result( + query_result, global_state, _ = query.get_noised_result( sample_state, global_state) # For each streaming step i , the expected value is roughly # `scalar_value*(i+1) + tree_aggregation(tree_node_value, i)`. 
@@ -314,7 +314,7 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): for i in range(total_steps): sample_state = query.initial_sample_state(scalar_value) sample_state = query.accumulate_record(params, sample_state, scalar_value) - query_result, global_state = query.get_noised_result( + query_result, global_state, _ = query.get_noised_result( sample_state, global_state) if i % frequency == frequency - 1: global_state = query.reset_state(query_result, global_state) @@ -456,7 +456,7 @@ class TreeResidualQueryTest(tf.test.TestCase, parameterized.TestCase): for i in range(total_steps): sample_state = query.initial_sample_state(scalar_value) sample_state = query.accumulate_record(params, sample_state, scalar_value) - query_result, global_state = query.get_noised_result( + query_result, global_state, _ = query.get_noised_result( sample_state, global_state) if i % frequency == frequency - 1: global_state = query.reset_state(query_result, global_state) @@ -609,7 +609,7 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( + sample_state, global_state, _ = query.get_noised_result( preprocessed_record, global_state) self.assertAllClose(sample_state, expected_tree) @@ -621,7 +621,7 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.])) - sample_state, global_state = query.get_noised_result( + sample_state, global_state, _ = query.get_noised_result( preprocessed_record, global_state) self.assertAllClose( diff --git a/tensorflow_privacy/privacy/dp_query/tree_range_query.py b/tensorflow_privacy/privacy/dp_query/tree_range_query.py index 1b47071..f5a6083 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_range_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_range_query.py @@ -20,6 +20,7 @@ import math import attr import tensorflow as tf +from tensorflow_privacy.privacy.analysis import dp_event from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query @@ -189,7 +190,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): # This part is not written in tensorflow and will be executed on the server # side instead of the client side if used with # tff.aggregators.DifferentiallyPrivateFactory for federated learning. 
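# The surrounding code assembles per-level node sums into a ragged tree. A
# small sketch of that layout for arity 2 and seven node values (assumed here
# for illustration: one root, two children, four leaves):
import tensorflow as tf

values = tf.constant([7., 3., 4., 1., 2., 2., 2.])
tree = tf.RaggedTensor.from_row_splits(values, row_splits=[0, 1, 3, 7])
# tree[0] is the root sum; tree[1] the level-1 sums; tree[2] the leaf sums.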
- sample_state, inner_query_state = self._inner_query.get_noised_result( + sample_state, inner_query_state, _ = self._inner_query.get_noised_result( sample_state, global_state.inner_query_state) new_global_state = TreeRangeSumQuery.GlobalState( arity=global_state.arity, inner_query_state=inner_query_state) @@ -200,7 +201,8 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): ] tree = tf.RaggedTensor.from_row_splits( values=sample_state, row_splits=row_splits) - return tree, new_global_state + event = dp_event.UnsupportedDpEvent() + return tree, new_global_state, event @classmethod def build_central_gaussian_query(cls, diff --git a/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py index e3f1156..9cae1a1 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_range_query_test.py @@ -159,7 +159,7 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state = query.get_noised_result( + sample_state, global_state, _ = query.get_noised_result( preprocessed_record, global_state) self.assertAllClose(sample_state, expected_tree) @@ -171,7 +171,7 @@ class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): global_state = query.initial_global_state() params = query.derive_sample_params(global_state) preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.])) - sample_state, global_state = query.get_noised_result( + sample_state, global_state, _ = query.get_noised_result( preprocessed_record, global_state) self.assertAllClose( diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py index 5334b63..3b80092 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py @@ -164,7 +164,7 @@ def make_optimizer_class(cls): for idx in range(self._num_microbatches): sample_state = process_microbatch(idx, sample_state) - grad_sums, self._global_state = ( + grad_sums, self._global_state, _ = ( self._dp_sum_query.get_noised_result(sample_state, self._global_state)) @@ -235,7 +235,7 @@ def make_optimizer_class(cls): _, sample_state = tf.while_loop( cond=cond_fn, body=body_fn, loop_vars=[idx, sample_state]) - grad_sums, self._global_state = ( + grad_sums, self._global_state, _ = ( self._dp_sum_query.get_noised_result(sample_state, self._global_state)) @@ -363,10 +363,6 @@ def make_gaussian_optimizer_class(cls): return config - @property - def ledger(self): - return self._dp_sum_query.ledger - return DPGaussianOptimizerClass diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py index 0fa68db..5345c70 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py @@ -81,9 +81,10 @@ def make_keras_optimizer_class(cls): model.fit(...) ``` - """.format(base_class='tf.keras.optimizers.' + cls.__name__, - short_base_class=cls.__name__, - dp_keras_class='DPKeras' + cls.__name__) + """.format( + base_class='tf.keras.optimizers.' 
+ cls.__name__, + short_base_class=cls.__name__, + dp_keras_class='DPKeras' + cls.__name__) # The class tf.keras.optimizers.Optimizer has two methods to compute # gradients, `_compute_gradients` and `get_gradients`. The first works @@ -106,8 +107,8 @@ def make_keras_optimizer_class(cls): Args: l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). noise_multiplier: Ratio of the standard deviation to the clipping norm. - num_microbatches: Number of microbatches into which each minibatch - is split. + num_microbatches: Number of microbatches into which each minibatch is + split. *args: These will be passed on to the base class `__init__` method. **kwargs: These will be passed on to the base class `__init__` method. """ @@ -210,7 +211,7 @@ def make_keras_optimizer_class(cls): sample_state = self._dp_sum_query.initial_sample_state(params) for idx in range(self._num_microbatches): sample_state = process_microbatch(idx, sample_state) - grad_sums, self._global_state = ( + grad_sums, self._global_state, _ = ( self._dp_sum_query.get_noised_result(sample_state, self._global_state)) From e99fb7ea9baee079db99f440cb1e89a1990ba0a8 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Wed, 1 Sep 2021 09:29:40 -0700 Subject: [PATCH 42/71] Try to fix flaky `tree_aggregation_query_test.test_noisy_cumsum_and_state_update`. PiperOrigin-RevId: 394248815 --- .../privacy/dp_query/tree_aggregation_query_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 56118ce..ace6484 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -214,16 +214,16 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters( ('two_records_noise_fn', [2.71828, 3.14159], _get_noise_fn), - ('five_records_noise_fn', np.random.uniform(size=5).tolist(), + ('five_records_noise_fn', np.random.uniform(low=0.1, size=5).tolist(), _get_noise_fn), ('two_records_generator', [2.71828, 3.14159], _get_noise_generator), - ('five_records_generator', np.random.uniform(size=5).tolist(), + ('five_records_generator', np.random.uniform(low=0.1, size=5).tolist(), _get_noise_generator), ) def test_noisy_cumsum_and_state_update(self, records, value_generator): num_trials = 200 - record_specs = tf.nest.map_structure(lambda t: tf.TensorSpec(tf.shape(t)), - records[0]) + record_specs = tf.TensorSpec([]) + records = [tf.constant(r) for r in records] noised_sums = [] for i in range(num_trials): query = tree_aggregation_query.TreeCumulativeSumQuery( @@ -232,7 +232,7 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): noise_generator=value_generator(record_specs, seed=i), record_specs=record_specs) query_result, _ = test_utils.run_query(query, records) - noised_sums.append(query_result) + noised_sums.append(query_result.numpy()) result_stddev = np.std(noised_sums) self.assertNear(result_stddev, NOISE_STD, 0.7) # value for chi-squared test From bb5ca9277bf99b7fc48a1805a104679f5b42aac4 Mon Sep 17 00:00:00 2001 From: Anirudh Sriram Date: Wed, 1 Sep 2021 10:29:05 -0700 Subject: [PATCH 43/71] TF Privacy launch cleanup PiperOrigin-RevId: 394262183 --- g3doc/guide/_index.yaml | 2 +- g3doc/guide/_toc.yaml | 2 -- g3doc/guide/get_started.md | 2 -- g3doc/guide/install.md | 3 --- g3doc/tutorials/_toc.yaml | 2 -- 
g3doc/tutorials/index.md | 3 --- 6 files changed, 1 insertion(+), 13 deletions(-) delete mode 100644 g3doc/guide/install.md delete mode 100644 g3doc/tutorials/index.md diff --git a/g3doc/guide/_index.yaml b/g3doc/guide/_index.yaml index 2efe35b..b72822d 100644 --- a/g3doc/guide/_index.yaml +++ b/g3doc/guide/_index.yaml @@ -1,5 +1,5 @@ # TODO(b/181782485): Switch to the main book for launch - /responsible_ai/_book.yaml -book_path: /responsible_ai/privacy/_book.yaml +book_path: /responsible_ai/_book.yaml project_path: /responsible_ai/_project.yaml title: TensorFlow Privacy description: > diff --git a/g3doc/guide/_toc.yaml b/g3doc/guide/_toc.yaml index 3ea85f2..7091153 100644 --- a/g3doc/guide/_toc.yaml +++ b/g3doc/guide/_toc.yaml @@ -1,8 +1,6 @@ toc: - title: Overview path: /responsible_ai/privacy/guide/ -- title: Install - path: /responsible_ai/privacy/guide/install - title: Get Started path: /responsible_ai/privacy/guide/get_started - title: Measure Privacy diff --git a/g3doc/guide/get_started.md b/g3doc/guide/get_started.md index 143a513..aaec65d 100644 --- a/g3doc/guide/get_started.md +++ b/g3doc/guide/get_started.md @@ -1,7 +1,5 @@ # Get Started -Using TF Privacy - This document assumes you are already familiar with differential privacy, and have determined that you would like to implement TF Privacy to achieve differential privacy guarantees in your model(s). If you’re not familiar with diff --git a/g3doc/guide/install.md b/g3doc/guide/install.md deleted file mode 100644 index 86249f5..0000000 --- a/g3doc/guide/install.md +++ /dev/null @@ -1,3 +0,0 @@ -# Installation Instructions - -## Tips diff --git a/g3doc/tutorials/_toc.yaml b/g3doc/tutorials/_toc.yaml index 57272e2..7687134 100644 --- a/g3doc/tutorials/_toc.yaml +++ b/g3doc/tutorials/_toc.yaml @@ -1,6 +1,4 @@ toc: -- title: Overview - path: /responsible_ai/privacy/tutorials/ - title: Compute privacy path: /responsible_ai/privacy/tutorials/classification_privacy - title: Assess privacy risk diff --git a/g3doc/tutorials/index.md b/g3doc/tutorials/index.md deleted file mode 100644 index f6b8051..0000000 --- a/g3doc/tutorials/index.md +++ /dev/null @@ -1,3 +0,0 @@ -# PROJECT_NAME tutorials - -Lorem ipsum dolor sit amet, consectetur adipiscing elit. From fc7504efcaeebb1674f32dd694b057665129f751 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 1 Sep 2021 19:00:25 -0700 Subject: [PATCH 44/71] Slight language adjustments PiperOrigin-RevId: 394363646 --- g3doc/tutorials/classification_privacy.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/g3doc/tutorials/classification_privacy.ipynb b/g3doc/tutorials/classification_privacy.ipynb index 71cf3f2..08004cf 100644 --- a/g3doc/tutorials/classification_privacy.ipynb +++ b/g3doc/tutorials/classification_privacy.ipynb @@ -77,7 +77,7 @@ "id": "vsCUvXP0W4j2" }, "source": [ - "[Differential privacy](https://en.wikipedia.org/wiki/Differential_privacy) (DP) is a framework for measuring the privacy guarantees provided by an algorithm. Through the lens of differential privacy, you can design machine learning algorithms that responsibly train models on private data. Learning with differential privacy provides provable guarantees of privacy, mitigating the risk of exposing sensitive training data in machine learning. Intuitively, a model trained with differential privacy should not be affected by any single training example, or small set of training examples, in its data set. This mitigates the risk of exposing sensitive training data in ML." 
+ "[Differential privacy](https://en.wikipedia.org/wiki/Differential_privacy) (DP) is a framework for measuring the privacy guarantees provided by an algorithm. Through the lens of differential privacy, you can design machine learning algorithms that responsibly train models on private data. Learning with differential privacy provides measurable guarantees of privacy, helping to mitigate the risk of exposing sensitive training data in machine learning. Intuitively, a model trained with differential privacy should not be affected by any single training example, or small set of training examples, in its data set. This helps mitigate the risk of exposing sensitive training data in ML." ] }, { @@ -452,6 +452,7 @@ "colab": { "collapsed_sections": [], "name": "classification_privacy.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { From b7249e6ab2ed59b53d4860ed8956a47ea573cda9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 2 Sep 2021 15:37:43 -0700 Subject: [PATCH 45/71] Update narrative content. PiperOrigin-RevId: 394558889 --- g3doc/guide/_index.yaml | 42 +++++++++++++++++++++------------- g3doc/guide/get_started.md | 15 ++++++------ g3doc/guide/measure_privacy.md | 11 ++++----- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/g3doc/guide/_index.yaml b/g3doc/guide/_index.yaml index b72822d..3720bdc 100644 --- a/g3doc/guide/_index.yaml +++ b/g3doc/guide/_index.yaml @@ -13,16 +13,21 @@ landing_page: - classname: devsite-landing-row-50 description: >

-        Preventing ML models from exposing potentially sensitive information is a critical part of
-        using AI responsibly. To that end, differentially private stochastic gradient descent
-        (DP-SGD) is a modification to the standard stochastic gradient descent (SGD) algorithm
-        in machine learning.
-
+
+        Models trained with DP-SGD have measurable differential privacy (DP) improvements, which
+        helps mitigate the risk of exposing sensitive training data. Since the purpose of DP is
+        to help prevent individual data points from being identified, a model trained with DP
+        should not be affected by any single training example in its training data set. DP-SGD
+        techniques can also be used in federated learning to provide user-level differential privacy.
+        You can learn more about differentially private deep learning in
+        the original paper.
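Concretely, the setup this page describes amounts to swapping in a DP optimizer and a per-example loss; a minimal sketch using the Keras optimizer shipped by this library (hyperparameter values are illustrative, and `model`, `train_data` and `train_labels` are assumed to be defined elsewhere):

```python
import tensorflow as tf
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Per-example (vectorized) loss: reduction happens inside the DP optimizer so
# gradients can be clipped per microbatch before noise is added.
loss = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,      # max L2 norm of per-microbatch gradients
    noise_multiplier=0.5,  # noise stddev as a ratio of the clipping norm
    num_microbatches=1,
    learning_rate=0.25)

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
model.fit(train_data, train_labels, epochs=1, batch_size=250)
```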

   - code_block: |
@@ -58,14 +63,19 @@ landing_page:
   items:
   - classname: devsite-landing-row-100
     description: >
-
-        Tensorflow Privacy (TF Privacy) is an open source library developed by teams in Google
-        Research. The library includes implementations of commonly used TensorFlow Optimizers for
-        training ML models with DP. The goal is to enable ML practitioners using standard Tensorflow
-        APIs to train privacy-preserving models by changing only a few lines of code.

-
-        The differentially private Optimizers can be used in conjunction with high-level APIs
+
+        Tensorflow Privacy (TF Privacy) is an open source library developed by teams in
+        Google Research. The library includes implementations of commonly used TensorFlow
+        Optimizers for training ML models with DP. The goal is to enable ML practitioners
+        using standard Tensorflow APIs to train privacy-preserving models by changing only a
+        few lines of code.
+

+
+        The differentially private optimizers can be used in conjunction with high-level APIs
         that use the Optimizer class, especially Keras. Additionally, you can find differentially
         private implementations of some Keras models. All of the Optimizers and models can be found
-        in the API Documentation.
+        in the API Documentation.
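The Keras model wrappers mentioned above follow the same pattern; a sketch (assuming `DPSequential` accepts the clipping and noise arguments the same way the optimizers do; the API documentation has the authoritative signature):

```python
import tensorflow as tf
from tensorflow_privacy.privacy.keras_models.dp_keras_model import DPSequential

# A DP-enabled counterpart of tf.keras.Sequential: per-example gradients are
# clipped to l2_norm_clip and Gaussian noise is added during training.
model = DPSequential(
    l2_norm_clip=1.0,
    noise_multiplier=0.5,
    layers=[
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10),
    ])
```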

+

- classname: devsite-landing-row-cards items: diff --git a/g3doc/guide/get_started.md b/g3doc/guide/get_started.md index aaec65d..8a01aee 100644 --- a/g3doc/guide/get_started.md +++ b/g3doc/guide/get_started.md @@ -1,11 +1,12 @@ # Get Started + This document assumes you are already familiar with differential privacy, and -have determined that you would like to implement TF Privacy to achieve -differential privacy guarantees in your model(s). If you’re not familiar with -differential privacy, please review +have determined that you would like to use TF Privacy to implement differential +privacy guarantees in your model(s). If you’re not familiar with differential +privacy, please review [the overview page](https://tensorflow.org/responsible_ai/privacy/guide). After -installing TF Privacy get started by following these steps: +installing TF Privacy, get started by following these steps: ## 1. Choose a differentially private version of an existing Optimizer @@ -36,9 +37,9 @@ microbatches. Train your model using the DP Optimizer (step 1) and vectorized loss (step 2). There are two options for doing this: -- Pass the optimizer and loss as arguments to `Model.compile` before calling +* Pass the optimizer and loss as arguments to `Model.compile` before calling `Model.fit`. -- When writing a custom training loop, use `Optimizer.minimize()` on the +* When writing a custom training loop, use `Optimizer.minimize()` on the vectorized loss. Once this is done, it’s recommended that you tune your hyperparameters. For a @@ -65,7 +66,7 @@ The three new DP-SGD hyperparameters have the following effects and tradeoffs: utility because it lowers the standard deviation of the noise. However, it will slow down training in terms of time. 2. The clipping norm $C$: Since the standard deviation of the noise scales with - C, it is probably best to set $C$ to be some quantile (e.g. median, 75th + $C$, it is probably best to set $C$ to be some quantile (e.g. median, 75th percentile, 90th percentile) of the gradient norms. Having too large a value of $C$ adds unnecessarily large amounts of noise. 3. The noise multiplier $σ$: Of the three hyperparameters, the amount of diff --git a/g3doc/guide/measure_privacy.md b/g3doc/guide/measure_privacy.md index d6c2a73..f735ab3 100644 --- a/g3doc/guide/measure_privacy.md +++ b/g3doc/guide/measure_privacy.md @@ -2,12 +2,12 @@ Differential privacy is a framework for measuring the privacy guarantees provided by an algorithm and can be expressed using the values ε (epsilon) and δ -(delta). Of the two, ε is the more important and more sensitive to the choice of +(delta). Of the two, ε is more important and more sensitive to the choice of hyperparameters. Roughly speaking, they mean the following: * ε gives a ceiling on how much the probability of a particular output can increase by including (or removing) a single training example. You usually - want it to be a small constant (less than 10, or, for more stringent privacy + want it to be a small constant (less than 10, or for more stringent privacy guarantees, less than 1). However, this is only an upper bound, and a large value of epsilon may still mean good practical privacy. * δ bounds the probability of an arbitrary change in model behavior. You can @@ -30,17 +30,16 @@ dataset size and number of epochs. See the [classification privacy tutorial](../tutorials/classification_privacy.ipynb) to see the approach. -For more detail, you can see +For more detail, see [the original DP-SGD paper](https://arxiv.org/pdf/1607.00133.pdf). 
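As a concrete instance of the accounting described here, a sketch of computing epsilon for a fixed delta with the library helper (argument values are illustrative):

```python
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy

# 60,000 examples, batch size 250, noise multiplier 1.3, 15 epochs, and a
# target delta of 1e-5 (roughly one over the number of training examples).
eps, opt_order = compute_dp_sgd_privacy(
    n=60000, batch_size=250, noise_multiplier=1.3, epochs=15, delta=1e-5)
print(f'epsilon = {eps:.2f} at optimal RDP order {opt_order}')
```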
-You can use `compute_dp_sgd_privacy`, to find out the epsilon given a fixed
-delta value for your model [../tutorials/classification_privacy.ipynb]:
+You can use `compute_dp_sgd_privacy` to find out the epsilon given a fixed delta
+value for your model, as in the
+[classification privacy tutorial](../tutorials/classification_privacy.ipynb):
 
 *   `q` : the sampling ratio - the probability of an individual training point
     being included in a mini batch (`batch_size/number_of_examples`).
 *   `noise_multiplier` : A float that governs the amount of noise added during
     training. Generally, more noise results in better privacy and lower utility.
-    This generally
 *   `steps` : The number of global steps taken.
 
 A detailed writeup of the theory behind the computation of epsilon and delta is

From a20cbf9578972d90ac25c8624e11fbb2a43d3311 Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Fri, 3 Sep 2021 15:42:17 -0700
Subject: [PATCH 46/71] RDP for tree aggregation.

See "Practical and Private (Deep) Learning without Sampling or Shuffling"
https://arxiv.org/abs/2103.00039 for more details. See tests for example
usage for calculating epsilon.

PiperOrigin-RevId: 394770205
---
 .../privacy/analysis/rdp_accountant.py      | 59 ++++++++++++++++++
 .../privacy/analysis/rdp_accountant_test.py | 62 ++++++++++++++++++-
 2 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
index c872318..a328a63 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -42,6 +42,7 @@ from __future__ import print_function
 
 import math
 import sys
+from typing import Collection, Union
 
 import numpy as np
 from scipy import special
@@ -397,6 +398,64 @@ def compute_rdp(q, noise_multiplier, steps, orders):
 
   return rdp * steps
 
 
+def _compute_rdp_tree(sigma, steps_list, max_participation, alpha):
+  """Computes RDP of the Tree Aggregation Protocol at order alpha."""
+  if np.isinf(alpha):
+    return np.inf
+  tree_depths = [
+      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
+  ]
+  return alpha * max_participation * sum(tree_depths) / (2 * sigma**2)
+
+
+def compute_rdp_tree(
+    noise_multiplier: float, steps_list: Collection[float],
+    max_participation: int,
+    orders: Union[float, Collection[float]]) -> Collection[float]:
+  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
+
+  Args:
+    noise_multiplier: A non-negative float representing the ratio of the
+      standard deviation of the Gaussian noise to the l2-sensitivity of the
+      function to which it is added.
+    steps_list: A list of non-negative integers representing the number of
+      steps between tree restarts.
+    max_participation: A positive integer representing the maximum number of
+      times a sample may appear between tree restarts.
+    orders: An array (or a scalar) of RDP orders.
+
+  Returns:
+    The RDPs at all orders. Can be `np.inf`.
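+
+  Example usage (a sketch mirroring the unit tests; the parameter values are
+  illustrative):
+
+  ```python
+  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+  rdp = compute_rdp_tree(
+      noise_multiplier=1.13, steps_list=[1600], max_participation=1,
+      orders=orders)
+  eps = get_privacy_spent(orders, rdp, target_delta=1e-6)[0]
+  ```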
+ """ + if noise_multiplier < 0: + raise ValueError( + f"Noise multiplier must be non-negative, got {noise_multiplier}") + elif noise_multiplier == 0: + return np.inf + + if max_participation <= 0: + raise ValueError( + f"Max participation must be positive, got {max_participation}") + + if not steps_list: + raise ValueError("List of steps must be non-empty.") + + for steps in steps_list: + if steps < 0: + raise ValueError(f"List of steps must be non-negative, got {steps_list}") + + if np.isscalar(orders): + rdp = _compute_rdp_tree(noise_multiplier, steps_list, max_participation, + orders) + else: + rdp = np.array([ + _compute_rdp_tree(noise_multiplier, steps_list, max_participation, + alpha) for alpha in orders + ]) + + return rdp + + def compute_rdp_sample_without_replacement(q, noise_multiplier, steps, orders): """Compute RDP of Gaussian Mechanism using sampling without replacement. diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py index 3a5529f..33c51fd 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py @@ -21,7 +21,6 @@ from __future__ import print_function import math import sys -from absl.testing import absltest from absl.testing import parameterized from mpmath import exp from mpmath import inf @@ -265,5 +264,64 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase): self.assertLessEqual(delta, delta1 + 1e-300) +class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04)) + def test_compute_eps_tree(self, noise_multiplier, eps): + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + # This tests is based on the StackOverflow setting in "Practical and + # Private (Deep) Learning without Sampling or Shuffling". The calculated + # epsilon could be better as the method in this package keeps improving. + steps_list, target_delta, max_participation = [1600], 1e-6, 1 + rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, + max_participation, orders) + new_eps = rdp_accountant.get_privacy_spent( + orders, rdp, target_delta=target_delta)[0] + self.assertLess(new_eps, eps) + + @parameterized.named_parameters( + ('restart4_max2', [400] * 4, 2), + ('restart2_max1', [800] * 2, 1), + ('adaptive_max4', [10, 400, 400, 400, 390], 4), + ) + def test_compute_eps_tree_decreasing(self, steps_list, max_participation): + # Test privacy epsilon decreases with noise multiplier increasing when + # keeping other parameters the same. + orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64)) + target_delta = 1e-6 + prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation, + orders) + for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]: + rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, + max_participation, orders) + eps = rdp_accountant.get_privacy_spent( + orders, rdp, target_delta=target_delta)[0] + self.assertLess(eps, prev_eps) + + @parameterized.named_parameters( + ('negative_noise', -1, [3], 2, 1), + ('empty_steps', 1, [], 2, 1), + ('negative_steps', 1, [-3], 2, 1), + ('zero_participation', 1, [3], 0, 1), + ('negative_participation', 1, [3], -1, 1), + ) + def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list, + max_participation, orders): + with self.assertRaisesRegex(ValueError, 'must be'): + rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, + max_participation, orders) + + @parameterized.named_parameters( + ('t100n0.1', 100, 0.1), + ('t1000n0.01', 1000, 0.01), + ) + def test_no_tree_no_sampling(self, total_steps, noise_multiplier): + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, + [1] * total_steps, 1, orders) + rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders) + self.assertAllClose(tree_rdp, rdp, rtol=1e-12) + + if __name__ == '__main__': - absltest.main() + tf.test.main() From 06eef5136908961ea5a235341ec88c2b9838e85b Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 7 Sep 2021 09:23:38 -0700 Subject: [PATCH 47/71] New version 0.7.3 PiperOrigin-RevId: 395263672 --- setup.py | 2 +- tensorflow_privacy/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 98674e1..9d1d596 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ from setuptools import setup setup( name='tensorflow_privacy', - version='0.7.2', + version='0.7.3', url='https://github.com/tensorflow/privacy', license='Apache-2.0', install_requires=[ diff --git a/tensorflow_privacy/version.py b/tensorflow_privacy/version.py index 8647bf9..d31cff6 100644 --- a/tensorflow_privacy/version.py +++ b/tensorflow_privacy/version.py @@ -13,4 +13,4 @@ # limitations under the License. """TensorFlow Privacy version.""" -__version__ = '0.7.2' +__version__ = '0.7.3' From c5f35b3ca1226edc8d82779da668f8cdc8388aa8 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Wed, 8 Sep 2021 13:58:53 -0700 Subject: [PATCH 48/71] Try to fix flakiness by reducing the number of query construction. 
PiperOrigin-RevId: 395552656 --- .../privacy/dp_query/tree_aggregation_query_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index ace6484..1115f40 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -221,9 +221,9 @@ class TreeCumulativeSumQueryTest(tf.test.TestCase, parameterized.TestCase): _get_noise_generator), ) def test_noisy_cumsum_and_state_update(self, records, value_generator): - num_trials = 200 - record_specs = tf.TensorSpec([]) - records = [tf.constant(r) for r in records] + num_trials, vector_size = 10, 100 + record_specs = tf.TensorSpec([vector_size]) + records = [tf.constant(r, shape=[vector_size]) for r in records] noised_sums = [] for i in range(num_trials): query = tree_aggregation_query.TreeCumulativeSumQuery( From a9764e3e7d4795d4ce8bd724b5d9347be110769c Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Wed, 8 Sep 2021 21:05:28 -0700 Subject: [PATCH 49/71] TFF: cleanup the TFP query usage in tff.analytics; remove dependency on internal TFP structure. TFP: remove duplicate TreeRangeSumQuery in `tree_aggregation_query` PiperOrigin-RevId: 395618363 --- .../dp_query/tree_aggregation_query.py | 265 ------------------ .../dp_query/tree_aggregation_query_test.py | 160 ----------- 2 files changed, 425 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 1219651..2752dba 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -19,15 +19,11 @@ the leaf nodes of the tree arrive one by one as the time proceeds. The core logic of tree aggregation is implemented in `tree_aggregation.TreeAggregator` and `tree_aggregation.EfficientTreeAggregator`. """ -import distutils -import math import attr import tensorflow as tf from tensorflow_privacy.privacy.analysis import dp_event -from tensorflow_privacy.privacy.dp_query import distributed_discrete_gaussian_query from tensorflow_privacy.privacy.dp_query import dp_query -from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import tree_aggregation # TODO(b/193679963): define `RestartQuery` and move `RestartIndicator` to be @@ -468,264 +464,3 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): record_specs=record_specs, noise_generator=gaussian_noise_generator, use_efficient=use_efficient) - - -# TODO(b/197596864): Remove `TreeRangeSumQuery` from this file after the next -# TFP release - - -@tf.function -def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: - """A function constructs a complete tree given all the leaf nodes. - - The function takes a 1-D array representing the leaf nodes of a tree and the - tree's arity, and constructs a complete tree by recursively summing the - adjacent children to get the parent until reaching the root node. Because we - assume a complete tree, if the number of leaf nodes does not divide arity, the - leaf nodes will be padded with zeros. - - Args: - leaf_nodes: A 1-D array storing the leaf nodes of the tree. - arity: A `int` for the branching factor of the tree, i.e. the number of - children for each internal node. 
- - Returns: - `tf.RaggedTensor` representing the tree. For example, if - `leaf_nodes=tf.Tensor([1, 2, 3, 4])` and `arity=2`, then the returned value - should be `tree=tf.RaggedTensor([[10],[3,7],[1,2,3,4]])`. In this way, - `tree[layer][index]` can be used to access the node indexed by (layer, - index) in the tree, - """ - - def pad_zero(leaf_nodes, size): - paddings = [[0, size - len(leaf_nodes)]] - return tf.pad(leaf_nodes, paddings) - - leaf_nodes_size = tf.constant(len(leaf_nodes), dtype=tf.float32) - num_layers = tf.math.ceil( - tf.math.log(leaf_nodes_size) / - tf.math.log(tf.cast(arity, dtype=tf.float32))) + 1 - leaf_nodes = pad_zero( - leaf_nodes, tf.math.pow(tf.cast(arity, dtype=tf.float32), num_layers - 1)) - - def _shrink_layer(layer: tf.Tensor, arity: int) -> tf.Tensor: - return tf.reduce_sum((tf.reshape(layer, (-1, arity))), 1) - - # The following `tf.while_loop` constructs the tree from bottom up by - # iteratively applying `_shrink_layer` to each layer of the tree. The reason - # for the choice of TF1.0-style `tf.while_loop` is that @tf.function does not - # support auto-translation from python loop to tf loop when loop variables - # contain a `RaggedTensor` whose shape changes across iterations. - - idx = tf.identity(num_layers) - loop_cond = lambda i, h: tf.less_equal(2.0, i) - - def _loop_body(i, h): - return [ - tf.add(i, -1.0), - tf.concat(([_shrink_layer(h[0], arity)], h), axis=0) - ] - - _, tree = tf.while_loop( - loop_cond, - _loop_body, [idx, tf.RaggedTensor.from_tensor([leaf_nodes])], - shape_invariants=[ - idx.get_shape(), - tf.RaggedTensorSpec(dtype=leaf_nodes.dtype, ragged_rank=1) - ]) - - return tree - - -class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): - """Implements dp_query for accurate range queries using tree aggregation. - - Implements a variant of the tree aggregation protocol from. "Is interaction - necessary for distributed private learning?. Adam Smith, Abhradeep Thakurta, - Jalaj Upadhyay." Builds a tree on top of the input record and adds noise to - the tree for differential privacy. Any range query can be decomposed into the - sum of O(log(n)) nodes in the tree compared to O(n) when using a histogram. - Improves efficiency and reduces noise scale. - """ - - @attr.s(frozen=True) - class GlobalState(object): - """Class defining global state for TreeRangeSumQuery. - - Attributes: - arity: The branching factor of the tree (i.e. the number of children each - internal node has). - inner_query_state: The global state of the inner query. - """ - arity = attr.ib() - inner_query_state = attr.ib() - - def __init__(self, - inner_query: dp_query.SumAggregationDPQuery, - arity: int = 2): - """Initializes the `TreeRangeSumQuery`. - - Args: - inner_query: The inner `DPQuery` that adds noise to the tree. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). Defaults to 2. 
- """ - self._inner_query = inner_query - self._arity = arity - - if self._arity < 1: - raise ValueError(f'Invalid arity={arity} smaller than 2.') - - def initial_global_state(self): - """Implements `tensorflow_privacy.DPQuery.initial_global_state`.""" - return TreeRangeSumQuery.GlobalState( - arity=self._arity, - inner_query_state=self._inner_query.initial_global_state()) - - def derive_sample_params(self, global_state): - """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" - return (global_state.arity, - self._inner_query.derive_sample_params( - global_state.inner_query_state)) - - def preprocess_record(self, params, record): - """Implements `tensorflow_privacy.DPQuery.preprocess_record`. - - This method builds the tree, flattens it and applies - `inner_query.preprocess_record` to the flattened tree. - - Args: - params: Hyper-parameters for preprocessing record. - record: A histogram representing the leaf nodes of the tree. - - Returns: - A `tf.Tensor` representing the flattened version of the preprocessed tree. - """ - arity, inner_query_params = params - preprocessed_record = _build_tree_from_leaf(record, arity).flat_values - # The following codes reshape the output vector so the output shape of can - # be statically inferred. This is useful when used with - # `tff.aggregators.DifferentiallyPrivateFactory` because it needs to know - # the output shape of this function statically and explicitly. - preprocessed_record_shape = [ - (self._arity**(math.ceil(math.log(record.shape[0], self._arity)) + 1) - - 1) // (self._arity - 1) - ] - preprocessed_record = tf.reshape(preprocessed_record, - preprocessed_record_shape) - preprocessed_record = self._inner_query.preprocess_record( - inner_query_params, preprocessed_record) - - return preprocessed_record - - def get_noised_result(self, sample_state, global_state): - """Implements `tensorflow_privacy.DPQuery.get_noised_result`. - - This function re-constructs the `tf.RaggedTensor` from the flattened tree - output by `preprocess_records.` - - Args: - sample_state: A `tf.Tensor` for the flattened tree. - global_state: The global state of the protocol. - - Returns: - A `tf.RaggedTensor` representing the tree. - """ - # The [0] is needed because of how tf.RaggedTensor.from_two_splits works. - # print(tf.RaggedTensor.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6], - # row_splits=[0, 4, 4, 7, 8, 8])) - # - # This part is not written in tensorflow and will be executed on the server - # side instead of the client side if used with - # tff.aggregators.DifferentiallyPrivateFactory for federated learning. - sample_state, inner_query_state, _ = self._inner_query.get_noised_result( - sample_state, global_state.inner_query_state) - new_global_state = TreeRangeSumQuery.GlobalState( - arity=global_state.arity, inner_query_state=inner_query_state) - - row_splits = [0] + [ - (self._arity**(x + 1) - 1) // (self._arity - 1) for x in range( - math.floor(math.log(sample_state.shape[0], self._arity)) + 1) - ] - tree = tf.RaggedTensor.from_row_splits( - values=sample_state, row_splits=row_splits) - event = dp_event.UnsupportedDpEvent() - return tree, new_global_state, event - - @classmethod - def build_central_gaussian_query(cls, - l2_norm_clip: float, - stddev: float, - arity: int = 2): - """Returns `TreeRangeSumQuery` with central Gaussian noise. - - Args: - l2_norm_clip: Each record should be clipped so that it has L2 norm at most - `l2_norm_clip`. - stddev: Stddev of the central Gaussian noise. - arity: The branching factor of the tree (i.e. 
the number of children each - internal node has). Defaults to 2. - """ - if l2_norm_clip <= 0: - raise ValueError(f'`l2_norm_clip` must be positive, got {l2_norm_clip}.') - - if stddev < 0: - raise ValueError(f'`stddev` must be non-negative, got {stddev}.') - - if arity < 2: - raise ValueError(f'`arity` must be at least 2, got {arity}.') - - inner_query = gaussian_query.GaussianSumQuery(l2_norm_clip, stddev) - - return cls(arity=arity, inner_query=inner_query) - - @classmethod - def build_distributed_discrete_gaussian_query(cls, - l2_norm_bound: float, - local_stddev: float, - arity: int = 2): - """Returns `TreeRangeSumQuery` with central Gaussian noise. - - Args: - l2_norm_bound: Each record should be clipped so that it has L2 norm at - most `l2_norm_bound`. - local_stddev: Scale/stddev of the local discrete Gaussian noise. - arity: The branching factor of the tree (i.e. the number of children each - internal node has). Defaults to 2. - """ - if l2_norm_bound <= 0: - raise ValueError( - f'`l2_clip_bound` must be positive, got {l2_norm_bound}.') - - if local_stddev < 0: - raise ValueError( - f'`local_stddev` must be non-negative, got {local_stddev}.') - - if arity < 2: - raise ValueError(f'`arity` must be at least 2, got {arity}.') - - inner_query = distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery( - l2_norm_bound, local_stddev) - - return cls(arity=arity, inner_query=inner_query) - - -def _get_add_noise(stddev, seed: int = None): - """Utility function to decide which `add_noise` to use according to tf version.""" - if distutils.version.LooseVersion( - tf.__version__) < distutils.version.LooseVersion('2.0.0'): - - # The seed should be only used for testing purpose. - if seed is not None: - tf.random.set_seed(seed) - - def add_noise(v): - return v + tf.random.normal( - tf.shape(input=v), stddev=stddev, dtype=v.dtype) - else: - random_normal = tf.random_normal_initializer(stddev=stddev, seed=seed) - - def add_noise(v): - return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) - - return add_noise diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py index 1115f40..699c890 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query_test.py @@ -13,8 +13,6 @@ # limitations under the License. 
"""Tests for `tree_aggregation_query`.""" -import math - from absl.testing import parameterized import numpy as np import tensorflow as tf @@ -470,163 +468,5 @@ class TreeResidualQueryTest(tf.test.TestCase, parameterized.TestCase): self.assertEqual(query_result, expected) -class BuildTreeTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.product( - leaf_nodes_size=[1, 2, 3, 4, 5], - arity=[2, 3], - dtype=[tf.int32, tf.float32], - ) - def test_build_tree_from_leaf(self, leaf_nodes_size, arity, dtype): - """Test whether `_build_tree_from_leaf` will output the correct tree.""" - - leaf_nodes = tf.cast(tf.range(leaf_nodes_size), dtype) - depth = math.ceil(math.log(leaf_nodes_size, arity)) + 1 - - tree = tree_aggregation_query._build_tree_from_leaf(leaf_nodes, arity) - - self.assertEqual(depth, tree.shape[0]) - - for layer in range(depth): - reverse_depth = tree.shape[0] - layer - 1 - span_size = arity**reverse_depth - for idx in range(arity**layer): - left = idx * span_size - right = (idx + 1) * span_size - expected_value = sum(leaf_nodes[left:right]) - self.assertEqual(tree[layer][idx], expected_value) - - -class TreeRangeSumQueryTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.product( - inner_query=['central', 'distributed'], - params=[(0., 1., 2), (1., -1., 2), (1., 1., 1)], - ) - def test_raises_error(self, inner_query, params): - clip_norm, stddev, arity = params - with self.assertRaises(ValueError): - if inner_query == 'central': - tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - clip_norm, stddev, arity) - elif inner_query == 'distributed': - tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - clip_norm, stddev, arity) - - @parameterized.product( - inner_query=['central', 'distributed'], - clip_norm=[0.1, 1.0, 10.0], - stddev=[0.1, 1.0, 10.0]) - def test_initial_global_state_type(self, inner_query, clip_norm, stddev): - - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - clip_norm, stddev) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - clip_norm, stddev) - global_state = query.initial_global_state() - self.assertIsInstance(global_state, - tree_aggregation_query.TreeRangeSumQuery.GlobalState) - - @parameterized.product( - inner_query=['central', 'distributed'], - clip_norm=[0.1, 1.0, 10.0], - stddev=[0.1, 1.0, 10.0], - arity=[2, 3, 4]) - def test_derive_sample_params(self, inner_query, clip_norm, stddev, arity): - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - clip_norm, stddev, arity) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - clip_norm, stddev, arity) - global_state = query.initial_global_state() - derived_arity, inner_query_state = query.derive_sample_params(global_state) - self.assertAllClose(derived_arity, arity) - if inner_query == 'central': - self.assertAllClose(inner_query_state, clip_norm) - elif inner_query == 'distributed': - self.assertAllClose(inner_query_state.l2_norm_bound, clip_norm) - self.assertAllClose(inner_query_state.local_stddev, stddev) - - @parameterized.product( - (dict(arity=2, expected_tree=[1, 1, 0, 1, 0, 0, 0]), - dict(arity=3, expected_tree=[1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])), - inner_query=['central', 'distributed'], - ) - def test_preprocess_record(self, 
inner_query, arity, expected_tree): - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.float32) - expected_tree = tf.cast(expected_tree, tf.float32) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.int32) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - self.assertAllClose(preprocessed_record, expected_tree) - - @parameterized.named_parameters( - ('stddev_1', 1, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]), - ('stddev_0_1', 4, tf.constant([1, 0], dtype=tf.int32), [1, 1, 0]), - ) - def test_distributed_preprocess_record_with_noise(self, local_stddev, record, - expected_tree): - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - 10., local_stddev) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - - preprocessed_record = query.preprocess_record(params, record) - - self.assertAllClose( - preprocessed_record, expected_tree, atol=10 * local_stddev) - - @parameterized.product( - (dict( - arity=2, - expected_tree=tf.ragged.constant([[1], [1, 0], [1, 0, 0, 0]])), - dict( - arity=3, - expected_tree=tf.ragged.constant([[1], [1, 0, 0], - [1, 0, 0, 0, 0, 0, 0, 0, 0]]))), - inner_query=['central', 'distributed'], - ) - def test_get_noised_result(self, inner_query, arity, expected_tree): - if inner_query == 'central': - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.float32) - expected_tree = tf.cast(expected_tree, tf.float32) - elif inner_query == 'distributed': - query = tree_aggregation_query.TreeRangeSumQuery.build_distributed_discrete_gaussian_query( - 10., 0., arity) - record = tf.constant([1, 0, 0, 0], dtype=tf.int32) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, record) - sample_state, global_state, _ = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose(sample_state, expected_tree) - - @parameterized.product(stddev=[0.1, 1.0, 10.0]) - def test_central_get_noised_result_with_noise(self, stddev): - query = tree_aggregation_query.TreeRangeSumQuery.build_central_gaussian_query( - 10., stddev) - global_state = query.initial_global_state() - params = query.derive_sample_params(global_state) - preprocessed_record = query.preprocess_record(params, tf.constant([1., 0.])) - sample_state, global_state, _ = query.get_noised_result( - preprocessed_record, global_state) - - self.assertAllClose( - sample_state, tf.ragged.constant([[1.], [1., 0.]]), atol=10 * stddev) - - if __name__ == '__main__': tf.test.main() From 7f22cbeb89c2584b98e688815e8ab4ca91761c4d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 9 Sep 2021 15:37:32 -0700 Subject: [PATCH 50/71] Add support of large batch emulation to Tensorflow Privacy Keras optimizer. 
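In practice the new flag simply composes with the existing DP arguments; a
sketch (argument values are illustrative):

```python
# Accumulate gradients over 4 optimizer steps: model weights are updated on
# every 4th call to minimize(), so the effective batch size is 4x the
# per-step batch size.
opt = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,
    noise_multiplier=0.5,
    num_microbatches=1,
    gradient_accumulation_steps=4,
    learning_rate=0.25)
```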
PiperOrigin-RevId: 395802081 --- .../privacy/optimizers/dp_optimizer_keras.py | 111 +++++++++++++++++- .../optimizers/dp_optimizer_keras_test.py | 81 +++++++++++++ 2 files changed, 190 insertions(+), 2 deletions(-) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py index 5345c70..2fe2a7f 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py @@ -49,7 +49,7 @@ def make_keras_optimizer_class(cls): ```python # Create optimizer. - opt = {dp_keras_class}(l2_norm_clip=1.0, noise_multiplier=0.5, num_microbatches=1, + opt = {dp_keras_class}(l2_norm_clip=1.0, noise_multiplier=0.5, num_microbatches=1, ) ``` @@ -81,6 +81,39 @@ def make_keras_optimizer_class(cls): model.fit(...) ``` + In DP-SGD training, a larger batch size typically helps to achieve better + privacy/utility tradeoff. However there is typically a maximum batch size + imposed by hardware. + This optimizer can emulate large batch sizes on hardware with limited + memory by accumulating gradients for several steps before actually + applying them to update model weights. + Constructor argument `gradient_accumulation_steps` controls the number + of steps for which gradients are accumulated before updating + the model weights. + + Below is an example which demonstrates how to use this feature: + + ```python + # Create optimizer which will be accumulating gradients for 4 steps. + # and then performing an update of model weights. + opt = {dp_keras_class}(l2_norm_clip=1.0, + noise_multiplier=0.5, + num_microbatches=1, + gradient_accumulation_steps=4, + ) + + # Use optimizer in a regular way. + # First three calls to opt.minimize won't update model weights and will + # only accumulate gradients. Model weights will be updated on the fourth + # call to opt.minimize + opt.minimize(loss, var_list=[var]) + ``` + + Note that when using this feature effective batch size is + `gradient_accumulation_steps * one_step_batch_size` where + `one_step_batch_size` size of the batch which is passed to single step + of the optimizer. Thus user may have to adjust learning rate, weight decay + and possibly other training hyperparameters accordingly. """.format( base_class='tf.keras.optimizers.' + cls.__name__, short_base_class=cls.__name__, @@ -100,6 +133,7 @@ def make_keras_optimizer_class(cls): l2_norm_clip, noise_multiplier, num_microbatches=None, + gradient_accumulation_steps=1, *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args **kwargs): """Initialize the DPOptimizerClass. @@ -108,11 +142,21 @@ def make_keras_optimizer_class(cls): l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). noise_multiplier: Ratio of the standard deviation to the clipping norm. num_microbatches: Number of microbatches into which each minibatch is - split. + split. Default is `None` which means that number of microbatches + is equal to batch size (i.e. each microbatch contains exactly one + example). If `gradient_accumulation_steps` is greater than 1 and + `num_microbatches` is not `None` then the effective number of + microbatches is equal to + `num_microbatches * gradient_accumulation_steps`. + gradient_accumulation_steps: If greater than 1 then optimizer will be + accumulating gradients for this number of optimizer steps before + applying them to update model weights. If this argument is set to 1 + then updates will be applied on each optimizer step. 
*args: These will be passed on to the base class `__init__` method. **kwargs: These will be passed on to the base class `__init__` method. """ super(DPOptimizerClass, self).__init__(*args, **kwargs) + self.gradient_accumulation_steps = gradient_accumulation_steps self._l2_norm_clip = l2_norm_clip self._noise_multiplier = noise_multiplier self._num_microbatches = num_microbatches @@ -121,6 +165,69 @@ def make_keras_optimizer_class(cls): self._global_state = None self._was_dp_gradients_called = False + def _create_slots(self, var_list): + super(DPOptimizerClass, self)._create_slots(var_list) + if self.gradient_accumulation_steps > 1: + for var in var_list: + self.add_slot(var, 'grad_acc') + + def _prepare_local(self, var_device, var_dtype, apply_state): + super(DPOptimizerClass, self)._prepare_local( + var_device, var_dtype, apply_state) + if self.gradient_accumulation_steps > 1: + apply_update = tf.math.equal( + tf.math.floormod(self.iterations + 1, + self.gradient_accumulation_steps), + 0) + grad_scaler = tf.cast(1. / self.gradient_accumulation_steps, var_dtype) + apply_state[(var_device, var_dtype)].update( + { + 'apply_update': apply_update, + 'grad_scaler': grad_scaler + }) + + def _resource_apply_dense(self, grad, var, apply_state=None): + if self.gradient_accumulation_steps > 1: + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = ((apply_state or {}).get((var_device, var_dtype)) + or self._fallback_apply_state(var_device, var_dtype)) + grad_acc = self.get_slot(var, 'grad_acc') + + def _update_grad(): + apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense( + grad_acc + grad * coefficients['grad_scaler'], var, apply_state) + with tf.control_dependencies([apply_grad_op]): + return grad_acc.assign(tf.zeros_like(grad_acc), + use_locking=self._use_locking, + read_value=False) + + def _accumulate(): + return grad_acc.assign_add(grad * coefficients['grad_scaler'], + use_locking=self._use_locking, + read_value=False) + + return tf.cond(coefficients['apply_update'], _update_grad, _accumulate) + else: + return super(DPOptimizerClass, self)._resource_apply_dense( + grad, var, apply_state) + + def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs): + if self.gradient_accumulation_steps > 1: + raise NotImplementedError( + 'Sparse gradients are not supported with large batch emulation.') + else: + return super(DPOptimizerClass, + self)._resource_apply_sparse_duplicate_indices( + *args, **kwargs) + + def _resource_apply_sparse(self, *args, **kwargs): + if self.gradient_accumulation_steps > 1: + raise NotImplementedError( + 'Sparse gradients are not supported with large batch emulation.') + else: + return super(DPOptimizerClass, self)._resource_apply_sparse( + *args, **kwargs) + def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): """DP-SGD version of base class method.""" diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py index 71b68c6..b4013bf 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras_test.py @@ -394,6 +394,87 @@ class DPOptimizerGetGradientsTest(tf.test.TestCase, parameterized.TestCase): grads_and_vars = tf.Variable([0.0]) opt.apply_gradients(grads_and_vars) + def testLargeBatchEmulationNoNoise(self): + # Test for emulation of large batch training. + # It tests that updates are only done every gradient_accumulation_steps + # steps. 
+ # In this test we set noise multiplier to zero and clipping norm to high + # value, such that optimizer essentially behave as non-DP optimizer. + # This makes easier to check how values of variables are changing. + # + # This test optimizes loss var0*x + var1 + # Gradients of this loss are computed as: + # d(loss)/d(var0) = x + # d(loss)/d(var1) = 1 + var0 = tf.Variable([[1.0, 2.0]], dtype=tf.float32) + var1 = tf.Variable([3.0], dtype=tf.float32) + x1 = tf.constant([[2.0, 0.0], [0.0, 1.0]], dtype=tf.float32) + loss1 = lambda: tf.matmul(var0, x1, transpose_b=True) + var1 + x2 = tf.constant([[4.0, 2.0], [2.0, 1.0]], dtype=tf.float32) + loss2 = lambda: tf.matmul(var0, x2, transpose_b=True) + var1 + + opt = dp_optimizer_keras.DPKerasSGDOptimizer( + l2_norm_clip=100.0, + noise_multiplier=0.0, + gradient_accumulation_steps=2, + learning_rate=1.0) + + # before any call to optimizer + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0) + self.assertAllCloseAccordingToType([3.0], var1) + + opt.minimize(loss1, [var0, var1]) + # After first call to optimizer values didn't change + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0) + self.assertAllCloseAccordingToType([3.0], var1) + + opt.minimize(loss2, [var0, var1]) + # After second call to optimizer updates were applied + self.assertAllCloseAccordingToType([[-1.0, 1.0]], var0) + self.assertAllCloseAccordingToType([2.0], var1) + + opt.minimize(loss2, [var0, var1]) + # After third call to optimizer values didn't change + self.assertAllCloseAccordingToType([[-1.0, 1.0]], var0) + self.assertAllCloseAccordingToType([2.0], var1) + + opt.minimize(loss2, [var0, var1]) + # After fourth call to optimizer updates were applied again + self.assertAllCloseAccordingToType([[-4.0, -0.5]], var0) + self.assertAllCloseAccordingToType([1.0], var1) + + @parameterized.named_parameters( + ('DPKerasSGDOptimizer 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1), + ('DPKerasSGDOptimizer 2', dp_optimizer_keras.DPKerasSGDOptimizer, 2), + ('DPKerasSGDOptimizer 4', dp_optimizer_keras.DPKerasSGDOptimizer, 4), + ('DPKerasAdamOptimizer 2', + dp_optimizer_keras.DPKerasAdamOptimizer, 1), + ('DPKerasAdagradOptimizer 2', + dp_optimizer_keras.DPKerasAdagradOptimizer, 2), + ) + def testLargeBatchEmulation(self, cls, gradient_accumulation_steps): + # Tests various optimizers with large batch emulation. + # Uses clipping and noise, thus does not test specific values + # of the variables and only tests how often variables are updated. + var0 = tf.Variable([[1.0, 2.0]], dtype=tf.float32) + var1 = tf.Variable([3.0], dtype=tf.float32) + x = tf.constant([[2.0, 0.0], [0.0, 1.0]], dtype=tf.float32) + loss = lambda: tf.matmul(var0, x, transpose_b=True) + var1 + + opt = cls( + l2_norm_clip=100.0, + noise_multiplier=0.0, + gradient_accumulation_steps=gradient_accumulation_steps, + learning_rate=1.0) + + for _ in range(gradient_accumulation_steps): + self.assertAllCloseAccordingToType([[1.0, 2.0]], var0) + self.assertAllCloseAccordingToType([3.0], var1) + opt.minimize(loss, [var0, var1]) + + self.assertNotAllClose([[1.0, 2.0]], var0) + self.assertNotAllClose([3.0], var1) + if __name__ == '__main__': tf.test.main() From 0d05f2eb18b9f17df78d12b4749f57682e54a51d Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Fri, 10 Sep 2021 14:21:45 -0700 Subject: [PATCH 51/71] Fix link to API documentation in guide page. 
PiperOrigin-RevId: 396006636
---
 g3doc/guide/_index.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/g3doc/guide/_index.yaml b/g3doc/guide/_index.yaml
index 3720bdc..2f634d7 100644
--- a/g3doc/guide/_index.yaml
+++ b/g3doc/guide/_index.yaml
@@ -74,7 +74,7 @@ landing_page:
         The differentially private optimizers can be used in conjunction with high-level APIs
         that use the Optimizer class, especially Keras. Additionally, you can find differentially
         private implementations of some Keras models. All of the Optimizers and models can be found
-        in the API Documentation.

+ in the API Documentation.

- classname: devsite-landing-row-cards From b572707cfc578aa1347840e686378ac9b96bc905 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Mon, 13 Sep 2021 17:47:50 -0700 Subject: [PATCH 52/71] Update reset and pre-process functions for tree aggregation queries. Minor comments update for adaptive clip query tests. PiperOrigin-RevId: 396483111 --- .../quantile_adaptive_clip_sum_query_test.py | 9 ++--- .../dp_query/quantile_estimator_query.py | 11 +++++- .../dp_query/quantile_estimator_query_test.py | 2 +- .../dp_query/tree_aggregation_query.py | 34 +++++++++++++++++-- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py index 51da202..5979266 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py @@ -230,7 +230,7 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase, ('start_high_arithmetic', False, False), ('start_high_geometric', False, True)) def test_adaptation_linspace(self, start_low, geometric): - # 100 records equally spaced from 0 to 10 in 0.1 increments. + # `num_records` records equally spaced from 0 to 10 in 0.1 increments. # Test that we converge to the correct median value and bounce around it. num_records = 21 records = [ @@ -262,9 +262,10 @@ class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase, ('start_high_arithmetic', False, False), ('start_high_geometric', False, True)) def test_adaptation_all_equal(self, start_low, geometric): - # 20 equal records. Test that we converge to that record and bounce around - # it. Unlike the linspace test, the quantile-matching objective is very - # sharp at the optimum so a decaying learning rate is necessary. + # `num_records` equal records. Test that we converge to that record and + # bounce around it. Unlike the linspace test, the quantile-matching + # objective is very sharp at the optimum so a decaying learning rate is + # necessary. num_records = 20 records = [tf.constant(5.0)] * num_records diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py index 9c90a03..4a453d6 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py @@ -74,6 +74,15 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): updating is preferred for non-negative records like vector norms that could potentially be very large or very close to zero. """ + + if target_quantile < 0 or target_quantile > 1: + raise ValueError( + f'`target_quantile` must be between 0 and 1, got {target_quantile}.') + + if learning_rate < 0: + raise ValueError( + f'`learning_rate` must be non-negative, got {learning_rate}') + self._initial_estimate = initial_estimate self._target_quantile = target_quantile self._learning_rate = learning_rate @@ -208,7 +217,7 @@ class NoPrivacyQuantileEstimatorQuery(QuantileEstimatorQuery): return no_privacy_query.NoPrivacyAverageQuery() -class TreeAggregationQuantileEstimatorQuery(QuantileEstimatorQuery): +class TreeQuantileEstimatorQuery(QuantileEstimatorQuery): """Iterative process to estimate target quantile of a univariate distribution. 
Unlike the base class, this uses a `TreeResidualSumQuery` to estimate the diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py index d349f56..e29fc4a 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py @@ -38,7 +38,7 @@ def _make_quantile_estimator_query(initial_estimate, tree_aggregation=False): if expected_num_records is not None: if tree_aggregation: - return quantile_estimator_query.TreeAggregationQuantileEstimatorQuery( + return quantile_estimator_query.TreeQuantileEstimatorQuery( initial_estimate, target_quantile, learning_rate, below_estimate_stddev, expected_num_records, geometric_update) else: diff --git a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py index 2752dba..70f9efa 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_aggregation_query.py @@ -360,6 +360,23 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): """Implements `tensorflow_privacy.DPQuery.derive_sample_params`.""" return global_state.clip_value + def preprocess_record_l2_impl(self, params, record): + """Clips the l2 norm, returning the clipped record and the l2 norm. + + Args: + params: The parameters for the sample. + record: The record to be processed. + + Returns: + A tuple (preprocessed_records, l2_norm) where `preprocessed_records` is + the structure of preprocessed tensors, and l2_norm is the total l2 norm + before clipping. + """ + l2_norm_clip = params + record_as_list = tf.nest.flatten(record) + clipped_as_list, norm = tf.clip_by_global_norm(record_as_list, l2_norm_clip) + return tf.nest.pack_sequence_as(record, clipped_as_list), norm + def preprocess_record(self, params, record): """Implements `tensorflow_privacy.DPQuery.preprocess_record`. @@ -405,7 +422,7 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): `get_noised_result` when the restarting condition is met. Args: - noised_results: Noised cumulative sum returned by `get_noised_result`. + noised_results: Noised results returned by `get_noised_result`. global_state: Updated global state returned by `get_noised_result`, which records noise for the conceptual cumulative sum of the current leaf node, and tree state for the next conceptual cumulative sum. @@ -420,6 +437,17 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): previous_tree_noise=self._zero_initial_noise(), tree_state=new_tree_state) + def reset_l2_clip_gaussian_noise(self, global_state, clip_norm, stddev): + noise_generator_state = global_state.tree_state.value_generator_state + assert isinstance(self._tree_aggregator.value_generator, + tree_aggregation.GaussianNoiseGenerator) + noise_generator_state = self._tree_aggregator.value_generator.make_state( + noise_generator_state.seeds, stddev) + new_tree_state = attr.evolve( + global_state.tree_state, value_generator_state=noise_generator_state) + return attr.evolve( + global_state, clip_value=clip_norm, tree_state=new_tree_state) + @classmethod def build_l2_gaussian_query(cls, clip_norm, @@ -442,8 +470,8 @@ class TreeResidualSumQuery(dp_query.SumAggregationDPQuery): aggregation algorithm based on the paper "Efficient Use of Differentially Private Binary Trees". 
""" - if clip_norm <= 0: - raise ValueError(f'`clip_norm` must be positive, got {clip_norm}.') + if clip_norm < 0: + raise ValueError(f'`clip_norm` must be non-negative, got {clip_norm}.') if noise_multiplier < 0: raise ValueError( From 388f46ffa0ee0d6d4573ad304b20c26afbb94fd9 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Mon, 20 Sep 2021 17:19:29 -0700 Subject: [PATCH 53/71] Adds RdpAccountant: implementation of PrivacyAccountant for RDP. Also adds UnsupportedEventError for handling unsupported events by PrivacyAccountant. PiperOrigin-RevId: 397878895 --- .../privacy/analysis/privacy_accountant.py | 25 +- .../analysis/privacy_accountant_test.py | 101 ++++ .../analysis/rdp_privacy_accountant.py | 572 ++++++++++++++++++ .../analysis/rdp_privacy_accountant_test.py | 307 ++++++++++ 4 files changed, 997 insertions(+), 8 deletions(-) create mode 100644 tensorflow_privacy/privacy/analysis/privacy_accountant_test.py create mode 100644 tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py create mode 100644 tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py diff --git a/tensorflow_privacy/privacy/analysis/privacy_accountant.py b/tensorflow_privacy/privacy/analysis/privacy_accountant.py index 9235156..578ef0a 100644 --- a/tensorflow_privacy/privacy/analysis/privacy_accountant.py +++ b/tensorflow_privacy/privacy/analysis/privacy_accountant.py @@ -16,8 +16,8 @@ import abc import enum -from tensorflow_privacy.privacy.dp_event import dp_event -from tensorflow_privacy.privacy.dp_event import dp_event_builder +from tensorflow_privacy.privacy.analysis import dp_event +from tensorflow_privacy.privacy.analysis import dp_event_builder class NeighboringRelation(enum.Enum): @@ -25,6 +25,10 @@ class NeighboringRelation(enum.Enum): REPLACE_ONE = 2 +class UnsupportedEventError(Exception): + """Exception to raise if _compose is called on unsupported event type.""" + + class PrivacyAccountant(metaclass=abc.ABCMeta): """Abstract base class for privacy accountants.""" @@ -43,7 +47,7 @@ class PrivacyAccountant(metaclass=abc.ABCMeta): return self._neighboring_relation @abc.abstractmethod - def is_supported(self, event: dp_event.DpEvent) -> bool: + def supports(self, event: dp_event.DpEvent) -> bool: """Checks whether the `DpEvent` can be processed by this accountant. In general this will require recursively checking the structure of the @@ -59,7 +63,7 @@ class PrivacyAccountant(metaclass=abc.ABCMeta): @abc.abstractmethod def _compose(self, event: dp_event.DpEvent, count: int = 1): - """Update internal state to account for application of a `DpEvent`. + """Updates internal state to account for application of a `DpEvent`. Calls to `get_epsilon` or `get_delta` after calling `_compose` will return values that account for this `DpEvent`. @@ -70,7 +74,7 @@ class PrivacyAccountant(metaclass=abc.ABCMeta): """ def compose(self, event: dp_event.DpEvent, count: int = 1): - """Update internal state to account for application of a `DpEvent`. + """Updates internal state to account for application of a `DpEvent`. Calls to `get_epsilon` or `get_delta` after calling `compose` will return values that account for this `DpEvent`. @@ -80,10 +84,15 @@ class PrivacyAccountant(metaclass=abc.ABCMeta): count: The number of times to compose the event. Raises: - TypeError: `event` is not supported by this `PrivacyAccountant`. + UnsupportedEventError: `event` is not supported by this + `PrivacyAccountant`. 
""" - if not self.is_supported(event): - raise TypeError(f'`DpEvent` {event} is of unsupported type.') + if not isinstance(event, dp_event.DpEvent): + raise TypeError(f'`event` must be `DpEvent`. Found {type(event)}.') + + if not self.supports(event): + raise UnsupportedEventError('Unsupported event: {event}.') + self._ledger.compose(event, count) self._compose(event, count) diff --git a/tensorflow_privacy/privacy/analysis/privacy_accountant_test.py b/tensorflow_privacy/privacy/analysis/privacy_accountant_test.py new file mode 100644 index 0000000..344f3e4 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/privacy_accountant_test.py @@ -0,0 +1,101 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Abstract base class for tests of `PrivacyAccountant` classes. + +Checks that a class derived from `PrivacyAccountant` has the correct behavior +for standard `DpEvent` classes. +""" + +from typing import Collection + +from absl.testing import absltest + +from tensorflow_privacy.privacy.analysis import dp_event +from tensorflow_privacy.privacy.analysis import privacy_accountant + + +class PrivacyAccountantTest(absltest.TestCase): + + def _make_test_accountants( + self) -> Collection[privacy_accountant.PrivacyAccountant]: + """Makes a list of accountants to test. + + Subclasses should define this to return a list of accountants to be tested. + + Returns: + A list of accountants to test. + """ + return [] + + def test_make_test_accountants(self): + self.assertNotEmpty(self._make_test_accountants()) + + def test_unsupported(self): + + class UnknownDpEvent(dp_event.DpEvent): + pass + + for accountant in self._make_test_accountants(): + for unsupported in [dp_event.UnsupportedDpEvent(), UnknownDpEvent()]: + self.assertFalse(accountant.supports(unsupported)) + self.assertFalse( + accountant.supports(dp_event.SelfComposedDpEvent(unsupported, 10))) + self.assertFalse( + accountant.supports(dp_event.ComposedDpEvent([unsupported]))) + + def test_no_events(self): + for accountant in self._make_test_accountants(): + self.assertEqual(accountant.get_epsilon(1e-12), 0) + self.assertEqual(accountant.get_epsilon(0), 0) + self.assertEqual(accountant.get_epsilon(1), 0) + try: + self.assertEqual(accountant.get_delta(1e-12), 0) + self.assertEqual(accountant.get_delta(0), 0) + self.assertEqual(accountant.get_delta(float('inf')), 0) + except NotImplementedError: + # Implementing `get_delta` is optional. 
+ pass + + def test_no_op(self): + for accountant in self._make_test_accountants(): + event = dp_event.NoOpDpEvent() + self.assertTrue(accountant.supports(event)) + accountant._compose(event) + self.assertEqual(accountant.get_epsilon(1e-12), 0) + self.assertEqual(accountant.get_epsilon(0), 0) + self.assertEqual(accountant.get_epsilon(1), 0) + try: + self.assertEqual(accountant.get_delta(1e-12), 0) + self.assertEqual(accountant.get_delta(0), 0) + self.assertEqual(accountant.get_delta(float('inf')), 0) + except NotImplementedError: + # Implementing `get_delta` is optional. + pass + + def test_non_private(self): + for accountant in self._make_test_accountants(): + event = dp_event.NonPrivateDpEvent() + self.assertTrue(accountant.supports(event)) + accountant._compose(event) + self.assertEqual(accountant.get_epsilon(0.99), float('inf')) + self.assertEqual(accountant.get_epsilon(0), float('inf')) + self.assertEqual(accountant.get_epsilon(1), float('inf')) + try: + self.assertEqual(accountant.get_delta(100), 1) + self.assertEqual(accountant.get_delta(0), 1) + self.assertEqual(accountant.get_delta(float('inf')), 1) + except NotImplementedError: + # Implementing `get_delta` is optional. + pass diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py new file mode 100644 index 0000000..2bbc327 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py @@ -0,0 +1,572 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Privacy accountant that uses Renyi differential privacy.""" + +import math +from typing import Collection, Optional + +import numpy as np +from scipy import special +import six +from tensorflow_privacy.privacy.analysis import dp_event +from tensorflow_privacy.privacy.analysis import privacy_accountant + +NeighborRel = privacy_accountant.NeighboringRelation + + +def _log_add(logx, logy): + """Adds two numbers in the log space.""" + a, b = min(logx, logy), max(logx, logy) + if a == -np.inf: # adding 0 + return b + # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b) + return math.log1p(math.exp(a - b)) + b # log1p(x) = log(x + 1) + + +def _log_sub(logx, logy): + """Subtracts two numbers in the log space. Answer must be non-negative.""" + if logx < logy: + raise ValueError('The result of subtraction must be non-negative.') + if logy == -np.inf: # subtracting 0 + return logx + if logx == logy: + return -np.inf # 0 is represented as -np.inf in the log space. + + try: + # Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y). 
+ return math.log(math.expm1(logx - logy)) + logy # expm1(x) = exp(x) - 1 + except OverflowError: + return logx + + +def _log_sub_sign(logx, logy): + """Returns log(exp(logx)-exp(logy)) and its sign.""" + if logx > logy: + s = True + mag = logx + np.log(1 - np.exp(logy - logx)) + elif logx < logy: + s = False + mag = logy + np.log(1 - np.exp(logx - logy)) + else: + s = True + mag = -np.inf + + return s, mag + + +def _log_comb(n, k): + """Computes log of binomial coefficient.""" + return (special.gammaln(n + 1) - special.gammaln(k + 1) - + special.gammaln(n - k + 1)) + + +def _compute_log_a_int(q, sigma, alpha): + """Computes log(A_alpha) for integer alpha, 0 < q < 1.""" + assert isinstance(alpha, six.integer_types) + + # Initialize with 0 in the log space. + log_a = -np.inf + + for i in range(alpha + 1): + log_coef_i = ( + _log_comb(alpha, i) + i * math.log(q) + (alpha - i) * math.log(1 - q)) + + s = log_coef_i + (i * i - i) / (2 * (sigma**2)) + log_a = _log_add(log_a, s) + + return float(log_a) + + +def _compute_log_a_frac(q, sigma, alpha): + """Computes log(A_alpha) for fractional alpha, 0 < q < 1.""" + # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are + # initialized to 0 in the log space: + log_a0, log_a1 = -np.inf, -np.inf + i = 0 + + z0 = sigma**2 * math.log(1 / q - 1) + .5 + + while True: # do ... until loop + coef = special.binom(alpha, i) + log_coef = math.log(abs(coef)) + j = alpha - i + + log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q) + log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q) + + log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma)) + log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma)) + + log_s0 = log_t0 + (i * i - i) / (2 * (sigma**2)) + log_e0 + log_s1 = log_t1 + (j * j - j) / (2 * (sigma**2)) + log_e1 + + if coef > 0: + log_a0 = _log_add(log_a0, log_s0) + log_a1 = _log_add(log_a1, log_s1) + else: + log_a0 = _log_sub(log_a0, log_s0) + log_a1 = _log_sub(log_a1, log_s1) + + i += 1 + if max(log_s0, log_s1) < -30: + break + + return _log_add(log_a0, log_a1) + + +def _log_erfc(x): + """Computes log(erfc(x)) with high accuracy for large x.""" + try: + return math.log(2) + special.log_ndtr(-x * 2**.5) + except NameError: + # If log_ndtr is not available, approximate as follows: + r = special.erfc(x) + if r == 0.0: + # Using the Laurent series at infinity for the tail of the erfc function: + # erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5) + # To verify in Mathematica: + # Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}] + return (-math.log(math.pi) / 2 - math.log(x) - x**2 - .5 * x**-2 + + .625 * x**-4 - 37. / 24. * x**-6 + 353. / 64. * x**-8) + else: + return math.log(r) + + +def _compute_delta(orders, rdp, epsilon): + """Compute delta given a list of RDP values and target epsilon. + + Args: + orders: An array of orders. + rdp: An array of RDP guarantees. + epsilon: The target epsilon. + + Returns: + Optimal delta. + + Raises: + ValueError: If input is malformed. + + """ + if epsilon < 0: + raise ValueError(f'Epsilon cannot be negative. 
Found {epsilon}.')
+  if len(orders) != len(rdp):
+    raise ValueError('Input lists must have the same length.')
+
+  # Basic bound (see https://arxiv.org/abs/1702.07476 Proposition 3 in v3):
+  #   delta = min( np.exp((rdp - epsilon) * (orders - 1)) )
+
+  # Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4):
+  logdeltas = []  # work in log space to avoid overflows
+  for (a, r) in zip(orders, rdp):
+    if a < 1:
+      raise ValueError(f'Renyi divergence order must be at least 1. Found {a}.')
+    if r < 0:
+      raise ValueError(f'Renyi divergence cannot be negative. Found {r}.')
+    # For small alpha, we are better off with the bound via KL divergence:
+    # delta <= sqrt(1-exp(-KL)).
+    # Take a min of the two bounds.
+    if r == 0:
+      logdelta = -np.inf
+    else:
+      logdelta = 0.5 * math.log1p(-math.exp(-r))
+    if a > 1.01:
+      # This bound is not numerically stable as alpha->1.
+      # Thus we have a min value for alpha.
+      # The bound is also not useful for small alpha, so doesn't matter.
+      rdp_bound = (a - 1) * (r - epsilon + math.log1p(-1 / a)) - math.log(a)
+      logdelta = min(logdelta, rdp_bound)
+
+    logdeltas.append(logdelta)
+
+  return min(math.exp(np.min(logdeltas)), 1.)
+
+
+def _compute_epsilon(orders, rdp, delta):
+  """Computes epsilon given a list of RDP values and target delta.
+
+  Args:
+    orders: An array of orders.
+    rdp: An array of RDP guarantees.
+    delta: The target delta. Must be >= 0.
+
+  Returns:
+    Optimal epsilon.
+
+  Raises:
+    ValueError: If input is malformed.
+
+  """
+  if delta < 0:
+    raise ValueError(f'Delta cannot be negative. Found {delta}.')
+
+  if delta == 0:
+    if all(r == 0 for r in rdp):
+      return 0
+    else:
+      return np.inf
+
+  if len(orders) != len(rdp):
+    raise ValueError('Input lists must have the same length.')
+
+  # Basic bound (see https://arxiv.org/abs/1702.07476 Proposition 3 in v3):
+  #   epsilon = min( rdp - math.log(delta) / (orders - 1) )
+
+  # Improved bound from https://arxiv.org/abs/2004.00010 Proposition 12 (in v4).
+  # Also appears in https://arxiv.org/abs/2001.05990 Equation 20 (in v1).
+  eps = []
+  for (a, r) in zip(orders, rdp):
+    if a < 1:
+      raise ValueError(f'Renyi divergence order must be at least 1. Found {a}.')
+    if r < 0:
+      raise ValueError(f'Renyi divergence cannot be negative. Found {r}.')
+
+    if delta**2 + math.expm1(-r) > 0:
+      # In this case, we can simply bound via KL divergence:
+      # delta <= sqrt(1-exp(-KL)).
+      epsilon = 0  # No need to try further computation if we have epsilon = 0.
+    elif a > 1.01:
+      # This bound is not numerically stable as alpha->1.
+      # Thus we have a min value for alpha.
+      # The bound is also not useful for small alpha, so doesn't matter.
+      epsilon = r + math.log1p(-1 / a) - math.log(delta * a) / (a - 1)
+    else:
+      # In this case we can't do anything. E.g., asking for delta = 0.
+      epsilon = np.inf
+    eps.append(epsilon)
+
+  return max(0, np.min(eps))
+
+
+def _stable_inplace_diff_in_log(vec, signs, n=-1):
+  """Replaces the first n-1 dims of vec with the log of abs difference operator.
+
+  Args:
+    vec: numpy array of floats with size larger than 'n'.
+    signs: Optional numpy array of bools with the same size as vec, in case
+      one needs to compute partial differences. vec and signs jointly describe
+      a vector of real numbers' sign and abs in log scale.
+    n: Optional upper bound on number of differences to compute. If negative,
+      all differences are computed.
+
+  Returns:
+    The first n-1 entries of vec and signs will store the log-abs and sign of
+    the difference.
+
+  Raises:
+    ValueError: If input is malformed.
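A worked sketch of the improved epsilon bound used in `_compute_epsilon` above, for a single illustrative order (the numbers are assumptions, not from the patch):

  import math

  a, r, delta = 32.0, 0.5, 1e-5  # order, RDP at that order, target delta
  eps = r + math.log1p(-1 / a) - math.log(delta * a) / (a - 1)
  # eps is roughly 0.73 here; `_compute_epsilon` minimizes this quantity
  # over all orders and clamps the result at 0.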
+ """ + + assert vec.shape == signs.shape + if n < 0: + n = np.max(vec.shape) - 1 + else: + assert np.max(vec.shape) >= n + 1 + for j in range(0, n, 1): + if signs[j] == signs[j + 1]: # When the signs are the same + # if the signs are both positive, then we can just use the standard one + signs[j], vec[j] = _log_sub_sign(vec[j + 1], vec[j]) + # otherwise, we do that but toggle the sign + if not signs[j + 1]: + signs[j] = ~signs[j] + else: # When the signs are different. + vec[j] = _log_add(vec[j], vec[j + 1]) + signs[j] = signs[j + 1] + + +def _get_forward_diffs(fun, n): + """Computes up to nth order forward difference evaluated at 0. + + See Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf + + Args: + fun: Function to compute forward differences of. + n: Number of differences to compute. + + Returns: + Pair (deltas, signs_deltas) of the log deltas and their signs. + """ + func_vec = np.zeros(n + 3) + signs_func_vec = np.ones(n + 3, dtype=bool) + + # ith coordinate of deltas stores log(abs(ith order discrete derivative)) + deltas = np.zeros(n + 2) + signs_deltas = np.zeros(n + 2, dtype=bool) + for i in range(1, n + 3, 1): + func_vec[i] = fun(1.0 * (i - 1)) + for i in range(0, n + 2, 1): + # Diff in log scale + _stable_inplace_diff_in_log(func_vec, signs_func_vec, n=n + 2 - i) + deltas[i] = func_vec[0] + signs_deltas[i] = signs_func_vec[0] + return deltas, signs_deltas + + +def _compute_log_a(q, noise_multiplier, alpha): + if float(alpha).is_integer(): + return _compute_log_a_int(q, noise_multiplier, int(alpha)) + else: + return _compute_log_a_frac(q, noise_multiplier, alpha) + + +def _compute_rdp_poisson_subsampled_gaussian(q, noise_multiplier, orders): + """Computes RDP of the Poisson sampled Gaussian mechanism. + + Args: + q: The sampling rate. + noise_multiplier: The ratio of the standard deviation of the Gaussian noise + to the l2-sensitivity of the function to which it is added. + orders: An array of RDP orders. + + Returns: + The RDPs at all orders. Can be `np.inf`. + """ + + def compute_one_order(q, alpha): + if np.isinf(alpha) or noise_multiplier == 0: + return np.inf + + if q == 0: + return 0 + + if q == 1.: + return alpha / (2 * noise_multiplier**2) + + return _compute_log_a(q, noise_multiplier, alpha) / (alpha - 1) + + return np.array([compute_one_order(q, order) for order in orders]) + + +def _compute_rdp_sample_wor_gaussian(q, noise_multiplier, orders): + """Computes RDP of Gaussian mechanism using sampling without replacement. + + This function applies to the following schemes: + 1. Sampling w/o replacement: Sample a uniformly random subset of size m = q*n. + 2. ``Replace one data point'' version of differential privacy, i.e., n is + considered public information. + + Reference: Theorem 27 of https://arxiv.org/pdf/1808.00087.pdf (A strengthened + version applies subsampled-Gaussian mechanism.) + - Wang, Balle, Kasiviswanathan. "Subsampled Renyi Differential Privacy and + Analytical Moments Accountant." AISTATS'2019. + + Args: + q: The sampling proportion = m / n. Assume m is an integer <= n. + noise_multiplier: The ratio of the standard deviation of the Gaussian noise + to the l2-sensitivity of the function to which it is added. + orders: An array of RDP orders. + + Returns: + The RDPs at all orders, can be np.inf. + """ + return np.array([ + _compute_rdp_sample_wor_gaussian_scalar(q, noise_multiplier, order) + for order in orders + ]) + + +def _compute_rdp_sample_wor_gaussian_scalar(q, sigma, alpha): + """Compute RDP of the Sampled Gaussian mechanism at order alpha. 
+
+  Args:
+    q: The sampling proportion = m / n. Assume m is an integer <= n.
+    sigma: The std of the additive Gaussian noise.
+    alpha: The order at which RDP is computed.
+
+  Returns:
+    RDP at alpha, can be np.inf.
+  """
+
+  assert (q <= 1) and (q >= 0) and (alpha >= 1)
+
+  if q == 0:
+    return 0
+
+  if q == 1.:
+    return alpha / (2 * sigma**2)
+
+  if np.isinf(alpha):
+    return np.inf
+
+  if float(alpha).is_integer():
+    return _compute_rdp_sample_wor_gaussian_int(q, sigma, int(alpha)) / (
+        alpha - 1)
+  else:
+    # When alpha is not an integer, we apply Corollary 10 of [WBK19] to
+    # interpolate the CGF and obtain an upper bound.
+    alpha_f = math.floor(alpha)
+    alpha_c = math.ceil(alpha)
+
+    x = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_f)
+    y = _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha_c)
+    t = alpha - alpha_f
+    return ((1 - t) * x + t * y) / (alpha - 1)
+
+
+def _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha):
+  """Computes log(A_alpha) for integer alpha, subsampling without replacement.
+
+  When alpha is smaller than max_alpha, computes the bound of Theorem 27
+  exactly; otherwise computes the bound using the Stirling approximation.
+
+  Args:
+    q: The sampling proportion = m / n. Assume m is an integer <= n.
+    sigma: The std of the additive Gaussian noise.
+    alpha: The order at which RDP is computed.
+
+  Returns:
+    log(A_alpha) at integer order alpha, which can be np.inf.
+  """
+
+  max_alpha = 256
+  assert isinstance(alpha, six.integer_types)
+
+  if np.isinf(alpha):
+    return np.inf
+  elif alpha == 1:
+    return 0
+
+  def cgf(x):
+    # Return rdp(x+1)*x; the rdp of the Gaussian mechanism is alpha/(2*sigma**2).
+    return x * 1.0 * (x + 1) / (2.0 * sigma**2)
+
+  def func(x):
+    # Return the rdp of the Gaussian mechanism.
+    return 1.0 * x / (2.0 * sigma**2)
+
+  # Initialize with 1 in the log space.
+  log_a = 0
+  # Calculates the log term when alpha = 2.
+  log_f2m1 = func(2.0) + np.log(1 - np.exp(-func(2.0)))
+  if alpha <= max_alpha:
+    # We need forward differences of exp(cgf).
+    # The following line is the numerically stable way of implementing it.
+    # The output is in polar form with logarithmic magnitude.
+    deltas, _ = _get_forward_diffs(cgf, alpha)
+    # Computing the bound exactly requires O(alpha**2) bookkeeping.
+
+    for i in range(2, alpha + 1):
+      if i == 2:
+        s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum(
+            np.log(4) + log_f2m1,
+            func(2.0) + np.log(2))
+      elif i > 2:
+        delta_lo = deltas[int(2 * np.floor(i / 2.0)) - 1]
+        delta_hi = deltas[int(2 * np.ceil(i / 2.0)) - 1]
+        s = np.log(4) + 0.5 * (delta_lo + delta_hi)
+        s = np.minimum(s, np.log(2) + cgf(i - 1))
+        s += i * np.log(q) + _log_comb(alpha, i)
+      log_a = _log_add(log_a, s)
+    return float(log_a)
+  else:
+    # Compute the bound with the Stirling approximation. Everything is O(x) now.
+    for i in range(2, alpha + 1):
+      if i == 2:
+        s = 2 * np.log(q) + _log_comb(alpha, 2) + np.minimum(
+            np.log(4) + log_f2m1,
+            func(2.0) + np.log(2))
+      else:
+        s = np.log(2) + cgf(i - 1) + i * np.log(q) + _log_comb(alpha, i)
+      log_a = _log_add(log_a, s)
+
+    return log_a
+
+
+class RdpAccountant(privacy_accountant.PrivacyAccountant):
+  """Privacy accountant that uses Renyi differential privacy."""
+
+  def __init__(
+      self,
+      orders: Optional[Collection[float]] = None,
+      neighboring_relation: NeighborRel = NeighborRel.ADD_OR_REMOVE_ONE,
+  ):
+    super(RdpAccountant, self).__init__(neighboring_relation)
+    if orders is None:
+      # Default orders chosen to give good coverage for Gaussian mechanism in
+      # the privacy regime of interest.
In the future, more orders might be + # added, in particular, fractional orders between 1.0 and 10.0 or so. + orders = [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 20, 24, 28, 32, 48, 64, 128, + 256, 512, 1024 + ] + self._orders = np.array(orders) + self._rdp = np.zeros_like(orders, dtype=np.float64) + + def supports(self, event: dp_event.DpEvent) -> bool: + return self._maybe_compose(event, 0, False) + + def _compose(self, event: dp_event.DpEvent, count: int = 1): + self._maybe_compose(event, count, True) + + def _maybe_compose(self, event: dp_event.DpEvent, count: int, + do_compose: bool) -> bool: + """Traverses `event` and performs composition if `do_compose` is True. + + If `do_compose` is False, can be used to check whether composition is + supported. + + Args: + event: A `DpEvent` to process. + count: The number of times to compose the event. + do_compose: Whether to actually perform the composition. + + Returns: + True if event is supported, otherwise False. + """ + + if isinstance(event, dp_event.NoOpDpEvent): + return True + elif isinstance(event, dp_event.NonPrivateDpEvent): + if do_compose: + self._rdp += np.inf + return True + elif isinstance(event, dp_event.SelfComposedDpEvent): + return self._maybe_compose(event.event, event.count * count, do_compose) + elif isinstance(event, dp_event.ComposedDpEvent): + return all( + self._maybe_compose(e, count, do_compose) for e in event.events) + elif isinstance(event, dp_event.GaussianDpEvent): + if do_compose: + self._rdp += count * _compute_rdp_poisson_subsampled_gaussian( + q=1.0, noise_multiplier=event.noise_multiplier, orders=self._orders) + return True + elif isinstance(event, dp_event.PoissonSampledDpEvent): + if (self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE or + not isinstance(event.event, dp_event.GaussianDpEvent)): + return False + if do_compose: + self._rdp += count * _compute_rdp_poisson_subsampled_gaussian( + q=event.sampling_probability, + noise_multiplier=event.event.noise_multiplier, + orders=self._orders) + return True + elif isinstance(event, dp_event.FixedBatchSampledWorDpEvent): + if (self._neighboring_relation is not NeighborRel.REPLACE_ONE or + not isinstance(event.event, dp_event.GaussianDpEvent)): + return False + if do_compose: + self._rdp += count * _compute_rdp_sample_wor_gaussian( + q=event.batch_size / event.dataset_size, + noise_multiplier=event.event.noise_multiplier, + orders=self._orders) + return True + else: + # Unsupported event (including `UnsupportedDpEvent`). + return False + + def get_epsilon(self, target_delta: float) -> float: + return _compute_epsilon(self._orders, self._rdp, target_delta) + + def get_delta(self, target_epsilon: float) -> float: + return _compute_delta(self._orders, self._rdp, target_epsilon) diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py new file mode 100644 index 0000000..817d41c --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py @@ -0,0 +1,307 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for rdp_privacy_accountant.""" + +import math +import sys + +from absl.testing import absltest +from absl.testing import parameterized +import mpmath +import numpy as np + +from tensorflow_privacy.privacy.analysis import dp_event +from tensorflow_privacy.privacy.analysis import privacy_accountant +from tensorflow_privacy.privacy.analysis import privacy_accountant_test +from tensorflow_privacy.privacy.analysis import rdp_privacy_accountant + + +def _get_test_rdp(event, count=1): + accountant = rdp_privacy_accountant.RdpAccountant(orders=[2.71828]) + accountant.compose(event, count) + return accountant._rdp[0] + + +def _log_float_mp(x): + # Convert multi-precision input to float log space. + if x >= sys.float_info.min: + return float(mpmath.log(x)) + else: + return -np.inf + + +def _compute_a_mp(sigma, q, alpha): + """Compute A_alpha for arbitrary alpha by numerical integration.""" + + def mu0(x): + return mpmath.npdf(x, mu=0, sigma=sigma) + + def _mu_over_mu0(x, q, sigma): + return (1 - q) + q * mpmath.exp((2 * x - 1) / (2 * sigma**2)) + + def a_alpha_fn(z): + return mu0(z) * _mu_over_mu0(z, q, sigma)**alpha + + bounds = (-mpmath.inf, mpmath.inf) + a_alpha, _ = mpmath.quad(a_alpha_fn, bounds, error=True, maxdegree=8) + return a_alpha + + +class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, + parameterized.TestCase): + + def _make_test_accountants(self): + return [ + rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE), + rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.REPLACE_ONE) + ] + + def test_supports(self): + aor_accountant = rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.ADD_OR_REMOVE_ONE) + ro_accountant = rdp_privacy_accountant.RdpAccountant( + [2.0], privacy_accountant.NeighboringRelation.REPLACE_ONE) + + event = dp_event.GaussianDpEvent(1.0) + self.assertTrue(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(1.0), 6) + self.assertTrue(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.ComposedDpEvent( + [dp_event.GaussianDpEvent(1.0), + dp_event.GaussianDpEvent(2.0)]) + self.assertTrue(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.PoissonSampledDpEvent(0.1, dp_event.GaussianDpEvent(1.0)) + self.assertTrue(aor_accountant.supports(event)) + self.assertFalse(ro_accountant.supports(event)) + + event = dp_event.FixedBatchSampledWorDpEvent(1000, 10, + dp_event.GaussianDpEvent(1.0)) + self.assertFalse(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + + event = dp_event.FixedBatchSampledWrDpEvent(1000, 10, + dp_event.GaussianDpEvent(1.0)) + self.assertFalse(aor_accountant.supports(event)) + self.assertFalse(ro_accountant.supports(event)) + + def test_rdp_composition(self): + base_event = 
dp_event.GaussianDpEvent(3.14159) + base_rdp = _get_test_rdp(base_event) + + rdp_with_count = _get_test_rdp(base_event, count=6) + self.assertAlmostEqual(rdp_with_count, base_rdp * 6) + + rdp_with_self_compose = _get_test_rdp( + dp_event.SelfComposedDpEvent(base_event, 6)) + self.assertAlmostEqual(rdp_with_self_compose, base_rdp * 6) + + rdp_with_self_compose_and_count = _get_test_rdp( + dp_event.SelfComposedDpEvent(base_event, 2), count=3) + self.assertAlmostEqual(rdp_with_self_compose_and_count, base_rdp * 6) + + rdp_with_compose = _get_test_rdp(dp_event.ComposedDpEvent([base_event] * 6)) + self.assertAlmostEqual(rdp_with_compose, base_rdp * 6) + + rdp_with_compose_and_self_compose = _get_test_rdp( + dp_event.ComposedDpEvent([ + dp_event.SelfComposedDpEvent(base_event, 1), + dp_event.SelfComposedDpEvent(base_event, 2), + dp_event.SelfComposedDpEvent(base_event, 3) + ])) + self.assertAlmostEqual(rdp_with_compose_and_self_compose, base_rdp * 6) + + base_event_2 = dp_event.GaussianDpEvent(1.61803) + base_rdp_2 = _get_test_rdp(base_event_2) + rdp_with_heterogeneous_compose = _get_test_rdp( + dp_event.ComposedDpEvent([base_event, base_event_2])) + self.assertAlmostEqual(rdp_with_heterogeneous_compose, + base_rdp + base_rdp_2) + + def test_zero_poisson_sample(self): + accountant = rdp_privacy_accountant.RdpAccountant([3.14159]) + accountant.compose( + dp_event.PoissonSampledDpEvent(0, dp_event.GaussianDpEvent(1.0))) + self.assertEqual(accountant.get_epsilon(1e-10), 0) + self.assertEqual(accountant.get_delta(1e-10), 0) + + def test_zero_fixed_batch_sample(self): + accountant = rdp_privacy_accountant.RdpAccountant( + [3.14159], privacy_accountant.NeighboringRelation.REPLACE_ONE) + accountant.compose( + dp_event.FixedBatchSampledWorDpEvent(1000, 0, + dp_event.GaussianDpEvent(1.0))) + self.assertEqual(accountant.get_epsilon(1e-10), 0) + self.assertEqual(accountant.get_delta(1e-10), 0) + + def test_epsilon_non_private_gaussian(self): + accountant = rdp_privacy_accountant.RdpAccountant([3.14159]) + accountant.compose(dp_event.GaussianDpEvent(0)) + self.assertEqual(accountant.get_epsilon(1e-1), np.inf) + + def test_compute_rdp_gaussian(self): + alpha = 3.14159 + sigma = 2.71828 + event = dp_event.GaussianDpEvent(sigma) + accountant = rdp_privacy_accountant.RdpAccountant(orders=[alpha]) + accountant.compose(event) + self.assertAlmostEqual(accountant._rdp[0], alpha / (2 * sigma**2)) + + def test_compute_rdp_poisson_sampled_gaussian(self): + orders = [1.5, 2.5, 5, 50, 100, np.inf] + noise_multiplier = 2.5 + sampling_probability = 0.01 + count = 50 + event = dp_event.SelfComposedDpEvent( + dp_event.PoissonSampledDpEvent( + sampling_probability, dp_event.GaussianDpEvent(noise_multiplier)), + count) + accountant = rdp_privacy_accountant.RdpAccountant(orders=orders) + accountant.compose(event) + self.assertTrue( + np.allclose( + accountant._rdp, [ + 6.5007e-04, 1.0854e-03, 2.1808e-03, 2.3846e-02, 1.6742e+02, + np.inf + ], + rtol=1e-4)) + + def test_compute_epsilon_delta_pure_dp(self): + orders = range(2, 33) + rdp = [1.1 for o in orders] # Constant corresponds to pure DP. + + epsilon = rdp_privacy_accountant._compute_epsilon(orders, rdp, delta=1e-5) + # Compare with epsilon computed by hand. 
+ self.assertAlmostEqual(epsilon, 1.32783806176) + + delta = rdp_privacy_accountant._compute_delta( + orders, rdp, epsilon=1.32783806176) + self.assertAlmostEqual(delta, 1e-5) + + def test_compute_epsilon_delta_gaussian(self): + orders = [0.001 * i for i in range(1000, 100000)] + + # noise multiplier is chosen to obtain exactly (1,1e-6)-DP. + rdp = rdp_privacy_accountant._compute_rdp_poisson_subsampled_gaussian( + 1, 4.530877117, orders) + + eps = rdp_privacy_accountant._compute_epsilon(orders, rdp, delta=1e-6) + self.assertAlmostEqual(eps, 1) + + delta = rdp_privacy_accountant._compute_delta(orders, rdp, epsilon=1) + self.assertAlmostEqual(delta, 1e-6) + + params = ({ + 'q': 1e-7, + 'sigma': .1, + 'order': 1.01 + }, { + 'q': 1e-6, + 'sigma': .1, + 'order': 256 + }, { + 'q': 1e-5, + 'sigma': .1, + 'order': 256.1 + }, { + 'q': 1e-6, + 'sigma': 1, + 'order': 27 + }, { + 'q': 1e-4, + 'sigma': 1., + 'order': 1.5 + }, { + 'q': 1e-3, + 'sigma': 1., + 'order': 2 + }, { + 'q': .01, + 'sigma': 10, + 'order': 20 + }, { + 'q': .1, + 'sigma': 100, + 'order': 20.5 + }, { + 'q': .99, + 'sigma': .1, + 'order': 256 + }, { + 'q': .999, + 'sigma': 100, + 'order': 256.1 + }) + + # pylint:disable=undefined-variable + @parameterized.parameters(p for p in params) + def test_compute_log_a_equals_mp(self, q, sigma, order): + # Compare the cheap computation of log(A) with an expensive, multi-precision + # computation. + log_a = rdp_privacy_accountant._compute_log_a(q, sigma, order) + log_a_mp = _log_float_mp(_compute_a_mp(sigma, q, order)) + np.testing.assert_allclose(log_a, log_a_mp, rtol=1e-4) + + def test_delta_bounds_gaussian(self): + # Compare the optimal bound for Gaussian with the one derived from RDP. + # Also compare the RDP upper bound with the "standard" upper bound. + orders = [0.1 * x for x in range(10, 505)] + eps_vec = [0.1 * x for x in range(500)] + rdp = rdp_privacy_accountant._compute_rdp_poisson_subsampled_gaussian( + 1, 1, orders) + for eps in eps_vec: + delta = rdp_privacy_accountant._compute_delta(orders, rdp, epsilon=eps) + # For comparison, we compute the optimal guarantee for Gaussian + # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2). + delta0 = math.erfc((eps - .5) / math.sqrt(2)) / 2 + delta0 = delta0 - math.exp(eps) * math.erfc((eps + .5) / math.sqrt(2)) / 2 + self.assertLessEqual(delta0, delta + 1e-300) # need tolerance 10^-300 + + # Compute the "standard" upper bound, which should be an upper bound. + # Note, if orders is too sparse, this will NOT be an upper bound. + if eps >= 0.5: + delta1 = math.exp(-0.5 * (eps - 0.5)**2) + else: + delta1 = 1 + self.assertLessEqual(delta, delta1 + 1e-300) + + def test_epsilon_delta_consistency(self): + orders = range(2, 50) # Large range of orders (helps test for overflows). 
+    for q in [0, 0.01, 0.1, 0.8, 1.]:
+      for multiplier in [0.0, 0.1, 1., 10., 100.]:
+        event = dp_event.PoissonSampledDpEvent(
+            q, dp_event.GaussianDpEvent(multiplier))
+        accountant = rdp_privacy_accountant.RdpAccountant(orders)
+        accountant.compose(event)
+        for delta in [.99, .9, .1, .01, 1e-3, 1e-5, 1e-9, 1e-12]:
+          epsilon = accountant.get_epsilon(delta)
+          delta2 = accountant.get_delta(epsilon)
+          if np.isposinf(epsilon):
+            self.assertEqual(delta2, 1.0)
+          elif epsilon == 0:
+            self.assertLessEqual(delta2, delta)
+          else:
+            self.assertAlmostEqual(delta, delta2)
+
+
+if __name__ == '__main__':
+  absltest.main()

From c39d628e1654a7a7623b0a4be67cbf11f5b01a0e Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Mon, 20 Sep 2021 22:38:23 -0700
Subject: [PATCH 54/71] Change `PeriodicRoundRestartIndicator` to return the
 first `True` at a given number of calls. Also update the code style to be
 more compatible with graph mode and TFF.

PiperOrigin-RevId: 397918733
---
 .../privacy/dp_query/restart_query.py         | 21 ++++++++++++++----
 .../privacy/dp_query/restart_query_test.py    | 22 ++++++++++++++++++-
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/tensorflow_privacy/privacy/dp_query/restart_query.py b/tensorflow_privacy/privacy/dp_query/restart_query.py
index 5716b0b..8d1ff8c 100644
--- a/tensorflow_privacy/privacy/dp_query/restart_query.py
+++ b/tensorflow_privacy/privacy/dp_query/restart_query.py
@@ -17,6 +17,7 @@ This query is used to compose with a DPQuery that has `reset_state` function.
 """
 import abc
 import collections
+from typing import Optional
 
 import tensorflow as tf
 
@@ -60,17 +61,26 @@ class PeriodicRoundRestartIndicator(RestartIndicator):
   The indicator will maintain an internal counter as state.
   """
 
-  def __init__(self, frequency: int):
+  def __init__(self, frequency: int, warmup: Optional[int] = None):
     """Construct the `PeriodicRoundRestartIndicator`.
 
     Args:
      frequency: The `next` function will return `True` every `frequency`
        number of `next` calls.
+      warmup: The first `True` will be returned on the `warmup`-th call of
+        `next`.
     """
     if frequency < 1:
-      raise ValueError('Restart frequency should be equal or larger than 1 '
+      raise ValueError('Restart frequency should be equal or larger than 1, '
                        f'got {frequency}')
-    self.frequency = tf.constant(frequency, tf.int32)
+    if warmup is None:
+      warmup = 0
+    elif warmup <= 0 or warmup >= frequency:
+      raise ValueError(
+          f'Warmup should be between 1 and `frequency-1={frequency-1}`, '
+          f'got {warmup}')
+    self.frequency = frequency
+    self.warmup = warmup
 
   def initialize(self):
     """Returns initialized state of 0 for `PeriodicRoundRestartIndicator`."""
@@ -86,8 +96,10 @@ class PeriodicRoundRestartIndicator(RestartIndicator):
       A pair (value, new_state) where value is the bool indicator and new_state
         of `state+1`.
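A small sketch (not from the patch) of the warmup semantics described above: with frequency=4 and warmup=2, run eagerly, the indicator fires on calls 2, 6, and so on.

  from tensorflow_privacy.privacy.dp_query import restart_query

  indicator = restart_query.PeriodicRoundRestartIndicator(frequency=4, warmup=2)
  state = indicator.initialize()
  flags = []
  for _ in range(8):
    flag, state = indicator.next(state)
    flags.append(bool(flag))
  # flags == [False, True, False, False, False, True, False, False]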
""" + frequency = tf.constant(self.frequency, tf.int32) + warmup = tf.constant(self.warmup, tf.int32) state = state + tf.constant(1, tf.int32) - flag = state % self.frequency == 0 + flag = tf.math.equal(tf.math.floormod(state, frequency), warmup) return flag, state @@ -132,6 +144,7 @@ class RestartQuery(dp_query.SumAggregationDPQuery): """Implements `tensorflow_privacy.DPQuery.preprocess_record`.""" return self._inner_query.preprocess_record(params, record) + @tf.function def get_noised_result(self, sample_state, global_state): """Implements `tensorflow_privacy.DPQuery.get_noised_result`.""" noised_results, inner_state, event = self._inner_query.get_noised_result( diff --git a/tensorflow_privacy/privacy/dp_query/restart_query_test.py b/tensorflow_privacy/privacy/dp_query/restart_query_test.py index f3a0276..bf6c374 100644 --- a/tensorflow_privacy/privacy/dp_query/restart_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/restart_query_test.py @@ -27,6 +27,15 @@ class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): ValueError, 'Restart frequency should be equal or larger than 1'): restart_query.PeriodicRoundRestartIndicator(frequency) + @parameterized.named_parameters(('zero', 0), ('negative', -1), ('equal', 2), + ('large', 3)) + def test_round_raise_warmup(self, warmup): + frequency = 2 + with self.assertRaisesRegex( + ValueError, + f'Warmup should be between 1 and `frequency-1={frequency-1}`'): + restart_query.PeriodicRoundRestartIndicator(frequency, warmup) + @parameterized.named_parameters(('f1', 1), ('f2', 2), ('f4', 4), ('f5', 5)) def test_round_indicator(self, frequency): total_steps = 20 @@ -39,6 +48,18 @@ class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): else: self.assertFalse(flag) + @parameterized.named_parameters(('f2', 2, 1), ('f4', 4, 3), ('f5', 5, 2)) + def test_round_indicator_warmup(self, frequency, warmup): + total_steps = 20 + indicator = restart_query.PeriodicRoundRestartIndicator(frequency, warmup) + state = indicator.initialize() + for i in range(total_steps): + flag, state = indicator.next(state) + if i % frequency == warmup - 1: + self.assertTrue(flag) + else: + self.assertFalse(flag) + def _get_l2_clip_fn(): @@ -118,7 +139,6 @@ class RestartQueryTest(tf.test.TestCase, parameterized.TestCase): expected = scalar_value + tree_node_value * ( bin(i % frequency + 1)[2:].count('1') - bin(i % frequency)[2:].count('1')) - print(i, query_result, expected) self.assertEqual(query_result, expected) From 67a7096d529a8d43191defa5183670279537c919 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Wed, 22 Sep 2021 11:30:33 -0700 Subject: [PATCH 55/71] ComposedDpEvent can be a list of any DpEvent, not only SelfComposedDpEvent. For example there is no reason we shouldn't be able to compose a single GaussianDpEvent and a single LaplaceDpEvent without having to wrap them in SelfComposedDpEvent with count == 1. 
PiperOrigin-RevId: 398288473 --- tensorflow_privacy/privacy/analysis/dp_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py index ac45a64..b40b823 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event.py +++ b/tensorflow_privacy/privacy/analysis/dp_event.py @@ -74,7 +74,7 @@ class SelfComposedDpEvent(DpEvent): @attr.s(frozen=True, slots=True, auto_attribs=True) class ComposedDpEvent(DpEvent): """A series of composed mechanisms.""" - events: List[SelfComposedDpEvent] + events: List[DpEvent] @attr.s(frozen=True, slots=True, auto_attribs=True) From 39c75f62af7bc424bfcd0d1b7cff3a967880f96a Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Wed, 22 Sep 2021 16:37:21 -0700 Subject: [PATCH 56/71] DpEventBuilder tracks the order of events, instead of just maintaining a multiset. Existing approaches to accounting are generally agnostic to the order of composition, even when the composition is adaptive. But in principle it is possible for an accountant to require such information, so we had better not throw it away. Note that `ComposedDpEvent` is now treated like any other `DpEvent`, not taken apart and the components added separately as it was. The reason for this is that a common pattern may be to compose a series of `ComposedDpEvent`s that have identical substructure. We want the `DpEventBuilder` to represent this as a single `SelfComposedDpEvent`, not a linearly-growing `ComposedDpEvent`. PiperOrigin-RevId: 398359519 --- .../privacy/analysis/dp_event_builder.py | 36 +++++++++---------- .../privacy/analysis/dp_event_builder_test.py | 27 +++++++------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/tensorflow_privacy/privacy/analysis/dp_event_builder.py b/tensorflow_privacy/privacy/analysis/dp_event_builder.py index 722a1e4..53d4cc2 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event_builder.py +++ b/tensorflow_privacy/privacy/analysis/dp_event_builder.py @@ -13,8 +13,6 @@ # limitations under the License. """Builder class for ComposedDpEvent.""" -import collections - from tensorflow_privacy.privacy.analysis import dp_event @@ -28,7 +26,8 @@ class DpEventBuilder(object): """ def __init__(self): - self._events = collections.OrderedDict() + # A list of (event, count) pairs. + self._event_counts = [] self._composed_event = None def compose(self, event: dp_event.DpEvent, count: int = 1): @@ -46,33 +45,32 @@ class DpEventBuilder(object): if count < 1: raise ValueError(f'`count` must be positive. 
Found {count}.') - if isinstance(event, dp_event.ComposedDpEvent): - for composed_event in event.events: - self.compose(composed_event, count) + if isinstance(event, dp_event.NoOpDpEvent): + return elif isinstance(event, dp_event.SelfComposedDpEvent): self.compose(event.event, count * event.count) - elif isinstance(event, dp_event.NoOpDpEvent): - return else: - current_count = self._events.get(event, 0) - self._events[event] = current_count + count + if self._event_counts and self._event_counts[-1][0] == event: + new_event_count = (event, self._event_counts[-1][1] + count) + self._event_counts[-1] = new_event_count + else: + self._event_counts.append((event, count)) self._composed_event = None def build(self) -> dp_event.DpEvent: """Builds and returns the composed DpEvent represented by the builder.""" if not self._composed_event: - self_composed_events = [] - for event, count in self._events.items(): + events = [] + for event, count in self._event_counts: if count == 1: - self_composed_events.append(event) + events.append(event) else: - self_composed_events.append( - dp_event.SelfComposedDpEvent(event, count)) - if not self_composed_events: + events.append(dp_event.SelfComposedDpEvent(event, count)) + if not events: self._composed_event = dp_event.NoOpDpEvent() - elif len(self_composed_events) == 1: - self._composed_event = self_composed_events[0] + elif len(events) == 1: + self._composed_event = events[0] else: - self._composed_event = dp_event.ComposedDpEvent(self_composed_events) + self._composed_event = dp_event.ComposedDpEvent(events) return self._composed_event diff --git a/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py b/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py index a10d4bb..dd8a5f2 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py +++ b/tensorflow_privacy/privacy/analysis/dp_event_builder_test.py @@ -20,8 +20,6 @@ from tensorflow_privacy.privacy.analysis import dp_event_builder _gaussian_event = dp_event.GaussianDpEvent(1.0) _poisson_event = dp_event.PoissonSampledDpEvent(_gaussian_event, 0.1) _self_composed_event = dp_event.SelfComposedDpEvent(_gaussian_event, 3) -_composed_event = dp_event.ComposedDpEvent( - [_self_composed_event, _poisson_event]) class DpEventBuilderTest(absltest.TestCase): @@ -50,22 +48,27 @@ class DpEventBuilderTest(absltest.TestCase): def test_compose_heterogenous(self): builder = dp_event_builder.DpEventBuilder() + builder.compose(_poisson_event) builder.compose(_gaussian_event) - builder.compose(_poisson_event) builder.compose(_gaussian_event, 2) - self.assertEqual(_composed_event, builder.build()) + builder.compose(_poisson_event) + expected_event = dp_event.ComposedDpEvent( + [_poisson_event, _self_composed_event, _poisson_event]) + self.assertEqual(expected_event, builder.build()) - def test_compose_complex(self): + def test_compose_composed(self): builder = dp_event_builder.DpEventBuilder() - builder.compose(_gaussian_event, 2) - builder.compose(_composed_event) + composed_event = dp_event.ComposedDpEvent( + [_gaussian_event, _poisson_event, _self_composed_event]) + builder.compose(_gaussian_event) + builder.compose(composed_event) + builder.compose(composed_event, 2) + builder.compose(_poisson_event) builder.compose(_poisson_event) - builder.compose(_composed_event, 2) - expected_event = dp_event.ComposedDpEvent([ - dp_event.SelfComposedDpEvent(_gaussian_event, 11), - dp_event.SelfComposedDpEvent(_poisson_event, 4) - ]) + _gaussian_event, + dp_event.SelfComposedDpEvent(composed_event, 3), + 
dp_event.SelfComposedDpEvent(_poisson_event, 2)]) self.assertEqual(expected_event, builder.build()) From b8b4c4b26476e749fa46f7766d5ed5d4f01a4f8d Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Fri, 24 Sep 2021 13:59:16 -0700 Subject: [PATCH 57/71] Much more detailed documentation for `DpEvent`. The as yet unused `TreeAggregationDpEvent` is removed. It will be added as a custom `DpEvent` alongside the DpQueries in tree_aggregation_query.py in the near future. PiperOrigin-RevId: 398808647 --- tensorflow_privacy/__init__.py | 6 +- .../privacy/analysis/dp_event.py | 126 +++++++++++++----- .../analysis/rdp_privacy_accountant.py | 4 +- .../analysis/rdp_privacy_accountant_test.py | 12 +- 4 files changed, 106 insertions(+), 42 deletions(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 29b64ff..d3503f3 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -38,10 +38,8 @@ else: from tensorflow_privacy.privacy.analysis.dp_event import SelfComposedDpEvent from tensorflow_privacy.privacy.analysis.dp_event import ComposedDpEvent from tensorflow_privacy.privacy.analysis.dp_event import PoissonSampledDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWrDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWorDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import ShuffledDatasetDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import TreeAggregationDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import SampledWithReplacementDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import SampledWithoutReplacementDpEvent # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py index b40b823..290970f 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event.py +++ b/tensorflow_privacy/privacy/analysis/dp_event.py @@ -11,7 +11,53 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Standard DpEvent classes.""" +"""Standard DpEvent classes. + +A `DpEvent` represents the (hyper)parameters of a differentially +private query, amplification mechanism, or composition, that are necessary +and sufficient for privacy accounting. Various independent implementations of DP +algorithms that are functionally equivalent from an accounting perspective may +correspond to the same `DpEvent`. Similarly, various independent implementations +of accounting algorithms may consume the same `DpEvent`. + +All `DpEvents` processed together are assumed to take place on a single dataset +of records. `DpEvents` fall into roughly three categories: + - `DpEvents` that release an output, and incur a privacy cost, + e.g., `GaussianDpEvent`. + - `DpEvents` that select a subset (or subsets) of the dataset, and run nested + `DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`. + - `DpEvents` that represent (possibly sequentially) applying (multiple) + mechanisms to the dataset (or currently active subset). Currently, this is + only `ComposedDpEvent` and `SelfComposedDpEvent`. 
+
+Each `DpEvent` should completely document the mathematical behavior and
+assumptions of the mechanism it represents so that the writer of an accountant
+class can implement the accounting correctly without knowing any other
+implementation details of the algorithm that produced it.
+
+New mechanism types should be given a corresponding `DpEvent` class, although
+not all accountants will be required to support them. In general,
+`PrivacyAccountant` implementations are not required to be aware of all
+`DpEvent` classes, but they should support the following basic events and
+handle them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`,
+`ComposedDpEvent`, and `SelfComposedDpEvent`. They should return `False` from
+`supports(event)` for `UnsupportedDpEvent` or any other event type they have
+not been designed to handle.
+
+To ensure that a `PrivacyAccountant` does not accidentally start to return
+incorrect results, the following should be enforced:
+ * `DpEvent` classes and their parameters should never be removed, barring
+   some extended, onerous deprecation process.
+ * New parameters cannot be added to existing mechanisms unless they are
+   optional. That is, old composed `DpEvent` objects that do not include them
+   must remain valid.
+ * The meaning of existing mechanisms or parameters must not change. That is,
+   existing mechanisms should not have their implementations change in ways
+   that alter their privacy properties; new `DpEvent` classes should be added
+   instead.
+ * `PrivacyAccountant` implementations are expected to return `False` from
+   `supports(event)` when processing unknown mechanisms.
+"""
 
 from typing import List
 
@@ -19,7 +65,7 @@ import attr
 
 
 class DpEvent(object):
-  """Base class for `DpEvent`s.
+  """Represents application of a private mechanism.
 
   A `DpEvent` describes a differentially private mechanism sufficiently for
   computing the associated privacy losses, both in isolation and in combination
@@ -29,7 +75,7 @@ class DpEvent(object):
 
 @attr.s(frozen=True)
 class NoOpDpEvent(DpEvent):
-  """A `DpEvent` to represent operations with no privacy impact.
+  """Represents application of an operation with no privacy impact.
 
   A `NoOpDpEvent` is generally never required, but it can be useful as a
   placeholder where a `DpEvent` is expected, such as in tests or some live
@@ -39,7 +85,7 @@ class NoOpDpEvent(DpEvent):
 
 @attr.s(frozen=True)
 class NonPrivateDpEvent(DpEvent):
-  """A `DpEvent` to represent non-private operations.
+  """Represents application of a non-private operation.
 
   This `DpEvent` should be used when an operation is performed that does not
   satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite
@@ -49,65 +95,85 @@ class NonPrivateDpEvent(DpEvent):
 
 @attr.s(frozen=True)
 class UnsupportedDpEvent(DpEvent):
-  """A `DpEvent` to represent as-yet unsupported operations.
+  """Represents application of an as-yet unsupported operation.
 
   This `DpEvent` should be used when an operation is performed that does not
   yet have any associated DP description, or if the description is temporarily
   inaccessible, for example, during development. All `PrivacyAccountant`s should
-  return `is_supported(event)` is `False` for `UnsupportedDpEvent`.
+  return `supports(event) == False` for `UnsupportedDpEvent`.
  """
 
 
 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class GaussianDpEvent(DpEvent):
-  """The Gaussian mechanism."""
+  """Represents an application of the Gaussian mechanism.
+
+  For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z.
+ If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is + defined as s / C. + """ noise_multiplier: float @attr.s(frozen=True, slots=True, auto_attribs=True) class SelfComposedDpEvent(DpEvent): - """A mechanism composed with itself multiple times.""" + """Represents repeated application of a mechanism. + + The repeated applications may be adaptive, where the query producing each + event depends on the results of prior queries. + + This is equivalent to `ComposedDpEvent` that contains a list of length `count` + of identical copies of `event`. + """ event: DpEvent count: int @attr.s(frozen=True, slots=True, auto_attribs=True) class ComposedDpEvent(DpEvent): - """A series of composed mechanisms.""" + """Represents application of a series of composed mechanisms. + + The composition may be adaptive, where the query producing each event depends + on the results of prior queries. + """ events: List[DpEvent] @attr.s(frozen=True, slots=True, auto_attribs=True) class PoissonSampledDpEvent(DpEvent): - """An application of Poisson subsampling.""" + """Represents an application of Poisson subsampling. + + Each record in the dataset is included in the sample independently with + probability `sampling_probability`. Then the `DpEvent` `event` is applied + to the sample of records. + """ sampling_probability: float event: DpEvent @attr.s(frozen=True, slots=True, auto_attribs=True) -class FixedBatchSampledWrDpEvent(DpEvent): - """Sampling exactly `batch_size` records with replacement.""" - dataset_size: int - batch_size: int +class SampledWithReplacementDpEvent(DpEvent): + """Represents sampling a fixed sized batch of records with replacement. + + A sample of `sample_size` (possibly repeated) records is drawn uniformly at + random from the set of possible samples of a source dataset of size + `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of + records. + """ + source_dataset_size: int + sample_size: int event: DpEvent @attr.s(frozen=True, slots=True, auto_attribs=True) -class FixedBatchSampledWorDpEvent(DpEvent): - """Sampling exactly `batch_size` records without replacement.""" - dataset_size: int - batch_size: int +class SampledWithoutReplacementDpEvent(DpEvent): + """Represents sampling a fixed sized batch of records without replacement. + + A sample of `sample_size` unique records is drawn uniformly at random from the + set of possible samples of a source dataset of size `source_dataset_size`. + Then the `DpEvent` `event` is applied to the sample of records. 
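A sketch of how this event is consumed by the `RdpAccountant` from earlier in this series, which supports it only under the REPLACE_ONE neighboring relation:

  from tensorflow_privacy.privacy.analysis import dp_event
  from tensorflow_privacy.privacy.analysis import privacy_accountant
  from tensorflow_privacy.privacy.analysis import rdp_privacy_accountant

  event = dp_event.SampledWithoutReplacementDpEvent(
      source_dataset_size=1000, sample_size=10,
      event=dp_event.GaussianDpEvent(1.0))
  accountant = rdp_privacy_accountant.RdpAccountant(
      neighboring_relation=privacy_accountant.NeighboringRelation.REPLACE_ONE)
  assert accountant.supports(event)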
+ """ + source_dataset_size: int + sample_size: int event: DpEvent - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class ShuffledDatasetDpEvent(DpEvent): - """Shuffling a dataset and applying a mechanism to each partition.""" - partition_events: ComposedDpEvent - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class TreeAggregationDpEvent(DpEvent): - """Applying a series of mechanisms with tree aggregation.""" - round_events: ComposedDpEvent - max_record_occurences_across_all_rounds: int diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py index 2bbc327..24463b8 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py @@ -551,13 +551,13 @@ class RdpAccountant(privacy_accountant.PrivacyAccountant): noise_multiplier=event.event.noise_multiplier, orders=self._orders) return True - elif isinstance(event, dp_event.FixedBatchSampledWorDpEvent): + elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent): if (self._neighboring_relation is not NeighborRel.REPLACE_ONE or not isinstance(event.event, dp_event.GaussianDpEvent)): return False if do_compose: self._rdp += count * _compute_rdp_sample_wor_gaussian( - q=event.batch_size / event.dataset_size, + q=event.sample_size / event.source_dataset_size, noise_multiplier=event.event.noise_multiplier, orders=self._orders) return True diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py index 817d41c..e030fcd 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py @@ -94,13 +94,13 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, self.assertTrue(aor_accountant.supports(event)) self.assertFalse(ro_accountant.supports(event)) - event = dp_event.FixedBatchSampledWorDpEvent(1000, 10, - dp_event.GaussianDpEvent(1.0)) + event = dp_event.SampledWithoutReplacementDpEvent( + 1000, 10, dp_event.GaussianDpEvent(1.0)) self.assertFalse(aor_accountant.supports(event)) self.assertTrue(ro_accountant.supports(event)) - event = dp_event.FixedBatchSampledWrDpEvent(1000, 10, - dp_event.GaussianDpEvent(1.0)) + event = dp_event.SampledWithReplacementDpEvent( + 1000, 10, dp_event.GaussianDpEvent(1.0)) self.assertFalse(aor_accountant.supports(event)) self.assertFalse(ro_accountant.supports(event)) @@ -148,8 +148,8 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, accountant = rdp_privacy_accountant.RdpAccountant( [3.14159], privacy_accountant.NeighboringRelation.REPLACE_ONE) accountant.compose( - dp_event.FixedBatchSampledWorDpEvent(1000, 0, - dp_event.GaussianDpEvent(1.0))) + dp_event.SampledWithoutReplacementDpEvent( + 1000, 0, dp_event.GaussianDpEvent(1.0))) self.assertEqual(accountant.get_epsilon(1e-10), 0) self.assertEqual(accountant.get_delta(1e-10), 0) From 99c82a49d8e9c5f24efd2e262a4b071683abb2b5 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Tue, 28 Sep 2021 12:55:22 -0700 Subject: [PATCH 58/71] Function to reset tree for tree aggregation based quantile estimation. 
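A usage sketch of the `reset_state` hook added in the diff below, following the accompanying test; the `test_utils.run_query` helper and the positional constructor arguments are taken from the tests in this series and should be treated as assumptions:

  import tensorflow as tf
  from tensorflow_privacy.privacy.dp_query import quantile_estimator_query
  from tensorflow_privacy.privacy.dp_query import test_utils

  query = quantile_estimator_query.TreeQuantileEstimatorQuery(
      5.0,    # initial_estimate
      0.5,    # target_quantile
      1.0,    # learning_rate
      0.1,    # below_estimate_stddev
      2.0,    # expected_num_records
      False)  # geometric_update
  global_state = query.initial_global_state()
  for _ in range(10):
    noised_estimate, global_state = test_utils.run_query(
        query, [tf.constant(1.0), tf.constant(10.0)], global_state)
    # Restart the tree noise between rounds:
    global_state = query.reset_state(noised_estimate, global_state)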
PiperOrigin-RevId: 399508765 --- .../dp_query/quantile_estimator_query.py | 7 ++++ .../dp_query/quantile_estimator_query_test.py | 32 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py index 4a453d6..0708016 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py @@ -242,3 +242,10 @@ class TreeQuantileEstimatorQuery(QuantileEstimatorQuery): record_specs=tf.TensorSpec([])) return normalized_query.NormalizedQuery( sum_query, denominator=expected_num_records) + + def reset_state(self, noised_results, global_state): + new_numerator_state = self._below_estimate_query._numerator.reset_state( # pylint: disable=protected-access,line-too-long + noised_results, global_state.below_estimate_state.numerator_state) + new_below_estimate_state = global_state.below_estimate_state._replace( + numerator_state=new_numerator_state) + return global_state._replace(below_estimate_state=new_below_estimate_state) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py index e29fc4a..fa3f03e 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py @@ -280,6 +280,38 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase): with self.assertRaisesRegex(ValueError, 'scalar'): query.accumulate_record(None, None, [1.0, 2.0]) + def test_tree_noise_restart(self): + sample_num, tolerance, stddev = 1000, 0.3, 0.1 + initial_estimate, expected_num_records = 5., 2. + record1 = tf.constant(1.) + record2 = tf.constant(10.) + + query = _make_quantile_estimator_query( + initial_estimate=initial_estimate, + target_quantile=.5, + learning_rate=1., + below_estimate_stddev=stddev, + expected_num_records=expected_num_records, + geometric_update=False, + tree_aggregation=True) + + global_state = query.initial_global_state() + + self.assertAllClose(global_state.current_estimate, initial_estimate) + + # As the target quantile is accurate, there is no signal and only noise. + samples = [] + for _ in range(sample_num): + noised_estimate, global_state = test_utils.run_query( + query, [record1, record2], global_state) + samples.append(noised_estimate.numpy()) + global_state = query.reset_state(noised_estimate, global_state) + self.assertNotEqual(global_state.current_estimate, initial_estimate) + global_state = global_state._replace(current_estimate=initial_estimate) + + self.assertAllClose( + np.std(samples), stddev / expected_num_records, rtol=tolerance) + if __name__ == '__main__': tf.test.main() From 7426a4ec30bf52451d349380f3085b6a2f95b08f Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Tue, 28 Sep 2021 13:03:57 -0700 Subject: [PATCH 59/71] Update tree aggregation rdp accountant to allow different number of max_participation. 
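
A hedged usage sketch of the extended accounting API (the argument values are
illustrative; `get_privacy_spent` is the existing RDP-to-epsilon conversion):

    from tensorflow_privacy.privacy.analysis import rdp_accountant

    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    # Three restarts of 400 steps each; a sample may appear at most 2, 3 and 4
    # times in the respective trees.
    rdp = rdp_accountant.compute_rdp_tree(
        noise_multiplier=1.0,
        steps_list=[400, 400, 400],
        max_participation_list=[2, 3, 4],
        orders=orders)
    eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)[0]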
PiperOrigin-RevId: 399510813
---
 tensorflow_privacy/__init__.py                |  1 +
 .../privacy/analysis/rdp_accountant.py        | 65 ++++++++++++++-----
 .../privacy/analysis/rdp_accountant_test.py   | 56 +++++++++++++---
 3 files changed, 96 insertions(+), 26 deletions(-)

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index d3503f3..5a90fe5 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -45,6 +45,7 @@ else:
   from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
+  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
   from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
 
   # DPQuery classes
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
index a328a63..500a16a 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -398,19 +398,22 @@ def compute_rdp(q, noise_multiplier, steps, orders):
   return rdp * steps
 
 
-def _compute_rdp_tree(sigma, steps_list, max_participation, alpha):
+def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
   """Computes RDP of the Tree Aggregation Protocol at order alpha."""
   if np.isinf(alpha):
     return np.inf
   tree_depths = [
       math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
   ]
-  return alpha * max_participation * sum(tree_depths) / (2 * sigma**2)
+  record_occurrence = [
+      x * y for x, y in zip(max_participation_list, tree_depths)
+  ]
+  return alpha * sum(record_occurrence) / (2 * sigma**2)
 
 
 def compute_rdp_tree(
-    noise_multiplier: float, steps_list: Collection[float],
-    max_participation: int,
+    noise_multiplier: float, steps_list: Union[float, Collection[float]],
+    max_participation_list: Union[int, Collection[int]],
     orders: Union[float, Collection[float]]) -> Collection[float]:
   """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
 
@@ -418,10 +421,11 @@ def compute_rdp_tree(
     noise_multiplier: A non-negative float representing the ratio of the
       standard deviation of the Gaussian noise to the l2-sensitivity of the
      function to which it is added.
-    steps_list: A list of non-negative intergers representing the number of
-      steps between tree restarts.
-    max_participation: A positive integer representing maximum number of times a
-      sample may appear between tree restarts.
+    steps_list: A scalar or a list of non-negative integers representing the
+      number of steps between tree restarts.
+    max_participation_list: A scalar or a list of positive integers representing
+      the maximum number of times a sample may appear between tree restarts. The
+      type (scalar/list) of `max_participation_list` should match `steps_list`.
     orders: An array (or a scalar) of RDP orders.
   Returns:
@@ -433,23 +437,50 @@ def compute_rdp_tree(
   elif noise_multiplier == 0:
     return np.inf
 
-  if max_participation <= 0:
-    raise ValueError(
-        f"Max participation must be positive, got {max_participation}")
-
   if not steps_list:
-    raise ValueError("List of steps must be non-empty.")
+    raise ValueError(
+        "steps_list must be a non-empty list, or a non-zero scalar, got "
+        f"{steps_list}.")
+
+  if not max_participation_list:
+    raise ValueError(
+        "max_participation_list must be a non-empty list, or a non-zero scalar,"
+        f" got {max_participation_list}.")
+
+  if np.isscalar(steps_list) and np.isscalar(max_participation_list):
+    steps_list = [steps_list]
+    max_participation_list = [max_participation_list]
+  elif np.isscalar(steps_list):
+    raise ValueError(
+        "`steps_list` and `max_participation_list` must have the same type, got "
+        f"scalar of steps: {steps_list}, and list of max_participations with "
+        f"length {len(max_participation_list)}.")
+  elif np.isscalar(max_participation_list):
+    raise ValueError(
+        "`steps_list` and `max_participation_list` must have the same type, got "
+        f"scalar of max_participation: {max_participation_list}, and list of "
+        f"steps with length {len(steps_list)}.")
+  elif len(max_participation_list) != len(steps_list):
+    raise ValueError(
+        "`steps_list` and `max_participation_list` must have the same size, got "
+        f"steps length {len(steps_list)}, max_participations length "
+        f"{len(max_participation_list)}")
+
+  for max_participation in max_participation_list:
+    if max_participation <= 0:
+      raise ValueError(
+          f"Max participation must be positive, got {max_participation}")
 
   for steps in steps_list:
     if steps < 0:
-      raise ValueError(f"List of steps must be non-negative, got {steps_list}")
+      raise ValueError(f"Steps must be non-negative, got {steps_list}")
 
   if np.isscalar(orders):
-    rdp = _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
-                            orders)
+    rdp = _compute_rdp_tree(noise_multiplier, steps_list,
+                            max_participation_list, orders)
   else:
     rdp = np.array([
-        _compute_rdp_tree(noise_multiplier, steps_list, max_participation,
+        _compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
                           alpha) for alpha in orders
     ])
 
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
index 33c51fd..583fa3f 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@@ -272,7 +272,7 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
     # This test is based on the StackOverflow setting in "Practical and
     # Private (Deep) Learning without Sampling or Shuffling". The calculated
     # epsilon could be better as the method in this package keeps improving.
- steps_list, target_delta, max_participation = [1600], 1e-6, 1 + steps_list, target_delta, max_participation = 1600, 1e-6, 1 rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, max_participation, orders) new_eps = rdp_accountant.get_privacy_spent( @@ -280,9 +280,33 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): self.assertLess(new_eps, eps) @parameterized.named_parameters( - ('restart4_max2', [400] * 4, 2), - ('restart2_max1', [800] * 2, 1), - ('adaptive_max4', [10, 400, 400, 400, 390], 4), + ('restart4_max2', [400] * 4, [2] * 4), + ('restart2_max1', [800] * 2, [1] * 2), + ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5), + ('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3])) + def test_compose_tree_rdp(self, steps_list, max_participation_list): + noise_multiplier, orders = 0.1, 1 + if np.isscalar(max_participation_list): + rdp_list = [ + rdp_accountant.compute_rdp_tree(noise_multiplier, steps, + max_participation_list, orders) + for steps in steps_list + ] + else: + rdp_list = [ + rdp_accountant.compute_rdp_tree(noise_multiplier, steps, + max_participation, orders) for steps, + max_participation in zip(steps_list, max_participation_list) + ] + rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, + max_participation_list, + orders) + self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12) + + @parameterized.named_parameters( + ('restart4_max2', [400] * 4, [2] * 4), + ('restart2_max1', [800] * 2, [1] * 2), + ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5), ) def test_compute_eps_tree_decreasing(self, steps_list, max_participation): # Test privacy epsilon decreases with noise multiplier increasing when @@ -299,11 +323,12 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): self.assertLess(eps, prev_eps) @parameterized.named_parameters( - ('negative_noise', -1, [3], 2, 1), + ('negative_noise', -1, 3, 2, 1), ('empty_steps', 1, [], 2, 1), - ('negative_steps', 1, [-3], 2, 1), - ('zero_participation', 1, [3], 0, 1), - ('negative_participation', 1, [3], -1, 1), + ('empty_part', 1, 1, [], 1), + ('negative_steps', 1, -3, 2, 1), + ('zero_participation', 1, 3, 0, 1), + ('negative_participation', 1, 3, -1, 1), ) def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list, max_participation, orders): @@ -311,6 +336,18 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, max_participation, orders) + @parameterized.named_parameters( + ('list_scalar', [2], 1), + ('scalar_list', 2, [1]), + ('list_length', [2, 3], [1]), + ('list_length2', [2, 3], [1, 2, 2]), + ) + def test_compute_rdp_tree_raise_input_type(self, steps_list, + max_participation): + with self.assertRaisesRegex(ValueError, 'must have the same'): + rdp_accountant.compute_rdp_tree( + 0.1, steps_list, max_participation, orders=1) + @parameterized.named_parameters( ('t100n0.1', 100, 0.1), ('t1000n0.01', 1000, 0.01), @@ -318,7 +355,8 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): def test_no_tree_no_sampling(self, total_steps, noise_multiplier): orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64))
     tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier,
-                                               [1] * total_steps, 1, orders)
+                                               [1] * total_steps,
+                                               [1] * total_steps, orders)
     rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders)
     self.assertAllClose(tree_rdp, rdp, rtol=1e-12)
 

From 27bb6e48d9b218c642def488befa38ddd83511a0 Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Fri, 8 Oct 2021 15:40:27 -0700
Subject: [PATCH 60/71] Time based indicator for restart query.

PiperOrigin-RevId: 401871582
---
 .../privacy/dp_query/restart_query.py         | 44 +++++++++++++++++++
 .../privacy/dp_query/restart_query_test.py    | 36 ++++++++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/tensorflow_privacy/privacy/dp_query/restart_query.py b/tensorflow_privacy/privacy/dp_query/restart_query.py
index 8d1ff8c..38c08f9 100644
--- a/tensorflow_privacy/privacy/dp_query/restart_query.py
+++ b/tensorflow_privacy/privacy/dp_query/restart_query.py
@@ -103,6 +103,50 @@ class PeriodicRoundRestartIndicator(RestartIndicator):
     return flag, state
 
 
+class PeriodicTimeRestartIndicator(RestartIndicator):
+  """Indicator for periodically resetting the tree state after a certain time.
+
+  The indicator will maintain a state to track the previous restart time.
+  """
+
+  def __init__(self, period_seconds: float):
+    """Construct the `PeriodicTimeRestartIndicator`.
+
+    Args:
+      period_seconds: The `next` function will return `True` if called more
+        than `period_seconds` after the previous restart time.
+    """
+    if period_seconds <= 0:
+      raise ValueError('Restart period_seconds should be larger than 0, got '
+                       f'{period_seconds}')
+    self.period_seconds = period_seconds
+
+  @tf.function
+  def initialize(self):
+    """Returns initial time as state."""
+    return tf.timestamp()
+
+  @tf.function
+  def next(self, state):
+    """Gets next bool indicator and advances the state.
+
+    Args:
+      state: The current state.
+
+    Returns:
+      A pair (value, new_state) where value is the bool indicator and
+      new_state is the updated restart timestamp.
+    """
+    current_time = tf.timestamp()
+    current_period = current_time - state
+    reset_flag = tf.math.greater(
+        current_period,
+        tf.convert_to_tensor(self.period_seconds, current_period.dtype))
+    if reset_flag:
+      state = current_time
+    return reset_flag, state
+
+
 class RestartQuery(dp_query.SumAggregationDPQuery):
   """`DPQuery` for `SumAggregationDPQuery` with a `reset_state` function."""
 
diff --git a/tensorflow_privacy/privacy/dp_query/restart_query_test.py b/tensorflow_privacy/privacy/dp_query/restart_query_test.py
index bf6c374..ce05ed2 100644
--- a/tensorflow_privacy/privacy/dp_query/restart_query_test.py
+++ b/tensorflow_privacy/privacy/dp_query/restart_query_test.py
@@ -13,13 +13,14 @@
 # limitations under the License.
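# A hedged usage sketch of the indicator added above (not part of this diff;
# the period is illustrative). The state is simply the timestamp of the last
# restart, and `next` flips to True once more than `period_seconds` elapse:
#
#   indicator = restart_query.PeriodicTimeRestartIndicator(
#       period_seconds=24 * 3600.0)
#   state = indicator.initialize()       # records the current tf.timestamp()
#   flag, state = indicator.next(state)  # True once the period has passed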
"""Tests for `restart_query`.""" from absl.testing import parameterized +import mock import tensorflow as tf from tensorflow_privacy.privacy.dp_query import restart_query from tensorflow_privacy.privacy.dp_query import tree_aggregation_query -class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): +class RoundRestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('zero', 0), ('negative', -1)) def test_round_raise(self, frequency): @@ -61,6 +62,39 @@ class RestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): self.assertFalse(flag) +class TimeRestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(('zero', 0), ('negative', -1.)) + def test_round_raise(self, secs): + with self.assertRaisesRegex( + ValueError, 'Restart period_seconds should be larger than 0'): + restart_query.PeriodicTimeRestartIndicator(secs) + + def test_round_indicator(self): + indicator = restart_query.PeriodicTimeRestartIndicator(period_seconds=3600 * + 23.5) + # TODO(b/193679963): use `tf.timestamp` as the default of a member of + # the `PeriodicTimeRestartIndicator` to unroll the mock test. + return_time = tf.Variable( + 1627018868.452365) # 22:41pm PST 5:41am UTC, July 22, initialize + with mock.patch.object( + tf, 'timestamp', return_value=return_time) as mock_func: + time_stamps = [ + 1627022468.452365, # 23:41pm PST 5:41am UTC, July 22, 1 hr, False + 1627105268.452365, # 23:41pm PST 5:41am UTC, July 23, 1 day, True + 1627112468.452365, # 2 hr after restart, False + 1627189508.452365, # 23.4 hr after restart, False + 1627189904.452365, # 23.51 hr after restart, True + ] + expected_values = [False, True, False, False, True] + state = indicator.initialize() + for v, t in zip(expected_values, time_stamps): + return_time.assign(t) + mock_func.return_value = return_time + flag, state = indicator.next(state) + self.assertEqual(v, flag.numpy()) + + def _get_l2_clip_fn(): def l2_clip_fn(record_as_list, clip_value): From 98df2fed6113fbfd441fb37a1786bd78ec05440c Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Mon, 11 Oct 2021 10:03:36 -0700 Subject: [PATCH 61/71] Fix a typo in test comment. PiperOrigin-RevId: 402327052 --- tensorflow_privacy/privacy/dp_query/restart_query_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_privacy/privacy/dp_query/restart_query_test.py b/tensorflow_privacy/privacy/dp_query/restart_query_test.py index ce05ed2..1ce303a 100644 --- a/tensorflow_privacy/privacy/dp_query/restart_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/restart_query_test.py @@ -81,7 +81,7 @@ class TimeRestartIndicatorTest(tf.test.TestCase, parameterized.TestCase): tf, 'timestamp', return_value=return_time) as mock_func: time_stamps = [ 1627022468.452365, # 23:41pm PST 5:41am UTC, July 22, 1 hr, False - 1627105268.452365, # 23:41pm PST 5:41am UTC, July 23, 1 day, True + 1627105268.452365, # 22:41pm PST 5:41am UTC, July 23, 1 day, True 1627112468.452365, # 2 hr after restart, False 1627189508.452365, # 23.4 hr after restart, False 1627189904.452365, # 23.51 hr after restart, True From 977647a3bf3ff11643804169b52be5bdddb8f666 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Tue, 12 Oct 2021 17:13:03 -0700 Subject: [PATCH 62/71] Add support for subsampled multi-Gaussian queries (composition of several Gaussian queries that may have different noise multipliers). This is used, for example, by QuantileAdaptiveClipSumQuery. 
PiperOrigin-RevId: 402693872
---
 .../analysis/rdp_privacy_accountant.py        | 54 ++++++++++++++++---
 .../analysis/rdp_privacy_accountant_test.py   | 48 +++++++++++++++++
 2 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
index 24463b8..5c012a3 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
@@ -483,6 +483,42 @@ def _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha):
   return log_a
 
 
+def _effective_gaussian_noise_multiplier(event: dp_event.DpEvent):
+  """Determines the effective noise multiplier of a nested structure of Gaussians.
+
+  A series of Gaussian queries on the same data can be re-expressed as a single
+  query with pre- and post-processing. For details, see section 3 of
+  https://arxiv.org/pdf/1812.06210.pdf.
+
+  Args:
+    event: A `dp_event.DpEvent`. In order for conversion to be successful it
+      must consist of a single `dp_event.GaussianDpEvent`, or a nested structure
+      of `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent`
+      bottoming out in `dp_event.GaussianDpEvent`s.
+
+  Returns:
+    The noise multiplier of the equivalent `dp_event.GaussianDpEvent`, or None
+    if the input event was not a `dp_event.GaussianDpEvent` or a nested
+    structure of `dp_event.ComposedDpEvent` and/or
+    `dp_event.SelfComposedDpEvent` bottoming out in `dp_event.GaussianDpEvent`s.
+  """
+  if isinstance(event, dp_event.GaussianDpEvent):
+    return event.noise_multiplier
+  elif isinstance(event, dp_event.ComposedDpEvent):
+    sum_sigma_inv_sq = 0
+    for e in event.events:
+      sigma = _effective_gaussian_noise_multiplier(e)
+      if sigma is None:
+        return None
+      sum_sigma_inv_sq += sigma**-2
+    return sum_sigma_inv_sq**-0.5
+  elif isinstance(event, dp_event.SelfComposedDpEvent):
+    sigma = _effective_gaussian_noise_multiplier(event.event)
+    return None if sigma is None else (event.count * sigma**-2)**-0.5
+  else:
+    return None
+
+
 class RdpAccountant(privacy_accountant.PrivacyAccountant):
   """Privacy accountant that uses Renyi differential privacy."""
 
@@ -542,23 +578,29 @@ class RdpAccountant(privacy_accountant.PrivacyAccountant):
           q=1.0, noise_multiplier=event.noise_multiplier, orders=self._orders)
       return True
     elif isinstance(event, dp_event.PoissonSampledDpEvent):
-      if (self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE or
-          not isinstance(event.event, dp_event.GaussianDpEvent)):
+      if self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE:
+        return False
+      gaussian_noise_multiplier = _effective_gaussian_noise_multiplier(
+          event.event)
+      if gaussian_noise_multiplier is None:
         return False
       if do_compose:
        self._rdp += count * _compute_rdp_poisson_subsampled_gaussian(
             q=event.sampling_probability,
-            noise_multiplier=event.event.noise_multiplier,
+            noise_multiplier=gaussian_noise_multiplier,
            orders=self._orders)
       return True
     elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent):
-      if (self._neighboring_relation is not NeighborRel.REPLACE_ONE or
-          not isinstance(event.event, dp_event.GaussianDpEvent)):
+      if self._neighboring_relation is not NeighborRel.REPLACE_ONE:
+        return False
+      gaussian_noise_multiplier = _effective_gaussian_noise_multiplier(
+          event.event)
+      if gaussian_noise_multiplier is None:
+        return False
       if do_compose:
         self._rdp += count * _compute_rdp_sample_wor_gaussian(
             q=event.sample_size / event.source_dataset_size,
-
noise_multiplier=event.event.noise_multiplier, + noise_multiplier=gaussian_noise_multiplier, orders=self._orders) return True else: diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py index e030fcd..fc4b8dd 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py @@ -94,11 +94,23 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, self.assertTrue(aor_accountant.supports(event)) self.assertFalse(ro_accountant.supports(event)) + composed_gaussian = dp_event.ComposedDpEvent( + [dp_event.GaussianDpEvent(1.0), + dp_event.GaussianDpEvent(2.0)]) + event = dp_event.PoissonSampledDpEvent(0.1, composed_gaussian) + self.assertTrue(aor_accountant.supports(event)) + self.assertFalse(ro_accountant.supports(event)) + event = dp_event.SampledWithoutReplacementDpEvent( 1000, 10, dp_event.GaussianDpEvent(1.0)) self.assertFalse(aor_accountant.supports(event)) self.assertTrue(ro_accountant.supports(event)) + event = dp_event.SampledWithoutReplacementDpEvent(1000, 10, + composed_gaussian) + self.assertFalse(aor_accountant.supports(event)) + self.assertTrue(ro_accountant.supports(event)) + event = dp_event.SampledWithReplacementDpEvent( 1000, 10, dp_event.GaussianDpEvent(1.0)) self.assertFalse(aor_accountant.supports(event)) @@ -166,6 +178,42 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, accountant.compose(event) self.assertAlmostEqual(accountant._rdp[0], alpha / (2 * sigma**2)) + def test_compute_rdp_multi_gaussian(self): + alpha = 3.14159 + sigma1, sigma2 = 2.71828, 6.28319 + + rdp1 = alpha / (2 * sigma1**2) + rdp2 = alpha / (2 * sigma2**2) + rdp = rdp1 + rdp2 + + accountant = rdp_privacy_accountant.RdpAccountant(orders=[alpha]) + accountant.compose( + dp_event.PoissonSampledDpEvent( + 1.0, + dp_event.ComposedDpEvent([ + dp_event.GaussianDpEvent(sigma1), + dp_event.GaussianDpEvent(sigma2) + ]))) + self.assertAlmostEqual(accountant._rdp[0], rdp) + + def test_effective_gaussian_noise_multiplier(self): + np.random.seed(0xBAD5EED) + sigmas = np.random.uniform(size=(4,)) + + event = dp_event.ComposedDpEvent([ + dp_event.GaussianDpEvent(sigmas[0]), + dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(sigmas[1]), 3), + dp_event.ComposedDpEvent([ + dp_event.GaussianDpEvent(sigmas[2]), + dp_event.GaussianDpEvent(sigmas[3]) + ]) + ]) + + sigma = rdp_privacy_accountant._effective_gaussian_noise_multiplier(event) + multi_sigmas = list(sigmas) + [sigmas[1]] * 2 + expected = sum(s**-2 for s in multi_sigmas)**-0.5 + self.assertAlmostEqual(sigma, expected) + def test_compute_rdp_poisson_sampled_gaussian(self): orders = [1.5, 2.5, 5, 50, 100, np.inf] noise_multiplier = 2.5 From c530356ae9a4c12218f6ba6c4275f6153520f0b7 Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Mon, 18 Oct 2021 15:08:40 -0700 Subject: [PATCH 63/71] Add tests for varying number of microbatches in dp_optimizer_test.py. 
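
For reference, the clipping arithmetic the new test helper encodes, restated
as a standalone NumPy sketch (it assumes the batch size divides evenly into
microbatches and that no microbatch gradient has zero norm):

    import numpy as np

    def expected_gradients(per_example_gradients, l2_norm_clip,
                           num_microbatches):
      batch_size, num_vars = per_example_gradients.shape
      # Average per-example gradients within each microbatch.
      microbatch_grads = per_example_gradients.reshape(
          num_microbatches, batch_size // num_microbatches, num_vars).mean(1)
      # Scale each microbatch mean down to l2 norm at most l2_norm_clip.
      norms = np.linalg.norm(microbatch_grads, axis=1)
      scales = np.minimum(1.0, l2_norm_clip / norms)
      # Average the clipped microbatch gradients.
      return (microbatch_grads * scales[:, None]).mean(axis=0)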
PiperOrigin-RevId: 404072714 --- .../privacy/optimizers/dp_optimizer_test.py | 72 ++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py index 5d30085..646fcd9 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py @@ -35,6 +35,24 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): return 0.5 * tf.reduce_sum( input_tensor=tf.math.squared_difference(val0, val1), axis=1) + def _compute_expected_gradients(self, per_example_gradients, + l2_norm_clip, num_microbatches): + batch_size, num_vars = per_example_gradients.shape + microbatch_gradients = np.mean( + np.reshape(per_example_gradients, + [num_microbatches, + np.int(batch_size / num_microbatches), num_vars]), + axis=1) + microbatch_gradients_norms = np.linalg.norm(microbatch_gradients, axis=1) + + def scale(x): + return 1.0 if x < l2_norm_clip else l2_norm_clip / x + + scales = np.array(list(map(scale, microbatch_gradients_norms))) + mean_clipped_gradients = np.mean( + microbatch_gradients * scales[:, None], axis=0) + return mean_clipped_gradients + # Parameters for testing: optimizer, num_microbatches, expected answer. @parameterized.named_parameters( ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1, @@ -98,18 +116,56 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) @parameterized.named_parameters( - ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), - ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), - ('DPAdam', dp_optimizer.DPAdamOptimizer), - ('DPRMSPropOptimizer', dp_optimizer.DPRMSPropOptimizer)) - def testNoiseMultiplier(self, cls): + ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1), + ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2), + ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4), + ) + def testClippingNormWithMicrobatches(self, cls, num_microbatches): + with self.cached_session() as sess: + var0 = tf.Variable([0.0, 0.0]) + data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0], [-9.0, -12.0], + [-12.0, -16.0]]) + + l2_norm_clip = 1.0 + dp_sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip, 0.0) + + opt = cls(dp_sum_query, num_microbatches=num_microbatches, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + var_np = self.evaluate(var0) + self.assertAllClose([0.0, 0.0], var_np) + + # Compute expected gradient, which is the sum of differences. + data_np = self.evaluate(data0) + per_example_gradients = var_np - data_np + mean_clipped_gradients = self._compute_expected_gradients( + per_example_gradients, l2_norm_clip, num_microbatches) + + # Compare actual with expected gradients. 
+ gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads_and_vars = sess.run(gradient_op) + print('mean_clipped_gradients: ', mean_clipped_gradients) + self.assertAllCloseAccordingToType(mean_clipped_gradients, + grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1), + ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2), + ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4), + ('DPAdagrad', dp_optimizer.DPAdagradOptimizer, 1), + ('DPAdam', dp_optimizer.DPAdamOptimizer, 1), + ('DPRMSPropOptimizer', dp_optimizer.DPRMSPropOptimizer, 1)) + def testNoiseMultiplier(self, cls, num_microbatches): with self.cached_session() as sess: var0 = tf.Variable([0.0]) - data0 = tf.Variable([[0.0]]) + data0 = tf.Variable([[0.0], [0.0], [0.0], [0.0]]) dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) - opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) + opt = cls( + dp_sum_query, num_microbatches=num_microbatches, learning_rate=2.0) self.evaluate(tf.global_variables_initializer()) # Fetch params to validate initial values @@ -122,7 +178,7 @@ class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): grads.append(grads_and_vars[0][0]) # Test standard deviation is close to l2_norm_clip * noise_multiplier. - self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) + self.assertNear(np.std(grads), 2.0 * 4.0 / num_microbatches, 0.5) @mock.patch('absl.logging.warning') def testComputeGradientsOverrideWarning(self, mock_logging): From c5cb68750777f355be248122c612a2fc23a82022 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 28 Oct 2021 14:26:07 -0700 Subject: [PATCH 64/71] Allow using gradient tape for gradient calculation in graph mode. PiperOrigin-RevId: 406217961 --- .../privacy/optimizers/dp_optimizer.py | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py index 3b80092..1d9c8cc 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py @@ -98,6 +98,7 @@ def make_optimizer_class(cls): dp_sum_query, num_microbatches=None, unroll_microbatches=False, + while_loop_parallel_iterations=10, *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args **kwargs): """Initializes the DPOptimizerClass. @@ -111,6 +112,10 @@ def make_optimizer_class(cls): unroll_microbatches: If true, processes microbatches within a Python loop instead of a `tf.while_loop`. Can be used if using a `tf.while_loop` raises an exception. + while_loop_parallel_iterations: The number of iterations allowed to run + in parallel. It must be a positive integer. Applicable only when + unroll_microbatches is set to False. It gives users some control over + memory consumption. *args: These will be passed on to the base class `__init__` method. **kwargs: These will be passed on to the base class `__init__` method. """ @@ -122,6 +127,7 @@ def make_optimizer_class(cls): # Beware: When num_microbatches is large (>100), enabling this parameter # may cause an OOM error. self._unroll_microbatches = unroll_microbatches + self._while_loop_parallel_iterations = while_loop_parallel_iterations self._was_compute_gradients_called = False def compute_gradients(self, @@ -177,10 +183,6 @@ def make_optimizer_class(cls): return grads_and_vars else: - # TF is running in graph mode. 
Check we did not receive a gradient tape. - if gradient_tape: - raise ValueError('When in graph mode, a tape should not be passed.') - # Note: it would be closer to the correct i.i.d. sampling of records if # we sampled each microbatch from the appropriate binomial distribution, # although that still wouldn't be quite correct because it would be @@ -206,10 +208,15 @@ def make_optimizer_class(cls): # This case covers Keras optimizers from optimizers_v2. compute_gradients_fn = self_super._compute_gradients # pylint: disable=protected-access - grads, _ = zip(*compute_gradients_fn( - mean_loss, var_list, gate_gradients, aggregation_method, - colocate_gradients_with_ops, grad_loss)) - grads_list = list(grads) + if gradient_tape: + # This is intended to work for TF2 and may not work for TF1. + with gradient_tape.stop_recording(): + grads_list = list(gradient_tape.gradient(mean_loss, var_list)) + else: + grads, _ = zip(*compute_gradients_fn( + mean_loss, var_list, gate_gradients, aggregation_method, + colocate_gradients_with_ops, grad_loss)) + grads_list = list(grads) sample_state = self._dp_sum_query.accumulate_record( sample_params, sample_state, grads_list) @@ -233,7 +240,10 @@ def make_optimizer_class(cls): body_fn = lambda i, state: [tf.add(i, 1), process_microbatch(i, state)] # pylint: disable=line-too-long idx = tf.constant(0) _, sample_state = tf.while_loop( - cond=cond_fn, body=body_fn, loop_vars=[idx, sample_state]) + cond=cond_fn, + body=body_fn, + loop_vars=[idx, sample_state], + parallel_iterations=self._while_loop_parallel_iterations) grad_sums, self._global_state, _ = ( self._dp_sum_query.get_noised_result(sample_state, From 9757e1bc870bc65283abb5a8bc6fc69546b46b67 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Mon, 1 Nov 2021 11:39:10 -0700 Subject: [PATCH 65/71] Update the tree aggregation RDP accounting for restarts. This prevents the potential inaccurate usage of the previous implementation for no-restarts. 
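
With a restart after every epoch, a sample contributes to at most one node per
tree level, so an epoch of `steps` leaf nodes adds floor(log2(steps)) + 1 to
the squared sensitivity, and the per-epoch contributions simply add up. A
hedged sketch of the renamed API (parameter values illustrative):

    from tensorflow_privacy.privacy.analysis import rdp_accountant

    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    # Four epochs of 400 steps each, restarting the tree after every epoch.
    rdp = rdp_accountant.compute_rdp_tree_restart(
        noise_multiplier=1.0, steps_list=[400] * 4, orders=orders)
    eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)[0]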
PiperOrigin-RevId: 406878834
---
 tensorflow_privacy/__init__.py                |  2 +-
 .../privacy/analysis/rdp_accountant.py        | 63 ++++----------
 .../privacy/analysis/rdp_accountant_test.py   | 87 +++++++------------
 3 files changed, 50 insertions(+), 102 deletions(-)

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index 5a90fe5..72cc746 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -45,7 +45,7 @@ else:
   from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp
   from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
-  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree
+  from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart
   from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
 
   # DPQuery classes
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
index 500a16a..d50a7a8 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -398,34 +398,34 @@ def compute_rdp(q, noise_multiplier, steps, orders):
   return rdp * steps
 
 
-def _compute_rdp_tree(sigma, steps_list, max_participation_list, alpha):
+def _compute_rdp_tree_restart(sigma, steps_list, alpha):
   """Computes RDP of the Tree Aggregation Protocol at order alpha."""
   if np.isinf(alpha):
     return np.inf
   tree_depths = [
-      math.floor(math.log2(steps)) + 1 for steps in steps_list if steps > 0
+      math.floor(math.log2(float(steps))) + 1
+      for steps in steps_list
+      if steps > 0
   ]
-  record_occurrence = [
-      x * y for x, y in zip(max_participation_list, tree_depths)
-  ]
-  return alpha * sum(record_occurrence) / (2 * sigma**2)
+  return alpha * sum(tree_depths) / (2 * sigma**2)
 
 
-def compute_rdp_tree(
-    noise_multiplier: float, steps_list: Union[float, Collection[float]],
-    max_participation_list: Union[int, Collection[int]],
-    orders: Union[float, Collection[float]]) -> Collection[float]:
+def compute_rdp_tree_restart(
+    noise_multiplier: float, steps_list: Union[int, Collection[int]],
+    orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
   """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
 
+  This function implements the accounting when the tree is restarted at every
+  epoch. See the appendix of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
   Args:
     noise_multiplier: A non-negative float representing the ratio of the
       standard deviation of the Gaussian noise to the l2-sensitivity of the
       function to which it is added.
     steps_list: A scalar or a list of non-negative integers representing the
-      number of steps between tree restarts.
-    max_participation_list: A scalar or a list of positive integers representing
-      the maximum number of times a sample may appear between tree restarts. The
-      type (scalar/list) of `max_participation_list` should match `steps_list`.
+      number of steps per epoch (between two restarts).
    orders: An array (or a scalar) of RDP orders.
   Returns:
@@ -442,46 +442,19 @@ def compute_rdp_tree(
         "steps_list must be a non-empty list, or a non-zero scalar, got "
         f"{steps_list}.")
 
-  if not max_participation_list:
-    raise ValueError(
-        "max_participation_list must be a non-empty list, or a non-zero scalar,"
-        f" got {max_participation_list}.")
-
-  if np.isscalar(steps_list) and np.isscalar(max_participation_list):
+  if np.isscalar(steps_list):
     steps_list = [steps_list]
-    max_participation_list = [max_participation_list]
-  elif np.isscalar(steps_list):
-    raise ValueError(
-        "`steps_list` and `max_participation_list` must have the same type, got "
-        f"scalar of steps: {steps_list}, and list of max_participations with "
-        f"length {len(max_participation_list)}.")
-  elif np.isscalar(max_participation_list):
-    raise ValueError(
-        "`steps_list` and `max_participation_list` must have the same type, got "
-        f"scalar of max_participation: {max_participation_list}, and list of "
-        f"steps with length {len(steps_list)}.")
-  elif len(max_participation_list) != len(steps_list):
-    raise ValueError(
-        "`steps_list` and `max_participation_list` must have the same size, got "
-        f"steps length {len(steps_list)}, max_participations length "
-        f"{len(max_participation_list)}")
-
-  for max_participation in max_participation_list:
-    if max_participation <= 0:
-      raise ValueError(
-          f"Max participation must be positive, got {max_participation}")
 
   for steps in steps_list:
     if steps < 0:
       raise ValueError(f"Steps must be non-negative, got {steps_list}")
 
   if np.isscalar(orders):
-    rdp = _compute_rdp_tree(noise_multiplier, steps_list,
-                            max_participation_list, orders)
+    rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
   else:
     rdp = np.array([
-        _compute_rdp_tree(noise_multiplier, steps_list, max_participation_list,
-                          alpha) for alpha in orders
+        _compute_rdp_tree_restart(noise_multiplier, steps_list, alpha)
+        for alpha in orders
     ])
 
   return rdp
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
index 583fa3f..42e751c 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py
@@ -272,81 +272,56 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
     # This test is based on the StackOverflow setting in "Practical and
     # Private (Deep) Learning without Sampling or Shuffling". The calculated
     # epsilon could be better as the method in this package keeps improving.
- steps_list, target_delta, max_participation = 1600, 1e-6, 1 - rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation, orders) + steps_list, target_delta = 1600, 1e-6 + rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list, + orders) new_eps = rdp_accountant.get_privacy_spent( orders, rdp, target_delta=target_delta)[0] self.assertLess(new_eps, eps) @parameterized.named_parameters( - ('restart4_max2', [400] * 4, [2] * 4), - ('restart2_max1', [800] * 2, [1] * 2), - ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5), - ('adaptive', [10, 400, 400, 400, 390], [2, 3, 4, 4, 3])) - def test_compose_tree_rdp(self, steps_list, max_participation_list): + ('restart4', [400] * 4), + ('restart2', [800] * 2), + ('adaptive', [10, 400, 400, 400, 390]), + ) + def test_compose_tree_rdp(self, steps_list): noise_multiplier, orders = 0.1, 1 - if np.isscalar(max_participation_list): - rdp_list = [ - rdp_accountant.compute_rdp_tree(noise_multiplier, steps, - max_participation_list, orders) - for steps in steps_list - ] - else: - rdp_list = [ - rdp_accountant.compute_rdp_tree(noise_multiplier, steps, - max_participation, orders) for steps, - max_participation in zip(steps_list, max_participation_list) - ] - rdp_composed = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation_list, - orders) + rdp_list = [ + rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders) + for steps in steps_list + ] + rdp_composed = rdp_accountant.compute_rdp_tree_restart( + noise_multiplier, steps_list, orders) self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12) @parameterized.named_parameters( - ('restart4_max2', [400] * 4, [2] * 4), - ('restart2_max1', [800] * 2, [1] * 2), - ('adaptive_max4', [10, 400, 400, 400, 390], [4] * 5), + ('restart4', [400] * 4), + ('restart2', [800] * 2), + ('adaptive', [10, 400, 400, 400, 390]), ) - def test_compute_eps_tree_decreasing(self, steps_list, max_participation): + def test_compute_eps_tree_decreasing(self, steps_list): # Test privacy epsilon decreases with noise multiplier increasing when # keeping other parameters the same. orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64)) target_delta = 1e-6 - prev_eps = rdp_accountant.compute_rdp_tree(0, steps_list, max_participation, - orders) + prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders) for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]: - rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation, orders) + rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, + steps_list, orders) eps = rdp_accountant.get_privacy_spent( orders, rdp, target_delta=target_delta)[0] self.assertLess(eps, prev_eps) @parameterized.named_parameters( - ('negative_noise', -1, 3, 2, 1), - ('empty_steps', 1, [], 2, 1), - ('empty_part', 1, 1, [], 1), - ('negative_steps', 1, -3, 2, 1), - ('zero_participation', 1, 3, 0, 1), - ('negative_participation', 1, 3, -1, 1), + ('negative_noise', -1, 3, 1), + ('empty_steps', 1, [], 1), + ('negative_steps', 1, -3, 1), ) - def test_compute_rdp_tree_raise(self, noise_multiplier, steps_list, - max_participation, orders): + def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list, + orders): with self.assertRaisesRegex(ValueError, 'must be'): - rdp_accountant.compute_rdp_tree(noise_multiplier, steps_list, - max_participation, orders) - - @parameterized.named_parameters( - ('list_scalar', [2], 1), - ('scalar_list', 2, [1]), - ('list_length', [2, 3], [1]), - ('list_length2', [2, 3], [1, 2, 2]), - ) - def test_compute_rdp_tree_raise_input_type(self, steps_list, - max_participation): - with self.assertRaisesRegex(ValueError, 'must have the same'): - rdp_accountant.compute_rdp_tree( - 0.1, steps_list, max_participation, orders=1) + rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list, + orders) @parameterized.named_parameters( ('t100n0.1', 100, 0.1), @@ -354,9 +329,9 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): ) def test_no_tree_no_sampling(self, total_steps, noise_multiplier): orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) - tree_rdp = rdp_accountant.compute_rdp_tree(noise_multiplier, - [1] * total_steps, - [1] * total_steps, orders) + tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, + [1] * total_steps, + orders) rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders) self.assertAllClose(tree_rdp, rdp, rtol=1e-12) From 7c4f5bab0964bd32b7ceafa009d9488920856440 Mon Sep 17 00:00:00 2001 From: David Marn Date: Thu, 11 Nov 2021 02:16:31 -0800 Subject: [PATCH 66/71] Allows one to run a test on probabilities alone. 
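
A hedged sketch of what validation now accepts (`probs_train` appears in the
diff below; the `probs_test` field name and the `validate()` call are assumed
to mirror the existing loss/logit fields, and the shapes are illustrative):

    import numpy as np
    from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures

    attack_input = data_structures.AttackInputData(
        probs_train=np.random.rand(100, 10),
        probs_test=np.random.rand(50, 10))
    attack_input.validate()  # no longer raises when only probabilities are set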
PiperOrigin-RevId: 409095932 --- .../data_structures.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py index d9137b8..da21ebd 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py @@ -15,12 +15,13 @@ # Lint as: python3 """Data structures representing attack inputs, configuration, outputs.""" import collections +import dataclasses import enum import glob import os import pickle from typing import Any, Iterable, Union -from dataclasses import dataclass + import numpy as np import pandas as pd from scipy import special @@ -37,7 +38,7 @@ class SlicingFeature(enum.Enum): CORRECTLY_CLASSIFIED = 'correctly_classified' -@dataclass +@dataclasses.dataclass class SingleSliceSpec: """Specifies a slice. @@ -64,7 +65,7 @@ class SingleSliceSpec: return '%s=%s' % (self.feature.name, self.value) -@dataclass +@dataclasses.dataclass class SlicingSpec: """Specification of a slicing procedure. @@ -165,7 +166,7 @@ def _log_value(probs, small_value=1e-30): return -np.log(np.maximum(probs, small_value)) -@dataclass +@dataclasses.dataclass class AttackInputData: """Input data for running an attack. @@ -334,9 +335,11 @@ class AttackInputData: 'labels_train and labels_test should both be either set or unset') if (self.labels_train is None and self.loss_train is None and - self.logits_train is None and self.entropy_train is None): + self.logits_train is None and self.entropy_train is None and + self.probs_train is None): raise ValueError( - 'At least one of labels, logits, losses or entropy should be set') + 'At least one of labels, logits, losses, probabilities or entropy should be set' + ) if self.labels_train is not None and not _is_integer_type_array( self.labels_train): @@ -390,7 +393,7 @@ def _append_array_shape(arr: np.array, arr_name: str, result): result.append(' %s with shape: %s,' % (arr_name, arr.shape)) -@dataclass +@dataclasses.dataclass class RocCurve: """Represents ROC curve of a membership inference classifier.""" # Thresholds used to define points on ROC curve. @@ -433,7 +436,7 @@ class RocCurve: DataSize = collections.namedtuple('DataSize', 'ntrain ntest') -@dataclass +@dataclasses.dataclass class SingleAttackResult: """Results from running a single attack.""" @@ -488,7 +491,7 @@ class SingleAttackResult: ]) -@dataclass +@dataclasses.dataclass class SingleMembershipProbabilityResult: """Results from computing membership probabilities (denoted as privacy risk score in https://arxiv.org/abs/2003.10595). @@ -578,7 +581,7 @@ class SingleMembershipProbabilityResult: return summary -@dataclass +@dataclasses.dataclass class MembershipProbabilityResults: """Membership probability results from multiple data slices.""" @@ -593,7 +596,7 @@ class MembershipProbabilityResults: return '\n'.join(summary) -@dataclass +@dataclasses.dataclass class PrivacyReportMetadata: """Metadata about the evaluated model. 
@@ -622,7 +625,7 @@ class AttackResultsDFColumns(enum.Enum): return '%s' % self.value -@dataclass +@dataclasses.dataclass class AttackResults: """Results from running multiple attacks.""" single_attack_results: Iterable[SingleAttackResult] @@ -759,7 +762,7 @@ class AttackResults: return pickle.load(inp) -@dataclass +@dataclasses.dataclass class AttackResultsCollection: """A collection of AttackResults.""" attack_results_list: Iterable[AttackResults] From 290ecf7797a883e6015902f77f0ac1366edb57ea Mon Sep 17 00:00:00 2001 From: Wennan Zhu Date: Tue, 23 Nov 2021 10:37:41 -0800 Subject: [PATCH 67/71] Create a hierarchical histogram IterativeProcess that is compatible with tff.backends.mapreduce.MapReduceForm. PiperOrigin-RevId: 411845363 --- tensorflow_privacy/privacy/dp_query/tree_range_query.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow_privacy/privacy/dp_query/tree_range_query.py b/tensorflow_privacy/privacy/dp_query/tree_range_query.py index f5a6083..471915b 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_range_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_range_query.py @@ -26,7 +26,6 @@ from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query -@tf.function def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: """A function constructs a complete tree given all the leaf nodes. @@ -50,10 +49,11 @@ def _build_tree_from_leaf(leaf_nodes: tf.Tensor, arity: int) -> tf.RaggedTensor: """ def pad_zero(leaf_nodes, size): - paddings = [[0, size - len(leaf_nodes)]] - return tf.pad(leaf_nodes, paddings) + paddings = tf.zeros( + shape=(size - leaf_nodes.shape[0],), dtype=leaf_nodes.dtype) + return tf.concat((leaf_nodes, paddings), axis=0) - leaf_nodes_size = tf.constant(len(leaf_nodes), dtype=tf.float32) + leaf_nodes_size = tf.constant(leaf_nodes.shape[0], dtype=tf.float32) num_layers = tf.math.ceil( tf.math.log(leaf_nodes_size) / tf.math.log(tf.cast(arity, dtype=tf.float32))) + 1 From 49db04e3561638fc02795edb5774d322cdd1d7d1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 1 Dec 2021 15:06:01 -0800 Subject: [PATCH 68/71] Overwrite the get_config method for dp_optimizer to reflect the actual parameters required(noise_multiplier, l2_norm_clip) PiperOrigin-RevId: 413520270 --- .../privacy/optimizers/dp_optimizer_keras.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py index 2fe2a7f..1ba11b9 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py @@ -332,6 +332,25 @@ def make_keras_optimizer_class(cls): return final_grads + def get_config(self): + """Returns the config of the optimizer. + + An optimizer config is a Python dictionary (serializable) + containing the configuration of an optimizer. + The same optimizer can be reinstantiated later + (without any saved state) from this configuration. + + Returns: + Python dictionary. 
+ """ + config = super(DPOptimizerClass, self).get_config() + config.update({ + 'l2_norm_clip': self._l2_norm_clip, + 'noise_multiplier': self._noise_multiplier, + 'num_microbatches': self._num_microbatches, + }) + return config + def apply_gradients(self, grads_and_vars, global_step=None, name=None): """DP-SGD version of base class method.""" assert self._was_dp_gradients_called, ( From 245fd069ca50b6a7ec38b8a5a32e29bc95983c90 Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Mon, 6 Dec 2021 17:38:14 -0800 Subject: [PATCH 69/71] RDP accounting for tree aggregation without restart. This implements the dynamic programming algorithm detailed in the updated version of "Practical and Private (Deep) Learning without Sampling or Shuffling" https://arxiv.org/abs/2103.00039. PiperOrigin-RevId: 414583453 --- .../privacy/analysis/rdp_accountant.py | 197 +++++++++++++++++- .../privacy/analysis/rdp_accountant_test.py | 84 ++++++++ 2 files changed, 276 insertions(+), 5 deletions(-) diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py index d50a7a8..e00c7b3 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py @@ -40,6 +40,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools import math import sys from typing import Collection, Union @@ -398,6 +399,7 @@ def compute_rdp(q, noise_multiplier, steps, orders): return rdp * steps +# TODO(b/193679963): move accounting for tree aggregation to a separate module def _compute_rdp_tree_restart(sigma, steps_list, alpha): """Computes RDP of the Tree Aggregation Protocol at order alpha.""" if np.isinf(alpha): @@ -407,7 +409,8 @@ def _compute_rdp_tree_restart(sigma, steps_list, alpha): for steps in steps_list if steps > 0 ] - return alpha * sum(tree_depths) / (2 * sigma**2) + return _compute_gaussian_rdp( + alpha=alpha, sum_sensitivity_square=sum(tree_depths), sigma=sigma) def compute_rdp_tree_restart( @@ -431,10 +434,8 @@ def compute_rdp_tree_restart( Returns: The RDPs at all orders. Can be `np.inf`. """ - if noise_multiplier < 0: - raise ValueError( - f"Noise multiplier must be non-negative, got {noise_multiplier}") - elif noise_multiplier == 0: + _check_nonnegative(noise_multiplier, "noise_multiplier") + if noise_multiplier == 0: return np.inf if not steps_list: @@ -460,6 +461,192 @@ def compute_rdp_tree_restart( return rdp +def _check_nonnegative(value: Union[int, float], name: str): + if value < 0: + raise ValueError(f"Provided {name} must be non-negative, got {value}") + + +def _check_possible_tree_participation(num_participation: int, + min_separation: int, start: int, + end: int, steps: int) -> bool: + """Check if participation is possible with `min_separation` in `steps`. + + This function checks if it is possible for a sample to appear + `num_participation` in `steps`, assuming there are at least `min_separation` + nodes between the appearance of the same sample in the streaming data (leaf + nodes in tree aggregation). The first appearance of the sample is after + `start` steps, and the sample won't appear in the `end` steps after the given + `steps`. + + Args: + num_participation: The number of times a sample will appear. + min_separation: The minimum number of nodes between two appearance of a + sample. If a sample appears in consecutive x, y steps in a streaming + setting, then `min_separation=y-x-1`. 
+    start: The first appearance of the sample is after `start` steps.
+    end: The sample won't appear in the `end` steps after the given `steps`.
+    steps: Total number of steps (leaf nodes in tree aggregation).
+
+  Returns:
+    True if a sample can appear `num_participation` times under the given
+    conditions.
+  """
+  return start + (min_separation + 1) * num_participation <= steps + end
+
+
+@functools.lru_cache(maxsize=None)
+def _tree_sensitivity_square_sum(num_participation: int, min_separation: int,
+                                 start: int, end: int, size: int) -> float:
+  """Compute the worst-case sum of sensitivity square for `num_participation`.
+
+  This is the key algorithm for DP accounting for DP-FTRL tree aggregation
+  without restart, which recursively counts the worst-case occurrence of a
+  sample in all the nodes in a tree. This implements a dynamic programming
+  algorithm that exhausts the possible `num_participation` appearances of a
+  sample in `size` leaf nodes. See Appendix D.2 (DP-FTRL-NoTreeRestart) of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    num_participation: The number of times a sample will appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive x, y steps in a streaming
+      setting, then `min_separation=y-x-1`.
+    start: The first appearance of the sample is after `start` steps.
+    end: The sample won't appear in the `end` steps after the given `size`
+      steps.
+    size: Total number of steps (leaf nodes in tree aggregation).
+
+  Returns:
+    The worst-case sum of sensitivity square for the given input.
+  """
+  if not _check_possible_tree_participation(num_participation, min_separation,
+                                            start, end, size):
+    sum_value = -np.inf
+  elif num_participation == 0:
+    sum_value = 0.
+  elif num_participation == 1 and size == 1:
+    sum_value = 1.
+  else:
+    size_log2 = math.log2(size)
+    max_2power = math.floor(size_log2)
+    if max_2power == size_log2:
+      sum_value = num_participation**2
+      max_2power -= 1
+    else:
+      sum_value = 0.
+    candidate_sum = []
+    # i is the `num_participation` in the right subtree
+    for i in range(num_participation + 1):
+      # j is the `start` in the right subtree
+      for j in range(min_separation + 1):
+        left_sum = _tree_sensitivity_square_sum(
+            num_participation=num_participation - i,
+            min_separation=min_separation,
+            start=start,
+            end=j,
+            size=2**max_2power)
+        if np.isinf(left_sum):
+          candidate_sum.append(-np.inf)
+          continue  # Early pruning for dynamic programming
+        right_sum = _tree_sensitivity_square_sum(
+            num_participation=i,
+            min_separation=min_separation,
+            start=j,
+            end=end,
+            size=size - 2**max_2power)
+        candidate_sum.append(left_sum + right_sum)
+    sum_value += max(candidate_sum)
+  return sum_value
+
+
+def _max_tree_sensitivity_square_sum(max_participation: int,
+                                     min_separation: int, steps: int) -> float:
+  """Compute the worst-case sum of sensitivity square in tree aggregation.
+
+  See Appendix D.2 of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    max_participation: The maximum number of times a sample will appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive x, y steps in a streaming
+      setting, then `min_separation=y-x-1`.
+    steps: Total number of steps (leaf nodes in tree aggregation).
+
+  Returns:
+    The worst-case sum of sensitivity square for the given input.
+ """ + num_participation = max_participation + while not _check_possible_tree_participation( + num_participation, min_separation, 0, min_separation, steps): + num_participation -= 1 + candidate_sum = [] + for num_part in range(1, num_participation + 1): + candidate_sum.append( + _tree_sensitivity_square_sum(num_part, min_separation, 0, + min_separation, steps)) + return max(candidate_sum) + + +def _compute_gaussian_rdp(sigma: float, sum_sensitivity_square: float, + alpha: float) -> float: + """Computes RDP of Gaussian mechanism.""" + if np.isinf(alpha): + return np.inf + return alpha * sum_sensitivity_square / (2 * sigma**2) + + +def compute_rdp_single_tree( + noise_multiplier: float, total_steps: int, max_participation: int, + min_separation: int, + orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: + """Computes RDP of the Tree Aggregation Protocol for a single tree. + + The accounting assume a single tree is constructed for `total_steps` leaf + nodes, where the same sample will appear at most `max_participation` times, + and there are at least `min_separation` nodes between two appearance. The key + idea is to (recurrently) count the worst-case occurence of a sample + in all the nodes in a tree, which implements a dynamic programming algorithm + that exhausts the possible `num_participation` appearance of a sample in + `steps` leaf nodes. + + See Appendix D of + "Practical and Private (Deep) Learning without Sampling or Shuffling" + https://arxiv.org/abs/2103.00039. + + Args: + noise_multiplier: A non-negative float representing the ratio of the + standard deviation of the Gaussian noise to the l2-sensitivity of the + function to which it is added. + total_steps: Total number of steps (leaf nodes in tree aggregation). + max_participation: The maximum number of times a sample can appear. + min_separation: The minimum number of nodes between two appearance of a + sample. If a sample appears in consecutive x, y steps in a streaming + setting, then `min_separation=y-x-1`. + orders: An array (or a scalar) of RDP orders. + + Returns: + The RDPs at all orders. Can be `np.inf`. + """ + _check_nonnegative(noise_multiplier, "noise_multiplier") + if noise_multiplier == 0: + return np.inf + _check_nonnegative(total_steps, "total_steps") + _check_nonnegative(max_participation, "max_participation") + _check_nonnegative(min_separation, "min_separation") + sum_sensitivity_square = _max_tree_sensitivity_square_sum( + max_participation, min_separation, total_steps) + if np.isscalar(orders): + rdp = _compute_gaussian_rdp(noise_multiplier, sum_sensitivity_square, + orders) + else: + rdp = np.array([ + _compute_gaussian_rdp(noise_multiplier, sum_sensitivity_square, alpha) + for alpha in orders + ]) + return rdp + + def compute_rdp_sample_without_replacement(q, noise_multiplier, steps, orders): """Compute RDP of Gaussian Mechanism using sampling without replacement. 
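For reviewers, a minimal usage sketch of the new accounting added by this patch (the
hyperparameter values below are illustrative assumptions, not recommendations; as of this
patch the functions still live in `rdp_accountant.py`):

  from tensorflow_privacy.privacy.analysis import rdp_accountant

  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  # A sample appears at most 4 times among 1024 leaf nodes, with at least
  # 256 leaf nodes between two appearances of the same sample.
  rdp = rdp_accountant.compute_rdp_single_tree(
      noise_multiplier=1.0, total_steps=1024, max_participation=4,
      min_separation=256, orders=orders)
  # Convert RDP to (epsilon, delta)-DP with the existing helper.
  eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-6)[0]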
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py index 42e751c..df241af 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py @@ -335,6 +335,90 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders) self.assertAllClose(tree_rdp, rdp, rtol=1e-12) + @parameterized.named_parameters( + ('negative_noise', -1, 3, 1, 1), + ('negative_steps', 0.1, -3, 1, 1), + ('negative_part', 0.1, 3, -1, 1), + ('negative_sep', 0.1, 3, 1, -1), + ) + def test_compute_rdp_single_tree_raise(self, noise_multiplier, total_steps, + max_participation, min_separation): + orders = 1 + with self.assertRaisesRegex(ValueError, 'must be'): + rdp_accountant.compute_rdp_single_tree(noise_multiplier, total_steps, + max_participation, min_separation, + orders) + + @parameterized.named_parameters( + ('3', 3), + ('8', 8), + ('11', 11), + ('19', 19), + ) + def test_max_tree_sensitivity_square_sum_every_step(self, steps): + max_participation, min_separation = steps, 0 + # If a sample will appear in every leaf node, we can infer the total + # sensitivity by adding all the nodes. + steps_bin = bin(steps)[2:] + depth = [ + len(steps_bin) - 1 - i for i, v in enumerate(steps_bin) if v == '1' + ] + expected = sum([2**d * (2**(d + 1) - 1) for d in depth]) + self.assertEqual( + expected, + rdp_accountant._max_tree_sensitivity_square_sum(max_participation, + min_separation, steps)) + + @parameterized.named_parameters( + ('11', 11), + ('19', 19), + ('200', 200), + ) + def test_max_tree_sensitivity_square_sum_every_step_part(self, max_part): + steps, min_separation = 8, 0 + assert max_part > steps + # If a sample will appear in every leaf node, we can infer the total + # sensitivity by adding all the nodes. + expected = 120 + self.assertEqual( + expected, + rdp_accountant._max_tree_sensitivity_square_sum(max_part, + min_separation, steps)) + + @parameterized.named_parameters( + ('3', 3), + ('8', 8), + ('11', 11), + ('19', 19), + ) + def test_max_tree_sensitivity_square_sum_every_step_part2(self, steps): + max_participation, min_separation = 2, 0 + # If a sample will appear twice, the worst case is to put the two nodes at + # consecutive nodes of the deepest subtree. + steps_bin = bin(steps)[2:] + depth = len(steps_bin) - 1 + expected = 2 + 4 * depth + self.assertEqual( + expected, + rdp_accountant._max_tree_sensitivity_square_sum(max_participation, + min_separation, steps)) + + @parameterized.named_parameters( + ('test1', 1, 7, 8, 4), + ('test2', 3, 3, 9, 11), + ('test3', 3, 2, 7, 9), + # This is an example showing worst-case sensitivity is larger than greedy + # in "Practical and Private (Deep) Learning without Sampling or Shuffling" + # https://arxiv.org/abs/2103.00039. + ('test4', 8, 2, 24, 88), + ) + def test_max_tree_sensitivity_square_sum_toy(self, max_participation, + min_separation, steps, expected): + self.assertEqual( + expected, + rdp_accountant._max_tree_sensitivity_square_sum(max_participation, + min_separation, steps)) + if __name__ == '__main__': tf.test.main() From 8850c23f67d31a3baee2490224dea9a605581ddd Mon Sep 17 00:00:00 2001 From: Zheng Xu Date: Tue, 7 Dec 2021 10:48:30 -0800 Subject: [PATCH 70/71] Move tree_aggregation accountant to their own module. 
PiperOrigin-RevId: 414770173 --- tensorflow_privacy/__init__.py | 3 +- .../privacy/analysis/rdp_accountant.py | 251 -------------- .../privacy/analysis/rdp_accountant_test.py | 156 --------- .../analysis/tree_aggregation_accountant.py | 315 ++++++++++++++++++ .../tree_aggregation_accountant_test.py | 185 ++++++++++ 5 files changed, 502 insertions(+), 408 deletions(-) create mode 100644 tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py create mode 100644 tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 72cc746..fcf607e 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -45,8 +45,9 @@ else: from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_heterogeneous_rdp from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp - from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_tree_restart from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent + from tensorflow_privacy.privacy.analysis.tree_aggregation_accountant import compute_rdp_tree_restart + from tensorflow_privacy.privacy.analysis.tree_aggregation_accountant import compute_rdp_single_tree # DPQuery classes from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py index e00c7b3..380ff9c 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py @@ -40,11 +40,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import functools import math import sys -from typing import Collection, Union - import numpy as np from scipy import special import six @@ -399,254 +396,6 @@ def compute_rdp(q, noise_multiplier, steps, orders): return rdp * steps -# TODO(b/193679963): move accounting for tree aggregation to a separate module -def _compute_rdp_tree_restart(sigma, steps_list, alpha): - """Computes RDP of the Tree Aggregation Protocol at order alpha.""" - if np.isinf(alpha): - return np.inf - tree_depths = [ - math.floor(math.log2(float(steps))) + 1 - for steps in steps_list - if steps > 0 - ] - return _compute_gaussian_rdp( - alpha=alpha, sum_sensitivity_square=sum(tree_depths), sigma=sigma) - - -def compute_rdp_tree_restart( - noise_multiplier: float, steps_list: Union[int, Collection[int]], - orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: - """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism. - - This function implements the accounting when the tree is restarted at every - epoch. See appendix of - "Practical and Private (Deep) Learning without Sampling or Shuffling" - https://arxiv.org/abs/2103.00039. - - Args: - noise_multiplier: A non-negative float representing the ratio of the - standard deviation of the Gaussian noise to the l2-sensitivity of the - function to which it is added. - steps_list: A scalar or a list of non-negative intergers representing the - number of steps per epoch (between two restarts). - orders: An array (or a scalar) of RDP orders. - - Returns: - The RDPs at all orders. Can be `np.inf`. 
- """ - _check_nonnegative(noise_multiplier, "noise_multiplier") - if noise_multiplier == 0: - return np.inf - - if not steps_list: - raise ValueError( - "steps_list must be a non-empty list, or a non-zero scalar, got " - f"{steps_list}.") - - if np.isscalar(steps_list): - steps_list = [steps_list] - - for steps in steps_list: - if steps < 0: - raise ValueError(f"Steps must be non-negative, got {steps_list}") - - if np.isscalar(orders): - rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders) - else: - rdp = np.array([ - _compute_rdp_tree_restart(noise_multiplier, steps_list, alpha) - for alpha in orders - ]) - - return rdp - - -def _check_nonnegative(value: Union[int, float], name: str): - if value < 0: - raise ValueError(f"Provided {name} must be non-negative, got {value}") - - -def _check_possible_tree_participation(num_participation: int, - min_separation: int, start: int, - end: int, steps: int) -> bool: - """Check if participation is possible with `min_separation` in `steps`. - - This function checks if it is possible for a sample to appear - `num_participation` in `steps`, assuming there are at least `min_separation` - nodes between the appearance of the same sample in the streaming data (leaf - nodes in tree aggregation). The first appearance of the sample is after - `start` steps, and the sample won't appear in the `end` steps after the given - `steps`. - - Args: - num_participation: The number of times a sample will appear. - min_separation: The minimum number of nodes between two appearance of a - sample. If a sample appears in consecutive x, y steps in a streaming - setting, then `min_separation=y-x-1`. - start: The first appearance of the sample is after `start` steps. - end: The sample won't appear in the `end` steps after the given `steps`. - steps: Total number of steps (leaf nodes in tree aggregation). - - Returns: - True if a sample can appear `num_participation` with given conditions. - """ - return start + (min_separation + 1) * num_participation <= steps + end - - -@functools.lru_cache(maxsize=None) -def _tree_sensitivity_square_sum(num_participation: int, min_separation: int, - start: int, end: int, size: int) -> float: - """Compute the worst-case sum of sensitivtiy square for `num_participation`. - - This is the key algorithm for DP accounting for DP-FTRL tree aggregation - without restart, which recurrently counts the worst-case occurence of a sample - in all the nodes in a tree. This implements a dynamic programming algorithm - that exhausts the possible `num_participation` appearance of a sample in - `size` leaf nodes. See Appendix D.2 (DP-FTRL-NoTreeRestart) of - "Practical and Private (Deep) Learning without Sampling or Shuffling" - https://arxiv.org/abs/2103.00039. - - Args: - num_participation: The number of times a sample will appear. - min_separation: The minimum number of nodes between two appearance of a - sample. If a sample appears in consecutive x, y size in a streaming - setting, then `min_separation=y-x-1`. - start: The first appearance of the sample is after `start` steps. - end: The sample won't appear in the `end` steps after given `size` steps. - size: Total number of steps (leaf nodes in tree aggregation). - - Returns: - The worst-case sum of sensitivity square for the given input. - """ - if not _check_possible_tree_participation(num_participation, min_separation, - start, end, size): - sum_value = -np.inf - elif num_participation == 0: - sum_value = 0. - elif num_participation == 1 and size == 1: - sum_value = 1. 
- else: - size_log2 = math.log2(size) - max_2power = math.floor(size_log2) - if max_2power == size_log2: - sum_value = num_participation**2 - max_2power -= 1 - else: - sum_value = 0. - candidate_sum = [] - # i is the `num_participation` in the right subtree - for i in range(num_participation + 1): - # j is the `start` in the right subtree - for j in range(min_separation + 1): - left_sum = _tree_sensitivity_square_sum( - num_participation=num_participation - i, - min_separation=min_separation, - start=start, - end=j, - size=2**max_2power) - if np.isinf(left_sum): - candidate_sum.append(-np.inf) - continue # Early pruning for dynamic programming - right_sum = _tree_sensitivity_square_sum( - num_participation=i, - min_separation=min_separation, - start=j, - end=end, - size=size - 2**max_2power) - candidate_sum.append(left_sum + right_sum) - sum_value += max(candidate_sum) - return sum_value - - -def _max_tree_sensitivity_square_sum(max_participation: int, - min_separation: int, steps: int) -> float: - """Compute the worst-case sum of sensitivity square in tree aggregation. - - See Appendix D.2 of - "Practical and Private (Deep) Learning without Sampling or Shuffling" - https://arxiv.org/abs/2103.00039. - - Args: - max_participation: The maximum number of times a sample will appear. - min_separation: The minimum number of nodes between two appearance of a - sample. If a sample appears in consecutive x, y steps in a streaming - setting, then `min_separation=y-x-1`. - steps: Total number of steps (leaf nodes in tree aggregation). - - Returns: - The worst-case sum of sensitivity square for the given input. - """ - num_participation = max_participation - while not _check_possible_tree_participation( - num_participation, min_separation, 0, min_separation, steps): - num_participation -= 1 - candidate_sum = [] - for num_part in range(1, num_participation + 1): - candidate_sum.append( - _tree_sensitivity_square_sum(num_part, min_separation, 0, - min_separation, steps)) - return max(candidate_sum) - - -def _compute_gaussian_rdp(sigma: float, sum_sensitivity_square: float, - alpha: float) -> float: - """Computes RDP of Gaussian mechanism.""" - if np.isinf(alpha): - return np.inf - return alpha * sum_sensitivity_square / (2 * sigma**2) - - -def compute_rdp_single_tree( - noise_multiplier: float, total_steps: int, max_participation: int, - min_separation: int, - orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: - """Computes RDP of the Tree Aggregation Protocol for a single tree. - - The accounting assume a single tree is constructed for `total_steps` leaf - nodes, where the same sample will appear at most `max_participation` times, - and there are at least `min_separation` nodes between two appearance. The key - idea is to (recurrently) count the worst-case occurence of a sample - in all the nodes in a tree, which implements a dynamic programming algorithm - that exhausts the possible `num_participation` appearance of a sample in - `steps` leaf nodes. - - See Appendix D of - "Practical and Private (Deep) Learning without Sampling or Shuffling" - https://arxiv.org/abs/2103.00039. - - Args: - noise_multiplier: A non-negative float representing the ratio of the - standard deviation of the Gaussian noise to the l2-sensitivity of the - function to which it is added. - total_steps: Total number of steps (leaf nodes in tree aggregation). - max_participation: The maximum number of times a sample can appear. 
- min_separation: The minimum number of nodes between two appearance of a - sample. If a sample appears in consecutive x, y steps in a streaming - setting, then `min_separation=y-x-1`. - orders: An array (or a scalar) of RDP orders. - - Returns: - The RDPs at all orders. Can be `np.inf`. - """ - _check_nonnegative(noise_multiplier, "noise_multiplier") - if noise_multiplier == 0: - return np.inf - _check_nonnegative(total_steps, "total_steps") - _check_nonnegative(max_participation, "max_participation") - _check_nonnegative(min_separation, "min_separation") - sum_sensitivity_square = _max_tree_sensitivity_square_sum( - max_participation, min_separation, total_steps) - if np.isscalar(orders): - rdp = _compute_gaussian_rdp(noise_multiplier, sum_sensitivity_square, - orders) - else: - rdp = np.array([ - _compute_gaussian_rdp(noise_multiplier, sum_sensitivity_square, alpha) - for alpha in orders - ]) - return rdp - - def compute_rdp_sample_without_replacement(q, noise_multiplier, steps, orders): """Compute RDP of Gaussian Mechanism using sampling without replacement. diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py index df241af..63983ad 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py @@ -264,161 +264,5 @@ class TestGaussianMoments(tf.test.TestCase, parameterized.TestCase): self.assertLessEqual(delta, delta1 + 1e-300) -class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase): - - @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04)) - def test_compute_eps_tree(self, noise_multiplier, eps): - orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) - # This tests is based on the StackOverflow setting in "Practical and - # Private (Deep) Learning without Sampling or Shuffling". The calculated - # epsilon could be better as the method in this package keeps improving. - steps_list, target_delta = 1600, 1e-6 - rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list, - orders) - new_eps = rdp_accountant.get_privacy_spent( - orders, rdp, target_delta=target_delta)[0] - self.assertLess(new_eps, eps) - - @parameterized.named_parameters( - ('restart4', [400] * 4), - ('restart2', [800] * 2), - ('adaptive', [10, 400, 400, 400, 390]), - ) - def test_compose_tree_rdp(self, steps_list): - noise_multiplier, orders = 0.1, 1 - rdp_list = [ - rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps, orders) - for steps in steps_list - ] - rdp_composed = rdp_accountant.compute_rdp_tree_restart( - noise_multiplier, steps_list, orders) - self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12) - - @parameterized.named_parameters( - ('restart4', [400] * 4), - ('restart2', [800] * 2), - ('adaptive', [10, 400, 400, 400, 390]), - ) - def test_compute_eps_tree_decreasing(self, steps_list): - # Test privacy epsilon decreases with noise multiplier increasing when - # keeping other parameters the same. - orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64)) - target_delta = 1e-6 - prev_eps = rdp_accountant.compute_rdp_tree_restart(0, steps_list, orders) - for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]: - rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, - steps_list, orders) - eps = rdp_accountant.get_privacy_spent( - orders, rdp, target_delta=target_delta)[0] - self.assertLess(eps, prev_eps) - - @parameterized.named_parameters( - ('negative_noise', -1, 3, 1), - ('empty_steps', 1, [], 1), - ('negative_steps', 1, -3, 1), - ) - def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list, - orders): - with self.assertRaisesRegex(ValueError, 'must be'): - rdp_accountant.compute_rdp_tree_restart(noise_multiplier, steps_list, - orders) - - @parameterized.named_parameters( - ('t100n0.1', 100, 0.1), - ('t1000n0.01', 1000, 0.01), - ) - def test_no_tree_no_sampling(self, total_steps, noise_multiplier): - orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) - tree_rdp = rdp_accountant.compute_rdp_tree_restart(noise_multiplier, - [1] * total_steps, - orders) - rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders) - self.assertAllClose(tree_rdp, rdp, rtol=1e-12) - - @parameterized.named_parameters( - ('negative_noise', -1, 3, 1, 1), - ('negative_steps', 0.1, -3, 1, 1), - ('negative_part', 0.1, 3, -1, 1), - ('negative_sep', 0.1, 3, 1, -1), - ) - def test_compute_rdp_single_tree_raise(self, noise_multiplier, total_steps, - max_participation, min_separation): - orders = 1 - with self.assertRaisesRegex(ValueError, 'must be'): - rdp_accountant.compute_rdp_single_tree(noise_multiplier, total_steps, - max_participation, min_separation, - orders) - - @parameterized.named_parameters( - ('3', 3), - ('8', 8), - ('11', 11), - ('19', 19), - ) - def test_max_tree_sensitivity_square_sum_every_step(self, steps): - max_participation, min_separation = steps, 0 - # If a sample will appear in every leaf node, we can infer the total - # sensitivity by adding all the nodes. - steps_bin = bin(steps)[2:] - depth = [ - len(steps_bin) - 1 - i for i, v in enumerate(steps_bin) if v == '1' - ] - expected = sum([2**d * (2**(d + 1) - 1) for d in depth]) - self.assertEqual( - expected, - rdp_accountant._max_tree_sensitivity_square_sum(max_participation, - min_separation, steps)) - - @parameterized.named_parameters( - ('11', 11), - ('19', 19), - ('200', 200), - ) - def test_max_tree_sensitivity_square_sum_every_step_part(self, max_part): - steps, min_separation = 8, 0 - assert max_part > steps - # If a sample will appear in every leaf node, we can infer the total - # sensitivity by adding all the nodes. - expected = 120 - self.assertEqual( - expected, - rdp_accountant._max_tree_sensitivity_square_sum(max_part, - min_separation, steps)) - - @parameterized.named_parameters( - ('3', 3), - ('8', 8), - ('11', 11), - ('19', 19), - ) - def test_max_tree_sensitivity_square_sum_every_step_part2(self, steps): - max_participation, min_separation = 2, 0 - # If a sample will appear twice, the worst case is to put the two nodes at - # consecutive nodes of the deepest subtree. 
- steps_bin = bin(steps)[2:] - depth = len(steps_bin) - 1 - expected = 2 + 4 * depth - self.assertEqual( - expected, - rdp_accountant._max_tree_sensitivity_square_sum(max_participation, - min_separation, steps)) - - @parameterized.named_parameters( - ('test1', 1, 7, 8, 4), - ('test2', 3, 3, 9, 11), - ('test3', 3, 2, 7, 9), - # This is an example showing worst-case sensitivity is larger than greedy - # in "Practical and Private (Deep) Learning without Sampling or Shuffling" - # https://arxiv.org/abs/2103.00039. - ('test4', 8, 2, 24, 88), - ) - def test_max_tree_sensitivity_square_sum_toy(self, max_participation, - min_separation, steps, expected): - self.assertEqual( - expected, - rdp_accountant._max_tree_sensitivity_square_sum(max_participation, - min_separation, steps)) - - if __name__ == '__main__': tf.test.main() diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py new file mode 100644 index 0000000..417c910 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py @@ -0,0 +1,315 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""DP analysis of tree aggregation. + +See Appendix D of +"Practical and Private (Deep) Learning without Sampling or Shuffling" + https://arxiv.org/abs/2103.00039. + +Functionality for computing differential privacy of tree aggregation of Gaussian +mechanism. Its public interface consists of the following methods: + compute_rdp_tree_restart( + noise_multiplier: float, steps_list: Union[int, Collection[int]], + orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: + computes RDP for DP-FTRL-TreeRestart. + compute_rdp_single_tree( + noise_multiplier: float, total_steps: int, max_participation: int, + min_separation: int, + orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]: + computes RDP for DP-FTRL-NoTreeRestart. + +For RDP to (epsilon, delta)-DP conversion, use the following public function +described in `rdp_accountant.py`: + get_privacy_spent(orders, rdp, target_eps, target_delta) computes delta + (or eps) given RDP at multiple orders and + a target value for eps (or delta). + +Example use: + +(1) DP-FTRL-TreeRestart RDP: +Suppose we use Gaussian mechanism of `noise_multiplier`; a sample may appear +at most once for every epoch and tree is restarted every epoch; the number of +leaf nodes for every epoch are tracked in `steps_list`. For `target_delta`, the +estimated epsilon is: + orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64))
+  rdp = compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
+  eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta)[0]
+
+(2) DP-FTRL-NoTreeRestart RDP:
+Suppose we use Gaussian mechanism of `noise_multiplier`; a sample may appear
+at most `max_participation` times for a total of `total_steps` leaf nodes in a
+single tree; there are at least `min_separation` leaf nodes between two
+appearances of the same sample. For `target_delta`, the estimated epsilon is:
+  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+  rdp = compute_rdp_single_tree(noise_multiplier, total_steps,
+                                max_participation, min_separation, orders)
+  eps = rdp_accountant.get_privacy_spent(orders, rdp, target_delta)[0]
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import math
+from typing import Collection, Union
+
+import numpy as np
+
+
+def _compute_rdp_tree_restart(sigma, steps_list, alpha):
+  """Computes RDP of the Tree Aggregation Protocol at order alpha."""
+  if np.isinf(alpha):
+    return np.inf
+  tree_depths = [
+      math.floor(math.log2(float(steps))) + 1
+      for steps in steps_list
+      if steps > 0
+  ]
+  return _compute_gaussian_rdp(
+      alpha=alpha, sum_sensitivity_square=sum(tree_depths), sigma=sigma)
+
+
+def compute_rdp_tree_restart(
+    noise_multiplier: float, steps_list: Union[int, Collection[int]],
+    orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
+  """Computes RDP of the Tree Aggregation Protocol for Gaussian Mechanism.
+
+  This function implements the accounting when the tree is restarted at every
+  epoch. See Appendix D of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    noise_multiplier: A non-negative float representing the ratio of the
+      standard deviation of the Gaussian noise to the l2-sensitivity of the
+      function to which it is added.
+    steps_list: A scalar or a list of non-negative integers representing the
+      number of steps per epoch (between two restarts).
+    orders: An array (or a scalar) of RDP orders.
+
+  Returns:
+    The RDPs at all orders. Can be `np.inf`.
+  """
+  _check_nonnegative(noise_multiplier, "noise_multiplier")
+  if noise_multiplier == 0:
+    return np.inf
+
+  if not steps_list:
+    raise ValueError(
+        "steps_list must be a non-empty list, or a non-zero scalar, got "
+        f"{steps_list}.")
+
+  if np.isscalar(steps_list):
+    steps_list = [steps_list]
+
+  for steps in steps_list:
+    if steps < 0:
+      raise ValueError(f"Steps must be non-negative, got {steps_list}")
+
+  if np.isscalar(orders):
+    rdp = _compute_rdp_tree_restart(noise_multiplier, steps_list, orders)
+  else:
+    rdp = np.array([
+        _compute_rdp_tree_restart(noise_multiplier, steps_list, alpha)
+        for alpha in orders
+    ])
+
+  return rdp
+
+
+def _check_nonnegative(value: Union[int, float], name: str):
+  if value < 0:
+    raise ValueError(f"Provided {name} must be non-negative, got {value}")
+
+
+def _check_possible_tree_participation(num_participation: int,
+                                       min_separation: int, start: int,
+                                       end: int, steps: int) -> bool:
+  """Check if participation is possible with `min_separation` in `steps`.
+
+  This function checks if it is possible for a sample to appear
+  `num_participation` times in `steps` steps, assuming there are at least
+  `min_separation` nodes between the appearances of the same sample in the
+  streaming data (leaf nodes in tree aggregation). The first appearance of the
+  sample is after `start` steps, and the sample won't appear in the `end`
+  steps after the given `steps`.
+
+  Args:
+    num_participation: The number of times a sample will appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive steps x, y in a streaming
+      setting, then `min_separation=y-x-1`.
+    start: The first appearance of the sample is after `start` steps.
+    end: The sample won't appear in the `end` steps after the given `steps`.
+    steps: Total number of steps (leaf nodes in tree aggregation).
+
+  Returns:
+    True if a sample can appear `num_participation` times under the given
+    conditions.
+  """
+  return start + (min_separation + 1) * num_participation <= steps + end
+
+
+@functools.lru_cache(maxsize=None)
+def _tree_sensitivity_square_sum(num_participation: int, min_separation: int,
+                                 start: int, end: int, size: int) -> float:
+  """Computes the worst-case sum of squared sensitivity for `num_participation`.
+
+  This is the key algorithm for DP accounting for DP-FTRL tree aggregation
+  without restart, which recursively counts the worst-case occurrence of a
+  sample in all the nodes in a tree. This implements a dynamic programming
+  algorithm that exhausts the possible `num_participation` appearances of a
+  sample in `size` leaf nodes. See Appendix D.2 (DP-FTRL-NoTreeRestart) of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    num_participation: The number of times a sample will appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive steps x, y in a streaming
+      setting, then `min_separation=y-x-1`.
+    start: The first appearance of the sample is after `start` steps.
+    end: The sample won't appear in the `end` steps after the given `size`
+      steps.
+    size: Total number of steps (leaf nodes in tree aggregation).
+
+  Returns:
+    The worst-case sum of squared sensitivity for the given input.
+  """
+  if not _check_possible_tree_participation(num_participation, min_separation,
+                                            start, end, size):
+    sum_value = -np.inf
+  elif num_participation == 0:
+    sum_value = 0.
+  elif num_participation == 1 and size == 1:
+    sum_value = 1.
+  else:
+    size_log2 = math.log2(size)
+    max_2power = math.floor(size_log2)
+    if max_2power == size_log2:
+      sum_value = num_participation**2
+      max_2power -= 1
+    else:
+      sum_value = 0.
+    candidate_sum = []
+    # i is the `num_participation` in the right subtree
+    for i in range(num_participation + 1):
+      # j is the `start` in the right subtree
+      for j in range(min_separation + 1):
+        left_sum = _tree_sensitivity_square_sum(
+            num_participation=num_participation - i,
+            min_separation=min_separation,
+            start=start,
+            end=j,
+            size=2**max_2power)
+        if np.isinf(left_sum):
+          candidate_sum.append(-np.inf)
+          continue  # Early pruning for dynamic programming
+        right_sum = _tree_sensitivity_square_sum(
+            num_participation=i,
+            min_separation=min_separation,
+            start=j,
+            end=end,
+            size=size - 2**max_2power)
+        candidate_sum.append(left_sum + right_sum)
+    sum_value += max(candidate_sum)
+  return sum_value
+
+
+def _max_tree_sensitivity_square_sum(max_participation: int,
+                                     min_separation: int, steps: int) -> float:
+  """Computes the worst-case sum of squared sensitivity in tree aggregation.
+
+  See Appendix D.2 of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    max_participation: The maximum number of times a sample will appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive steps x, y in a streaming
+      setting, then `min_separation=y-x-1`.
+    steps: Total number of steps (leaf nodes in tree aggregation).
+
+  Returns:
+    The worst-case sum of squared sensitivity for the given input.
+  """
+  num_participation = max_participation
+  while not _check_possible_tree_participation(
+      num_participation, min_separation, 0, min_separation, steps):
+    num_participation -= 1
+  candidate_sum = []
+  for num_part in range(1, num_participation + 1):
+    candidate_sum.append(
+        _tree_sensitivity_square_sum(num_part, min_separation, 0,
+                                     min_separation, steps))
+  return max(candidate_sum)
+
+
+def _compute_gaussian_rdp(sigma: float, sum_sensitivity_square: float,
+                          alpha: float) -> float:
+  """Computes RDP of Gaussian mechanism."""
+  if np.isinf(alpha):
+    return np.inf
+  return alpha * sum_sensitivity_square / (2 * sigma**2)
+
+
+def compute_rdp_single_tree(
+    noise_multiplier: float, total_steps: int, max_participation: int,
+    min_separation: int,
+    orders: Union[float, Collection[float]]) -> Union[float, Collection[float]]:
+  """Computes RDP of the Tree Aggregation Protocol for a single tree.
+
+  The accounting assumes a single tree is constructed for `total_steps` leaf
+  nodes, where the same sample will appear at most `max_participation` times,
+  and there are at least `min_separation` nodes between two appearances. The
+  key idea is to (recursively) count the worst-case occurrence of a sample in
+  all the nodes in a tree, which implements a dynamic programming algorithm
+  that exhausts the possible `num_participation` appearances of a sample in
+  `steps` leaf nodes.
+
+  See Appendix D of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  Args:
+    noise_multiplier: A non-negative float representing the ratio of the
+      standard deviation of the Gaussian noise to the l2-sensitivity of the
+      function to which it is added.
+    total_steps: Total number of steps (leaf nodes in tree aggregation).
+    max_participation: The maximum number of times a sample can appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive steps x, y in a streaming
+      setting, then `min_separation=y-x-1`.
+    orders: An array (or a scalar) of RDP orders.
+
+  Returns:
+    The RDPs at all orders. Can be `np.inf`.
+  """
+  _check_nonnegative(noise_multiplier, "noise_multiplier")
+  if noise_multiplier == 0:
+    return np.inf
+  _check_nonnegative(total_steps, "total_steps")
+  _check_nonnegative(max_participation, "max_participation")
+  _check_nonnegative(min_separation, "min_separation")
+  sum_sensitivity_square = _max_tree_sensitivity_square_sum(
+      max_participation, min_separation, total_steps)
+  if np.isscalar(orders):
+    rdp = _compute_gaussian_rdp(noise_multiplier, sum_sensitivity_square,
+                                orders)
+  else:
+    rdp = np.array([
+        _compute_gaussian_rdp(noise_multiplier, sum_sensitivity_square, alpha)
+        for alpha in orders
+    ])
+  return rdp
diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py
new file mode 100644
index 0000000..17f6437
--- /dev/null
+++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py
@@ -0,0 +1,185 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tree_aggregation_accountant.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import tensorflow as tf
+
+from tensorflow_privacy.privacy.analysis import rdp_accountant
+from tensorflow_privacy.privacy.analysis import tree_aggregation_accountant
+
+
+class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(('eps20', 1.13, 19.74), ('eps2', 8.83, 2.04))
+  def test_compute_eps_tree(self, noise_multiplier, eps):
+    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+    # This test is based on the StackOverflow setting in "Practical and
+    # Private (Deep) Learning without Sampling or Shuffling". The calculated
+    # epsilon could be better as the method in this package keeps improving.
+    steps_list, target_delta = 1600, 1e-6
+    rdp = tree_aggregation_accountant.compute_rdp_tree_restart(
+        noise_multiplier, steps_list, orders)
+    new_eps = rdp_accountant.get_privacy_spent(
+        orders, rdp, target_delta=target_delta)[0]
+    self.assertLess(new_eps, eps)
+
+  @parameterized.named_parameters(
+      ('restart4', [400] * 4),
+      ('restart2', [800] * 2),
+      ('adaptive', [10, 400, 400, 400, 390]),
+  )
+  def test_compose_tree_rdp(self, steps_list):
+    noise_multiplier, orders = 0.1, 1
+    rdp_list = [
+        tree_aggregation_accountant.compute_rdp_tree_restart(
+            noise_multiplier, steps, orders) for steps in steps_list
+    ]
+    rdp_composed = tree_aggregation_accountant.compute_rdp_tree_restart(
+        noise_multiplier, steps_list, orders)
+    self.assertAllClose(rdp_composed, sum(rdp_list), rtol=1e-12)
+
+  @parameterized.named_parameters(
+      ('restart4', [400] * 4),
+      ('restart2', [800] * 2),
+      ('adaptive', [10, 400, 400, 400, 390]),
+  )
+  def test_compute_eps_tree_decreasing(self, steps_list):
+    # Test that privacy epsilon decreases as the noise multiplier increases
+    # when other parameters are kept the same.
+    orders = [1 + x / 10.
for x in range(1, 100)] + list(range(12, 64)) + target_delta = 1e-6 + prev_eps = tree_aggregation_accountant.compute_rdp_tree_restart( + 0, steps_list, orders) + for noise_multiplier in [0.1 * x for x in range(1, 100, 5)]: + rdp = tree_aggregation_accountant.compute_rdp_tree_restart( + noise_multiplier, steps_list, orders) + eps = rdp_accountant.get_privacy_spent( + orders, rdp, target_delta=target_delta)[0] + self.assertLess(eps, prev_eps) + + @parameterized.named_parameters( + ('negative_noise', -1, 3, 1), + ('empty_steps', 1, [], 1), + ('negative_steps', 1, -3, 1), + ) + def test_compute_rdp_tree_restart_raise(self, noise_multiplier, steps_list, + orders): + with self.assertRaisesRegex(ValueError, 'must be'): + tree_aggregation_accountant.compute_rdp_tree_restart( + noise_multiplier, steps_list, orders) + + @parameterized.named_parameters( + ('t100n0.1', 100, 0.1), + ('t1000n0.01', 1000, 0.01), + ) + def test_no_tree_no_sampling(self, total_steps, noise_multiplier): + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + tree_rdp = tree_aggregation_accountant.compute_rdp_tree_restart( + noise_multiplier, [1] * total_steps, orders) + rdp = rdp_accountant.compute_rdp(1., noise_multiplier, total_steps, orders) + self.assertAllClose(tree_rdp, rdp, rtol=1e-12) + + @parameterized.named_parameters( + ('negative_noise', -1, 3, 1, 1), + ('negative_steps', 0.1, -3, 1, 1), + ('negative_part', 0.1, 3, -1, 1), + ('negative_sep', 0.1, 3, 1, -1), + ) + def test_compute_rdp_single_tree_raise(self, noise_multiplier, total_steps, + max_participation, min_separation): + orders = 1 + with self.assertRaisesRegex(ValueError, 'must be'): + tree_aggregation_accountant.compute_rdp_single_tree( + noise_multiplier, total_steps, max_participation, min_separation, + orders) + + @parameterized.named_parameters( + ('3', 3), + ('8', 8), + ('11', 11), + ('19', 19), + ) + def test_max_tree_sensitivity_square_sum_every_step(self, steps): + max_participation, min_separation = steps, 0 + # If a sample will appear in every leaf node, we can infer the total + # sensitivity by adding all the nodes. + steps_bin = bin(steps)[2:] + depth = [ + len(steps_bin) - 1 - i for i, v in enumerate(steps_bin) if v == '1' + ] + expected = sum([2**d * (2**(d + 1) - 1) for d in depth]) + self.assertEqual( + expected, + tree_aggregation_accountant._max_tree_sensitivity_square_sum( + max_participation, min_separation, steps)) + + @parameterized.named_parameters( + ('11', 11), + ('19', 19), + ('200', 200), + ) + def test_max_tree_sensitivity_square_sum_every_step_part(self, max_part): + steps, min_separation = 8, 0 + assert max_part > steps + # If a sample will appear in every leaf node, we can infer the total + # sensitivity by adding all the nodes. + expected = 120 + self.assertEqual( + expected, + tree_aggregation_accountant._max_tree_sensitivity_square_sum( + max_part, min_separation, steps)) + + @parameterized.named_parameters( + ('3', 3), + ('8', 8), + ('11', 11), + ('19', 19), + ) + def test_max_tree_sensitivity_square_sum_every_step_part2(self, steps): + max_participation, min_separation = 2, 0 + # If a sample will appear twice, the worst case is to put the two nodes at + # consecutive nodes of the deepest subtree. 
+    steps_bin = bin(steps)[2:]
+    depth = len(steps_bin) - 1
+    expected = 2 + 4 * depth
+    self.assertEqual(
+        expected,
+        tree_aggregation_accountant._max_tree_sensitivity_square_sum(
+            max_participation, min_separation, steps))
+
+  @parameterized.named_parameters(
+      ('test1', 1, 7, 8, 4),
+      ('test2', 3, 3, 9, 11),
+      ('test3', 3, 2, 7, 9),
+      # This is an example showing worst-case sensitivity is larger than greedy
+      # in "Practical and Private (Deep) Learning without Sampling or Shuffling"
+      # https://arxiv.org/abs/2103.00039.
+      ('test4', 8, 2, 24, 88),
+  )
+  def test_max_tree_sensitivity_square_sum_toy(self, max_participation,
+                                               min_separation, steps, expected):
+    self.assertEqual(
+        expected,
+        tree_aggregation_accountant._max_tree_sensitivity_square_sum(
+            max_participation, min_separation, steps))
+
+
+if __name__ == '__main__':
+  tf.test.main()

From 38eface1fd9bf59ec15ec9c52d02b38055d66393 Mon Sep 17 00:00:00 2001
From: Zheng Xu
Date: Tue, 14 Dec 2021 10:32:37 -0800
Subject: [PATCH 71/71] zCDP for tree aggregation.

PiperOrigin-RevId: 416338656
---
 tensorflow_privacy/__init__.py                      |  1 +
 .../analysis/tree_aggregation_accountant.py         | 51 +++++++++++++++++++
 .../tree_aggregation_accountant_test.py             | 10 ++++
 3 files changed, 62 insertions(+)

diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py
index fcf607e..5db9cc3 100644
--- a/tensorflow_privacy/__init__.py
+++ b/tensorflow_privacy/__init__.py
@@ -48,6 +48,7 @@ else:
   from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
   from tensorflow_privacy.privacy.analysis.tree_aggregation_accountant import compute_rdp_tree_restart
   from tensorflow_privacy.privacy.analysis.tree_aggregation_accountant import compute_rdp_single_tree
+  from tensorflow_privacy.privacy.analysis.tree_aggregation_accountant import compute_zcdp_single_tree

   # DPQuery classes
   from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery
diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py
index 417c910..796a543 100644
--- a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant.py
@@ -313,3 +313,54 @@ def compute_rdp_single_tree(
         for alpha in orders
     ])
   return rdp
+
+
+def _compute_gaussian_zcdp(sigma: float,
+                           sum_sensitivity_square: float) -> float:
+  """Computes zCDP of Gaussian mechanism."""
+  return sum_sensitivity_square / (2 * sigma**2)
+
+
+def compute_zcdp_single_tree(
+    noise_multiplier: float, total_steps: int, max_participation: int,
+    min_separation: int) -> float:
+  """Computes zCDP of the Tree Aggregation Protocol for a single tree.
+
+  The accounting assumes a single tree is constructed for `total_steps` leaf
+  nodes, where the same sample will appear at most `max_participation` times,
+  and there are at least `min_separation` nodes between two appearances. The
+  key idea is to (recursively) count the worst-case occurrence of a sample in
+  all the nodes in a tree, which implements a dynamic programming algorithm
+  that exhausts the possible `num_participation` appearances of a sample in
+  `steps` leaf nodes.
+
+  See Appendix D of
+  "Practical and Private (Deep) Learning without Sampling or Shuffling"
+  https://arxiv.org/abs/2103.00039.
+
+  The Zero-Concentrated Differential Privacy (zCDP) definition is described in
+  "Concentrated Differential Privacy: Simplifications, Extensions,
+  and Lower Bounds" https://arxiv.org/abs/1605.02065.
+
+  Args:
+    noise_multiplier: A non-negative float representing the ratio of the
+      standard deviation of the Gaussian noise to the l2-sensitivity of the
+      function to which it is added.
+    total_steps: Total number of steps (leaf nodes in tree aggregation).
+    max_participation: The maximum number of times a sample can appear.
+    min_separation: The minimum number of nodes between two appearances of a
+      sample. If a sample appears in consecutive steps x, y in a streaming
+      setting, then `min_separation=y-x-1`.
+
+  Returns:
+    The zCDP.
+  """
+  _check_nonnegative(noise_multiplier, "noise_multiplier")
+  if noise_multiplier == 0:
+    return np.inf
+  _check_nonnegative(total_steps, "total_steps")
+  _check_nonnegative(max_participation, "max_participation")
+  _check_nonnegative(min_separation, "min_separation")
+  sum_sensitivity_square = _max_tree_sensitivity_square_sum(
+      max_participation, min_separation, total_steps)
+  return _compute_gaussian_zcdp(noise_multiplier, sum_sensitivity_square)
diff --git a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py
index 17f6437..68291b4 100644
--- a/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/tree_aggregation_accountant_test.py
@@ -180,6 +180,16 @@ class TreeAggregationTest(tf.test.TestCase, parameterized.TestCase):
         tree_aggregation_accountant._max_tree_sensitivity_square_sum(
             max_participation, min_separation, steps))

+  def test_compute_gaussian_zcdp(self):
+    for sigma in tf.random.uniform([5], minval=0.01, maxval=100).numpy():
+      for sum_sensitivity_square in tf.random.uniform([5],
+                                                      minval=0.01,
+                                                      maxval=1000).numpy():
+        self.assertEqual(
+            tree_aggregation_accountant._compute_gaussian_rdp(
+                sigma, sum_sensitivity_square, alpha=1),
+            tree_aggregation_accountant._compute_gaussian_zcdp(
+                sigma, sum_sensitivity_square))

 if __name__ == '__main__':
   tf.test.main()
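For reference, a minimal end-to-end sketch of the new zCDP accounting (the parameter
values are illustrative assumptions; the zCDP-to-(epsilon, delta) conversion below uses
the standard bound eps = rho + 2*sqrt(rho*log(1/delta)) from the Bun and Steinke paper
cited in the docstring, and is not a helper added by this patch):

  import math

  from tensorflow_privacy.privacy.analysis import tree_aggregation_accountant

  # zCDP parameter rho for one tree over 1024 leaf nodes, where a sample
  # appears at most 4 times with at least 256 leaf nodes in between.
  rho = tree_aggregation_accountant.compute_zcdp_single_tree(
      noise_multiplier=1.0, total_steps=1024, max_participation=4,
      min_separation=256)
  # Standard conversion from rho-zCDP to (epsilon, delta)-DP.
  target_delta = 1e-6
  eps = rho + 2 * math.sqrt(rho * math.log(1 / target_delta))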

-Models trained with DP-SGD have provable differential privacy (DP) guarantees,
-mitigating the risk of exposing sensitive training data. Intuitively, a model
-trained with differential privacy should not be affected by any single training
-example in its data set. DP-SGD techniques can also be used in federated
-learning to provide user-level differential privacy. You can learn more about
-differentially private deep learning in the original paper.
+An important aspect of responsible AI usage is ensuring that ML models do not
+expose potentially sensitive information, such as demographic information or
+other attributes in the training dataset that could be used to identify people.
+One way to achieve this is to use differentially private stochastic gradient
+descent (DP-SGD), a modification of the standard stochastic gradient descent
+(SGD) algorithm.
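To make this concrete, a hedged sketch of DP-SGD training with the library's Keras
optimizer follows; the model, input shape, and hyperparameter values are placeholders
for illustration, not recommendations:

  import tensorflow as tf

  from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

  model = tf.keras.Sequential([
      tf.keras.layers.Dense(16, activation='relu', input_shape=(20,)),
      tf.keras.layers.Dense(2),
  ])
  optimizer = DPKerasSGDOptimizer(
      l2_norm_clip=1.0,      # clip each per-example gradient to this l2 norm
      noise_multiplier=1.1,  # noise stddev relative to the clipping norm
      num_microbatches=32,   # must evenly divide the batch size
      learning_rate=0.1)
  # The loss must return per-example values (reduction NONE) so the
  # optimizer can compute gradients per microbatch.
  loss = tf.keras.losses.SparseCategoricalCrossentropy(
      from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
  model.compile(optimizer=optimizer, loss=loss)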