From 36b8ea34ef9915e3db4c5f3e345fe20f16ca24eb Mon Sep 17 00:00:00 2001 From: Michael Reneer Date: Fri, 28 Jan 2022 15:01:09 -0800 Subject: [PATCH] Internal PiperOrigin-RevId: 424965569 --- tensorflow_privacy/__init__.py | 10 +--- .../privacy/dp_query/tree_range_query.py | 4 +- .../multinomial_logistic.py | 14 ++++-- .../single_layer_softmax.py | 7 +-- .../privacy/optimizers/dp_optimizer_keras.py | 28 +++++------ .../data_structures.py | 48 +++++++++---------- .../dataset_slicing.py | 7 +-- .../membership_inference_attack/models.py | 11 +++-- .../seq2seq_mia.py | 28 +++++------ 9 files changed, 78 insertions(+), 79 deletions(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 9e8aa63..813c078 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -85,12 +85,4 @@ else: from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class - try: - from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel - from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn - from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin - from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy - from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber - except ImportError: - # module `bolt_on` not yet available in this version of TF Privacy - pass + # module `bolt_on` not yet available in this version of TF Privacy diff --git a/tensorflow_privacy/privacy/dp_query/tree_range_query.py b/tensorflow_privacy/privacy/dp_query/tree_range_query.py index 471915b..4d31e98 100644 --- a/tensorflow_privacy/privacy/dp_query/tree_range_query.py +++ b/tensorflow_privacy/privacy/dp_query/tree_range_query.py @@ -15,8 +15,10 @@ 'Offline' means all the leaf nodes are ready before the protocol starts. 
""" + import distutils import math +from typing import Optional import attr import tensorflow as tf @@ -262,7 +264,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery): return cls(arity=arity, inner_query=inner_query) -def _get_add_noise(stddev, seed: int = None): +def _get_add_noise(stddev, seed: Optional[int] = None): """Utility function to decide which `add_noise` to use according to tf version.""" if distutils.version.LooseVersion( tf.__version__) < distutils.version.LooseVersion('2.0.0'): diff --git a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py index 8a2200e..3cc056a 100644 --- a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py +++ b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py @@ -27,12 +27,14 @@ the algorithm of Abadi et al.: https://arxiv.org/pdf/1607.00133.pdf%20. import math from typing import List, Optional, Tuple + import numpy as np import tensorflow as tf -from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy as compute_epsilon +from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib from tensorflow_privacy.privacy.logistic_regression import datasets from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras + from differential_privacy.python.accounting import common @@ -170,9 +172,13 @@ def compute_dpsgd_noise_multiplier(num_train: int, """ search_parameters = common.BinarySearchParameters( lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance) - return common.inverse_monotone_function( - lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0], - epsilon, search_parameters) + + def _func(x): + result = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy( + num_train, batch_size, x, epochs, delta) + return result[0] + + 
return common.inverse_monotone_function(_func, epsilon, search_parameters) def logistic_dpsgd(train_dataset: datasets.RegressionDataset, diff --git a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py index 80a3596..ab77d13 100644 --- a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py +++ b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py @@ -13,7 +13,8 @@ # limitations under the License. """Implementation of a single-layer softmax classifier.""" -from typing import List +from typing import List, Optional, Union + import tensorflow as tf from tensorflow_privacy.privacy.logistic_regression import datasets @@ -24,9 +25,9 @@ def single_layer_softmax_classifier( epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer, - loss: tf.keras.losses.Loss = 'categorical_crossentropy', + loss: Union[tf.keras.losses.Loss, str] = 'categorical_crossentropy', batch_size: int = 32, - kernel_regularizer: tf.keras.regularizers.Regularizer = None + kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None ) -> List[float]: """Trains a single layer neural network classifier with softmax activation. diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py index 35990f6..6ecbb31 100644 --- a/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_keras.py @@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls): l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients). noise_multiplier: Ratio of the standard deviation to the clipping norm. num_microbatches: Number of microbatches into which each minibatch is - split. Default is `None` which means that number of microbatches is - equal to batch size (i.e. each microbatch contains exactly one + split. 
Default is `None` which means that number of microbatches + is equal to batch size (i.e. each microbatch contains exactly one example). If `gradient_accumulation_steps` is greater than 1 and `num_microbatches` is not `None` then the effective number of - microbatches is equal to `num_microbatches * - gradient_accumulation_steps`. + microbatches is equal to + `num_microbatches * gradient_accumulation_steps`. gradient_accumulation_steps: If greater than 1 then optimizer will be accumulating gradients for this number of optimizer steps before applying them to update model weights. If this argument is set to 1 @@ -162,13 +162,13 @@ def make_keras_optimizer_class(cls): self._was_dp_gradients_called = False def _create_slots(self, var_list): - super()._create_slots(var_list) + super()._create_slots(var_list) # pytype: disable=attribute-error if self.gradient_accumulation_steps > 1: for var in var_list: self.add_slot(var, 'grad_acc') def _prepare_local(self, var_device, var_dtype, apply_state): - super()._prepare_local(var_device, var_dtype, apply_state) + super()._prepare_local(var_device, var_dtype, apply_state) # pytype: disable=attribute-error if self.gradient_accumulation_steps > 1: apply_update = tf.math.equal( tf.math.floormod(self.iterations + 1, @@ -188,7 +188,7 @@ def make_keras_optimizer_class(cls): def _update_grad(): apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense( - grad_acc + grad * coefficients['grad_scaler'], var, apply_state) + grad_acc + grad * coefficients['grad_scaler'], var, apply_state) # pytype: disable=attribute-error with tf.control_dependencies([apply_grad_op]): return grad_acc.assign( tf.zeros_like(grad_acc), @@ -203,25 +203,21 @@ def make_keras_optimizer_class(cls): return tf.cond(coefficients['apply_update'], _update_grad, _accumulate) else: - return super(DPOptimizerClass, - self)._resource_apply_dense(grad, var, apply_state) + return super()._resource_apply_dense(grad, var, apply_state) # pytype: disable=attribute-error 
def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs): if self.gradient_accumulation_steps > 1: raise NotImplementedError( 'Sparse gradients are not supported with large batch emulation.') else: - return super(DPOptimizerClass, - self)._resource_apply_sparse_duplicate_indices( - *args, **kwargs) + return super()._resource_apply_sparse_duplicate_indices(*args, **kwargs) # pytype: disable=attribute-error def _resource_apply_sparse(self, *args, **kwargs): if self.gradient_accumulation_steps > 1: raise NotImplementedError( 'Sparse gradients are not supported with large batch emulation.') else: - return super(DPOptimizerClass, - self)._resource_apply_sparse(*args, **kwargs) + return super()._resource_apply_sparse(*args, **kwargs) # pytype: disable=attribute-error def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): """DP-SGD version of base class method.""" @@ -338,7 +334,7 @@ def make_keras_optimizer_class(cls): Returns: Python dictionary. """ - config = super(DPOptimizerClass, self).get_config() + config = super().get_config() config.update({ 'l2_norm_clip': self._l2_norm_clip, 'noise_multiplier': self._noise_multiplier, @@ -354,7 +350,7 @@ def make_keras_optimizer_class(cls): 'training is not differentially private. 
It may be the case that ' 'you need to upgrade to TF 2.4 or higher to use this particular ' 'optimizer.') - return super(DPOptimizerClass, self).apply_gradients(*args, **kwargs) + return super().apply_gradients(*args, **kwargs) return DPOptimizerClass diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py index 56faa99..b0addd9 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py @@ -19,7 +19,7 @@ import enum import glob import os import pickle -from typing import Any, Iterable, Union +from typing import Any, Iterable, MutableSequence, Optional, Union import numpy as np import pandas as pd @@ -47,8 +47,8 @@ class SingleSliceSpec: When feature is None, it means that the slice is the entire dataset. """ - feature: SlicingFeature = None - value: Any = None + feature: Optional[SlicingFeature] = None + value: Optional[Any] = None @property def entire_dataset(self): @@ -172,29 +172,29 @@ class AttackInputData: This includes only the data, and not configuration. """ - logits_train: np.ndarray = None - logits_test: np.ndarray = None + logits_train: Optional[np.ndarray] = None + logits_test: Optional[np.ndarray] = None # Predicted probabilities for each class. They can be derived from logits, # so they can be set only if logits are not explicitly provided. - probs_train: np.ndarray = None - probs_test: np.ndarray = None + probs_train: Optional[np.ndarray] = None + probs_test: Optional[np.ndarray] = None # Contains ground-truth classes. Classes are assumed to be integers starting # from 0. - labels_train: np.ndarray = None - labels_test: np.ndarray = None + labels_train: Optional[np.ndarray] = None + labels_test: Optional[np.ndarray] = None # Explicitly specified loss. 
If provided, this is used instead of deriving # loss from logits and labels - loss_train: np.ndarray = None - loss_test: np.ndarray = None + loss_train: Optional[np.ndarray] = None + loss_test: Optional[np.ndarray] = None # Explicitly specified prediction entropy. If provided, this is used instead # of deriving entropy from logits and labels # (https://arxiv.org/pdf/2003.10595.pdf by Song and Mittal). - entropy_train: np.ndarray = None - entropy_test: np.ndarray = None + entropy_train: Optional[np.ndarray] = None + entropy_test: Optional[np.ndarray] = None @property def num_classes(self): @@ -387,7 +387,7 @@ class AttackInputData: return '\n'.join(result) -def _append_array_shape(arr: np.array, arr_name: str, result): +def _append_array_shape(arr: Optional[np.ndarray], arr_name: str, result): if arr is not None: result.append(' %s with shape: %s,' % (arr_name, arr.shape)) @@ -465,11 +465,11 @@ class SingleAttackResult: # Membership scores for the training set samples. For a perfect attacker, # all training samples will have higher scores than test samples. - membership_scores_train: np.ndarray = None + membership_scores_train: Optional[np.ndarray] = None # Membership scores for the test set samples. For a perfect attacker, all # test set samples will have lower scores than the training set samples. - membership_scores_test: np.ndarray = None + membership_scores_test: Optional[np.ndarray] = None def get_attacker_advantage(self): return self.roc_curve.get_attacker_advantage() @@ -601,14 +601,14 @@ class PrivacyReportMetadata: Used to create a privacy report based on AttackResults. 
""" - accuracy_train: float = None - accuracy_test: float = None + accuracy_train: Optional[float] = None + accuracy_test: Optional[float] = None - loss_train: float = None - loss_test: float = None + loss_train: Optional[float] = None + loss_test: Optional[float] = None model_variant_label: str = 'Default model variant' - epoch_num: int = None + epoch_num: Optional[int] = None class AttackResultsDFColumns(enum.Enum): @@ -627,9 +627,9 @@ class AttackResultsDFColumns(enum.Enum): @dataclasses.dataclass class AttackResults: """Results from running multiple attacks.""" - single_attack_results: Iterable[SingleAttackResult] + single_attack_results: MutableSequence[SingleAttackResult] - privacy_report_metadata: PrivacyReportMetadata = None + privacy_report_metadata: Optional[PrivacyReportMetadata] = None def calculate_pd_dataframe(self): """Returns all metrics as a Pandas DataFrame.""" @@ -760,7 +760,7 @@ class AttackResults: @dataclasses.dataclass class AttackResultsCollection: """A collection of AttackResults.""" - attack_results_list: Iterable[AttackResults] + attack_results_list: MutableSequence[AttackResults] def append(self, attack_results: AttackResults): self.attack_results_list.append(attack_results) diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py index b9eb40a..4129f7b 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py @@ -15,7 +15,7 @@ import collections import copy -from typing import List +from typing import List, Optional import numpy as np @@ -90,8 +90,9 @@ def _slice_by_classification_correctness(data: AttackInputData, return _slice_data_by_indices(data, idx_train, idx_test) -def get_single_slice_specs(slicing_spec: SlicingSpec, - num_classes: int = None) -> 
List[SingleSliceSpec]: +def get_single_slice_specs( + slicing_spec: SlicingSpec, + num_classes: Optional[int] = None) -> List[SingleSliceSpec]: """Returns slices of data according to slicing_spec.""" result = [] diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py index 9d162c5..1209c8d 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py @@ -14,6 +14,7 @@ """Trained models for membership inference attacks.""" import dataclasses +from typing import Optional import numpy as np from sklearn import ensemble @@ -33,15 +34,15 @@ class AttackerData: This includes only the data, and not configuration. """ - features_train: np.ndarray = None + features_train: Optional[np.ndarray] = None # element-wise boolean array denoting if the example was part of training. - is_training_labels_train: np.ndarray = None + is_training_labels_train: Optional[np.ndarray] = None - features_test: np.ndarray = None + features_test: Optional[np.ndarray] = None # element-wise boolean array denoting if the example was part of training. - is_training_labels_test: np.ndarray = None + is_training_labels_test: Optional[np.ndarray] = None - data_size: DataSize = None + data_size: Optional[DataSize] = None def create_attacker_data(attack_input_data: AttackInputData, diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py index 1eef53c..d848cdd 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py @@ -19,14 +19,13 @@ and the logistic regression membership inference attack. 
""" import dataclasses -from typing import Iterator, List +from typing import Iterator, List, Optional, Tuple import numpy as np from scipy import stats from sklearn import metrics from sklearn import model_selection import tensorflow as tf - from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType @@ -51,15 +50,15 @@ class Seq2SeqAttackInputData: This includes only the data, and not configuration. """ - logits_train: Iterator[np.ndarray] = None - logits_test: Iterator[np.ndarray] = None + logits_train: Optional[Iterator[np.ndarray]] = None + logits_test: Optional[Iterator[np.ndarray]] = None # Contains ground-truth token indices for the target sequences. - labels_train: Iterator[np.ndarray] = None - labels_test: Iterator[np.ndarray] = None + labels_train: Optional[Iterator[np.ndarray]] = None + labels_test: Optional[Iterator[np.ndarray]] = None # Size of the target sequence vocabulary. - vocab_size: int = None + vocab_size: Optional[int] = None # Train, test size = number of batches in training, test set. # These values need to be supplied by the user as logits, labels @@ -126,7 +125,7 @@ class Seq2SeqAttackInputData: def _get_attack_features_and_metadata( logits: Iterator[np.ndarray], - labels: Iterator[np.ndarray]) -> (np.ndarray, float, float): + labels: Iterator[np.ndarray]) -> Tuple[np.ndarray, float, float]: """Returns the average rank of tokens per batch of sequences and the loss. Args: @@ -212,7 +211,7 @@ def _get_ranks_for_sequence(logits: np.ndarray, def _get_batch_loss_metrics(batch_logits: np.ndarray, - batch_labels: np.ndarray) -> (float, int): + batch_labels: np.ndarray) -> Tuple[float, int]: """Returns the loss, number of sequences for a batch. 
Args: @@ -234,12 +233,13 @@ def _get_batch_loss_metrics(batch_logits: np.ndarray, batch_loss += tf.reduce_sum(sequence_loss) if not tf.executing_eagerly(): - batch_loss = batch_loss.eval(session=tf.compat.v1.Session()) + session = tf.compat.v1.Session() + batch_loss = batch_loss.eval(session=session) # pytype: disable=attribute-error return batch_loss / batch_length, batch_length -def _get_batch_accuracy_metrics(batch_logits: np.ndarray, - batch_labels: np.ndarray) -> (float, float): +def _get_batch_accuracy_metrics( + batch_logits: np.ndarray, batch_labels: np.ndarray) -> Tuple[float, float]: """Returns the number of correct predictions, total number of predictions for a batch. Args: @@ -261,8 +261,8 @@ def _get_batch_accuracy_metrics(batch_logits: np.ndarray, batch_total_preds += len(sequence_labels) if not tf.executing_eagerly(): - batch_correct_preds = batch_correct_preds.eval( - session=tf.compat.v1.Session()) + session = tf.compat.v1.Session() + batch_correct_preds = batch_correct_preds.eval(session=session) # pytype: disable=attribute-error return batch_correct_preds, batch_total_preds