PiperOrigin-RevId: 424965569
Michael Reneer 2022-01-28 15:01:09 -08:00 committed by A. Unique TensorFlower
parent 25571aaf87
commit 36b8ea34ef
9 changed files with 78 additions and 79 deletions

View file

@@ -85,12 +85,4 @@ else:
 from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer
 from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class
-try:
-  from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel
-  from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn
-  from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin
-  from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
-  from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber
-except ImportError:
-  # module `bolt_on` not yet available in this version of TF Privacy
-  pass

View file

@@ -15,8 +15,10 @@
 'Offline' means all the leaf nodes are ready before the protocol starts.
 """
 import distutils
 import math
+from typing import Optional
 import attr
 import tensorflow as tf
@@ -262,7 +264,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
 return cls(arity=arity, inner_query=inner_query)
-def _get_add_noise(stddev, seed: int = None):
+def _get_add_noise(stddev, seed: Optional[int] = None):
 """Utility function to decide which `add_noise` to use according to tf version."""
 if distutils.version.LooseVersion(
 tf.__version__) < distutils.version.LooseVersion('2.0.0'):
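For context, a minimal sketch (not part of this diff) of the typing pattern applied throughout this commit: a `None` default behind a plain annotation is an implicit optional, which PEP 484 discourages and stricter type checkers reject, so the annotations are spelled out as `Optional[...]`. The function names below are illustrative only.

from typing import Optional

# Implicit optional: annotated as `int`, yet the default is `None`.
def make_seeded_noise(stddev: float, seed: int = None):
  return stddev, seed

# Explicit optional, matching the style adopted in this commit.
def make_seeded_noise_typed(stddev: float, seed: Optional[int] = None):
  return stddev, seed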

View file

@@ -27,12 +27,14 @@ the algorithm of Abadi et al.: https://arxiv.org/pdf/1607.00133.pdf%20.
 import math
 from typing import List, Optional, Tuple
 import numpy as np
 import tensorflow as tf
-from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy as compute_epsilon
+from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib
 from tensorflow_privacy.privacy.logistic_regression import datasets
 from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
 from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras
 from differential_privacy.python.accounting import common
@@ -170,9 +172,13 @@ def compute_dpsgd_noise_multiplier(num_train: int,
 """
 search_parameters = common.BinarySearchParameters(
 lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance)
-return common.inverse_monotone_function(
-    lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0],
-    epsilon, search_parameters)
+def _func(x):
+  result = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
+      num_train, batch_size, x, epochs, delta)
+  return result[0]
+return common.inverse_monotone_function(_func, epsilon, search_parameters)
 def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
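For context, a minimal usage sketch (not part of this diff) of the accounting helper that the binary search above inverts. The training parameters are illustrative values; as in the refactored code, the helper is assumed to return a tuple whose first element is epsilon.

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib

# Arguments, in order: num_examples, batch_size, noise_multiplier, epochs, delta.
eps, _ = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
    60000, 256, 1.1, 10, 1e-5)
print('epsilon for this run:', eps)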

View file

@@ -13,7 +13,8 @@
 # limitations under the License.
 """Implementation of a single-layer softmax classifier."""
-from typing import List
+from typing import List, Optional, Union
 import tensorflow as tf
 from tensorflow_privacy.privacy.logistic_regression import datasets
@@ -24,9 +25,9 @@ def single_layer_softmax_classifier(
 epochs: int,
 num_classes: int,
 optimizer: tf.keras.optimizers.Optimizer,
-loss: tf.keras.losses.Loss = 'categorical_crossentropy',
+loss: Union[tf.keras.losses.Loss, str] = 'categorical_crossentropy',
 batch_size: int = 32,
-kernel_regularizer: tf.keras.regularizers.Regularizer = None
+kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
 ) -> List[float]:
 """Trains a single layer neural network classifier with softmax activation.

View file

@@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls):
 l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
 noise_multiplier: Ratio of the standard deviation to the clipping norm.
 num_microbatches: Number of microbatches into which each minibatch is
-split. Default is `None` which means that number of microbatches is
-equal to batch size (i.e. each microbatch contains exactly one
+split. Default is `None` which means that number of microbatches
+is equal to batch size (i.e. each microbatch contains exactly one
 example). If `gradient_accumulation_steps` is greater than 1 and
 `num_microbatches` is not `None` then the effective number of
-microbatches is equal to `num_microbatches *
-gradient_accumulation_steps`.
+microbatches is equal to
+`num_microbatches * gradient_accumulation_steps`.
 gradient_accumulation_steps: If greater than 1 then optimizer will be
 accumulating gradients for this number of optimizer steps before
 applying them to update model weights. If this argument is set to 1
@@ -162,13 +162,13 @@ def make_keras_optimizer_class(cls):
 self._was_dp_gradients_called = False
 def _create_slots(self, var_list):
-super()._create_slots(var_list)
+super()._create_slots(var_list)  # pytype: disable=attribute-error
 if self.gradient_accumulation_steps > 1:
 for var in var_list:
 self.add_slot(var, 'grad_acc')
 def _prepare_local(self, var_device, var_dtype, apply_state):
-super()._prepare_local(var_device, var_dtype, apply_state)
+super()._prepare_local(var_device, var_dtype, apply_state)  # pytype: disable=attribute-error
 if self.gradient_accumulation_steps > 1:
 apply_update = tf.math.equal(
 tf.math.floormod(self.iterations + 1,
@@ -188,7 +188,7 @@ def make_keras_optimizer_class(cls):
 def _update_grad():
 apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense(
-grad_acc + grad * coefficients['grad_scaler'], var, apply_state)
+grad_acc + grad * coefficients['grad_scaler'], var, apply_state)  # pytype: disable=attribute-error
 with tf.control_dependencies([apply_grad_op]):
 return grad_acc.assign(
 tf.zeros_like(grad_acc),
@@ -203,25 +203,21 @@ def make_keras_optimizer_class(cls):
 return tf.cond(coefficients['apply_update'], _update_grad, _accumulate)
 else:
-return super(DPOptimizerClass,
-    self)._resource_apply_dense(grad, var, apply_state)
+return super()._resource_apply_dense(grad, var, apply_state)  # pytype: disable=attribute-error
 def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs):
 if self.gradient_accumulation_steps > 1:
 raise NotImplementedError(
 'Sparse gradients are not supported with large batch emulation.')
 else:
-return super(DPOptimizerClass,
-    self)._resource_apply_sparse_duplicate_indices(
-        *args, **kwargs)
+return super()._resource_apply_sparse_duplicate_indices(*args, **kwargs)  # pytype: disable=attribute-error
 def _resource_apply_sparse(self, *args, **kwargs):
 if self.gradient_accumulation_steps > 1:
 raise NotImplementedError(
 'Sparse gradients are not supported with large batch emulation.')
 else:
-return super(DPOptimizerClass,
-    self)._resource_apply_sparse(*args, **kwargs)
+return super()._resource_apply_sparse(*args, **kwargs)  # pytype: disable=attribute-error
 def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
 """DP-SGD version of base class method."""
@@ -338,7 +334,7 @@ def make_keras_optimizer_class(cls):
 Returns:
 Python dictionary.
 """
-config = super(DPOptimizerClass, self).get_config()
+config = super().get_config()
 config.update({
 'l2_norm_clip': self._l2_norm_clip,
 'noise_multiplier': self._noise_multiplier,
@@ -354,7 +350,7 @@ def make_keras_optimizer_class(cls):
 'training is not differentially private. It may be the case that '
 'you need to upgrade to TF 2.4 or higher to use this particular '
 'optimizer.')
-return super(DPOptimizerClass, self).apply_gradients(*args, **kwargs)
+return super().apply_gradients(*args, **kwargs)
 return DPOptimizerClass
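For context, a minimal construction sketch (not part of this diff) illustrating the docstring above: with `gradient_accumulation_steps` greater than 1, the effective number of microbatches is `num_microbatches * gradient_accumulation_steps` (4 * 2 = 8 below). The `DPKerasSGDOptimizer` class name is assumed from this module's public API, and the hyperparameter values are illustrative only.

from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

# Gradients are accumulated for 2 optimizer steps before a clipped, noised update.
optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,
    noise_multiplier=1.1,
    num_microbatches=4,
    gradient_accumulation_steps=2,
    learning_rate=0.01)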

View file

@@ -19,7 +19,7 @@ import enum
 import glob
 import os
 import pickle
-from typing import Any, Iterable, Union
+from typing import Any, Iterable, MutableSequence, Optional, Union
 import numpy as np
 import pandas as pd
@@ -47,8 +47,8 @@ class SingleSliceSpec:
 When feature is None, it means that the slice is the entire dataset.
 """
-feature: SlicingFeature = None
-value: Any = None
+feature: Optional[SlicingFeature] = None
+value: Optional[Any] = None
 @property
 def entire_dataset(self):
@@ -172,29 +172,29 @@ class AttackInputData:
 This includes only the data, and not configuration.
 """
-logits_train: np.ndarray = None
-logits_test: np.ndarray = None
+logits_train: Optional[np.ndarray] = None
+logits_test: Optional[np.ndarray] = None
 # Predicted probabilities for each class. They can be derived from logits,
 # so they can be set only if logits are not explicitly provided.
-probs_train: np.ndarray = None
-probs_test: np.ndarray = None
+probs_train: Optional[np.ndarray] = None
+probs_test: Optional[np.ndarray] = None
 # Contains ground-truth classes. Classes are assumed to be integers starting
 # from 0.
-labels_train: np.ndarray = None
-labels_test: np.ndarray = None
+labels_train: Optional[np.ndarray] = None
+labels_test: Optional[np.ndarray] = None
 # Explicitly specified loss. If provided, this is used instead of deriving
 # loss from logits and labels
-loss_train: np.ndarray = None
-loss_test: np.ndarray = None
+loss_train: Optional[np.ndarray] = None
+loss_test: Optional[np.ndarray] = None
 # Explicitly specified prediction entropy. If provided, this is used instead
 # of deriving entropy from logits and labels
 # (https://arxiv.org/pdf/2003.10595.pdf by Song and Mittal).
-entropy_train: np.ndarray = None
-entropy_test: np.ndarray = None
+entropy_train: Optional[np.ndarray] = None
+entropy_test: Optional[np.ndarray] = None
 @property
 def num_classes(self):
@@ -387,7 +387,7 @@ class AttackInputData:
 return '\n'.join(result)
-def _append_array_shape(arr: np.array, arr_name: str, result):
+def _append_array_shape(arr: Optional[np.ndarray], arr_name: str, result):
 if arr is not None:
 result.append(' %s with shape: %s,' % (arr_name, arr.shape))
@@ -465,11 +465,11 @@ class SingleAttackResult:
 # Membership scores for the training set samples. For a perfect attacker,
 # all training samples will have higher scores than test samples.
-membership_scores_train: np.ndarray = None
+membership_scores_train: Optional[np.ndarray] = None
 # Membership scores for the test set samples. For a perfect attacker, all
 # test set samples will have lower scores than the training set samples.
-membership_scores_test: np.ndarray = None
+membership_scores_test: Optional[np.ndarray] = None
 def get_attacker_advantage(self):
 return self.roc_curve.get_attacker_advantage()
@@ -601,14 +601,14 @@ class PrivacyReportMetadata:
 Used to create a privacy report based on AttackResults.
 """
-accuracy_train: float = None
-accuracy_test: float = None
-loss_train: float = None
-loss_test: float = None
+accuracy_train: Optional[float] = None
+accuracy_test: Optional[float] = None
+loss_train: Optional[float] = None
+loss_test: Optional[float] = None
 model_variant_label: str = 'Default model variant'
-epoch_num: int = None
+epoch_num: Optional[int] = None
 class AttackResultsDFColumns(enum.Enum):
@@ -627,9 +627,9 @@ class AttackResultsDFColumns(enum.Enum):
 @dataclasses.dataclass
 class AttackResults:
 """Results from running multiple attacks."""
-single_attack_results: Iterable[SingleAttackResult]
-privacy_report_metadata: PrivacyReportMetadata = None
+single_attack_results: MutableSequence[SingleAttackResult]
+privacy_report_metadata: Optional[PrivacyReportMetadata] = None
 def calculate_pd_dataframe(self):
 """Returns all metrics as a Pandas DataFrame."""
@@ -760,7 +760,7 @@ class AttackResults:
 @dataclasses.dataclass
 class AttackResultsCollection:
 """A collection of AttackResults."""
-attack_results_list: Iterable[AttackResults]
+attack_results_list: MutableSequence[AttackResults]
 def append(self, attack_results: AttackResults):
 self.attack_results_list.append(attack_results)
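For context, a minimal sketch (not part of this diff) of how these dataclasses are typically populated now that unused fields default to `None`; the arrays below are random placeholders.

import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData

# Only the fields an attack needs are set; every other field stays None.
attack_input = AttackInputData(
    loss_train=np.random.rand(1000),
    loss_test=np.random.rand(500))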

View file

@@ -15,7 +15,7 @@
 import collections
 import copy
-from typing import List
+from typing import List, Optional
 import numpy as np
@@ -90,8 +90,9 @@ def _slice_by_classification_correctness(data: AttackInputData,
 return _slice_data_by_indices(data, idx_train, idx_test)
-def get_single_slice_specs(slicing_spec: SlicingSpec,
-    num_classes: int = None) -> List[SingleSliceSpec]:
+def get_single_slice_specs(
+    slicing_spec: SlicingSpec,
+    num_classes: Optional[int] = None) -> List[SingleSliceSpec]:
 """Returns slices of data according to slicing_spec."""
 result = []

View file

@@ -14,6 +14,7 @@
 """Trained models for membership inference attacks."""
 import dataclasses
+from typing import Optional
 import numpy as np
 from sklearn import ensemble
@@ -33,15 +34,15 @@ class AttackerData:
 This includes only the data, and not configuration.
 """
-features_train: np.ndarray = None
+features_train: Optional[np.ndarray] = None
 # element-wise boolean array denoting if the example was part of training.
-is_training_labels_train: np.ndarray = None
-features_test: np.ndarray = None
+is_training_labels_train: Optional[np.ndarray] = None
+features_test: Optional[np.ndarray] = None
 # element-wise boolean array denoting if the example was part of training.
-is_training_labels_test: np.ndarray = None
-data_size: DataSize = None
+is_training_labels_test: Optional[np.ndarray] = None
+data_size: Optional[DataSize] = None
 def create_attacker_data(attack_input_data: AttackInputData,

View file

@@ -19,14 +19,13 @@ and the logistic regression membership inference attack.
 """
 import dataclasses
-from typing import Iterator, List
+from typing import Iterator, List, Optional, Tuple
 import numpy as np
 from scipy import stats
 from sklearn import metrics
 from sklearn import model_selection
 import tensorflow as tf
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@@ -51,15 +50,15 @@ class Seq2SeqAttackInputData:
 This includes only the data, and not configuration.
 """
-logits_train: Iterator[np.ndarray] = None
-logits_test: Iterator[np.ndarray] = None
+logits_train: Optional[Iterator[np.ndarray]] = None
+logits_test: Optional[Iterator[np.ndarray]] = None
 # Contains ground-truth token indices for the target sequences.
-labels_train: Iterator[np.ndarray] = None
-labels_test: Iterator[np.ndarray] = None
+labels_train: Optional[Iterator[np.ndarray]] = None
+labels_test: Optional[Iterator[np.ndarray]] = None
 # Size of the target sequence vocabulary.
-vocab_size: int = None
+vocab_size: Optional[int] = None
 # Train, test size = number of batches in training, test set.
 # These values need to be supplied by the user as logits, labels
@@ -126,7 +125,7 @@ class Seq2SeqAttackInputData:
 def _get_attack_features_and_metadata(
 logits: Iterator[np.ndarray],
-labels: Iterator[np.ndarray]) -> (np.ndarray, float, float):
+labels: Iterator[np.ndarray]) -> Tuple[np.ndarray, float, float]:
 """Returns the average rank of tokens per batch of sequences and the loss.
 Args:
@@ -212,7 +211,7 @@ def _get_ranks_for_sequence(logits: np.ndarray,
 def _get_batch_loss_metrics(batch_logits: np.ndarray,
-batch_labels: np.ndarray) -> (float, int):
+batch_labels: np.ndarray) -> Tuple[float, int]:
 """Returns the loss, number of sequences for a batch.
 Args:
@@ -234,12 +233,13 @@ def _get_batch_loss_metrics(batch_logits: np.ndarray,
 batch_loss += tf.reduce_sum(sequence_loss)
 if not tf.executing_eagerly():
-batch_loss = batch_loss.eval(session=tf.compat.v1.Session())
+session = tf.compat.v1.Session()
+batch_loss = batch_loss.eval(session)  # pytype: disable=attribute-error
 return batch_loss / batch_length, batch_length
-def _get_batch_accuracy_metrics(batch_logits: np.ndarray,
-    batch_labels: np.ndarray) -> (float, float):
+def _get_batch_accuracy_metrics(
+    batch_logits: np.ndarray, batch_labels: np.ndarray) -> Tuple[float, float]:
 """Returns the number of correct predictions, total number of predictions for a batch.
 Args:
@@ -261,8 +261,8 @@ def _get_batch_accuracy_metrics(batch_logits: np.ndarray,
 batch_total_preds += len(sequence_labels)
 if not tf.executing_eagerly():
-batch_correct_preds = batch_correct_preds.eval(
-    session=tf.compat.v1.Session())
+session = tf.compat.v1.Session()
+batch_correct_preds = batch_correct_preds.eval(session)  # pytype: disable=attribute-error
 return batch_correct_preds, batch_total_preds
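For context, a short sketch (not part of this diff) of the return-annotation fix made in this file: a parenthesized annotation such as `(float, int)` is an ordinary tuple expression, not a type, so checkers cannot interpret it; `Tuple[float, int]` is the PEP 484 form. The function names below are illustrative only.

from typing import List, Tuple

# Pre-change style: runs, but is not a valid type annotation.
def batch_stats_old(values: List[float]) -> (float, int):
  return sum(values), len(values)

# PEP 484 style, as adopted in this change.
def batch_stats(values: List[float]) -> Tuple[float, int]:
  return sum(values), len(values)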