PiperOrigin-RevId: 424965569
Michael Reneer 2022-01-28 15:01:09 -08:00 committed by A. Unique TensorFlower
parent 25571aaf87
commit 36b8ea34ef
9 changed files with 78 additions and 79 deletions


@ -85,12 +85,4 @@ else:
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class
try:
from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel
from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn
from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin
from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber
except ImportError:
# module `bolt_on` not yet available in this version of TF Privacy
pass
# module `bolt_on` not yet available in this version of TF Privacy


@ -15,8 +15,10 @@
'Offline' means all the leaf nodes are ready before the protocol starts.
"""
import distutils
import math
from typing import Optional
import attr
import tensorflow as tf
@ -262,7 +264,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
return cls(arity=arity, inner_query=inner_query)
def _get_add_noise(stddev, seed: int = None):
def _get_add_noise(stddev, seed: Optional[int] = None):
"""Utility function to decide which `add_noise` to use according to tf version."""
if distutils.version.LooseVersion(
tf.__version__) < distutils.version.LooseVersion('2.0.0'):

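The hunk above tightens the `seed` annotation to `Optional[int]` and keeps the `distutils`-based TensorFlow version check. Below is a minimal sketch of such a version-gated noise helper; the name `_make_add_noise` and both branch bodies are assumptions for illustration, not the library's actual `add_noise` implementations (which are not shown in this diff).

import distutils.version
from typing import Optional

import tensorflow as tf


def _make_add_noise(stddev: float, seed: Optional[int] = None):
  """Returns an `add_noise` function chosen by TensorFlow version (sketch)."""
  if (distutils.version.LooseVersion(tf.__version__) <
      distutils.version.LooseVersion('2.0.0')):
    # TF 1.x branch: stateful op-level RNG, seeded per call.
    def add_noise(value):
      return value + tf.random.normal(tf.shape(value), stddev=stddev, seed=seed)
  else:
    # TF 2.x branch: an explicit Generator gives reproducible noise.
    rng = (tf.random.Generator.from_seed(seed)
           if seed is not None else tf.random.get_global_generator())
    def add_noise(value):
      return value + rng.normal(tf.shape(value), stddev=stddev)
  return add_noise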

@ -27,12 +27,14 @@ the algorithm of Abadi et al.: https://arxiv.org/pdf/1607.00133.pdf.
import math
from typing import List, Optional, Tuple
import numpy as np
import tensorflow as tf
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy as compute_epsilon
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib
from tensorflow_privacy.privacy.logistic_regression import datasets
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras
from differential_privacy.python.accounting import common
@ -170,9 +172,13 @@ def compute_dpsgd_noise_multiplier(num_train: int,
"""
search_parameters = common.BinarySearchParameters(
lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance)
return common.inverse_monotone_function(
lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0],
epsilon, search_parameters)
def _func(x):
result = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
num_train, batch_size, x, epochs, delta)
return result[0]
return common.inverse_monotone_function(_func, epsilon, search_parameters)
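`compute_dpsgd_noise_multiplier` relies on epsilon being monotonically decreasing in the noise multiplier, so the target epsilon can be inverted with a bracketing search. The following is a self-contained sketch of that idea that does not use the `common.inverse_monotone_function` helper; the function name, bracket growth strategy, and default tolerance are illustrative choices.

from typing import Callable


def invert_decreasing(f: Callable[[float], float], target: float,
                      tolerance: float = 0.01) -> float:
  """Finds the smallest x (up to `tolerance`) with f(x) <= target, f decreasing."""
  lo, hi = 0.0, 1.0
  while f(hi) > target:  # grow the bracket until the target becomes reachable
    lo, hi = hi, hi * 2.0
  while hi - lo > tolerance:
    mid = (lo + hi) / 2.0
    if f(mid) > target:
      lo = mid
    else:
      hi = mid
  return hi


# Illustrative usage with the accountant called above:
# noise_multiplier = invert_decreasing(
#     lambda x: compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
#         num_train, batch_size, x, epochs, delta)[0],
#     target=epsilon)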
def logistic_dpsgd(train_dataset: datasets.RegressionDataset,


@ -13,7 +13,8 @@
# limitations under the License.
"""Implementation of a single-layer softmax classifier."""
from typing import List
from typing import List, Optional, Union
import tensorflow as tf
from tensorflow_privacy.privacy.logistic_regression import datasets
@ -24,9 +25,9 @@ def single_layer_softmax_classifier(
epochs: int,
num_classes: int,
optimizer: tf.keras.optimizers.Optimizer,
loss: tf.keras.losses.Loss = 'categorical_crossentropy',
loss: Union[tf.keras.losses.Loss, str] = 'categorical_crossentropy',
batch_size: int = 32,
kernel_regularizer: tf.keras.regularizers.Regularizer = None
kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> List[float]:
"""Trains a single layer neural network classifier with softmax activation.

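For reference, the classifier described by this signature is essentially one dense softmax layer. A minimal Keras sketch is given below; the builder name, input handling, and compile settings are assumptions for illustration, not the library's `single_layer_softmax_classifier`.

import tensorflow as tf


def build_softmax_classifier(num_features: int, num_classes: int,
                             kernel_regularizer=None) -> tf.keras.Model:
  """One dense layer with softmax activation, matching the docstring above."""
  model = tf.keras.Sequential([
      tf.keras.layers.InputLayer(input_shape=(num_features,)),
      tf.keras.layers.Dense(num_classes, activation='softmax',
                            kernel_regularizer=kernel_regularizer),
  ])
  model.compile(optimizer='sgd', loss='categorical_crossentropy',
                metrics=['accuracy'])
  return model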

@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls):
l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
noise_multiplier: Ratio of the standard deviation to the clipping norm.
num_microbatches: Number of microbatches into which each minibatch is
split. Default is `None` which means that number of microbatches is
equal to batch size (i.e. each microbatch contains exactly one
split. Default is `None` which means that number of microbatches
is equal to batch size (i.e. each microbatch contains exactly one
example). If `gradient_accumulation_steps` is greater than 1 and
`num_microbatches` is not `None` then the effective number of
microbatches is equal to `num_microbatches *
gradient_accumulation_steps`.
microbatches is equal to
`num_microbatches * gradient_accumulation_steps`.
gradient_accumulation_steps: If greater than 1 then optimizer will be
accumulating gradients for this number of optimizer steps before
applying them to update model weights. If this argument is set to 1
@ -162,13 +162,13 @@ def make_keras_optimizer_class(cls):
self._was_dp_gradients_called = False
def _create_slots(self, var_list):
super()._create_slots(var_list)
super()._create_slots(var_list) # pytype: disable=attribute-error
if self.gradient_accumulation_steps > 1:
for var in var_list:
self.add_slot(var, 'grad_acc')
def _prepare_local(self, var_device, var_dtype, apply_state):
super()._prepare_local(var_device, var_dtype, apply_state)
super()._prepare_local(var_device, var_dtype, apply_state) # pytype: disable=attribute-error
if self.gradient_accumulation_steps > 1:
apply_update = tf.math.equal(
tf.math.floormod(self.iterations + 1,
@ -188,7 +188,7 @@ def make_keras_optimizer_class(cls):
def _update_grad():
apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense(
grad_acc + grad * coefficients['grad_scaler'], var, apply_state)
grad_acc + grad * coefficients['grad_scaler'], var, apply_state) # pytype: disable=attribute-error
with tf.control_dependencies([apply_grad_op]):
return grad_acc.assign(
tf.zeros_like(grad_acc),
@ -203,25 +203,21 @@ def make_keras_optimizer_class(cls):
return tf.cond(coefficients['apply_update'], _update_grad, _accumulate)
else:
return super(DPOptimizerClass,
self)._resource_apply_dense(grad, var, apply_state)
return super()._resource_apply_dense(grad, var, apply_state) # pytype: disable=attribute-error
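The branch above implements large-batch emulation: scaled gradients are added into a `grad_acc` slot, and only every `gradient_accumulation_steps` iterations is the accumulated value applied and the slot reset. A standalone sketch of that pattern for plain SGD follows; it is simplified, and the optimizer's actual slot and `coefficients` handling differs.

import tensorflow as tf


def accumulate_or_apply(var: tf.Variable, grad_acc: tf.Variable,
                        grad: tf.Tensor, step: tf.Tensor,
                        accumulation_steps: int, learning_rate: float = 0.1):
  """Applies the averaged gradient every `accumulation_steps` steps (sketch)."""
  apply_now = tf.equal(tf.math.floormod(step + 1, accumulation_steps), 0)

  def _apply():
    update = grad_acc + grad / accumulation_steps
    var.assign_sub(learning_rate * update)      # plain SGD step on the average
    return grad_acc.assign(tf.zeros_like(grad_acc))

  def _accumulate():
    return grad_acc.assign_add(grad / accumulation_steps)

  return tf.cond(apply_now, _apply, _accumulate)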
def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs):
if self.gradient_accumulation_steps > 1:
raise NotImplementedError(
'Sparse gradients are not supported with large batch emulation.')
else:
return super(DPOptimizerClass,
self)._resource_apply_sparse_duplicate_indices(
*args, **kwargs)
return super()._resource_apply_sparse_duplicate_indices(*args, **kwargs) # pytype: disable=attribute-error
def _resource_apply_sparse(self, *args, **kwargs):
if self.gradient_accumulation_steps > 1:
raise NotImplementedError(
'Sparse gradients are not supported with large batch emulation.')
else:
return super(DPOptimizerClass,
self)._resource_apply_sparse(*args, **kwargs)
return super()._resource_apply_sparse(*args, **kwargs) # pytype: disable=attribute-error
def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
"""DP-SGD version of base class method."""
@ -338,7 +334,7 @@ def make_keras_optimizer_class(cls):
Returns:
Python dictionary.
"""
config = super(DPOptimizerClass, self).get_config()
config = super().get_config()
config.update({
'l2_norm_clip': self._l2_norm_clip,
'noise_multiplier': self._noise_multiplier,
@ -354,7 +350,7 @@ def make_keras_optimizer_class(cls):
'training is not differentially private. It may be the case that '
'you need to upgrade to TF 2.4 or higher to use this particular '
'optimizer.')
return super(DPOptimizerClass, self).apply_gradients(*args, **kwargs)
return super().apply_gradients(*args, **kwargs)
return DPOptimizerClass
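Taken together, the class produced by `make_keras_optimizer_class` is configured like any Keras optimizer. An illustrative setup is shown below; the values are examples rather than recommendations, and `DPKerasSGDOptimizer` is assumed to be the SGD-based class exported from `dp_optimizer_keras`.

from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

optimizer = DPKerasSGDOptimizer(
    l2_norm_clip=1.0,                # max L2 norm of each microbatch gradient
    noise_multiplier=1.1,            # noise stddev = 1.1 * l2_norm_clip
    num_microbatches=None,           # None: one microbatch per example
    gradient_accumulation_steps=2,   # emulate a batch twice as large
    learning_rate=0.1)

# The loss should be left unreduced so per-microbatch gradients can be
# clipped, e.g.:
# loss = tf.keras.losses.CategoricalCrossentropy(
#     from_logits=True, reduction=tf.keras.losses.Reduction.NONE)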


@ -19,7 +19,7 @@ import enum
import glob
import os
import pickle
from typing import Any, Iterable, Union
from typing import Any, Iterable, MutableSequence, Optional, Union
import numpy as np
import pandas as pd
@ -47,8 +47,8 @@ class SingleSliceSpec:
When feature is None, it means that the slice is the entire dataset.
"""
feature: SlicingFeature = None
value: Any = None
feature: Optional[SlicingFeature] = None
value: Optional[Any] = None
@property
def entire_dataset(self):
@ -172,29 +172,29 @@ class AttackInputData:
This includes only the data, and not configuration.
"""
logits_train: np.ndarray = None
logits_test: np.ndarray = None
logits_train: Optional[np.ndarray] = None
logits_test: Optional[np.ndarray] = None
# Predicted probabilities for each class. They can be derived from logits,
# so they can be set only if logits are not explicitly provided.
probs_train: np.ndarray = None
probs_test: np.ndarray = None
probs_train: Optional[np.ndarray] = None
probs_test: Optional[np.ndarray] = None
# Contains ground-truth classes. Classes are assumed to be integers starting
# from 0.
labels_train: np.ndarray = None
labels_test: np.ndarray = None
labels_train: Optional[np.ndarray] = None
labels_test: Optional[np.ndarray] = None
# Explicitly specified loss. If provided, this is used instead of deriving
# loss from logits and labels
loss_train: np.ndarray = None
loss_test: np.ndarray = None
loss_train: Optional[np.ndarray] = None
loss_test: Optional[np.ndarray] = None
# Explicitly specified prediction entropy. If provided, this is used instead
# of deriving entropy from logits and labels
# (https://arxiv.org/pdf/2003.10595.pdf by Song and Mittal).
entropy_train: np.ndarray = None
entropy_test: np.ndarray = None
entropy_train: Optional[np.ndarray] = None
entropy_test: Optional[np.ndarray] = None
@property
def num_classes(self):
@ -387,7 +387,7 @@ class AttackInputData:
return '\n'.join(result)
def _append_array_shape(arr: np.array, arr_name: str, result):
def _append_array_shape(arr: Optional[np.ndarray], arr_name: str, result):
if arr is not None:
result.append(' %s with shape: %s,' % (arr_name, arr.shape))
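As a concrete illustration of the now-`Optional` fields, an `AttackInputData` can be built from just logits and labels; per the comments above, losses and entropies are derived when not supplied. The array contents below are dummies.

import numpy as np

from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData

rng = np.random.default_rng(0)
attack_input = AttackInputData(
    logits_train=rng.normal(size=(1000, 10)),
    logits_test=rng.normal(size=(1000, 10)),
    labels_train=rng.integers(0, 10, size=1000),
    labels_test=rng.integers(0, 10, size=1000))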
@ -465,11 +465,11 @@ class SingleAttackResult:
# Membership scores for the training set samples. For a perfect attacker,
# all training samples will have higher scores than test samples.
membership_scores_train: np.ndarray = None
membership_scores_train: Optional[np.ndarray] = None
# Membership scores for the test set samples. For a perfect attacker, all
# test set samples will have lower scores than the training set samples.
membership_scores_test: np.ndarray = None
membership_scores_test: Optional[np.ndarray] = None
def get_attacker_advantage(self):
return self.roc_curve.get_attacker_advantage()
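`get_attacker_advantage` above delegates to the ROC curve; the quantity is commonly defined as the maximum of TPR minus FPR over all thresholds. Below is a sketch computed directly from membership scores, assuming higher scores mean "more likely a training member"; it is not the library's implementation.

import numpy as np
from sklearn import metrics


def attacker_advantage(scores_train: np.ndarray, scores_test: np.ndarray) -> float:
  """max(TPR - FPR) over thresholds, with training examples as the positive class."""
  labels = np.concatenate([np.ones(len(scores_train)), np.zeros(len(scores_test))])
  scores = np.concatenate([scores_train, scores_test])
  fpr, tpr, _ = metrics.roc_curve(labels, scores)
  return float(np.max(tpr - fpr))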
@ -601,14 +601,14 @@ class PrivacyReportMetadata:
Used to create a privacy report based on AttackResults.
"""
accuracy_train: float = None
accuracy_test: float = None
accuracy_train: Optional[float] = None
accuracy_test: Optional[float] = None
loss_train: float = None
loss_test: float = None
loss_train: Optional[float] = None
loss_test: Optional[float] = None
model_variant_label: str = 'Default model variant'
epoch_num: int = None
epoch_num: Optional[int] = None
class AttackResultsDFColumns(enum.Enum):
@ -627,9 +627,9 @@ class AttackResultsDFColumns(enum.Enum):
@dataclasses.dataclass
class AttackResults:
"""Results from running multiple attacks."""
single_attack_results: Iterable[SingleAttackResult]
single_attack_results: MutableSequence[SingleAttackResult]
privacy_report_metadata: PrivacyReportMetadata = None
privacy_report_metadata: Optional[PrivacyReportMetadata] = None
def calculate_pd_dataframe(self):
"""Returns all metrics as a Pandas DataFrame."""
@ -760,7 +760,7 @@ class AttackResults:
@dataclasses.dataclass
class AttackResultsCollection:
"""A collection of AttackResults."""
attack_results_list: Iterable[AttackResults]
attack_results_list: MutableSequence[AttackResults]
def append(self, attack_results: AttackResults):
self.attack_results_list.append(attack_results)
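The switch from `Iterable` to `MutableSequence` in the two dataclasses above reflects that both containers are appended to in place; `Iterable` does not promise an `append` method. A small, generic illustration of the distinction for a type checker:

from typing import MutableSequence


def add_result(results: MutableSequence[int], value: int) -> None:
  results.append(value)      # append() is part of the MutableSequence protocol


add_result([1, 2, 3], 4)     # OK: a list is a MutableSequence
# add_result((1, 2, 3), 4)   # a type checker flags this: tuples are immutable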


@ -15,7 +15,7 @@
import collections
import copy
from typing import List
from typing import List, Optional
import numpy as np
@ -90,8 +90,9 @@ def _slice_by_classification_correctness(data: AttackInputData,
return _slice_data_by_indices(data, idx_train, idx_test)
def get_single_slice_specs(slicing_spec: SlicingSpec,
num_classes: int = None) -> List[SingleSliceSpec]:
def get_single_slice_specs(
slicing_spec: SlicingSpec,
num_classes: Optional[int] = None) -> List[SingleSliceSpec]:
"""Returns slices of data according to slicing_spec."""
result = []
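Relating this to the `SingleSliceSpec` defaults made `Optional` earlier in the commit: leaving `feature` unset denotes the whole-dataset slice, per the docstring quoted above. A minimal illustration, using the import path that appears elsewhere in this commit:

from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec

entire = SingleSliceSpec()       # feature is None -> the slice is the entire dataset
print(entire.entire_dataset)     # True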


@ -14,6 +14,7 @@
"""Trained models for membership inference attacks."""
import dataclasses
from typing import Optional
import numpy as np
from sklearn import ensemble
@ -33,15 +34,15 @@ class AttackerData:
This includes only the data, and not configuration.
"""
features_train: np.ndarray = None
features_train: Optional[np.ndarray] = None
# element-wise boolean array denoting if the example was part of training.
is_training_labels_train: np.ndarray = None
is_training_labels_train: Optional[np.ndarray] = None
features_test: np.ndarray = None
features_test: Optional[np.ndarray] = None
# element-wise boolean array denoting if the example was part of training.
is_training_labels_test: np.ndarray = None
is_training_labels_test: Optional[np.ndarray] = None
data_size: DataSize = None
data_size: Optional[DataSize] = None
def create_attacker_data(attack_input_data: AttackInputData,

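`create_attacker_data` (its signature is truncated above) turns an `AttackInputData` into the `AttackerData` fields: attack features from the in-training and out-of-training halves are stacked, with a boolean membership label per example. The following is a schematic of that assembly, simplified and not the library's implementation.

import numpy as np


def stack_attacker_features(features_in_training: np.ndarray,
                            features_out_of_training: np.ndarray):
  """Stacks features and builds the is-training labels used by the attacker."""
  features = np.concatenate([features_in_training, features_out_of_training])
  is_training = np.concatenate([
      np.ones(len(features_in_training), dtype=bool),
      np.zeros(len(features_out_of_training), dtype=bool),
  ])
  return features, is_training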

@ -19,14 +19,13 @@ and the logistic regression membership inference attack.
"""
import dataclasses
from typing import Iterator, List
from typing import Iterator, List, Optional, Tuple
import numpy as np
from scipy import stats
from sklearn import metrics
from sklearn import model_selection
import tensorflow as tf
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@ -51,15 +50,15 @@ class Seq2SeqAttackInputData:
This includes only the data, and not configuration.
"""
logits_train: Iterator[np.ndarray] = None
logits_test: Iterator[np.ndarray] = None
logits_train: Optional[Iterator[np.ndarray]] = None
logits_test: Optional[Iterator[np.ndarray]] = None
# Contains ground-truth token indices for the target sequences.
labels_train: Iterator[np.ndarray] = None
labels_test: Iterator[np.ndarray] = None
labels_train: Optional[Iterator[np.ndarray]] = None
labels_test: Optional[Iterator[np.ndarray]] = None
# Size of the target sequence vocabulary.
vocab_size: int = None
vocab_size: Optional[int] = None
# Train, test size = number of batches in training, test set.
# These values need to be supplied by the user as logits, labels
@ -126,7 +125,7 @@ class Seq2SeqAttackInputData:
def _get_attack_features_and_metadata(
logits: Iterator[np.ndarray],
labels: Iterator[np.ndarray]) -> (np.ndarray, float, float):
labels: Iterator[np.ndarray]) -> Tuple[np.ndarray, float, float]:
"""Returns the average rank of tokens per batch of sequences and the loss.
Args:
@ -212,7 +211,7 @@ def _get_ranks_for_sequence(logits: np.ndarray,
def _get_batch_loss_metrics(batch_logits: np.ndarray,
batch_labels: np.ndarray) -> (float, int):
batch_labels: np.ndarray) -> Tuple[float, int]:
"""Returns the loss, number of sequences for a batch.
Args:
@ -234,12 +233,13 @@ def _get_batch_loss_metrics(batch_logits: np.ndarray,
batch_loss += tf.reduce_sum(sequence_loss)
if not tf.executing_eagerly():
batch_loss = batch_loss.eval(session=tf.compat.v1.Session())
session = tf.compat.v1.Session()
batch_loss = batch_loss.eval(session=session)  # pytype: disable=attribute-error
return batch_loss / batch_length, batch_length
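In eager mode the `Session`/`eval` fallback above is unnecessary, and the same per-batch loss can be computed directly. A sketch follows, assuming a sparse categorical cross-entropy over unnormalized logits and integer token ids, which may differ in detail from the library's loss.

import numpy as np
import tensorflow as tf


def batch_loss_eager(batch_logits, batch_labels):
  """Average per-sequence loss for one batch (eager-mode sketch)."""
  total_loss, num_sequences = 0.0, 0
  for sequence_logits, sequence_labels in zip(batch_logits, batch_labels):
    sequence_loss = tf.keras.losses.sparse_categorical_crossentropy(
        sequence_labels, sequence_logits, from_logits=True)
    total_loss += float(tf.reduce_sum(sequence_loss))
    num_sequences += 1
  return total_loss / num_sequences, num_sequences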
def _get_batch_accuracy_metrics(batch_logits: np.ndarray,
batch_labels: np.ndarray) -> (float, float):
def _get_batch_accuracy_metrics(
batch_logits: np.ndarray, batch_labels: np.ndarray) -> Tuple[float, float]:
"""Returns the number of correct predictions, total number of predictions for a batch.
Args:
@ -261,8 +261,8 @@ def _get_batch_accuracy_metrics(batch_logits: np.ndarray,
batch_total_preds += len(sequence_labels)
if not tf.executing_eagerly():
batch_correct_preds = batch_correct_preds.eval(
session=tf.compat.v1.Session())
session = tf.compat.v1.Session()
batch_correct_preds = batch_correct_preds.eval(session=session)  # pytype: disable=attribute-error
return batch_correct_preds, batch_total_preds
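Finally, the rank features referenced by `_get_ranks_for_sequence` and summarized in `_get_attack_features_and_metadata` can be pictured as the rank of the true token among all vocabulary logits (1 = most likely). A vectorized NumPy sketch is below; the library's exact ranking convention may differ.

import numpy as np


def token_ranks(sequence_logits: np.ndarray, sequence_labels: np.ndarray) -> np.ndarray:
  """Rank of each true token among the vocabulary logits (1 = most likely).

  sequence_logits: [seq_len, vocab_size] scores; sequence_labels: [seq_len] ids.
  """
  true_logits = sequence_logits[np.arange(len(sequence_labels)), sequence_labels]
  return (sequence_logits >= true_logits[:, None]).sum(axis=1)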