forked from 626_privacy/tensorflow_privacy
Internal
PiperOrigin-RevId: 424965569
This commit is contained in:
parent 25571aaf87, commit 36b8ea34ef
9 changed files with 78 additions and 79 deletions
@@ -85,12 +85,4 @@ else:
   from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import VectorizedDPKerasSGDOptimizer
   from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import make_vectorized_keras_optimizer_class

-  try:
-    from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel
-    from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn
-    from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin
-    from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
-    from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber
-  except ImportError:
-    # module `bolt_on` not yet available in this version of TF Privacy
-    pass
@@ -15,8 +15,10 @@
 'Offline' means all the leaf nodes are ready before the protocol starts.
 """

+import distutils
 import math
+from typing import Optional

 import attr
 import tensorflow as tf
@@ -262,7 +264,7 @@ class TreeRangeSumQuery(dp_query.SumAggregationDPQuery):
     return cls(arity=arity, inner_query=inner_query)


-def _get_add_noise(stddev, seed: int = None):
+def _get_add_noise(stddev, seed: Optional[int] = None):
   """Utility function to decide which `add_noise` to use according to tf version."""
   if distutils.version.LooseVersion(
       tf.__version__) < distutils.version.LooseVersion('2.0.0'):
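The `seed: int = None` to `seed: Optional[int] = None` change follows PEP 484: a `None` default does not by itself make a parameter Optional, and strict checkers such as pytype flag the implicit form. A minimal illustration (not part of the diff):

from typing import Optional

def implicit(seed: int = None):  # runs, but strict type checkers reject it
  return seed

def explicit(seed: Optional[int] = None):  # the spelling this commit adopts
  return seed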
@@ -27,12 +27,14 @@ the algorithm of Abadi et al.: https://arxiv.org/pdf/1607.00133.pdf%20.

 import math
 from typing import List, Optional, Tuple

 import numpy as np
 import tensorflow as tf
-from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy as compute_epsilon
+from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy_lib
 from tensorflow_privacy.privacy.logistic_regression import datasets
 from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
 from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras

 from differential_privacy.python.accounting import common
@@ -170,9 +172,13 @@ def compute_dpsgd_noise_multiplier(num_train: int,
   """
   search_parameters = common.BinarySearchParameters(
       lower_bound=0, upper_bound=math.inf, initial_guess=1, tolerance=tolerance)
-  return common.inverse_monotone_function(
-      lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0],
-      epsilon, search_parameters)
+
+  def _func(x):
+    result = compute_dp_sgd_privacy_lib.compute_dp_sgd_privacy(
+        num_train, batch_size, x, epochs, delta)
+    return result[0]
+
+  return common.inverse_monotone_function(_func, epsilon, search_parameters)


 def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
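The refactor replaces a lambda with the named helper `_func`, but the logic is unchanged: `compute_dpsgd_noise_multiplier` finds the noise multiplier by inverting the monotone map from noise multiplier to epsilon with a binary search. A self-contained sketch of that idea, using plain bisection rather than the actual `common.inverse_monotone_function` (not part of the diff):

def inverse_monotone_function(func, target, lower=1e-6, upper=1e6, tol=1e-9):
  """Finds x with func(x) ~= target, assuming func is monotonically decreasing."""
  while upper - lower > tol:
    mid = (lower + upper) / 2
    if func(mid) > target:
      lower = mid  # epsilon still too large -> need a larger noise multiplier
    else:
      upper = mid
  return upper

# Toy decreasing function standing in for
# compute_dp_sgd_privacy(num_train, batch_size, x, epochs, delta)[0]:
noise_multiplier = inverse_monotone_function(lambda x: 10.0 / x, target=2.0)
print(noise_multiplier)  # ~5.0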
@@ -13,7 +13,8 @@
 # limitations under the License.
 """Implementation of a single-layer softmax classifier."""

-from typing import List
+from typing import List, Optional, Union

 import tensorflow as tf
 from tensorflow_privacy.privacy.logistic_regression import datasets
@@ -24,9 +25,9 @@ def single_layer_softmax_classifier(
     epochs: int,
     num_classes: int,
     optimizer: tf.keras.optimizers.Optimizer,
-    loss: tf.keras.losses.Loss = 'categorical_crossentropy',
+    loss: Union[tf.keras.losses.Loss, str] = 'categorical_crossentropy',
     batch_size: int = 32,
-    kernel_regularizer: tf.keras.regularizers.Regularizer = None
+    kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
 ) -> List[float]:
   """Trains a single layer neural network classifier with softmax activation.

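The widened `loss` annotation matches how Keras is actually used: `compile` and friends accept either a `tf.keras.losses.Loss` instance or a registered string name. Both of the following are valid standard Keras API (example, not part of the diff):

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(10, activation='softmax')])
# Either a registered string name...
model.compile(optimizer='sgd', loss='categorical_crossentropy')
# ...or a Loss instance.
model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalCrossentropy())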
@@ -138,12 +138,12 @@ def make_keras_optimizer_class(cls):
       l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients).
       noise_multiplier: Ratio of the standard deviation to the clipping norm.
       num_microbatches: Number of microbatches into which each minibatch is
-        split. Default is `None` which means that number of microbatches is
-        equal to batch size (i.e. each microbatch contains exactly one
+        split. Default is `None` which means that number of microbatches
+        is equal to batch size (i.e. each microbatch contains exactly one
         example). If `gradient_accumulation_steps` is greater than 1 and
         `num_microbatches` is not `None` then the effective number of
-        microbatches is equal to `num_microbatches *
-        gradient_accumulation_steps`.
+        microbatches is equal to
+        `num_microbatches * gradient_accumulation_steps`.
       gradient_accumulation_steps: If greater than 1 then optimizer will be
         accumulating gradients for this number of optimizer steps before
         applying them to update model weights. If this argument is set to 1
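A small worked example of the rule the rewrapped docstring states (values are hypothetical, not from the diff):

num_microbatches = 8
gradient_accumulation_steps = 4
# Effective number of microbatches contributing to each applied update:
effective = num_microbatches * gradient_accumulation_steps  # 8 * 4 = 32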
@@ -162,13 +162,13 @@ def make_keras_optimizer_class(cls):
       self._was_dp_gradients_called = False

     def _create_slots(self, var_list):
-      super()._create_slots(var_list)
+      super()._create_slots(var_list)  # pytype: disable=attribute-error
       if self.gradient_accumulation_steps > 1:
         for var in var_list:
           self.add_slot(var, 'grad_acc')

     def _prepare_local(self, var_device, var_dtype, apply_state):
-      super()._prepare_local(var_device, var_dtype, apply_state)
+      super()._prepare_local(var_device, var_dtype, apply_state)  # pytype: disable=attribute-error
       if self.gradient_accumulation_steps > 1:
         apply_update = tf.math.equal(
             tf.math.floormod(self.iterations + 1,
@@ -188,7 +188,7 @@ def make_keras_optimizer_class(cls):

       def _update_grad():
         apply_grad_op = super(DPOptimizerClass, self)._resource_apply_dense(
-            grad_acc + grad * coefficients['grad_scaler'], var, apply_state)
+            grad_acc + grad * coefficients['grad_scaler'], var, apply_state)  # pytype: disable=attribute-error
         with tf.control_dependencies([apply_grad_op]):
           return grad_acc.assign(
               tf.zeros_like(grad_acc),
@@ -203,25 +203,21 @@ def make_keras_optimizer_class(cls):

         return tf.cond(coefficients['apply_update'], _update_grad, _accumulate)
       else:
-        return super(DPOptimizerClass,
-                     self)._resource_apply_dense(grad, var, apply_state)
+        return super()._resource_apply_dense(grad, var, apply_state)  # pytype: disable=attribute-error

     def _resource_apply_sparse_duplicate_indices(self, *args, **kwargs):
       if self.gradient_accumulation_steps > 1:
         raise NotImplementedError(
             'Sparse gradients are not supported with large batch emulation.')
       else:
-        return super(DPOptimizerClass,
-                     self)._resource_apply_sparse_duplicate_indices(
-                         *args, **kwargs)
+        return super()._resource_apply_sparse_duplicate_indices(*args, **kwargs)  # pytype: disable=attribute-error

     def _resource_apply_sparse(self, *args, **kwargs):
       if self.gradient_accumulation_steps > 1:
         raise NotImplementedError(
             'Sparse gradients are not supported with large batch emulation.')
       else:
-        return super(DPOptimizerClass,
-                     self)._resource_apply_sparse(*args, **kwargs)
+        return super()._resource_apply_sparse(*args, **kwargs)  # pytype: disable=attribute-error

     def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None):
       """DP-SGD version of base class method."""
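For context, the `tf.cond(coefficients['apply_update'], _update_grad, _accumulate)` branch above is large-batch emulation: gradients are summed into the `grad_acc` slot and applied only every `gradient_accumulation_steps` iterations. A framework-free sketch of the same control flow (a hypothetical toy optimizer, not the library's implementation; the `1/k` scaling here stands in for `coefficients['grad_scaler']`):

import numpy as np

class AccumulatingSGD:
  """Toy optimizer: applies the accumulated gradient every `k` steps."""

  def __init__(self, lr=0.1, k=4):
    self.lr, self.k, self.step = lr, k, 0
    self.grad_acc = None  # plays the role of the 'grad_acc' slot

  def apply(self, var, grad):
    if self.grad_acc is None:
      self.grad_acc = np.zeros_like(var)
    self.step += 1
    if self.step % self.k == 0:        # the `_update_grad` branch
      var = var - self.lr * (self.grad_acc + grad) / self.k
      self.grad_acc[:] = 0.0           # like grad_acc.assign(tf.zeros_like(...))
    else:                              # the `_accumulate` branch
      self.grad_acc += grad
    return var

opt = AccumulatingSGD(lr=0.1, k=4)
w = np.array([1.0, 2.0])
for g in [np.ones(2)] * 8:   # weights only move on steps 4 and 8
  w = opt.apply(w, g)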
@@ -338,7 +334,7 @@ def make_keras_optimizer_class(cls):
       Returns:
         Python dictionary.
       """
-      config = super(DPOptimizerClass, self).get_config()
+      config = super().get_config()
       config.update({
           'l2_norm_clip': self._l2_norm_clip,
           'noise_multiplier': self._noise_multiplier,
@@ -354,7 +350,7 @@ def make_keras_optimizer_class(cls):
             'training is not differentially private. It may be the case that '
             'you need to upgrade to TF 2.4 or higher to use this particular '
             'optimizer.')
-      return super(DPOptimizerClass, self).apply_gradients(*args, **kwargs)
+      return super().apply_gradients(*args, **kwargs)

   return DPOptimizerClass

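The `super(DPOptimizerClass, self)` to `super()` edits are behavior-preserving: in Python 3, zero-argument `super()` resolves the enclosing class and instance automatically. For example (not part of the diff):

class Base:
  def get_config(self):
    return {'base': True}

class Derived(Base):
  def get_config(self):
    config = super().get_config()  # same as super(Derived, self).get_config()
    config.update({'derived': True})
    return config

assert Derived().get_config() == {'base': True, 'derived': True}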
@@ -19,7 +19,7 @@ import enum
 import glob
 import os
 import pickle
-from typing import Any, Iterable, Union
+from typing import Any, Iterable, MutableSequence, Optional, Union

 import numpy as np
 import pandas as pd
@@ -47,8 +47,8 @@ class SingleSliceSpec:

   When feature is None, it means that the slice is the entire dataset.
   """
-  feature: SlicingFeature = None
-  value: Any = None
+  feature: Optional[SlicingFeature] = None
+  value: Optional[Any] = None

   @property
   def entire_dataset(self):
@@ -172,29 +172,29 @@ class AttackInputData:
   This includes only the data, and not configuration.
   """

-  logits_train: np.ndarray = None
-  logits_test: np.ndarray = None
+  logits_train: Optional[np.ndarray] = None
+  logits_test: Optional[np.ndarray] = None

   # Predicted probabilities for each class. They can be derived from logits,
   # so they can be set only if logits are not explicitly provided.
-  probs_train: np.ndarray = None
-  probs_test: np.ndarray = None
+  probs_train: Optional[np.ndarray] = None
+  probs_test: Optional[np.ndarray] = None

   # Contains ground-truth classes. Classes are assumed to be integers starting
   # from 0.
-  labels_train: np.ndarray = None
-  labels_test: np.ndarray = None
+  labels_train: Optional[np.ndarray] = None
+  labels_test: Optional[np.ndarray] = None

   # Explicitly specified loss. If provided, this is used instead of deriving
   # loss from logits and labels
-  loss_train: np.ndarray = None
-  loss_test: np.ndarray = None
+  loss_train: Optional[np.ndarray] = None
+  loss_test: Optional[np.ndarray] = None

   # Explicitly specified prediction entropy. If provided, this is used instead
   # of deriving entropy from logits and labels
   # (https://arxiv.org/pdf/2003.10595.pdf by Song and Mittal).
-  entropy_train: np.ndarray = None
-  entropy_test: np.ndarray = None
+  entropy_train: Optional[np.ndarray] = None
+  entropy_test: Optional[np.ndarray] = None

   @property
   def num_classes(self):
@@ -387,7 +387,7 @@ class AttackInputData:
     return '\n'.join(result)


-def _append_array_shape(arr: np.array, arr_name: str, result):
+def _append_array_shape(arr: Optional[np.ndarray], arr_name: str, result):
   if arr is not None:
     result.append(' %s with shape: %s,' % (arr_name, arr.shape))

@@ -465,11 +465,11 @@ class SingleAttackResult:

   # Membership scores for the training set samples. For a perfect attacker,
   # all training samples will have higher scores than test samples.
-  membership_scores_train: np.ndarray = None
+  membership_scores_train: Optional[np.ndarray] = None

   # Membership scores for the test set samples. For a perfect attacker, all
   # test set samples will have lower scores than the training set samples.
-  membership_scores_test: np.ndarray = None
+  membership_scores_test: Optional[np.ndarray] = None

   def get_attacker_advantage(self):
     return self.roc_curve.get_attacker_advantage()
@@ -601,14 +601,14 @@ class PrivacyReportMetadata:

   Used to create a privacy report based on AttackResults.
   """
-  accuracy_train: float = None
-  accuracy_test: float = None
+  accuracy_train: Optional[float] = None
+  accuracy_test: Optional[float] = None

-  loss_train: float = None
-  loss_test: float = None
+  loss_train: Optional[float] = None
+  loss_test: Optional[float] = None

   model_variant_label: str = 'Default model variant'
-  epoch_num: int = None
+  epoch_num: Optional[int] = None


 class AttackResultsDFColumns(enum.Enum):
@@ -627,9 +627,9 @@ class AttackResultsDFColumns(enum.Enum):
 @dataclasses.dataclass
 class AttackResults:
   """Results from running multiple attacks."""
-  single_attack_results: Iterable[SingleAttackResult]
+  single_attack_results: MutableSequence[SingleAttackResult]

-  privacy_report_metadata: PrivacyReportMetadata = None
+  privacy_report_metadata: Optional[PrivacyReportMetadata] = None

   def calculate_pd_dataframe(self):
     """Returns all metrics as a Pandas DataFrame."""
@@ -760,7 +760,7 @@ class AttackResults:
 @dataclasses.dataclass
 class AttackResultsCollection:
   """A collection of AttackResults."""
-  attack_results_list: Iterable[AttackResults]
+  attack_results_list: MutableSequence[AttackResults]

   def append(self, attack_results: AttackResults):
     self.attack_results_list.append(attack_results)
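The `Iterable` to `MutableSequence` change matters because `AttackResultsCollection.append` calls `append` on the field, which `Iterable` does not guarantee; `MutableSequence` does. Illustration of the checker behavior (hypothetical functions, not from the diff):

from typing import Iterable, MutableSequence

def extend_iterable(xs: Iterable[int]) -> None:
  xs.append(1)  # flagged by type checkers: Iterable has no append

def extend_sequence(xs: MutableSequence[int]) -> None:
  xs.append(1)  # OK: MutableSequence defines append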
@@ -15,7 +15,7 @@

 import collections
 import copy
-from typing import List
+from typing import List, Optional

 import numpy as np

@@ -90,8 +90,9 @@ def _slice_by_classification_correctness(data: AttackInputData,
   return _slice_data_by_indices(data, idx_train, idx_test)


-def get_single_slice_specs(slicing_spec: SlicingSpec,
-                           num_classes: int = None) -> List[SingleSliceSpec]:
+def get_single_slice_specs(
+    slicing_spec: SlicingSpec,
+    num_classes: Optional[int] = None) -> List[SingleSliceSpec]:
   """Returns slices of data according to slicing_spec."""
   result = []

@@ -14,6 +14,7 @@
 """Trained models for membership inference attacks."""

 import dataclasses
+from typing import Optional

 import numpy as np
 from sklearn import ensemble
@@ -33,15 +34,15 @@ class AttackerData:
   This includes only the data, and not configuration.
   """

-  features_train: np.ndarray = None
+  features_train: Optional[np.ndarray] = None
   # element-wise boolean array denoting if the example was part of training.
-  is_training_labels_train: np.ndarray = None
+  is_training_labels_train: Optional[np.ndarray] = None

-  features_test: np.ndarray = None
+  features_test: Optional[np.ndarray] = None
   # element-wise boolean array denoting if the example was part of training.
-  is_training_labels_test: np.ndarray = None
+  is_training_labels_test: Optional[np.ndarray] = None

-  data_size: DataSize = None
+  data_size: Optional[DataSize] = None


 def create_attacker_data(attack_input_data: AttackInputData,
@@ -19,14 +19,13 @@ and the logistic regression membership inference attack.
 """

 import dataclasses
-from typing import Iterator, List
+from typing import Iterator, List, Optional, Tuple

 import numpy as np
 from scipy import stats
 from sklearn import metrics
 from sklearn import model_selection
 import tensorflow as tf
-
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
@@ -51,15 +50,15 @@ class Seq2SeqAttackInputData:

   This includes only the data, and not configuration.
   """
-  logits_train: Iterator[np.ndarray] = None
-  logits_test: Iterator[np.ndarray] = None
+  logits_train: Optional[Iterator[np.ndarray]] = None
+  logits_test: Optional[Iterator[np.ndarray]] = None

   # Contains ground-truth token indices for the target sequences.
-  labels_train: Iterator[np.ndarray] = None
-  labels_test: Iterator[np.ndarray] = None
+  labels_train: Optional[Iterator[np.ndarray]] = None
+  labels_test: Optional[Iterator[np.ndarray]] = None

   # Size of the target sequence vocabulary.
-  vocab_size: int = None
+  vocab_size: Optional[int] = None

   # Train, test size = number of batches in training, test set.
   # These values need to be supplied by the user as logits, labels
@@ -126,7 +125,7 @@ class Seq2SeqAttackInputData:

 def _get_attack_features_and_metadata(
     logits: Iterator[np.ndarray],
-    labels: Iterator[np.ndarray]) -> (np.ndarray, float, float):
+    labels: Iterator[np.ndarray]) -> Tuple[np.ndarray, float, float]:
   """Returns the average rank of tokens per batch of sequences and the loss.

   Args:
@@ -212,7 +211,7 @@ def _get_ranks_for_sequence(logits: np.ndarray,


 def _get_batch_loss_metrics(batch_logits: np.ndarray,
-                            batch_labels: np.ndarray) -> (float, int):
+                            batch_labels: np.ndarray) -> Tuple[float, int]:
   """Returns the loss, number of sequences for a batch.

   Args:
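`-> (float, int)` is a tuple of types, which Python accepts at runtime but type checkers reject as an annotation; `Tuple[float, int]` is the valid spelling (or `tuple[float, int]` on Python 3.9+). For instance (not part of the diff):

from typing import Tuple

def mean_and_count_legacy(xs) -> (float, int):   # legal Python, invalid type
  return sum(xs) / len(xs), len(xs)

def mean_and_count(xs) -> Tuple[float, int]:     # the form this diff adopts
  return sum(xs) / len(xs), len(xs)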
@@ -234,12 +233,13 @@ def _get_batch_loss_metrics(batch_logits: np.ndarray,
     batch_loss += tf.reduce_sum(sequence_loss)

   if not tf.executing_eagerly():
-    batch_loss = batch_loss.eval(session=tf.compat.v1.Session())
+    session = tf.compat.v1.Session()
+    batch_loss = batch_loss.eval(session=session)  # pytype: disable=attribute-error
   return batch_loss / batch_length, batch_length


-def _get_batch_accuracy_metrics(batch_logits: np.ndarray,
-                                batch_labels: np.ndarray) -> (float, float):
+def _get_batch_accuracy_metrics(
+    batch_logits: np.ndarray, batch_labels: np.ndarray) -> Tuple[float, float]:
   """Returns the number of correct predictions, total number of predictions for a batch.

   Args:
@@ -261,8 +261,8 @@ def _get_batch_accuracy_metrics(batch_logits: np.ndarray,
     batch_total_preds += len(sequence_labels)

   if not tf.executing_eagerly():
-    batch_correct_preds = batch_correct_preds.eval(
-        session=tf.compat.v1.Session())
+    session = tf.compat.v1.Session()
+    batch_correct_preds = batch_correct_preds.eval(session=session)  # pytype: disable=attribute-error
   return batch_correct_preds, batch_total_preds

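These last two hunks touch code paths that still support TF1-style graph execution, where a tensor must be evaluated through an explicit session; note that `Tensor.eval(feed_dict=None, session=None)` takes the session by keyword. A minimal sketch of the pattern (assuming TF1 compat behavior; under eager execution the branch is skipped):

import tensorflow as tf

total = tf.constant(3.0)
if not tf.executing_eagerly():
  session = tf.compat.v1.Session()
  total = total.eval(session=session)  # session passed by keyword, not position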