diff --git a/LICENSE b/LICENSE index 0a849ed..a5e7fe9 100644 --- a/LICENSE +++ b/LICENSE @@ -199,4 +199,30 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. + +------------------ + +Files: privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/* + +MIT License + +Copyright (c) 2019 Congzheng Song + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/tensorflow_privacy/privacy/membership_inference_attack/README.md b/tensorflow_privacy/privacy/membership_inference_attack/README.md index ff2f16b..19fa7f1 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/README.md +++ b/tensorflow_privacy/privacy/membership_inference_attack/README.md @@ -1,269 +1,2 @@ -# Membership inference attack - -A good privacy-preserving model learns from the training data, but -doesn't memorize it. This library provides empirical tests for measuring -potential memorization. - -Technically, the tests build classifiers that infer whether a particular sample -was present in the training set. The more accurate such a classifier is, the more -memorization is present and thus the less privacy-preserving the model is. - -The privacy vulnerability (or memorization potential) is measured -via the area under the ROC curve (`auc`) or via max{|fpr - tpr|} (`advantage`) -of the attack classifier. These measures are very closely related. - -The tests provided by the library are "black box". That is, only the outputs of -the model are used (e.g., losses, logits, predictions). Neither model internals -(weights) nor input samples are required. - -## How to use - -### Installation notes - -To use the latest version of the MIA library, please install TF Privacy with -"pip install -U git+https://github.com/tensorflow/privacy". See -https://github.com/tensorflow/privacy/issues/151 for more details.
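To make the two metrics concrete, here is a minimal, self-contained sketch (not part of this library; the membership labels and attack scores below are invented toy values) showing how `auc` and `advantage` fall out of the same ROC curve of an attack classifier:

```python
import numpy as np
from sklearn import metrics

# Toy membership labels (1 = was in training, 0 = was not) and hypothetical
# attack-classifier scores, where higher means "more likely a member".
is_member = np.array([1, 1, 1, 0, 0, 0])
scores = np.array([0.9, 0.8, 0.4, 0.7, 0.3, 0.2])

fpr, tpr, _ = metrics.roc_curve(is_member, scores)
print('AUC:', metrics.auc(fpr, tpr))            # area under the ROC curve
print('Advantage:', np.max(np.abs(tpr - fpr)))  # max over thresholds of |tpr - fpr|
```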
- -### Basic usage - -The simplest possible usage is - -```python -from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData - -# Suppose we have the labels as integers starting from 0 -# labels_train shape: (n_train, ) -# labels_test shape: (n_test, ) - -# Evaluate your model on training and test examples to get -# loss_train shape: (n_train, ) -# loss_test shape: (n_test, ) - -attacks_result = mia.run_attacks( - AttackInputData( - loss_train = loss_train, - loss_test = loss_test, - labels_train = labels_train, - labels_test = labels_test)) -``` - -This example calls `run_attacks` with the default options to run a host of -(fairly simple) attacks behind the scenes (depending on which data is fed in), -and computes the most important measures. - -> NOTE: The train and test sets are balanced internally, i.e., an equal number -> of in-training and out-of-training examples is chosen for the attacks -> (whichever has fewer examples). These are subsampled uniformly at random -> without replacement from the larger of the two. - -Then, we can view the attack results by: - -```python -print(attacks_result.summary()) -# Example output: -# -> Best-performing attacks over all slices -# THRESHOLD_ATTACK (with 50000 training and 10000 test examples) achieved an AUC of 0.59 on slice Entire dataset -# THRESHOLD_ATTACK (with 50000 training and 10000 test examples) achieved an advantage of 0.20 on slice Entire dataset -``` - -### Other codelabs - -Please head over to the [codelabs](https://github.com/tensorflow/privacy/tree/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs) -section for an overview of the library in action. - -### Advanced usage - -#### Specifying attacks to run - -Sometimes, we have more information about the data, such as the logits and the -labels, -and we may want to have finer-grained control of the attack, such as using more -complicated classifiers instead of the simple threshold attack, and looking at the -attack results by the examples' class. -In those cases, we can provide more information to `run_attacks`. - -```python -from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -``` - -First, as before, we specify the input for the attack as an -`AttackInputData` object: - -```python -# Evaluate your model on training and test examples to get -# logits_train shape: (n_train, n_classes) -# logits_test shape: (n_test, n_classes) -# loss_train shape: (n_train, ) -# loss_test shape: (n_test, ) - -attack_input = AttackInputData( - logits_train = logits_train, - logits_test = logits_test, - loss_train = loss_train, - loss_test = loss_test, - labels_train = labels_train, - labels_test = labels_test) -``` - -Instead of `logits`, you can also specify -`probs_train` and `probs_test` as the predicted probability vectors of each -example. - -Then, we specify some details of the attack. -The first part includes the specifications of the slicing of the data. For -example, we may want to evaluate the result on the whole dataset, or by class, -percentiles, or the correctness of the model's classification. -These can be specified by a `SlicingSpec` object. - -```python -slicing_spec = SlicingSpec( - entire_dataset = True, - by_class = True, - by_percentiles = False, - by_classification_correctness = True) -``` - -The second part specifies the classifiers for the attacker to use. -Currently, our API supports five classifiers, including -`AttackType.THRESHOLD_ATTACK` for the simple threshold attack, -`AttackType.LOGISTIC_REGRESSION`, -`AttackType.MULTI_LAYERED_PERCEPTRON`, -`AttackType.RANDOM_FOREST`, and -`AttackType.K_NEAREST_NEIGHBORS`, -which use the corresponding machine learning models. -For some models, different classifiers can yield pretty different results. -We can put multiple classifiers in a list: - -```python -attack_types = [ - AttackType.THRESHOLD_ATTACK, - AttackType.LOGISTIC_REGRESSION -] -``` - -Now, we can call the `run_attacks` method with all specifications: - -```python -attacks_result = mia.run_attacks(attack_input=attack_input, - slicing_spec=slicing_spec, - attack_types=attack_types) -``` - -This returns an object of type `AttackResults`. We can, for example, use the -following code to see the attack results specified per slice, as we have -requested attacks by class and by the model's classification correctness. - -```python -print(attacks_result.summary(by_slices = True)) -# Example output: -# -> Best-performing attacks over all slices -# THRESHOLD_ATTACK achieved an AUC of 0.75 on slice CORRECTLY_CLASSIFIED=False -# THRESHOLD_ATTACK achieved an advantage of 0.38 on slice CORRECTLY_CLASSIFIED=False -# -# Best-performing attacks over slice: "Entire dataset" -# LOGISTIC_REGRESSION achieved an AUC of 0.61 -# THRESHOLD_ATTACK achieved an advantage of 0.22 -# -# Best-performing attacks over slice: "CLASS=0" -# LOGISTIC_REGRESSION achieved an AUC of 0.62 -# LOGISTIC_REGRESSION achieved an advantage of 0.24 -# -# Best-performing attacks over slice: "CLASS=1" -# LOGISTIC_REGRESSION achieved an AUC of 0.61 -# LOGISTIC_REGRESSION achieved an advantage of 0.19 -# -# ... -# -# Best-performing attacks over slice: "CORRECTLY_CLASSIFIED=True" -# LOGISTIC_REGRESSION achieved an AUC of 0.53 -# THRESHOLD_ATTACK achieved an advantage of 0.05 -# -# Best-performing attacks over slice: "CORRECTLY_CLASSIFIED=False" -# THRESHOLD_ATTACK achieved an AUC of 0.75 -# THRESHOLD_ATTACK achieved an advantage of 0.38 -``` - - -#### Viewing and plotting the attack results - -We have seen an example of using `summary()` to view the attack results as text. -We also provide some other ways for inspecting the attack results. - -To get the attack that achieves the maximum attacker advantage or AUC, we can do - -```python -max_auc_attacker = attacks_result.get_result_with_max_auc() -max_advantage_attacker = attacks_result.get_result_with_max_attacker_advantage() -``` -Then, for an individual attack, such as `max_auc_attacker`, we can check its type, -attacker advantage, and AUC by - -```python -print("Attack type with max AUC: %s, AUC of %.2f, Attacker advantage of %.2f" % - (max_auc_attacker.attack_type, - max_auc_attacker.roc_curve.get_auc(), - max_auc_attacker.roc_curve.get_attacker_advantage())) -# Example output: -# -> Attack type with max AUC: THRESHOLD_ATTACK, AUC of 0.75, Attacker advantage of 0.38 -``` -We can also plot its ROC curve by - -```python -import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting - -figure = plotting.plot_roc_curve(max_auc_attacker.roc_curve) -``` -which would give a figure like the one below -![roc_fig](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelab_roc_fig.png?raw=true) - -Additionally, we provide functionality to convert the attack results into a Pandas -DataFrame: - -```python -import pandas as pd - -pd.set_option("display.max_rows", 8, "display.max_columns", None) -print(attacks_result.calculate_pd_dataframe()) -# Example output: -# slice feature slice value attack type Attacker advantage AUC -# 0 entire_dataset threshold 0.216440 0.600630 -# 1 entire_dataset lr 0.212073 0.612989 -# 2 class 0 threshold 0.226000 0.611669 -# 3 class 0 lr 0.239452 0.624076 -# .. ... ... ... ... ... -# 22 correctly_classified True threshold 0.054907 0.471290 -# 23 correctly_classified True lr 0.046986 0.525194 -# 24 correctly_classified False threshold 0.379465 0.748138 -# 25 correctly_classified False lr 0.370713 0.737148 -``` - -### External guides / press mentions - -* [Introductory blog post](https://franziska-boenisch.de/posts/2021/01/membership-inference/) -to the theory and the library by Franziska Boenisch from the Fraunhofer AISEC -institute. -* [Google AI Blog Post](https://ai.googleblog.com/2021/01/google-research-looking-back-at-2020.html#ResponsibleAI) -* [TensorFlow Blog Post](https://blog.tensorflow.org/2020/06/introducing-new-privacy-testing-library.html) -* [VentureBeat article](https://venturebeat.com/2020/06/24/google-releases-experimental-tensorflow-module-that-tests-the-privacy-of-ai-models/) -* [Tech Xplore article](https://techxplore.com/news/2020-06-google-tensorflow-privacy-module.html) - - -## Contact / Feedback - -Fill out this -[Google form](https://docs.google.com/forms/d/1DPwr3_OfMcqAOA6sdelTVjIZhKxMZkXvs94z16UCDa4/edit) -or reach out to us at tf-privacy@google.com and let us know how you’re using -this module. We’re keen on hearing your stories, feedback, and suggestions! - -## Contributing - -If you wish to add novel attacks to the attack library, please check our -[guidelines](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md). - -## Copyright - -Copyright 2021 - Google LLC +The sources from this folder were moved to +privacy/privacy_tests/membership_inference_attack.
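As the deprecation warning added to `__init__.py` below spells out, the change for downstream users amounts to a one-line import update. A minimal sketch (the `mia` alias is merely the convention used earlier in this README):

```python
# Before the move (old path, now deprecated):
# from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia

# After the move (new path):
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
```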
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/__init__.py b/tensorflow_privacy/privacy/membership_inference_attack/__init__.py index 2225510..8cf087b 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/__init__.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/__init__.py @@ -11,3 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""The old location of Membership Inference Attack sources.""" + +import warnings + +warnings.warn( + "\nMembership inference attack sources were moved. Please replace" + "\nimport tensorflow_privacy.privacy.membership_inference_attack\n" + "\nwith" + "\nimport tensorflow_privacy.privacy.privacy_tests.membership_inference_attack" +) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py index 1e2f075..bf52465 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py @@ -13,807 +13,6 @@ # limitations under the License. # Lint as: python3 -"""Data structures representing attack inputs, configuration, outputs.""" -import collections -import enum -import glob -import os -import pickle -from typing import Any, Iterable, Union -from dataclasses import dataclass -import numpy as np -import pandas as pd -from scipy import special -from sklearn import metrics -import tensorflow_privacy.privacy.membership_inference_attack.utils as utils +"""Moved to privacy_attack/membership_inference_attack.""" -ENTIRE_DATASET_SLICE_STR = 'Entire dataset' - - -class SlicingFeature(enum.Enum): - """Enum with features by which slicing is available.""" - CLASS = 'class' - PERCENTILE = 'percentile' - CORRECTLY_CLASSIFIED = 'correctly_classified' - - -@dataclass -class SingleSliceSpec: - """Specifies a slice. - - The slice is defined by values in one feature - it might be a single value - (eg. slice of examples of the specific classification class) or some set of - values (eg. range of percentiles of the attacked model loss). - - When feature is None, it means that the slice is the entire dataset. - """ - feature: SlicingFeature = None - value: Any = None - - @property - def entire_dataset(self): - return self.feature is None - - def __str__(self): - if self.entire_dataset: - return ENTIRE_DATASET_SLICE_STR - - if self.feature == SlicingFeature.PERCENTILE: - return 'Loss percentiles: %d-%d' % self.value - - return '%s=%s' % (self.feature.name, self.value) - - -@dataclass -class SlicingSpec: - """Specification of a slicing procedure. - - Each variable which is set specifies a slicing by a different dimension. - """ - - # When set to true, one of the slices is the whole dataset. - entire_dataset: bool = True - - # Used in classification tasks for slicing by classes. It is assumed that - # classes are integers 0, 1, ..., number of classes - 1. When true, one slice - # per class is generated. - by_class: Union[bool, Iterable[int], int] = False - - # If true, it generates 10 slices for percentiles of the loss - 0-10%, 10-20%, - # ... 90-100%. - by_percentiles: bool = False - - # When true, a slice for correctly classified and a slice for misclassified - # examples will be generated.
- by_classification_correctness: bool = False - - def __str__(self): - """Only keeps the True values.""" - result = ['SlicingSpec('] - if self.entire_dataset: - result.append(' Entire dataset,') - if self.by_class: - if isinstance(self.by_class, Iterable): - result.append(' Into classes %s,' % self.by_class) - elif isinstance(self.by_class, int): - result.append(' Up to class %d,' % self.by_class) - else: - result.append(' By classes,') - if self.by_percentiles: - result.append(' By percentiles,') - if self.by_classification_correctness: - result.append(' By classification correctness,') - result.append(')') - return '\n'.join(result) - - -class AttackType(enum.Enum): - """An enum define attack types.""" - LOGISTIC_REGRESSION = 'lr' - MULTI_LAYERED_PERCEPTRON = 'mlp' - RANDOM_FOREST = 'rf' - K_NEAREST_NEIGHBORS = 'knn' - THRESHOLD_ATTACK = 'threshold' - THRESHOLD_ENTROPY_ATTACK = 'threshold-entropy' - - @property - def is_trained_attack(self): - """Returns whether this type of attack requires training a model.""" - return (self != AttackType.THRESHOLD_ATTACK) and ( - self != AttackType.THRESHOLD_ENTROPY_ATTACK) - - def __str__(self): - """Returns LOGISTIC_REGRESSION instead of AttackType.LOGISTIC_REGRESSION.""" - return '%s' % self.name - - -class PrivacyMetric(enum.Enum): - """An enum for the supported privacy risk metrics.""" - AUC = 'AUC' - ATTACKER_ADVANTAGE = 'Attacker advantage' - - def __str__(self): - """Returns 'AUC' instead of PrivacyMetric.AUC.""" - return '%s' % self.value - - -def _is_integer_type_array(a): - return np.issubdtype(a.dtype, np.integer) - - -def _is_last_dim_equal(arr1, arr1_name, arr2, arr2_name): - """Checks whether the last dimension of the arrays is the same.""" - if arr1 is not None and arr2 is not None and arr1.shape[-1] != arr2.shape[-1]: - raise ValueError('%s and %s should have the same number of features.' % - (arr1_name, arr2_name)) - - -def _is_array_one_dimensional(arr, arr_name): - """Checks whether the array is one dimensional.""" - if arr is not None and len(arr.shape) != 1: - raise ValueError('%s should be a one dimensional numpy array.' % arr_name) - - -def _is_np_array(arr, arr_name): - """Checks whether array is a numpy array.""" - if arr is not None and not isinstance(arr, np.ndarray): - raise ValueError('%s should be a numpy array.' % arr_name) - - -def _log_value(probs, small_value=1e-30): - """Compute the log value on the probability. Clip probabilities close to 0.""" - return -np.log(np.maximum(probs, small_value)) - - -@dataclass -class AttackInputData: - """Input data for running an attack. - - This includes only the data, and not configuration. - """ - - logits_train: np.ndarray = None - logits_test: np.ndarray = None - - # Predicted probabilities for each class. They can be derived from logits, - # so they can be set only if logits are not explicitly provided. - probs_train: np.ndarray = None - probs_test: np.ndarray = None - - # Contains ground-truth classes. Classes are assumed to be integers starting - # from 0. - labels_train: np.ndarray = None - labels_test: np.ndarray = None - - # Explicitly specified loss. If provided, this is used instead of deriving - # loss from logits and labels - loss_train: np.ndarray = None - loss_test: np.ndarray = None - - # Explicitly specified prediction entropy. If provided, this is used instead - # of deriving entropy from logits and labels - # (https://arxiv.org/pdf/2003.10595.pdf by Song and Mittal). 
- entropy_train: np.ndarray = None - entropy_test: np.ndarray = None - - @property - def num_classes(self): - if self.labels_train is None or self.labels_test is None: - raise ValueError( - 'Can\'t identify the number of classes as no labels were provided. ' - 'Please set labels_train and labels_test') - return int(max(np.max(self.labels_train), np.max(self.labels_test))) + 1 - - @property - def logits_or_probs_train(self): - """Returns train logits or probs whatever is not None.""" - if self.logits_train is not None: - return self.logits_train - return self.probs_train - - @property - def logits_or_probs_test(self): - """Returns test logits or probs whatever is not None.""" - if self.logits_test is not None: - return self.logits_test - return self.probs_test - - @staticmethod - def _get_entropy(logits: np.ndarray, true_labels: np.ndarray): - """Computes the prediction entropy (by Song and Mittal).""" - if (np.absolute(np.sum(logits, axis=1) - 1) <= 1e-3).all(): - probs = logits - else: - # Using softmax to compute probability from logits. - probs = special.softmax(logits, axis=1) - if true_labels is None: - # When not given ground truth label, we compute the - # normal prediction entropy. - # See the Equation (7) in https://arxiv.org/pdf/2003.10595.pdf - return np.sum(np.multiply(probs, _log_value(probs)), axis=1) - else: - # When given the ground truth label, we compute the - # modified prediction entropy. - # See the Equation (8) in https://arxiv.org/pdf/2003.10595.pdf - log_probs = _log_value(probs) - reverse_probs = 1 - probs - log_reverse_probs = _log_value(reverse_probs) - modified_probs = np.copy(probs) - modified_probs[range(true_labels.size), - true_labels] = reverse_probs[range(true_labels.size), - true_labels] - modified_log_probs = np.copy(log_reverse_probs) - modified_log_probs[range(true_labels.size), - true_labels] = log_probs[range(true_labels.size), - true_labels] - return np.sum(np.multiply(modified_probs, modified_log_probs), axis=1) - - def get_loss_train(self): - """Calculates (if needed) cross-entropy losses for the training set. - - Returns: - Loss (or None if neither the loss nor the labels are present). - """ - if self.loss_train is None: - if self.labels_train is None: - return None - if self.logits_train is not None: - self.loss_train = utils.log_loss_from_logits(self.labels_train, - self.logits_train) - else: - self.loss_train = utils.log_loss(self.labels_train, self.probs_train) - return self.loss_train - - def get_loss_test(self): - """Calculates (if needed) cross-entropy losses for the test set. - - Returns: - Loss (or None if neither the loss nor the labels are present). 
- """ - if self.loss_test is None: - if self.labels_test is None: - return None - if self.logits_test is not None: - self.loss_test = utils.log_loss_from_logits(self.labels_test, - self.logits_test) - else: - self.loss_test = utils.log_loss(self.labels_test, self.probs_test) - return self.loss_test - - def get_entropy_train(self): - """Calculates prediction entropy for the training set.""" - if self.entropy_train is not None: - return self.entropy_train - return self._get_entropy(self.logits_train, self.labels_train) - - def get_entropy_test(self): - """Calculates prediction entropy for the test set.""" - if self.entropy_test is not None: - return self.entropy_test - return self._get_entropy(self.logits_test, self.labels_test) - - def get_train_size(self): - """Returns size of the training set.""" - if self.loss_train is not None: - return self.loss_train.size - if self.entropy_train is not None: - return self.entropy_train.size - return self.logits_or_probs_train.shape[0] - - def get_test_size(self): - """Returns size of the test set.""" - if self.loss_test is not None: - return self.loss_test.size - if self.entropy_test is not None: - return self.entropy_test.size - return self.logits_or_probs_test.shape[0] - - def validate(self): - """Validates the inputs.""" - if (self.loss_train is None) != (self.loss_test is None): - raise ValueError( - 'loss_test and loss_train should both be either set or unset') - - if (self.entropy_train is None) != (self.entropy_test is None): - raise ValueError( - 'entropy_test and entropy_train should both be either set or unset') - - if (self.logits_train is None) != (self.logits_test is None): - raise ValueError( - 'logits_train and logits_test should both be either set or unset') - - if (self.probs_train is None) != (self.probs_test is None): - raise ValueError( - 'probs_train and probs_test should both be either set or unset') - - if (self.logits_train is not None) and (self.probs_train is not None): - raise ValueError('Logits and probs can not be both set') - - if (self.labels_train is None) != (self.labels_test is None): - raise ValueError( - 'labels_train and labels_test should both be either set or unset') - - if (self.labels_train is None and self.loss_train is None and - self.logits_train is None and self.entropy_train is None): - raise ValueError( - 'At least one of labels, logits, losses or entropy should be set') - - if self.labels_train is not None and not _is_integer_type_array( - self.labels_train): - raise ValueError('labels_train elements should have integer type') - - if self.labels_test is not None and not _is_integer_type_array( - self.labels_test): - raise ValueError('labels_test elements should have integer type') - - _is_np_array(self.logits_train, 'logits_train') - _is_np_array(self.logits_test, 'logits_test') - _is_np_array(self.probs_train, 'probs_train') - _is_np_array(self.probs_test, 'probs_test') - _is_np_array(self.labels_train, 'labels_train') - _is_np_array(self.labels_test, 'labels_test') - _is_np_array(self.loss_train, 'loss_train') - _is_np_array(self.loss_test, 'loss_test') - _is_np_array(self.entropy_train, 'entropy_train') - _is_np_array(self.entropy_test, 'entropy_test') - - _is_last_dim_equal(self.logits_train, 'logits_train', self.logits_test, - 'logits_test') - _is_last_dim_equal(self.probs_train, 'probs_train', self.probs_test, - 'probs_test') - _is_array_one_dimensional(self.loss_train, 'loss_train') - _is_array_one_dimensional(self.loss_test, 'loss_test') - _is_array_one_dimensional(self.entropy_train, 
'entropy_train') - _is_array_one_dimensional(self.entropy_test, 'entropy_test') - _is_array_one_dimensional(self.labels_train, 'labels_train') - _is_array_one_dimensional(self.labels_test, 'labels_test') - - def __str__(self): - """Return the shapes of variables that are not None.""" - result = ['AttackInputData('] - _append_array_shape(self.loss_train, 'loss_train', result) - _append_array_shape(self.loss_test, 'loss_test', result) - _append_array_shape(self.entropy_train, 'entropy_train', result) - _append_array_shape(self.entropy_test, 'entropy_test', result) - _append_array_shape(self.logits_train, 'logits_train', result) - _append_array_shape(self.logits_test, 'logits_test', result) - _append_array_shape(self.probs_train, 'probs_train', result) - _append_array_shape(self.probs_test, 'probs_test', result) - _append_array_shape(self.labels_train, 'labels_train', result) - _append_array_shape(self.labels_test, 'labels_test', result) - result.append(')') - return '\n'.join(result) - - -def _append_array_shape(arr: np.array, arr_name: str, result): - if arr is not None: - result.append(' %s with shape: %s,' % (arr_name, arr.shape)) - - -@dataclass -class RocCurve: - """Represents ROC curve of a membership inference classifier.""" - # Thresholds used to define points on ROC curve. - # Thresholds are not explicitly part of the curve, and are stored for - # debugging purposes. - thresholds: np.ndarray - - # True positive rates based on thresholds - tpr: np.ndarray - - # False positive rates based on thresholds - fpr: np.ndarray - - def get_auc(self): - """Calculates area under curve (aka AUC).""" - return metrics.auc(self.fpr, self.tpr) - - def get_attacker_advantage(self): - """Calculates membership attacker's (or adversary's) advantage. - - This metric is inspired by https://arxiv.org/abs/1709.01604, specifically - by Definition 4. The difference here is that we calculate maximum advantage - over all available classifier thresholds. - - Returns: - a single float number with membership attacker's advantage. - """ - return max(np.abs(self.tpr - self.fpr)) - - def __str__(self): - """Returns AUC and advantage metrics.""" - return '\n'.join([ - 'RocCurve(', - ' AUC: %.2f' % self.get_auc(), - ' Attacker advantage: %.2f' % self.get_attacker_advantage(), ')' - ]) - - -# (no. of training examples, no. of test examples) for the test. -DataSize = collections.namedtuple('DataSize', 'ntrain ntest') - - -@dataclass -class SingleAttackResult: - """Results from running a single attack.""" - - # Data slice this result was calculated for. - slice_spec: SingleSliceSpec - - # (no. of training examples, no. of test examples) for the test. - data_size: DataSize - attack_type: AttackType - - # NOTE: roc_curve could theoretically be derived from membership scores. - # Currently, we store it explicitly since not all attack types support - # membership scores. - # TODO(b/175870479): Consider deriving ROC curve from the membership scores. - - # ROC curve representing the accuracy of the attacker - roc_curve: RocCurve - - # Membership score is some measure of confidence of this attacker that - # a particular sample is a member of the training set. - # - # This is NOT necessarily probability. The nature of this score depends on - # the type of attacker. Scores from different attacker types are not directly - # comparable, but can be compared in relative terms (e.g. considering order - # imposed by this measure). - # - - # Membership scores for the training set samples. 
For a perfect attacker, - # all training samples will have higher scores than test samples. - membership_scores_train: np.ndarray = None - - # Membership scores for the test set samples. For a perfect attacker, all - # test set samples will have lower scores than the training set samples. - membership_scores_test: np.ndarray = None - - def get_attacker_advantage(self): - return self.roc_curve.get_attacker_advantage() - - def get_auc(self): - return self.roc_curve.get_auc() - - def __str__(self): - """Returns SliceSpec, AttackType, AUC and advantage metrics.""" - return '\n'.join([ - 'SingleAttackResult(', - ' SliceSpec: %s' % str(self.slice_spec), - ' DataSize: (ntrain=%d, ntest=%d)' % (self.data_size.ntrain, - self.data_size.ntest), - ' AttackType: %s' % str(self.attack_type), - ' AUC: %.2f' % self.get_auc(), - ' Attacker advantage: %.2f' % self.get_attacker_advantage(), ')' - ]) - - -@dataclass -class SingleMembershipProbabilityResult: - """Results from computing membership probabilities (denoted as privacy risk score in https://arxiv.org/abs/2003.10595). - - this part shows how to leverage membership probabilities to perform attacks - with thresholding on them. - """ - - # Data slice this result was calculated for. - slice_spec: SingleSliceSpec - - train_membership_probs: np.ndarray - - test_membership_probs: np.ndarray - - def attack_with_varied_thresholds(self, threshold_list): - """Performs an attack with the specified thresholds. - - For each threshold value, we count how many training and test samples with - membership probabilities larger than the threshold and further compute - precision and recall values. We skip the threshold value if it is larger - than every sample's membership probability. - - Args: - threshold_list: List of provided thresholds - - Returns: - An array of attack results. - """ - fpr, tpr, thresholds = metrics.roc_curve( - np.concatenate((np.ones(len(self.train_membership_probs)), - np.zeros(len(self.test_membership_probs)))), - np.concatenate( - (self.train_membership_probs, self.test_membership_probs)), - drop_intermediate=False) - - precision_list = [] - recall_list = [] - meaningful_threshold_list = [] - max_prob = max(self.train_membership_probs.max(), - self.test_membership_probs.max()) - for threshold in threshold_list: - if threshold <= max_prob: - idx = np.argwhere(thresholds >= threshold)[-1][0] - meaningful_threshold_list.append(threshold) - precision_list.append(tpr[idx] / (tpr[idx] + fpr[idx])) - recall_list.append(tpr[idx]) - - return np.array(meaningful_threshold_list), np.array( - precision_list), np.array(recall_list) - - def collect_results(self, threshold_list, return_roc_results=True): - """The membership probability (from 0 to 1) represents each sample's probability of being in the training set. - - Usually, we choose a list of threshold values from 0.5 (uncertain of - training or test) to 1 (100% certain of training) - to compute corresponding attack precision and recall. - - Args: - threshold_list: List of provided thresholds - return_roc_results: Whether to return ROC results - - Returns: - Summary string. 
- """ - meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds( - threshold_list) - summary = [] - summary.append('\nMembership probability analysis over slice: \"%s\"' % - str(self.slice_spec)) - for i in range(len(meaningful_threshold_list)): - summary.append( - ' with %.4f as the threshold on membership probability, the precision-recall pair is (%.4f, %.4f)' - % (meaningful_threshold_list[i], precision_list[i], recall_list[i])) - if return_roc_results: - fpr, tpr, thresholds = metrics.roc_curve( - np.concatenate((np.ones(len(self.train_membership_probs)), - np.zeros(len(self.test_membership_probs)))), - np.concatenate( - (self.train_membership_probs, self.test_membership_probs))) - roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) - summary.append( - ' thresholding on membership probability achieved an AUC of %.2f' % - (roc_curve.get_auc())) - summary.append( - ' thresholding on membership probability achieved an advantage of %.2f' - % (roc_curve.get_attacker_advantage())) - return summary - - -@dataclass -class MembershipProbabilityResults: - """Membership probability results from multiple data slices.""" - - membership_prob_results: Iterable[SingleMembershipProbabilityResult] - - def summary(self, threshold_list): - """Returns the summary of membership probability analyses on all slices.""" - summary = [] - for single_result in self.membership_prob_results: - single_summary = single_result.collect_results(threshold_list) - summary.extend(single_summary) - return '\n'.join(summary) - - -@dataclass -class PrivacyReportMetadata: - """Metadata about the evaluated model. - - Used to create a privacy report based on AttackResults. - """ - accuracy_train: float = None - accuracy_test: float = None - - loss_train: float = None - loss_test: float = None - - model_variant_label: str = 'Default model variant' - epoch_num: int = None - - -class AttackResultsDFColumns(enum.Enum): - """Columns for the Pandas DataFrame that stores AttackResults metrics.""" - SLICE_FEATURE = 'slice feature' - SLICE_VALUE = 'slice value' - DATA_SIZE_TRAIN = 'train size' - DATA_SIZE_TEST = 'test size' - ATTACK_TYPE = 'attack type' - - def __str__(self): - """Returns 'slice value' instead of AttackResultsDFColumns.SLICE_VALUE.""" - return '%s' % self.value - - -@dataclass -class AttackResults: - """Results from running multiple attacks.""" - single_attack_results: Iterable[SingleAttackResult] - - privacy_report_metadata: PrivacyReportMetadata = None - - def calculate_pd_dataframe(self): - """Returns all metrics as a Pandas DataFrame.""" - slice_features = [] - slice_values = [] - data_size_train = [] - data_size_test = [] - attack_types = [] - advantages = [] - aucs = [] - - for attack_result in self.single_attack_results: - slice_spec = attack_result.slice_spec - if slice_spec.entire_dataset: - slice_feature, slice_value = str(slice_spec), '' - else: - slice_feature, slice_value = slice_spec.feature.value, slice_spec.value - slice_features.append(str(slice_feature)) - slice_values.append(str(slice_value)) - data_size_train.append(attack_result.data_size.ntrain) - data_size_test.append(attack_result.data_size.ntest) - attack_types.append(str(attack_result.attack_type)) - advantages.append(float(attack_result.get_attacker_advantage())) - aucs.append(float(attack_result.get_auc())) - - df = pd.DataFrame({ - str(AttackResultsDFColumns.SLICE_FEATURE): slice_features, - str(AttackResultsDFColumns.SLICE_VALUE): slice_values, - str(AttackResultsDFColumns.DATA_SIZE_TRAIN): 
data_size_train, - str(AttackResultsDFColumns.DATA_SIZE_TEST): data_size_test, - str(AttackResultsDFColumns.ATTACK_TYPE): attack_types, - str(PrivacyMetric.ATTACKER_ADVANTAGE): advantages, - str(PrivacyMetric.AUC): aucs - }) - return df - - def summary(self, by_slices=False) -> str: - """Provides a summary of the metrics. - - The summary provides the best-performing attacks for each requested data - slice. - - Args: - by_slices: whether to prepare a per-slice summary. - - Returns: - A string with a summary of all the metrics. - """ - summary = [] - - # Summary over all slices - max_auc_result_all = self.get_result_with_max_auc() - summary.append('Best-performing attacks over all slices') - summary.append( - ' %s (with %d training and %d test examples) achieved an AUC of %.2f on slice %s' - % (max_auc_result_all.attack_type, - max_auc_result_all.data_size.ntrain, - max_auc_result_all.data_size.ntest, - max_auc_result_all.get_auc(), - max_auc_result_all.slice_spec)) - - max_advantage_result_all = self.get_result_with_max_attacker_advantage() - summary.append( - ' %s (with %d training and %d test examples) achieved an advantage of %.2f on slice %s' - % (max_advantage_result_all.attack_type, - max_advantage_result_all.data_size.ntrain, - max_advantage_result_all.data_size.ntest, - max_advantage_result_all.get_attacker_advantage(), - max_advantage_result_all.slice_spec)) - - slice_dict = self._group_results_by_slice() - - if by_slices and len(slice_dict.keys()) > 1: - for slice_str in slice_dict: - results = slice_dict[slice_str] - summary.append('\nBest-performing attacks over slice: \"%s\"' % - slice_str) - max_auc_result = results.get_result_with_max_auc() - summary.append( - ' %s (with %d training and %d test examples) achieved an AUC of %.2f' - % (max_auc_result.attack_type, - max_auc_result.data_size.ntrain, - max_auc_result.data_size.ntest, - max_auc_result.get_auc())) - max_advantage_result = results.get_result_with_max_attacker_advantage() - summary.append( - ' %s (with %d training and %d test examples) achieved an advantage of %.2f' - % (max_advantage_result.attack_type, - max_advantage_result.data_size.ntrain, - max_advantage_result.data_size.ntest, - max_advantage_result.get_attacker_advantage())) - - return '\n'.join(summary) - - def _group_results_by_slice(self): - """Groups AttackResults into a dictionary keyed by the slice.""" - slice_dict = {} - for attack_result in self.single_attack_results: - slice_str = str(attack_result.slice_spec) - if slice_str not in slice_dict: - slice_dict[slice_str] = AttackResults([]) - slice_dict[slice_str].single_attack_results.append(attack_result) - return slice_dict - - def get_result_with_max_auc(self) -> SingleAttackResult: - """Get the result with maximum AUC for all attacks and slices.""" - aucs = [result.get_auc() for result in self.single_attack_results] - - if min(aucs) < 0.4: - print('Suspiciously low AUC detected: %.2f. ' 'There might be a bug in the classifier' % min(aucs)) - - return self.single_attack_results[np.argmax(aucs)] - - def get_result_with_max_attacker_advantage(self) -> SingleAttackResult: - """Get the result with maximum advantage for all attacks and slices.""" - return self.single_attack_results[np.argmax([ - result.get_attacker_advantage() for result in self.single_attack_results - ])] - - def save(self, filepath): - """Saves self to a pickle file.""" - with open(filepath, 'wb') as out: - pickle.dump(self, out) - - @classmethod - def load(cls, filepath): - """Loads AttackResults from a pickle file.""" - with open(filepath, 'rb') as inp: - return pickle.load(inp) - - -@dataclass -class AttackResultsCollection: - """A collection of AttackResults.""" - attack_results_list: Iterable[AttackResults] - - def append(self, attack_results: AttackResults): - self.attack_results_list.append(attack_results) - - def save(self, dirname): - """Saves self to a pickle file.""" - for i, attack_results in enumerate(self.attack_results_list): - filepath = os.path.join(dirname, - _get_attack_results_filename(attack_results, i)) - - attack_results.save(filepath) - - @classmethod - def load(cls, dirname): - """Loads AttackResultsCollection from all files in a directory.""" - loaded_collection = AttackResultsCollection([]) - for filepath in sorted(glob.glob('%s/*' % dirname)): - with open(filepath, 'rb') as inp: - loaded_collection.attack_results_list.append(pickle.load(inp)) - return loaded_collection - - -def _get_attack_results_filename(attack_results: AttackResults, index: int): - """Creates a filename for a specific set of AttackResults.""" - metadata = attack_results.privacy_report_metadata - if metadata is not None: - return '%s_%s_epoch_%s.pickle' % (metadata.model_variant_label, index, - metadata.epoch_num) - return '%s.pickle' % index - - -def get_flattened_attack_metrics(results: AttackResults): - """Get flattened attack metrics. - - Args: - results: membership inference attack results. - - Returns: - types: a list of attack types - slices: a list of slices - attack_metrics: a list of metric names - values: a list of metric values; the i-th element corresponds to types[i], slices[i] and attack_metrics[i] - """ - types = [] - slices = [] - attack_metrics = [] - values = [] - for attack_result in results.single_attack_results: - types += [str(attack_result.attack_type)] * 2 - slices += [str(attack_result.slice_spec)] * 2 - attack_metrics += ['adv', 'auc'] - values += [float(attack_result.get_attacker_advantage()), - float(attack_result.get_auc())] - return types, slices, attack_metrics, values +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import * # pylint: disable=wildcard-import diff --git a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py b/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py index a6694e4..c5268ac 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py @@ -13,136 +13,6 @@ # limitations under the License.
# Lint as: python3 -"""Specifying and creating AttackInputData slices.""" +"""Moved to privacy_attack/membership_inference_attack.""" -import collections.abc -import copy -from typing import List - -import numpy as np -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec - - -def _slice_if_not_none(a, idx): - return None if a is None else a[idx] - - -def _slice_data_by_indices(data: AttackInputData, idx_train, - idx_test) -> AttackInputData: - """Slices train fields with idx_train and test fields with idx_test.""" - - result = AttackInputData() - - # Slice train data. - result.logits_train = _slice_if_not_none(data.logits_train, idx_train) - result.probs_train = _slice_if_not_none(data.probs_train, idx_train) - result.labels_train = _slice_if_not_none(data.labels_train, idx_train) - result.loss_train = _slice_if_not_none(data.loss_train, idx_train) - result.entropy_train = _slice_if_not_none(data.entropy_train, idx_train) - - # Slice test data. - result.logits_test = _slice_if_not_none(data.logits_test, idx_test) - result.probs_test = _slice_if_not_none(data.probs_test, idx_test) - result.labels_test = _slice_if_not_none(data.labels_test, idx_test) - result.loss_test = _slice_if_not_none(data.loss_test, idx_test) - result.entropy_test = _slice_if_not_none(data.entropy_test, idx_test) - - return result - - -def _slice_by_class(data: AttackInputData, class_value: int) -> AttackInputData: - idx_train = data.labels_train == class_value - idx_test = data.labels_test == class_value - return _slice_data_by_indices(data, idx_train, idx_test) - - -def _slice_by_percentiles(data: AttackInputData, from_percentile: float, - to_percentile: float): - """Slices samples by loss percentiles.""" - - # Find from_percentile and to_percentile percentiles in losses. - loss_train = data.get_loss_train() - loss_test = data.get_loss_test() - losses = np.concatenate((loss_train, loss_test)) - from_loss = np.percentile(losses, from_percentile) - to_loss = np.percentile(losses, to_percentile) - - idx_train = (from_loss <= loss_train) & (loss_train <= to_loss) - idx_test = (from_loss <= loss_test) & (loss_test <= to_loss) - - return _slice_data_by_indices(data, idx_train, idx_test) - - -def _indices_by_classification(logits_or_probs, labels, correctly_classified): - idx_correct = labels == np.argmax(logits_or_probs, axis=1) - return idx_correct if correctly_classified else np.invert(idx_correct) - - -def _slice_by_classification_correctness(data: AttackInputData, - correctly_classified: bool): - idx_train = _indices_by_classification(data.logits_or_probs_train, - data.labels_train, - correctly_classified) - idx_test = _indices_by_classification(data.logits_or_probs_test, - data.labels_test, correctly_classified) - return _slice_data_by_indices(data, idx_train, idx_test) - - -def get_single_slice_specs(slicing_spec: SlicingSpec, - num_classes: int = None) -> List[SingleSliceSpec]: - """Returns slices of data according to slicing_spec.""" - result = [] - - if slicing_spec.entire_dataset: - result.append(SingleSliceSpec()) - - # Create slices by class. - by_class = slicing_spec.by_class - if isinstance(by_class, bool): - if by_class: - assert num_classes, "When by_class == True, num_classes should be given." - assert 0 <= num_classes <= 1000, ( - f"Too many classes for slicing by classes. " - f"Found {num_classes}.") - for c in range(num_classes): - result.append(SingleSliceSpec(SlicingFeature.CLASS, c)) - elif isinstance(by_class, int): - result.append(SingleSliceSpec(SlicingFeature.CLASS, by_class)) - elif isinstance(by_class, collections.abc.Iterable): - for c in by_class: - result.append(SingleSliceSpec(SlicingFeature.CLASS, c)) - - # Create slices by percentiles - if slicing_spec.by_percentiles: - for percent in range(0, 100, 10): - result.append( - SingleSliceSpec(SlicingFeature.PERCENTILE, (percent, percent + 10))) - - # Create slices by correctness of the classifications. - if slicing_spec.by_classification_correctness: - result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)) - result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, False)) - - return result - - -def get_slice(data: AttackInputData, - slice_spec: SingleSliceSpec) -> AttackInputData: - """Returns a single slice of data according to slice_spec.""" - if slice_spec.entire_dataset: - data_slice = copy.copy(data) - elif slice_spec.feature == SlicingFeature.CLASS: - data_slice = _slice_by_class(data, slice_spec.value) - elif slice_spec.feature == SlicingFeature.PERCENTILE: - from_percentile, to_percentile = slice_spec.value - data_slice = _slice_by_percentiles(data, from_percentile, to_percentile) - elif slice_spec.feature == SlicingFeature.CORRECTLY_CLASSIFIED: - data_slice = _slice_by_classification_correctness(data, slice_spec.value) - else: - raise ValueError('Unknown slice spec feature "%s"' % slice_spec.feature) - - data_slice.slice_spec = slice_spec - return data_slice +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import * # pylint: disable=wildcard-import diff --git a/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation.py b/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation.py index 54354c5..7cdafe8 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation.py @@ -13,129 +13,6 @@ # limitations under the License. # Lint as: python3 -"""A callback and a function in keras for membership inference attack.""" +"""Moved to privacy_attack/membership_inference_attack.""" -import os -from typing import Iterable -from absl import logging - -import tensorflow as tf - -from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec -from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss -from tensorflow_privacy.privacy.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard_tf2 as write_results_to_tensorboard - - -def calculate_losses(model, data, labels): - """Calculate losses of model prediction on data, provided true labels. - - Args: - model: model to make prediction - data: samples - labels: true labels of samples (integer valued) - - Returns: - pred: probability vector of each sample - loss: cross entropy loss of each sample - """ - pred = model.predict(data) - loss = log_loss(labels, pred) - return pred, loss - - -class MembershipInferenceCallback(tf.keras.callbacks.Callback): - """Callback to perform membership inference attack on epoch end.""" - - def __init__( - self, - in_train, out_train, - slicing_spec: SlicingSpec = None, - attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,), - tensorboard_dir=None, - tensorboard_merge_classifiers=False): - """Initializes the callback. - - Args: - in_train: (in_training samples, in_training labels) - out_train: (out_training samples, out_training labels) - slicing_spec: slicing specification of the attack - attack_types: a list of attacks, each of type AttackType - tensorboard_dir: directory for tensorboard summary - tensorboard_merge_classifiers: if true, plot different classifiers with - the same slicing_spec and metric in the same figure - """ - self._in_train_data, self._in_train_labels = in_train - self._out_train_data, self._out_train_labels = out_train - self._slicing_spec = slicing_spec - self._attack_types = attack_types - self._tensorboard_merge_classifiers = tensorboard_merge_classifiers - if tensorboard_dir: - if tensorboard_merge_classifiers: - self._writers = {} - for attack_type in attack_types: - self._writers[attack_type.name] = tf.summary.create_file_writer( - os.path.join(tensorboard_dir, 'MI', attack_type.name)) - else: - self._writers = tf.summary.create_file_writer( - os.path.join(tensorboard_dir, 'MI')) - logging.info('Will write to tensorboard.') - else: - self._writers = None - - def on_epoch_end(self, epoch, logs=None): - results = run_attack_on_keras_model( - self.model, - (self._in_train_data, self._in_train_labels), - (self._out_train_data, self._out_train_labels), - self._slicing_spec, - self._attack_types) - logging.info(results) - - att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( - results) - print('Attack result:') - print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in - zip(att_types, att_slices, att_metrics, att_values)])) - - # Write to tensorboard if tensorboard_dir is specified - if self._writers is not None: - write_results_to_tensorboard(results, self._writers, epoch, - self._tensorboard_merge_classifiers) - - -def run_attack_on_keras_model( - model, in_train, out_train, - slicing_spec: SlicingSpec = None, - attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)): - """Performs the attack on a trained model.
- - Args: - model: model to be tested - in_train: a (in_training samples, in_training labels) tuple - out_train: a (out_training samples, out_training labels) tuple - slicing_spec: slicing specification of the attack - attack_types: a list of attacks, each of type AttackType - Returns: - Results of the attack - """ - in_train_data, in_train_labels = in_train - out_train_data, out_train_labels = out_train - - # Compute predictions and losses - in_train_pred, in_train_loss = calculate_losses(model, in_train_data, - in_train_labels) - out_train_pred, out_train_loss = calculate_losses(model, out_train_data, - out_train_labels) - attack_input = AttackInputData( - logits_train=in_train_pred, logits_test=out_train_pred, - labels_train=in_train_labels, labels_test=out_train_labels, - loss_train=in_train_loss, loss_test=out_train_loss - ) - results = mia.run_attacks(attack_input, - slicing_spec=slicing_spec, - attack_types=attack_types) - return results +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import * # pylint: disable=wildcard-import diff --git a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py index 0914c03..90aa00e 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py @@ -13,320 +13,6 @@ # limitations under the License. # Lint as: python3 -"""Code that runs membership inference attacks based on the model outputs. +"""Moved to privacy_attack/membership_inference_attack.""" -This file belongs to the new API for membership inference attacks. This file -will be renamed to membership_inference_attack.py after the old API is removed. 
-""" - -from typing import Iterable -import numpy as np -from sklearn import metrics - -from tensorflow_privacy.privacy.membership_inference_attack import models -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import DataSize -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import MembershipProbabilityResults -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec -from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs -from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice - - -def _get_slice_spec(data: AttackInputData) -> SingleSliceSpec: - if hasattr(data, 'slice_spec'): - return data.slice_spec - return SingleSliceSpec() - - -def _run_trained_attack(attack_input: AttackInputData, - attack_type: AttackType, - balance_attacker_training: bool = True): - """Classification attack done by ML models.""" - attacker = None - - if attack_type == AttackType.LOGISTIC_REGRESSION: - attacker = models.LogisticRegressionAttacker() - elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON: - attacker = models.MultilayerPerceptronAttacker() - elif attack_type == AttackType.RANDOM_FOREST: - attacker = models.RandomForestAttacker() - elif attack_type == AttackType.K_NEAREST_NEIGHBORS: - attacker = models.KNearestNeighborsAttacker() - else: - raise NotImplementedError('Attack type %s not implemented yet.' % - attack_type) - - prepared_attacker_data = models.create_attacker_data( - attack_input, balance=balance_attacker_training) - - attacker.train_model(prepared_attacker_data.features_train, - prepared_attacker_data.is_training_labels_train) - - # Run the attacker on (permuted) test examples. - predictions_test = attacker.predict(prepared_attacker_data.features_test) - - # Generate ROC curves with predictions. - fpr, tpr, thresholds = metrics.roc_curve( - prepared_attacker_data.is_training_labels_test, predictions_test) - - roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) - - # NOTE: In the current setup we can't obtain membership scores for all - # samples, since some of them were used to train the attacker. This can be - # fixed by training several attackers to ensure each sample was left out - # in exactly one attacker (basically, this means performing cross-validation). - # TODO(b/175870479): Implement membership scores for predicted attackers. 
- - return SingleAttackResult( - slice_spec=_get_slice_spec(attack_input), - data_size=prepared_attacker_data.data_size, - attack_type=attack_type, - roc_curve=roc_curve) - - -def _run_threshold_attack(attack_input: AttackInputData): - """Runs a threshold attack on loss.""" - ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size() - loss_train = attack_input.get_loss_train() - loss_test = attack_input.get_loss_test() - if loss_train is None or loss_test is None: - raise ValueError('Not possible to run threshold attack without losses.') - fpr, tpr, thresholds = metrics.roc_curve( - np.concatenate((np.zeros(ntrain), np.ones(ntest))), - np.concatenate((loss_train, loss_test))) - - roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) - - return SingleAttackResult( - slice_spec=_get_slice_spec(attack_input), - data_size=DataSize(ntrain=ntrain, ntest=ntest), - attack_type=AttackType.THRESHOLD_ATTACK, - membership_scores_train=-attack_input.get_loss_train(), - membership_scores_test=-attack_input.get_loss_test(), - roc_curve=roc_curve) - - -def _run_threshold_entropy_attack(attack_input: AttackInputData): - ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size() - fpr, tpr, thresholds = metrics.roc_curve( - np.concatenate((np.zeros(ntrain), np.ones(ntest))), - np.concatenate( - (attack_input.get_entropy_train(), attack_input.get_entropy_test()))) - - roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) - - return SingleAttackResult( - slice_spec=_get_slice_spec(attack_input), - data_size=DataSize(ntrain=ntrain, ntest=ntest), - attack_type=AttackType.THRESHOLD_ENTROPY_ATTACK, - membership_scores_train=-attack_input.get_entropy_train(), - membership_scores_test=-attack_input.get_entropy_test(), - roc_curve=roc_curve) - - -def _run_attack(attack_input: AttackInputData, - attack_type: AttackType, - balance_attacker_training: bool = True, - min_num_samples: int = 1): - """Runs membership inference attacks for specified input and type. - - Args: - attack_input: input data for running an attack - attack_type: the attack to run - balance_attacker_training: Whether the training and test sets for the - membership inference attacker should have a balanced (roughly equal) - number of samples from the training and test sets used to develop - the model under attack. - min_num_samples: minimum number of examples in either training or test data. - - Returns: - the attack result. - """ - attack_input.validate() - if min(attack_input.get_train_size(), - attack_input.get_test_size()) < min_num_samples: - return None - - if attack_type.is_trained_attack: - return _run_trained_attack(attack_input, attack_type, - balance_attacker_training) - if attack_type == AttackType.THRESHOLD_ENTROPY_ATTACK: - return _run_threshold_entropy_attack(attack_input) - return _run_threshold_attack(attack_input) - - -def run_attacks(attack_input: AttackInputData, - slicing_spec: SlicingSpec = None, - attack_types: Iterable[AttackType] = ( - AttackType.THRESHOLD_ATTACK,), - privacy_report_metadata: PrivacyReportMetadata = None, - balance_attacker_training: bool = True, - min_num_samples: int = 1) -> AttackResults: - """Runs membership inference attacks on a classification model. - - It runs attacks specified by attack_types on each attack_input slice which is - specified by slicing_spec. 
- - Args: - attack_input: input data for running an attack - slicing_spec: specifies attack_input slices to run attack on - attack_types: attacks to run - privacy_report_metadata: the metadata of the model under attack. - balance_attacker_training: Whether the training and test sets for the - membership inference attacker should have a balanced (roughly equal) - number of samples from the training and test sets used to develop - the model under attack. - min_num_samples: minimum number of examples in either training or test data. - - Returns: - the attack result. - """ - attack_input.validate() - attack_results = [] - - if slicing_spec is None: - slicing_spec = SlicingSpec(entire_dataset=True) - num_classes = None - if slicing_spec.by_class: - num_classes = attack_input.num_classes - input_slice_specs = get_single_slice_specs(slicing_spec, num_classes) - for single_slice_spec in input_slice_specs: - attack_input_slice = get_slice(attack_input, single_slice_spec) - for attack_type in attack_types: - attack_result = _run_attack(attack_input_slice, attack_type, - balance_attacker_training, - min_num_samples) - if attack_result is not None: - attack_results.append(attack_result) - - privacy_report_metadata = _compute_missing_privacy_report_metadata( - privacy_report_metadata, attack_input) - - return AttackResults( - single_attack_results=attack_results, - privacy_report_metadata=privacy_report_metadata) - - -def _compute_membership_probability( - attack_input: AttackInputData, - num_bins: int = 15) -> SingleMembershipProbabilityResult: - """Computes each individual point's likelihood of being a member (denoted as privacy risk score in https://arxiv.org/abs/2003.10595). - - For an individual sample, its privacy risk score is computed as the posterior - probability of being in the training set - after observing its prediction output by the target machine learning model. - - Args: - attack_input: input data for compute membership probability - num_bins: the number of bins used to compute the training/test histogram - - Returns: - membership probability results - """ - - # Uses the provided loss or entropy. Otherwise computes the loss. 
- if attack_input.loss_train is not None and attack_input.loss_test is not None: - train_values = attack_input.loss_train - test_values = attack_input.loss_test - elif attack_input.entropy_train is not None and attack_input.entropy_test is not None: - train_values = attack_input.entropy_train - test_values = attack_input.entropy_test - else: - train_values = attack_input.get_loss_train() - test_values = attack_input.get_loss_test() - - # Compute the histogram in the log scale - small_value = 1e-10 - train_values = np.maximum(train_values, small_value) - test_values = np.maximum(test_values, small_value) - - min_value = min(train_values.min(), test_values.min()) - max_value = max(train_values.max(), test_values.max()) - bins_hist = np.logspace( - np.log10(min_value), np.log10(max_value), num_bins + 1) - - train_hist, _ = np.histogram(train_values, bins=bins_hist) - train_hist = train_hist / (len(train_values) + 0.0) - train_hist_indices = np.fmin( - np.digitize(train_values, bins=bins_hist), num_bins) - 1 - - test_hist, _ = np.histogram(test_values, bins=bins_hist) - test_hist = test_hist / (len(test_values) + 0.0) - test_hist_indices = np.fmin( - np.digitize(test_values, bins=bins_hist), num_bins) - 1 - - combined_hist = train_hist + test_hist - combined_hist[combined_hist == 0] = small_value - membership_prob_list = train_hist / (combined_hist + 0.0) - train_membership_probs = membership_prob_list[train_hist_indices] - test_membership_probs = membership_prob_list[test_hist_indices] - - return SingleMembershipProbabilityResult( - slice_spec=_get_slice_spec(attack_input), - train_membership_probs=train_membership_probs, - test_membership_probs=test_membership_probs) - - -def run_membership_probability_analysis( - attack_input: AttackInputData, - slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults: - """Perform membership probability analysis on all given slice types. - - Args: - attack_input: input data for compute membership probabilities - slicing_spec: specifies attack_input slices - - Returns: - the membership probability results. 
- """ - attack_input.validate() - membership_prob_results = [] - - if slicing_spec is None: - slicing_spec = SlicingSpec(entire_dataset=True) - num_classes = None - if slicing_spec.by_class: - num_classes = attack_input.num_classes - input_slice_specs = get_single_slice_specs(slicing_spec, num_classes) - for single_slice_spec in input_slice_specs: - attack_input_slice = get_slice(attack_input, single_slice_spec) - membership_prob_results.append( - _compute_membership_probability(attack_input_slice)) - - return MembershipProbabilityResults( - membership_prob_results=membership_prob_results) - - -def _compute_missing_privacy_report_metadata( - metadata: PrivacyReportMetadata, - attack_input: AttackInputData) -> PrivacyReportMetadata: - """Populates metadata fields if they are missing.""" - if metadata is None: - metadata = PrivacyReportMetadata() - if metadata.accuracy_train is None: - metadata.accuracy_train = _get_accuracy(attack_input.logits_train, - attack_input.labels_train) - if metadata.accuracy_test is None: - metadata.accuracy_test = _get_accuracy(attack_input.logits_test, - attack_input.labels_test) - loss_train = attack_input.get_loss_train() - loss_test = attack_input.get_loss_test() - if metadata.loss_train is None and loss_train is not None: - metadata.loss_train = np.average(loss_train) - if metadata.loss_test is None and loss_test is not None: - metadata.loss_test = np.average(loss_test) - return metadata - - -def _get_accuracy(logits, labels): - """Computes the accuracy if it is missing.""" - if logits is None or labels is None: - return None - return metrics.accuracy_score(labels, np.argmax(logits, axis=1)) +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.membership_inference_attack import * # pylint: disable=wildcard-import diff --git a/tensorflow_privacy/privacy/membership_inference_attack/models.py b/tensorflow_privacy/privacy/membership_inference_attack/models.py index a784d51..008b776 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/models.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/models.py @@ -13,198 +13,6 @@ # limitations under the License. # Lint as: python3 -"""Trained models for membership inference attacks.""" +"""Moved to privacy_attack/membership_inference_attack.""" -from dataclasses import dataclass -import numpy as np -from sklearn import ensemble -from sklearn import linear_model -from sklearn import model_selection -from sklearn import neighbors -from sklearn import neural_network - -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import DataSize - - -@dataclass -class AttackerData: - """Input data for an ML classifier attack. - - This includes only the data, and not configuration. - """ - - features_train: np.ndarray = None - # element-wise boolean array denoting if the example was part of training. - is_training_labels_train: np.ndarray = None - - features_test: np.ndarray = None - # element-wise boolean array denoting if the example was part of training. - is_training_labels_test: np.ndarray = None - - data_size: DataSize = None - - -def create_attacker_data(attack_input_data: AttackInputData, - test_fraction: float = 0.25, - balance: bool = True) -> AttackerData: - """Prepare AttackInputData to train ML attackers. - - Combines logits and losses and performs a random train-test split. 
- - Args: - attack_input_data: Original AttackInputData - test_fraction: Fraction of the dataset to include in the test split. - balance: Whether the training and test sets for the membership inference - attacker should have a balanced (roughly equal) number of samples - from the training and test sets used to develop the model - under attack. - - Returns: - AttackerData. - """ - attack_input_train = _column_stack(attack_input_data.logits_or_probs_train, - attack_input_data.get_loss_train()) - attack_input_test = _column_stack(attack_input_data.logits_or_probs_test, - attack_input_data.get_loss_test()) - - if balance: - min_size = min(attack_input_data.get_train_size(), - attack_input_data.get_test_size()) - attack_input_train = _sample_multidimensional_array(attack_input_train, - min_size) - attack_input_test = _sample_multidimensional_array(attack_input_test, - min_size) - ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0] - - features_all = np.concatenate((attack_input_train, attack_input_test)) - - labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest)))) - - # Perform a train-test split - features_train, features_test, is_training_labels_train, is_training_labels_test = model_selection.train_test_split( - features_all, labels_all, test_size=test_fraction, stratify=labels_all) - return AttackerData(features_train, is_training_labels_train, features_test, - is_training_labels_test, - DataSize(ntrain=ntrain, ntest=ntest)) - - -def _sample_multidimensional_array(array, size): - indices = np.random.choice(len(array), size, replace=False) - return array[indices] - - -def _column_stack(logits, loss): - """Stacks logits and losses. - - In case that only one exists, returns that one. - Args: - logits: logits array - loss: loss array - - Returns: - stacked logits and losses (or only one if both do not exist). - """ - if logits is None: - return np.expand_dims(loss, axis=-1) - if loss is None: - return logits - return np.column_stack((logits, loss)) - - -class TrainedAttacker: - """Base class for training attack models.""" - model = None - - def train_model(self, input_features, is_training_labels): - """Train an attacker model. - - This is trained on examples from train and test datasets. - Args: - input_features : array-like of shape (n_samples, n_features) Training - vector, where n_samples is the number of samples and n_features is the - number of features. - is_training_labels : a vector of booleans of shape (n_samples, ) - representing whether the sample is in the training set or not. - """ - raise NotImplementedError() - - def predict(self, input_features): - """Predicts whether input_features belongs to train or test. - - Args: - input_features : A vector of features with the same semantics as x_train - passed to train_model. - Returns: - An array of probabilities denoting whether the example belongs to test. - """ - if self.model is None: - raise AssertionError( - 'Model not trained yet. 
Please call train_model first.')
-    return self.model.predict_proba(input_features)[:, 1]
-
-
-class LogisticRegressionAttacker(TrainedAttacker):
-  """Logistic regression attacker."""
-
-  def train_model(self, input_features, is_training_labels):
-    lr = linear_model.LogisticRegression(solver='lbfgs')
-    param_grid = {
-        'C': np.logspace(-4, 2, 10),
-    }
-    model = model_selection.GridSearchCV(
-        lr, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
-    model.fit(input_features, is_training_labels)
-    self.model = model
-
-
-class MultilayerPerceptronAttacker(TrainedAttacker):
-  """Multilayer perceptron attacker."""
-
-  def train_model(self, input_features, is_training_labels):
-    mlp_model = neural_network.MLPClassifier()
-    param_grid = {
-        'hidden_layer_sizes': [(64,), (32, 32)],
-        'solver': ['adam'],
-        'alpha': [0.0001, 0.001, 0.01],
-    }
-    n_jobs = -1
-    model = model_selection.GridSearchCV(
-        mlp_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0)
-    model.fit(input_features, is_training_labels)
-    self.model = model
-
-
-class RandomForestAttacker(TrainedAttacker):
-  """Random forest attacker."""
-
-  def train_model(self, input_features, is_training_labels):
-    """Setup a random forest pipeline with cross-validation."""
-    rf_model = ensemble.RandomForestClassifier()
-
-    param_grid = {
-        'n_estimators': [100],
-        'max_features': ['auto', 'sqrt'],
-        'max_depth': [5, 10, 20, None],
-        'min_samples_split': [2, 5, 10],
-        'min_samples_leaf': [1, 2, 4]
-    }
-    n_jobs = -1
-    model = model_selection.GridSearchCV(
-        rf_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0)
-    model.fit(input_features, is_training_labels)
-    self.model = model
-
-
-class KNearestNeighborsAttacker(TrainedAttacker):
-  """K nearest neighbor attacker."""
-
-  def train_model(self, input_features, is_training_labels):
-    knn_model = neighbors.KNeighborsClassifier()
-    param_grid = {
-        'n_neighbors': [3, 5, 7],
-    }
-    model = model_selection.GridSearchCV(
-        knn_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
-    model.fit(input_features, is_training_labels)
-    self.model = model
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.models import *  # pylint: disable=wildcard-import
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/plotting.py b/tensorflow_privacy/privacy/membership_inference_attack/plotting.py
index f3eab8f..93f6d0a 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/plotting.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/plotting.py
@@ -13,74 +13,6 @@
 # limitations under the License.
 # Lint as: python3
-"""Plotting functionality for membership inference attack analysis.
+"""Moved to privacy/privacy_tests/membership_inference_attack."""
-
-Functions to plot ROC curves and histograms as well as functionality to store
-figures to colossus.
-"""
-
-from typing import Text, Iterable
-
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn import metrics
-
-
-def save_plot(figure: plt.Figure, path: Text, outformat='png'):
-  """Store a figure to disk."""
-  if path is not None:
-    with open(path, 'wb') as f:
-      figure.savefig(f, bbox_inches='tight', format=outformat)
-    plt.close(figure)
-
-
-def plot_curve_with_area(x: Iterable[float],
-                         y: Iterable[float],
-                         xlabel: Text = 'x',
-                         ylabel: Text = 'y') -> plt.Figure:
-  """Plot the curve defined by inputs and the area under the curve.
-
-  All entries of x and y are required to lie between 0 and 1.
-  For example, x could be recall and y precision, or x is fpr and y is tpr.
-
-  Args:
-    x: Values on x-axis (1d)
-    y: Values on y-axis (must be same length as x)
-    xlabel: Label for x axis
-    ylabel: Label for y axis
-
-  Returns:
-    The matplotlib figure handle
-  """
-  fig = plt.figure()
-  plt.plot([0, 1], [0, 1], 'k', lw=1.0)
-  plt.plot(x, y, lw=2, label=f'AUC: {metrics.auc(x, y):.3f}')
-  plt.xlabel(xlabel)
-  plt.ylabel(ylabel)
-  plt.legend()
-  return fig
-
-
-def plot_histograms(train: Iterable[float],
-                    test: Iterable[float],
-                    xlabel: Text = 'x',
-                    thresh: float = None) -> plt.Figure:
-  """Plot histograms of training versus test metrics."""
-  xmin = min(np.min(train), np.min(test))
-  xmax = max(np.max(train), np.max(test))
-  bins = np.linspace(xmin, xmax, 100)
-  fig = plt.figure()
-  plt.hist(test, bins=bins, density=True, alpha=0.5, label='test', log='y')
-  plt.hist(train, bins=bins, density=True, alpha=0.5, label='train', log='y')
-  if thresh is not None:
-    plt.axvline(thresh, c='r', label=f'threshold = {thresh:.3f}')
-  plt.xlabel(xlabel)
-  plt.ylabel('normalized counts (density)')
-  plt.legend()
-  return fig
-
-
-def plot_roc_curve(roc_curve) -> plt.Figure:
-  """Plot the ROC curve and the area under the curve."""
-  return plot_curve_with_area(
-      roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting import *  # pylint: disable=wildcard-import
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py b/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py
index a2cb56f..93f6d0a 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py
@@ -13,126 +13,6 @@
 # limitations under the License.
 # Lint as: python3
-"""Plotting code for ML Privacy Reports."""
-from typing import Iterable
-import matplotlib.pyplot as plt
-import pandas as pd
+"""Moved to privacy/privacy_tests/membership_inference_attack."""
-
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsDFColumns
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import ENTIRE_DATASET_SLICE_STR
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric
-
-# Helper constants for DataFrame keys.
-LEGEND_LABEL_STR = 'legend label'
-EPOCH_STR = 'Epoch'
-TRAIN_ACCURACY_STR = 'Train accuracy'
-
-
-def plot_by_epochs(results: AttackResultsCollection,
-                   privacy_metrics: Iterable[PrivacyMetric]) -> plt.Figure:
-  """Plots privacy vulnerabilities vs epoch numbers.
-
-  In case multiple privacy metrics are specified, the plot will feature
-  multiple subplots (one subplot per metrics). Multiple model variants
-  are supported.
-  Args:
-    results: AttackResults for the plot
-    privacy_metrics: List of enumerated privacy metrics that should be plotted.
-
-  Returns:
-    A pyplot figure with privacy vs accuracy plots.
- """ - - _validate_results(results.attack_results_list) - all_results_df = _calculate_combined_df_with_metadata( - results.attack_results_list) - return _generate_subplots( - all_results_df=all_results_df, - x_axis_metric='Epoch', - figure_title='Vulnerability per Epoch', - privacy_metrics=privacy_metrics) - - -def plot_privacy_vs_accuracy(results: AttackResultsCollection, - privacy_metrics: Iterable[PrivacyMetric]): - """Plots privacy vulnerabilities vs accuracy plots. - - In case multiple privacy metrics are specified, the plot will feature - multiple subplots (one subplot per metrics). Multiple model variants - are supported. - Args: - results: AttackResults for the plot - privacy_metrics: List of enumerated privacy metrics that should be plotted. - - Returns: - A pyplot figure with privacy vs accuracy plots. - - """ - _validate_results(results.attack_results_list) - all_results_df = _calculate_combined_df_with_metadata( - results.attack_results_list) - return _generate_subplots( - all_results_df=all_results_df, - x_axis_metric='Train accuracy', - figure_title='Privacy vs Utility Analysis', - privacy_metrics=privacy_metrics) - - -def _calculate_combined_df_with_metadata(results: Iterable[AttackResults]): - """Adds metadata to the dataframe and concats them together.""" - all_results_df = None - for attack_results in results: - attack_results_df = attack_results.calculate_pd_dataframe() - attack_results_df = attack_results_df.loc[attack_results_df[str( - AttackResultsDFColumns.SLICE_FEATURE)] == ENTIRE_DATASET_SLICE_STR] - attack_results_df.insert(0, EPOCH_STR, - attack_results.privacy_report_metadata.epoch_num) - attack_results_df.insert( - 0, TRAIN_ACCURACY_STR, - attack_results.privacy_report_metadata.accuracy_train) - attack_results_df.insert( - 0, LEGEND_LABEL_STR, - attack_results.privacy_report_metadata.model_variant_label + ' - ' + - attack_results_df[str(AttackResultsDFColumns.ATTACK_TYPE)]) - if all_results_df is None: - all_results_df = attack_results_df - else: - all_results_df = pd.concat([all_results_df, attack_results_df], - ignore_index=True) - return all_results_df - - -def _generate_subplots(all_results_df: pd.DataFrame, x_axis_metric: str, - figure_title: str, - privacy_metrics: Iterable[PrivacyMetric]): - """Create one subplot per privacy metric for a specified x_axis_metric.""" - fig, axes = plt.subplots( - 1, len(privacy_metrics), figsize=(5 * len(privacy_metrics) + 3, 5)) - # Set a title for the entire group of subplots. - fig.suptitle(figure_title) - if len(privacy_metrics) == 1: - axes = (axes,) - for i, privacy_metric in enumerate(privacy_metrics): - legend_labels = all_results_df[LEGEND_LABEL_STR].unique() - for legend_label in legend_labels: - single_label_results = all_results_df.loc[all_results_df[LEGEND_LABEL_STR] - == legend_label] - sorted_label_results = single_label_results.sort_values(x_axis_metric) - axes[i].plot(sorted_label_results[x_axis_metric], - sorted_label_results[str(privacy_metric)]) - axes[i].set_xlabel(x_axis_metric) - axes[i].set_title('%s for %s' % (privacy_metric, ENTIRE_DATASET_SLICE_STR)) - plt.legend(legend_labels, loc='upper left', bbox_to_anchor=(1.02, 1)) - fig.tight_layout(rect=[0, 0, 1, 0.93]) # Leave space for suptitle. 
-
-  return fig
-
-
-def _validate_results(results: Iterable[AttackResults]):
-  for attack_results in results:
-    if not attack_results or not attack_results.privacy_report_metadata:
-      raise ValueError('Privacy metadata is not defined.')
-    if attack_results.privacy_report_metadata.epoch_num is None:
-      raise ValueError('epoch_num in metadata is not defined.')
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.privacy_report import *  # pylint: disable=wildcard-import
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py
index c6d74e1..e034424 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py
@@ -13,361 +13,6 @@
 # limitations under the License.
 # Lint as: python3
-"""Code for membership inference attacks on seq2seq models.
+"""Moved to privacy/privacy_tests/membership_inference_attack."""
-
-Contains seq2seq specific logic for attack data structures, attack data
-generation,
-and the logistic regression membership inference attack.
-"""
-from typing import Iterator, List
-
-from dataclasses import dataclass
-import numpy as np
-from scipy.stats import rankdata
-from sklearn import metrics
-from sklearn import model_selection
-import tensorflow as tf
-
-from tensorflow_privacy.privacy.membership_inference_attack import models
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import DataSize
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
-from tensorflow_privacy.privacy.membership_inference_attack.models import _sample_multidimensional_array
-from tensorflow_privacy.privacy.membership_inference_attack.models import AttackerData
-
-
-def _is_iterator(obj, obj_name):
-  """Checks whether obj is a generator."""
-  if obj is not None and not isinstance(obj, Iterator):
-    raise ValueError('%s should be a generator.' % obj_name)
-
-
-@dataclass
-class Seq2SeqAttackInputData:
-  """Input data for running an attack on seq2seq models.
-
-  This includes only the data, and not configuration.
-  """
-  logits_train: Iterator[np.ndarray] = None
-  logits_test: Iterator[np.ndarray] = None
-
-  # Contains ground-truth token indices for the target sequences.
-  labels_train: Iterator[np.ndarray] = None
-  labels_test: Iterator[np.ndarray] = None
-
-  # Size of the target sequence vocabulary.
-  vocab_size: int = None
-
-  # Train, test size = number of batches in training, test set.
-  # These values need to be supplied by the user as logits, labels
-  # are lazy loaded for seq2seq models.
- train_size: int = 0 - test_size: int = 0 - - def validate(self): - """Validates the inputs.""" - - if (self.logits_train is None) != (self.logits_test is None): - raise ValueError( - 'logits_train and logits_test should both be either set or unset') - - if (self.labels_train is None) != (self.labels_test is None): - raise ValueError( - 'labels_train and labels_test should both be either set or unset') - - if self.logits_train is None or self.labels_train is None: - raise ValueError( - 'Labels, logits of training, test sets should all be set') - - if (self.vocab_size is None or self.train_size is None or - self.test_size is None): - raise ValueError('vocab_size, train_size, test_size should all be set') - - if self.vocab_size is not None and not int: - raise ValueError('vocab_size should be of integer type') - - if self.train_size is not None and not int: - raise ValueError('train_size should be of integer type') - - if self.test_size is not None and not int: - raise ValueError('test_size should be of integer type') - - _is_iterator(self.logits_train, 'logits_train') - _is_iterator(self.logits_test, 'logits_test') - _is_iterator(self.labels_train, 'labels_train') - _is_iterator(self.labels_test, 'labels_test') - - def __str__(self): - """Returns the shapes of variables that are not None.""" - result = ['AttackInputData('] - - if self.vocab_size is not None and self.train_size is not None: - result.append( - 'logits_train with shape (%d, num_sequences, num_tokens, %d)' % - (self.train_size, self.vocab_size)) - result.append( - 'labels_train with shape (%d, num_sequences, num_tokens, 1)' % - self.train_size) - - if self.vocab_size is not None and self.test_size is not None: - result.append( - 'logits_test with shape (%d, num_sequences, num_tokens, %d)' % - (self.test_size, self.vocab_size)) - result.append( - 'labels_test with shape (%d, num_sequences, num_tokens, 1)' % - self.test_size) - - result.append(')') - return '\n'.join(result) - - -def _get_attack_features_and_metadata( - logits: Iterator[np.ndarray], - labels: Iterator[np.ndarray]) -> (np.ndarray, float, float): - """Returns the average rank of tokens per batch of sequences and the loss. - - Args: - logits: Logits returned by a seq2seq model, dim = (num_batches, - num_sequences, num_tokens, vocab_size). - labels: Target labels for the seq2seq model, dim = (num_batches, - num_sequences, num_tokens, 1). - - Returns: - 1. An array of average ranks, dim = (num_batches, 1). - Each average rank is calculated over ranks of tokens in sequences of a - particular batch. - 2. Loss computed over all logits and labels. - 3. Accuracy computed over all logits and labels. - """ - ranks = [] - loss = 0.0 - dataset_length = 0.0 - correct_preds = 0 - total_preds = 0 - for batch_logits, batch_labels in zip(logits, labels): - # Compute average rank for the current batch. - batch_ranks = _get_batch_ranks(batch_logits, batch_labels) - ranks.append(np.mean(batch_ranks)) - - # Update overall loss metrics with metrics of the current batch. - batch_loss, batch_length = _get_batch_loss_metrics(batch_logits, - batch_labels) - loss += batch_loss - dataset_length += batch_length - - # Update overall accuracy metrics with metrics of the current batch. - batch_correct_preds, batch_total_preds = _get_batch_accuracy_metrics( - batch_logits, batch_labels) - correct_preds += batch_correct_preds - total_preds += batch_total_preds - - # Compute loss and accuracy for the dataset. 
- loss = loss / dataset_length - accuracy = correct_preds / total_preds - - return np.array(ranks), loss, accuracy - - -def _get_batch_ranks(batch_logits: np.ndarray, - batch_labels: np.ndarray) -> np.ndarray: - """Returns the ranks of tokens in a batch of sequences. - - Args: - batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, - num_tokens, vocab_size). - batch_labels: Target labels for the seq2seq model, dim = (num_sequences, - num_tokens, 1). - - Returns: - An array of ranks of tokens in a batch of sequences, dim = (num_sequences, - num_tokens, 1) - """ - batch_ranks = [] - for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels) - - return np.array(batch_ranks) - - -def _get_ranks_for_sequence(logits: np.ndarray, - labels: np.ndarray) -> List[float]: - """Returns ranks for a sequence. - - Args: - logits: Logits of a single sequence, dim = (num_tokens, vocab_size). - labels: Target labels of a single sequence, dim = (num_tokens, 1). - - Returns: - An array of ranks for tokens in the sequence, dim = (num_tokens, 1). - """ - sequence_ranks = [] - for logit, label in zip(logits, labels.astype(int)): - rank = rankdata(-logit, method='min')[label] - 1.0 - sequence_ranks.append(rank) - - return sequence_ranks - - -def _get_batch_loss_metrics(batch_logits: np.ndarray, - batch_labels: np.ndarray) -> (float, int): - """Returns the loss, number of sequences for a batch. - - Args: - batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, - num_tokens, vocab_size). - batch_labels: Target labels for the seq2seq model, dim = (num_sequences, - num_tokens, 1). - """ - batch_loss = 0.0 - batch_length = len(batch_logits) - for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - sequence_loss = tf.losses.sparse_categorical_crossentropy( - tf.keras.backend.constant(sequence_labels), - tf.keras.backend.constant(sequence_logits), - from_logits=True) - batch_loss += sequence_loss.numpy().sum() - - return batch_loss / batch_length, batch_length - - -def _get_batch_accuracy_metrics(batch_logits: np.ndarray, - batch_labels: np.ndarray) -> (float, float): - """Returns the number of correct predictions, total number of predictions for a batch. - - Args: - batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, - num_tokens, vocab_size). - batch_labels: Target labels for the seq2seq model, dim = (num_sequences, - num_tokens, 1). - """ - batch_correct_preds = 0.0 - batch_total_preds = 0.0 - for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - preds = tf.metrics.sparse_categorical_accuracy( - tf.keras.backend.constant(sequence_labels), - tf.keras.backend.constant(sequence_logits)) - batch_correct_preds += preds.numpy().sum() - batch_total_preds += len(sequence_labels) - - return batch_correct_preds, batch_total_preds - - -def create_seq2seq_attacker_data( - attack_input_data: Seq2SeqAttackInputData, - test_fraction: float = 0.25, - balance: bool = True, - privacy_report_metadata: PrivacyReportMetadata = PrivacyReportMetadata() -) -> AttackerData: - """Prepares Seq2SeqAttackInputData to train ML attackers. - - Uses logits and losses to generate ranks and performs a random train-test - split. - - Also computes metadata (loss, accuracy) for the model under attack - and populates respective fields of PrivacyReportMetadata. 
- - Args: - attack_input_data: Original Seq2SeqAttackInputData - test_fraction: Fraction of the dataset to include in the test split. - balance: Whether the training and test sets for the membership inference - attacker should have a balanced (roughly equal) number of samples from the - training and test sets used to develop the model under attack. - privacy_report_metadata: the metadata of the model under attack. - - Returns: - AttackerData. - """ - attack_input_train, loss_train, accuracy_train = _get_attack_features_and_metadata( - attack_input_data.logits_train, attack_input_data.labels_train) - attack_input_test, loss_test, accuracy_test = _get_attack_features_and_metadata( - attack_input_data.logits_test, attack_input_data.labels_test) - - if balance: - min_size = min(len(attack_input_train), len(attack_input_test)) - attack_input_train = _sample_multidimensional_array(attack_input_train, - min_size) - attack_input_test = _sample_multidimensional_array(attack_input_test, - min_size) - - features_all = np.concatenate((attack_input_train, attack_input_test)) - ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0] - - # Reshape for classifying one-dimensional features - features_all = features_all.reshape(-1, 1) - - labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest)))) - - # Perform a train-test split - features_train, features_test, \ - is_training_labels_train, is_training_labels_test = \ - model_selection.train_test_split( - features_all, labels_all, test_size=test_fraction, stratify=labels_all) - - # Populate accuracy, loss fields in privacy report metadata - privacy_report_metadata.loss_train = loss_train - privacy_report_metadata.loss_test = loss_test - privacy_report_metadata.accuracy_train = accuracy_train - privacy_report_metadata.accuracy_test = accuracy_test - - return AttackerData(features_train, is_training_labels_train, features_test, - is_training_labels_test, - DataSize(ntrain=ntrain, ntest=ntest)) - - -def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, - privacy_report_metadata: PrivacyReportMetadata = None, - balance_attacker_training: bool = True) -> AttackResults: - """Runs membership inference attacks on a seq2seq model. - - Args: - attack_input: input data for running an attack - privacy_report_metadata: the metadata of the model under attack. - balance_attacker_training: Whether the training and test sets for the - membership inference attacker should have a balanced (roughly equal) - number of samples from the training and test sets used to develop the - model under attack. - - Returns: - the attack result. - """ - attack_input.validate() - - # The attacker uses the average rank (a single number) of a seq2seq dataset - # record to determine membership. So only Logistic Regression is supported, - # as it makes the most sense for single-number features. - attacker = models.LogisticRegressionAttacker() - - # Create attacker data and populate fields of privacy_report_metadata - privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata() - prepared_attacker_data = create_seq2seq_attacker_data( - attack_input_data=attack_input, - balance=balance_attacker_training, - privacy_report_metadata=privacy_report_metadata) - - attacker.train_model(prepared_attacker_data.features_train, - prepared_attacker_data.is_training_labels_train) - - # Run the attacker on (permuted) test examples. - predictions_test = attacker.predict(prepared_attacker_data.features_test) - - # Generate ROC curves with predictions. 
-  fpr, tpr, thresholds = metrics.roc_curve(
-      prepared_attacker_data.is_training_labels_test, predictions_test)
-
-  roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
-
-  attack_results = [
-      SingleAttackResult(
-          slice_spec=SingleSliceSpec(),
-          attack_type=AttackType.LOGISTIC_REGRESSION,
-          roc_curve=roc_curve,
-          data_size=prepared_attacker_data.data_size)
-  ]
-
-  return AttackResults(
-      single_attack_results=attack_results,
-      privacy_report_metadata=privacy_report_metadata)
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import *  # pylint: disable=wildcard-import
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py b/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py
index abf727f..6c402df 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py
@@ -13,187 +13,6 @@
 # limitations under the License.
 # Lint as: python3
-"""A hook and a function in tf estimator for membership inference attack."""
+"""Moved to privacy/privacy_tests/membership_inference_attack."""
-
-import os
-from typing import Iterable
-from absl import logging
-import numpy as np
-import tensorflow.compat.v1 as tf
-from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
-from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss
-from tensorflow_privacy.privacy.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard
-
-
-def calculate_losses(estimator, input_fn, labels):
-  """Get predictions and losses for samples.
-
-  The assumptions are 1) the loss is cross-entropy loss, and 2) user have
-  specified prediction mode to return predictions, e.g.,
-  when mode == tf.estimator.ModeKeys.PREDICT, the model function returns
-  tf.estimator.EstimatorSpec(mode=mode, predictions=tf.nn.softmax(logits)).
-
-  Args:
-    estimator: model to make prediction
-    input_fn: input function to be used in estimator.predict
-    labels: array of size (n_samples, ), true labels of samples (integer valued)
-
-  Returns:
-    preds: probability vector of each sample
-    loss: cross entropy loss of each sample
-  """
-  pred = np.array(list(estimator.predict(input_fn=input_fn)))
-  loss = log_loss(labels, pred)
-  return pred, loss
-
-
-class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
-  """Training hook to perform membership inference attack on epoch end."""
-
-  def __init__(
-      self,
-      estimator,
-      in_train, out_train,
-      input_fn_constructor,
-      slicing_spec: SlicingSpec = None,
-      attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
-      tensorboard_dir=None,
-      tensorboard_merge_classifiers=False):
-    """Initialize the hook.
- - Args: - estimator: model to be tested - in_train: (in_training samples, in_training labels) - out_train: (out_training samples, out_training labels) - input_fn_constructor: a function that receives sample, label and construct - the input_fn for model prediction - slicing_spec: slicing specification of the attack - attack_types: a list of attacks, each of type AttackType - tensorboard_dir: directory for tensorboard summary - tensorboard_merge_classifiers: if true, plot different classifiers with - the same slicing_spec and metric in the same figure - """ - in_train_data, self._in_train_labels = in_train - out_train_data, self._out_train_labels = out_train - - # Define the input functions for both in and out-training samples. - self._in_train_input_fn = input_fn_constructor(in_train_data, - self._in_train_labels) - self._out_train_input_fn = input_fn_constructor(out_train_data, - self._out_train_labels) - self._estimator = estimator - self._slicing_spec = slicing_spec - self._attack_types = attack_types - self._tensorboard_merge_classifiers = tensorboard_merge_classifiers - if tensorboard_dir: - if tensorboard_merge_classifiers: - self._writers = {} - with tf.Graph().as_default(): - for attack_type in attack_types: - self._writers[attack_type.name] = tf.summary.FileWriter( - os.path.join(tensorboard_dir, 'MI', attack_type.name)) - else: - with tf.Graph().as_default(): - self._writers = tf.summary.FileWriter( - os.path.join(tensorboard_dir, 'MI')) - logging.info('Will write to tensorboard.') - else: - self._writers = None - - def end(self, session): - results = run_attack_helper(self._estimator, - self._in_train_input_fn, - self._out_train_input_fn, - self._in_train_labels, self._out_train_labels, - self._slicing_spec, - self._attack_types) - logging.info(results) - - att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( - results) - print('Attack result:') - print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in - zip(att_types, att_slices, att_metrics, att_values)])) - - # Write to tensorboard if tensorboard_dir is specified - global_step = self._estimator.get_variable_value('global_step') - if self._writers is not None: - write_results_to_tensorboard(results, self._writers, global_step, - self._tensorboard_merge_classifiers) - - -def run_attack_on_tf_estimator_model( - estimator, in_train, out_train, - input_fn_constructor, - slicing_spec: SlicingSpec = None, - attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)): - """Performs the attack in the end of training. - - Args: - estimator: model to be tested - in_train: (in_training samples, in_training labels) - out_train: (out_training samples, out_training labels) - input_fn_constructor: a function that receives sample, label and construct - the input_fn for model prediction - slicing_spec: slicing specification of the attack - attack_types: a list of attacks, each of type AttackType - Returns: - Results of the attack - """ - in_train_data, in_train_labels = in_train - out_train_data, out_train_labels = out_train - - # Define the input functions for both in and out-training samples. - in_train_input_fn = input_fn_constructor(in_train_data, in_train_labels) - out_train_input_fn = input_fn_constructor(out_train_data, out_train_labels) - - # Call the helper to run the attack. 
-  results = run_attack_helper(estimator,
-                              in_train_input_fn, out_train_input_fn,
-                              in_train_labels, out_train_labels,
-                              slicing_spec,
-                              attack_types)
-  logging.info('End of training attack:')
-  logging.info(results)
-  return results
-
-
-def run_attack_helper(
-    estimator,
-    in_train_input_fn, out_train_input_fn,
-    in_train_labels, out_train_labels,
-    slicing_spec: SlicingSpec = None,
-    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
-  """A helper function to perform attack.
-
-  Args:
-    estimator: model to be tested
-    in_train_input_fn: input_fn for in training data
-    out_train_input_fn: input_fn for out of training data
-    in_train_labels: in training labels
-    out_train_labels: out of training labels
-    slicing_spec: slicing specification of the attack
-    attack_types: a list of attacks, each of type AttackType
-  Returns:
-    Results of the attack
-  """
-  # Compute predictions and losses
-  in_train_pred, in_train_loss = calculate_losses(estimator,
-                                                  in_train_input_fn,
-                                                  in_train_labels)
-  out_train_pred, out_train_loss = calculate_losses(estimator,
-                                                    out_train_input_fn,
-                                                    out_train_labels)
-  attack_input = AttackInputData(
-      logits_train=in_train_pred, logits_test=out_train_pred,
-      labels_train=in_train_labels, labels_test=out_train_labels,
-      loss_train=in_train_loss, loss_test=out_train_loss
-  )
-  results = mia.run_attacks(attack_input,
-                            slicing_spec=slicing_spec,
-                            attack_types=attack_types)
-  return results
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import *  # pylint: disable=wildcard-import
diff --git a/tensorflow_privacy/privacy/privacy_tests/README.md b/tensorflow_privacy/privacy/privacy_tests/README.md
new file mode 100644
index 0000000..9432bdf
--- /dev/null
+++ b/tensorflow_privacy/privacy/privacy_tests/README.md
@@ -0,0 +1,7 @@
+# Privacy tests
+
+A good privacy-preserving model learns from the training data, but
+doesn't memorize individual samples. Excessive memorization is not only harmful
+to the model's predictive power, but also presents a privacy risk.
+
+This library provides empirical tests for measuring potential memorization.
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/CONTRIBUTING.md
similarity index 100%
rename from tensorflow_privacy/privacy/membership_inference_attack/CONTRIBUTING.md
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/CONTRIBUTING.md
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/README.md b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/README.md
new file mode 100644
index 0000000..ab3fa95
--- /dev/null
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/README.md
@@ -0,0 +1,269 @@
+# Membership inference attack
+
+A good privacy-preserving model learns from the training data, but
+doesn't memorize it. This library provides empirical tests for measuring
+potential memorization.
+
+Technically, the tests build classifiers that infer whether a particular sample
+was present in the training set. The more accurate such a classifier is, the
+more memorization is present and thus the less privacy-preserving the model is.
+
+The privacy vulnerability (or memorization potential) is measured
+via the area under the ROC-curve (`auc`) or via max{|fpr - tpr|} (`advantage`)
+of the attack classifier. These measures are very closely related.
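+
+For intuition, both measures can be read directly off the ROC curve of the
+attack classifier. A minimal sketch with `sklearn` (here `is_member` and
+`membership_scores` are illustrative placeholders for the true membership
+labels and the attack classifier's scores, not names from this library):
+
+```python
+import numpy as np
+from sklearn import metrics
+
+# is_member: 1 for in-training examples, 0 for out-of-training examples.
+# membership_scores: higher values mean "more likely to be a member".
+fpr, tpr, _ = metrics.roc_curve(is_member, membership_scores)
+auc = metrics.auc(fpr, tpr)            # the `auc` measure
+advantage = np.max(np.abs(tpr - fpr))  # the `advantage` measure, max{|fpr - tpr|}
+```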
+
+The tests provided by the library are "black box". That is, only the outputs of
+the model are used (e.g., losses, logits, predictions). Neither model internals
+(weights) nor input samples are required.
+
+## How to use
+
+### Installation notes
+
+To use the latest version of the MIA library, please install TF Privacy with
+"pip install -U git+https://github.com/tensorflow/privacy". See
+https://github.com/tensorflow/privacy/issues/151 for more details.
+
+### Basic usage
+
+The simplest possible usage is
+
+```python
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+
+# Suppose we have the labels as integers starting from 0
+# labels_train shape: (n_train, )
+# labels_test shape: (n_test, )
+
+# Evaluate your model on training and test examples to get
+# loss_train shape: (n_train, )
+# loss_test shape: (n_test, )
+
+attacks_result = mia.run_attacks(
+    AttackInputData(
+        loss_train = loss_train,
+        loss_test = loss_test,
+        labels_train = labels_train,
+        labels_test = labels_test))
+```
+
+This example calls `run_attacks` with the default options to run a host of
+(fairly simple) attacks behind the scenes (depending on which data is fed in),
+and computes the most important measures.
+
+> NOTE: The train and test sets are balanced internally, i.e., an equal number
+> of in-training and out-of-training examples is chosen for the attacks
+> (the size of whichever set has fewer examples). The larger set is subsampled
+> uniformly at random without replacement.
+
+Then, we can view the attack results by:
+
+```python
+print(attacks_result.summary())
+# Example output:
+# -> Best-performing attacks over all slices
+# THRESHOLD_ATTACK (with 50000 training and 10000 test examples) achieved an AUC of 0.59 on slice Entire dataset
+# THRESHOLD_ATTACK (with 50000 training and 10000 test examples) achieved an advantage of 0.20 on slice Entire dataset
+```
+
+### Other codelabs
+
+Please head over to the [codelabs](https://github.com/tensorflow/privacy/tree/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs)
+section for an overview of the library in action.
+
+### Advanced usage
+
+#### Specifying attacks to run
+
+Sometimes we have more information about the data, such as the logits and the
+labels, and we may want finer-grained control of the attack, such as using more
+complicated classifiers instead of the simple threshold attack, or looking at
+the attack results by the examples' class.
+In those cases, we can provide more information to `run_attacks`.
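+
+(How to obtain logits and per-example losses depends on your setup. As one
+possible sketch, for a Keras classifier `model` with inputs `x_train` and
+`x_test`, where these names are illustrative rather than part of this library,
+the arrays used below could be computed as follows.)
+
+```python
+from tensorflow import keras
+
+# Assumes the model's final layer outputs logits (no softmax activation).
+logits_train = model.predict(x_train)  # shape: (n_train, n_classes)
+logits_test = model.predict(x_test)    # shape: (n_test, n_classes)
+
+# Per-example cross-entropy losses; reduction is disabled so that
+# one loss value is returned per example.
+cce = keras.losses.SparseCategoricalCrossentropy(
+    from_logits=True, reduction=keras.losses.Reduction.NONE)
+loss_train = cce(labels_train, logits_train).numpy()  # shape: (n_train, )
+loss_test = cce(labels_test, logits_test).numpy()     # shape: (n_test, )
+```
+
+With those arrays in hand, we first import the classes we need: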
+
+```python
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+```
+
+First, as before, we specify the input for the attack as an
+`AttackInputData` object:
+
+```python
+# Evaluate your model on training and test examples to get
+# logits_train shape: (n_train, n_classes)
+# logits_test shape: (n_test, n_classes)
+# loss_train shape: (n_train, )
+# loss_test shape: (n_test, )
+
+attack_input = AttackInputData(
+    logits_train = logits_train,
+    logits_test = logits_test,
+    loss_train = loss_train,
+    loss_test = loss_test,
+    labels_train = labels_train,
+    labels_test = labels_test)
+```
+
+Instead of `logits`, you can also specify
+`probs_train` and `probs_test` as the predicted probability vectors of each
+example.
+
+Then, we specify some details of the attack.
+The first part specifies the slicing of the data. For
+example, we may want to evaluate the result on the whole dataset, or by class,
+percentiles, or the correctness of the model's classification.
+These can be specified by a `SlicingSpec` object.
+
+```python
+slicing_spec = SlicingSpec(
+    entire_dataset = True,
+    by_class = True,
+    by_percentiles = False,
+    by_classification_correctness = True)
+```
+
+The second part specifies the classifiers for the attacker to use.
+Currently, our API supports five classifiers, including
+`AttackType.THRESHOLD_ATTACK` for the simple threshold attack,
+`AttackType.LOGISTIC_REGRESSION`,
+`AttackType.MULTI_LAYERED_PERCEPTRON`,
+`AttackType.RANDOM_FOREST`, and
+`AttackType.K_NEAREST_NEIGHBORS`,
+which use the corresponding machine learning models.
+For some models, different classifiers can yield pretty different results.
+We can put multiple classifiers in a list:
+
+```python
+attack_types = [
+    AttackType.THRESHOLD_ATTACK,
+    AttackType.LOGISTIC_REGRESSION
+]
+```
+
+Now, we can call the `run_attacks` method with all specifications:
+
+```python
+attacks_result = mia.run_attacks(attack_input=attack_input,
+                                 slicing_spec=slicing_spec,
+                                 attack_types=attack_types)
+```
+
+This returns an object of type `AttackResults`. We can, for example, use the
+following code to see the attack results specified per slice, as we have
+requested attacks by class and by the model's classification correctness.
+
+```python
+print(attacks_result.summary(by_slices = True))
+# Example output:
+# -> Best-performing attacks over all slices
+# THRESHOLD_ATTACK achieved an AUC of 0.75 on slice CORRECTLY_CLASSIFIED=False
+# THRESHOLD_ATTACK achieved an advantage of 0.38 on slice CORRECTLY_CLASSIFIED=False
+#
+# Best-performing attacks over slice: "Entire dataset"
+# LOGISTIC_REGRESSION achieved an AUC of 0.61
+# THRESHOLD_ATTACK achieved an advantage of 0.22
+#
+# Best-performing attacks over slice: "CLASS=0"
+# LOGISTIC_REGRESSION achieved an AUC of 0.62
+# LOGISTIC_REGRESSION achieved an advantage of 0.24
+#
+# Best-performing attacks over slice: "CLASS=1"
+# LOGISTIC_REGRESSION achieved an AUC of 0.61
+# LOGISTIC_REGRESSION achieved an advantage of 0.19
+#
+# ...
+#
+# Best-performing attacks over slice: "CORRECTLY_CLASSIFIED=True"
+# LOGISTIC_REGRESSION achieved an AUC of 0.53
+# THRESHOLD_ATTACK achieved an advantage of 0.05
+#
+# Best-performing attacks over slice: "CORRECTLY_CLASSIFIED=False"
+# THRESHOLD_ATTACK achieved an AUC of 0.75
+# THRESHOLD_ATTACK achieved an advantage of 0.38
+```
+
+
+#### Viewing and plotting the attack results
+
+We have seen an example of using `summary()` to view the attack results as text.
+We also provide some other ways to inspect the attack results.
+
+To get the attack that achieves the maximum attacker advantage or AUC, we can do
+
+```python
+max_auc_attacker = attacks_result.get_result_with_max_auc()
+max_advantage_attacker = attacks_result.get_result_with_max_attacker_advantage()
+```
+Then, for an individual attack such as `max_auc_attacker`, we can check its
+type, attacker advantage, and AUC by
+
+```python
+print("Attack type with max AUC: %s, AUC of %.2f, Attacker advantage of %.2f" %
+      (max_auc_attacker.attack_type,
+       max_auc_attacker.roc_curve.get_auc(),
+       max_auc_attacker.roc_curve.get_attacker_advantage()))
+# Example output:
+# -> Attack type with max AUC: THRESHOLD_ATTACK, AUC of 0.75, Attacker advantage of 0.38
+```
+We can also plot its ROC curve by
+
+```python
+import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting
+
+figure = plotting.plot_roc_curve(max_auc_attacker.roc_curve)
+```
+which would give a figure like the one below
+![roc_fig](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png?raw=true)
+
+Additionally, we provide functionality to convert the attack results into a
+Pandas data frame:
+
+```python
+import pandas as pd
+
+pd.set_option("display.max_rows", 8, "display.max_columns", None)
+print(attacks_result.calculate_pd_dataframe())
+# Example output:
+#           slice feature slice value attack type  Attacker advantage       AUC
+# 0        entire_dataset               threshold            0.216440  0.600630
+# 1        entire_dataset                      lr            0.212073  0.612989
+# 2                 class           0   threshold            0.226000  0.611669
+# 3                 class           0          lr            0.239452  0.624076
+# ..                  ...         ...         ...                 ...       ...
+# 22  correctly_classfied        True   threshold            0.054907  0.471290
+# 23  correctly_classfied        True          lr            0.046986  0.525194
+# 24  correctly_classfied       False   threshold            0.379465  0.748138
+# 25  correctly_classfied       False          lr            0.370713  0.737148
+```
+
+### External guides / press mentions
+
+* [Introductory blog post](https://franziska-boenisch.de/posts/2021/01/membership-inference/)
+to the theory and the library by Franziska Boenisch from the Fraunhofer AISEC
+institute.
+* [Google AI Blog Post](https://ai.googleblog.com/2021/01/google-research-looking-back-at-2020.html#ResponsibleAI)
+* [TensorFlow Blog Post](https://blog.tensorflow.org/2020/06/introducing-new-privacy-testing-library.html)
+* [VentureBeat article](https://venturebeat.com/2020/06/24/google-releases-experimental-tensorflow-module-that-tests-the-privacy-of-ai-models/)
+* [Tech Xplore article](https://techxplore.com/news/2020-06-google-tensorflow-privacy-module.html)
+
+
+## Contact / Feedback
+
+Fill out this
+[Google form](https://docs.google.com/forms/d/1DPwr3_OfMcqAOA6sdelTVjIZhKxMZkXvs94z16UCDa4/edit)
+or reach out to us at tf-privacy@google.com and let us know how you’re using
+this module. We’re keen on hearing your stories, feedback, and suggestions!
+ +## Contributing + +If you wish to add novel attacks to the attack library, please check our +[guidelines](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/CONTRIBUTING.md). + +## Copyright + +Copyright 2021 - Google LLC diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/__init__.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/__init__.py new file mode 100644 index 0000000..2225510 --- /dev/null +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelab_roc_fig.png b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png similarity index 100% rename from tensorflow_privacy/privacy/membership_inference_attack/codelab_roc_fig.png rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelab_roc_fig.png diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/README.md b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/README.md similarity index 68% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/README.md rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/README.md index 7058377..cf45d7f 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/README.md +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/README.md @@ -2,7 +2,7 @@ ## Introductory codelab -The easiest way to get started is to go through [the introductory codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs/codelab.ipynb). +The easiest way to get started is to go through [the introductory codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb). This trains a simple image classification model and tests it against a series of membership inference attacks. @@ -10,18 +10,18 @@ For a more detailed overview of the library, please check the sections below. ## End to end example As an alternative to the introductory codelab, we also have a standalone -[example.py](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs/example.py). +[example.py](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py). 
## Sequence to sequence models If you're interested in sequence to sequence model attacks, please see the -[seq2seq colab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb). +[seq2seq colab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb). ## Membership probability score If you're interested in the membership probability score (also called privacy risk score) developed by Song and Mittal, please see their -[membership probability codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs/membership_probability_codelab.ipynb). +[membership probability codelab](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/membership_probability_codelab.ipynb). The accompanying paper is on [arXiv](https://arxiv.org/abs/2003.10595). diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/__init__.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/__init__.py similarity index 100% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/__init__.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/__init__.py diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/codelab.ipynb b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb similarity index 92% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/codelab.ipynb rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb index d878a6a..b090310 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/codelab.ipynb +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/codelab.ipynb @@ -53,10 +53,10 @@ "source": [ "\n", " \n", " \n", "
\n", - " Run in Google Colab\n", + " Run in Google Colab\n", " \n", - " View source on GitHub\n", + " View source on GitHub\n", "
" ] @@ -133,7 +133,7 @@ "source": [ "!pip3 install git+https://github.com/tensorflow/privacy\n", "\n", - "from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia" + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia" ] }, { @@ -298,11 +298,11 @@ }, "outputs": [], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType\n", "\n", - "import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting\n", + "import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting\n", "\n", "labels_train = np.argmax(y_train, axis=1)\n", "labels_test = np.argmax(y_test, axis=1)\n", diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/example.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py similarity index 87% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/example.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py index 2c8ff70..47454f7 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/example.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py @@ -28,18 +28,17 @@ from sklearn import metrics from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras.utils import to_categorical -from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import \ - PrivacyReportMetadata -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec -import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting -import tensorflow_privacy.privacy.membership_inference_attack.privacy_report as privacy_report +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsCollection 
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyMetric +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec +import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting +import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.privacy_report as privacy_report def generate_random_cluster(center, scale, num_points): diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/membership_probability_codelab.ipynb b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/membership_probability_codelab.ipynb similarity index 99% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/membership_probability_codelab.ipynb rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/membership_probability_codelab.ipynb index f942e93..612a95b 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/membership_probability_codelab.ipynb +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/membership_probability_codelab.ipynb @@ -53,10 +53,10 @@ "source": [ "\n", " \n", " \n", "
\n", - " Run in Google Colab\n", + " Run in Google Colab\n", " \n", - " View source on GitHub\n", + " View source on GitHub\n", "
" ] @@ -133,7 +133,7 @@ "source": [ "!pip3 install git+https://github.com/tensorflow/privacy\n", "\n", - "from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia" + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia" ] }, { @@ -627,11 +627,11 @@ } ], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType\n", "\n", - "import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting\n", + "import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting\n", "\n", "labels_train = np.argmax(y_train, axis=1)\n", "labels_test = np.argmax(y_test, axis=1)\n", @@ -1190,9 +1190,9 @@ } ], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature\n", - "from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingFeature\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import get_slice\n", "import matplotlib.pyplot as plt\n", "class_list = np.arange(10)\n", "num_images = 5\n", diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/LICENSE b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/LICENSE similarity index 100% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/LICENSE rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/LICENSE diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb similarity index 98% rename from tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb index 4c80bd3..631f575 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb +++ 
b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb @@ -13,10 +13,10 @@ "source": [ "\n", " \n", " \n", "
\n", - " Run in Google Colab\n", + " Run in Google Colab\n", " \n", - " View source on GitHub\n", + " View source on GitHub\n", "
" ] @@ -106,7 +106,7 @@ "source": [ "!pip3 install git+https://github.com/tensorflow/privacy\n", "\n", - "from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia" + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia" ] }, { @@ -1142,9 +1142,9 @@ } ], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \\\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \\\n", " run_seq2seq_attack\n", - "import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting\n", + "import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.plotting as plotting\n", "\n", "attack_input = Seq2SeqAttackInputData(\n", " logits_train = logits_train_gen,\n", diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py new file mode 100644 index 0000000..e8f9f5f --- /dev/null +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py @@ -0,0 +1,819 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Data structures representing attack inputs, configuration, outputs.""" +import collections +import enum +import glob +import os +import pickle +from typing import Any, Iterable, Union +from dataclasses import dataclass +import numpy as np +import pandas as pd +from scipy import special +from sklearn import metrics +import tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils as utils + +ENTIRE_DATASET_SLICE_STR = 'Entire dataset' + + +class SlicingFeature(enum.Enum): + """Enum with features by which slicing is available.""" + CLASS = 'class' + PERCENTILE = 'percentile' + CORRECTLY_CLASSIFIED = 'correctly_classified' + + +@dataclass +class SingleSliceSpec: + """Specifies a slice. + + The slice is defined by values in one feature - it might be a single value + (eg. slice of examples of the specific classification class) or some set of + values (eg. range of percentiles of the attacked model loss). + + When feature is None, it means that the slice is the entire dataset. + """ + feature: SlicingFeature = None + value: Any = None + + @property + def entire_dataset(self): + return self.feature is None + + def __str__(self): + if self.entire_dataset: + return ENTIRE_DATASET_SLICE_STR + + if self.feature == SlicingFeature.PERCENTILE: + return 'Loss percentiles: %d-%d' % self.value + + return '%s=%s' % (self.feature.name, self.value) + + +@dataclass +class SlicingSpec: + """Specification of a slicing procedure. + + Each variable which is set specifies a slicing by different dimension. + """ + + # When is set to true, one of the slices is the whole dataset. 
+  entire_dataset: bool = True
+
+  # Used in classification tasks for slicing by classes. It is assumed that
+  # classes are integers 0, 1, ..., up to the number of classes minus one.
+  # When True, one slice per class is generated.
+  by_class: Union[bool, Iterable[int], int] = False
+
+  # If True, generates 10 slices for the loss percentiles: 0-10%, 10-20%,
+  # ..., 90-100%.
+  by_percentiles: bool = False
+
+  # When True, a slice for correctly classified and a slice for misclassified
+  # examples will be generated.
+  by_classification_correctness: bool = False
+
+  def __str__(self):
+    """Only keeps the True values."""
+    result = ['SlicingSpec(']
+    if self.entire_dataset:
+      result.append(' Entire dataset,')
+    if self.by_class:
+      # Check bool before the other types: a bool is also an instance of int.
+      if isinstance(self.by_class, bool):
+        result.append(' By classes,')
+      elif isinstance(self.by_class, Iterable):
+        result.append(' Into classes %s,' % self.by_class)
+      else:
+        result.append(' Up to class %d,' % self.by_class)
+    if self.by_percentiles:
+      result.append(' By percentiles,')
+    if self.by_classification_correctness:
+      result.append(' By classification correctness,')
+    result.append(')')
+    return '\n'.join(result)
+
+
+class AttackType(enum.Enum):
+  """An enum defining attack types."""
+  LOGISTIC_REGRESSION = 'lr'
+  MULTI_LAYERED_PERCEPTRON = 'mlp'
+  RANDOM_FOREST = 'rf'
+  K_NEAREST_NEIGHBORS = 'knn'
+  THRESHOLD_ATTACK = 'threshold'
+  THRESHOLD_ENTROPY_ATTACK = 'threshold-entropy'
+
+  @property
+  def is_trained_attack(self):
+    """Returns whether this type of attack requires training a model."""
+    return (self != AttackType.THRESHOLD_ATTACK) and (
+        self != AttackType.THRESHOLD_ENTROPY_ATTACK)
+
+  def __str__(self):
+    """Returns LOGISTIC_REGRESSION instead of AttackType.LOGISTIC_REGRESSION."""
+    return '%s' % self.name
+
+
+class PrivacyMetric(enum.Enum):
+  """An enum for the supported privacy risk metrics."""
+  AUC = 'AUC'
+  ATTACKER_ADVANTAGE = 'Attacker advantage'
+
+  def __str__(self):
+    """Returns 'AUC' instead of PrivacyMetric.AUC."""
+    return '%s' % self.value
+
+
+def _is_integer_type_array(a):
+  return np.issubdtype(a.dtype, np.integer)
+
+
+def _is_last_dim_equal(arr1, arr1_name, arr2, arr2_name):
+  """Checks whether the last dimension of the arrays is the same."""
+  if arr1 is not None and arr2 is not None and arr1.shape[-1] != arr2.shape[-1]:
+    raise ValueError('%s and %s should have the same number of features.' %
+                     (arr1_name, arr2_name))
+
+
+def _is_array_one_dimensional(arr, arr_name):
+  """Checks whether the array is one dimensional."""
+  if arr is not None and len(arr.shape) != 1:
+    raise ValueError('%s should be a one dimensional numpy array.' % arr_name)
+
+
+def _is_np_array(arr, arr_name):
+  """Checks whether array is a numpy array."""
+  if arr is not None and not isinstance(arr, np.ndarray):
+    raise ValueError('%s should be a numpy array.' % arr_name)
+
+
+def _log_value(probs, small_value=1e-30):
+  """Computes -log(probs), clipping probabilities that are close to 0."""
+  return -np.log(np.maximum(probs, small_value))
+
+
+@dataclass
+class AttackInputData:
+  """Input data for running an attack.
+
+  This includes only the data, and not configuration.
+  """
+
+  logits_train: np.ndarray = None
+  logits_test: np.ndarray = None
+
+  # Predicted probabilities for each class. They can be derived from logits,
+  # so they can be set only if logits are not explicitly provided.
+  probs_train: np.ndarray = None
+  probs_test: np.ndarray = None
+
+  # Contains ground-truth classes. Classes are assumed to be integers starting
+  # from 0.
+ labels_train: np.ndarray = None + labels_test: np.ndarray = None + + # Explicitly specified loss. If provided, this is used instead of deriving + # loss from logits and labels + loss_train: np.ndarray = None + loss_test: np.ndarray = None + + # Explicitly specified prediction entropy. If provided, this is used instead + # of deriving entropy from logits and labels + # (https://arxiv.org/pdf/2003.10595.pdf by Song and Mittal). + entropy_train: np.ndarray = None + entropy_test: np.ndarray = None + + @property + def num_classes(self): + if self.labels_train is None or self.labels_test is None: + raise ValueError( + 'Can\'t identify the number of classes as no labels were provided. ' + 'Please set labels_train and labels_test') + return int(max(np.max(self.labels_train), np.max(self.labels_test))) + 1 + + @property + def logits_or_probs_train(self): + """Returns train logits or probs whatever is not None.""" + if self.logits_train is not None: + return self.logits_train + return self.probs_train + + @property + def logits_or_probs_test(self): + """Returns test logits or probs whatever is not None.""" + if self.logits_test is not None: + return self.logits_test + return self.probs_test + + @staticmethod + def _get_entropy(logits: np.ndarray, true_labels: np.ndarray): + """Computes the prediction entropy (by Song and Mittal).""" + if (np.absolute(np.sum(logits, axis=1) - 1) <= 1e-3).all(): + probs = logits + else: + # Using softmax to compute probability from logits. + probs = special.softmax(logits, axis=1) + if true_labels is None: + # When not given ground truth label, we compute the + # normal prediction entropy. + # See the Equation (7) in https://arxiv.org/pdf/2003.10595.pdf + return np.sum(np.multiply(probs, _log_value(probs)), axis=1) + else: + # When given the ground truth label, we compute the + # modified prediction entropy. + # See the Equation (8) in https://arxiv.org/pdf/2003.10595.pdf + log_probs = _log_value(probs) + reverse_probs = 1 - probs + log_reverse_probs = _log_value(reverse_probs) + modified_probs = np.copy(probs) + modified_probs[range(true_labels.size), + true_labels] = reverse_probs[range(true_labels.size), + true_labels] + modified_log_probs = np.copy(log_reverse_probs) + modified_log_probs[range(true_labels.size), + true_labels] = log_probs[range(true_labels.size), + true_labels] + return np.sum(np.multiply(modified_probs, modified_log_probs), axis=1) + + def get_loss_train(self): + """Calculates (if needed) cross-entropy losses for the training set. + + Returns: + Loss (or None if neither the loss nor the labels are present). + """ + if self.loss_train is None: + if self.labels_train is None: + return None + if self.logits_train is not None: + self.loss_train = utils.log_loss_from_logits(self.labels_train, + self.logits_train) + else: + self.loss_train = utils.log_loss(self.labels_train, self.probs_train) + return self.loss_train + + def get_loss_test(self): + """Calculates (if needed) cross-entropy losses for the test set. + + Returns: + Loss (or None if neither the loss nor the labels are present). 
+ """ + if self.loss_test is None: + if self.labels_test is None: + return None + if self.logits_test is not None: + self.loss_test = utils.log_loss_from_logits(self.labels_test, + self.logits_test) + else: + self.loss_test = utils.log_loss(self.labels_test, self.probs_test) + return self.loss_test + + def get_entropy_train(self): + """Calculates prediction entropy for the training set.""" + if self.entropy_train is not None: + return self.entropy_train + return self._get_entropy(self.logits_train, self.labels_train) + + def get_entropy_test(self): + """Calculates prediction entropy for the test set.""" + if self.entropy_test is not None: + return self.entropy_test + return self._get_entropy(self.logits_test, self.labels_test) + + def get_train_size(self): + """Returns size of the training set.""" + if self.loss_train is not None: + return self.loss_train.size + if self.entropy_train is not None: + return self.entropy_train.size + return self.logits_or_probs_train.shape[0] + + def get_test_size(self): + """Returns size of the test set.""" + if self.loss_test is not None: + return self.loss_test.size + if self.entropy_test is not None: + return self.entropy_test.size + return self.logits_or_probs_test.shape[0] + + def validate(self): + """Validates the inputs.""" + if (self.loss_train is None) != (self.loss_test is None): + raise ValueError( + 'loss_test and loss_train should both be either set or unset') + + if (self.entropy_train is None) != (self.entropy_test is None): + raise ValueError( + 'entropy_test and entropy_train should both be either set or unset') + + if (self.logits_train is None) != (self.logits_test is None): + raise ValueError( + 'logits_train and logits_test should both be either set or unset') + + if (self.probs_train is None) != (self.probs_test is None): + raise ValueError( + 'probs_train and probs_test should both be either set or unset') + + if (self.logits_train is not None) and (self.probs_train is not None): + raise ValueError('Logits and probs can not be both set') + + if (self.labels_train is None) != (self.labels_test is None): + raise ValueError( + 'labels_train and labels_test should both be either set or unset') + + if (self.labels_train is None and self.loss_train is None and + self.logits_train is None and self.entropy_train is None): + raise ValueError( + 'At least one of labels, logits, losses or entropy should be set') + + if self.labels_train is not None and not _is_integer_type_array( + self.labels_train): + raise ValueError('labels_train elements should have integer type') + + if self.labels_test is not None and not _is_integer_type_array( + self.labels_test): + raise ValueError('labels_test elements should have integer type') + + _is_np_array(self.logits_train, 'logits_train') + _is_np_array(self.logits_test, 'logits_test') + _is_np_array(self.probs_train, 'probs_train') + _is_np_array(self.probs_test, 'probs_test') + _is_np_array(self.labels_train, 'labels_train') + _is_np_array(self.labels_test, 'labels_test') + _is_np_array(self.loss_train, 'loss_train') + _is_np_array(self.loss_test, 'loss_test') + _is_np_array(self.entropy_train, 'entropy_train') + _is_np_array(self.entropy_test, 'entropy_test') + + _is_last_dim_equal(self.logits_train, 'logits_train', self.logits_test, + 'logits_test') + _is_last_dim_equal(self.probs_train, 'probs_train', self.probs_test, + 'probs_test') + _is_array_one_dimensional(self.loss_train, 'loss_train') + _is_array_one_dimensional(self.loss_test, 'loss_test') + _is_array_one_dimensional(self.entropy_train, 
'entropy_train') + _is_array_one_dimensional(self.entropy_test, 'entropy_test') + _is_array_one_dimensional(self.labels_train, 'labels_train') + _is_array_one_dimensional(self.labels_test, 'labels_test') + + def __str__(self): + """Return the shapes of variables that are not None.""" + result = ['AttackInputData('] + _append_array_shape(self.loss_train, 'loss_train', result) + _append_array_shape(self.loss_test, 'loss_test', result) + _append_array_shape(self.entropy_train, 'entropy_train', result) + _append_array_shape(self.entropy_test, 'entropy_test', result) + _append_array_shape(self.logits_train, 'logits_train', result) + _append_array_shape(self.logits_test, 'logits_test', result) + _append_array_shape(self.probs_train, 'probs_train', result) + _append_array_shape(self.probs_test, 'probs_test', result) + _append_array_shape(self.labels_train, 'labels_train', result) + _append_array_shape(self.labels_test, 'labels_test', result) + result.append(')') + return '\n'.join(result) + + +def _append_array_shape(arr: np.array, arr_name: str, result): + if arr is not None: + result.append(' %s with shape: %s,' % (arr_name, arr.shape)) + + +@dataclass +class RocCurve: + """Represents ROC curve of a membership inference classifier.""" + # Thresholds used to define points on ROC curve. + # Thresholds are not explicitly part of the curve, and are stored for + # debugging purposes. + thresholds: np.ndarray + + # True positive rates based on thresholds + tpr: np.ndarray + + # False positive rates based on thresholds + fpr: np.ndarray + + def get_auc(self): + """Calculates area under curve (aka AUC).""" + return metrics.auc(self.fpr, self.tpr) + + def get_attacker_advantage(self): + """Calculates membership attacker's (or adversary's) advantage. + + This metric is inspired by https://arxiv.org/abs/1709.01604, specifically + by Definition 4. The difference here is that we calculate maximum advantage + over all available classifier thresholds. + + Returns: + a single float number with membership attacker's advantage. + """ + return max(np.abs(self.tpr - self.fpr)) + + def __str__(self): + """Returns AUC and advantage metrics.""" + return '\n'.join([ + 'RocCurve(', + ' AUC: %.2f' % self.get_auc(), + ' Attacker advantage: %.2f' % self.get_attacker_advantage(), ')' + ]) + + +# (no. of training examples, no. of test examples) for the test. +DataSize = collections.namedtuple('DataSize', 'ntrain ntest') + + +@dataclass +class SingleAttackResult: + """Results from running a single attack.""" + + # Data slice this result was calculated for. + slice_spec: SingleSliceSpec + + # (no. of training examples, no. of test examples) for the test. + data_size: DataSize + attack_type: AttackType + + # NOTE: roc_curve could theoretically be derived from membership scores. + # Currently, we store it explicitly since not all attack types support + # membership scores. + # TODO(b/175870479): Consider deriving ROC curve from the membership scores. + + # ROC curve representing the accuracy of the attacker + roc_curve: RocCurve + + # Membership score is some measure of confidence of this attacker that + # a particular sample is a member of the training set. + # + # This is NOT necessarily probability. The nature of this score depends on + # the type of attacker. Scores from different attacker types are not directly + # comparable, but can be compared in relative terms (e.g. considering order + # imposed by this measure). + # + + # Membership scores for the training set samples. 
For a perfect attacker,
+  # all training samples will have higher scores than test samples.
+  membership_scores_train: np.ndarray = None
+
+  # Membership scores for the test set samples. For a perfect attacker, all
+  # test set samples will have lower scores than the training set samples.
+  membership_scores_test: np.ndarray = None
+
+  def get_attacker_advantage(self):
+    return self.roc_curve.get_attacker_advantage()
+
+  def get_auc(self):
+    return self.roc_curve.get_auc()
+
+  def __str__(self):
+    """Returns SliceSpec, AttackType, AUC and advantage metrics."""
+    return '\n'.join([
+        'SingleAttackResult(',
+        '  SliceSpec: %s' % str(self.slice_spec),
+        '  DataSize: (ntrain=%d, ntest=%d)' % (self.data_size.ntrain,
+                                               self.data_size.ntest),
+        '  AttackType: %s' % str(self.attack_type),
+        '  AUC: %.2f' % self.get_auc(),
+        '  Attacker advantage: %.2f' % self.get_attacker_advantage(), ')'
+    ])
+
+
+@dataclass
+class SingleMembershipProbabilityResult:
+  """Results from computing membership probabilities (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
+
+  The class also shows how to leverage membership probabilities to perform
+  attacks by thresholding on them.
+  """
+
+  # Data slice this result was calculated for.
+  slice_spec: SingleSliceSpec
+
+  train_membership_probs: np.ndarray
+
+  test_membership_probs: np.ndarray
+
+  def attack_with_varied_thresholds(self, threshold_list):
+    """Performs an attack with the specified thresholds.
+
+    For each threshold value, we count how many training and test samples have
+    membership probabilities larger than the threshold, and compute the
+    corresponding precision and recall values. We skip a threshold value if it
+    is larger than every sample's membership probability.
+
+    Args:
+      threshold_list: List of provided thresholds
+
+    Returns:
+      Three arrays: the thresholds that were kept, and the corresponding
+      precision and recall values.
+    """
+    fpr, tpr, thresholds = metrics.roc_curve(
+        np.concatenate((np.ones(len(self.train_membership_probs)),
+                        np.zeros(len(self.test_membership_probs)))),
+        np.concatenate(
+            (self.train_membership_probs, self.test_membership_probs)),
+        drop_intermediate=False)
+
+    precision_list = []
+    recall_list = []
+    meaningful_threshold_list = []
+    max_prob = max(self.train_membership_probs.max(),
+                   self.test_membership_probs.max())
+    for threshold in threshold_list:
+      if threshold <= max_prob:
+        idx = np.argwhere(thresholds >= threshold)[-1][0]
+        meaningful_threshold_list.append(threshold)
+        precision_list.append(tpr[idx] / (tpr[idx] + fpr[idx]))
+        recall_list.append(tpr[idx])
+
+    return np.array(meaningful_threshold_list), np.array(
+        precision_list), np.array(recall_list)
+
+  def collect_results(self, threshold_list, return_roc_results=True):
+    """Collects attack results for a list of membership probability thresholds.
+
+    The membership probability (from 0 to 1) represents each sample's
+    probability of being in the training set. Usually, we choose a list of
+    threshold values from 0.5 (uncertain of training or test) to 1 (100%
+    certain of training) to compute the corresponding attack precision and
+    recall.
+
+    Args:
+      threshold_list: List of provided thresholds
+      return_roc_results: Whether to also report ROC-based metrics (AUC and
+        attacker advantage)
+
+    Returns:
+      A list of summary strings.
+ """ + meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds( + threshold_list) + summary = [] + summary.append('\nMembership probability analysis over slice: \"%s\"' % + str(self.slice_spec)) + for i in range(len(meaningful_threshold_list)): + summary.append( + ' with %.4f as the threshold on membership probability, the precision-recall pair is (%.4f, %.4f)' + % (meaningful_threshold_list[i], precision_list[i], recall_list[i])) + if return_roc_results: + fpr, tpr, thresholds = metrics.roc_curve( + np.concatenate((np.ones(len(self.train_membership_probs)), + np.zeros(len(self.test_membership_probs)))), + np.concatenate( + (self.train_membership_probs, self.test_membership_probs))) + roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) + summary.append( + ' thresholding on membership probability achieved an AUC of %.2f' % + (roc_curve.get_auc())) + summary.append( + ' thresholding on membership probability achieved an advantage of %.2f' + % (roc_curve.get_attacker_advantage())) + return summary + + +@dataclass +class MembershipProbabilityResults: + """Membership probability results from multiple data slices.""" + + membership_prob_results: Iterable[SingleMembershipProbabilityResult] + + def summary(self, threshold_list): + """Returns the summary of membership probability analyses on all slices.""" + summary = [] + for single_result in self.membership_prob_results: + single_summary = single_result.collect_results(threshold_list) + summary.extend(single_summary) + return '\n'.join(summary) + + +@dataclass +class PrivacyReportMetadata: + """Metadata about the evaluated model. + + Used to create a privacy report based on AttackResults. + """ + accuracy_train: float = None + accuracy_test: float = None + + loss_train: float = None + loss_test: float = None + + model_variant_label: str = 'Default model variant' + epoch_num: int = None + + +class AttackResultsDFColumns(enum.Enum): + """Columns for the Pandas DataFrame that stores AttackResults metrics.""" + SLICE_FEATURE = 'slice feature' + SLICE_VALUE = 'slice value' + DATA_SIZE_TRAIN = 'train size' + DATA_SIZE_TEST = 'test size' + ATTACK_TYPE = 'attack type' + + def __str__(self): + """Returns 'slice value' instead of AttackResultsDFColumns.SLICE_VALUE.""" + return '%s' % self.value + + +@dataclass +class AttackResults: + """Results from running multiple attacks.""" + single_attack_results: Iterable[SingleAttackResult] + + privacy_report_metadata: PrivacyReportMetadata = None + + def calculate_pd_dataframe(self): + """Returns all metrics as a Pandas DataFrame.""" + slice_features = [] + slice_values = [] + data_size_train = [] + data_size_test = [] + attack_types = [] + advantages = [] + aucs = [] + + for attack_result in self.single_attack_results: + slice_spec = attack_result.slice_spec + if slice_spec.entire_dataset: + slice_feature, slice_value = str(slice_spec), '' + else: + slice_feature, slice_value = slice_spec.feature.value, slice_spec.value + slice_features.append(str(slice_feature)) + slice_values.append(str(slice_value)) + data_size_train.append(attack_result.data_size.ntrain) + data_size_test.append(attack_result.data_size.ntest) + attack_types.append(str(attack_result.attack_type)) + advantages.append(float(attack_result.get_attacker_advantage())) + aucs.append(float(attack_result.get_auc())) + + df = pd.DataFrame({ + str(AttackResultsDFColumns.SLICE_FEATURE): slice_features, + str(AttackResultsDFColumns.SLICE_VALUE): slice_values, + str(AttackResultsDFColumns.DATA_SIZE_TRAIN): 
data_size_train,
+        str(AttackResultsDFColumns.DATA_SIZE_TEST): data_size_test,
+        str(AttackResultsDFColumns.ATTACK_TYPE): attack_types,
+        str(PrivacyMetric.ATTACKER_ADVANTAGE): advantages,
+        str(PrivacyMetric.AUC): aucs
+    })
+    return df
+
+  def summary(self, by_slices=False) -> str:
+    """Provides a summary of the metrics.
+
+    The summary provides the best-performing attacks for each requested data
+    slice.
+
+    Args:
+      by_slices: whether to prepare a per-slice summary.
+
+    Returns:
+      A string with a summary of all the metrics.
+    """
+    summary = []
+
+    # Summary over all slices
+    max_auc_result_all = self.get_result_with_max_auc()
+    summary.append('Best-performing attacks over all slices')
+    summary.append(
+        '  %s (with %d training and %d test examples) achieved an AUC of %.2f on slice %s'
+        % (max_auc_result_all.attack_type,
+           max_auc_result_all.data_size.ntrain,
+           max_auc_result_all.data_size.ntest,
+           max_auc_result_all.get_auc(),
+           max_auc_result_all.slice_spec))
+
+    max_advantage_result_all = self.get_result_with_max_attacker_advantage()
+    summary.append(
+        '  %s (with %d training and %d test examples) achieved an advantage of %.2f on slice %s'
+        % (max_advantage_result_all.attack_type,
+           max_advantage_result_all.data_size.ntrain,
+           max_advantage_result_all.data_size.ntest,
+           max_advantage_result_all.get_attacker_advantage(),
+           max_advantage_result_all.slice_spec))
+
+    slice_dict = self._group_results_by_slice()
+
+    if by_slices and len(slice_dict.keys()) > 1:
+      for slice_str in slice_dict:
+        results = slice_dict[slice_str]
+        summary.append('\nBest-performing attacks over slice: \"%s\"' %
+                       slice_str)
+        max_auc_result = results.get_result_with_max_auc()
+        summary.append(
+            '  %s (with %d training and %d test examples) achieved an AUC of %.2f'
+            % (max_auc_result.attack_type,
+               max_auc_result.data_size.ntrain,
+               max_auc_result.data_size.ntest,
+               max_auc_result.get_auc()))
+        max_advantage_result = results.get_result_with_max_attacker_advantage()
+        summary.append(
+            '  %s (with %d training and %d test examples) achieved an advantage of %.2f'
+            % (max_advantage_result.attack_type,
+               max_advantage_result.data_size.ntrain,
+               max_advantage_result.data_size.ntest,
+               max_advantage_result.get_attacker_advantage()))
+
+    return '\n'.join(summary)
+
+  def _group_results_by_slice(self):
+    """Groups AttackResults into a dictionary keyed by the slice."""
+    slice_dict = {}
+    for attack_result in self.single_attack_results:
+      slice_str = str(attack_result.slice_spec)
+      if slice_str not in slice_dict:
+        slice_dict[slice_str] = AttackResults([])
+      slice_dict[slice_str].single_attack_results.append(attack_result)
+    return slice_dict
+
+  def get_result_with_max_auc(self) -> SingleAttackResult:
+    """Get the result with maximum AUC for all attacks and slices."""
+    aucs = [result.get_auc() for result in self.single_attack_results]
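+
+    # A useful attack should do no worse than random guessing (AUC of 0.5).
+    # An AUC well below 0.5 usually means the attack classifier's outputs got
+    # inverted somewhere, hence the warning below.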
+    if min(aucs) < 0.4:
+      print('Suspiciously low AUC detected: %.2f. '
+            'There might be a bug in the classifier' % min(aucs))
+
+    return self.single_attack_results[np.argmax(aucs)]
+
+  def get_result_with_max_attacker_advantage(self) -> SingleAttackResult:
+    """Get the result with maximum advantage for all attacks and slices."""
+    return self.single_attack_results[np.argmax([
+        result.get_attacker_advantage() for result in self.single_attack_results
+    ])]
+
+  def save(self, filepath):
+    """Saves self to a pickle file."""
+    with open(filepath, 'wb') as out:
+      pickle.dump(self, out)
+
+  @classmethod
+  def load(cls, filepath):
+    """Loads AttackResults from a pickle file."""
+    with open(filepath, 'rb') as inp:
+      return pickle.load(inp)
+
+
+@dataclass
+class AttackResultsCollection:
+  """A collection of AttackResults."""
+  attack_results_list: Iterable[AttackResults]
+
+  def append(self, attack_results: AttackResults):
+    self.attack_results_list.append(attack_results)
+
+  def save(self, dirname):
+    """Saves self as one pickle file per AttackResults, under dirname."""
+    for i, attack_results in enumerate(self.attack_results_list):
+      filepath = os.path.join(dirname,
+                              _get_attack_results_filename(attack_results, i))
+
+      attack_results.save(filepath)
+
+  @classmethod
+  def load(cls, dirname):
+    """Loads AttackResultsCollection from all files in a directory."""
+    loaded_collection = AttackResultsCollection([])
+    for filepath in sorted(glob.glob('%s/*' % dirname)):
+      with open(filepath, 'rb') as inp:
+        loaded_collection.attack_results_list.append(pickle.load(inp))
+    return loaded_collection
+
+
+def _get_attack_results_filename(attack_results: AttackResults, index: int):
+  """Creates a filename for a specific set of AttackResults."""
+  metadata = attack_results.privacy_report_metadata
+  if metadata is not None:
+    return '%s_%s_epoch_%s.pickle' % (metadata.model_variant_label, index,
+                                      metadata.epoch_num)
+  return '%s.pickle' % index
+
+
+def get_flattened_attack_metrics(results: AttackResults):
+  """Get flattened attack metrics.
+
+  Args:
+    results: membership inference attack results.
+
+  Returns:
+    types: a list of attack types
+    slices: a list of slices
+    attack_metrics: a list of metric names
+    values: a list of metric values; the i-th element corresponds to the i-th
+      entries of types, slices and attack_metrics
+  """
+  types = []
+  slices = []
+  attack_metrics = []
+  values = []
+  for attack_result in results.single_attack_results:
+    types += [str(attack_result.attack_type)] * 2
+    slices += [str(attack_result.slice_spec)] * 2
+    attack_metrics += ['adv', 'auc']
+    values += [float(attack_result.get_attacker_advantage()),
+               float(attack_result.get_auc())]
+  return types, slices, attack_metrics, values
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures_test.py
similarity index 91%
rename from tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures_test.py
index bde4652..eb9a669 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures_test.py
@@ -13,25 +13,25 @@
 # limitations under the License.
# Lint as: python3 -"""Tests for tensorflow_privacy.privacy.membership_inference_attack.data_structures.""" +"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures.""" import os import tempfile from absl.testing import absltest from absl.testing import parameterized import numpy as np import pandas as pd -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import _log_value -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import DataSize -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import _log_value +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsCollection +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import RocCurve +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleAttackResult +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleMembershipProbabilityResult +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingFeature class SingleSliceSpecTest(parameterized.TestCase): diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py new file mode 100644 index 0000000..db68cd0 --- /dev/null +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py @@ -0,0 +1,148 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Specifying and creating AttackInputData slices."""
+
+import collections.abc
+import copy
+from typing import List
+
+import numpy as np
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingFeature
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
+
+
+def _slice_if_not_none(a, idx):
+  return None if a is None else a[idx]
+
+
+def _slice_data_by_indices(data: AttackInputData, idx_train,
+                           idx_test) -> AttackInputData:
+  """Slices train fields with idx_train and test fields with idx_test."""
+
+  result = AttackInputData()
+
+  # Slice train data.
+  result.logits_train = _slice_if_not_none(data.logits_train, idx_train)
+  result.probs_train = _slice_if_not_none(data.probs_train, idx_train)
+  result.labels_train = _slice_if_not_none(data.labels_train, idx_train)
+  result.loss_train = _slice_if_not_none(data.loss_train, idx_train)
+  result.entropy_train = _slice_if_not_none(data.entropy_train, idx_train)
+
+  # Slice test data.
+  result.logits_test = _slice_if_not_none(data.logits_test, idx_test)
+  result.probs_test = _slice_if_not_none(data.probs_test, idx_test)
+  result.labels_test = _slice_if_not_none(data.labels_test, idx_test)
+  result.loss_test = _slice_if_not_none(data.loss_test, idx_test)
+  result.entropy_test = _slice_if_not_none(data.entropy_test, idx_test)
+
+  return result
+
+
+def _slice_by_class(data: AttackInputData, class_value: int) -> AttackInputData:
+  idx_train = data.labels_train == class_value
+  idx_test = data.labels_test == class_value
+  return _slice_data_by_indices(data, idx_train, idx_test)
+
+
+def _slice_by_percentiles(data: AttackInputData, from_percentile: float,
+                          to_percentile: float):
+  """Slices samples by loss percentiles."""
+
+  # Find from_percentile and to_percentile percentiles in losses.
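+  # The percentile thresholds are computed over the pooled train and test
+  # losses, so both sets are filtered by the same loss interval.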
+  loss_train = data.get_loss_train()
+  loss_test = data.get_loss_test()
+  losses = np.concatenate((loss_train, loss_test))
+  from_loss = np.percentile(losses, from_percentile)
+  to_loss = np.percentile(losses, to_percentile)
+
+  idx_train = (from_loss <= loss_train) & (loss_train <= to_loss)
+  idx_test = (from_loss <= loss_test) & (loss_test <= to_loss)
+
+  return _slice_data_by_indices(data, idx_train, idx_test)
+
+
+def _indices_by_classification(logits_or_probs, labels, correctly_classified):
+  idx_correct = labels == np.argmax(logits_or_probs, axis=1)
+  return idx_correct if correctly_classified else np.invert(idx_correct)
+
+
+def _slice_by_classification_correctness(data: AttackInputData,
+                                         correctly_classified: bool):
+  idx_train = _indices_by_classification(data.logits_or_probs_train,
+                                         data.labels_train,
+                                         correctly_classified)
+  idx_test = _indices_by_classification(data.logits_or_probs_test,
+                                        data.labels_test, correctly_classified)
+  return _slice_data_by_indices(data, idx_train, idx_test)
+
+
+def get_single_slice_specs(slicing_spec: SlicingSpec,
+                           num_classes: int = None) -> List[SingleSliceSpec]:
+  """Returns slices of data according to slicing_spec."""
+  result = []
+
+  if slicing_spec.entire_dataset:
+    result.append(SingleSliceSpec())
+
+  # Create slices by class.
+  by_class = slicing_spec.by_class
+  if isinstance(by_class, bool):
+    if by_class:
+      assert num_classes, "When by_class == True, num_classes should be given."
+      assert 0 <= num_classes <= 1000, (
+          f"Too many classes for slicing by class. "
+          f"Found {num_classes}.")
+      for c in range(num_classes):
+        result.append(SingleSliceSpec(SlicingFeature.CLASS, c))
+  elif isinstance(by_class, int):
+    result.append(SingleSliceSpec(SlicingFeature.CLASS, by_class))
+  elif isinstance(by_class, collections.abc.Iterable):
+    for c in by_class:
+      result.append(SingleSliceSpec(SlicingFeature.CLASS, c))
+
+  # Create slices by percentiles.
+  if slicing_spec.by_percentiles:
+    for percent in range(0, 100, 10):
+      result.append(
+          SingleSliceSpec(SlicingFeature.PERCENTILE, (percent, percent + 10)))
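+
+  # For example, SlicingSpec(entire_dataset=True, by_percentiles=True) yields
+  # eleven specs in total: the entire dataset plus the ten loss-percentile
+  # ranges appended above.
+
+  # Create slices by correctness of the classifications.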
+ if slicing_spec.by_classification_correctness: + result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)) + result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, False)) + + return result + + +def get_slice(data: AttackInputData, + slice_spec: SingleSliceSpec) -> AttackInputData: + """Returns a single slice of data according to slice_spec.""" + if slice_spec.entire_dataset: + data_slice = copy.copy(data) + elif slice_spec.feature == SlicingFeature.CLASS: + data_slice = _slice_by_class(data, slice_spec.value) + elif slice_spec.feature == SlicingFeature.PERCENTILE: + from_percentile, to_percentile = slice_spec.value + data_slice = _slice_by_percentiles(data, from_percentile, to_percentile) + elif slice_spec.feature == SlicingFeature.CORRECTLY_CLASSIFIED: + data_slice = _slice_by_classification_correctness(data, slice_spec.value) + else: + raise ValueError('Unknown slice spec feature "%s"' % slice_spec.feature) + + data_slice.slice_spec = slice_spec + return data_slice diff --git a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing_test.py similarity index 90% rename from tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing_test.py index 48072e1..9777e4a 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing_test.py @@ -13,17 +13,17 @@ # limitations under the License. # Lint as: python3 -"""Tests for tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing.""" +"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing.""" from absl.testing import absltest import numpy as np -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec -from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs -from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingFeature +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import get_single_slice_specs +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import get_slice def _are_all_fields_equal(lhs, rhs) -> bool: diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py new file mode 100644 index 0000000..052132d --- /dev/null +++ 
b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation.py
@@ -0,0 +1,141 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""A callback and a function in keras for membership inference attack."""
+
+import os
+from typing import Iterable
+from absl import logging
+
+import tensorflow as tf
+
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils import log_loss
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard_tf2 as write_results_to_tensorboard
+
+
+def calculate_losses(model, data, labels):
+  """Calculates the losses of the model's predictions on data, given true labels.
+
+  Args:
+    model: model used to make predictions
+    data: samples
+    labels: true labels of samples (integer valued)
+
+  Returns:
+    pred: probability vector of each sample
+    loss: cross entropy loss of each sample
+  """
+  pred = model.predict(data)
+  loss = log_loss(labels, pred)
+  return pred, loss
+
+
+class MembershipInferenceCallback(tf.keras.callbacks.Callback):
+  """Callback to perform membership inference attack on epoch end."""
+
+  def __init__(
+      self,
+      in_train, out_train,
+      slicing_spec: SlicingSpec = None,
+      attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
+      tensorboard_dir=None,
+      tensorboard_merge_classifiers=False):
+    """Initializes the callback.
+ + Args: + in_train: (in_training samples, in_training labels) + out_train: (out_training samples, out_training labels) + slicing_spec: slicing specification of the attack + attack_types: a list of attacks, each of type AttackType + tensorboard_dir: directory for tensorboard summary + tensorboard_merge_classifiers: if true, plot different classifiers with + the same slicing_spec and metric in the same figure + """ + self._in_train_data, self._in_train_labels = in_train + self._out_train_data, self._out_train_labels = out_train + self._slicing_spec = slicing_spec + self._attack_types = attack_types + self._tensorboard_merge_classifiers = tensorboard_merge_classifiers + if tensorboard_dir: + if tensorboard_merge_classifiers: + self._writers = {} + for attack_type in attack_types: + self._writers[attack_type.name] = tf.summary.create_file_writer( + os.path.join(tensorboard_dir, 'MI', attack_type.name)) + else: + self._writers = tf.summary.create_file_writer( + os.path.join(tensorboard_dir, 'MI')) + logging.info('Will write to tensorboard.') + else: + self._writers = None + + def on_epoch_end(self, epoch, logs=None): + results = run_attack_on_keras_model( + self.model, + (self._in_train_data, self._in_train_labels), + (self._out_train_data, self._out_train_labels), + self._slicing_spec, + self._attack_types) + logging.info(results) + + att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics( + results) + print('Attack result:') + print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in + zip(att_types, att_slices, att_metrics, att_values)])) + + # Write to tensorboard if tensorboard_dir is specified + if self._writers is not None: + write_results_to_tensorboard(results, self._writers, epoch, + self._tensorboard_merge_classifiers) + + +def run_attack_on_keras_model( + model, in_train, out_train, + slicing_spec: SlicingSpec = None, + attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)): + """Performs the attack on a trained model. 
+ + Args: + model: model to be tested + in_train: a (in_training samples, in_training labels) tuple + out_train: a (out_training samples, out_training labels) tuple + slicing_spec: slicing specification of the attack + attack_types: a list of attacks, each of type AttackType + Returns: + Results of the attack + """ + in_train_data, in_train_labels = in_train + out_train_data, out_train_labels = out_train + + # Compute predictions and losses + in_train_pred, in_train_loss = calculate_losses(model, in_train_data, + in_train_labels) + out_train_pred, out_train_loss = calculate_losses(model, out_train_data, + out_train_labels) + attack_input = AttackInputData( + logits_train=in_train_pred, logits_test=out_train_pred, + labels_train=in_train_labels, labels_test=out_train_labels, + loss_train=in_train_loss, loss_test=out_train_loss + ) + results = mia.run_attacks(attack_input, + slicing_spec=slicing_spec, + attack_types=attack_types) + return results diff --git a/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation_example.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py similarity index 86% rename from tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation_example.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py index 26862b8..d000c55 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation_example.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_example.py @@ -20,11 +20,11 @@ from absl import flags import numpy as np import tensorflow as tf -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec -from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import MembershipInferenceCallback -from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import run_attack_on_keras_model +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import MembershipInferenceCallback +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation import run_attack_on_keras_model FLAGS = flags.FLAGS diff --git a/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py similarity index 83% rename from tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation_test.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py index cddaadc..f6d2747 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/keras_evaluation_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/keras_evaluation_test.py @@ -13,17 +13,17 @@ # limitations under the License. 
# Lint as: python3
-"""Tests for tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation."""
+"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.keras_evaluation."""

from absl.testing import absltest
import numpy as np
import tensorflow.compat.v1 as tf

-from tensorflow_privacy.privacy.membership_inference_attack import keras_evaluation
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import keras_evaluation
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics


class UtilsTest(absltest.TestCase):
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
new file mode 100644
index 0000000..4ee7e73
--- /dev/null
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
@@ -0,0 +1,332 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Code that runs membership inference attacks based on the model outputs.
+
+This file belongs to the new API for membership inference attacks. It
+replaces the implementation that previously lived under
+privacy/membership_inference_attack.
+""" + +from typing import Iterable +import numpy as np +from sklearn import metrics + +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import MembershipProbabilityResults +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import RocCurve +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleAttackResult +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleMembershipProbabilityResult +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import get_single_slice_specs +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.dataset_slicing import get_slice + + +def _get_slice_spec(data: AttackInputData) -> SingleSliceSpec: + if hasattr(data, 'slice_spec'): + return data.slice_spec + return SingleSliceSpec() + + +def _run_trained_attack(attack_input: AttackInputData, + attack_type: AttackType, + balance_attacker_training: bool = True): + """Classification attack done by ML models.""" + attacker = None + + if attack_type == AttackType.LOGISTIC_REGRESSION: + attacker = models.LogisticRegressionAttacker() + elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON: + attacker = models.MultilayerPerceptronAttacker() + elif attack_type == AttackType.RANDOM_FOREST: + attacker = models.RandomForestAttacker() + elif attack_type == AttackType.K_NEAREST_NEIGHBORS: + attacker = models.KNearestNeighborsAttacker() + else: + raise NotImplementedError('Attack type %s not implemented yet.' % + attack_type) + + prepared_attacker_data = models.create_attacker_data( + attack_input, balance=balance_attacker_training) + + attacker.train_model(prepared_attacker_data.features_train, + prepared_attacker_data.is_training_labels_train) + + # Run the attacker on (permuted) test examples. + predictions_test = attacker.predict(prepared_attacker_data.features_test) + + # Generate ROC curves with predictions. + fpr, tpr, thresholds = metrics.roc_curve( + prepared_attacker_data.is_training_labels_test, predictions_test) + + roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) + + # NOTE: In the current setup we can't obtain membership scores for all + # samples, since some of them were used to train the attacker. This can be + # fixed by training several attackers to ensure each sample was left out + # in exactly one attacker (basically, this means performing cross-validation). + # TODO(b/175870479): Implement membership scores for predicted attackers. 
+ + return SingleAttackResult( + slice_spec=_get_slice_spec(attack_input), + data_size=prepared_attacker_data.data_size, + attack_type=attack_type, + roc_curve=roc_curve) + + +def _run_threshold_attack(attack_input: AttackInputData): + """Runs a threshold attack on loss.""" + ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size() + loss_train = attack_input.get_loss_train() + loss_test = attack_input.get_loss_test() + if loss_train is None or loss_test is None: + raise ValueError('Not possible to run threshold attack without losses.') + fpr, tpr, thresholds = metrics.roc_curve( + np.concatenate((np.zeros(ntrain), np.ones(ntest))), + np.concatenate((loss_train, loss_test))) + + roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) + + return SingleAttackResult( + slice_spec=_get_slice_spec(attack_input), + data_size=DataSize(ntrain=ntrain, ntest=ntest), + attack_type=AttackType.THRESHOLD_ATTACK, + membership_scores_train=-attack_input.get_loss_train(), + membership_scores_test=-attack_input.get_loss_test(), + roc_curve=roc_curve) + + +def _run_threshold_entropy_attack(attack_input: AttackInputData): + ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size() + fpr, tpr, thresholds = metrics.roc_curve( + np.concatenate((np.zeros(ntrain), np.ones(ntest))), + np.concatenate( + (attack_input.get_entropy_train(), attack_input.get_entropy_test()))) + + roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) + + return SingleAttackResult( + slice_spec=_get_slice_spec(attack_input), + data_size=DataSize(ntrain=ntrain, ntest=ntest), + attack_type=AttackType.THRESHOLD_ENTROPY_ATTACK, + membership_scores_train=-attack_input.get_entropy_train(), + membership_scores_test=-attack_input.get_entropy_test(), + roc_curve=roc_curve) + + +def _run_attack(attack_input: AttackInputData, + attack_type: AttackType, + balance_attacker_training: bool = True, + min_num_samples: int = 1): + """Runs membership inference attacks for specified input and type. + + Args: + attack_input: input data for running an attack + attack_type: the attack to run + balance_attacker_training: Whether the training and test sets for the + membership inference attacker should have a balanced (roughly equal) + number of samples from the training and test sets used to develop + the model under attack. + min_num_samples: minimum number of examples in either training or test data. + + Returns: + the attack result. + """ + attack_input.validate() + if min(attack_input.get_train_size(), + attack_input.get_test_size()) < min_num_samples: + return None + + if attack_type.is_trained_attack: + return _run_trained_attack(attack_input, attack_type, + balance_attacker_training) + if attack_type == AttackType.THRESHOLD_ENTROPY_ATTACK: + return _run_threshold_entropy_attack(attack_input) + return _run_threshold_attack(attack_input) + + +def run_attacks(attack_input: AttackInputData, + slicing_spec: SlicingSpec = None, + attack_types: Iterable[AttackType] = ( + AttackType.THRESHOLD_ATTACK,), + privacy_report_metadata: PrivacyReportMetadata = None, + balance_attacker_training: bool = True, + min_num_samples: int = 1) -> AttackResults: + """Runs membership inference attacks on a classification model. + + It runs attacks specified by attack_types on each attack_input slice which is + specified by slicing_spec. 
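+
+  Example (illustrative sketch; the logits/labels arrays are assumed to
+  exist, with shapes as described in AttackInputData):
+    results = run_attacks(
+        AttackInputData(
+            logits_train=logits_train, logits_test=logits_test,
+            labels_train=labels_train, labels_test=labels_test),
+        SlicingSpec(entire_dataset=True, by_class=True),
+        attack_types=(AttackType.THRESHOLD_ATTACK,
+                      AttackType.LOGISTIC_REGRESSION))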
+
+  Args:
+    attack_input: input data for running an attack
+    slicing_spec: specifies attack_input slices to run attack on
+    attack_types: attacks to run
+    privacy_report_metadata: the metadata of the model under attack.
+    balance_attacker_training: Whether the training and test sets for the
+      membership inference attacker should have a balanced (roughly equal)
+      number of samples from the training and test sets used to develop
+      the model under attack.
+    min_num_samples: minimum number of examples in either training or test
+      data.
+
+  Returns:
+    the attack result.
+  """
+  attack_input.validate()
+  attack_results = []
+
+  if slicing_spec is None:
+    slicing_spec = SlicingSpec(entire_dataset=True)
+  num_classes = None
+  if slicing_spec.by_class:
+    num_classes = attack_input.num_classes
+  input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
+  for single_slice_spec in input_slice_specs:
+    attack_input_slice = get_slice(attack_input, single_slice_spec)
+    for attack_type in attack_types:
+      attack_result = _run_attack(attack_input_slice, attack_type,
+                                  balance_attacker_training,
+                                  min_num_samples)
+      if attack_result is not None:
+        attack_results.append(attack_result)
+
+  privacy_report_metadata = _compute_missing_privacy_report_metadata(
+      privacy_report_metadata, attack_input)
+
+  return AttackResults(
+      single_attack_results=attack_results,
+      privacy_report_metadata=privacy_report_metadata)
+
+
+def _compute_membership_probability(
+    attack_input: AttackInputData,
+    num_bins: int = 15) -> SingleMembershipProbabilityResult:
+  """Computes each individual point's likelihood of being a member (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
+
+  For an individual sample, its privacy risk score is computed as the
+  posterior probability of being in the training set after observing its
+  prediction output by the target machine learning model.
+
+  Args:
+    attack_input: input data for computing membership probability
+    num_bins: the number of bins used to compute the training/test histogram
+
+  Returns:
+    membership probability results
+  """
+
+  # Uses the provided loss or entropy. Otherwise computes the loss.
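+  # Worked example of the histogram computation below (illustrative
+  # numbers): if a bin contains 30% of the training losses but only 10% of
+  # the test losses, every sample falling into that bin is assigned
+  # membership probability 0.3 / (0.3 + 0.1) = 0.75.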
+  if attack_input.loss_train is not None and attack_input.loss_test is not None:
+    train_values = attack_input.loss_train
+    test_values = attack_input.loss_test
+  elif attack_input.entropy_train is not None and attack_input.entropy_test is not None:
+    train_values = attack_input.entropy_train
+    test_values = attack_input.entropy_test
+  else:
+    train_values = attack_input.get_loss_train()
+    test_values = attack_input.get_loss_test()
+
+  # Compute the histogram in the log scale.
+  small_value = 1e-10
+  train_values = np.maximum(train_values, small_value)
+  test_values = np.maximum(test_values, small_value)
+
+  min_value = min(train_values.min(), test_values.min())
+  max_value = max(train_values.max(), test_values.max())
+  bins_hist = np.logspace(
+      np.log10(min_value), np.log10(max_value), num_bins + 1)
+
+  train_hist, _ = np.histogram(train_values, bins=bins_hist)
+  train_hist = train_hist / (len(train_values) + 0.0)
+  train_hist_indices = np.fmin(
+      np.digitize(train_values, bins=bins_hist), num_bins) - 1
+
+  test_hist, _ = np.histogram(test_values, bins=bins_hist)
+  test_hist = test_hist / (len(test_values) + 0.0)
+  test_hist_indices = np.fmin(
+      np.digitize(test_values, bins=bins_hist), num_bins) - 1
+
+  combined_hist = train_hist + test_hist
+  combined_hist[combined_hist == 0] = small_value
+  membership_prob_list = train_hist / (combined_hist + 0.0)
+  train_membership_probs = membership_prob_list[train_hist_indices]
+  test_membership_probs = membership_prob_list[test_hist_indices]
+
+  return SingleMembershipProbabilityResult(
+      slice_spec=_get_slice_spec(attack_input),
+      train_membership_probs=train_membership_probs,
+      test_membership_probs=test_membership_probs)
+
+
+def run_membership_probability_analysis(
+    attack_input: AttackInputData,
+    slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
+  """Performs membership probability analysis on all given slice types.
+
+  Args:
+    attack_input: input data for computing membership probabilities
+    slicing_spec: specifies attack_input slices
+
+  Returns:
+    the membership probability results.
+ """ + attack_input.validate() + membership_prob_results = [] + + if slicing_spec is None: + slicing_spec = SlicingSpec(entire_dataset=True) + num_classes = None + if slicing_spec.by_class: + num_classes = attack_input.num_classes + input_slice_specs = get_single_slice_specs(slicing_spec, num_classes) + for single_slice_spec in input_slice_specs: + attack_input_slice = get_slice(attack_input, single_slice_spec) + membership_prob_results.append( + _compute_membership_probability(attack_input_slice)) + + return MembershipProbabilityResults( + membership_prob_results=membership_prob_results) + + +def _compute_missing_privacy_report_metadata( + metadata: PrivacyReportMetadata, + attack_input: AttackInputData) -> PrivacyReportMetadata: + """Populates metadata fields if they are missing.""" + if metadata is None: + metadata = PrivacyReportMetadata() + if metadata.accuracy_train is None: + metadata.accuracy_train = _get_accuracy(attack_input.logits_train, + attack_input.labels_train) + if metadata.accuracy_test is None: + metadata.accuracy_test = _get_accuracy(attack_input.logits_test, + attack_input.labels_test) + loss_train = attack_input.get_loss_train() + loss_test = attack_input.get_loss_test() + if metadata.loss_train is None and loss_train is not None: + metadata.loss_train = np.average(loss_train) + if metadata.loss_test is None and loss_test is not None: + metadata.loss_test = np.average(loss_test) + return metadata + + +def _get_accuracy(logits, labels): + """Computes the accuracy if it is missing.""" + if logits is None or labels is None: + return None + return metrics.accuracy_score(labels, np.argmax(logits, axis=1)) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack_test.py similarity index 86% rename from tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack_test.py index 5b8d82b..f7d9401 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack_test.py @@ -13,17 +13,17 @@ # limitations under the License. 
# Lint as: python3
-"""Tests for tensorflow_privacy.privacy.membership_inference_attack.utils."""
+"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.membership_inference_attack."""

from absl.testing import absltest
import numpy as np

-from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import DataSize
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingFeature
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec


def get_test_input(n_train, n_test):
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
new file mode 100644
index 0000000..79bb826
--- /dev/null
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
@@ -0,0 +1,210 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Trained models for membership inference attacks."""
+
+from dataclasses import dataclass
+import numpy as np
+from sklearn import ensemble
+from sklearn import linear_model
+from sklearn import model_selection
+from sklearn import neighbors
+from sklearn import neural_network
+
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize
+
+
+@dataclass
+class AttackerData:
+  """Input data for an ML classifier attack.
+
+  This includes only the data, and not configuration.
+  """
+
+  features_train: np.ndarray = None
+  # element-wise boolean array denoting if the example was part of training.
+ is_training_labels_train: np.ndarray = None + + features_test: np.ndarray = None + # element-wise boolean array denoting if the example was part of training. + is_training_labels_test: np.ndarray = None + + data_size: DataSize = None + + +def create_attacker_data(attack_input_data: AttackInputData, + test_fraction: float = 0.25, + balance: bool = True) -> AttackerData: + """Prepare AttackInputData to train ML attackers. + + Combines logits and losses and performs a random train-test split. + + Args: + attack_input_data: Original AttackInputData + test_fraction: Fraction of the dataset to include in the test split. + balance: Whether the training and test sets for the membership inference + attacker should have a balanced (roughly equal) number of samples + from the training and test sets used to develop the model + under attack. + + Returns: + AttackerData. + """ + attack_input_train = _column_stack(attack_input_data.logits_or_probs_train, + attack_input_data.get_loss_train()) + attack_input_test = _column_stack(attack_input_data.logits_or_probs_test, + attack_input_data.get_loss_test()) + + if balance: + min_size = min(attack_input_data.get_train_size(), + attack_input_data.get_test_size()) + attack_input_train = _sample_multidimensional_array(attack_input_train, + min_size) + attack_input_test = _sample_multidimensional_array(attack_input_test, + min_size) + ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0] + + features_all = np.concatenate((attack_input_train, attack_input_test)) + + labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest)))) + + # Perform a train-test split + features_train, features_test, is_training_labels_train, is_training_labels_test = model_selection.train_test_split( + features_all, labels_all, test_size=test_fraction, stratify=labels_all) + return AttackerData(features_train, is_training_labels_train, features_test, + is_training_labels_test, + DataSize(ntrain=ntrain, ntest=ntest)) + + +def _sample_multidimensional_array(array, size): + indices = np.random.choice(len(array), size, replace=False) + return array[indices] + + +def _column_stack(logits, loss): + """Stacks logits and losses. + + In case that only one exists, returns that one. + Args: + logits: logits array + loss: loss array + + Returns: + stacked logits and losses (or only one if both do not exist). + """ + if logits is None: + return np.expand_dims(loss, axis=-1) + if loss is None: + return logits + return np.column_stack((logits, loss)) + + +class TrainedAttacker: + """Base class for training attack models.""" + model = None + + def train_model(self, input_features, is_training_labels): + """Train an attacker model. + + This is trained on examples from train and test datasets. + Args: + input_features : array-like of shape (n_samples, n_features) Training + vector, where n_samples is the number of samples and n_features is the + number of features. + is_training_labels : a vector of booleans of shape (n_samples, ) + representing whether the sample is in the training set or not. + """ + raise NotImplementedError() + + def predict(self, input_features): + """Predicts whether input_features belongs to train or test. + + Args: + input_features : A vector of features with the same semantics as x_train + passed to train_model. + Returns: + An array of probabilities denoting whether the example belongs to test. + """ + if self.model is None: + raise AssertionError( + 'Model not trained yet. 
Please call train_model first.') + return self.model.predict_proba(input_features)[:, 1] + + +class LogisticRegressionAttacker(TrainedAttacker): + """Logistic regression attacker.""" + + def train_model(self, input_features, is_training_labels): + lr = linear_model.LogisticRegression(solver='lbfgs') + param_grid = { + 'C': np.logspace(-4, 2, 10), + } + model = model_selection.GridSearchCV( + lr, param_grid=param_grid, cv=3, n_jobs=1, verbose=0) + model.fit(input_features, is_training_labels) + self.model = model + + +class MultilayerPerceptronAttacker(TrainedAttacker): + """Multilayer perceptron attacker.""" + + def train_model(self, input_features, is_training_labels): + mlp_model = neural_network.MLPClassifier() + param_grid = { + 'hidden_layer_sizes': [(64,), (32, 32)], + 'solver': ['adam'], + 'alpha': [0.0001, 0.001, 0.01], + } + n_jobs = -1 + model = model_selection.GridSearchCV( + mlp_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0) + model.fit(input_features, is_training_labels) + self.model = model + + +class RandomForestAttacker(TrainedAttacker): + """Random forest attacker.""" + + def train_model(self, input_features, is_training_labels): + """Setup a random forest pipeline with cross-validation.""" + rf_model = ensemble.RandomForestClassifier() + + param_grid = { + 'n_estimators': [100], + 'max_features': ['auto', 'sqrt'], + 'max_depth': [5, 10, 20, None], + 'min_samples_split': [2, 5, 10], + 'min_samples_leaf': [1, 2, 4] + } + n_jobs = -1 + model = model_selection.GridSearchCV( + rf_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0) + model.fit(input_features, is_training_labels) + self.model = model + + +class KNearestNeighborsAttacker(TrainedAttacker): + """K nearest neighbor attacker.""" + + def train_model(self, input_features, is_training_labels): + knn_model = neighbors.KNeighborsClassifier() + param_grid = { + 'n_neighbors': [3, 5, 7], + } + model = model_selection.GridSearchCV( + knn_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0) + model.fit(input_features, is_training_labels) + self.model = model diff --git a/tensorflow_privacy/privacy/membership_inference_attack/models_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models_test.py similarity index 90% rename from tensorflow_privacy/privacy/membership_inference_attack/models_test.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models_test.py index c55ab98..09452e5 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/models_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models_test.py @@ -13,12 +13,12 @@ # limitations under the License. 
# Lint as: python3
-"""Tests for tensorflow_privacy.privacy.membership_inference_attack.data_structures."""
+"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.models."""

from absl.testing import absltest
import numpy as np

-from tensorflow_privacy.privacy.membership_inference_attack import models
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData


class TrainedAttackerTest(absltest.TestCase):
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py
new file mode 100644
index 0000000..dbdc49d
--- /dev/null
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/plotting.py
@@ -0,0 +1,86 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Plotting functionality for membership inference attack analysis.
+
+Functions to plot ROC curves and histograms as well as functionality to store
+figures to disk.
+"""
+
+from typing import Text, Iterable, Optional
+
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn import metrics
+
+
+def save_plot(figure: plt.Figure, path: Text, outformat='png'):
+  """Stores a figure to disk."""
+  if path is not None:
+    with open(path, 'wb') as f:
+      figure.savefig(f, bbox_inches='tight', format=outformat)
+    plt.close(figure)
+
+
+def plot_curve_with_area(x: Iterable[float],
+                         y: Iterable[float],
+                         xlabel: Text = 'x',
+                         ylabel: Text = 'y') -> plt.Figure:
+  """Plots the curve defined by inputs and the area under the curve.
+
+  All entries of x and y are required to lie between 0 and 1.
+  For example, x could be recall and y precision, or x is fpr and y is tpr.
+
+  Args:
+    x: Values on x-axis (1d)
+    y: Values on y-axis (must be same length as x)
+    xlabel: Label for x axis
+    ylabel: Label for y axis
+
+  Returns:
+    The matplotlib figure handle
+  """
+  fig = plt.figure()
+  plt.plot([0, 1], [0, 1], 'k', lw=1.0)
+  plt.plot(x, y, lw=2, label=f'AUC: {metrics.auc(x, y):.3f}')
+  plt.xlabel(xlabel)
+  plt.ylabel(ylabel)
+  plt.legend()
+  return fig
+
+
+def plot_histograms(train: Iterable[float],
+                    test: Iterable[float],
+                    xlabel: Text = 'x',
+                    thresh: Optional[float] = None) -> plt.Figure:
+  """Plots histograms of training versus test metrics."""
+  xmin = min(np.min(train), np.min(test))
+  xmax = max(np.max(train), np.max(test))
+  bins = np.linspace(xmin, xmax, 100)
+  fig = plt.figure()
+  plt.hist(test, bins=bins, density=True, alpha=0.5, label='test', log=True)
+  plt.hist(train, bins=bins, density=True, alpha=0.5, label='train', log=True)
+  if thresh is not None:
+    plt.axvline(thresh, c='r', label=f'threshold = {thresh:.3f}')
+  plt.xlabel(xlabel)
+  plt.ylabel('normalized counts (density)')
+  plt.legend()
+  return fig
+
+
+def plot_roc_curve(roc_curve) -> plt.Figure:
+  """Plots the ROC curve and the area under the curve."""
+  return plot_curve_with_area(
+      roc_curve.fpr, roc_curve.tpr, xlabel='FPR', ylabel='TPR')
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/privacy_report.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/privacy_report.py
new file mode 100644
index 0000000..6fd09db
--- /dev/null
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/privacy_report.py
@@ -0,0 +1,138 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Plotting code for ML Privacy Reports."""
+from typing import Iterable
+import matplotlib.pyplot as plt
+import pandas as pd
+
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsCollection
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsDFColumns
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import ENTIRE_DATASET_SLICE_STR
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyMetric
+
+# Helper constants for DataFrame keys.
+LEGEND_LABEL_STR = 'legend label'
+EPOCH_STR = 'Epoch'
+TRAIN_ACCURACY_STR = 'Train accuracy'
+
+
+def plot_by_epochs(results: AttackResultsCollection,
+                   privacy_metrics: Iterable[PrivacyMetric]) -> plt.Figure:
+  """Plots privacy vulnerabilities vs epoch numbers.
+
+  In case multiple privacy metrics are specified, the plot will feature
+  multiple subplots (one subplot per metric). Multiple model variants
+  are supported.
+
+  Args:
+    results: AttackResultsCollection for the plot
+    privacy_metrics: List of enumerated privacy metrics that should be plotted.
+
+  Returns:
+    A pyplot figure with privacy vs epoch plots.
+  """
+
+  _validate_results(results.attack_results_list)
+  all_results_df = _calculate_combined_df_with_metadata(
+      results.attack_results_list)
+  return _generate_subplots(
+      all_results_df=all_results_df,
+      x_axis_metric='Epoch',
+      figure_title='Vulnerability per Epoch',
+      privacy_metrics=privacy_metrics)
+
+
+def plot_privacy_vs_accuracy(results: AttackResultsCollection,
+                             privacy_metrics: Iterable[PrivacyMetric]):
+  """Plots privacy vulnerabilities vs model accuracy.
+
+  In case multiple privacy metrics are specified, the plot will feature
+  multiple subplots (one subplot per metric). Multiple model variants
+  are supported.
+
+  Args:
+    results: AttackResultsCollection for the plot
+    privacy_metrics: List of enumerated privacy metrics that should be plotted.
+
+  Returns:
+    A pyplot figure with privacy vs accuracy plots.
+  """
+  _validate_results(results.attack_results_list)
+  all_results_df = _calculate_combined_df_with_metadata(
+      results.attack_results_list)
+  return _generate_subplots(
+      all_results_df=all_results_df,
+      x_axis_metric='Train accuracy',
+      figure_title='Privacy vs Utility Analysis',
+      privacy_metrics=privacy_metrics)
+
+
+def _calculate_combined_df_with_metadata(results: Iterable[AttackResults]):
+  """Adds metadata to the dataframe and concats them together."""
+  all_results_df = None
+  for attack_results in results:
+    attack_results_df = attack_results.calculate_pd_dataframe()
+    attack_results_df = attack_results_df.loc[attack_results_df[str(
+        AttackResultsDFColumns.SLICE_FEATURE)] == ENTIRE_DATASET_SLICE_STR]
+    attack_results_df.insert(0, EPOCH_STR,
+                             attack_results.privacy_report_metadata.epoch_num)
+    attack_results_df.insert(
+        0, TRAIN_ACCURACY_STR,
+        attack_results.privacy_report_metadata.accuracy_train)
+    attack_results_df.insert(
+        0, LEGEND_LABEL_STR,
+        attack_results.privacy_report_metadata.model_variant_label + ' - ' +
+        attack_results_df[str(AttackResultsDFColumns.ATTACK_TYPE)])
+    if all_results_df is None:
+      all_results_df = attack_results_df
+    else:
+      all_results_df = pd.concat([all_results_df, attack_results_df],
+                                 ignore_index=True)
+  return all_results_df
+
+
+def _generate_subplots(all_results_df: pd.DataFrame, x_axis_metric: str,
+                       figure_title: str,
+                       privacy_metrics: Iterable[PrivacyMetric]):
+  """Creates one subplot per privacy metric for a specified x_axis_metric."""
+  fig, axes = plt.subplots(
+      1, len(privacy_metrics), figsize=(5 * len(privacy_metrics) + 3, 5))
+  # Set a title for the entire group of subplots.
+  fig.suptitle(figure_title)
+  if len(privacy_metrics) == 1:
+    axes = (axes,)
+  for i, privacy_metric in enumerate(privacy_metrics):
+    legend_labels = all_results_df[LEGEND_LABEL_STR].unique()
+    for legend_label in legend_labels:
+      single_label_results = all_results_df.loc[
+          all_results_df[LEGEND_LABEL_STR] == legend_label]
+      sorted_label_results = single_label_results.sort_values(x_axis_metric)
+      axes[i].plot(sorted_label_results[x_axis_metric],
+                   sorted_label_results[str(privacy_metric)])
+    axes[i].set_xlabel(x_axis_metric)
+    axes[i].set_title('%s for %s' % (privacy_metric, ENTIRE_DATASET_SLICE_STR))
+  plt.legend(legend_labels, loc='upper left', bbox_to_anchor=(1.02, 1))
+  fig.tight_layout(rect=[0, 0, 1, 0.93])  # Leave space for suptitle.
+ + return fig + + +def _validate_results(results: Iterable[AttackResults]): + for attack_results in results: + if not attack_results or not attack_results.privacy_report_metadata: + raise ValueError('Privacy metadata is not defined.') + if attack_results.privacy_report_metadata.epoch_num is None: + raise ValueError('epoch_num in metadata is not defined.') diff --git a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/privacy_report_test.py similarity index 89% rename from tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/privacy_report_test.py index 5b6bacc..ac5d904 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/privacy_report_test.py @@ -13,21 +13,20 @@ # limitations under the License. # Lint as: python3 -"""Tests for tensorflow_privacy.privacy.membership_inference_attack.privacy_report.""" +"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.privacy_report.""" from absl.testing import absltest import numpy as np -from tensorflow_privacy.privacy.membership_inference_attack import privacy_report +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import privacy_report -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import DataSize -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import \ - PrivacyReportMetadata -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsCollection +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import RocCurve +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleAttackResult +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec class PrivacyReportTest(absltest.TestCase): diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py new file mode 100644 index 0000000..73827b6 --- /dev/null +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py @@ -0,0 
+1,373 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Code for membership inference attacks on seq2seq models. + +Contains seq2seq specific logic for attack data structures, attack data +generation, +and the logistic regression membership inference attack. +""" +from typing import Iterator, List + +from dataclasses import dataclass +import numpy as np +from scipy.stats import rankdata +from sklearn import metrics +from sklearn import model_selection +import tensorflow as tf + +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import RocCurve +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleAttackResult +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleSliceSpec +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.models import _sample_multidimensional_array +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.models import AttackerData + + +def _is_iterator(obj, obj_name): + """Checks whether obj is a generator.""" + if obj is not None and not isinstance(obj, Iterator): + raise ValueError('%s should be a generator.' % obj_name) + + +@dataclass +class Seq2SeqAttackInputData: + """Input data for running an attack on seq2seq models. + + This includes only the data, and not configuration. + """ + logits_train: Iterator[np.ndarray] = None + logits_test: Iterator[np.ndarray] = None + + # Contains ground-truth token indices for the target sequences. + labels_train: Iterator[np.ndarray] = None + labels_test: Iterator[np.ndarray] = None + + # Size of the target sequence vocabulary. + vocab_size: int = None + + # Train, test size = number of batches in training, test set. + # These values need to be supplied by the user as logits, labels + # are lazy loaded for seq2seq models. 
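+  # For example (illustrative numbers): 1000 training sequences fed in
+  # batches of 20 sequences correspond to train_size = 50.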
+  train_size: int = 0
+  test_size: int = 0
+
+  def validate(self):
+    """Validates the inputs."""
+
+    if (self.logits_train is None) != (self.logits_test is None):
+      raise ValueError(
+          'logits_train and logits_test should both be either set or unset')
+
+    if (self.labels_train is None) != (self.labels_test is None):
+      raise ValueError(
+          'labels_train and labels_test should both be either set or unset')
+
+    if self.logits_train is None or self.labels_train is None:
+      raise ValueError(
+          'Labels, logits of training, test sets should all be set')
+
+    if (self.vocab_size is None or self.train_size is None or
+        self.test_size is None):
+      raise ValueError('vocab_size, train_size, test_size should all be set')
+
+    if self.vocab_size is not None and not isinstance(self.vocab_size, int):
+      raise ValueError('vocab_size should be of integer type')
+
+    if self.train_size is not None and not isinstance(self.train_size, int):
+      raise ValueError('train_size should be of integer type')
+
+    if self.test_size is not None and not isinstance(self.test_size, int):
+      raise ValueError('test_size should be of integer type')
+
+    _is_iterator(self.logits_train, 'logits_train')
+    _is_iterator(self.logits_test, 'logits_test')
+    _is_iterator(self.labels_train, 'labels_train')
+    _is_iterator(self.labels_test, 'labels_test')
+
+  def __str__(self):
+    """Returns the shapes of variables that are not None."""
+    result = ['AttackInputData(']
+
+    if self.vocab_size is not None and self.train_size is not None:
+      result.append(
+          'logits_train with shape (%d, num_sequences, num_tokens, %d)' %
+          (self.train_size, self.vocab_size))
+      result.append(
+          'labels_train with shape (%d, num_sequences, num_tokens, 1)' %
+          self.train_size)
+
+    if self.vocab_size is not None and self.test_size is not None:
+      result.append(
+          'logits_test with shape (%d, num_sequences, num_tokens, %d)' %
+          (self.test_size, self.vocab_size))
+      result.append(
+          'labels_test with shape (%d, num_sequences, num_tokens, 1)' %
+          self.test_size)
+
+    result.append(')')
+    return '\n'.join(result)
+
+
+def _get_attack_features_and_metadata(
+    logits: Iterator[np.ndarray],
+    labels: Iterator[np.ndarray]) -> (np.ndarray, float, float):
+  """Returns the average rank of tokens per batch of sequences and the loss.
+
+  Args:
+    logits: Logits returned by a seq2seq model, dim = (num_batches,
+      num_sequences, num_tokens, vocab_size).
+    labels: Target labels for the seq2seq model, dim = (num_batches,
+      num_sequences, num_tokens, 1).
+
+  Returns:
+    1. An array of average ranks, dim = (num_batches, 1).
+    Each average rank is calculated over ranks of tokens in sequences of a
+    particular batch.
+    2. Loss computed over all logits and labels.
+    3. Accuracy computed over all logits and labels.
+  """
+  ranks = []
+  loss = 0.0
+  dataset_length = 0.0
+  correct_preds = 0
+  total_preds = 0
+  for batch_logits, batch_labels in zip(logits, labels):
+    # Compute average rank for the current batch.
+    batch_ranks = _get_batch_ranks(batch_logits, batch_labels)
+    ranks.append(np.mean(batch_ranks))
+
+    # Update overall loss metrics with metrics of the current batch.
+    batch_loss, batch_length = _get_batch_loss_metrics(batch_logits,
+                                                       batch_labels)
+    loss += batch_loss
+    dataset_length += batch_length
+
+    # Update overall accuracy metrics with metrics of the current batch.
+    batch_correct_preds, batch_total_preds = _get_batch_accuracy_metrics(
+        batch_logits, batch_labels)
+    correct_preds += batch_correct_preds
+    total_preds += batch_total_preds
+
+  # Compute loss and accuracy for the dataset.
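+  # (dataset_length counts sequences, summed over batches, while total_preds
+  # counts tokens, so the accuracy below is a per-token average.)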
+ loss = loss / dataset_length + accuracy = correct_preds / total_preds + + return np.array(ranks), loss, accuracy + + +def _get_batch_ranks(batch_logits: np.ndarray, + batch_labels: np.ndarray) -> np.ndarray: + """Returns the ranks of tokens in a batch of sequences. + + Args: + batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, + num_tokens, vocab_size). + batch_labels: Target labels for the seq2seq model, dim = (num_sequences, + num_tokens, 1). + + Returns: + An array of ranks of tokens in a batch of sequences, dim = (num_sequences, + num_tokens, 1) + """ + batch_ranks = [] + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels) + + return np.array(batch_ranks) + + +def _get_ranks_for_sequence(logits: np.ndarray, + labels: np.ndarray) -> List[float]: + """Returns ranks for a sequence. + + Args: + logits: Logits of a single sequence, dim = (num_tokens, vocab_size). + labels: Target labels of a single sequence, dim = (num_tokens, 1). + + Returns: + An array of ranks for tokens in the sequence, dim = (num_tokens, 1). + """ + sequence_ranks = [] + for logit, label in zip(logits, labels.astype(int)): + rank = rankdata(-logit, method='min')[label] - 1.0 + sequence_ranks.append(rank) + + return sequence_ranks + + +def _get_batch_loss_metrics(batch_logits: np.ndarray, + batch_labels: np.ndarray) -> (float, int): + """Returns the loss, number of sequences for a batch. + + Args: + batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, + num_tokens, vocab_size). + batch_labels: Target labels for the seq2seq model, dim = (num_sequences, + num_tokens, 1). + """ + batch_loss = 0.0 + batch_length = len(batch_logits) + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + sequence_loss = tf.losses.sparse_categorical_crossentropy( + tf.keras.backend.constant(sequence_labels), + tf.keras.backend.constant(sequence_logits), + from_logits=True) + batch_loss += sequence_loss.numpy().sum() + + return batch_loss / batch_length, batch_length + + +def _get_batch_accuracy_metrics(batch_logits: np.ndarray, + batch_labels: np.ndarray) -> (float, float): + """Returns the number of correct predictions, total number of predictions for a batch. + + Args: + batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, + num_tokens, vocab_size). + batch_labels: Target labels for the seq2seq model, dim = (num_sequences, + num_tokens, 1). + """ + batch_correct_preds = 0.0 + batch_total_preds = 0.0 + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + preds = tf.metrics.sparse_categorical_accuracy( + tf.keras.backend.constant(sequence_labels), + tf.keras.backend.constant(sequence_logits)) + batch_correct_preds += preds.numpy().sum() + batch_total_preds += len(sequence_labels) + + return batch_correct_preds, batch_total_preds + + +def create_seq2seq_attacker_data( + attack_input_data: Seq2SeqAttackInputData, + test_fraction: float = 0.25, + balance: bool = True, + privacy_report_metadata: PrivacyReportMetadata = PrivacyReportMetadata() +) -> AttackerData: + """Prepares Seq2SeqAttackInputData to train ML attackers. + + Uses logits and losses to generate ranks and performs a random train-test + split. + + Also computes metadata (loss, accuracy) for the model under attack + and populates respective fields of PrivacyReportMetadata. 
+ + Args: + attack_input_data: Original Seq2SeqAttackInputData + test_fraction: Fraction of the dataset to include in the test split. + balance: Whether the training and test sets for the membership inference + attacker should have a balanced (roughly equal) number of samples from the + training and test sets used to develop the model under attack. + privacy_report_metadata: the metadata of the model under attack. + + Returns: + AttackerData. + """ + attack_input_train, loss_train, accuracy_train = _get_attack_features_and_metadata( + attack_input_data.logits_train, attack_input_data.labels_train) + attack_input_test, loss_test, accuracy_test = _get_attack_features_and_metadata( + attack_input_data.logits_test, attack_input_data.labels_test) + + if balance: + min_size = min(len(attack_input_train), len(attack_input_test)) + attack_input_train = _sample_multidimensional_array(attack_input_train, + min_size) + attack_input_test = _sample_multidimensional_array(attack_input_test, + min_size) + + features_all = np.concatenate((attack_input_train, attack_input_test)) + ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0] + + # Reshape for classifying one-dimensional features + features_all = features_all.reshape(-1, 1) + + labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest)))) + + # Perform a train-test split + features_train, features_test, \ + is_training_labels_train, is_training_labels_test = \ + model_selection.train_test_split( + features_all, labels_all, test_size=test_fraction, stratify=labels_all) + + # Populate accuracy, loss fields in privacy report metadata + privacy_report_metadata.loss_train = loss_train + privacy_report_metadata.loss_test = loss_test + privacy_report_metadata.accuracy_train = accuracy_train + privacy_report_metadata.accuracy_test = accuracy_test + + return AttackerData(features_train, is_training_labels_train, features_test, + is_training_labels_test, + DataSize(ntrain=ntrain, ntest=ntest)) + + +def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, + privacy_report_metadata: PrivacyReportMetadata = None, + balance_attacker_training: bool = True) -> AttackResults: + """Runs membership inference attacks on a seq2seq model. + + Args: + attack_input: input data for running an attack + privacy_report_metadata: the metadata of the model under attack. + balance_attacker_training: Whether the training and test sets for the + membership inference attacker should have a balanced (roughly equal) + number of samples from the training and test sets used to develop the + model under attack. + + Returns: + the attack result. + """ + attack_input.validate() + + # The attacker uses the average rank (a single number) of a seq2seq dataset + # record to determine membership. So only Logistic Regression is supported, + # as it makes the most sense for single-number features. + attacker = models.LogisticRegressionAttacker() + + # Create attacker data and populate fields of privacy_report_metadata + privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata() + prepared_attacker_data = create_seq2seq_attacker_data( + attack_input_data=attack_input, + balance=balance_attacker_training, + privacy_report_metadata=privacy_report_metadata) + + attacker.train_model(prepared_attacker_data.features_train, + prepared_attacker_data.is_training_labels_train) + + # Run the attacker on (permuted) test examples. + predictions_test = attacker.predict(prepared_attacker_data.features_test) + + # Generate ROC curves with predictions. 
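+  # The AUC of this curve, metrics.auc(fpr, tpr), is the vulnerability
+  # measure reported downstream: values near 0.5 mean the attacker does no
+  # better than chance, while values approaching 1.0 suggest strong
+  # memorization.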
+ fpr, tpr, thresholds = metrics.roc_curve( + prepared_attacker_data.is_training_labels_test, predictions_test) + + roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) + + attack_results = [ + SingleAttackResult( + slice_spec=SingleSliceSpec(), + attack_type=AttackType.LOGISTIC_REGRESSION, + roc_curve=roc_curve, + data_size=prepared_attacker_data.data_size) + ] + + return AttackResults( + single_attack_results=attack_results, + privacy_report_metadata=privacy_report_metadata) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia_test.py similarity index 95% rename from tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia_test.py index e0aaf96..b21f8c4 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia_test.py @@ -13,15 +13,15 @@ # limitations under the License. # Lint as: python3 -"""Tests for tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia.""" +"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia.""" from absl.testing import absltest import numpy as np -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata -from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import create_seq2seq_attacker_data -from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import run_seq2seq_attack -from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import create_seq2seq_attacker_data +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import run_seq2seq_attack +from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData class Seq2SeqAttackInputDataTest(absltest.TestCase): diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py new file mode 100644 index 0000000..ff76c4b --- /dev/null +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation.py @@ -0,0 +1,199 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Lint as: python3
+"""A hook and a function in tf estimator for membership inference attack."""
+
+import os
+from typing import Iterable
+from absl import logging
+import numpy as np
+import tensorflow.compat.v1 as tf
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils import log_loss
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard
+
+
+def calculate_losses(estimator, input_fn, labels):
+  """Get predictions and losses for samples.
+
+  The assumptions are that 1) the loss is cross-entropy loss, and 2) the user
+  has configured prediction mode to return per-example probabilities, e.g.,
+  when mode == tf.estimator.ModeKeys.PREDICT, the model function returns
+  tf.estimator.EstimatorSpec(mode=mode, predictions=tf.nn.softmax(logits)).
+
+  Args:
+    estimator: model used to make predictions
+    input_fn: input function to be used in estimator.predict
+    labels: array of size (n_samples,), true labels of samples (integer valued)
+
+  Returns:
+    pred: probability vector for each sample
+    loss: cross-entropy loss of each sample
+  """
+  pred = np.array(list(estimator.predict(input_fn=input_fn)))
+  loss = log_loss(labels, pred)
+  return pred, loss
+
+
+class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
+  """Training hook to perform a membership inference attack on epoch end."""
+
+  def __init__(
+      self,
+      estimator,
+      in_train, out_train,
+      input_fn_constructor,
+      slicing_spec: SlicingSpec = None,
+      attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
+      tensorboard_dir=None,
+      tensorboard_merge_classifiers=False):
+    """Initialize the hook.
+
+    Args:
+      estimator: model to be tested
+      in_train: (in_training samples, in_training labels)
+      out_train: (out_training samples, out_training labels)
+      input_fn_constructor: a function that receives samples and labels and
+        constructs the input_fn for model prediction
+      slicing_spec: slicing specification of the attack
+      attack_types: a list of attacks, each of type AttackType
+      tensorboard_dir: directory for TensorBoard summaries
+      tensorboard_merge_classifiers: if True, plot different classifiers with
+        the same slicing_spec and metric in the same figure
+    """
+    in_train_data, self._in_train_labels = in_train
+    out_train_data, self._out_train_labels = out_train
+
+    # Define the input functions for both in and out-training samples.
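+    # input_fn_constructor must return a zero-argument input_fn that
+    # estimator.predict can consume. One possible construction (assuming
+    # numpy arrays) is:
+    #   lambda data, labels: tf.estimator.inputs.numpy_input_fn(
+    #       x={'x': data}, y=labels, shuffle=False)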
+    self._in_train_input_fn = input_fn_constructor(in_train_data,
+                                                   self._in_train_labels)
+    self._out_train_input_fn = input_fn_constructor(out_train_data,
+                                                    self._out_train_labels)
+    self._estimator = estimator
+    self._slicing_spec = slicing_spec
+    self._attack_types = attack_types
+    self._tensorboard_merge_classifiers = tensorboard_merge_classifiers
+    if tensorboard_dir:
+      if tensorboard_merge_classifiers:
+        self._writers = {}
+        with tf.Graph().as_default():
+          for attack_type in attack_types:
+            self._writers[attack_type.name] = tf.summary.FileWriter(
+                os.path.join(tensorboard_dir, 'MI', attack_type.name))
+      else:
+        with tf.Graph().as_default():
+          self._writers = tf.summary.FileWriter(
+              os.path.join(tensorboard_dir, 'MI'))
+      logging.info('Will write to tensorboard.')
+    else:
+      self._writers = None
+
+  def end(self, session):
+    results = run_attack_helper(self._estimator,
+                                self._in_train_input_fn,
+                                self._out_train_input_fn,
+                                self._in_train_labels, self._out_train_labels,
+                                self._slicing_spec,
+                                self._attack_types)
+    logging.info(results)
+
+    att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
+        results)
+    print('Attack result:')
+    print('\n'.join(['  %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
+                     zip(att_types, att_slices, att_metrics, att_values)]))
+
+    # Write to TensorBoard if tensorboard_dir is specified.
+    global_step = self._estimator.get_variable_value('global_step')
+    if self._writers is not None:
+      write_results_to_tensorboard(results, self._writers, global_step,
+                                   self._tensorboard_merge_classifiers)
+
+
+def run_attack_on_tf_estimator_model(
+    estimator, in_train, out_train,
+    input_fn_constructor,
+    slicing_spec: SlicingSpec = None,
+    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
+  """Performs the attack at the end of training.
+
+  Args:
+    estimator: model to be tested
+    in_train: (in_training samples, in_training labels)
+    out_train: (out_training samples, out_training labels)
+    input_fn_constructor: a function that receives samples and labels and
+      constructs the input_fn for model prediction
+    slicing_spec: slicing specification of the attack
+    attack_types: a list of attacks, each of type AttackType
+
+  Returns:
+    Results of the attack
+  """
+  in_train_data, in_train_labels = in_train
+  out_train_data, out_train_labels = out_train
+
+  # Define the input functions for both in and out-training samples.
+  in_train_input_fn = input_fn_constructor(in_train_data, in_train_labels)
+  out_train_input_fn = input_fn_constructor(out_train_data, out_train_labels)
+
+  # Call the helper to run the attack.
+  results = run_attack_helper(estimator,
+                              in_train_input_fn, out_train_input_fn,
+                              in_train_labels, out_train_labels,
+                              slicing_spec,
+                              attack_types)
+  logging.info('End of training attack:')
+  logging.info(results)
+  return results
+
+
+def run_attack_helper(
+    estimator,
+    in_train_input_fn, out_train_input_fn,
+    in_train_labels, out_train_labels,
+    slicing_spec: SlicingSpec = None,
+    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)):
+  """A helper function to perform the attack.
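+
+  Computes predictions and losses for the in-training and out-of-training
+  samples via calculate_losses, assembles an AttackInputData, and runs the
+  requested attacks on it.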
+
+  Args:
+    estimator: model to be tested
+    in_train_input_fn: input_fn for in-training data
+    out_train_input_fn: input_fn for out-of-training data
+    in_train_labels: labels of in-training data
+    out_train_labels: labels of out-of-training data
+    slicing_spec: slicing specification of the attack
+    attack_types: a list of attacks, each of type AttackType
+
+  Returns:
+    Results of the attack
+  """
+  # Compute predictions and losses for both sets of samples.
+  in_train_pred, in_train_loss = calculate_losses(estimator,
+                                                  in_train_input_fn,
+                                                  in_train_labels)
+  out_train_pred, out_train_loss = calculate_losses(estimator,
+                                                    out_train_input_fn,
+                                                    out_train_labels)
+  attack_input = AttackInputData(
+      logits_train=in_train_pred, logits_test=out_train_pred,
+      labels_train=in_train_labels, labels_test=out_train_labels,
+      loss_train=in_train_loss, loss_test=out_train_loss
+  )
+  results = mia.run_attacks(attack_input,
+                            slicing_spec=slicing_spec,
+                            attack_types=attack_types)
+  return results
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_example.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py
similarity index 90%
rename from tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_example.py
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py
index 6f97aa1..f1bc185 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_example.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_example.py
@@ -21,11 +21,11 @@
 from absl import logging
 import numpy as np
 import tensorflow.compat.v1 as tf
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
-from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
-from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model
 
 FLAGS = flags.FLAGS
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py
similarity index 88%
rename from tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_test.py
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py
index 44c618f..7ae11e5 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_test.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/tf_estimator_evaluation_test.py
@@ -13,17 +13,17 @@
 # limitations under the License.
 
 # Lint as: python3
-"""Tests for tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation."""
+"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation."""
 
 from absl.testing import absltest
 import numpy as np
 import tensorflow.compat.v1 as tf
-from tensorflow_privacy.privacy.membership_inference_attack import tf_estimator_evaluation
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import tf_estimator_evaluation
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
 
 
 class UtilsTest(absltest.TestCase):
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/utils.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py
similarity index 100%
rename from tensorflow_privacy/privacy/membership_inference_attack/utils.py
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/utils_tensorboard.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py
similarity index 95%
rename from tensorflow_privacy/privacy/membership_inference_attack/utils_tensorboard.py
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py
index adedf33..a9b6cce 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/utils_tensorboard.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_tensorboard.py
@@ -19,8 +19,8 @@
 from typing import Union
 import tensorflow as tf2
 import tensorflow.compat.v1 as tf1
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
 
 
 def write_to_tensorboard(writers, tags, values, step):
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/utils_test.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_test.py
similarity index 94%
rename from tensorflow_privacy/privacy/membership_inference_attack/utils_test.py
rename to tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_test.py
index 9384382..994b44f 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/utils_test.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils_test.py
@@ -13,12 +13,12 @@
 # limitations under the License.
 
 # Lint as: python3
-"""Tests for tensorflow_privacy.privacy.membership_inference_attack.utils."""
+"""Tests for tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils."""
 
 from absl.testing import absltest
 import numpy as np
-from tensorflow_privacy.privacy.membership_inference_attack import utils
+from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import utils
 
 
 class UtilsTest(absltest.TestCase):
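
To make the intended wiring of the new estimator evaluation entry point concrete, here is a minimal, self-contained sketch of driving `run_attack_on_tf_estimator_model`. The toy `model_fn`, the `make_input_fn` helper, and the random data are illustrative assumptions, not part of this change; the maintained example lives in `tf_estimator_evaluation_example.py`.

```python
import numpy as np
import tensorflow.compat.v1 as tf

from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model


def model_fn(features, labels, mode):
  """Toy linear classifier returning softmax probabilities in PREDICT mode."""
  logits = tf.layers.dense(features['x'], 3)
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions=tf.nn.softmax(logits))
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(
      loss, global_step=tf.train.get_global_step())
  return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)


def make_input_fn(data, labels):
  # Zero-argument input_fn for estimator.predict; shuffling is disabled so
  # that predictions stay aligned with the labels.
  return tf.estimator.inputs.numpy_input_fn(
      x={'x': data}, y=labels, shuffle=False)


# Random toy data standing in for real train/test splits.
x_train = np.random.normal(size=(100, 4)).astype(np.float32)
y_train = np.random.randint(0, 3, size=100)
x_test = np.random.normal(size=(50, 4)).astype(np.float32)
y_test = np.random.randint(0, 3, size=50)

classifier = tf.estimator.Estimator(model_fn=model_fn)
classifier.train(
    input_fn=tf.estimator.inputs.numpy_input_fn(
        x={'x': x_train}, y=y_train, shuffle=True),
    max_steps=10)

results = run_attack_on_tf_estimator_model(
    classifier, (x_train, y_train), (x_test, y_test), make_input_fn,
    attack_types=[AttackType.THRESHOLD_ATTACK])
print(results.summary())
```

With `slicing_spec` left at its default, the attack runs on the entire dataset; the same `(data, labels) -> input_fn` constructor can also be handed to `MembershipInferenceTrainingHook` to run the attack during training instead of after it.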