From 46bee91cda7e68f88a6336d9f299641eaba818f9 Mon Sep 17 00:00:00 2001 From: amad-person Date: Tue, 24 Nov 2020 14:52:12 +0800 Subject: [PATCH 1/5] Refactor seq2seq logic and tests into separate files --- ...seq2seq_membership_inference_codelab.ipynb | 6 +- .../data_structures.py | 87 +---- .../data_structures_test.py | 70 ---- .../membership_inference_attack.py | 49 --- .../membership_inference_attack_test.py | 100 +----- .../membership_inference_attack/models.py | 96 ------ .../models_test.py | 123 ------- .../seq2seq_mia.py | 257 ++++++++++++++ .../seq2seq_mia_test.py | 320 ++++++++++++++++++ 9 files changed, 582 insertions(+), 526 deletions(-) create mode 100644 tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py create mode 100644 tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb b/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb index 108f89d..4045e77 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb +++ b/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb @@ -1142,8 +1142,8 @@ } ], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData\n", + "from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \\\n", + " run_seq2seq_attack\n", "import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting\n", "\n", "attack_input = Seq2SeqAttackInputData(\n", @@ -1157,7 +1157,7 @@ ")\n", "\n", "# Run several attacks for different data slices\n", - "attack_result = mia.run_seq2seq_attack(attack_input)\n", + "attack_result = run_seq2seq_attack(attack_input)\n", "\n", "# Plot the ROC curve of the best classifier\n", "fig = plotting.plot_roc_curve(attack_result.get_result_with_max_auc().roc_curve)\n", diff --git a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py index ba58990..1140611 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py @@ -18,7 +18,7 @@ import enum import glob import os import pickle -from typing import Any, Iterable, Union, Iterator +from typing import Any, Iterable, Union from dataclasses import dataclass import numpy as np @@ -378,91 +378,6 @@ def _append_array_shape(arr: np.array, arr_name: str, result): result.append(' %s with shape: %s,' % (arr_name, arr.shape)) -def _is_iterator(obj, obj_name): - """Checks whether obj is a generator.""" - if obj is not None and not isinstance(obj, Iterator): - raise ValueError('%s should be a generator.' % obj_name) - - -@dataclass -class Seq2SeqAttackInputData: - """Input data for running an attack on seq2seq models. - - This includes only the data, and not configuration. - """ - logits_train: Iterator[np.ndarray] = None - logits_test: Iterator[np.ndarray] = None - - # Contains ground-truth token indices for the target sequences. - labels_train: Iterator[np.ndarray] = None - labels_test: Iterator[np.ndarray] = None - - # Size of the target sequence vocabulary. - vocab_size: int = None - - # Train, test size = number of batches in training, test set. - # These values need to be supplied by the user as logits, labels - # are lazy loaded for seq2seq models. - train_size: int = 0 - test_size: int = 0 - - def validate(self): - """Validates the inputs.""" - - if (self.logits_train is None) != (self.logits_test is None): - raise ValueError( - 'logits_train and logits_test should both be either set or unset') - - if (self.labels_train is None) != (self.labels_test is None): - raise ValueError( - 'labels_train and labels_test should both be either set or unset') - - if self.logits_train is None or self.labels_train is None: - raise ValueError( - 'Labels, logits of training, test sets should all be set') - - if (self.vocab_size is None or self.train_size is None or - self.test_size is None): - raise ValueError('vocab_size, train_size, test_size should all be set') - - if self.vocab_size is not None and not int: - raise ValueError('vocab_size should be of integer type') - - if self.train_size is not None and not int: - raise ValueError('train_size should be of integer type') - - if self.test_size is not None and not int: - raise ValueError('test_size should be of integer type') - - _is_iterator(self.logits_train, 'logits_train') - _is_iterator(self.logits_test, 'logits_test') - _is_iterator(self.labels_train, 'labels_train') - _is_iterator(self.labels_test, 'labels_test') - - def __str__(self): - """Return the shapes of variables that are not None.""" - result = ['AttackInputData('] - - if self.vocab_size is not None and self.train_size is not None: - result.append( - 'logits_train with shape (%d, num_sequences, num_tokens, %d)' % - (self.train_size, self.vocab_size)) - result.append( - 'labels_train with shape (%d, num_sequences, num_tokens, 1)' % - self.train_size) - - if self.vocab_size is not None and self.test_size is not None: - result.append( - 'logits_test with shape (%d, num_sequences, num_tokens, %d)' % - (self.test_size, self.vocab_size)) - result.append( - 'labels_test with shape (%d, num_sequences, num_tokens, 1)' % - self.test_size) - - result.append(')') - return '\n'.join(result) - - @dataclass class RocCurve: """Represents ROC curve of a membership inference classifier.""" diff --git a/tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py b/tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py index eb1d8db..ff7a6c2 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures_test.py @@ -27,7 +27,6 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature @@ -153,75 +152,6 @@ class AttackInputDataTest(absltest.TestCase): probs_test=np.array([])).validate) -class Seq2SeqAttackInputDataTest(absltest.TestCase): - - def test_validator(self): - valid_logits_train = iter([np.array([]), np.array([])]) - valid_logits_test = iter([np.array([]), np.array([])]) - valid_labels_train = iter([np.array([]), np.array([])]) - valid_labels_test = iter([np.array([]), np.array([])]) - - invalid_logits_train = [] - invalid_logits_test = [] - invalid_labels_train = [] - invalid_labels_test = [] - - self.assertRaises( - ValueError, - Seq2SeqAttackInputData(logits_train=valid_logits_train).validate) - self.assertRaises( - ValueError, - Seq2SeqAttackInputData(labels_train=valid_labels_train).validate) - self.assertRaises( - ValueError, - Seq2SeqAttackInputData(logits_test=valid_logits_test).validate) - self.assertRaises( - ValueError, - Seq2SeqAttackInputData(labels_test=valid_labels_test).validate) - self.assertRaises(ValueError, Seq2SeqAttackInputData(vocab_size=0).validate) - self.assertRaises(ValueError, Seq2SeqAttackInputData(train_size=0).validate) - self.assertRaises(ValueError, Seq2SeqAttackInputData(test_size=0).validate) - self.assertRaises(ValueError, Seq2SeqAttackInputData().validate) - - # Tests that both logits and labels must be set. - self.assertRaises( - ValueError, - Seq2SeqAttackInputData( - logits_train=valid_logits_train, - logits_test=valid_logits_test, - vocab_size=0, - train_size=0, - test_size=0).validate) - self.assertRaises( - ValueError, - Seq2SeqAttackInputData( - labels_train=valid_labels_train, - labels_test=valid_labels_test, - vocab_size=0, - train_size=0, - test_size=0).validate) - - # Tests that vocab, train, test sizes must all be set. - self.assertRaises( - ValueError, - Seq2SeqAttackInputData( - logits_train=valid_logits_train, - logits_test=valid_logits_test, - labels_train=valid_labels_train, - labels_test=valid_labels_test).validate) - - self.assertRaises( - ValueError, - Seq2SeqAttackInputData( - logits_train=invalid_logits_train, - logits_test=invalid_logits_test, - labels_train=invalid_labels_train, - labels_test=invalid_labels_test, - vocab_size=0, - train_size=0, - test_size=0).validate) - - class RocCurveTest(absltest.TestCase): def test_auc_random_classifier(self): diff --git a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py index 3d18648..f731958 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py @@ -30,7 +30,6 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo from tensorflow_privacy.privacy.membership_inference_attack.data_structures import \ PrivacyReportMetadata from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec @@ -171,54 +170,6 @@ def run_attacks(attack_input: AttackInputData, privacy_report_metadata=privacy_report_metadata) -def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, - unused_report_metadata: PrivacyReportMetadata = None, - balance_attacker_training: bool = True) -> AttackResults: - """Runs membership inference attacks on a seq2seq model. - - Args: - attack_input: input data for running an attack - unused_report_metadata: the metadata of the model under attack. - balance_attacker_training: Whether the training and test sets for the - membership inference attacker should have a balanced (roughly equal) - number of samples from the training and test sets used to develop the - model under attack. - - Returns: - the attack result. - """ - attack_input.validate() - - # The attacker uses the average rank (a single number) of a seq2seq dataset - # record to determine membership. So only Logistic Regression is supported, - # as it makes the most sense for single-number features. - attacker = models.LogisticRegressionAttacker() - - prepared_attacker_data = models.create_seq2seq_attacker_data( - attack_input, balance=balance_attacker_training) - - attacker.train_model(prepared_attacker_data.features_train, - prepared_attacker_data.is_training_labels_train) - - # Run the attacker on (permuted) test examples. - predictions_test = attacker.predict(prepared_attacker_data.features_test) - - # Generate ROC curves with predictions. - fpr, tpr, thresholds = metrics.roc_curve( - prepared_attacker_data.is_training_labels_test, predictions_test) - - roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) - - attack_results = [ - SingleAttackResult( - slice_spec=SingleSliceSpec(), - attack_type=AttackType.LOGISTIC_REGRESSION, - roc_curve=roc_curve) - ] - - return AttackResults(single_attack_results=attack_results) - - def _compute_missing_privacy_report_metadata( metadata: PrivacyReportMetadata, attack_input: AttackInputData) -> PrivacyReportMetadata: diff --git a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py index 4c80f49..06f7672 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_test.py @@ -16,10 +16,10 @@ """Tests for tensorflow_privacy.privacy.membership_inference_attack.utils.""" from absl.testing import absltest import numpy as np + from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec @@ -35,68 +35,6 @@ def get_test_input(n_train, n_test): labels_test=np.array([i % 5 for i in range(n_test)])) -def get_seq2seq_test_input(n_train, - n_test, - max_seq_in_batch, - max_tokens_in_sequence, - vocab_size, - seed=None): - """Returns example inputs for attacks on seq2seq models.""" - if seed is not None: - np.random.seed(seed=seed) - - logits_train, labels_train = [], [] - for _ in range(n_train): - num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1 - batch_logits, batch_labels = _get_batch_logits_and_labels( - num_sequences, max_tokens_in_sequence, vocab_size) - logits_train.append(batch_logits) - labels_train.append(batch_labels) - - logits_test, labels_test = [], [] - for _ in range(n_test): - num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1 - batch_logits, batch_labels = _get_batch_logits_and_labels( - num_sequences, max_tokens_in_sequence, vocab_size) - logits_test.append(batch_logits) - labels_test.append(batch_labels) - - return Seq2SeqAttackInputData( - logits_train=iter(logits_train), - logits_test=iter(logits_test), - labels_train=iter(labels_train), - labels_test=iter(labels_test), - vocab_size=vocab_size, - train_size=n_train, - test_size=n_test) - - -def _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence, - vocab_size): - num_tokens_in_sequence = np.random.choice(max_tokens_in_sequence, - num_sequences) + 1 - batch_logits, batch_labels = [], [] - for num_tokens in num_tokens_in_sequence: - logits, labels = _get_sequence_logits_and_labels(num_tokens, vocab_size) - batch_logits.append(logits) - batch_labels.append(labels) - return np.array( - batch_logits, dtype=object), np.array( - batch_labels, dtype=object) - - -def _get_sequence_logits_and_labels(num_tokens, vocab_size): - sequence_logits = [] - for _ in range(num_tokens): - token_logits = np.random.random(vocab_size) - token_logits /= token_logits.sum() - sequence_logits.append(token_logits) - sequence_labels = np.random.choice(vocab_size, num_tokens) - return np.array( - sequence_logits, dtype=np.float32), np.array( - sequence_labels, dtype=np.float32) - - class RunAttacksTest(absltest.TestCase): def test_run_attacks_size(self): @@ -160,42 +98,6 @@ class RunAttacksTest(absltest.TestCase): # If accuracy is already present, simply return it. self.assertIsNone(mia._get_accuracy(None, labels)) - def test_run_seq2seq_attack_size(self): - result = mia.run_seq2seq_attack( - get_seq2seq_test_input( - n_train=10, - n_test=5, - max_seq_in_batch=3, - max_tokens_in_sequence=5, - vocab_size=2)) - - self.assertLen(result.single_attack_results, 1) - - def test_run_seq2seq_attack_trained_sets_attack_type(self): - result = mia.run_seq2seq_attack( - get_seq2seq_test_input( - n_train=10, - n_test=5, - max_seq_in_batch=3, - max_tokens_in_sequence=5, - vocab_size=2)) - seq2seq_result = list(result.single_attack_results)[0] - self.assertEqual(seq2seq_result.attack_type, AttackType.LOGISTIC_REGRESSION) - - def test_run_seq2seq_attack_calculates_correct_auc(self): - result = mia.run_seq2seq_attack( - get_seq2seq_test_input( - n_train=20, - n_test=10, - max_seq_in_batch=3, - max_tokens_in_sequence=5, - vocab_size=3, - seed=12345), - balance_attacker_training=False) - seq2seq_result = list(result.single_attack_results)[0] - np.testing.assert_almost_equal( - seq2seq_result.roc_curve.get_auc(), 0.63, decimal=2) - if __name__ == '__main__': absltest.main() diff --git a/tensorflow_privacy/privacy/membership_inference_attack/models.py b/tensorflow_privacy/privacy/membership_inference_attack/models.py index dd33804..54674e0 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/models.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/models.py @@ -15,11 +15,8 @@ # Lint as: python3 """Trained models for membership inference attacks.""" -from typing import Iterator, List - from dataclasses import dataclass import numpy as np -from scipy.stats import rankdata from sklearn import ensemble from sklearn import linear_model from sklearn import model_selection @@ -27,7 +24,6 @@ from sklearn import neighbors from sklearn import neural_network from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData @dataclass @@ -114,98 +110,6 @@ def _column_stack(logits, loss): return np.column_stack((logits, loss)) -def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, - test_fraction: float = 0.25, - balance: bool = True) -> AttackerData: - """Prepare Seq2SeqAttackInputData to train ML attackers. - - Uses logits and losses to generate ranks and performs a random train-test - split. - - Args: - attack_input_data: Original Seq2SeqAttackInputData - test_fraction: Fraction of the dataset to include in the test split. - balance: Whether the training and test sets for the membership inference - attacker should have a balanced (roughly equal) number of samples from the - training and test sets used to develop the model under attack. - - Returns: - AttackerData. - """ - attack_input_train = _get_average_ranks(attack_input_data.logits_train, - attack_input_data.labels_train) - attack_input_test = _get_average_ranks(attack_input_data.logits_test, - attack_input_data.labels_test) - - if balance: - min_size = min(len(attack_input_train), len(attack_input_test)) - attack_input_train = _sample_multidimensional_array(attack_input_train, - min_size) - attack_input_test = _sample_multidimensional_array(attack_input_test, - min_size) - - features_all = np.concatenate((attack_input_train, attack_input_test)) - - # Reshape for classifying one-dimensional features - features_all = features_all.reshape(-1, 1) - - labels_all = np.concatenate( - ((np.zeros(len(attack_input_train))), (np.ones(len(attack_input_test))))) - - # Perform a train-test split - features_train, features_test, \ - is_training_labels_train, is_training_labels_test = \ - model_selection.train_test_split( - features_all, labels_all, test_size=test_fraction, stratify=labels_all) - - return AttackerData(features_train, is_training_labels_train, features_test, - is_training_labels_test) - - -def _get_average_ranks(logits: Iterator[np.ndarray], - labels: Iterator[np.ndarray]) -> np.ndarray: - """Returns the average rank of tokens in a batch of sequences. - - Args: - logits: Logits returned by a seq2seq model, dim = (num_batches, - num_sequences, num_tokens, vocab_size). - labels: Target labels for the seq2seq model, dim = (num_batches, - num_sequences, num_tokens, 1). - - Returns: - An array of average ranks, dim = (num_batches, 1). - Each average rank is calculated over ranks of tokens in sequences of a - particular batch. - """ - ranks = [] - for batch_logits, batch_labels in zip(logits, labels): - batch_ranks = [] - for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels) - ranks.append(np.mean(batch_ranks)) - - return np.array(ranks) - - -def _get_ranks_for_sequence(logits: np.ndarray, - labels: np.ndarray) -> List[float]: - """Returns ranks for a sequence. - - Args: - logits: Logits of a single sequence, dim = (num_tokens, vocab_size). - labels: Target labels of a single sequence, dim = (num_tokens, 1). - - Returns: - An array of ranks for tokens in the sequence, dim = (num_tokens, 1). - """ - sequence_ranks = [] - for logit, label in zip(logits, labels.astype(int)): - rank = rankdata(-logit, method='min')[label] - 1.0 - sequence_ranks.append(rank) - - return sequence_ranks - - class TrainedAttacker: """Base class for training attack models.""" model = None diff --git a/tensorflow_privacy/privacy/membership_inference_attack/models_test.py b/tensorflow_privacy/privacy/membership_inference_attack/models_test.py index eb672c3..8833445 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/models_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/models_test.py @@ -19,7 +19,6 @@ import numpy as np from tensorflow_privacy.privacy.membership_inference_attack import models from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData class TrainedAttackerTest(absltest.TestCase): @@ -56,65 +55,6 @@ class TrainedAttackerTest(absltest.TestCase): expected = feature[:2] not in attack_input.logits_train self.assertEqual(attacker_data.is_training_labels_train[i], expected) - def test_create_seq2seq_attacker_data_logits_and_labels(self): - attack_input = Seq2SeqAttackInputData( - logits_train=iter([ - np.array([ - np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), - np.array([[0.4, 0.5, 0.1]], dtype=np.float32) - ], - dtype=object), - np.array( - [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], - dtype=object), - np.array([ - np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), - np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) - ], - dtype=object) - ]), - logits_test=iter([ - np.array([ - np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) - ], - dtype=object), - np.array([ - np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), - np.array([[0.3, 0.35, 0.35]], dtype=np.float32) - ], - dtype=object) - ]), - labels_train=iter([ - np.array([ - np.array([2, 0], dtype=np.float32), - np.array([1], dtype=np.float32) - ], - dtype=object), - np.array([np.array([1, 0], dtype=np.float32)], dtype=object), - np.array([ - np.array([0, 1], dtype=np.float32), - np.array([1, 2], dtype=np.float32) - ], - dtype=object) - ]), - labels_test=iter([ - np.array([np.array([2, 1], dtype=np.float32)]), - np.array([ - np.array([2, 0], dtype=np.float32), - np.array([1], dtype=np.float32) - ], - dtype=object) - ]), - vocab_size=3, - train_size=3, - test_size=2) - attacker_data = models.create_seq2seq_attacker_data( - attack_input, 0.25, balance=False) - self.assertLen(attacker_data.features_train, 3) - self.assertLen(attacker_data.features_test, 2) - - for _, feature in enumerate(attacker_data.features_train): - self.assertLen(feature, 1) # each feature has one average rank def test_balanced_create_attacker_data_loss_and_logits(self): attack_input = AttackInputData( @@ -131,70 +71,7 @@ class TrainedAttackerTest(absltest.TestCase): expected = feature[:2] not in attack_input.logits_train self.assertEqual(attacker_data.is_training_labels_train[i], expected) - def test_balanced_create_seq2seq_attacker_data_logits_and_labels(self): - attack_input = Seq2SeqAttackInputData( - logits_train=iter([ - np.array([ - np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), - np.array([[0.4, 0.5, 0.1]], dtype=np.float32) - ], - dtype=object), - np.array( - [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], - dtype=object), - np.array([ - np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), - np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) - ], - dtype=object) - ]), - logits_test=iter([ - np.array([ - np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) - ], - dtype=object), - np.array([ - np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), - np.array([[0.3, 0.35, 0.35]], dtype=np.float32) - ], - dtype=object), - np.array([ - np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) - ], - dtype=object) - ]), - labels_train=iter([ - np.array([ - np.array([2, 0], dtype=np.float32), - np.array([1], dtype=np.float32) - ], - dtype=object), - np.array([np.array([1, 0], dtype=np.float32)], dtype=object), - np.array([ - np.array([0, 1], dtype=np.float32), - np.array([1, 2], dtype=np.float32) - ], - dtype=object) - ]), - labels_test=iter([ - np.array([np.array([2, 1], dtype=np.float32)]), - np.array([ - np.array([2, 0], dtype=np.float32), - np.array([1], dtype=np.float32) - ], - dtype=object), - np.array([np.array([2, 1], dtype=np.float32)]) - ]), - vocab_size=3, - train_size=3, - test_size=3) - attacker_data = models.create_seq2seq_attacker_data( - attack_input, 0.33, balance=True) - self.assertLen(attacker_data.features_train, 4) - self.assertLen(attacker_data.features_test, 2) - for _, feature in enumerate(attacker_data.features_train): - self.assertLen(feature, 1) # each feature has one average rank if __name__ == '__main__': diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py new file mode 100644 index 0000000..6225d3f --- /dev/null +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py @@ -0,0 +1,257 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Code for membership inference attacks on seq2seq models. + +Contains seq2seq specific logic for attack data structures, attack data generation, +and the logistic regression membership inference attack. +""" +from typing import Iterator, List + +import numpy as np +from dataclasses import dataclass +from scipy.stats import rankdata +from sklearn import metrics, model_selection + +from tensorflow_privacy.privacy.membership_inference_attack import models +from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata, AttackResults, \ + RocCurve, SingleAttackResult, SingleSliceSpec, AttackType +from tensorflow_privacy.privacy.membership_inference_attack.models import AttackerData, _sample_multidimensional_array + + +def _is_iterator(obj, obj_name): + """Checks whether obj is a generator.""" + if obj is not None and not isinstance(obj, Iterator): + raise ValueError('%s should be a generator.' % obj_name) + + +@dataclass +class Seq2SeqAttackInputData: + """Input data for running an attack on seq2seq models. + + This includes only the data, and not configuration. + """ + logits_train: Iterator[np.ndarray] = None + logits_test: Iterator[np.ndarray] = None + + # Contains ground-truth token indices for the target sequences. + labels_train: Iterator[np.ndarray] = None + labels_test: Iterator[np.ndarray] = None + + # Size of the target sequence vocabulary. + vocab_size: int = None + + # Train, test size = number of batches in training, test set. + # These values need to be supplied by the user as logits, labels + # are lazy loaded for seq2seq models. + train_size: int = 0 + test_size: int = 0 + + def validate(self): + """Validates the inputs.""" + + if (self.logits_train is None) != (self.logits_test is None): + raise ValueError( + 'logits_train and logits_test should both be either set or unset') + + if (self.labels_train is None) != (self.labels_test is None): + raise ValueError( + 'labels_train and labels_test should both be either set or unset') + + if self.logits_train is None or self.labels_train is None: + raise ValueError( + 'Labels, logits of training, test sets should all be set') + + if (self.vocab_size is None or self.train_size is None or + self.test_size is None): + raise ValueError('vocab_size, train_size, test_size should all be set') + + if self.vocab_size is not None and not int: + raise ValueError('vocab_size should be of integer type') + + if self.train_size is not None and not int: + raise ValueError('train_size should be of integer type') + + if self.test_size is not None and not int: + raise ValueError('test_size should be of integer type') + + _is_iterator(self.logits_train, 'logits_train') + _is_iterator(self.logits_test, 'logits_test') + _is_iterator(self.labels_train, 'labels_train') + _is_iterator(self.labels_test, 'labels_test') + + def __str__(self): + """Return the shapes of variables that are not None.""" + result = ['AttackInputData('] + + if self.vocab_size is not None and self.train_size is not None: + result.append( + 'logits_train with shape (%d, num_sequences, num_tokens, %d)' % + (self.train_size, self.vocab_size)) + result.append( + 'labels_train with shape (%d, num_sequences, num_tokens, 1)' % + self.train_size) + + if self.vocab_size is not None and self.test_size is not None: + result.append( + 'logits_test with shape (%d, num_sequences, num_tokens, %d)' % + (self.test_size, self.vocab_size)) + result.append( + 'labels_test with shape (%d, num_sequences, num_tokens, 1)' % + self.test_size) + + result.append(')') + return '\n'.join(result) + + +def _get_average_ranks(logits: Iterator[np.ndarray], + labels: Iterator[np.ndarray]) -> np.ndarray: + """Returns the average rank of tokens in a batch of sequences. + + Args: + logits: Logits returned by a seq2seq model, dim = (num_batches, + num_sequences, num_tokens, vocab_size). + labels: Target labels for the seq2seq model, dim = (num_batches, + num_sequences, num_tokens, 1). + + Returns: + An array of average ranks, dim = (num_batches, 1). + Each average rank is calculated over ranks of tokens in sequences of a + particular batch. + """ + ranks = [] + for batch_logits, batch_labels in zip(logits, labels): + batch_ranks = [] + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels) + ranks.append(np.mean(batch_ranks)) + + return np.array(ranks) + + +def _get_ranks_for_sequence(logits: np.ndarray, + labels: np.ndarray) -> List[float]: + """Returns ranks for a sequence. + + Args: + logits: Logits of a single sequence, dim = (num_tokens, vocab_size). + labels: Target labels of a single sequence, dim = (num_tokens, 1). + + Returns: + An array of ranks for tokens in the sequence, dim = (num_tokens, 1). + """ + sequence_ranks = [] + for logit, label in zip(logits, labels.astype(int)): + rank = rankdata(-logit, method='min')[label] - 1.0 + sequence_ranks.append(rank) + + return sequence_ranks + + +def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, + test_fraction: float = 0.25, + balance: bool = True) -> AttackerData: + """Prepare Seq2SeqAttackInputData to train ML attackers. + + Uses logits and losses to generate ranks and performs a random train-test + split. + + Args: + attack_input_data: Original Seq2SeqAttackInputData + test_fraction: Fraction of the dataset to include in the test split. + balance: Whether the training and test sets for the membership inference + attacker should have a balanced (roughly equal) number of samples from the + training and test sets used to develop the model under attack. + + Returns: + AttackerData. + """ + attack_input_train = _get_average_ranks(attack_input_data.logits_train, + attack_input_data.labels_train) + attack_input_test = _get_average_ranks(attack_input_data.logits_test, + attack_input_data.labels_test) + + if balance: + min_size = min(len(attack_input_train), len(attack_input_test)) + attack_input_train = _sample_multidimensional_array(attack_input_train, + min_size) + attack_input_test = _sample_multidimensional_array(attack_input_test, + min_size) + + features_all = np.concatenate((attack_input_train, attack_input_test)) + + # Reshape for classifying one-dimensional features + features_all = features_all.reshape(-1, 1) + + labels_all = np.concatenate( + ((np.zeros(len(attack_input_train))), (np.ones(len(attack_input_test))))) + + # Perform a train-test split + features_train, features_test, \ + is_training_labels_train, is_training_labels_test = \ + model_selection.train_test_split( + features_all, labels_all, test_size=test_fraction, stratify=labels_all) + + return AttackerData(features_train, is_training_labels_train, features_test, + is_training_labels_test) + + +def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, + unused_report_metadata: PrivacyReportMetadata = None, + balance_attacker_training: bool = True) -> AttackResults: + """Runs membership inference attacks on a seq2seq model. + + Args: + attack_input: input data for running an attack + unused_report_metadata: the metadata of the model under attack. + balance_attacker_training: Whether the training and test sets for the + membership inference attacker should have a balanced (roughly equal) + number of samples from the training and test sets used to develop the + model under attack. + + Returns: + the attack result. + """ + attack_input.validate() + + # The attacker uses the average rank (a single number) of a seq2seq dataset + # record to determine membership. So only Logistic Regression is supported, + # as it makes the most sense for single-number features. + attacker = models.LogisticRegressionAttacker() + + prepared_attacker_data = create_seq2seq_attacker_data( + attack_input, balance=balance_attacker_training) + + attacker.train_model(prepared_attacker_data.features_train, + prepared_attacker_data.is_training_labels_train) + + # Run the attacker on (permuted) test examples. + predictions_test = attacker.predict(prepared_attacker_data.features_test) + + # Generate ROC curves with predictions. + fpr, tpr, thresholds = metrics.roc_curve( + prepared_attacker_data.is_training_labels_test, predictions_test) + + roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds) + + attack_results = [ + SingleAttackResult( + slice_spec=SingleSliceSpec(), + attack_type=AttackType.LOGISTIC_REGRESSION, + roc_curve=roc_curve) + ] + + return AttackResults(single_attack_results=attack_results) + diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py new file mode 100644 index 0000000..2da62c9 --- /dev/null +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py @@ -0,0 +1,320 @@ +# Copyright 2020, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia.""" +from absl.testing import absltest +import numpy as np + +from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \ + create_seq2seq_attacker_data, run_seq2seq_attack + + +class Seq2SeqAttackInputDataTest(absltest.TestCase): + + def test_validator(self): + valid_logits_train = iter([np.array([]), np.array([])]) + valid_logits_test = iter([np.array([]), np.array([])]) + valid_labels_train = iter([np.array([]), np.array([])]) + valid_labels_test = iter([np.array([]), np.array([])]) + + invalid_logits_train = [] + invalid_logits_test = [] + invalid_labels_train = [] + invalid_labels_test = [] + + self.assertRaises( + ValueError, + Seq2SeqAttackInputData(logits_train=valid_logits_train).validate) + self.assertRaises( + ValueError, + Seq2SeqAttackInputData(labels_train=valid_labels_train).validate) + self.assertRaises( + ValueError, + Seq2SeqAttackInputData(logits_test=valid_logits_test).validate) + self.assertRaises( + ValueError, + Seq2SeqAttackInputData(labels_test=valid_labels_test).validate) + self.assertRaises(ValueError, Seq2SeqAttackInputData(vocab_size=0).validate) + self.assertRaises(ValueError, Seq2SeqAttackInputData(train_size=0).validate) + self.assertRaises(ValueError, Seq2SeqAttackInputData(test_size=0).validate) + self.assertRaises(ValueError, Seq2SeqAttackInputData().validate) + + # Tests that both logits and labels must be set. + self.assertRaises( + ValueError, + Seq2SeqAttackInputData( + logits_train=valid_logits_train, + logits_test=valid_logits_test, + vocab_size=0, + train_size=0, + test_size=0).validate) + self.assertRaises( + ValueError, + Seq2SeqAttackInputData( + labels_train=valid_labels_train, + labels_test=valid_labels_test, + vocab_size=0, + train_size=0, + test_size=0).validate) + + # Tests that vocab, train, test sizes must all be set. + self.assertRaises( + ValueError, + Seq2SeqAttackInputData( + logits_train=valid_logits_train, + logits_test=valid_logits_test, + labels_train=valid_labels_train, + labels_test=valid_labels_test).validate) + + self.assertRaises( + ValueError, + Seq2SeqAttackInputData( + logits_train=invalid_logits_train, + logits_test=invalid_logits_test, + labels_train=invalid_labels_train, + labels_test=invalid_labels_test, + vocab_size=0, + train_size=0, + test_size=0).validate) + + +class Seq2SeqTrainedAttackerTest(absltest.TestCase): + + def test_create_seq2seq_attacker_data_logits_and_labels(self): + attack_input = Seq2SeqAttackInputData( + logits_train=iter([ + np.array([ + np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), + np.array([[0.4, 0.5, 0.1]], dtype=np.float32) + ], + dtype=object), + np.array( + [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], + dtype=object), + np.array([ + np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), + np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) + ], + dtype=object) + ]), + logits_test=iter([ + np.array([ + np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) + ], + dtype=object), + np.array([ + np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), + np.array([[0.3, 0.35, 0.35]], dtype=np.float32) + ], + dtype=object) + ]), + labels_train=iter([ + np.array([ + np.array([2, 0], dtype=np.float32), + np.array([1], dtype=np.float32) + ], + dtype=object), + np.array([np.array([1, 0], dtype=np.float32)], dtype=object), + np.array([ + np.array([0, 1], dtype=np.float32), + np.array([1, 2], dtype=np.float32) + ], + dtype=object) + ]), + labels_test=iter([ + np.array([np.array([2, 1], dtype=np.float32)]), + np.array([ + np.array([2, 0], dtype=np.float32), + np.array([1], dtype=np.float32) + ], + dtype=object) + ]), + vocab_size=3, + train_size=3, + test_size=2) + attacker_data = create_seq2seq_attacker_data(attack_input, 0.25, balance=False) + self.assertLen(attacker_data.features_train, 3) + self.assertLen(attacker_data.features_test, 2) + + for _, feature in enumerate(attacker_data.features_train): + self.assertLen(feature, 1) # each feature has one average rank + + + def test_balanced_create_seq2seq_attacker_data_logits_and_labels(self): + attack_input = Seq2SeqAttackInputData( + logits_train=iter([ + np.array([ + np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), + np.array([[0.4, 0.5, 0.1]], dtype=np.float32) + ], + dtype=object), + np.array( + [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], + dtype=object), + np.array([ + np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), + np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) + ], + dtype=object) + ]), + logits_test=iter([ + np.array([ + np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) + ], + dtype=object), + np.array([ + np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), + np.array([[0.3, 0.35, 0.35]], dtype=np.float32) + ], + dtype=object), + np.array([ + np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) + ], + dtype=object) + ]), + labels_train=iter([ + np.array([ + np.array([2, 0], dtype=np.float32), + np.array([1], dtype=np.float32) + ], + dtype=object), + np.array([np.array([1, 0], dtype=np.float32)], dtype=object), + np.array([ + np.array([0, 1], dtype=np.float32), + np.array([1, 2], dtype=np.float32) + ], + dtype=object) + ]), + labels_test=iter([ + np.array([np.array([2, 1], dtype=np.float32)]), + np.array([ + np.array([2, 0], dtype=np.float32), + np.array([1], dtype=np.float32) + ], + dtype=object), + np.array([np.array([2, 1], dtype=np.float32)]) + ]), + vocab_size=3, + train_size=3, + test_size=3) + attacker_data = create_seq2seq_attacker_data(attack_input, 0.33, balance=True) + self.assertLen(attacker_data.features_train, 4) + self.assertLen(attacker_data.features_test, 2) + + for _, feature in enumerate(attacker_data.features_train): + self.assertLen(feature, 1) # each feature has one average rank + + +def _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence, + vocab_size): + num_tokens_in_sequence = np.random.choice(max_tokens_in_sequence, + num_sequences) + 1 + batch_logits, batch_labels = [], [] + for num_tokens in num_tokens_in_sequence: + logits, labels = _get_sequence_logits_and_labels(num_tokens, vocab_size) + batch_logits.append(logits) + batch_labels.append(labels) + return np.array( + batch_logits, dtype=object), np.array( + batch_labels, dtype=object) + + +def _get_sequence_logits_and_labels(num_tokens, vocab_size): + sequence_logits = [] + for _ in range(num_tokens): + token_logits = np.random.random(vocab_size) + token_logits /= token_logits.sum() + sequence_logits.append(token_logits) + sequence_labels = np.random.choice(vocab_size, num_tokens) + return np.array( + sequence_logits, dtype=np.float32), np.array( + sequence_labels, dtype=np.float32) + + +def get_seq2seq_test_input(n_train, + n_test, + max_seq_in_batch, + max_tokens_in_sequence, + vocab_size, + seed=None): + """Returns example inputs for attacks on seq2seq models.""" + if seed is not None: + np.random.seed(seed=seed) + + logits_train, labels_train = [], [] + for _ in range(n_train): + num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1 + batch_logits, batch_labels = _get_batch_logits_and_labels( + num_sequences, max_tokens_in_sequence, vocab_size) + logits_train.append(batch_logits) + labels_train.append(batch_labels) + + logits_test, labels_test = [], [] + for _ in range(n_test): + num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1 + batch_logits, batch_labels = _get_batch_logits_and_labels( + num_sequences, max_tokens_in_sequence, vocab_size) + logits_test.append(batch_logits) + labels_test.append(batch_labels) + + return Seq2SeqAttackInputData( + logits_train=iter(logits_train), + logits_test=iter(logits_test), + labels_train=iter(labels_train), + labels_test=iter(labels_test), + vocab_size=vocab_size, + train_size=n_train, + test_size=n_test) + + +class RunSeq2SeqAttackTest(absltest.TestCase): + + def test_run_seq2seq_attack_size(self): + result = run_seq2seq_attack(get_seq2seq_test_input( + n_train=10, + n_test=5, + max_seq_in_batch=3, + max_tokens_in_sequence=5, + vocab_size=2)) + + self.assertLen(result.single_attack_results, 1) + + def test_run_seq2seq_attack_trained_sets_attack_type(self): + result = run_seq2seq_attack(get_seq2seq_test_input( + n_train=10, + n_test=5, + max_seq_in_batch=3, + max_tokens_in_sequence=5, + vocab_size=2)) + seq2seq_result = list(result.single_attack_results)[0] + self.assertEqual(seq2seq_result.attack_type, AttackType.LOGISTIC_REGRESSION) + + def test_run_seq2seq_attack_calculates_correct_auc(self): + result = run_seq2seq_attack(get_seq2seq_test_input( + n_train=20, + n_test=10, + max_seq_in_batch=3, + max_tokens_in_sequence=5, + vocab_size=3, + seed=12345), + balance_attacker_training=False) + seq2seq_result = list(result.single_attack_results)[0] + np.testing.assert_almost_equal( + seq2seq_result.roc_curve.get_auc(), 0.63, decimal=2) + + +if __name__ == '__main__': + absltest.main() From eb215072bcc18f202c69ed51508185e9bfa9744e Mon Sep 17 00:00:00 2001 From: amad-person Date: Wed, 25 Nov 2020 16:06:37 +0800 Subject: [PATCH 2/5] Compute and populate PrivacyReportMetadata fields --- ...seq2seq_membership_inference_codelab.ipynb | 5 +- .../seq2seq_mia.py | 125 +++++++++++++++--- .../seq2seq_mia_test.py | 41 +++++- 3 files changed, 146 insertions(+), 25 deletions(-) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb b/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb index 4045e77..4c80bd3 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb +++ b/tensorflow_privacy/privacy/membership_inference_attack/codelabs/third_party/seq2seq_membership_inference/seq2seq_membership_inference_codelab.ipynb @@ -1163,7 +1163,10 @@ "fig = plotting.plot_roc_curve(attack_result.get_result_with_max_auc().roc_curve)\n", "\n", "# Print a user-friendly summary of the attacks\n", - "print(attack_result.summary())" + "print(attack_result.summary())\n", + "\n", + "# Print metadata of the target model\n", + "print(attack_result.privacy_report_metadata)" ] } ], diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py index 6225d3f..0cc1daa 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py @@ -25,9 +25,11 @@ from dataclasses import dataclass from scipy.stats import rankdata from sklearn import metrics, model_selection +from tensorflow.keras.metrics import SparseCategoricalCrossentropy, SparseCategoricalAccuracy + from tensorflow_privacy.privacy.membership_inference_attack import models -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata, AttackResults, \ - RocCurve, SingleAttackResult, SingleSliceSpec, AttackType +from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata, \ + AttackResults, RocCurve, SingleAttackResult, SingleSliceSpec, AttackType from tensorflow_privacy.privacy.membership_inference_attack.models import AttackerData, _sample_multidimensional_array @@ -116,9 +118,10 @@ class Seq2SeqAttackInputData: return '\n'.join(result) -def _get_average_ranks(logits: Iterator[np.ndarray], - labels: Iterator[np.ndarray]) -> np.ndarray: - """Returns the average rank of tokens in a batch of sequences. +def _get_attack_features_and_metadata(logits: Iterator[np.ndarray], + labels: Iterator[np.ndarray]) -> (np.ndarray, float): + """Returns the average rank of tokens per batch of sequences, + and the loss computed using logits and labels. Args: logits: Logits returned by a seq2seq model, dim = (num_batches, @@ -127,18 +130,48 @@ def _get_average_ranks(logits: Iterator[np.ndarray], num_sequences, num_tokens, 1). Returns: - An array of average ranks, dim = (num_batches, 1). + 1. An array of average ranks, dim = (num_batches, 1). Each average rank is calculated over ranks of tokens in sequences of a particular batch. + 2. Loss computed over all logits and labels. + 3. Accuracy computed over all logits and labels. """ ranks = [] + loss = SparseCategoricalCrossentropy(from_logits=True) + accuracy = SparseCategoricalAccuracy() for batch_logits, batch_labels in zip(logits, labels): - batch_ranks = [] - for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels) + # Compute average rank for the current batch. + batch_ranks = _get_batch_ranks(batch_logits, batch_labels) ranks.append(np.mean(batch_ranks)) - return np.array(ranks) + # Update overall loss with loss of the current batch. + _update_batch_loss(batch_logits, batch_labels, loss) + + # Update overall accuracy with accuracy of the current batch. + _update_batch_accuracy(batch_logits, batch_labels, accuracy) + + return np.array(ranks), loss.result().numpy(), accuracy.result().numpy() + + +def _get_batch_ranks(batch_logits: np.ndarray, + batch_labels: np.ndarray) -> np.ndarray: + """Returns the ranks of tokens in a batch of sequences. + + Args: + batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, + num_tokens, vocab_size). + batch_labels: Target labels for the seq2seq model, dim = (num_sequences, + num_tokens, 1). + + Returns: + An array of ranks of tokens in a batch of sequences, dim = (num_sequences, + num_tokens, 1) + """ + batch_ranks = [] + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels) + + return np.array(batch_ranks) def _get_ranks_for_sequence(logits: np.ndarray, @@ -160,28 +193,67 @@ def _get_ranks_for_sequence(logits: np.ndarray, return sequence_ranks +def _update_batch_loss(batch_logits: np.ndarray, + batch_labels: np.ndarray, + loss: SparseCategoricalCrossentropy): + """Updates the loss metric per batch. + + Args: + batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, + num_tokens, vocab_size). + batch_labels: Target labels for the seq2seq model, dim = (num_sequences, + num_tokens, 1). + loss: SparseCategoricalCrossentropy loss metric. + """ + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + loss.update_state(sequence_labels.astype(np.float32), + sequence_logits.astype(np.float32)) + + +def _update_batch_accuracy(batch_logits: np.ndarray, + batch_labels: np.ndarray, + accuracy: SparseCategoricalAccuracy): + """Updates the accuracy metric per batch. + + Args: + batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, + num_tokens, vocab_size). + batch_labels: Target labels for the seq2seq model, dim = (num_sequences, + num_tokens, 1). + accuracy: SparseCategoricalAccuracy accuracy metric. + """ + for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): + accuracy.update_state(sequence_labels.astype(np.float32), + sequence_logits.astype(np.float32)) + + def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, test_fraction: float = 0.25, - balance: bool = True) -> AttackerData: + balance: bool = True, + privacy_report_metadata: PrivacyReportMetadata = None) -> AttackerData: """Prepare Seq2SeqAttackInputData to train ML attackers. Uses logits and losses to generate ranks and performs a random train-test split. + Also computes metadata (loss, accuracy) for the model under attack + and populates respective fields of PrivacyReportMetadata. + Args: attack_input_data: Original Seq2SeqAttackInputData test_fraction: Fraction of the dataset to include in the test split. balance: Whether the training and test sets for the membership inference attacker should have a balanced (roughly equal) number of samples from the training and test sets used to develop the model under attack. + privacy_report_metadata: the metadata of the model under attack. Returns: AttackerData. """ - attack_input_train = _get_average_ranks(attack_input_data.logits_train, - attack_input_data.labels_train) - attack_input_test = _get_average_ranks(attack_input_data.logits_test, - attack_input_data.labels_test) + attack_input_train, loss_train, accuracy_train = _get_attack_features_and_metadata(attack_input_data.logits_train, + attack_input_data.labels_train) + attack_input_test, loss_test, accuracy_test = _get_attack_features_and_metadata(attack_input_data.logits_test, + attack_input_data.labels_test) if balance: min_size = min(len(attack_input_train), len(attack_input_test)) @@ -204,18 +276,24 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, model_selection.train_test_split( features_all, labels_all, test_size=test_fraction, stratify=labels_all) + # Populate fields of privacy report metadata + privacy_report_metadata.loss_train = loss_train + privacy_report_metadata.loss_test = loss_test + privacy_report_metadata.accuracy_train = accuracy_train + privacy_report_metadata.accuracy_test = accuracy_test + return AttackerData(features_train, is_training_labels_train, features_test, is_training_labels_test) def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, - unused_report_metadata: PrivacyReportMetadata = None, + privacy_report_metadata: PrivacyReportMetadata = None, balance_attacker_training: bool = True) -> AttackResults: """Runs membership inference attacks on a seq2seq model. Args: attack_input: input data for running an attack - unused_report_metadata: the metadata of the model under attack. + privacy_report_metadata: the metadata of the model under attack. balance_attacker_training: Whether the training and test sets for the membership inference attacker should have a balanced (roughly equal) number of samples from the training and test sets used to develop the @@ -231,8 +309,12 @@ def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, # as it makes the most sense for single-number features. attacker = models.LogisticRegressionAttacker() - prepared_attacker_data = create_seq2seq_attacker_data( - attack_input, balance=balance_attacker_training) + # Create attacker data and populate fields of privacy_report_metadata + if privacy_report_metadata is None: + privacy_report_metadata = PrivacyReportMetadata() + prepared_attacker_data = create_seq2seq_attacker_data(attack_input_data=attack_input, + balance=balance_attacker_training, + privacy_report_metadata=privacy_report_metadata) attacker.train_model(prepared_attacker_data.features_train, prepared_attacker_data.is_training_labels_train) @@ -253,5 +335,6 @@ def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, roc_curve=roc_curve) ] - return AttackResults(single_attack_results=attack_results) - + return AttackResults( + single_attack_results=attack_results, + privacy_report_metadata=privacy_report_metadata) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py index 2da62c9..295b457 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py @@ -17,7 +17,7 @@ from absl.testing import absltest import numpy as np -from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType, PrivacyReportMetadata from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \ create_seq2seq_attacker_data, run_seq2seq_attack @@ -145,13 +145,23 @@ class Seq2SeqTrainedAttackerTest(absltest.TestCase): vocab_size=3, train_size=3, test_size=2) - attacker_data = create_seq2seq_attacker_data(attack_input, 0.25, balance=False) + privacy_report_metadata = PrivacyReportMetadata() + attacker_data = create_seq2seq_attacker_data(attack_input_data=attack_input, + test_fraction=0.25, + balance=False, + privacy_report_metadata=privacy_report_metadata) self.assertLen(attacker_data.features_train, 3) self.assertLen(attacker_data.features_test, 2) for _, feature in enumerate(attacker_data.features_train): self.assertLen(feature, 1) # each feature has one average rank + # Tests that fields of PrivacyReportMetadata are populated. + self.assertIsNotNone(privacy_report_metadata.loss_train) + self.assertIsNotNone(privacy_report_metadata.loss_test) + self.assertIsNotNone(privacy_report_metadata.accuracy_train) + self.assertIsNotNone(privacy_report_metadata.accuracy_test) + def test_balanced_create_seq2seq_attacker_data_logits_and_labels(self): attack_input = Seq2SeqAttackInputData( @@ -210,13 +220,23 @@ class Seq2SeqTrainedAttackerTest(absltest.TestCase): vocab_size=3, train_size=3, test_size=3) - attacker_data = create_seq2seq_attacker_data(attack_input, 0.33, balance=True) + privacy_report_metadata = PrivacyReportMetadata() + attacker_data = create_seq2seq_attacker_data(attack_input_data=attack_input, + test_fraction=0.33, + balance=True, + privacy_report_metadata=privacy_report_metadata) self.assertLen(attacker_data.features_train, 4) self.assertLen(attacker_data.features_test, 2) for _, feature in enumerate(attacker_data.features_train): self.assertLen(feature, 1) # each feature has one average rank + # Tests that fields of PrivacyReportMetadata are populated. + self.assertIsNotNone(privacy_report_metadata.loss_train) + self.assertIsNotNone(privacy_report_metadata.loss_test) + self.assertIsNotNone(privacy_report_metadata.accuracy_train) + self.assertIsNotNone(privacy_report_metadata.accuracy_test) + def _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence, vocab_size): @@ -315,6 +335,21 @@ class RunSeq2SeqAttackTest(absltest.TestCase): np.testing.assert_almost_equal( seq2seq_result.roc_curve.get_auc(), 0.63, decimal=2) + def test_run_seq2seq_attack_calculates_correct_metadata(self): + result = run_seq2seq_attack(get_seq2seq_test_input( + n_train=20, + n_test=10, + max_seq_in_batch=3, + max_tokens_in_sequence=5, + vocab_size=3, + seed=12345), + balance_attacker_training=False) + metadata = result.privacy_report_metadata + np.testing.assert_almost_equal(metadata.loss_train, 1.11, decimal=2) + np.testing.assert_almost_equal(metadata.loss_test, 1.10, decimal=2) + np.testing.assert_almost_equal(metadata.accuracy_train, 0.40, decimal=2) + np.testing.assert_almost_equal(metadata.accuracy_test, 0.34, decimal=2) + if __name__ == '__main__': absltest.main() From 981d5a95f5776b87a951899aa9c33a7f118bcdc4 Mon Sep 17 00:00:00 2001 From: amad-person Date: Fri, 27 Nov 2020 11:59:06 +0800 Subject: [PATCH 3/5] Return loss, accuracy instead of updating args --- .../seq2seq_mia.py | 82 ++++++++++++------- .../seq2seq_mia_test.py | 5 +- 2 files changed, 53 insertions(+), 34 deletions(-) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py index 0cc1daa..87552b9 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py @@ -25,7 +25,9 @@ from dataclasses import dataclass from scipy.stats import rankdata from sklearn import metrics, model_selection -from tensorflow.keras.metrics import SparseCategoricalCrossentropy, SparseCategoricalAccuracy +import tensorflow.keras.backend as K +from tensorflow.keras.losses import sparse_categorical_crossentropy +from tensorflow.keras.metrics import sparse_categorical_accuracy from tensorflow_privacy.privacy.membership_inference_attack import models from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata, \ @@ -95,7 +97,7 @@ class Seq2SeqAttackInputData: _is_iterator(self.labels_test, 'labels_test') def __str__(self): - """Return the shapes of variables that are not None.""" + """Returns the shapes of variables that are not None.""" result = ['AttackInputData('] if self.vocab_size is not None and self.train_size is not None: @@ -119,7 +121,7 @@ class Seq2SeqAttackInputData: def _get_attack_features_and_metadata(logits: Iterator[np.ndarray], - labels: Iterator[np.ndarray]) -> (np.ndarray, float): + labels: Iterator[np.ndarray]) -> (np.ndarray, float, float): """Returns the average rank of tokens per batch of sequences, and the loss computed using logits and labels. @@ -137,20 +139,30 @@ def _get_attack_features_and_metadata(logits: Iterator[np.ndarray], 3. Accuracy computed over all logits and labels. """ ranks = [] - loss = SparseCategoricalCrossentropy(from_logits=True) - accuracy = SparseCategoricalAccuracy() + loss = 0.0 + dataset_length = 0.0 + correct_preds = 0 + total_preds = 0 for batch_logits, batch_labels in zip(logits, labels): # Compute average rank for the current batch. batch_ranks = _get_batch_ranks(batch_logits, batch_labels) ranks.append(np.mean(batch_ranks)) - # Update overall loss with loss of the current batch. - _update_batch_loss(batch_logits, batch_labels, loss) + # Update overall loss metrics with metrics of the current batch. + batch_loss, batch_length = _get_batch_loss_metrics(batch_logits, batch_labels) + loss += batch_loss + dataset_length += batch_length - # Update overall accuracy with accuracy of the current batch. - _update_batch_accuracy(batch_logits, batch_labels, accuracy) + # Update overall accuracy metrics with metrics of the current batch. + batch_correct_preds, batch_total_preds = _get_batch_accuracy_metrics(batch_logits, batch_labels) + correct_preds += batch_correct_preds + total_preds += batch_total_preds - return np.array(ranks), loss.result().numpy(), accuracy.result().numpy() + # Compute loss and accuracy for the dataset. + loss = loss / dataset_length + accuracy = correct_preds / total_preds + + return np.array(ranks), loss, accuracy def _get_batch_ranks(batch_logits: np.ndarray, @@ -193,45 +205,53 @@ def _get_ranks_for_sequence(logits: np.ndarray, return sequence_ranks -def _update_batch_loss(batch_logits: np.ndarray, - batch_labels: np.ndarray, - loss: SparseCategoricalCrossentropy): - """Updates the loss metric per batch. +def _get_batch_loss_metrics(batch_logits: np.ndarray, + batch_labels: np.ndarray) -> (float, int): + """Returns the loss, number of sequences for a batch. Args: batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, num_tokens, vocab_size). batch_labels: Target labels for the seq2seq model, dim = (num_sequences, num_tokens, 1). - loss: SparseCategoricalCrossentropy loss metric. """ + batch_loss = 0.0 + batch_length = len(batch_logits) for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - loss.update_state(sequence_labels.astype(np.float32), - sequence_logits.astype(np.float32)) + sequence_loss = sparse_categorical_crossentropy(K.constant(sequence_labels), + K.constant(sequence_logits), + from_logits=True) + batch_loss += sequence_loss.numpy().sum() + + return batch_loss / batch_length, batch_length -def _update_batch_accuracy(batch_logits: np.ndarray, - batch_labels: np.ndarray, - accuracy: SparseCategoricalAccuracy): - """Updates the accuracy metric per batch. +def _get_batch_accuracy_metrics(batch_logits: np.ndarray, + batch_labels: np.ndarray) -> (float, float): + """Returns the number of correct predictions, total number of predictions for a batch. Args: batch_logits: Logits returned by a seq2seq model, dim = (num_sequences, num_tokens, vocab_size). batch_labels: Target labels for the seq2seq model, dim = (num_sequences, num_tokens, 1). - accuracy: SparseCategoricalAccuracy accuracy metric. """ + batch_correct_preds = 0.0 + batch_total_preds = 0.0 for sequence_logits, sequence_labels in zip(batch_logits, batch_labels): - accuracy.update_state(sequence_labels.astype(np.float32), - sequence_logits.astype(np.float32)) + preds = sparse_categorical_accuracy(K.constant(sequence_labels), + K.constant(sequence_logits)) + batch_correct_preds += preds.numpy().sum() + batch_total_preds += len(sequence_labels) + + return batch_correct_preds, batch_total_preds def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, + privacy_report_metadata: PrivacyReportMetadata, test_fraction: float = 0.25, - balance: bool = True, - privacy_report_metadata: PrivacyReportMetadata = None) -> AttackerData: - """Prepare Seq2SeqAttackInputData to train ML attackers. + balance: bool = True) -> AttackerData: + """Prepares Seq2SeqAttackInputData to train ML attackers. Uses logits and losses to generate ranks and performs a random train-test split. @@ -241,11 +261,11 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, Args: attack_input_data: Original Seq2SeqAttackInputData + privacy_report_metadata: the metadata of the model under attack. test_fraction: Fraction of the dataset to include in the test split. balance: Whether the training and test sets for the membership inference attacker should have a balanced (roughly equal) number of samples from the training and test sets used to develop the model under attack. - privacy_report_metadata: the metadata of the model under attack. Returns: AttackerData. @@ -276,7 +296,8 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, model_selection.train_test_split( features_all, labels_all, test_size=test_fraction, stratify=labels_all) - # Populate fields of privacy report metadata + # Populate accuracy, loss fields in privacy report metadata + privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata() privacy_report_metadata.loss_train = loss_train privacy_report_metadata.loss_test = loss_test privacy_report_metadata.accuracy_train = accuracy_train @@ -310,8 +331,7 @@ def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData, attacker = models.LogisticRegressionAttacker() # Create attacker data and populate fields of privacy_report_metadata - if privacy_report_metadata is None: - privacy_report_metadata = PrivacyReportMetadata() + privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata() prepared_attacker_data = create_seq2seq_attacker_data(attack_input_data=attack_input, balance=balance_attacker_training, privacy_report_metadata=privacy_report_metadata) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py index 295b457..650f696 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py @@ -162,7 +162,6 @@ class Seq2SeqTrainedAttackerTest(absltest.TestCase): self.assertIsNotNone(privacy_report_metadata.accuracy_train) self.assertIsNotNone(privacy_report_metadata.accuracy_test) - def test_balanced_create_seq2seq_attacker_data_logits_and_labels(self): attack_input = Seq2SeqAttackInputData( logits_train=iter([ @@ -345,8 +344,8 @@ class RunSeq2SeqAttackTest(absltest.TestCase): seed=12345), balance_attacker_training=False) metadata = result.privacy_report_metadata - np.testing.assert_almost_equal(metadata.loss_train, 1.11, decimal=2) - np.testing.assert_almost_equal(metadata.loss_test, 1.10, decimal=2) + np.testing.assert_almost_equal(metadata.loss_train, 2.08, decimal=2) + np.testing.assert_almost_equal(metadata.loss_test, 2.02, decimal=2) np.testing.assert_almost_equal(metadata.accuracy_train, 0.40, decimal=2) np.testing.assert_almost_equal(metadata.accuracy_test, 0.34, decimal=2) From 6c7d607e6531add628b6c365d5adb222faa6dcc3 Mon Sep 17 00:00:00 2001 From: amad-person Date: Fri, 27 Nov 2020 18:03:18 +0800 Subject: [PATCH 4/5] Move initialization for privacy_report_metadata to args --- .../privacy/membership_inference_attack/seq2seq_mia.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py index 87552b9..c60e936 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia.py @@ -248,9 +248,10 @@ def _get_batch_accuracy_metrics(batch_logits: np.ndarray, def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, - privacy_report_metadata: PrivacyReportMetadata, test_fraction: float = 0.25, - balance: bool = True) -> AttackerData: + balance: bool = True, + privacy_report_metadata: PrivacyReportMetadata = PrivacyReportMetadata() + ) -> AttackerData: """Prepares Seq2SeqAttackInputData to train ML attackers. Uses logits and losses to generate ranks and performs a random train-test @@ -261,11 +262,11 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, Args: attack_input_data: Original Seq2SeqAttackInputData - privacy_report_metadata: the metadata of the model under attack. test_fraction: Fraction of the dataset to include in the test split. balance: Whether the training and test sets for the membership inference attacker should have a balanced (roughly equal) number of samples from the training and test sets used to develop the model under attack. + privacy_report_metadata: the metadata of the model under attack. Returns: AttackerData. @@ -297,7 +298,6 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData, features_all, labels_all, test_size=test_fraction, stratify=labels_all) # Populate accuracy, loss fields in privacy report metadata - privacy_report_metadata = privacy_report_metadata or PrivacyReportMetadata() privacy_report_metadata.loss_train = loss_train privacy_report_metadata.loss_test = loss_test privacy_report_metadata.accuracy_train = accuracy_train From 31c747cdd82947b04d8f14385b4531ba572c301d Mon Sep 17 00:00:00 2001 From: amad-person Date: Wed, 2 Dec 2020 21:17:33 +0800 Subject: [PATCH 5/5] Use hard-coded attack input for the metadata calculation test --- .../seq2seq_mia_test.py | 122 ++++++++++++------ 1 file changed, 80 insertions(+), 42 deletions(-) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py index 650f696..aef28d8 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/seq2seq_mia_test.py @@ -99,48 +99,41 @@ class Seq2SeqTrainedAttackerTest(absltest.TestCase): np.array([ np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), np.array([[0.4, 0.5, 0.1]], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array( [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], dtype=object), np.array([ np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), logits_test=iter([ np.array([ np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array([ np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), np.array([[0.3, 0.35, 0.35]], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), labels_train=iter([ np.array([ np.array([2, 0], dtype=np.float32), np.array([1], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array([np.array([1, 0], dtype=np.float32)], dtype=object), np.array([ np.array([0, 1], dtype=np.float32), np.array([1, 2], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), labels_test=iter([ np.array([np.array([2, 1], dtype=np.float32)]), np.array([ np.array([2, 0], dtype=np.float32), np.array([1], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), vocab_size=3, train_size=3, @@ -168,52 +161,44 @@ class Seq2SeqTrainedAttackerTest(absltest.TestCase): np.array([ np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), np.array([[0.4, 0.5, 0.1]], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array( [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], dtype=object), np.array([ np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), logits_test=iter([ np.array([ np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array([ np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), np.array([[0.3, 0.35, 0.35]], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array([ np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), labels_train=iter([ np.array([ np.array([2, 0], dtype=np.float32), np.array([1], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array([np.array([1, 0], dtype=np.float32)], dtype=object), np.array([ np.array([0, 1], dtype=np.float32), np.array([1, 2], dtype=np.float32) - ], - dtype=object) + ], dtype=object) ]), labels_test=iter([ np.array([np.array([2, 1], dtype=np.float32)]), np.array([ np.array([2, 0], dtype=np.float32), np.array([1], dtype=np.float32) - ], - dtype=object), + ], dtype=object), np.array([np.array([2, 1], dtype=np.float32)]) ]), vocab_size=3, @@ -335,19 +320,72 @@ class RunSeq2SeqAttackTest(absltest.TestCase): seq2seq_result.roc_curve.get_auc(), 0.63, decimal=2) def test_run_seq2seq_attack_calculates_correct_metadata(self): - result = run_seq2seq_attack(get_seq2seq_test_input( - n_train=20, - n_test=10, - max_seq_in_batch=3, - max_tokens_in_sequence=5, - vocab_size=3, - seed=12345), - balance_attacker_training=False) + attack_input = Seq2SeqAttackInputData( + logits_train=iter([ + np.array([ + np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32), + np.array([[0.4, 0.5, 0.1]], dtype=np.float32) + ], dtype=object), + np.array( + [np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)], + dtype=object), + np.array([ + np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), + np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) + ], dtype=object), + np.array([ + np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32), + np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32) + ], dtype=object) + ]), + logits_test=iter([ + np.array([ + np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) + ], dtype=object), + np.array([ + np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32), + np.array([[0.3, 0.35, 0.35]], dtype=np.float32) + ], dtype=object), + np.array([ + np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) + ], dtype=object), + np.array([ + np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32) + ], dtype=object) + ]), + labels_train=iter([ + np.array([ + np.array([2, 0], dtype=np.float32), + np.array([1], dtype=np.float32) + ], dtype=object), + np.array([np.array([1, 0], dtype=np.float32)], dtype=object), + np.array([ + np.array([0, 1], dtype=np.float32), + np.array([1, 2], dtype=np.float32) + ], dtype=object), + np.array([ + np.array([0, 0], dtype=np.float32), + np.array([0, 1], dtype=np.float32) + ], dtype=object) + ]), + labels_test=iter([ + np.array([np.array([2, 1], dtype=np.float32)]), + np.array([ + np.array([2, 0], dtype=np.float32), + np.array([1], dtype=np.float32) + ], dtype=object), + np.array([np.array([2, 1], dtype=np.float32)]), + np.array([np.array([2, 1], dtype=np.float32)]), + ]), + vocab_size=3, + train_size=4, + test_size=4) + result = run_seq2seq_attack(attack_input, balance_attacker_training=False) metadata = result.privacy_report_metadata - np.testing.assert_almost_equal(metadata.loss_train, 2.08, decimal=2) - np.testing.assert_almost_equal(metadata.loss_test, 2.02, decimal=2) - np.testing.assert_almost_equal(metadata.accuracy_train, 0.40, decimal=2) - np.testing.assert_almost_equal(metadata.accuracy_test, 0.34, decimal=2) + np.testing.assert_almost_equal(metadata.loss_train, 0.91, decimal=2) + np.testing.assert_almost_equal(metadata.loss_test, 1.58, decimal=2) + np.testing.assert_almost_equal(metadata.accuracy_train, 0.77, decimal=2) + np.testing.assert_almost_equal(metadata.accuracy_test, 0.67, decimal=2) if __name__ == '__main__':