Refactor seq2seq logic and tests into separate files

amad-person 2020-11-24 14:52:12 +08:00
parent 35a8096173
commit 46bee91cda
9 changed files with 582 additions and 526 deletions


@@ -1142,8 +1142,8 @@
}
],
"source": [
"from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia\n",
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData\n",
"from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \\\n",
" run_seq2seq_attack\n",
"import tensorflow_privacy.privacy.membership_inference_attack.plotting as plotting\n",
"\n",
"attack_input = Seq2SeqAttackInputData(\n",
@@ -1157,7 +1157,7 @@
")\n",
"\n",
"# Run several attacks for different data slices\n",
"attack_result = mia.run_seq2seq_attack(attack_input)\n",
"attack_result = run_seq2seq_attack(attack_input)\n",
"\n",
"# Plot the ROC curve of the best classifier\n",
"fig = plotting.plot_roc_curve(attack_result.get_result_with_max_auc().roc_curve)\n",


@@ -18,7 +18,7 @@ import enum
import glob
import os
import pickle
from typing import Any, Iterable, Union, Iterator
from typing import Any, Iterable, Union
from dataclasses import dataclass
import numpy as np
@@ -378,91 +378,6 @@ def _append_array_shape(arr: np.array, arr_name: str, result):
result.append(' %s with shape: %s,' % (arr_name, arr.shape))
def _is_iterator(obj, obj_name):
"""Checks whether obj is a generator."""
if obj is not None and not isinstance(obj, Iterator):
raise ValueError('%s should be a generator.' % obj_name)
@dataclass
class Seq2SeqAttackInputData:
"""Input data for running an attack on seq2seq models.
This includes only the data, and not configuration.
"""
logits_train: Iterator[np.ndarray] = None
logits_test: Iterator[np.ndarray] = None
# Contains ground-truth token indices for the target sequences.
labels_train: Iterator[np.ndarray] = None
labels_test: Iterator[np.ndarray] = None
# Size of the target sequence vocabulary.
vocab_size: int = None
# Train, test size = number of batches in training, test set.
# These values need to be supplied by the user as logits, labels
# are lazy loaded for seq2seq models.
train_size: int = 0
test_size: int = 0
def validate(self):
"""Validates the inputs."""
if (self.logits_train is None) != (self.logits_test is None):
raise ValueError(
'logits_train and logits_test should both be either set or unset')
if (self.labels_train is None) != (self.labels_test is None):
raise ValueError(
'labels_train and labels_test should both be either set or unset')
if self.logits_train is None or self.labels_train is None:
raise ValueError(
'Labels, logits of training, test sets should all be set')
if (self.vocab_size is None or self.train_size is None or
self.test_size is None):
raise ValueError('vocab_size, train_size, test_size should all be set')
if self.vocab_size is not None and not int:
raise ValueError('vocab_size should be of integer type')
if self.train_size is not None and not int:
raise ValueError('train_size should be of integer type')
if self.test_size is not None and not int:
raise ValueError('test_size should be of integer type')
_is_iterator(self.logits_train, 'logits_train')
_is_iterator(self.logits_test, 'logits_test')
_is_iterator(self.labels_train, 'labels_train')
_is_iterator(self.labels_test, 'labels_test')
def __str__(self):
"""Return the shapes of variables that are not None."""
result = ['AttackInputData(']
if self.vocab_size is not None and self.train_size is not None:
result.append(
'logits_train with shape (%d, num_sequences, num_tokens, %d)' %
(self.train_size, self.vocab_size))
result.append(
'labels_train with shape (%d, num_sequences, num_tokens, 1)' %
self.train_size)
if self.vocab_size is not None and self.test_size is not None:
result.append(
'logits_test with shape (%d, num_sequences, num_tokens, %d)' %
(self.test_size, self.vocab_size))
result.append(
'labels_test with shape (%d, num_sequences, num_tokens, 1)' %
self.test_size)
result.append(')')
return '\n'.join(result)
@dataclass
class RocCurve:
"""Represents ROC curve of a membership inference classifier."""


@@ -27,7 +27,6 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
@@ -153,75 +152,6 @@ class AttackInputDataTest(absltest.TestCase):
probs_test=np.array([])).validate)
class Seq2SeqAttackInputDataTest(absltest.TestCase):
def test_validator(self):
valid_logits_train = iter([np.array([]), np.array([])])
valid_logits_test = iter([np.array([]), np.array([])])
valid_labels_train = iter([np.array([]), np.array([])])
valid_labels_test = iter([np.array([]), np.array([])])
invalid_logits_train = []
invalid_logits_test = []
invalid_labels_train = []
invalid_labels_test = []
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(logits_train=valid_logits_train).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(labels_train=valid_labels_train).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(logits_test=valid_logits_test).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(labels_test=valid_labels_test).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData(vocab_size=0).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData(train_size=0).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData(test_size=0).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData().validate)
# Tests that both logits and labels must be set.
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
logits_train=valid_logits_train,
logits_test=valid_logits_test,
vocab_size=0,
train_size=0,
test_size=0).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
labels_train=valid_labels_train,
labels_test=valid_labels_test,
vocab_size=0,
train_size=0,
test_size=0).validate)
# Tests that vocab, train, test sizes must all be set.
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
logits_train=valid_logits_train,
logits_test=valid_logits_test,
labels_train=valid_labels_train,
labels_test=valid_labels_test).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
logits_train=invalid_logits_train,
logits_test=invalid_logits_test,
labels_train=invalid_labels_train,
labels_test=invalid_labels_test,
vocab_size=0,
train_size=0,
test_size=0).validate)
class RocCurveTest(absltest.TestCase):
def test_auc_random_classifier(self):


@@ -30,7 +30,6 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import \
PrivacyReportMetadata
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
@@ -171,54 +170,6 @@ def run_attacks(attack_input: AttackInputData,
privacy_report_metadata=privacy_report_metadata)
def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData,
unused_report_metadata: PrivacyReportMetadata = None,
balance_attacker_training: bool = True) -> AttackResults:
"""Runs membership inference attacks on a seq2seq model.
Args:
attack_input: input data for running an attack
unused_report_metadata: the metadata of the model under attack.
balance_attacker_training: Whether the training and test sets for the
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop the
model under attack.
Returns:
the attack result.
"""
attack_input.validate()
# The attacker uses the average rank (a single number) of a seq2seq dataset
# record to determine membership. So only Logistic Regression is supported,
# as it makes the most sense for single-number features.
attacker = models.LogisticRegressionAttacker()
prepared_attacker_data = models.create_seq2seq_attacker_data(
attack_input, balance=balance_attacker_training)
attacker.train_model(prepared_attacker_data.features_train,
prepared_attacker_data.is_training_labels_train)
# Run the attacker on (permuted) test examples.
predictions_test = attacker.predict(prepared_attacker_data.features_test)
# Generate ROC curves with predictions.
fpr, tpr, thresholds = metrics.roc_curve(
prepared_attacker_data.is_training_labels_test, predictions_test)
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
attack_results = [
SingleAttackResult(
slice_spec=SingleSliceSpec(),
attack_type=AttackType.LOGISTIC_REGRESSION,
roc_curve=roc_curve)
]
return AttackResults(single_attack_results=attack_results)
def _compute_missing_privacy_report_metadata(
metadata: PrivacyReportMetadata,
attack_input: AttackInputData) -> PrivacyReportMetadata:


@@ -16,10 +16,10 @@
"""Tests for tensorflow_privacy.privacy.membership_inference_attack.utils."""
from absl.testing import absltest
import numpy as np
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
@@ -35,68 +35,6 @@ def get_test_input(n_train, n_test):
labels_test=np.array([i % 5 for i in range(n_test)]))
def get_seq2seq_test_input(n_train,
n_test,
max_seq_in_batch,
max_tokens_in_sequence,
vocab_size,
seed=None):
"""Returns example inputs for attacks on seq2seq models."""
if seed is not None:
np.random.seed(seed=seed)
logits_train, labels_train = [], []
for _ in range(n_train):
num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1
batch_logits, batch_labels = _get_batch_logits_and_labels(
num_sequences, max_tokens_in_sequence, vocab_size)
logits_train.append(batch_logits)
labels_train.append(batch_labels)
logits_test, labels_test = [], []
for _ in range(n_test):
num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1
batch_logits, batch_labels = _get_batch_logits_and_labels(
num_sequences, max_tokens_in_sequence, vocab_size)
logits_test.append(batch_logits)
labels_test.append(batch_labels)
return Seq2SeqAttackInputData(
logits_train=iter(logits_train),
logits_test=iter(logits_test),
labels_train=iter(labels_train),
labels_test=iter(labels_test),
vocab_size=vocab_size,
train_size=n_train,
test_size=n_test)
def _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence,
vocab_size):
num_tokens_in_sequence = np.random.choice(max_tokens_in_sequence,
num_sequences) + 1
batch_logits, batch_labels = [], []
for num_tokens in num_tokens_in_sequence:
logits, labels = _get_sequence_logits_and_labels(num_tokens, vocab_size)
batch_logits.append(logits)
batch_labels.append(labels)
return np.array(
batch_logits, dtype=object), np.array(
batch_labels, dtype=object)
def _get_sequence_logits_and_labels(num_tokens, vocab_size):
sequence_logits = []
for _ in range(num_tokens):
token_logits = np.random.random(vocab_size)
token_logits /= token_logits.sum()
sequence_logits.append(token_logits)
sequence_labels = np.random.choice(vocab_size, num_tokens)
return np.array(
sequence_logits, dtype=np.float32), np.array(
sequence_labels, dtype=np.float32)
class RunAttacksTest(absltest.TestCase):
def test_run_attacks_size(self):
@@ -160,42 +98,6 @@ class RunAttacksTest(absltest.TestCase):
# If accuracy is already present, simply return it.
self.assertIsNone(mia._get_accuracy(None, labels))
def test_run_seq2seq_attack_size(self):
result = mia.run_seq2seq_attack(
get_seq2seq_test_input(
n_train=10,
n_test=5,
max_seq_in_batch=3,
max_tokens_in_sequence=5,
vocab_size=2))
self.assertLen(result.single_attack_results, 1)
def test_run_seq2seq_attack_trained_sets_attack_type(self):
result = mia.run_seq2seq_attack(
get_seq2seq_test_input(
n_train=10,
n_test=5,
max_seq_in_batch=3,
max_tokens_in_sequence=5,
vocab_size=2))
seq2seq_result = list(result.single_attack_results)[0]
self.assertEqual(seq2seq_result.attack_type, AttackType.LOGISTIC_REGRESSION)
def test_run_seq2seq_attack_calculates_correct_auc(self):
result = mia.run_seq2seq_attack(
get_seq2seq_test_input(
n_train=20,
n_test=10,
max_seq_in_batch=3,
max_tokens_in_sequence=5,
vocab_size=3,
seed=12345),
balance_attacker_training=False)
seq2seq_result = list(result.single_attack_results)[0]
np.testing.assert_almost_equal(
seq2seq_result.roc_curve.get_auc(), 0.63, decimal=2)
if __name__ == '__main__':
absltest.main()


@@ -15,11 +15,8 @@
# Lint as: python3
"""Trained models for membership inference attacks."""
from typing import Iterator, List
from dataclasses import dataclass
import numpy as np
from scipy.stats import rankdata
from sklearn import ensemble
from sklearn import linear_model
from sklearn import model_selection
@@ -27,7 +24,6 @@ from sklearn import neighbors
from sklearn import neural_network
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData
@dataclass
@@ -114,98 +110,6 @@ def _column_stack(logits, loss):
return np.column_stack((logits, loss))
def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData,
test_fraction: float = 0.25,
balance: bool = True) -> AttackerData:
"""Prepare Seq2SeqAttackInputData to train ML attackers.
Uses logits and losses to generate ranks and performs a random train-test
split.
Args:
attack_input_data: Original Seq2SeqAttackInputData
test_fraction: Fraction of the dataset to include in the test split.
balance: Whether the training and test sets for the membership inference
attacker should have a balanced (roughly equal) number of samples from the
training and test sets used to develop the model under attack.
Returns:
AttackerData.
"""
attack_input_train = _get_average_ranks(attack_input_data.logits_train,
attack_input_data.labels_train)
attack_input_test = _get_average_ranks(attack_input_data.logits_test,
attack_input_data.labels_test)
if balance:
min_size = min(len(attack_input_train), len(attack_input_test))
attack_input_train = _sample_multidimensional_array(attack_input_train,
min_size)
attack_input_test = _sample_multidimensional_array(attack_input_test,
min_size)
features_all = np.concatenate((attack_input_train, attack_input_test))
# Reshape for classifying one-dimensional features
features_all = features_all.reshape(-1, 1)
labels_all = np.concatenate(
((np.zeros(len(attack_input_train))), (np.ones(len(attack_input_test)))))
# Perform a train-test split
features_train, features_test, \
is_training_labels_train, is_training_labels_test = \
model_selection.train_test_split(
features_all, labels_all, test_size=test_fraction, stratify=labels_all)
return AttackerData(features_train, is_training_labels_train, features_test,
is_training_labels_test)
def _get_average_ranks(logits: Iterator[np.ndarray],
labels: Iterator[np.ndarray]) -> np.ndarray:
"""Returns the average rank of tokens in a batch of sequences.
Args:
logits: Logits returned by a seq2seq model, dim = (num_batches,
num_sequences, num_tokens, vocab_size).
labels: Target labels for the seq2seq model, dim = (num_batches,
num_sequences, num_tokens, 1).
Returns:
An array of average ranks, dim = (num_batches, 1).
Each average rank is calculated over ranks of tokens in sequences of a
particular batch.
"""
ranks = []
for batch_logits, batch_labels in zip(logits, labels):
batch_ranks = []
for sequence_logits, sequence_labels in zip(batch_logits, batch_labels):
batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels)
ranks.append(np.mean(batch_ranks))
return np.array(ranks)
def _get_ranks_for_sequence(logits: np.ndarray,
labels: np.ndarray) -> List[float]:
"""Returns ranks for a sequence.
Args:
logits: Logits of a single sequence, dim = (num_tokens, vocab_size).
labels: Target labels of a single sequence, dim = (num_tokens, 1).
Returns:
An array of ranks for tokens in the sequence, dim = (num_tokens, 1).
"""
sequence_ranks = []
for logit, label in zip(logits, labels.astype(int)):
rank = rankdata(-logit, method='min')[label] - 1.0
sequence_ranks.append(rank)
return sequence_ranks
class TrainedAttacker:
"""Base class for training attack models."""
model = None


@@ -19,7 +19,6 @@ import numpy as np
from tensorflow_privacy.privacy.membership_inference_attack import models
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData
class TrainedAttackerTest(absltest.TestCase):
@@ -56,65 +55,6 @@ class TrainedAttackerTest(absltest.TestCase):
expected = feature[:2] not in attack_input.logits_train
self.assertEqual(attacker_data.is_training_labels_train[i], expected)
def test_create_seq2seq_attacker_data_logits_and_labels(self):
attack_input = Seq2SeqAttackInputData(
logits_train=iter([
np.array([
np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32),
np.array([[0.4, 0.5, 0.1]], dtype=np.float32)
],
dtype=object),
np.array(
[np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)],
dtype=object),
np.array([
np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32),
np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32)
],
dtype=object)
]),
logits_test=iter([
np.array([
np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32)
],
dtype=object),
np.array([
np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32),
np.array([[0.3, 0.35, 0.35]], dtype=np.float32)
],
dtype=object)
]),
labels_train=iter([
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object),
np.array([np.array([1, 0], dtype=np.float32)], dtype=object),
np.array([
np.array([0, 1], dtype=np.float32),
np.array([1, 2], dtype=np.float32)
],
dtype=object)
]),
labels_test=iter([
np.array([np.array([2, 1], dtype=np.float32)]),
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object)
]),
vocab_size=3,
train_size=3,
test_size=2)
attacker_data = models.create_seq2seq_attacker_data(
attack_input, 0.25, balance=False)
self.assertLen(attacker_data.features_train, 3)
self.assertLen(attacker_data.features_test, 2)
for _, feature in enumerate(attacker_data.features_train):
self.assertLen(feature, 1) # each feature has one average rank
def test_balanced_create_attacker_data_loss_and_logits(self):
attack_input = AttackInputData(
@@ -131,70 +71,7 @@ class TrainedAttackerTest(absltest.TestCase):
expected = feature[:2] not in attack_input.logits_train
self.assertEqual(attacker_data.is_training_labels_train[i], expected)
def test_balanced_create_seq2seq_attacker_data_logits_and_labels(self):
attack_input = Seq2SeqAttackInputData(
logits_train=iter([
np.array([
np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32),
np.array([[0.4, 0.5, 0.1]], dtype=np.float32)
],
dtype=object),
np.array(
[np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)],
dtype=object),
np.array([
np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32),
np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32)
],
dtype=object)
]),
logits_test=iter([
np.array([
np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32)
],
dtype=object),
np.array([
np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32),
np.array([[0.3, 0.35, 0.35]], dtype=np.float32)
],
dtype=object),
np.array([
np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32)
],
dtype=object)
]),
labels_train=iter([
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object),
np.array([np.array([1, 0], dtype=np.float32)], dtype=object),
np.array([
np.array([0, 1], dtype=np.float32),
np.array([1, 2], dtype=np.float32)
],
dtype=object)
]),
labels_test=iter([
np.array([np.array([2, 1], dtype=np.float32)]),
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object),
np.array([np.array([2, 1], dtype=np.float32)])
]),
vocab_size=3,
train_size=3,
test_size=3)
attacker_data = models.create_seq2seq_attacker_data(
attack_input, 0.33, balance=True)
self.assertLen(attacker_data.features_train, 4)
self.assertLen(attacker_data.features_test, 2)
for _, feature in enumerate(attacker_data.features_train):
self.assertLen(feature, 1) # each feature has one average rank
if __name__ == '__main__':


@@ -0,0 +1,257 @@
# Copyright 2020, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Code for membership inference attacks on seq2seq models.
Contains seq2seq specific logic for attack data structures, attack data generation,
and the logistic regression membership inference attack.
"""
from typing import Iterator, List
import numpy as np
from dataclasses import dataclass
from scipy.stats import rankdata
from sklearn import metrics, model_selection
from tensorflow_privacy.privacy.membership_inference_attack import models
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata, AttackResults, \
RocCurve, SingleAttackResult, SingleSliceSpec, AttackType
from tensorflow_privacy.privacy.membership_inference_attack.models import AttackerData, _sample_multidimensional_array
def _is_iterator(obj, obj_name):
"""Checks whether obj is a generator."""
if obj is not None and not isinstance(obj, Iterator):
raise ValueError('%s should be a generator.' % obj_name)
@dataclass
class Seq2SeqAttackInputData:
"""Input data for running an attack on seq2seq models.
This includes only the data, and not configuration.
"""
logits_train: Iterator[np.ndarray] = None
logits_test: Iterator[np.ndarray] = None
# Contains ground-truth token indices for the target sequences.
labels_train: Iterator[np.ndarray] = None
labels_test: Iterator[np.ndarray] = None
# Size of the target sequence vocabulary.
vocab_size: int = None
# Train, test size = number of batches in training, test set.
# These values need to be supplied by the user as logits, labels
# are lazy loaded for seq2seq models.
train_size: int = 0
test_size: int = 0
def validate(self):
"""Validates the inputs."""
if (self.logits_train is None) != (self.logits_test is None):
raise ValueError(
'logits_train and logits_test should both be either set or unset')
if (self.labels_train is None) != (self.labels_test is None):
raise ValueError(
'labels_train and labels_test should both be either set or unset')
if self.logits_train is None or self.labels_train is None:
raise ValueError(
'Labels, logits of training, test sets should all be set')
if (self.vocab_size is None or self.train_size is None or
self.test_size is None):
raise ValueError('vocab_size, train_size, test_size should all be set')
if self.vocab_size is not None and not int:
raise ValueError('vocab_size should be of integer type')
if self.train_size is not None and not int:
raise ValueError('train_size should be of integer type')
if self.test_size is not None and not int:
raise ValueError('test_size should be of integer type')
_is_iterator(self.logits_train, 'logits_train')
_is_iterator(self.logits_test, 'logits_test')
_is_iterator(self.labels_train, 'labels_train')
_is_iterator(self.labels_test, 'labels_test')
def __str__(self):
"""Return the shapes of variables that are not None."""
result = ['AttackInputData(']
if self.vocab_size is not None and self.train_size is not None:
result.append(
'logits_train with shape (%d, num_sequences, num_tokens, %d)' %
(self.train_size, self.vocab_size))
result.append(
'labels_train with shape (%d, num_sequences, num_tokens, 1)' %
self.train_size)
if self.vocab_size is not None and self.test_size is not None:
result.append(
'logits_test with shape (%d, num_sequences, num_tokens, %d)' %
(self.test_size, self.vocab_size))
result.append(
'labels_test with shape (%d, num_sequences, num_tokens, 1)' %
self.test_size)
result.append(')')
return '\n'.join(result)
def _get_average_ranks(logits: Iterator[np.ndarray],
labels: Iterator[np.ndarray]) -> np.ndarray:
"""Returns the average rank of tokens in a batch of sequences.
Args:
logits: Logits returned by a seq2seq model, dim = (num_batches,
num_sequences, num_tokens, vocab_size).
labels: Target labels for the seq2seq model, dim = (num_batches,
num_sequences, num_tokens, 1).
Returns:
An array of average ranks, dim = (num_batches, 1).
Each average rank is calculated over ranks of tokens in sequences of a
particular batch.
"""
ranks = []
for batch_logits, batch_labels in zip(logits, labels):
batch_ranks = []
for sequence_logits, sequence_labels in zip(batch_logits, batch_labels):
batch_ranks += _get_ranks_for_sequence(sequence_logits, sequence_labels)
ranks.append(np.mean(batch_ranks))
return np.array(ranks)
def _get_ranks_for_sequence(logits: np.ndarray,
labels: np.ndarray) -> List[float]:
"""Returns ranks for a sequence.
Args:
logits: Logits of a single sequence, dim = (num_tokens, vocab_size).
labels: Target labels of a single sequence, dim = (num_tokens, 1).
Returns:
An array of ranks for tokens in the sequence, dim = (num_tokens, 1).
"""
sequence_ranks = []
for logit, label in zip(logits, labels.astype(int)):
rank = rankdata(-logit, method='min')[label] - 1.0
sequence_ranks.append(rank)
return sequence_ranks
def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData,
test_fraction: float = 0.25,
balance: bool = True) -> AttackerData:
"""Prepare Seq2SeqAttackInputData to train ML attackers.
Uses logits and losses to generate ranks and performs a random train-test
split.
Args:
attack_input_data: Original Seq2SeqAttackInputData
test_fraction: Fraction of the dataset to include in the test split.
balance: Whether the training and test sets for the membership inference
attacker should have a balanced (roughly equal) number of samples from the
training and test sets used to develop the model under attack.
Returns:
AttackerData.
"""
attack_input_train = _get_average_ranks(attack_input_data.logits_train,
attack_input_data.labels_train)
attack_input_test = _get_average_ranks(attack_input_data.logits_test,
attack_input_data.labels_test)
if balance:
min_size = min(len(attack_input_train), len(attack_input_test))
attack_input_train = _sample_multidimensional_array(attack_input_train,
min_size)
attack_input_test = _sample_multidimensional_array(attack_input_test,
min_size)
features_all = np.concatenate((attack_input_train, attack_input_test))
# Reshape for classifying one-dimensional features
features_all = features_all.reshape(-1, 1)
labels_all = np.concatenate(
((np.zeros(len(attack_input_train))), (np.ones(len(attack_input_test)))))
# Perform a train-test split
features_train, features_test, \
is_training_labels_train, is_training_labels_test = \
model_selection.train_test_split(
features_all, labels_all, test_size=test_fraction, stratify=labels_all)
return AttackerData(features_train, is_training_labels_train, features_test,
is_training_labels_test)
def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData,
unused_report_metadata: PrivacyReportMetadata = None,
balance_attacker_training: bool = True) -> AttackResults:
"""Runs membership inference attacks on a seq2seq model.
Args:
attack_input: input data for running an attack
unused_report_metadata: the metadata of the model under attack.
balance_attacker_training: Whether the training and test sets for the
membership inference attacker should have a balanced (roughly equal)
number of samples from the training and test sets used to develop the
model under attack.
Returns:
the attack result.
"""
attack_input.validate()
# The attacker uses the average rank (a single number) of a seq2seq dataset
# record to determine membership. So only Logistic Regression is supported,
# as it makes the most sense for single-number features.
attacker = models.LogisticRegressionAttacker()
prepared_attacker_data = create_seq2seq_attacker_data(
attack_input, balance=balance_attacker_training)
attacker.train_model(prepared_attacker_data.features_train,
prepared_attacker_data.is_training_labels_train)
# Run the attacker on (permuted) test examples.
predictions_test = attacker.predict(prepared_attacker_data.features_test)
# Generate ROC curves with predictions.
fpr, tpr, thresholds = metrics.roc_curve(
prepared_attacker_data.is_training_labels_test, predictions_test)
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
attack_results = [
SingleAttackResult(
slice_spec=SingleSliceSpec(),
attack_type=AttackType.LOGISTIC_REGRESSION,
roc_curve=roc_curve)
]
return AttackResults(single_attack_results=attack_results)
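To make the average-rank feature concrete: for every target token, the attacker ranks the true token's logit within the vocabulary (0 means it was the model's top choice) and averages those ranks over a batch, producing the single number the logistic regression consumes. A short standalone sketch of the same computation as _get_ranks_for_sequence, using toy numbers that are not from this commit:

import numpy as np
from scipy.stats import rankdata

# One sequence of two tokens over a vocabulary of size 3.
sequence_logits = np.array([[0.1, 0.7, 0.2],   # token 1: top logit at index 1
                            [0.5, 0.3, 0.2]],  # token 2: top logit at index 0
                           dtype=np.float32)
sequence_labels = np.array([1, 2])             # ground-truth token indices

ranks = []
for logit, label in zip(sequence_logits, sequence_labels):
  # rankdata on the negated logits assigns rank 1 to the largest logit, so
  # subtracting 1 makes a correctly predicted token contribute a rank of 0.
  ranks.append(rankdata(-logit, method='min')[label] - 1.0)

print(ranks)           # [0.0, 2.0]: label 1 had the top logit, label 2 the lowest
print(np.mean(ranks))  # 1.0, the per-batch feature fed to the attacker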


@@ -0,0 +1,320 @@
# Copyright 2020, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia."""
from absl.testing import absltest
import numpy as np
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.membership_inference_attack.seq2seq_mia import Seq2SeqAttackInputData, \
create_seq2seq_attacker_data, run_seq2seq_attack
class Seq2SeqAttackInputDataTest(absltest.TestCase):
def test_validator(self):
valid_logits_train = iter([np.array([]), np.array([])])
valid_logits_test = iter([np.array([]), np.array([])])
valid_labels_train = iter([np.array([]), np.array([])])
valid_labels_test = iter([np.array([]), np.array([])])
invalid_logits_train = []
invalid_logits_test = []
invalid_labels_train = []
invalid_labels_test = []
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(logits_train=valid_logits_train).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(labels_train=valid_labels_train).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(logits_test=valid_logits_test).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(labels_test=valid_labels_test).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData(vocab_size=0).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData(train_size=0).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData(test_size=0).validate)
self.assertRaises(ValueError, Seq2SeqAttackInputData().validate)
# Tests that both logits and labels must be set.
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
logits_train=valid_logits_train,
logits_test=valid_logits_test,
vocab_size=0,
train_size=0,
test_size=0).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
labels_train=valid_labels_train,
labels_test=valid_labels_test,
vocab_size=0,
train_size=0,
test_size=0).validate)
# Tests that vocab, train, test sizes must all be set.
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
logits_train=valid_logits_train,
logits_test=valid_logits_test,
labels_train=valid_labels_train,
labels_test=valid_labels_test).validate)
self.assertRaises(
ValueError,
Seq2SeqAttackInputData(
logits_train=invalid_logits_train,
logits_test=invalid_logits_test,
labels_train=invalid_labels_train,
labels_test=invalid_labels_test,
vocab_size=0,
train_size=0,
test_size=0).validate)
class Seq2SeqTrainedAttackerTest(absltest.TestCase):
def test_create_seq2seq_attacker_data_logits_and_labels(self):
attack_input = Seq2SeqAttackInputData(
logits_train=iter([
np.array([
np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32),
np.array([[0.4, 0.5, 0.1]], dtype=np.float32)
],
dtype=object),
np.array(
[np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)],
dtype=object),
np.array([
np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32),
np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32)
],
dtype=object)
]),
logits_test=iter([
np.array([
np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32)
],
dtype=object),
np.array([
np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32),
np.array([[0.3, 0.35, 0.35]], dtype=np.float32)
],
dtype=object)
]),
labels_train=iter([
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object),
np.array([np.array([1, 0], dtype=np.float32)], dtype=object),
np.array([
np.array([0, 1], dtype=np.float32),
np.array([1, 2], dtype=np.float32)
],
dtype=object)
]),
labels_test=iter([
np.array([np.array([2, 1], dtype=np.float32)]),
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object)
]),
vocab_size=3,
train_size=3,
test_size=2)
attacker_data = create_seq2seq_attacker_data(attack_input, 0.25, balance=False)
self.assertLen(attacker_data.features_train, 3)
self.assertLen(attacker_data.features_test, 2)
for _, feature in enumerate(attacker_data.features_train):
self.assertLen(feature, 1) # each feature has one average rank
def test_balanced_create_seq2seq_attacker_data_logits_and_labels(self):
attack_input = Seq2SeqAttackInputData(
logits_train=iter([
np.array([
np.array([[0.1, 0.1, 0.8], [0.7, 0.3, 0]], dtype=np.float32),
np.array([[0.4, 0.5, 0.1]], dtype=np.float32)
],
dtype=object),
np.array(
[np.array([[0.25, 0.6, 0.15], [1, 0, 0]], dtype=np.float32)],
dtype=object),
np.array([
np.array([[0.9, 0, 0.1], [0.25, 0.5, 0.25]], dtype=np.float32),
np.array([[0, 1, 0], [0.2, 0.1, 0.7]], dtype=np.float32)
],
dtype=object)
]),
logits_test=iter([
np.array([
np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32)
],
dtype=object),
np.array([
np.array([[0.3, 0.3, 0.4], [0.4, 0.4, 0.2]], dtype=np.float32),
np.array([[0.3, 0.35, 0.35]], dtype=np.float32)
],
dtype=object),
np.array([
np.array([[0.25, 0.4, 0.35], [0.2, 0.4, 0.4]], dtype=np.float32)
],
dtype=object)
]),
labels_train=iter([
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object),
np.array([np.array([1, 0], dtype=np.float32)], dtype=object),
np.array([
np.array([0, 1], dtype=np.float32),
np.array([1, 2], dtype=np.float32)
],
dtype=object)
]),
labels_test=iter([
np.array([np.array([2, 1], dtype=np.float32)]),
np.array([
np.array([2, 0], dtype=np.float32),
np.array([1], dtype=np.float32)
],
dtype=object),
np.array([np.array([2, 1], dtype=np.float32)])
]),
vocab_size=3,
train_size=3,
test_size=3)
attacker_data = create_seq2seq_attacker_data(attack_input, 0.33, balance=True)
self.assertLen(attacker_data.features_train, 4)
self.assertLen(attacker_data.features_test, 2)
for _, feature in enumerate(attacker_data.features_train):
self.assertLen(feature, 1) # each feature has one average rank
def _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence,
vocab_size):
num_tokens_in_sequence = np.random.choice(max_tokens_in_sequence,
num_sequences) + 1
batch_logits, batch_labels = [], []
for num_tokens in num_tokens_in_sequence:
logits, labels = _get_sequence_logits_and_labels(num_tokens, vocab_size)
batch_logits.append(logits)
batch_labels.append(labels)
return np.array(
batch_logits, dtype=object), np.array(
batch_labels, dtype=object)
def _get_sequence_logits_and_labels(num_tokens, vocab_size):
sequence_logits = []
for _ in range(num_tokens):
token_logits = np.random.random(vocab_size)
token_logits /= token_logits.sum()
sequence_logits.append(token_logits)
sequence_labels = np.random.choice(vocab_size, num_tokens)
return np.array(
sequence_logits, dtype=np.float32), np.array(
sequence_labels, dtype=np.float32)
def get_seq2seq_test_input(n_train,
n_test,
max_seq_in_batch,
max_tokens_in_sequence,
vocab_size,
seed=None):
"""Returns example inputs for attacks on seq2seq models."""
if seed is not None:
np.random.seed(seed=seed)
logits_train, labels_train = [], []
for _ in range(n_train):
num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1
batch_logits, batch_labels = _get_batch_logits_and_labels(
num_sequences, max_tokens_in_sequence, vocab_size)
logits_train.append(batch_logits)
labels_train.append(batch_labels)
logits_test, labels_test = [], []
for _ in range(n_test):
num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1
batch_logits, batch_labels = _get_batch_logits_and_labels(
num_sequences, max_tokens_in_sequence, vocab_size)
logits_test.append(batch_logits)
labels_test.append(batch_labels)
return Seq2SeqAttackInputData(
logits_train=iter(logits_train),
logits_test=iter(logits_test),
labels_train=iter(labels_train),
labels_test=iter(labels_test),
vocab_size=vocab_size,
train_size=n_train,
test_size=n_test)
class RunSeq2SeqAttackTest(absltest.TestCase):
def test_run_seq2seq_attack_size(self):
result = run_seq2seq_attack(get_seq2seq_test_input(
n_train=10,
n_test=5,
max_seq_in_batch=3,
max_tokens_in_sequence=5,
vocab_size=2))
self.assertLen(result.single_attack_results, 1)
def test_run_seq2seq_attack_trained_sets_attack_type(self):
result = run_seq2seq_attack(get_seq2seq_test_input(
n_train=10,
n_test=5,
max_seq_in_batch=3,
max_tokens_in_sequence=5,
vocab_size=2))
seq2seq_result = list(result.single_attack_results)[0]
self.assertEqual(seq2seq_result.attack_type, AttackType.LOGISTIC_REGRESSION)
def test_run_seq2seq_attack_calculates_correct_auc(self):
result = run_seq2seq_attack(get_seq2seq_test_input(
n_train=20,
n_test=10,
max_seq_in_batch=3,
max_tokens_in_sequence=5,
vocab_size=3,
seed=12345),
balance_attacker_training=False)
seq2seq_result = list(result.single_attack_results)[0]
np.testing.assert_almost_equal(
seq2seq_result.roc_curve.get_auc(), 0.63, decimal=2)
if __name__ == '__main__':
absltest.main()
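The 4/2 split asserted in test_balanced_create_seq2seq_attacker_data_logits_and_labels above follows directly from sklearn's stratified split of the concatenated rank features. A standalone sketch of just that step, with made-up rank values (the 0/1 labels mirror create_seq2seq_attacker_data, where 0 marks records from the model's training set and 1 from its test set):

import numpy as np
from sklearn import model_selection

# Average-rank features for 3 training batches and 3 test batches (toy values).
train_ranks = np.array([0.5, 1.0, 0.7])
test_ranks = np.array([1.4, 1.1, 1.6])

features_all = np.concatenate((train_ranks, test_ranks)).reshape(-1, 1)
labels_all = np.concatenate((np.zeros(3), np.ones(3)))

f_train, f_test, y_train, y_test = model_selection.train_test_split(
    features_all, labels_all, test_size=0.33, stratify=labels_all)
print(len(f_train), len(f_test))  # 4 2, matching the balanced test case above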