amad-person 2020-11-08 21:59:05 +08:00
parent ed2bdcadfa
commit afe3944b1d
3 changed files with 23 additions and 23 deletions

View file

@@ -374,10 +374,10 @@ def _append_array_shape(arr: np.array, arr_name: str, result):
     result.append(' %s with shape: %s,' % (arr_name, arr.shape))
 
 
-def _is_generator(gen, gen_name):
-  """Checks whether gen is a generator."""
-  if gen is not None and not isinstance(gen, Iterator):
-    raise ValueError('%s should be a generator.' % gen_name)
+def _is_iterator(obj, obj_name):
+  """Checks whether obj is a generator."""
+  if obj is not None and not isinstance(obj, Iterator):
+    raise ValueError('%s should be a generator.' % obj_name)
 
 
 @dataclass
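The rename reflects what the check actually tests: membership in Iterator, which covers generators as well as any other iterator. A rough, standalone sketch of the renamed helper's behaviour (not part of the diff; the typing import is assumed to match the module's existing imports):

from typing import Iterator

def _is_iterator(obj, obj_name):
  """Checks whether obj is a generator."""
  if obj is not None and not isinstance(obj, Iterator):
    raise ValueError('%s should be a generator.' % obj_name)

_is_iterator(iter([1, 2, 3]), 'logits_train')         # passes: a list_iterator is an Iterator
_is_iterator((x for x in range(3)), 'labels_train')   # passes: generators are Iterators too
_is_iterator(None, 'logits_test')                     # passes: None fields are skipped
_is_iterator([1, 2, 3], 'logits_test')                # raises ValueError: a plain list is not an Iterator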
@@ -393,7 +393,7 @@ class Seq2SeqAttackInputData:
   labels_train: Iterator[np.ndarray] = None
   labels_test: Iterator[np.ndarray] = None
 
-  # Denotes size of the target sequence vocabulary.
+  # Size of the target sequence vocabulary.
   vocab_size: int = None
 
   # Train, test size = number of batches in training, test set.
@@ -431,10 +431,10 @@ class Seq2SeqAttackInputData:
     if self.test_size is not None and not int:
       raise ValueError('test_size should be of integer type')
 
-    _is_generator(self.logits_train, 'logits_train')
-    _is_generator(self.logits_test, 'logits_test')
-    _is_generator(self.labels_train, 'labels_train')
-    _is_generator(self.labels_test, 'labels_test')
+    _is_iterator(self.logits_train, 'logits_train')
+    _is_iterator(self.logits_test, 'logits_test')
+    _is_iterator(self.labels_train, 'labels_train')
+    _is_iterator(self.labels_test, 'labels_test')
 
   def __str__(self):
     """Return the shapes of variables that are not None."""

View file

@@ -36,20 +36,18 @@ def get_test_input(n_train, n_test):
 def get_seq2seq_test_input(n_train, n_test, max_seq_in_batch, max_tokens_in_sequence, vocab_size, seed=None):
-  """Get example inputs for attacks on seq2seq models."""
+  """Returns example inputs for attacks on seq2seq models."""
   if seed is not None:
     np.random.seed(seed=seed)
 
-  logits_train = []
-  labels_train = []
+  logits_train, labels_train = [], []
   for i in range(n_train):
     num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1
     batch_logits, batch_labels = _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence, vocab_size)
     logits_train.append(batch_logits)
     labels_train.append(batch_labels)
 
-  logits_test = []
-  labels_test = []
+  logits_test, labels_test = [], []
   for i in range(n_test):
     num_sequences = np.random.choice(max_seq_in_batch, 1)[0] + 1
     batch_logits, batch_labels = _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence, vocab_size)
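The helper _get_batch_logits_and_labels called in both loops is not part of this diff. Purely to make the loops concrete, here is a plausible sketch of such a helper; its signature is taken from the call sites above, but the body and the ragged-batch layout are assumptions:

import numpy as np

def _get_batch_logits_and_labels(num_sequences, max_tokens_in_sequence, vocab_size):
  # Assumed layout: a batch is a list of variable-length sequences, each with
  # per-token logits over the vocabulary and matching integer token labels.
  sequence_lengths = np.random.choice(max_tokens_in_sequence, num_sequences) + 1
  batch_logits = [np.random.rand(n, vocab_size) for n in sequence_lengths]
  batch_labels = [np.random.choice(vocab_size, n).astype(float) for n in sequence_lengths]
  return batch_logits, batch_labels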

View file

@@ -23,6 +23,7 @@ from sklearn import linear_model
 from sklearn import model_selection
 from sklearn import neighbors
 from sklearn import neural_network
+from typing import Iterator, List
 
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import Seq2SeqAttackInputData
@@ -134,8 +135,8 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData,
   attack_input_test = _get_average_ranks(attack_input_data.logits_test, attack_input_data.labels_test)
 
   if balance:
-    min_size = min(attack_input_data.train_size,
-                   attack_input_data.test_size)
+    min_size = min(len(attack_input_train),
+                   len(attack_input_test))
     attack_input_train = _sample_multidimensional_array(attack_input_train,
                                                         min_size)
     attack_input_test = _sample_multidimensional_array(attack_input_test,
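The substantive change in this hunk is that min_size is now computed from the materialized rank arrays rather than from train_size/test_size, which count batches: after _get_average_ranks each row corresponds to one sequence, so len() gives the right unit for balancing. A minimal sketch of the kind of row subsampling _sample_multidimensional_array performs (its body is not in this diff, so the behaviour shown is an assumption):

import numpy as np

def _sample_multidimensional_array(array, size):
  # Assumed behaviour: keep `size` rows, chosen uniformly without replacement.
  indices = np.random.choice(len(array), size, replace=False)
  return array[indices]

attack_input_train = np.random.rand(120)  # per-sequence average ranks (illustrative)
attack_input_test = np.random.rand(80)
min_size = min(len(attack_input_train), len(attack_input_test))
attack_input_train = _sample_multidimensional_array(attack_input_train, min_size)
attack_input_test = _sample_multidimensional_array(attack_input_test, min_size)
assert len(attack_input_train) == len(attack_input_test) == 80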
@@ -159,7 +160,8 @@ def create_seq2seq_attacker_data(attack_input_data: Seq2SeqAttackInputData,
           is_training_labels_test)
 
 
-def _get_average_ranks(logits, labels):
+def _get_average_ranks(logits: Iterator[np.ndarray],
+                       labels: Iterator[np.ndarray]) -> np.ndarray:
   """Returns the average rank of tokens in a batch of sequences.
 
   Args:
@@ -180,7 +182,8 @@ def _get_average_ranks(logits, labels):
   return np.array(ranks)
 
 
-def _get_ranks_for_sequence(logits, labels):
+def _get_ranks_for_sequence(logits: np.ndarray,
+                            labels: np.ndarray) -> List:
   """Returns ranks for a sequence.
 
   Args:
@@ -190,11 +193,10 @@ def _get_ranks_for_sequence(logits, labels):
 
   Returns:
     An array of ranks for tokens in the sequence, dim = (num_tokens, 1).
   """
-  scores = -logits
-  all_ranks = np.empty_like(scores)
-  for i, s in enumerate(scores):
-    all_ranks[i] = rankdata(s, method='min') - 1
-  sequence_ranks = all_ranks[np.arange(len(all_ranks)), labels.astype(int)].tolist()
+  sequence_ranks = []
+  for logit, label in zip(logits, labels.astype(int)):
+    rank = rankdata(-logit, method='min')[label] - 1.0
+    sequence_ranks.append(rank)
 
   return sequence_ranks
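The rewritten loop ranks each token's true label among that step's logits: rankdata(-logit, method='min') assigns rank 1 to the largest logit, and subtracting 1 makes the best possible rank 0. A small worked example using scipy that matches the new loop above; per the docstring of _get_average_ranks, these per-sequence ranks are then averaged into one score per sequence:

import numpy as np
from scipy.stats import rankdata

logits = np.array([[0.1, 0.7, 0.2],    # token 0: label 1 has the highest logit
                   [0.6, 0.3, 0.1]])   # token 1: label 2 has the lowest logit
labels = np.array([1.0, 2.0])

sequence_ranks = []
for logit, label in zip(logits, labels.astype(int)):
  # rank 1 = largest logit; shift to 0-based so a perfectly confident model scores 0
  rank = rankdata(-logit, method='min')[label] - 1.0
  sequence_ranks.append(rank)

print(sequence_ranks)           # [0.0, 2.0]
print(np.mean(sequence_ranks))  # 1.0 -- the average rank collected for this sequence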