Adds per-example membership scores to trained attackers.
PiperOrigin-RevId: 431615160
This commit is contained in:
parent
a33afde0c1
commit
767788e9cf
5 changed files with 232 additions and 134 deletions
|
@ -23,6 +23,7 @@ from absl import app
|
|||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import scipy.stats
|
||||
from sklearn import metrics
|
||||
import tensorflow as tf
|
||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures
|
||||
|
@ -69,69 +70,69 @@ def generate_features_and_labels(samples_per_cluster=250, scale=0.1):
|
|||
return (features, labels)
|
||||
|
||||
|
||||
# Hint: Play with "noise_scale" for different levels of overlap between
|
||||
# the generated clusters. More noise makes the classification harder.
|
||||
noise_scale = 2
|
||||
training_features, training_labels = generate_features_and_labels(
|
||||
samples_per_cluster=250, scale=noise_scale)
|
||||
test_features, test_labels = generate_features_and_labels(
|
||||
samples_per_cluster=250, scale=noise_scale)
|
||||
def get_models(num_clusters):
|
||||
"""Get the two models we will be using."""
|
||||
# Hint: play with the number of layers to achieve different level of
|
||||
# over-fitting and observe its effects on membership inference performance.
|
||||
three_layer_model = tf.keras.Sequential([
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
||||
tf.keras.layers.Softmax()
|
||||
])
|
||||
three_layer_model.compile(
|
||||
optimizer="adam",
|
||||
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
|
||||
metrics=["accuracy"])
|
||||
|
||||
num_clusters = int(round(np.max(training_labels))) + 1
|
||||
|
||||
# Hint: play with the number of layers to achieve different level of
|
||||
# over-fitting and observe its effects on membership inference performance.
|
||||
three_layer_model = tf.keras.Sequential([
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
||||
tf.keras.layers.Softmax()
|
||||
])
|
||||
three_layer_model.compile(
|
||||
optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
|
||||
|
||||
two_layer_model = tf.keras.Sequential([
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
||||
tf.keras.layers.Softmax()
|
||||
])
|
||||
two_layer_model.compile(
|
||||
optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
|
||||
|
||||
|
||||
def crossentropy(true_labels, predictions):
|
||||
return tf.keras.backend.eval(
|
||||
tf.keras.metrics.binary_crossentropy(
|
||||
tf.keras.backend.variable(
|
||||
tf.keras.utils.to_categorical(true_labels, num_clusters)),
|
||||
tf.keras.backend.variable(predictions)))
|
||||
two_layer_model = tf.keras.Sequential([
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(300, activation="relu"),
|
||||
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
||||
tf.keras.layers.Softmax()
|
||||
])
|
||||
two_layer_model.compile(
|
||||
optimizer="adam",
|
||||
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
|
||||
metrics=["accuracy"])
|
||||
return three_layer_model, two_layer_model
|
||||
|
||||
|
||||
def main(unused_argv):
|
||||
epoch_results = data_structures.AttackResultsCollection([])
|
||||
# Hint: Play with "noise_scale" for different levels of overlap between
|
||||
# the generated clusters. More noise makes the classification harder.
|
||||
noise_scale = 2
|
||||
training_features, training_labels = generate_features_and_labels(
|
||||
samples_per_cluster=250, scale=noise_scale)
|
||||
test_features, test_labels = generate_features_and_labels(
|
||||
samples_per_cluster=250, scale=noise_scale)
|
||||
|
||||
num_epochs = 2
|
||||
num_clusters = int(round(np.max(training_labels))) + 1
|
||||
|
||||
three_layer_model, two_layer_model = get_models(num_clusters)
|
||||
models = {
|
||||
"two layer model": two_layer_model,
|
||||
"three layer model": three_layer_model,
|
||||
"two_layer_model": two_layer_model,
|
||||
"three_layer_model": three_layer_model,
|
||||
}
|
||||
for model_name in models:
|
||||
# Incrementally train the model and store privacy metrics every num_epochs.
|
||||
for i in range(1, 6):
|
||||
models[model_name].fit(
|
||||
|
||||
num_epochs_per_round = 20
|
||||
epoch_results = data_structures.AttackResultsCollection([])
|
||||
for model_name, model in models.items():
|
||||
print(f"Train {model_name}.")
|
||||
# Incrementally train the model and store privacy metrics
|
||||
# every num_epochs_per_round.
|
||||
for i in range(5):
|
||||
model.fit(
|
||||
training_features,
|
||||
tf.keras.utils.to_categorical(training_labels, num_clusters),
|
||||
validation_data=(test_features,
|
||||
tf.keras.utils.to_categorical(
|
||||
test_labels, num_clusters)),
|
||||
training_labels,
|
||||
validation_data=(test_features, test_labels),
|
||||
batch_size=64,
|
||||
epochs=num_epochs,
|
||||
epochs=num_epochs_per_round,
|
||||
shuffle=True)
|
||||
|
||||
training_pred = models[model_name].predict(training_features)
|
||||
test_pred = models[model_name].predict(test_features)
|
||||
training_pred = model.predict(training_features)
|
||||
test_pred = model.predict(test_features)
|
||||
|
||||
# Add metadata to generate a privacy report.
|
||||
privacy_report_metadata = data_structures.PrivacyReportMetadata(
|
||||
|
@ -139,7 +140,7 @@ def main(unused_argv):
|
|||
training_labels, np.argmax(training_pred, axis=1)),
|
||||
accuracy_test=metrics.accuracy_score(test_labels,
|
||||
np.argmax(test_pred, axis=1)),
|
||||
epoch_num=num_epochs * i,
|
||||
epoch_num=num_epochs_per_round * (i + 1),
|
||||
model_variant_label=model_name)
|
||||
|
||||
attack_results = mia.run_attacks(
|
||||
|
@ -147,9 +148,7 @@ def main(unused_argv):
|
|||
labels_train=training_labels,
|
||||
labels_test=test_labels,
|
||||
probs_train=training_pred,
|
||||
probs_test=test_pred,
|
||||
loss_train=crossentropy(training_labels, training_pred),
|
||||
loss_test=crossentropy(test_labels, test_pred)),
|
||||
probs_test=test_pred),
|
||||
data_structures.SlicingSpec(entire_dataset=True, by_class=True),
|
||||
attack_types=(data_structures.AttackType.THRESHOLD_ATTACK,
|
||||
data_structures.AttackType.LOGISTIC_REGRESSION),
|
||||
|
@ -216,6 +215,39 @@ def main(unused_argv):
|
|||
# For saving a figure into a file:
|
||||
# plotting.save_plot(figure, <file_path>)
|
||||
|
||||
# Let's look at the per-example membership scores. We'll look at how the
|
||||
# scores from threshold and logistic regression attackers correlate.
|
||||
|
||||
# We take the MIA result of the final three layer model
|
||||
sample_model = epoch_results.attack_results_list[-1]
|
||||
print("We will look at the membership scores of",
|
||||
sample_model.privacy_report_metadata.model_variant_label, "at epoch",
|
||||
sample_model.privacy_report_metadata.epoch_num)
|
||||
sample_results = sample_model.single_attack_results
|
||||
|
||||
# The first two entries of sample_results are from the threshold and
|
||||
# logistic regression attackers on the whole dataset.
|
||||
print("Correlation between the scores of the following two attackers:", "\n ",
|
||||
sample_results[0].slice_spec, sample_results[0].attack_type, "\n ",
|
||||
sample_results[1].slice_spec, sample_results[1].attack_type)
|
||||
threshold_results = np.concatenate( # scores by threshold attacker
|
||||
(sample_results[0].membership_scores_train,
|
||||
sample_results[0].membership_scores_test))
|
||||
lr_results = np.concatenate( # scores by logistic regression attacker
|
||||
(sample_results[1].membership_scores_train,
|
||||
sample_results[1].membership_scores_test))
|
||||
|
||||
# Order the scores and plot them
|
||||
threshold_orders = scipy.stats.rankdata(threshold_results)
|
||||
lr_orders = scipy.stats.rankdata(lr_results)
|
||||
|
||||
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
|
||||
axes.scatter(threshold_orders, lr_orders, alpha=0.2, linewidths=0)
|
||||
m, b = np.polyfit(threshold_orders, lr_orders, 1) # linear fit
|
||||
axes.plot(threshold_orders, m * threshold_orders + b, color="orange")
|
||||
axes.set_aspect("equal", adjustable="box")
|
||||
fig.show()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(main)
|
||||
|
|
|
@ -21,6 +21,7 @@ from typing import Iterable
|
|||
|
||||
import numpy as np
|
||||
from sklearn import metrics
|
||||
from sklearn import model_selection
|
||||
|
||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
|
||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
|
||||
|
@ -44,49 +45,61 @@ def _get_slice_spec(data: AttackInputData) -> SingleSliceSpec:
|
|||
return SingleSliceSpec()
|
||||
|
||||
|
||||
# TODO(b/220394926): Allow users to specify their own attack models.
|
||||
def _run_trained_attack(attack_input: AttackInputData,
|
||||
attack_type: AttackType,
|
||||
balance_attacker_training: bool = True):
|
||||
balance_attacker_training: bool = True,
|
||||
cross_validation_folds: int = 2):
|
||||
"""Classification attack done by ML models."""
|
||||
attacker = None
|
||||
|
||||
if attack_type == AttackType.LOGISTIC_REGRESSION:
|
||||
attacker = models.LogisticRegressionAttacker()
|
||||
elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON:
|
||||
attacker = models.MultilayerPerceptronAttacker()
|
||||
elif attack_type == AttackType.RANDOM_FOREST:
|
||||
attacker = models.RandomForestAttacker()
|
||||
elif attack_type == AttackType.K_NEAREST_NEIGHBORS:
|
||||
attacker = models.KNearestNeighborsAttacker()
|
||||
else:
|
||||
raise NotImplementedError('Attack type %s not implemented yet.' %
|
||||
attack_type)
|
||||
|
||||
prepared_attacker_data = models.create_attacker_data(
|
||||
attack_input, balance=balance_attacker_training)
|
||||
indices = prepared_attacker_data.fold_indices
|
||||
left_out_indices = prepared_attacker_data.left_out_indices
|
||||
features = prepared_attacker_data.features_all
|
||||
labels = prepared_attacker_data.labels_all
|
||||
|
||||
attacker.train_model(prepared_attacker_data.features_train,
|
||||
prepared_attacker_data.is_training_labels_train)
|
||||
# We are going to train multiple models on disjoint subsets of the data
|
||||
# (`features`, `labels`), so we can get the membership scores of all samples,
|
||||
# and each example gets its score assigned only once.
|
||||
# An alternative implementation is to train multiple models on overlapping
|
||||
# subsets of the data, and take an average to get the score for each sample.
|
||||
# `scores` will record the membership score of each sample, initialized to nan
|
||||
scores = np.full(features.shape[0], np.nan)
|
||||
|
||||
# Run the attacker on (permuted) test examples.
|
||||
predictions_test = attacker.predict(prepared_attacker_data.features_test)
|
||||
# We use StratifiedKFold to create disjoint subsets of samples. Notice that
|
||||
# the index it returns is with respect to the samples shuffled with `indices`.
|
||||
kf = model_selection.StratifiedKFold(cross_validation_folds, shuffle=False)
|
||||
for train_indices_in_shuffled, test_indices_in_shuffled in kf.split(
|
||||
features[indices], labels[indices]):
|
||||
# `train_indices_in_shuffled` is with respect to the data shuffled with
|
||||
# `indices`. We convert it to `train_indices` to work with the original
|
||||
# data (`features` and 'labels').
|
||||
train_indices = indices[train_indices_in_shuffled]
|
||||
test_indices = indices[test_indices_in_shuffled]
|
||||
# Make sure one sample only got score predicted once
|
||||
assert np.all(np.isnan(scores[test_indices]))
|
||||
|
||||
# Generate ROC curves with predictions.
|
||||
fpr, tpr, thresholds = metrics.roc_curve(
|
||||
prepared_attacker_data.is_training_labels_test, predictions_test)
|
||||
attacker = models.create_attacker(attack_type)
|
||||
attacker.train_model(features[train_indices], labels[train_indices])
|
||||
scores[test_indices] = attacker.predict(features[test_indices])
|
||||
|
||||
# Predict the left out with the last attacker
|
||||
if left_out_indices.size:
|
||||
assert np.all(np.isnan(scores[left_out_indices]))
|
||||
scores[left_out_indices] = attacker.predict(features[left_out_indices])
|
||||
assert not np.any(np.isnan(scores))
|
||||
|
||||
# Generate ROC curves with scores.
|
||||
fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
|
||||
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
|
||||
|
||||
# NOTE: In the current setup we can't obtain membership scores for all
|
||||
# samples, since some of them were used to train the attacker. This can be
|
||||
# fixed by training several attackers to ensure each sample was left out
|
||||
# in exactly one attacker (basically, this means performing cross-validation).
|
||||
# TODO(b/175870479): Implement membership scores for predicted attackers.
|
||||
|
||||
in_train_indices = (labels == 0)
|
||||
return SingleAttackResult(
|
||||
slice_spec=_get_slice_spec(attack_input),
|
||||
data_size=prepared_attacker_data.data_size,
|
||||
attack_type=attack_type,
|
||||
membership_scores_train=scores[in_train_indices],
|
||||
membership_scores_test=scores[~in_train_indices],
|
||||
roc_curve=roc_curve)
|
||||
|
||||
|
||||
|
@ -107,8 +120,8 @@ def _run_threshold_attack(attack_input: AttackInputData):
|
|||
slice_spec=_get_slice_spec(attack_input),
|
||||
data_size=DataSize(ntrain=ntrain, ntest=ntest),
|
||||
attack_type=AttackType.THRESHOLD_ATTACK,
|
||||
membership_scores_train=-attack_input.get_loss_train(),
|
||||
membership_scores_test=-attack_input.get_loss_test(),
|
||||
membership_scores_train=attack_input.get_loss_train(),
|
||||
membership_scores_test=attack_input.get_loss_test(),
|
||||
roc_curve=roc_curve)
|
||||
|
||||
|
||||
|
|
|
@ -90,6 +90,31 @@ class RunAttacksTest(absltest.TestCase):
|
|||
self.assertLen(result.membership_scores_train, 100)
|
||||
self.assertLen(result.membership_scores_test, 50)
|
||||
|
||||
def test_run_attack_trained_sets_membership_scores(self):
|
||||
attack_input = AttackInputData(
|
||||
logits_train=np.tile([500., -500.], (100, 1)),
|
||||
logits_test=np.tile([0., 0.], (50, 1)))
|
||||
|
||||
result = mia._run_trained_attack(
|
||||
attack_input,
|
||||
AttackType.LOGISTIC_REGRESSION,
|
||||
balance_attacker_training=True)
|
||||
self.assertLen(result.membership_scores_train, 100)
|
||||
self.assertLen(result.membership_scores_test, 50)
|
||||
|
||||
# Scores for all training (resp. test) examples should be close
|
||||
np.testing.assert_allclose(
|
||||
result.membership_scores_train,
|
||||
result.membership_scores_train[0],
|
||||
rtol=1e-3)
|
||||
np.testing.assert_allclose(
|
||||
result.membership_scores_test,
|
||||
result.membership_scores_test[0],
|
||||
rtol=1e-3)
|
||||
# Training score should be smaller than test score
|
||||
self.assertLess(result.membership_scores_train[0],
|
||||
result.membership_scores_test[0])
|
||||
|
||||
def test_run_attack_threshold_calculates_correct_auc(self):
|
||||
result = mia._run_attack(
|
||||
AttackInputData(
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
import dataclasses
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
from sklearn import ensemble
|
||||
from sklearn import linear_model
|
||||
|
@ -23,30 +22,34 @@ from sklearn import model_selection
|
|||
from sklearn import neighbors
|
||||
from sklearn import neural_network
|
||||
|
||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
|
||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize
|
||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class AttackerData:
|
||||
"""Input data for an ML classifier attack.
|
||||
|
||||
This includes only the data, and not configuration.
|
||||
Labels in this class correspond to whether an example was in the
|
||||
train or test set.
|
||||
"""
|
||||
# Features of in-training and out-of-training examples.
|
||||
features_all: Optional[np.ndarray] = None
|
||||
# Indicator for whether the example is in-training (0) or out-of-training (1).
|
||||
labels_all: Optional[np.ndarray] = None
|
||||
|
||||
features_train: Optional[np.ndarray] = None
|
||||
# element-wise boolean array denoting if the example was part of training.
|
||||
is_training_labels_train: Optional[np.ndarray] = None
|
||||
# Indices for `features_all` and `labels_all` that are going to be used for
|
||||
# training the attackers.
|
||||
fold_indices: Optional[np.ndarray] = None
|
||||
|
||||
features_test: Optional[np.ndarray] = None
|
||||
# element-wise boolean array denoting if the example was part of training.
|
||||
is_training_labels_test: Optional[np.ndarray] = None
|
||||
# Indices for `features_all` and `labels_all` that were left out due to
|
||||
# balancing. Disjoint from `fold_indices`.
|
||||
left_out_indices: Optional[np.ndarray] = None
|
||||
|
||||
data_size: Optional[DataSize] = None
|
||||
# Number of in-training and out-of-training examples.
|
||||
data_size: Optional[data_structures.DataSize] = None
|
||||
|
||||
|
||||
def create_attacker_data(attack_input_data: AttackInputData,
|
||||
test_fraction: float = 0.25,
|
||||
def create_attacker_data(attack_input_data: data_structures.AttackInputData,
|
||||
balance: bool = True) -> AttackerData:
|
||||
"""Prepare AttackInputData to train ML attackers.
|
||||
|
||||
|
@ -54,7 +57,6 @@ def create_attacker_data(attack_input_data: AttackInputData,
|
|||
|
||||
Args:
|
||||
attack_input_data: Original AttackInputData
|
||||
test_fraction: Fraction of the dataset to include in the test split.
|
||||
balance: Whether the training and test sets for the membership inference
|
||||
attacker should have a balanced (roughly equal) number of samples from the
|
||||
training and test sets used to develop the model under attack.
|
||||
|
@ -67,25 +69,49 @@ def create_attacker_data(attack_input_data: AttackInputData,
|
|||
attack_input_test = _column_stack(attack_input_data.logits_or_probs_test,
|
||||
attack_input_data.get_loss_test())
|
||||
|
||||
if balance:
|
||||
min_size = min(attack_input_data.get_train_size(),
|
||||
attack_input_data.get_test_size())
|
||||
attack_input_train = _sample_multidimensional_array(attack_input_train,
|
||||
min_size)
|
||||
attack_input_test = _sample_multidimensional_array(attack_input_test,
|
||||
min_size)
|
||||
ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]
|
||||
|
||||
features_all = np.concatenate((attack_input_train, attack_input_test))
|
||||
labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))
|
||||
|
||||
labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest))))
|
||||
fold_indices = np.arange(ntrain + ntest)
|
||||
left_out_indices = np.asarray([], dtype=np.int32)
|
||||
|
||||
# Perform a train-test split
|
||||
features_train, features_test, is_training_labels_train, is_training_labels_test = model_selection.train_test_split(
|
||||
features_all, labels_all, test_size=test_fraction, stratify=labels_all)
|
||||
return AttackerData(features_train, is_training_labels_train, features_test,
|
||||
is_training_labels_test,
|
||||
DataSize(ntrain=ntrain, ntest=ntest))
|
||||
if balance:
|
||||
idx_train, idx_test = range(ntrain), range(ntrain, ntrain + ntest)
|
||||
min_size = min(ntrain, ntest)
|
||||
if ntrain > min_size:
|
||||
left_out_size = ntrain - min_size
|
||||
perm_train = np.random.permutation(idx_train) # shuffle training
|
||||
left_out_indices = perm_train[:left_out_size]
|
||||
fold_indices = np.concatenate((perm_train[left_out_size:], idx_test))
|
||||
elif ntest > min_size:
|
||||
left_out_size = ntest - min_size
|
||||
perm_test = np.random.permutation(idx_test) # shuffle test
|
||||
left_out_indices = perm_test[:left_out_size]
|
||||
fold_indices = np.concatenate((perm_test[left_out_size:], idx_train))
|
||||
|
||||
# Shuffle indices for the downstream attackers.
|
||||
fold_indices = np.random.permutation(fold_indices)
|
||||
|
||||
return AttackerData(
|
||||
features_all=features_all,
|
||||
labels_all=labels_all,
|
||||
fold_indices=fold_indices,
|
||||
left_out_indices=left_out_indices,
|
||||
data_size=data_structures.DataSize(ntrain=ntrain, ntest=ntest))
|
||||
|
||||
|
||||
def create_attacker(attack_type):
|
||||
"""Returns the corresponding attacker for the provided attack_type."""
|
||||
if attack_type == data_structures.AttackType.LOGISTIC_REGRESSION:
|
||||
return LogisticRegressionAttacker()
|
||||
if attack_type == data_structures.AttackType.MULTI_LAYERED_PERCEPTRON:
|
||||
return MultilayerPerceptronAttacker()
|
||||
if attack_type == data_structures.AttackType.RANDOM_FOREST:
|
||||
return RandomForestAttacker()
|
||||
if attack_type == data_structures.AttackType.K_NEAREST_NEIGHBORS:
|
||||
return KNearestNeighborsAttacker()
|
||||
raise NotImplementedError('Attack type %s not implemented yet.' % attack_type)
|
||||
|
||||
|
||||
def _sample_multidimensional_array(array, size):
|
||||
|
|
|
@ -33,9 +33,8 @@ class TrainedAttackerTest(absltest.TestCase):
|
|||
def test_create_attacker_data_loss_only(self):
|
||||
attack_input = AttackInputData(
|
||||
loss_train=np.array([1, 3]), loss_test=np.array([2, 4]))
|
||||
attacker_data = models.create_attacker_data(attack_input, 0.5)
|
||||
self.assertLen(attacker_data.features_test, 2)
|
||||
self.assertLen(attacker_data.features_train, 2)
|
||||
attacker_data = models.create_attacker_data(attack_input, 2)
|
||||
self.assertLen(attacker_data.features_all, 4)
|
||||
|
||||
def test_create_attacker_data_loss_and_logits(self):
|
||||
attack_input = AttackInputData(
|
||||
|
@ -43,15 +42,22 @@ class TrainedAttackerTest(absltest.TestCase):
|
|||
logits_test=np.array([[10, 11], [14, 15]]),
|
||||
loss_train=np.array([3, 7, 10]),
|
||||
loss_test=np.array([12, 16]))
|
||||
attacker_data = models.create_attacker_data(
|
||||
attack_input, 0.25, balance=False)
|
||||
self.assertLen(attacker_data.features_test, 2)
|
||||
self.assertLen(attacker_data.features_train, 3)
|
||||
attacker_data = models.create_attacker_data(attack_input, balance=False)
|
||||
self.assertLen(attacker_data.features_all, 5)
|
||||
self.assertLen(attacker_data.fold_indices, 5)
|
||||
self.assertEmpty(attacker_data.left_out_indices)
|
||||
|
||||
for i, feature in enumerate(attacker_data.features_train):
|
||||
self.assertLen(feature, 3) # each feature has two logits and one loss
|
||||
expected = feature[:2] not in attack_input.logits_train
|
||||
self.assertEqual(attacker_data.is_training_labels_train[i], expected)
|
||||
def test_unbalanced_create_attacker_data_loss_and_logits(self):
|
||||
attack_input = AttackInputData(
|
||||
logits_train=np.array([[1, 2], [5, 6], [8, 9]]),
|
||||
logits_test=np.array([[10, 11], [14, 15]]),
|
||||
loss_train=np.array([3, 7, 10]),
|
||||
loss_test=np.array([12, 16]))
|
||||
attacker_data = models.create_attacker_data(attack_input, balance=True)
|
||||
self.assertLen(attacker_data.features_all, 5)
|
||||
self.assertLen(attacker_data.fold_indices, 4)
|
||||
self.assertLen(attacker_data.left_out_indices, 1)
|
||||
self.assertIn(attacker_data.left_out_indices[0], [0, 1, 2])
|
||||
|
||||
def test_balanced_create_attacker_data_loss_and_logits(self):
|
||||
attack_input = AttackInputData(
|
||||
|
@ -59,14 +65,10 @@ class TrainedAttackerTest(absltest.TestCase):
|
|||
logits_test=np.array([[10, 11], [14, 15], [17, 18]]),
|
||||
loss_train=np.array([3, 7, 10]),
|
||||
loss_test=np.array([12, 16, 19]))
|
||||
attacker_data = models.create_attacker_data(attack_input, 0.33)
|
||||
self.assertLen(attacker_data.features_test, 2)
|
||||
self.assertLen(attacker_data.features_train, 4)
|
||||
|
||||
for i, feature in enumerate(attacker_data.features_train):
|
||||
self.assertLen(feature, 3) # each feature has two logits and one loss
|
||||
expected = feature[:2] not in attack_input.logits_train
|
||||
self.assertEqual(attacker_data.is_training_labels_train[i], expected)
|
||||
attacker_data = models.create_attacker_data(attack_input)
|
||||
self.assertLen(attacker_data.features_all, 6)
|
||||
self.assertLen(attacker_data.fold_indices, 6)
|
||||
self.assertEmpty(attacker_data.left_out_indices)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in a new issue