forked from 626_privacy/tensorflow_privacy
Adds per-example membership scores to trained attackers.
PiperOrigin-RevId: 431615160
This commit is contained in:
parent
a33afde0c1
commit
767788e9cf
5 changed files with 232 additions and 134 deletions
|
@ -23,6 +23,7 @@ from absl import app
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import scipy.stats
|
||||||
from sklearn import metrics
|
from sklearn import metrics
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures
|
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures
|
||||||
|
@ -69,69 +70,69 @@ def generate_features_and_labels(samples_per_cluster=250, scale=0.1):
|
||||||
return (features, labels)
|
return (features, labels)
|
||||||
|
|
||||||
|
|
||||||
# Hint: Play with "noise_scale" for different levels of overlap between
|
def get_models(num_clusters):
|
||||||
# the generated clusters. More noise makes the classification harder.
|
"""Get the two models we will be using."""
|
||||||
noise_scale = 2
|
# Hint: play with the number of layers to achieve different level of
|
||||||
training_features, training_labels = generate_features_and_labels(
|
# over-fitting and observe its effects on membership inference performance.
|
||||||
samples_per_cluster=250, scale=noise_scale)
|
three_layer_model = tf.keras.Sequential([
|
||||||
test_features, test_labels = generate_features_and_labels(
|
tf.keras.layers.Dense(300, activation="relu"),
|
||||||
samples_per_cluster=250, scale=noise_scale)
|
tf.keras.layers.Dense(300, activation="relu"),
|
||||||
|
tf.keras.layers.Dense(300, activation="relu"),
|
||||||
|
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
||||||
|
tf.keras.layers.Softmax()
|
||||||
|
])
|
||||||
|
three_layer_model.compile(
|
||||||
|
optimizer="adam",
|
||||||
|
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
|
||||||
|
metrics=["accuracy"])
|
||||||
|
|
||||||
num_clusters = int(round(np.max(training_labels))) + 1
|
two_layer_model = tf.keras.Sequential([
|
||||||
|
tf.keras.layers.Dense(300, activation="relu"),
|
||||||
# Hint: play with the number of layers to achieve different level of
|
tf.keras.layers.Dense(300, activation="relu"),
|
||||||
# over-fitting and observe its effects on membership inference performance.
|
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
||||||
three_layer_model = tf.keras.Sequential([
|
tf.keras.layers.Softmax()
|
||||||
tf.keras.layers.Dense(300, activation="relu"),
|
])
|
||||||
tf.keras.layers.Dense(300, activation="relu"),
|
two_layer_model.compile(
|
||||||
tf.keras.layers.Dense(300, activation="relu"),
|
optimizer="adam",
|
||||||
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
|
||||||
tf.keras.layers.Softmax()
|
metrics=["accuracy"])
|
||||||
])
|
return three_layer_model, two_layer_model
|
||||||
three_layer_model.compile(
|
|
||||||
optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
|
|
||||||
|
|
||||||
two_layer_model = tf.keras.Sequential([
|
|
||||||
tf.keras.layers.Dense(300, activation="relu"),
|
|
||||||
tf.keras.layers.Dense(300, activation="relu"),
|
|
||||||
tf.keras.layers.Dense(num_clusters, activation="relu"),
|
|
||||||
tf.keras.layers.Softmax()
|
|
||||||
])
|
|
||||||
two_layer_model.compile(
|
|
||||||
optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
|
|
||||||
|
|
||||||
|
|
||||||
def crossentropy(true_labels, predictions):
|
|
||||||
return tf.keras.backend.eval(
|
|
||||||
tf.keras.metrics.binary_crossentropy(
|
|
||||||
tf.keras.backend.variable(
|
|
||||||
tf.keras.utils.to_categorical(true_labels, num_clusters)),
|
|
||||||
tf.keras.backend.variable(predictions)))
|
|
||||||
|
|
||||||
|
|
||||||
def main(unused_argv):
|
def main(unused_argv):
|
||||||
epoch_results = data_structures.AttackResultsCollection([])
|
# Hint: Play with "noise_scale" for different levels of overlap between
|
||||||
|
# the generated clusters. More noise makes the classification harder.
|
||||||
|
noise_scale = 2
|
||||||
|
training_features, training_labels = generate_features_and_labels(
|
||||||
|
samples_per_cluster=250, scale=noise_scale)
|
||||||
|
test_features, test_labels = generate_features_and_labels(
|
||||||
|
samples_per_cluster=250, scale=noise_scale)
|
||||||
|
|
||||||
num_epochs = 2
|
num_clusters = int(round(np.max(training_labels))) + 1
|
||||||
|
|
||||||
|
three_layer_model, two_layer_model = get_models(num_clusters)
|
||||||
models = {
|
models = {
|
||||||
"two layer model": two_layer_model,
|
"two_layer_model": two_layer_model,
|
||||||
"three layer model": three_layer_model,
|
"three_layer_model": three_layer_model,
|
||||||
}
|
}
|
||||||
for model_name in models:
|
|
||||||
# Incrementally train the model and store privacy metrics every num_epochs.
|
num_epochs_per_round = 20
|
||||||
for i in range(1, 6):
|
epoch_results = data_structures.AttackResultsCollection([])
|
||||||
models[model_name].fit(
|
for model_name, model in models.items():
|
||||||
|
print(f"Train {model_name}.")
|
||||||
|
# Incrementally train the model and store privacy metrics
|
||||||
|
# every num_epochs_per_round.
|
||||||
|
for i in range(5):
|
||||||
|
model.fit(
|
||||||
training_features,
|
training_features,
|
||||||
tf.keras.utils.to_categorical(training_labels, num_clusters),
|
training_labels,
|
||||||
validation_data=(test_features,
|
validation_data=(test_features, test_labels),
|
||||||
tf.keras.utils.to_categorical(
|
|
||||||
test_labels, num_clusters)),
|
|
||||||
batch_size=64,
|
batch_size=64,
|
||||||
epochs=num_epochs,
|
epochs=num_epochs_per_round,
|
||||||
shuffle=True)
|
shuffle=True)
|
||||||
|
|
||||||
training_pred = models[model_name].predict(training_features)
|
training_pred = model.predict(training_features)
|
||||||
test_pred = models[model_name].predict(test_features)
|
test_pred = model.predict(test_features)
|
||||||
|
|
||||||
# Add metadata to generate a privacy report.
|
# Add metadata to generate a privacy report.
|
||||||
privacy_report_metadata = data_structures.PrivacyReportMetadata(
|
privacy_report_metadata = data_structures.PrivacyReportMetadata(
|
||||||
|
@ -139,7 +140,7 @@ def main(unused_argv):
|
||||||
training_labels, np.argmax(training_pred, axis=1)),
|
training_labels, np.argmax(training_pred, axis=1)),
|
||||||
accuracy_test=metrics.accuracy_score(test_labels,
|
accuracy_test=metrics.accuracy_score(test_labels,
|
||||||
np.argmax(test_pred, axis=1)),
|
np.argmax(test_pred, axis=1)),
|
||||||
epoch_num=num_epochs * i,
|
epoch_num=num_epochs_per_round * (i + 1),
|
||||||
model_variant_label=model_name)
|
model_variant_label=model_name)
|
||||||
|
|
||||||
attack_results = mia.run_attacks(
|
attack_results = mia.run_attacks(
|
||||||
|
@ -147,9 +148,7 @@ def main(unused_argv):
|
||||||
labels_train=training_labels,
|
labels_train=training_labels,
|
||||||
labels_test=test_labels,
|
labels_test=test_labels,
|
||||||
probs_train=training_pred,
|
probs_train=training_pred,
|
||||||
probs_test=test_pred,
|
probs_test=test_pred),
|
||||||
loss_train=crossentropy(training_labels, training_pred),
|
|
||||||
loss_test=crossentropy(test_labels, test_pred)),
|
|
||||||
data_structures.SlicingSpec(entire_dataset=True, by_class=True),
|
data_structures.SlicingSpec(entire_dataset=True, by_class=True),
|
||||||
attack_types=(data_structures.AttackType.THRESHOLD_ATTACK,
|
attack_types=(data_structures.AttackType.THRESHOLD_ATTACK,
|
||||||
data_structures.AttackType.LOGISTIC_REGRESSION),
|
data_structures.AttackType.LOGISTIC_REGRESSION),
|
||||||
|
@ -216,6 +215,39 @@ def main(unused_argv):
|
||||||
# For saving a figure into a file:
|
# For saving a figure into a file:
|
||||||
# plotting.save_plot(figure, <file_path>)
|
# plotting.save_plot(figure, <file_path>)
|
||||||
|
|
||||||
|
# Let's look at the per-example membership scores. We'll look at how the
|
||||||
|
# scores from threshold and logistic regression attackers correlate.
|
||||||
|
|
||||||
|
# We take the MIA result of the final three layer model
|
||||||
|
sample_model = epoch_results.attack_results_list[-1]
|
||||||
|
print("We will look at the membership scores of",
|
||||||
|
sample_model.privacy_report_metadata.model_variant_label, "at epoch",
|
||||||
|
sample_model.privacy_report_metadata.epoch_num)
|
||||||
|
sample_results = sample_model.single_attack_results
|
||||||
|
|
||||||
|
# The first two entries of sample_results are from the threshold and
|
||||||
|
# logistic regression attackers on the whole dataset.
|
||||||
|
print("Correlation between the scores of the following two attackers:", "\n ",
|
||||||
|
sample_results[0].slice_spec, sample_results[0].attack_type, "\n ",
|
||||||
|
sample_results[1].slice_spec, sample_results[1].attack_type)
|
||||||
|
threshold_results = np.concatenate( # scores by threshold attacker
|
||||||
|
(sample_results[0].membership_scores_train,
|
||||||
|
sample_results[0].membership_scores_test))
|
||||||
|
lr_results = np.concatenate( # scores by logistic regression attacker
|
||||||
|
(sample_results[1].membership_scores_train,
|
||||||
|
sample_results[1].membership_scores_test))
|
||||||
|
|
||||||
|
# Order the scores and plot them
|
||||||
|
threshold_orders = scipy.stats.rankdata(threshold_results)
|
||||||
|
lr_orders = scipy.stats.rankdata(lr_results)
|
||||||
|
|
||||||
|
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
|
||||||
|
axes.scatter(threshold_orders, lr_orders, alpha=0.2, linewidths=0)
|
||||||
|
m, b = np.polyfit(threshold_orders, lr_orders, 1) # linear fit
|
||||||
|
axes.plot(threshold_orders, m * threshold_orders + b, color="orange")
|
||||||
|
axes.set_aspect("equal", adjustable="box")
|
||||||
|
fig.show()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(main)
|
app.run(main)
|
||||||
|
|
|
@ -21,6 +21,7 @@ from typing import Iterable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn import metrics
|
from sklearn import metrics
|
||||||
|
from sklearn import model_selection
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
|
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import models
|
||||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
|
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
|
||||||
|
@ -44,49 +45,61 @@ def _get_slice_spec(data: AttackInputData) -> SingleSliceSpec:
|
||||||
return SingleSliceSpec()
|
return SingleSliceSpec()
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(b/220394926): Allow users to specify their own attack models.
|
||||||
def _run_trained_attack(attack_input: AttackInputData,
|
def _run_trained_attack(attack_input: AttackInputData,
|
||||||
attack_type: AttackType,
|
attack_type: AttackType,
|
||||||
balance_attacker_training: bool = True):
|
balance_attacker_training: bool = True,
|
||||||
|
cross_validation_folds: int = 2):
|
||||||
"""Classification attack done by ML models."""
|
"""Classification attack done by ML models."""
|
||||||
attacker = None
|
|
||||||
|
|
||||||
if attack_type == AttackType.LOGISTIC_REGRESSION:
|
|
||||||
attacker = models.LogisticRegressionAttacker()
|
|
||||||
elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON:
|
|
||||||
attacker = models.MultilayerPerceptronAttacker()
|
|
||||||
elif attack_type == AttackType.RANDOM_FOREST:
|
|
||||||
attacker = models.RandomForestAttacker()
|
|
||||||
elif attack_type == AttackType.K_NEAREST_NEIGHBORS:
|
|
||||||
attacker = models.KNearestNeighborsAttacker()
|
|
||||||
else:
|
|
||||||
raise NotImplementedError('Attack type %s not implemented yet.' %
|
|
||||||
attack_type)
|
|
||||||
|
|
||||||
prepared_attacker_data = models.create_attacker_data(
|
prepared_attacker_data = models.create_attacker_data(
|
||||||
attack_input, balance=balance_attacker_training)
|
attack_input, balance=balance_attacker_training)
|
||||||
|
indices = prepared_attacker_data.fold_indices
|
||||||
|
left_out_indices = prepared_attacker_data.left_out_indices
|
||||||
|
features = prepared_attacker_data.features_all
|
||||||
|
labels = prepared_attacker_data.labels_all
|
||||||
|
|
||||||
attacker.train_model(prepared_attacker_data.features_train,
|
# We are going to train multiple models on disjoint subsets of the data
|
||||||
prepared_attacker_data.is_training_labels_train)
|
# (`features`, `labels`), so we can get the membership scores of all samples,
|
||||||
|
# and each example gets its score assigned only once.
|
||||||
|
# An alternative implementation is to train multiple models on overlapping
|
||||||
|
# subsets of the data, and take an average to get the score for each sample.
|
||||||
|
# `scores` will record the membership score of each sample, initialized to nan
|
||||||
|
scores = np.full(features.shape[0], np.nan)
|
||||||
|
|
||||||
# Run the attacker on (permuted) test examples.
|
# We use StratifiedKFold to create disjoint subsets of samples. Notice that
|
||||||
predictions_test = attacker.predict(prepared_attacker_data.features_test)
|
# the index it returns is with respect to the samples shuffled with `indices`.
|
||||||
|
kf = model_selection.StratifiedKFold(cross_validation_folds, shuffle=False)
|
||||||
|
for train_indices_in_shuffled, test_indices_in_shuffled in kf.split(
|
||||||
|
features[indices], labels[indices]):
|
||||||
|
# `train_indices_in_shuffled` is with respect to the data shuffled with
|
||||||
|
# `indices`. We convert it to `train_indices` to work with the original
|
||||||
|
# data (`features` and 'labels').
|
||||||
|
train_indices = indices[train_indices_in_shuffled]
|
||||||
|
test_indices = indices[test_indices_in_shuffled]
|
||||||
|
# Make sure one sample only got score predicted once
|
||||||
|
assert np.all(np.isnan(scores[test_indices]))
|
||||||
|
|
||||||
# Generate ROC curves with predictions.
|
attacker = models.create_attacker(attack_type)
|
||||||
fpr, tpr, thresholds = metrics.roc_curve(
|
attacker.train_model(features[train_indices], labels[train_indices])
|
||||||
prepared_attacker_data.is_training_labels_test, predictions_test)
|
scores[test_indices] = attacker.predict(features[test_indices])
|
||||||
|
|
||||||
|
# Predict the left out with the last attacker
|
||||||
|
if left_out_indices.size:
|
||||||
|
assert np.all(np.isnan(scores[left_out_indices]))
|
||||||
|
scores[left_out_indices] = attacker.predict(features[left_out_indices])
|
||||||
|
assert not np.any(np.isnan(scores))
|
||||||
|
|
||||||
|
# Generate ROC curves with scores.
|
||||||
|
fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
|
||||||
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
|
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
|
||||||
|
|
||||||
# NOTE: In the current setup we can't obtain membership scores for all
|
in_train_indices = (labels == 0)
|
||||||
# samples, since some of them were used to train the attacker. This can be
|
|
||||||
# fixed by training several attackers to ensure each sample was left out
|
|
||||||
# in exactly one attacker (basically, this means performing cross-validation).
|
|
||||||
# TODO(b/175870479): Implement membership scores for predicted attackers.
|
|
||||||
|
|
||||||
return SingleAttackResult(
|
return SingleAttackResult(
|
||||||
slice_spec=_get_slice_spec(attack_input),
|
slice_spec=_get_slice_spec(attack_input),
|
||||||
data_size=prepared_attacker_data.data_size,
|
data_size=prepared_attacker_data.data_size,
|
||||||
attack_type=attack_type,
|
attack_type=attack_type,
|
||||||
|
membership_scores_train=scores[in_train_indices],
|
||||||
|
membership_scores_test=scores[~in_train_indices],
|
||||||
roc_curve=roc_curve)
|
roc_curve=roc_curve)
|
||||||
|
|
||||||
|
|
||||||
|
@ -107,8 +120,8 @@ def _run_threshold_attack(attack_input: AttackInputData):
|
||||||
slice_spec=_get_slice_spec(attack_input),
|
slice_spec=_get_slice_spec(attack_input),
|
||||||
data_size=DataSize(ntrain=ntrain, ntest=ntest),
|
data_size=DataSize(ntrain=ntrain, ntest=ntest),
|
||||||
attack_type=AttackType.THRESHOLD_ATTACK,
|
attack_type=AttackType.THRESHOLD_ATTACK,
|
||||||
membership_scores_train=-attack_input.get_loss_train(),
|
membership_scores_train=attack_input.get_loss_train(),
|
||||||
membership_scores_test=-attack_input.get_loss_test(),
|
membership_scores_test=attack_input.get_loss_test(),
|
||||||
roc_curve=roc_curve)
|
roc_curve=roc_curve)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -90,6 +90,31 @@ class RunAttacksTest(absltest.TestCase):
|
||||||
self.assertLen(result.membership_scores_train, 100)
|
self.assertLen(result.membership_scores_train, 100)
|
||||||
self.assertLen(result.membership_scores_test, 50)
|
self.assertLen(result.membership_scores_test, 50)
|
||||||
|
|
||||||
|
def test_run_attack_trained_sets_membership_scores(self):
|
||||||
|
attack_input = AttackInputData(
|
||||||
|
logits_train=np.tile([500., -500.], (100, 1)),
|
||||||
|
logits_test=np.tile([0., 0.], (50, 1)))
|
||||||
|
|
||||||
|
result = mia._run_trained_attack(
|
||||||
|
attack_input,
|
||||||
|
AttackType.LOGISTIC_REGRESSION,
|
||||||
|
balance_attacker_training=True)
|
||||||
|
self.assertLen(result.membership_scores_train, 100)
|
||||||
|
self.assertLen(result.membership_scores_test, 50)
|
||||||
|
|
||||||
|
# Scores for all training (resp. test) examples should be close
|
||||||
|
np.testing.assert_allclose(
|
||||||
|
result.membership_scores_train,
|
||||||
|
result.membership_scores_train[0],
|
||||||
|
rtol=1e-3)
|
||||||
|
np.testing.assert_allclose(
|
||||||
|
result.membership_scores_test,
|
||||||
|
result.membership_scores_test[0],
|
||||||
|
rtol=1e-3)
|
||||||
|
# Training score should be smaller than test score
|
||||||
|
self.assertLess(result.membership_scores_train[0],
|
||||||
|
result.membership_scores_test[0])
|
||||||
|
|
||||||
def test_run_attack_threshold_calculates_correct_auc(self):
|
def test_run_attack_threshold_calculates_correct_auc(self):
|
||||||
result = mia._run_attack(
|
result = mia._run_attack(
|
||||||
AttackInputData(
|
AttackInputData(
|
||||||
|
|
|
@ -15,7 +15,6 @@
|
||||||
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn import ensemble
|
from sklearn import ensemble
|
||||||
from sklearn import linear_model
|
from sklearn import linear_model
|
||||||
|
@ -23,30 +22,34 @@ from sklearn import model_selection
|
||||||
from sklearn import neighbors
|
from sklearn import neighbors
|
||||||
from sklearn import neural_network
|
from sklearn import neural_network
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
|
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures
|
||||||
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class AttackerData:
|
class AttackerData:
|
||||||
"""Input data for an ML classifier attack.
|
"""Input data for an ML classifier attack.
|
||||||
|
|
||||||
This includes only the data, and not configuration.
|
Labels in this class correspond to whether an example was in the
|
||||||
|
train or test set.
|
||||||
"""
|
"""
|
||||||
|
# Features of in-training and out-of-training examples.
|
||||||
|
features_all: Optional[np.ndarray] = None
|
||||||
|
# Indicator for whether the example is in-training (0) or out-of-training (1).
|
||||||
|
labels_all: Optional[np.ndarray] = None
|
||||||
|
|
||||||
features_train: Optional[np.ndarray] = None
|
# Indices for `features_all` and `labels_all` that are going to be used for
|
||||||
# element-wise boolean array denoting if the example was part of training.
|
# training the attackers.
|
||||||
is_training_labels_train: Optional[np.ndarray] = None
|
fold_indices: Optional[np.ndarray] = None
|
||||||
|
|
||||||
features_test: Optional[np.ndarray] = None
|
# Indices for `features_all` and `labels_all` that were left out due to
|
||||||
# element-wise boolean array denoting if the example was part of training.
|
# balancing. Disjoint from `fold_indices`.
|
||||||
is_training_labels_test: Optional[np.ndarray] = None
|
left_out_indices: Optional[np.ndarray] = None
|
||||||
|
|
||||||
data_size: Optional[DataSize] = None
|
# Number of in-training and out-of-training examples.
|
||||||
|
data_size: Optional[data_structures.DataSize] = None
|
||||||
|
|
||||||
|
|
||||||
def create_attacker_data(attack_input_data: AttackInputData,
|
def create_attacker_data(attack_input_data: data_structures.AttackInputData,
|
||||||
test_fraction: float = 0.25,
|
|
||||||
balance: bool = True) -> AttackerData:
|
balance: bool = True) -> AttackerData:
|
||||||
"""Prepare AttackInputData to train ML attackers.
|
"""Prepare AttackInputData to train ML attackers.
|
||||||
|
|
||||||
|
@ -54,7 +57,6 @@ def create_attacker_data(attack_input_data: AttackInputData,
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
attack_input_data: Original AttackInputData
|
attack_input_data: Original AttackInputData
|
||||||
test_fraction: Fraction of the dataset to include in the test split.
|
|
||||||
balance: Whether the training and test sets for the membership inference
|
balance: Whether the training and test sets for the membership inference
|
||||||
attacker should have a balanced (roughly equal) number of samples from the
|
attacker should have a balanced (roughly equal) number of samples from the
|
||||||
training and test sets used to develop the model under attack.
|
training and test sets used to develop the model under attack.
|
||||||
|
@ -67,25 +69,49 @@ def create_attacker_data(attack_input_data: AttackInputData,
|
||||||
attack_input_test = _column_stack(attack_input_data.logits_or_probs_test,
|
attack_input_test = _column_stack(attack_input_data.logits_or_probs_test,
|
||||||
attack_input_data.get_loss_test())
|
attack_input_data.get_loss_test())
|
||||||
|
|
||||||
if balance:
|
|
||||||
min_size = min(attack_input_data.get_train_size(),
|
|
||||||
attack_input_data.get_test_size())
|
|
||||||
attack_input_train = _sample_multidimensional_array(attack_input_train,
|
|
||||||
min_size)
|
|
||||||
attack_input_test = _sample_multidimensional_array(attack_input_test,
|
|
||||||
min_size)
|
|
||||||
ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]
|
ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]
|
||||||
|
|
||||||
features_all = np.concatenate((attack_input_train, attack_input_test))
|
features_all = np.concatenate((attack_input_train, attack_input_test))
|
||||||
|
labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))
|
||||||
|
|
||||||
labels_all = np.concatenate(((np.zeros(ntrain)), (np.ones(ntest))))
|
fold_indices = np.arange(ntrain + ntest)
|
||||||
|
left_out_indices = np.asarray([], dtype=np.int32)
|
||||||
|
|
||||||
# Perform a train-test split
|
if balance:
|
||||||
features_train, features_test, is_training_labels_train, is_training_labels_test = model_selection.train_test_split(
|
idx_train, idx_test = range(ntrain), range(ntrain, ntrain + ntest)
|
||||||
features_all, labels_all, test_size=test_fraction, stratify=labels_all)
|
min_size = min(ntrain, ntest)
|
||||||
return AttackerData(features_train, is_training_labels_train, features_test,
|
if ntrain > min_size:
|
||||||
is_training_labels_test,
|
left_out_size = ntrain - min_size
|
||||||
DataSize(ntrain=ntrain, ntest=ntest))
|
perm_train = np.random.permutation(idx_train) # shuffle training
|
||||||
|
left_out_indices = perm_train[:left_out_size]
|
||||||
|
fold_indices = np.concatenate((perm_train[left_out_size:], idx_test))
|
||||||
|
elif ntest > min_size:
|
||||||
|
left_out_size = ntest - min_size
|
||||||
|
perm_test = np.random.permutation(idx_test) # shuffle test
|
||||||
|
left_out_indices = perm_test[:left_out_size]
|
||||||
|
fold_indices = np.concatenate((perm_test[left_out_size:], idx_train))
|
||||||
|
|
||||||
|
# Shuffle indices for the downstream attackers.
|
||||||
|
fold_indices = np.random.permutation(fold_indices)
|
||||||
|
|
||||||
|
return AttackerData(
|
||||||
|
features_all=features_all,
|
||||||
|
labels_all=labels_all,
|
||||||
|
fold_indices=fold_indices,
|
||||||
|
left_out_indices=left_out_indices,
|
||||||
|
data_size=data_structures.DataSize(ntrain=ntrain, ntest=ntest))
|
||||||
|
|
||||||
|
|
||||||
|
def create_attacker(attack_type):
|
||||||
|
"""Returns the corresponding attacker for the provided attack_type."""
|
||||||
|
if attack_type == data_structures.AttackType.LOGISTIC_REGRESSION:
|
||||||
|
return LogisticRegressionAttacker()
|
||||||
|
if attack_type == data_structures.AttackType.MULTI_LAYERED_PERCEPTRON:
|
||||||
|
return MultilayerPerceptronAttacker()
|
||||||
|
if attack_type == data_structures.AttackType.RANDOM_FOREST:
|
||||||
|
return RandomForestAttacker()
|
||||||
|
if attack_type == data_structures.AttackType.K_NEAREST_NEIGHBORS:
|
||||||
|
return KNearestNeighborsAttacker()
|
||||||
|
raise NotImplementedError('Attack type %s not implemented yet.' % attack_type)
|
||||||
|
|
||||||
|
|
||||||
def _sample_multidimensional_array(array, size):
|
def _sample_multidimensional_array(array, size):
|
||||||
|
|
|
@ -33,9 +33,8 @@ class TrainedAttackerTest(absltest.TestCase):
|
||||||
def test_create_attacker_data_loss_only(self):
|
def test_create_attacker_data_loss_only(self):
|
||||||
attack_input = AttackInputData(
|
attack_input = AttackInputData(
|
||||||
loss_train=np.array([1, 3]), loss_test=np.array([2, 4]))
|
loss_train=np.array([1, 3]), loss_test=np.array([2, 4]))
|
||||||
attacker_data = models.create_attacker_data(attack_input, 0.5)
|
attacker_data = models.create_attacker_data(attack_input, 2)
|
||||||
self.assertLen(attacker_data.features_test, 2)
|
self.assertLen(attacker_data.features_all, 4)
|
||||||
self.assertLen(attacker_data.features_train, 2)
|
|
||||||
|
|
||||||
def test_create_attacker_data_loss_and_logits(self):
|
def test_create_attacker_data_loss_and_logits(self):
|
||||||
attack_input = AttackInputData(
|
attack_input = AttackInputData(
|
||||||
|
@ -43,15 +42,22 @@ class TrainedAttackerTest(absltest.TestCase):
|
||||||
logits_test=np.array([[10, 11], [14, 15]]),
|
logits_test=np.array([[10, 11], [14, 15]]),
|
||||||
loss_train=np.array([3, 7, 10]),
|
loss_train=np.array([3, 7, 10]),
|
||||||
loss_test=np.array([12, 16]))
|
loss_test=np.array([12, 16]))
|
||||||
attacker_data = models.create_attacker_data(
|
attacker_data = models.create_attacker_data(attack_input, balance=False)
|
||||||
attack_input, 0.25, balance=False)
|
self.assertLen(attacker_data.features_all, 5)
|
||||||
self.assertLen(attacker_data.features_test, 2)
|
self.assertLen(attacker_data.fold_indices, 5)
|
||||||
self.assertLen(attacker_data.features_train, 3)
|
self.assertEmpty(attacker_data.left_out_indices)
|
||||||
|
|
||||||
for i, feature in enumerate(attacker_data.features_train):
|
def test_unbalanced_create_attacker_data_loss_and_logits(self):
|
||||||
self.assertLen(feature, 3) # each feature has two logits and one loss
|
attack_input = AttackInputData(
|
||||||
expected = feature[:2] not in attack_input.logits_train
|
logits_train=np.array([[1, 2], [5, 6], [8, 9]]),
|
||||||
self.assertEqual(attacker_data.is_training_labels_train[i], expected)
|
logits_test=np.array([[10, 11], [14, 15]]),
|
||||||
|
loss_train=np.array([3, 7, 10]),
|
||||||
|
loss_test=np.array([12, 16]))
|
||||||
|
attacker_data = models.create_attacker_data(attack_input, balance=True)
|
||||||
|
self.assertLen(attacker_data.features_all, 5)
|
||||||
|
self.assertLen(attacker_data.fold_indices, 4)
|
||||||
|
self.assertLen(attacker_data.left_out_indices, 1)
|
||||||
|
self.assertIn(attacker_data.left_out_indices[0], [0, 1, 2])
|
||||||
|
|
||||||
def test_balanced_create_attacker_data_loss_and_logits(self):
|
def test_balanced_create_attacker_data_loss_and_logits(self):
|
||||||
attack_input = AttackInputData(
|
attack_input = AttackInputData(
|
||||||
|
@ -59,14 +65,10 @@ class TrainedAttackerTest(absltest.TestCase):
|
||||||
logits_test=np.array([[10, 11], [14, 15], [17, 18]]),
|
logits_test=np.array([[10, 11], [14, 15], [17, 18]]),
|
||||||
loss_train=np.array([3, 7, 10]),
|
loss_train=np.array([3, 7, 10]),
|
||||||
loss_test=np.array([12, 16, 19]))
|
loss_test=np.array([12, 16, 19]))
|
||||||
attacker_data = models.create_attacker_data(attack_input, 0.33)
|
attacker_data = models.create_attacker_data(attack_input)
|
||||||
self.assertLen(attacker_data.features_test, 2)
|
self.assertLen(attacker_data.features_all, 6)
|
||||||
self.assertLen(attacker_data.features_train, 4)
|
self.assertLen(attacker_data.fold_indices, 6)
|
||||||
|
self.assertEmpty(attacker_data.left_out_indices)
|
||||||
for i, feature in enumerate(attacker_data.features_train):
|
|
||||||
self.assertLen(feature, 3) # each feature has two logits and one loss
|
|
||||||
expected = feature[:2] not in attack_input.logits_train
|
|
||||||
self.assertEqual(attacker_data.is_training_labels_train[i], expected)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in a new issue