forked from 626_privacy/tensorflow_privacy
Add a Keras callback, and a function to be called at the end of training, for performing a membership inference attack.
PiperOrigin-RevId: 323805663
This commit is contained in:
parent
dcbfaa3f5e
commit
cea9e01670
3 changed files with 284 additions and 0 deletions
|
@ -0,0 +1,108 @@
|
|||
# Copyright 2020, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Lint as: python3
|
||||
"""A callback and a function in keras for membership inference attack."""
|
||||
|
||||
from absl import logging
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.utils import write_to_tensorboard
|
||||
|
||||
|
||||
def calculate_losses(model, data, labels):
  """Runs the model on `data` and scores each prediction against `labels`.

  Args:
    model: object exposing `predict(data)`; used to make the predictions
    data: samples to predict on
    labels: true labels of the samples (integer valued)

  Returns:
    A `(pred, loss)` pair, where `pred` is the output of `model.predict(data)`
    and `loss` is the result of `log_loss(labels, pred)`.

    NOTE(review): this function was originally documented as returning a
    probability vector and a cross entropy loss per sample. The prediction is
    handed to `log_loss` unchanged, so confirm that this holds for models
    whose output is logits rather than probabilities.
  """
  predictions = model.predict(data)
  return predictions, log_loss(labels, predictions)
|
||||
|
||||
|
||||
class MembershipInferenceCallback(tf.keras.callbacks.Callback):
  """Callback to perform membership inference attack on epoch end."""

  def __init__(self, in_train, out_train, attack_classifiers,
               tensorboard_dir=None):
    """Initializes the callback.

    Args:
      in_train: (in_training samples, in_training labels)
      out_train: (out_training samples, out_training labels)
      attack_classifiers: a list of classifiers to be used by attacker, must be
        a subset of ['lr', 'mlp', 'rf', 'knn']
      tensorboard_dir: directory for tensorboard summary; when it is None (or
        otherwise falsy) no summary writer is created
    """
    # Run the Keras base-class initializer so that the state it maintains is
    # set up before this subclass adds its own attributes.
    super().__init__()

    self._in_train_data, self._in_train_labels = in_train
    self._out_train_data, self._out_train_labels = out_train
    self._attack_classifiers = attack_classifiers

    # Setup tensorboard writer if tensorboard_dir is specified
    if tensorboard_dir:
      # The writer is created inside a fresh graph context.
      # NOTE(review): presumably needed because the v1 FileWriter cannot be
      # constructed while eager execution is active - confirm.
      with tf.Graph().as_default():
        self._writer = tf.summary.FileWriter(tensorboard_dir)
      logging.info('Will write to tensorboard.')
    else:
      self._writer = None

  def on_epoch_end(self, epoch, logs=None):
    """Runs the attack against the current model and reports the result.

    Args:
      epoch: index of the epoch that just finished; used as the summary step
      logs: unused, accepted for compatibility with the Keras callback API
    """
    results = run_attack_on_keras_model(
        self.model,
        (self._in_train_data, self._in_train_labels),
        (self._out_train_data, self._out_train_labels),
        self._attack_classifiers)
    print('all_thresh_loss_advantage', results['all_thresh_loss_advantage'])
    logging.info(results)

    # Write to tensorboard if tensorboard_dir is specified. The helper is
    # given the writer even when it is None.
    # NOTE(review): presumably it treats a None writer as a no-op - confirm.
    write_to_tensorboard(self._writer, ['attack advantage'],
                         [results['all_thresh_loss_advantage']], epoch)
|
||||
|
||||
|
||||
def run_attack_on_keras_model(model, in_train, out_train, attack_classifiers):
  """Runs the membership inference attacks against `model`.

  Predictions and losses are computed with `calculate_losses`, first for the
  in-training samples and then for the out-of-training samples, and all of
  them are forwarded together with the labels to `mia.run_all_attacks`.

  Args:
    model: model to be tested
    in_train: a (in_training samples, in_training labels) tuple
    out_train: a (out_training samples, out_training labels) tuple
    attack_classifiers: a list of classifiers to be used by attacker, must be
      a subset of ['lr', 'mlp', 'rf', 'knn']

  Returns:
    Results of the attack, exactly as returned by `mia.run_all_attacks`.
  """
  member_samples, member_labels = in_train
  nonmember_samples, nonmember_labels = out_train

  # Score the members first, then the non-members.
  member_pred, member_loss = calculate_losses(
      model, member_samples, member_labels)
  nonmember_pred, nonmember_loss = calculate_losses(
      model, nonmember_samples, nonmember_labels)

  return mia.run_all_attacks(
      member_loss, nonmember_loss,
      member_pred, nonmember_pred,
      member_labels, nonmember_labels,
      attack_classifiers=attack_classifiers)
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
# Copyright 2020, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Lint as: python3
|
||||
"""An example for using keras_evaluation."""
|
||||
|
||||
from absl import app
|
||||
from absl import flags
|
||||
|
||||
import numpy as np
|
||||
import tensorflow.compat.v1 as tf
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import MembershipInferenceCallback
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import run_attack_on_keras_model
|
||||
|
||||
# Short module-level alias for the optimizer class used in main().
GradientDescentOptimizer = tf.train.GradientDescentOptimizer

FLAGS = flags.FLAGS

# Command-line flags controlling the training run in main().
flags.DEFINE_float('learning_rate', .15, 'Learning rate for training')
flags.DEFINE_integer('batch_size', 256, 'Batch size')
flags.DEFINE_integer('epochs', 10, 'Number of epochs')
# main() passes this value to MembershipInferenceCallback as its
# tensorboard directory; with the default of None no summaries are written.
flags.DEFINE_string('model_dir', None, 'Model directory.')
|
||||
|
||||
|
||||
def cnn_model():
  """Builds the small CNN classifier used by this example.

  The network takes inputs of shape (28, 28, 1) and ends in a 10-unit dense
  layer with no activation.

  Returns:
    An uncompiled `tf.keras.Sequential` model.
  """
  layers = tf.keras.layers
  stack = [
      layers.Conv2D(16, 8, strides=2, padding='same',
                    activation='relu', input_shape=(28, 28, 1)),
      layers.MaxPool2D(2, 1),
      layers.Conv2D(32, 4, strides=2, padding='valid',
                    activation='relu'),
      layers.MaxPool2D(2, 1),
      layers.Flatten(),
      layers.Dense(32, activation='relu'),
      # No activation on the output layer.
      layers.Dense(10),
  ]
  return tf.keras.Sequential(stack)
|
||||
|
||||
|
||||
def load_mnist():
  """Loads MNIST and converts it to the arrays used by this example.

  Images are converted to float32, divided by 255 and reshaped to
  (num_samples, 28, 28, 1); labels are converted to int32.

  Returns:
    A tuple `(train_data, train_labels, test_data, test_labels)`.
  """
  train_split, test_split = tf.keras.datasets.mnist.load_data()
  raw_train_data, raw_train_labels = train_split
  raw_test_data, raw_test_labels = test_split

  def _to_images(raw):
    # Scale the pixel values, then add a trailing channel axis.
    scaled = np.array(raw, dtype=np.float32) / 255
    return scaled.reshape((scaled.shape[0], 28, 28, 1))

  def _to_labels(raw):
    return np.array(raw, dtype=np.int32)

  return (_to_images(raw_train_data), _to_labels(raw_train_labels),
          _to_images(raw_test_data), _to_labels(raw_test_labels))
|
||||
|
||||
|
||||
def main(unused_argv):
  """Trains the CNN on MNIST, attacking after each epoch and once at the end.

  Args:
    unused_argv: positional command-line arguments supplied by `app.run`;
      ignored.
  """
  # Load training and test data.
  train_data, train_labels, test_data, test_labels = load_mnist()
  train_set = (train_data, train_labels)
  test_set = (test_data, test_labels)

  # Build the model and compile it with plain gradient descent.
  model = cnn_model()
  model.compile(
      optimizer=GradientDescentOptimizer(learning_rate=FLAGS.learning_rate),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=['accuracy'])

  # The callback attacks the model after every epoch; the training set plays
  # the role of members and the test set the role of non-members.
  mia_callback = MembershipInferenceCallback(
      train_set, test_set, [], FLAGS.model_dir)

  # Train model with Keras
  model.fit(
      train_data,
      train_labels,
      epochs=FLAGS.epochs,
      validation_data=test_set,
      batch_size=FLAGS.batch_size,
      callbacks=[mia_callback],
      verbose=2)

  # Attack the fully trained model one more time.
  print('End of training attack')
  attack_results = run_attack_on_keras_model(model, train_set, test_set, [])
  print('all_thresh_loss_advantage',
        attack_results['all_thresh_loss_advantage'])
|
||||
|
||||
|
||||
# Script entry point: hand control to absl's app.run, which then calls main().
if __name__ == '__main__':
  app.run(main)
|
|
@ -0,0 +1,72 @@
|
|||
# Copyright 2020, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Lint as: python3
|
||||
"""Tests for tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation."""
|
||||
|
||||
from absl.testing import absltest
|
||||
|
||||
import numpy as np
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from tensorflow_privacy.privacy.membership_inference_attack import keras_evaluation
|
||||
|
||||
|
||||
class UtilsTest(absltest.TestCase):
  """Tests keras_evaluation on random data with a compiled, unfitted model."""

  def setUp(self):
    """Builds the random data and the model used by every test.

    Fixtures are created here rather than in `__init__`, so they are built
    when a test actually runs and the standard `TestCase` constructor
    signature is left untouched.
    """
    super().setUp()

    self.ntrain, self.ntest = 50, 100
    self.nclass = 5
    self.ndim = 10

    # Generate random training and test data. A local, seeded generator keeps
    # the fixtures reproducible without touching numpy's global random state.
    rng = np.random.RandomState(0)
    self.train_data = rng.rand(self.ntrain, self.ndim)
    self.test_data = rng.rand(self.ntest, self.ndim)
    self.train_labels = rng.randint(self.nclass, size=self.ntrain)
    self.test_labels = rng.randint(self.nclass, size=self.ntest)

    self.model = tf.keras.Sequential([tf.keras.layers.Dense(self.nclass)])

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    self.model.compile(optimizer='Adam', loss=loss, metrics=['accuracy'])

  def test_calculate_losses(self):
    """Test calculating the loss."""
    pred, loss = keras_evaluation.calculate_losses(self.model, self.train_data,
                                                   self.train_labels)
    self.assertEqual(pred.shape, (self.ntrain, self.nclass))
    self.assertEqual(loss.shape, (self.ntrain,))

    pred, loss = keras_evaluation.calculate_losses(self.model, self.test_data,
                                                   self.test_labels)
    self.assertEqual(pred.shape, (self.ntest, self.nclass))
    self.assertEqual(loss.shape, (self.ntest,))

  def test_run_attack_on_keras_model(self):
    """Test the attack."""
    results = keras_evaluation.run_attack_on_keras_model(
        self.model,
        (self.train_data, self.train_labels),
        (self.test_data, self.test_labels),
        [])
    self.assertIsInstance(results, dict)
    self.assertIn('all_thresh_loss_auc', results)
    self.assertIn('all_thresh_loss_advantage', results)
|
||||
|
||||
|
||||
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
  absltest.main()
|
Loading…
Reference in a new issue