Option for plotting attack results in the same figure.

PiperOrigin-RevId: 333225502
This commit is contained in:
Shuang Song 2020-09-22 22:11:37 -07:00 committed by A. Unique TensorFlower
parent 677b3d9e9a
commit 7c53757250
9 changed files with 261 additions and 186 deletions

View file

@ -573,18 +573,19 @@ def get_flattened_attack_metrics(results: AttackResults):
results: membership inference attack results. results: membership inference attack results.
Returns: Returns:
properties: a list of (slice, attack_type, metric name) types: a list of attack types
slices: a list of slices
attack_metrics: a list of metric names
values: a list of metric values, i-th element correspond to properties[i] values: a list of metric values, i-th element correspond to properties[i]
""" """
properties = [] types = []
slices = []
attack_metrics = []
values = [] values = []
for attack_result in results.single_attack_results: for attack_result in results.single_attack_results:
slice_spec = attack_result.slice_spec types += [str(attack_result.attack_type)] * 2
prop = [str(slice_spec), str(attack_result.attack_type)] slices += [str(attack_result.slice_spec)] * 2
properties += [prop + ['adv'], prop + ['auc']] attack_metrics += ['adv', 'auc']
values += [ values += [float(attack_result.get_attacker_advantage()),
float(attack_result.get_attacker_advantage()), float(attack_result.get_auc())]
float(attack_result.get_auc()) return types, slices, attack_metrics, values
]
return properties, values

View file

@ -15,8 +15,8 @@
# Lint as: python3 # Lint as: python3
"""A callback and a function in keras for membership inference attack.""" """A callback and a function in keras for membership inference attack."""
import os
from typing import Iterable from typing import Iterable
from absl import logging from absl import logging
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
@ -27,7 +27,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss
from tensorflow_privacy.privacy.membership_inference_attack.utils import write_to_tensorboard from tensorflow_privacy.privacy.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard
def calculate_losses(model, data, labels): def calculate_losses(model, data, labels):
@ -55,7 +55,8 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
in_train, out_train, in_train, out_train,
slicing_spec: SlicingSpec = None, slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,), attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
tensorboard_dir=None): tensorboard_dir=None,
tensorboard_merge_classifiers=False):
"""Initalizes the callback. """Initalizes the callback.
Args: Args:
@ -64,18 +65,28 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
slicing_spec: slicing specification of the attack slicing_spec: slicing specification of the attack
attack_types: a list of attacks, each of type AttackType attack_types: a list of attacks, each of type AttackType
tensorboard_dir: directory for tensorboard summary tensorboard_dir: directory for tensorboard summary
tensorboard_merge_classifiers: if true, plot different classifiers with
the same slicing_spec and metric in the same figure
""" """
self._in_train_data, self._in_train_labels = in_train self._in_train_data, self._in_train_labels = in_train
self._out_train_data, self._out_train_labels = out_train self._out_train_data, self._out_train_labels = out_train
self._slicing_spec = slicing_spec self._slicing_spec = slicing_spec
self._attack_types = attack_types self._attack_types = attack_types
# Setup tensorboard writer if tensorboard_dir is specified self._tensorboard_merge_classifiers = tensorboard_merge_classifiers
if tensorboard_dir: if tensorboard_dir:
if tensorboard_merge_classifiers:
self._writers = {}
with tf.Graph().as_default(): with tf.Graph().as_default():
self._writer = tf.summary.FileWriter(tensorboard_dir) for attack_type in attack_types:
self._writers[attack_type.name] = tf.summary.FileWriter(
os.path.join(tensorboard_dir, 'MI', attack_type.name))
else:
with tf.Graph().as_default():
self._writers = tf.summary.FileWriter(
os.path.join(tensorboard_dir, 'MI'))
logging.info('Will write to tensorboard.') logging.info('Will write to tensorboard.')
else: else:
self._writer = None self._writers = None
def on_epoch_end(self, epoch, logs=None): def on_epoch_end(self, epoch, logs=None):
results = run_attack_on_keras_model( results = run_attack_on_keras_model(
@ -86,15 +97,16 @@ class MembershipInferenceCallback(tf.keras.callbacks.Callback):
self._attack_types) self._attack_types)
logging.info(results) logging.info(results)
attack_properties, attack_values = get_flattened_attack_metrics(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
results)
print('Attack result:') print('Attack result:')
print('\n'.join([' %s: %.4f' % (', '.join(p), r) for p, r in print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(attack_properties, attack_values)])) zip(att_types, att_slices, att_metrics, att_values)]))
# Write to tensorboard if tensorboard_dir is specified # Write to tensorboard if tensorboard_dir is specified
attack_property_tags = ['attack/' + '_'.join(p) for p in attack_properties] if self._writers is not None:
write_to_tensorboard(self._writer, attack_property_tags, attack_values, write_results_to_tensorboard(results, self._writers, epoch,
epoch) self._tensorboard_merge_classifiers)
def run_attack_on_keras_model( def run_attack_on_keras_model(

View file

@ -26,95 +26,86 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import MembershipInferenceCallback from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import MembershipInferenceCallback
from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import run_attack_on_keras_model from tensorflow_privacy.privacy.membership_inference_attack.keras_evaluation import run_attack_on_keras_model
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('batch_size', 256, 'Batch size') flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_integer('epochs', 10, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.') flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
def cnn_model(): def small_cnn():
"""Define a CNN model.""" """Setup a small CNN for image classification."""
model = tf.keras.Sequential([ model = tf.keras.models.Sequential()
tf.keras.layers.Conv2D( model.add(tf.keras.layers.Input(shape=(32, 32, 3)))
16,
8, for _ in range(3):
strides=2, model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
padding='same', model.add(tf.keras.layers.MaxPooling2D())
activation='relu',
input_shape=(28, 28, 1)), model.add(tf.keras.layers.Flatten())
tf.keras.layers.MaxPool2D(2, 1), model.add(tf.keras.layers.Dense(64, activation='relu'))
tf.keras.layers.Conv2D( model.add(tf.keras.layers.Dense(10))
32, 4, strides=2, padding='valid', activation='relu'),
tf.keras.layers.MaxPool2D(2, 1),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(10)
])
return model return model
def load_mnist(): def load_cifar10():
"""Loads MNIST and preprocesses to combine training and validation data.""" """Loads CIFAR10 data."""
(train_data, (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
train_labels), (test_data,
test_labels) = tf.keras.datasets.mnist.load_data()
train_data = np.array(train_data, dtype=np.float32) / 255 x_train = np.array(x_train, dtype=np.float32) / 255
test_data = np.array(test_data, dtype=np.float32) / 255 x_test = np.array(x_test, dtype=np.float32) / 255
train_data = train_data.reshape((train_data.shape[0], 28, 28, 1)) y_train = np.array(y_train, dtype=np.int32).squeeze()
test_data = test_data.reshape((test_data.shape[0], 28, 28, 1)) y_test = np.array(y_test, dtype=np.int32).squeeze()
train_labels = np.array(train_labels, dtype=np.int32) return x_train, y_train, x_test, y_test
test_labels = np.array(test_labels, dtype=np.int32)
return train_data, train_labels, test_data, test_labels
def main(unused_argv): def main(unused_argv):
# Load training and test data. # Load training and test data.
train_data, train_labels, test_data, test_labels = load_mnist() x_train, y_train, x_test, y_test = load_cifar10()
# Get model, optimizer and specify loss. # Get model, optimizer and specify loss.
model = cnn_model() model = small_cnn()
optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) optimizer = tf.keras.optimizers.SGD(lr=FLAGS.learning_rate, momentum=0.9)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
# Get callback for membership inference attack. # Get callback for membership inference attack.
mia_callback = MembershipInferenceCallback( mia_callback = MembershipInferenceCallback(
(train_data, train_labels), (test_data, test_labels), (x_train, y_train),
attack_types=[AttackType.THRESHOLD_ATTACK], (x_test, y_test),
tensorboard_dir=FLAGS.model_dir) slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[AttackType.THRESHOLD_ATTACK,
AttackType.K_NEAREST_NEIGHBORS],
tensorboard_dir=FLAGS.model_dir,
tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)
# Train model with Keras # Train model with Keras
model.fit( model.fit(
train_data, x_train,
train_labels, y_train,
epochs=FLAGS.epochs, epochs=FLAGS.epochs,
validation_data=(test_data, test_labels), validation_data=(x_test, y_test),
batch_size=FLAGS.batch_size, batch_size=FLAGS.batch_size,
callbacks=[mia_callback], callbacks=[mia_callback],
verbose=2) verbose=2)
print('End of training attack:') print('End of training attack:')
attack_results = run_attack_on_keras_model( attack_results = run_attack_on_keras_model(
model, (train_data, train_labels), (test_data, test_labels), model, (x_train, y_train), (x_test, y_test),
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True), slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[ attack_types=[
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
]) ])
att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_properties, attack_values = get_flattened_attack_metrics(
attack_results) attack_results)
print('\n'.join([ print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
' %s: %.4f' % (', '.join(p), r) zip(att_types, att_slices, att_metrics, att_values)]))
for p, r in zip(attack_properties, attack_values)
]))
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -67,9 +67,12 @@ class UtilsTest(absltest.TestCase):
(self.test_data, self.test_labels), (self.test_data, self.test_labels),
attack_types=[AttackType.THRESHOLD_ATTACK]) attack_types=[AttackType.THRESHOLD_ATTACK])
self.assertIsInstance(results, AttackResults) self.assertIsInstance(results, AttackResults)
attack_properties, attack_values = get_flattened_attack_metrics(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
self.assertLen(attack_properties, 2) results)
self.assertLen(attack_values, 2) self.assertLen(att_types, 2)
self.assertLen(att_slices, 2)
self.assertLen(att_metrics, 2)
self.assertLen(att_values, 2)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -15,21 +15,18 @@
# Lint as: python3 # Lint as: python3
"""A hook and a function in tf estimator for membership inference attack.""" """A hook and a function in tf estimator for membership inference attack."""
import os
from typing import Iterable from typing import Iterable
from absl import logging from absl import logging
import numpy as np import numpy as np
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss
from tensorflow_privacy.privacy.membership_inference_attack.utils import write_to_tensorboard from tensorflow_privacy.privacy.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard
def calculate_losses(estimator, input_fn, labels): def calculate_losses(estimator, input_fn, labels):
@ -43,7 +40,7 @@ def calculate_losses(estimator, input_fn, labels):
Args: Args:
estimator: model to make prediction estimator: model to make prediction
input_fn: input function to be used in estimator.predict input_fn: input function to be used in estimator.predict
labels: true labels of samples (integer valued) labels: array of size (n_samples, ), true labels of samples (integer valued)
Returns: Returns:
preds: probability vector of each sample preds: probability vector of each sample
@ -64,7 +61,8 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
input_fn_constructor, input_fn_constructor,
slicing_spec: SlicingSpec = None, slicing_spec: SlicingSpec = None,
attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,), attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
writer=None): tensorboard_dir=None,
tensorboard_merge_classifiers=False):
"""Initialize the hook. """Initialize the hook.
Args: Args:
@ -75,7 +73,9 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
the input_fn for model prediction the input_fn for model prediction
slicing_spec: slicing specification of the attack slicing_spec: slicing specification of the attack
attack_types: a list of attacks, each of type AttackType attack_types: a list of attacks, each of type AttackType
writer: summary writer for tensorboard tensorboard_dir: directory for tensorboard summary
tensorboard_merge_classifiers: if true, plot different classifiers with
the same slicing_spec and metric in the same figure
""" """
in_train_data, self._in_train_labels = in_train in_train_data, self._in_train_labels = in_train
out_train_data, self._out_train_labels = out_train out_train_data, self._out_train_labels = out_train
@ -88,9 +88,21 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
self._estimator = estimator self._estimator = estimator
self._slicing_spec = slicing_spec self._slicing_spec = slicing_spec
self._attack_types = attack_types self._attack_types = attack_types
self._writer = writer self._tensorboard_merge_classifiers = tensorboard_merge_classifiers
if self._writer: if tensorboard_dir:
if tensorboard_merge_classifiers:
self._writers = {}
with tf.Graph().as_default():
for attack_type in attack_types:
self._writers[attack_type.name] = tf.summary.FileWriter(
os.path.join(tensorboard_dir, 'MI', attack_type.name))
else:
with tf.Graph().as_default():
self._writers = tf.summary.FileWriter(
os.path.join(tensorboard_dir, 'MI'))
logging.info('Will write to tensorboard.') logging.info('Will write to tensorboard.')
else:
self._writers = None
def end(self, session): def end(self, session):
results = run_attack_helper(self._estimator, results = run_attack_helper(self._estimator,
@ -101,16 +113,17 @@ class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
self._attack_types) self._attack_types)
logging.info(results) logging.info(results)
attack_properties, attack_values = get_flattened_attack_metrics(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
results)
print('Attack result:') print('Attack result:')
print('\n'.join([' %s: %.4f' % (', '.join(p), r) for p, r in print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
zip(attack_properties, attack_values)])) zip(att_types, att_slices, att_metrics, att_values)]))
# Write to tensorboard if writer is specified # Write to tensorboard if tensorboard_dir is specified
global_step = self._estimator.get_variable_value('global_step') global_step = self._estimator.get_variable_value('global_step')
attack_property_tags = ['attack/' + '_'.join(p) for p in attack_properties] if self._writers is not None:
write_to_tensorboard(self._writer, attack_property_tags, attack_values, write_results_to_tensorboard(results, self._writers, global_step,
global_step) self._tensorboard_merge_classifiers)
def run_attack_on_tf_estimator_model( def run_attack_on_tf_estimator_model(
@ -184,4 +197,3 @@ def run_attack_helper(
slicing_spec=slicing_spec, slicing_spec=slicing_spec,
attack_types=attack_types) attack_types=attack_types)
return results return results

View file

@ -27,30 +27,27 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model
GradientDescentOptimizer = tf.train.GradientDescentOptimizer
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.02, 'Learning rate for training')
flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') flags.DEFINE_integer('batch_size', 250, 'Batch size')
flags.DEFINE_integer('batch_size', 256, 'Batch size') flags.DEFINE_integer('epochs', 100, 'Number of epochs')
flags.DEFINE_integer('epochs', 10, 'Number of epochs')
flags.DEFINE_string('model_dir', None, 'Model directory.') flags.DEFINE_string('model_dir', None, 'Model directory.')
flags.DEFINE_bool('tensorboard_merge_classifiers', False, 'If true, plot '
'different classifiers with the same slicing_spec and metric '
'in the same figure.')
def cnn_model_fn(features, labels, mode): def small_cnn_fn(features, labels, mode):
"""Model function for a CNN.""" """Setup a small CNN for image classification."""
input_layer = tf.reshape(features['x'], [-1, 32, 32, 3])
for _ in range(3):
y = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(input_layer)
y = tf.keras.layers.MaxPool2D()(y)
# Define CNN architecture using tf.keras.layers. y = tf.keras.layers.Flatten()(y)
input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) y = tf.keras.layers.Dense(64, activation='relu')(y)
y = tf.keras.layers.Conv2D( logits = tf.keras.layers.Dense(10)(y)
16, 8, strides=2, padding='same', activation='relu').apply(input_layer)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Conv2D(
32, 4, strides=2, padding='valid', activation='relu').apply(y)
y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
y = tf.keras.layers.Flatten().apply(y)
y = tf.keras.layers.Dense(32, activation='relu').apply(y)
logits = tf.keras.layers.Dense(10).apply(y)
if mode != tf.estimator.ModeKeys.PREDICT: if mode != tf.estimator.ModeKeys.PREDICT:
vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
@ -59,7 +56,8 @@ def cnn_model_fn(features, labels, mode):
# Configure the training op (for TRAIN mode). # Configure the training op (for TRAIN mode).
if mode == tf.estimator.ModeKeys.TRAIN: if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) optimizer = tf.train.MomentumOptimizer(learning_rate=FLAGS.learning_rate,
momentum=0.9)
global_step = tf.train.get_global_step() global_step = tf.train.get_global_step()
train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step) train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step)
return tf.estimator.EstimatorSpec( return tf.estimator.EstimatorSpec(
@ -81,19 +79,17 @@ def cnn_model_fn(features, labels, mode):
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
def load_mnist(): def load_cifar10():
"""Loads MNIST and preprocesses to combine training and validation data.""" """Loads CIFAR10 data."""
(train_data, (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
train_labels), (test_data,
test_labels) = tf.keras.datasets.mnist.load_data()
train_data = np.array(train_data, dtype=np.float32) / 255 x_train = np.array(x_train, dtype=np.float32) / 255
test_data = np.array(test_data, dtype=np.float32) / 255 x_test = np.array(x_test, dtype=np.float32) / 255
train_labels = np.array(train_labels, dtype=np.int32) y_train = np.array(y_train, dtype=np.int32).squeeze()
test_labels = np.array(test_labels, dtype=np.int32) y_test = np.array(y_test, dtype=np.int32).squeeze()
return train_data, train_labels, test_data, test_labels return x_train, y_train, x_test, y_test
def main(unused_argv): def main(unused_argv):
@ -103,39 +99,38 @@ def main(unused_argv):
logging.get_absl_handler().use_absl_log_file() logging.get_absl_handler().use_absl_log_file()
# Load training and test data. # Load training and test data.
train_data, train_labels, test_data, test_labels = load_mnist() x_train, y_train, x_test, y_test = load_cifar10()
# Instantiate the tf.Estimator. # Instantiate the tf.Estimator.
mnist_classifier = tf.estimator.Estimator( mnist_classifier = tf.estimator.Estimator(
model_fn=cnn_model_fn, model_dir=FLAGS.model_dir) model_fn=small_cnn_fn, model_dir=FLAGS.model_dir)
# A function to construct input_fn given (data, label), to be used by the # A function to construct input_fn given (data, label), to be used by the
# membership inference training hook. # membership inference training hook.
def input_fn_constructor(x, y): def input_fn_constructor(x, y):
return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False) return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)
with tf.Graph().as_default(): # Get hook for membership inference attack.
# Get a summary writer for the hook to write to tensorboard.
# Can set summary_writer to None if not needed.
if FLAGS.model_dir:
summary_writer = tf.summary.FileWriter(FLAGS.model_dir)
else:
summary_writer = None
mia_hook = MembershipInferenceTrainingHook( mia_hook = MembershipInferenceTrainingHook(
mnist_classifier, (train_data, train_labels), (test_data, test_labels), mnist_classifier,
(x_train, y_train),
(x_test, y_test),
input_fn_constructor, input_fn_constructor,
attack_types=[AttackType.THRESHOLD_ATTACK], slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
writer=summary_writer) attack_types=[AttackType.THRESHOLD_ATTACK,
AttackType.K_NEAREST_NEIGHBORS],
tensorboard_dir=FLAGS.model_dir,
tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)
# Create tf.Estimator input functions for the training and test data. # Create tf.Estimator input functions for the training and test data.
train_input_fn = tf.estimator.inputs.numpy_input_fn( train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={'x': train_data}, x={'x': x_train},
y=train_labels, y=y_train,
batch_size=FLAGS.batch_size, batch_size=FLAGS.batch_size,
num_epochs=FLAGS.epochs, num_epochs=FLAGS.epochs,
shuffle=True) shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn( eval_input_fn = tf.estimator.inputs.numpy_input_fn(
x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False) x={'x': x_test}, y=y_test, num_epochs=1, shuffle=False)
# Training loop. # Training loop.
steps_per_epoch = 60000 // FLAGS.batch_size steps_per_epoch = 60000 // FLAGS.batch_size
@ -151,18 +146,15 @@ def main(unused_argv):
print('End of training attack') print('End of training attack')
attack_results = run_attack_on_tf_estimator_model( attack_results = run_attack_on_tf_estimator_model(
mnist_classifier, (train_data, train_labels), (test_data, test_labels), mnist_classifier, (x_train, y_train), (x_test, y_test),
input_fn_constructor, input_fn_constructor,
slicing_spec=SlicingSpec(entire_dataset=True, by_class=True), slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
attack_types=[ attack_types=[AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS]
AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS )
]) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
attack_properties, attack_values = get_flattened_attack_metrics(
attack_results) attack_results)
print('\n'.join([ print('\n'.join([' %s: %.4f' % (', '.join([s, t, m]), v) for t, s, m, v in
' %s: %.4f' % (', '.join(p), r) zip(att_types, att_slices, att_metrics, att_values)]))
for p, r in zip(attack_properties, attack_values)
]))
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -88,9 +88,12 @@ class UtilsTest(absltest.TestCase):
self.test_labels, self.test_labels,
attack_types=[AttackType.THRESHOLD_ATTACK]) attack_types=[AttackType.THRESHOLD_ATTACK])
self.assertIsInstance(results, AttackResults) self.assertIsInstance(results, AttackResults)
attack_properties, attack_values = get_flattened_attack_metrics(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
self.assertLen(attack_properties, 2) results)
self.assertLen(attack_values, 2) self.assertLen(att_types, 2)
self.assertLen(att_slices, 2)
self.assertLen(att_metrics, 2)
self.assertLen(att_values, 2)
def test_run_attack_on_tf_estimator_model(self): def test_run_attack_on_tf_estimator_model(self):
"""Test the attack on the final models.""" """Test the attack on the final models."""
@ -104,9 +107,12 @@ class UtilsTest(absltest.TestCase):
input_fn_constructor, input_fn_constructor,
attack_types=[AttackType.THRESHOLD_ATTACK]) attack_types=[AttackType.THRESHOLD_ATTACK])
self.assertIsInstance(results, AttackResults) self.assertIsInstance(results, AttackResults)
attack_properties, attack_values = get_flattened_attack_metrics(results) att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
self.assertLen(attack_properties, 2) results)
self.assertLen(attack_values, 2) self.assertLen(att_types, 2)
self.assertLen(att_slices, 2)
self.assertLen(att_metrics, 2)
self.assertLen(att_values, 2)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -20,7 +20,7 @@ from typing import Text, Dict, Union, List, Any, Tuple
import numpy as np import numpy as np
import scipy.special import scipy.special
from sklearn import metrics from sklearn import metrics
import tensorflow.compat.v1 as tf
ArrayDict = Dict[Text, np.ndarray] ArrayDict = Dict[Text, np.ndarray]
Dataset = Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]] Dataset = Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]
@ -229,10 +229,12 @@ def log_loss(labels: np.ndarray, pred: np.ndarray, small_value=1e-8):
"""Compute the cross entropy loss. """Compute the cross entropy loss.
Args: Args:
labels: numpy array, labels[i] is the true label (scalar) of the i-th sample labels: numpy array of shape (num_samples,) labels[i] is the true label
pred: numpy array, pred[i] is the probability vector of the i-th sample (scalar) of the i-th sample
small_value: np.log can become -inf if the probability is too close to 0, so pred: numpy array of shape(num_samples, num_classes) where pred[i] is the
the probability is clipped below by small_value. probability vector of the i-th sample
small_value: a scalar. np.log can become -inf if the probability is too
close to 0, so the probability is clipped below by small_value.
Returns: Returns:
the cross-entropy loss of each sample the cross-entropy loss of each sample
@ -243,26 +245,3 @@ def log_loss(labels: np.ndarray, pred: np.ndarray, small_value=1e-8):
def log_loss_from_logits(labels: np.ndarray, logits: np.ndarray): def log_loss_from_logits(labels: np.ndarray, logits: np.ndarray):
"""Compute the cross entropy loss from logits.""" """Compute the cross entropy loss from logits."""
return log_loss(labels, scipy.special.softmax(logits, axis=-1)) return log_loss(labels, scipy.special.softmax(logits, axis=-1))
# ------------------------------------------------------------------------------
# Tensorboard
# ------------------------------------------------------------------------------
def write_to_tensorboard(writer, tags, values, step):
  """Record scalar metrics in one tensorboard summary.

  Args:
    writer: tensorboard writer; when it is None nothing is written
    tags: a list of tags of metrics
    values: a list of values of metrics, paired with tags by position
    step: step for the summary
  """
  if writer is not None:
    scalars = tf.Summary()
    # All (tag, value) pairs share a single summary proto and a single write.
    for metric_tag, metric_value in zip(tags, values):
      scalars.value.add(tag=metric_tag, simple_value=metric_value)
    writer.add_summary(scalars, step)
    writer.flush()

View file

@ -0,0 +1,79 @@
# Copyright 2020, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Utility functions for writing attack results to tensorboard."""
from typing import Dict
from typing import List
from typing import Union

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import get_flattened_attack_metrics
def write_to_tensorboard(writers, tags, values, step):
  """Write metrics to tensorboard.

  Every (tag, value) pair is written as its own scalar summary, so that
  different metrics can be routed to different writers.

  Args:
    writers: a list (or tuple) of tensorboard writers or one writer to be used
      for metrics. If it's a list, it should be of the same length as tags
    tags: a list of tags of metrics
    values: a list of values of metrics with the same length as tags
    step: step for the tensorboard summary

  Raises:
    ValueError: if no writer is given, or if writers, tags and values do not
      all have the same length.
  """
  # `not writers` covers both None and an empty list of writers.
  if not writers:
    raise ValueError('write_to_tensorboard does not get any writer.')
  if not isinstance(writers, (list, tuple)):
    # A single writer receives every metric.
    writers = [writers] * len(tags)
  # Raise explicitly instead of using `assert`, which is skipped under
  # `python -O` and would let zip() silently drop unmatched metrics.
  if not len(writers) == len(tags) == len(values):
    raise ValueError(
        'write_to_tensorboard expects writers, tags and values of the same '
        'length, got %d, %d and %d.' % (len(writers), len(tags), len(values)))

  for writer, tag, val in zip(writers, tags, values):
    summary = tf.Summary()
    summary.value.add(tag=tag, simple_value=val)
    writer.add_summary(summary, step)

  # The same writer may appear several times; flush each one only once.
  for writer in set(writers):
    writer.flush()
def write_results_to_tensorboard(
    attack_results: AttackResults,
    writers: Union[tf.summary.FileWriter,
                   List[tf.summary.FileWriter],
                   Dict[str, tf.summary.FileWriter]],
    step: int,
    merge_classifiers: bool):
  """Write attack results to tensorboard.

  Args:
    attack_results: results from attack
    writers: the tensorboard writer(s) to be used for metrics. If
      merge_classifiers is true, this must be a dict of writers keyed by the
      attack type strings returned by get_flattened_attack_metrics, as each
      metric is sent to the writer looked up by its attack type. Otherwise it
      is one writer, or a list of writers with one entry per flattened metric.
    step: step for the tensorboard summary
    merge_classifiers: if true, plot different classifiers with the same
      slicing_spec and metric in the same figure

  Raises:
    ValueError: if writers is None or empty.
  """
  # `not writers` covers None as well as an empty list or dict.
  if not writers:
    raise ValueError('write_results_to_tensorboard does not get any writer.')

  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  if merge_classifiers:
    # The tag leaves out the attack type, so all classifiers share one tag per
    # (slice, metric) and are told apart only by the writer they go to.
    att_tags = ['attack/' + '_'.join([s, m]) for s, m in
                zip(att_slices, att_metrics)]
    write_to_tensorboard([writers[t] for t in att_types],
                         att_tags, att_values, step)
  else:
    # The attack type is part of the tag, giving every classifier its own tag.
    att_tags = ['attack/' + '_'.join([s, t, m]) for t, s, m in
                zip(att_types, att_slices, att_metrics)]
    write_to_tensorboard(writers, att_tags, att_values, step)