Add a training hook, and a function to be called at the end of training, that perform a membership inference attack on a tf.estimator model.

PiperOrigin-RevId: 321648371
2020-07-16 14:38:39 -07:00 · 2020-07-16 14:38:39 -07:00 · a0e1b72838
commit a0e1b72838
parent 51eb7c3712
5 changed files with 497 additions and 0 deletions
--- a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation.py
@ -0,0 +1,166 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """A hook and a function in tf estimator for membership inference attack."""
 from absl import logging
 import numpy as np
 import tensorflow.compat.v1 as tf
 from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
 from tensorflow_privacy.privacy.membership_inference_attack.utils import log_loss
 def calculate_losses(estimator, input_fn, labels):
  """Compute per-sample predictions and cross-entropy losses.

  Two assumptions are made: 1) the loss is cross-entropy loss, and 2) the
  model function returns the class probabilities in prediction mode, e.g.,
  when mode == tf.estimator.ModeKeys.PREDICT it returns
  tf.estimator.EstimatorSpec(mode=mode, predictions=tf.nn.softmax(logits)).

  Args:
    estimator: model to make prediction
    input_fn: input function to be used in estimator.predict
    labels: true labels of samples

  Returns:
    pred: probability vector of each sample
    loss: cross entropy loss of each sample
  """
  # Drain the prediction iterator and stack the per-sample outputs.
  per_sample_predictions = [p for p in estimator.predict(input_fn=input_fn)]
  probabilities = np.array(per_sample_predictions)
  return probabilities, log_loss(labels, probabilities)
 class MembershipInferenceTrainingHook(tf.estimator.SessionRunHook):
  """Training hook that runs a membership inference attack in `end`."""

  def __init__(self,
               estimator,
               in_train,
               out_train,
               input_fn_constructor,
               attack_classifiers,
               writer=None):
    """Initializes the hook.

    Args:
      estimator: model to be tested
      in_train: (in_training samples, in_training labels)
      out_train: (out_training samples, out_training labels)
      input_fn_constructor: a function that receives sample, label and construct
        the input_fn for model prediction
      attack_classifiers: a list of classifiers to be used by attacker, must be
        a subset of ['lr', 'mlp', 'rf', 'knn']
      writer: summary writer for tensorboard
    """
    self._estimator = estimator
    self._attack_classifiers = attack_classifiers
    self._writer = writer

    member_samples, self._in_train_labels = in_train
    non_member_samples, self._out_train_labels = out_train

    # Build the prediction input functions once; `end` reuses them.
    self._in_train_input_fn = input_fn_constructor(
        member_samples, self._in_train_labels)
    self._out_train_input_fn = input_fn_constructor(
        non_member_samples, self._out_train_labels)

    if self._writer:
      logging.info('Will write to tensorboard.')

  def end(self, session):
    """Runs the attack, prints/logs the results and writes a summary.

    Args:
      session: not used by this hook
    """
    attack_results = run_attack_helper(
        self._estimator,
        self._in_train_input_fn, self._out_train_input_fn,
        self._in_train_labels, self._out_train_labels,
        self._attack_classifiers)
    advantage = attack_results['all_thresh_loss_advantage']
    print('all_thresh_loss_advantage', advantage)
    logging.info(attack_results)

    if not self._writer:
      return

    # Record the advantage against the estimator's stored global step.
    summary = tf.Summary()
    summary.value.add(tag='attack advantage', simple_value=advantage)
    step = self._estimator.get_variable_value('global_step')
    self._writer.add_summary(summary, step)
    self._writer.flush()
 def run_attack_on_tf_estimator_model(estimator, in_train, out_train,
                                      input_fn_constructor, attack_classifiers):
  """Performs the membership inference attack at the end of training.

  Args:
    estimator: model to be tested
    in_train: (in_training samples, in_training labels)
    out_train: (out_training samples, out_training labels)
    input_fn_constructor: a function that receives sample, label and construct
      the input_fn for model prediction
    attack_classifiers: a list of classifiers to be used by attacker, must be
      a subset of ['lr', 'mlp', 'rf', 'knn']

  Returns:
    Results of the attack
  """
  member_samples, member_labels = in_train
  non_member_samples, non_member_labels = out_train

  # The input functions are built inline, in-training first, and handed
  # straight to the helper that runs the attack.
  attack_results = run_attack_helper(
      estimator,
      input_fn_constructor(member_samples, member_labels),
      input_fn_constructor(non_member_samples, non_member_labels),
      member_labels, non_member_labels,
      attack_classifiers)

  print('all_thresh_loss_advantage',
        attack_results['all_thresh_loss_advantage'])
  logging.info('End of training attack:')
  logging.info(attack_results)
  return attack_results
 def run_attack_helper(estimator,
                       in_train_input_fn, out_train_input_fn,
                       in_train_labels, out_train_labels,
                       attack_classifiers):
  """Computes predictions and losses, then runs all attacks on them.

  Args:
    estimator: model to be tested
    in_train_input_fn: input_fn for in training data
    out_train_input_fn: input_fn for out of training data
    in_train_labels: in training labels
    out_train_labels: out of training labels
    attack_classifiers: a list of classifiers to be used by attacker, must be
      a subset of ['lr', 'mlp', 'rf', 'knn']

  Returns:
    Results of the attack, as returned by mia.run_all_attacks
  """
  # Predictions and per-sample losses for the in-training samples ...
  member_pred, member_loss = calculate_losses(
      estimator, in_train_input_fn, in_train_labels)
  # ... and for the out-of-training samples.
  non_member_pred, non_member_loss = calculate_losses(
      estimator, out_train_input_fn, out_train_labels)

  return mia.run_all_attacks(
      member_loss, non_member_loss,
      member_pred, non_member_pred,
      in_train_labels, out_train_labels,
      attack_classifiers=attack_classifiers)
--- a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_example.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_example.py
@ -0,0 +1,162 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """An example for using tf_estimator_evaluation."""
 from absl import app
 from absl import flags
 from absl import logging
 import numpy as np
 import tensorflow.compat.v1 as tf
 from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import MembershipInferenceTrainingHook
 from tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation import run_attack_on_tf_estimator_model
 # Short alias for the TF1 gradient-descent optimizer used in cnn_model_fn.
 GradientDescentOptimizer = tf.train.GradientDescentOptimizer
 # Command-line flags of this example; their values are read through FLAGS.
 FLAGS = flags.FLAGS
 flags.DEFINE_float('learning_rate', .15, 'Learning rate for training')
 flags.DEFINE_integer('batch_size', 256, 'Batch size')
 flags.DEFINE_integer('epochs', 10, 'Number of epochs')
 # When left as None, main creates no summary writer for the attack hook.
 flags.DEFINE_string('model_dir', None, 'Model directory.')
 def cnn_model_fn(features, labels, mode):
  """Model function for a CNN.

  Args:
    features: dict of input tensors; only features['x'] is read, and it is
      reshaped to [-1, 28, 28, 1]
    labels: class labels, used for the loss and the accuracy metric; not read
      in PREDICT mode
    mode: one of the tf.estimator.ModeKeys values

  Returns:
    A tf.estimator.EstimatorSpec for TRAIN, EVAL and PREDICT modes. In PREDICT
    mode the predictions are the softmax probabilities. Any other mode falls
    through and returns None.
  """
  # Define CNN architecture using tf.keras.layers. Layers are called directly;
  # Layer.apply is deprecated in favor of Layer.__call__.
  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
  y = tf.keras.layers.Conv2D(
      16, 8, strides=2, padding='same', activation='relu')(input_layer)
  y = tf.keras.layers.MaxPool2D(2, 1)(y)
  y = tf.keras.layers.Conv2D(
      32, 4, strides=2, padding='valid', activation='relu')(y)
  y = tf.keras.layers.MaxPool2D(2, 1)(y)
  y = tf.keras.layers.Flatten()(y)
  y = tf.keras.layers.Dense(32, activation='relu')(y)
  logits = tf.keras.layers.Dense(10)(y)

  # Output the prediction probability (for PREDICT mode). No loss is built
  # here because labels are not needed for prediction.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = tf.nn.softmax(logits)
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits)
  scalar_loss = tf.reduce_mean(input_tensor=vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=scalar_loss, global_step=global_step)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=scalar_loss,
        train_op=train_op)

  # Add evaluation metrics (for EVAL mode).
  if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(
                labels=labels, predictions=tf.argmax(input=logits, axis=1))
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops)
 def load_mnist():
  """Loads MNIST and converts it to the dtypes used by this example.

  Images are cast to float32 and divided by 255; labels are cast to int32.

  Returns:
    train_data, train_labels, test_data, test_labels as numpy arrays
  """
  train_split, test_split = tf.keras.datasets.mnist.load_data()
  train_images, train_targets = train_split
  test_images, test_targets = test_split

  def _scale_images(images):
    return np.array(images, dtype=np.float32) / 255

  def _as_int32(targets):
    return np.array(targets, dtype=np.int32)

  return (_scale_images(train_images), _as_int32(train_targets),
          _scale_images(test_images), _as_int32(test_targets))
 def main(unused_argv):
  """Trains a CNN on MNIST and runs membership inference attacks on it.

  The attack hook is passed to every train() call, and one more attack is run
  after the last epoch.

  Args:
    unused_argv: positional arguments passed in by app.run; not used
  """
  tf.logging.set_verbosity(tf.logging.INFO)
  logging.set_verbosity(logging.INFO)
  logging.set_stderrthreshold(logging.INFO)
  logging.get_absl_handler().use_absl_log_file()

  # Load training and test data.
  train_data, train_labels, test_data, test_labels = load_mnist()

  # Instantiate the tf.Estimator.
  mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                            model_dir=FLAGS.model_dir)

  # A function to construct input_fn given (data, label), to be used by the
  # membership inference training hook.
  def input_fn_constructor(x, y):
    return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)

  with tf.Graph().as_default():
    # Get a summary writer for the hook to write to tensorboard.
    # Can set summary_writer to None if not needed.
    if FLAGS.model_dir:
      summary_writer = tf.summary.FileWriter(FLAGS.model_dir)
    else:
      summary_writer = None
    mia_hook = MembershipInferenceTrainingHook(mnist_classifier,
                                               (train_data, train_labels),
                                               (test_data, test_labels),
                                               input_fn_constructor,
                                               [],
                                               summary_writer)

  # Create tf.Estimator input functions for the training and test data.
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'x': train_data},
      y=train_labels,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.epochs,
      shuffle=True)
  eval_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False)

  # Training loop. The epoch length is derived from the loaded data instead of
  # hard-coding the size of the training set.
  steps_per_epoch = train_data.shape[0] // FLAGS.batch_size
  try:
    for epoch in range(1, FLAGS.epochs + 1):
      # Train the model, with the membership inference hook.
      mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch,
                             hooks=[mia_hook])
      # Evaluate the model and print results
      eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
      test_accuracy = eval_results['accuracy']
      print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))
  finally:
    # The hook is only used inside the loop above, so the writer it writes to
    # can be released here even when training fails.
    if summary_writer is not None:
      summary_writer.close()

  print('End of training attack')
  run_attack_on_tf_estimator_model(mnist_classifier,
                                   (train_data, train_labels),
                                   (test_data, test_labels),
                                   input_fn_constructor,
                                   ['lr'])
 # Script entry point: hand control to absl's app.run with main as the target.
 if __name__ == '__main__':
  app.run(main)
--- a/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/tf_estimator_evaluation_test.py
@ -0,0 +1,107 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Tests for tensorflow_privacy.privacy.membership_inference_attack.tf_estimator_evaluation."""
 from absl.testing import absltest
 import numpy as np
 import tensorflow.compat.v1 as tf
 from tensorflow_privacy.privacy.membership_inference_attack import tf_estimator_evaluation
 class UtilsTest(absltest.TestCase):
  """Tests for the tf estimator membership inference attack functions."""

  def setUp(self):
    """Builds random data, an untrained classifier and its input functions.

    The fixtures live in setUp rather than in an overridden __init__, so the
    TestCase constructor keeps its original signature.
    """
    super().setUp()
    self.ntrain, self.ntest = 50, 100
    self.nclass = 5
    self.ndim = 10

    # Generate random training and test data
    self.train_data = np.random.rand(self.ntrain, self.ndim)
    self.test_data = np.random.rand(self.ntest, self.ndim)
    self.train_labels = np.random.randint(self.nclass, size=self.ntrain)
    self.test_labels = np.random.randint(self.nclass, size=self.ntest)

    # Define a simple model function
    def model_fn(features, labels, mode):
      """Model function for logistic regression."""
      del labels
      input_layer = tf.reshape(features['x'], [-1, self.ndim])
      logits = tf.keras.layers.Dense(self.nclass).apply(input_layer)
      # Define the PREDICT mode because we only need that
      if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = tf.nn.softmax(logits)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Define the classifier, input_fn for training and test data
    self.classifier = tf.estimator.Estimator(model_fn=model_fn)
    self.input_fn_train = tf.estimator.inputs.numpy_input_fn(
        x={'x': self.train_data}, y=self.train_labels, num_epochs=1,
        shuffle=False)
    self.input_fn_test = tf.estimator.inputs.numpy_input_fn(
        x={'x': self.test_data}, y=self.test_labels, num_epochs=1,
        shuffle=False)

  def test_calculate_losses(self):
    """Test calculating the loss."""
    pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier,
                                                          self.input_fn_train,
                                                          self.train_labels)
    self.assertEqual(pred.shape, (self.ntrain, self.nclass))
    self.assertEqual(loss.shape, (self.ntrain,))

    pred, loss = tf_estimator_evaluation.calculate_losses(self.classifier,
                                                          self.input_fn_test,
                                                          self.test_labels)
    self.assertEqual(pred.shape, (self.ntest, self.nclass))
    self.assertEqual(loss.shape, (self.ntest,))

  def test_run_attack_helper(self):
    """Test the attack."""
    results = tf_estimator_evaluation.run_attack_helper(self.classifier,
                                                        self.input_fn_train,
                                                        self.input_fn_test,
                                                        self.train_labels,
                                                        self.test_labels,
                                                        [])
    self.assertIsInstance(results, dict)
    self.assertIn('all_thresh_loss_auc', results)
    self.assertIn('all_thresh_loss_advantage', results)

  def test_run_attack_on_tf_estimator_model(self):
    """Test the attack on the final models."""
    def input_fn_constructor(x, y):
      return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)

    results = tf_estimator_evaluation.run_attack_on_tf_estimator_model(
        self.classifier,
        (self.train_data, self.train_labels),
        (self.test_data, self.test_labels),
        input_fn_constructor,
        [])
    self.assertIsInstance(results, dict)
    self.assertIn('all_thresh_loss_auc', results)
    self.assertIn('all_thresh_loss_advantage', results)
 # Script entry point: run the tests of this file through absltest.
 if __name__ == '__main__':
  absltest.main()
--- a/tensorflow_privacy/privacy/membership_inference_attack/utils.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/utils.py
@ -216,3 +216,23 @@ def compute_performance_metrics(true_labels: np.ndarray,
      'advantage': advantage,
  })
  return ensure_1d(results)
 # ------------------------------------------------------------------------------
 #  Loss functions
 # ------------------------------------------------------------------------------
 def log_loss(y, pred, small_value=1e-8):
  """Compute the cross entropy loss.

  Args:
    y: array-like of integer class labels, y[i] is the true label (scalar) of
      the i-th sample; it is flattened, so a column vector is accepted too
    pred: array-like, pred[i] is the probability vector of the i-th sample
    small_value: np.log can become -inf if the probability is too close to 0,
      so the probability is clipped below by small_value.

  Returns:
    the cross-entropy loss of each sample, as a 1-D numpy array
  """
  # Flatten the labels: with an (n, 1) label array the fancy indexing below
  # would broadcast to an (n, n) result instead of one loss per sample.
  y = np.asarray(y).reshape(-1)
  pred = np.asarray(pred)
  # Probability assigned to the true class of every sample.
  true_class_prob = pred[np.arange(y.size), y]
  return -np.log(np.maximum(true_class_prob, small_value))
--- a/tensorflow_privacy/privacy/membership_inference_attack/utils_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/utils_test.py
@ -100,6 +100,48 @@ class UtilsTest(absltest.TestCase):
      self.assertEqual(x_test.shape, (n_test, 11))
      self.assertEqual(y_test.shape, (n_test,))
  def test_log_loss(self):
    """Checks log_loss on binary, multiclass and boundary probabilities."""
    # Binary case with a few normal values.
    binary_pred = np.array([[0.01, 0.99], [0.1, 0.9], [0.25, 0.75], [0.5, 0.5],
                            [0.75, 0.25], [0.9, 0.1], [0.99, 0.01]])
    # Expected losses when the true label of every sample is 0, resp. 1.
    binary_expected = {
        0: np.array([4.60517019, 2.30258509, 1.38629436, 0.69314718,
                     0.28768207, 0.10536052, 0.01005034]),
        1: np.array([0.01005034, 0.10536052, 0.28768207, 0.69314718,
                     1.38629436, 2.30258509, 4.60517019])
    }
    for label, expected in binary_expected.items():
      y = label * np.ones(shape=binary_pred.shape[0], dtype=int)
      np.testing.assert_allclose(
          utils.log_loss(y, binary_pred), expected, atol=1e-7)

    # Multiclass case with a few normal values
    # (values from http://bit.ly/RJJHWA)
    multi_pred = np.array([[0.2, 0.7, 0.1], [0.6, 0.2, 0.2], [0.6, 0.1, 0.3],
                           [0.99, 0.002, 0.008]])
    # Expected losses when the true label of every sample is 0, 1, resp. 2.
    multi_expected = {
        0: np.array([1.60943791, 0.51082562, 0.51082562, 0.01005034]),
        1: np.array([0.35667494, 1.60943791, 2.30258509, 6.2146081]),
        2: np.array([2.30258509, 1.60943791, 1.2039728, 4.82831374])
    }
    for label, expected in multi_expected.items():
      y = label * np.ones(shape=multi_pred.shape[0], dtype=int)
      np.testing.assert_allclose(
          utils.log_loss(y, multi_pred), expected, atol=1e-7)

    # Boundary values 0 and 1: the first sample has probability 0 for its
    # true class and is clipped to small_value, the second has probability 1.
    boundary_pred = np.array([[0, 1]] * 2)
    boundary_y = np.array([0, 1])
    clipped_cases = zip([1e-8, 1e-20, 1e-50],
                        [18.42068074, 46.05170186, 115.12925465])
    for small_value, clipped_loss in clipped_cases:
      loss = utils.log_loss(boundary_y, boundary_pred, small_value)
      np.testing.assert_allclose(loss, np.array([clipped_loss, 0]),
                                 atol=1e-7)
 # Script entry point: run the tests of this file through absltest.
 if __name__ == '__main__':
  absltest.main()