The new API for the membership inference attack.

1. Colab and Keras/TF estimator integration still use the old API and will be updated in subsequent CLs. 2. After the old API is dropped from membership_inference_attack.py, membership_inference_attack_new.py will be renamed to membership_inference_attack.py. PiperOrigin-RevId: 325823046
2020-08-10 09:43:58 -07:00 · 2020-08-10 09:43:58 -07:00 · 43a0e4be8a
commit 43a0e4be8a
parent 68651eeddc
9 changed files with 1279 additions and 1 deletions
--- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
@ -0,0 +1,322 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Data structures representing attack inputs, configuration, outputs."""
 import enum
 import pickle
 from typing import Any, Iterable, Union
 from dataclasses import dataclass
 import numpy as np
 from sklearn import metrics
 # Presumably a label for the slice covering the entire dataset.
 # NOTE(review): str(SingleSliceSpec()) yields 'Entire dataset', which differs
 # from this value - confirm which form the consumers of this constant expect.
 ENTIRE_DATASET_SLICE_STR = 'SingleSliceSpec(Entire dataset)'
 class SlicingFeature(enum.Enum):
  """Enum with features by which slicing is available.

  CLASS slices by ground-truth class, PERCENTILE by loss percentile range and
  CORRECTLY_CLASSIFIED by whether the attacked model classified the example
  correctly.
  """
  CLASS = 'class'
  PERCENTILE = 'percentile'
  CORRECTLY_CLASSIFIED = 'correctly_classified'

  @classmethod
  def _missing_(cls, value):
    """Resolves the historical misspelling of the CORRECTLY_CLASSIFIED value.

    The value used to be spelled 'correctly_classfied'. Lookups by value (and
    value-based unpickling) with the old spelling keep working.

    Args:
      value: the value that did not match any member.

    Returns:
      The matching member, or None so that enum raises its usual ValueError.
    """
    if value == 'correctly_classfied':
      return cls.CORRECTLY_CLASSIFIED
    return None
@dataclass
class SingleSliceSpec:
  """Describes one slice of the data.

  A slice is selected through a single feature: either one concrete value
  (e.g. all examples of a given classification class) or a set of values
  (e.g. a range of percentiles of the attacked model's loss).

  A `feature` of None denotes the slice that covers the entire dataset.
  """

  feature: SlicingFeature = None
  value: Any = None

  @property
  def entire_dataset(self):
    """Whether this spec selects the whole dataset (no feature is set)."""
    return self.feature is None

  def __str__(self):
    """Returns a human-readable label for the slice."""
    if self.entire_dataset:
      return 'Entire dataset'
    if self.feature != SlicingFeature.PERCENTILE:
      return f'{self.feature.name}={self.value}'
    # For percentile slices `value` is a (from, to) pair, which fills both
    # placeholders of the format string.
    return 'Loss percentiles: %d-%d' % self.value
@dataclass
 class SlicingSpec:
  """Specification of a slicing procedure.
  Each field that is set requests slicing along a different dimension; the
  requested slicings are independent of each other.
  """
  # When set to true, one of the slices is the whole dataset.
  entire_dataset: bool = True
  # Used in classification tasks for slicing by classes. It is assumed that
  # classes are integers 0, 1, ... number of classes - 1. When true, one slice
  # per class is generated. An int requests the slice of that single class, and
  # an iterable of ints requests one slice per listed class.
  by_class: Union[bool, Iterable[int], int] = False
  # If true, it generates 10 slices for percentiles of the loss - 0-10%, 10-20%,
  # ... 90-100%.
  by_percentiles: bool = False
  # When true, a slice for correctly classified and a slice for misclassified
  # examples will be generated.
  by_classification_correctness: bool = False
 class AttackType(enum.Enum):
  """Enumerates the supported kinds of membership inference attack."""
  LOGISTIC_REGRESSION = 'lr'
  MULTI_LAYERED_PERCEPTRON = 'mlp'
  RANDOM_FOREST = 'rf'
  K_NEAREST_NEIGHBORS = 'knn'
  THRESHOLD_ATTACK = 'threshold'

  @property
  def is_trained_attack(self):
    """Tells whether the attack needs a trained model (all but threshold)."""
    return self is not AttackType.THRESHOLD_ATTACK

  def __str__(self):
    """Returns the bare member name, e.g. LOGISTIC_REGRESSION."""
    # The default enum str would be 'AttackType.LOGISTIC_REGRESSION'.
    return self.name
@dataclass
class AttackInputData:
  """Input data for running an attack.

  This includes only the data, and not configuration. Every field comes as a
  train/test pair; `validate` checks that each pair is either fully set or
  fully unset.
  """

  logits_train: np.ndarray = None
  logits_test: np.ndarray = None

  # Contains ground-truth classes. Classes are assumed to be integers starting
  # from 0.
  labels_train: np.ndarray = None
  labels_test: np.ndarray = None

  # Explicitly specified loss. If provided, this is used instead of deriving
  # loss from logits and labels
  loss_train: np.ndarray = None
  loss_test: np.ndarray = None

  @property
  def num_classes(self):
    """Number of classes, derived as the largest label seen plus one.

    Raises:
      ValueError: if labels_train or labels_test is not set.
    """
    if self.labels_train is None or self.labels_test is None:
      raise ValueError(
          "Can't identify the number of classes as no labels were provided. "
          'Please set labels_train and labels_test')
    return int(max(np.max(self.labels_train), np.max(self.labels_test))) + 1

  @staticmethod
  def _get_loss(logits: np.ndarray, true_labels: np.ndarray):
    """Computes per-example softmax cross-entropy from logits and labels.

    Args:
      logits: array of shape (num_examples, num_classes) with raw scores.
      true_labels: array of shape (num_examples,) with class indices. Float
        arrays holding integral values are accepted and cast to integers.

    Returns:
      Array of shape (num_examples,) with -log(softmax(logits)[true_label]).

    Raises:
      ValueError: if logits or labels are missing.
    """
    if logits is None or true_labels is None:
      raise ValueError(
          "Can't derive the loss: set the loss explicitly or provide both "
          'logits and labels')
    logits = np.asarray(logits, dtype=np.float64)
    labels = np.asarray(true_labels).astype(np.int64)
    # Subtracting the row maximum keeps exp() from overflowing and does not
    # change the result, since softmax is invariant to constant shifts.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_normalizer = np.log(np.sum(np.exp(shifted), axis=1))
    return log_normalizer - shifted[np.arange(shifted.shape[0]), labels]

  @staticmethod
  def _get_size(loss, logits, labels):
    """Returns the number of examples from the first field that is set."""
    if loss is not None:
      return loss.size
    if logits is not None:
      return logits.shape[0]
    if labels is not None:
      return labels.shape[0]
    raise ValueError('At least one of labels, logits or losses should be set')

  def get_loss_train(self):
    """Calculates cross-entropy losses for the training set.

    Returns:
      loss_train when it was provided explicitly, otherwise the cross-entropy
      derived from logits_train and labels_train.
    """
    if self.loss_train is not None:
      return self.loss_train
    return self._get_loss(self.logits_train, self.labels_train)

  def get_loss_test(self):
    """Calculates cross-entropy losses for the test set.

    Returns:
      loss_test when it was provided explicitly, otherwise the cross-entropy
      derived from logits_test and labels_test.
    """
    if self.loss_test is not None:
      return self.loss_test
    return self._get_loss(self.logits_test, self.labels_test)

  def get_train_size(self):
    """Returns size of the training set."""
    return self._get_size(self.loss_train, self.logits_train,
                          self.labels_train)

  def get_test_size(self):
    """Returns size of the test set."""
    return self._get_size(self.loss_test, self.logits_test, self.labels_test)

  def validate(self):
    """Validates the inputs.

    Raises:
      ValueError: if only one half of a train/test pair is set, or if no field
        is set at all.
    """
    if (self.loss_train is None) != (self.loss_test is None):
      raise ValueError(
          'loss_test and loss_train should both be either set or unset')
    if (self.logits_train is None) != (self.logits_test is None):
      raise ValueError(
          'logits_train and logits_test should both be either set or unset')
    if (self.labels_train is None) != (self.labels_test is None):
      raise ValueError(
          'labels_train and labels_test should both be either set or unset')
    if (self.labels_train is None and self.loss_train is None and
        self.logits_train is None):
      raise ValueError('At least one of labels, logits or losses should be set')
    # TODO(b/161366709): Add checks for equal sizes
@dataclass
class RocCurve:
  """ROC curve of a membership inference classifier."""

  # Thresholds that define the points of the curve. They are not explicitly
  # part of the curve itself and are kept for debugging purposes.
  thresholds: np.ndarray

  # True positive rate at each threshold.
  tpr: np.ndarray

  # False positive rate at each threshold.
  fpr: np.ndarray

  def get_auc(self):
    """Returns the area under the curve (AUC)."""
    area = metrics.auc(self.fpr, self.tpr)
    return area

  def get_attacker_advantage(self):
    """Returns the membership attacker's (adversary's) advantage.

    The metric follows Definition 4 of https://arxiv.org/abs/1709.01604, with
    the difference that the maximum advantage over all available classifier
    thresholds is reported.

    Returns:
      a single float number with membership attacker's advantage.
    """
    gaps = np.abs(self.tpr - self.fpr)
    return max(gaps)
@dataclass
class SingleAttackResult:
  """Outcome of running one attack on one data slice."""

  # Data slice this result was calculated for.
  slice_spec: SingleSliceSpec

  attack_type: AttackType

  roc_curve: RocCurve  # for drawing and metrics calculation

  # TODO(b/162693190): Add more metrics. Think which info we should store
  #  to derive metrics like f1_score or accuracy. Should we store labels and
  #  predictions, or rather some aggregate data?

  def get_attacker_advantage(self):
    """Returns the attacker advantage computed from this result's ROC curve."""
    curve = self.roc_curve
    return curve.get_attacker_advantage()

  def get_auc(self):
    """Returns the AUC computed from this result's ROC curve."""
    curve = self.roc_curve
    return curve.get_auc()
@dataclass
class AttackResults:
  """Results from running multiple attacks."""

  # add metadata, such as parameters of attack evaluation, input data etc
  # The methods below index into this collection and append to it, so in
  # practice it has to be a list.
  single_attack_results: Iterable['SingleAttackResult']

  def calculate_pd_dataframe(self):
    """Placeholder for exporting all metrics; currently returns None."""
    # returns all metrics as a Pandas DataFrame
    return

  def summary(self, by_slices=False) -> str:
    """Provides a summary of the metrics.

    The summary provides the best-performing attacks for each requested data
    slice.

    Args:
      by_slices : whether to prepare a per-slice summary.

    Returns:
      A string with a summary of all the metrics.
    """
    summary = []

    # Summary over all slices
    max_auc_result_all = self.get_result_with_max_auc()
    summary.append('Best-performing attacks over all slices')
    summary.append(
        '  %s achieved an AUC of %.2f on slice %s' %
        (max_auc_result_all.attack_type, max_auc_result_all.get_auc(),
         max_auc_result_all.slice_spec))

    max_advantage_result_all = self.get_result_with_max_attacker_advantage()
    summary.append('  %s achieved an advantage of %.2f on slice %s' %
                   (max_advantage_result_all.attack_type,
                    max_advantage_result_all.get_attacker_advantage(),
                    max_advantage_result_all.slice_spec))

    if by_slices:
      slice_dict = self._group_results_by_slice()
      # With a single slice the per-slice section would only repeat the
      # overall summary, so it is omitted.
      if len(slice_dict) > 1:
        for slice_str, results in slice_dict.items():
          summary.append('\nBest-performing attacks over slice: "%s"' %
                         slice_str)
          max_auc_result = results.get_result_with_max_auc()
          summary.append(
              '  %s achieved an AUC of %.2f' %
              (max_auc_result.attack_type, max_auc_result.get_auc()))
          max_advantage_result = (
              results.get_result_with_max_attacker_advantage())
          summary.append('  %s achieved an advantage of %.2f' %
                         (max_advantage_result.attack_type,
                          max_advantage_result.get_attacker_advantage()))

    return '\n'.join(summary)

  def _group_results_by_slice(self):
    """Groups AttackResults into a dictionary keyed by the slice.

    Returns:
      A dict mapping str(slice_spec) to an AttackResults holding the results
      of that slice, in their original order.
    """
    slice_dict = {}
    for attack_result in self.single_attack_results:
      slice_str = str(attack_result.slice_spec)
      if slice_str not in slice_dict:
        slice_dict[slice_str] = AttackResults([])
      slice_dict[slice_str].single_attack_results.append(attack_result)
    return slice_dict

  def get_result_with_max_auc(self) -> 'SingleAttackResult':
    """Get the result with maximum AUC for all attacks and slices.

    Prints a warning when the lowest AUC is below 0.4, which may point to a
    bug in the attack classifier.
    """
    aucs = [result.get_auc() for result in self.single_attack_results]
    lowest_auc = min(aucs)
    if lowest_auc < 0.4:
      # The whole message is a single format string: `%` binds tighter than
      # `+`, so formatting only one half of a concatenation raises TypeError.
      print('Suspiciously low AUC detected: %.2f. '
            'There might be a bug in the classifier' % lowest_auc)
    return self.single_attack_results[np.argmax(aucs)]

  def get_result_with_max_attacker_advantage(self) -> 'SingleAttackResult':
    """Get the result with maximum advantage for all attacks and slices."""
    advantages = [
        result.get_attacker_advantage()
        for result in self.single_attack_results
    ]
    return self.single_attack_results[np.argmax(advantages)]

  def save(self, filepath):
    """Saves self to a pickle file."""
    with open(filepath, 'wb') as out:
      pickle.dump(self, out)

  @classmethod
  def load(cls, filepath):
    """Loads AttackResults from a pickle file.

    Unpickling can execute arbitrary code, so only load files that come from a
    trusted source.
    """
    with open(filepath, 'rb') as inp:
      return pickle.load(inp)
--- a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py
@ -0,0 +1,143 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Specifying and creating AttackInputData slices."""
 import collections
 import copy
 from typing import List
 import numpy as np
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
 def _slice_if_not_none(a, idx):
  return None if a is None else a[idx]
 def _slice_data_by_indices(data: AttackInputData, idx_train,
                            idx_test) -> AttackInputData:
  """Builds a new AttackInputData restricted to the given indices.

  Train fields are indexed with idx_train and test fields with idx_test.
  Fields that are None in `data` stay None in the result.
  """
  return AttackInputData(
      # Train data.
      logits_train=_slice_if_not_none(data.logits_train, idx_train),
      labels_train=_slice_if_not_none(data.labels_train, idx_train),
      loss_train=_slice_if_not_none(data.loss_train, idx_train),
      # Test data.
      logits_test=_slice_if_not_none(data.logits_test, idx_test),
      labels_test=_slice_if_not_none(data.labels_test, idx_test),
      loss_test=_slice_if_not_none(data.loss_test, idx_test))
 def _slice_by_class(data: AttackInputData, class_value: int) -> AttackInputData:
  """Keeps only the examples whose label equals class_value."""
  train_mask = data.labels_train == class_value
  test_mask = data.labels_test == class_value
  return _slice_data_by_indices(data, train_mask, test_mask)
 def _slice_by_percentiles(data: AttackInputData, from_percentile: float,
                           to_percentile: float):
  """Keeps examples whose loss lies between two percentiles of all losses.

  The percentiles are computed over the train and test losses together, and
  both bounds are inclusive.
  """
  train_losses = data.get_loss_train()
  test_losses = data.get_loss_test()

  # Translate the percentile bounds into loss values.
  all_losses = np.concatenate((train_losses, test_losses))
  lower = np.percentile(all_losses, from_percentile)
  upper = np.percentile(all_losses, to_percentile)

  train_mask = (lower <= train_losses) & (train_losses <= upper)
  test_mask = (lower <= test_losses) & (test_losses <= upper)
  return _slice_data_by_indices(data, train_mask, test_mask)
 def _indices_by_classification(logits, labels, correctly_classified):
  idx_correct = labels == np.argmax(logits, axis=1)
  return idx_correct if correctly_classified else np.invert(idx_correct)
 def _slice_by_classification_correctness(data: AttackInputData,
                                          correctly_classified: bool):
  """Keeps either the correctly or the incorrectly classified examples."""
  train_mask = _indices_by_classification(
      data.logits_train, data.labels_train, correctly_classified)
  test_mask = _indices_by_classification(
      data.logits_test, data.labels_test, correctly_classified)
  return _slice_data_by_indices(data, train_mask, test_mask)
 def get_single_slice_specs(slicing_spec: SlicingSpec,
                            num_classes: int = None) -> List[SingleSliceSpec]:
  """Returns slices of data according to slicing_spec.

  Args:
    slicing_spec: the requested slicings. `by_class` may be a bool (True means
      one slice per class in range(num_classes)), a single int class, or an
      iterable of classes.
    num_classes: number of classes; required only when by_class is True.

  Returns:
    A list of SingleSliceSpec in this order: entire dataset, classes,
    percentiles, classification correctness.
  """
  # The abstract base classes live in collections.abc; the aliases in the
  # `collections` namespace were removed in Python 3.10.
  from collections import abc as collections_abc  # pylint: disable=g-import-not-at-top

  result = []
  if slicing_spec.entire_dataset:
    result.append(SingleSliceSpec())

  # Create slices by class.
  by_class = slicing_spec.by_class
  # bool has to be checked before int, because bool is a subclass of int.
  if isinstance(by_class, bool):
    if by_class:
      assert num_classes, "When by_class == True, num_classes should be given."
      assert 0 <= num_classes <= 1000, (
          f"Too much classes for slicing by classes. "
          f"Found {num_classes}.")
      for c in range(num_classes):
        result.append(SingleSliceSpec(SlicingFeature.CLASS, c))
  elif isinstance(by_class, int):
    result.append(SingleSliceSpec(SlicingFeature.CLASS, by_class))
  elif isinstance(by_class, collections_abc.Iterable):
    for c in by_class:
      result.append(SingleSliceSpec(SlicingFeature.CLASS, c))

  # Create slices by percentiles
  if slicing_spec.by_percentiles:
    for percent in range(0, 100, 10):
      result.append(
          SingleSliceSpec(SlicingFeature.PERCENTILE, (percent, percent + 10)))

  # Create slices by correctness of the classifications.
  if slicing_spec.by_classification_correctness:
    result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True))
    result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, False))

  return result
 def get_slice(data: AttackInputData,
               slice_spec: SingleSliceSpec) -> AttackInputData:
  """Extracts the slice of `data` described by slice_spec.

  The returned object additionally carries the spec in its `slice_spec`
  attribute.

  Raises:
    ValueError: if the feature of slice_spec is not a known slicing feature.
  """
  if slice_spec.entire_dataset:
    # A shallow copy, so that tagging the result below leaves `data` untouched.
    sliced = copy.copy(data)
  elif slice_spec.feature == SlicingFeature.CLASS:
    sliced = _slice_by_class(data, slice_spec.value)
  elif slice_spec.feature == SlicingFeature.PERCENTILE:
    lower_percentile, upper_percentile = slice_spec.value
    sliced = _slice_by_percentiles(data, lower_percentile, upper_percentile)
  elif slice_spec.feature == SlicingFeature.CORRECTLY_CLASSIFIED:
    sliced = _slice_by_classification_correctness(data, slice_spec.value)
  else:
    raise ValueError(f'Unknown slice spec feature "{slice_spec.feature}"')

  sliced.slice_spec = slice_spec
  return sliced
--- a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py
@ -0,0 +1,180 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Tests for tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing."""
 from absl.testing import absltest
 import numpy as np
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
 def _are_all_fields_equal(lhs, rhs) -> bool:
  return vars(lhs) == vars(rhs)
 def _are_lists_equal(lhs, rhs) -> bool:
  """Tells whether both lists have equal length and pairwise equal fields."""
  if len(lhs) != len(rhs):
    return False
  return all(_are_all_fields_equal(l, r) for l, r in zip(lhs, rhs))
 class SingleSliceSpecsTest(absltest.TestCase):
  """Tests for get_single_slice_specs."""

  ENTIRE_DATASET_SLICE = SingleSliceSpec()

  def test_no_slices(self):
    # Disabling the only default slicing must yield no slices at all.
    output = get_single_slice_specs(SlicingSpec(entire_dataset=False))
    self.assertTrue(_are_lists_equal(output, []))

  def test_entire_dataset(self):
    # The default spec requests just the entire-dataset slice.
    output = get_single_slice_specs(SlicingSpec())
    self.assertTrue(_are_lists_equal(output, [self.ENTIRE_DATASET_SLICE]))

  def test_slice_by_classes(self):
    n_classes = 5
    class_slices = [
        SingleSliceSpec(SlicingFeature.CLASS, c) for c in range(n_classes)
    ]
    expected = [self.ENTIRE_DATASET_SLICE] + class_slices
    output = get_single_slice_specs(SlicingSpec(by_class=True), n_classes)
    self.assertTrue(_are_lists_equal(output, expected))

  def test_slice_by_percentiles(self):
    spec = SlicingSpec(entire_dataset=False, by_percentiles=True)
    output = get_single_slice_specs(spec)
    self.assertLen(output, 10)
    # Spot-check the first and the sixth percentile range.
    first_range = SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 10))
    sixth_range = SingleSliceSpec(SlicingFeature.PERCENTILE, (50, 60))
    self.assertTrue(_are_all_fields_equal(output[0], first_range))
    self.assertTrue(_are_all_fields_equal(output[5], sixth_range))

  def test_slice_by_correcness(self):
    spec = SlicingSpec(
        entire_dataset=False, by_classification_correctness=True)
    output = get_single_slice_specs(spec)
    self.assertLen(output, 2)
    # The correctly classified slice comes first.
    correct_slice = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)
    self.assertTrue(_are_all_fields_equal(output[0], correct_slice))

  def test_slicing_by_multiple_features(self):
    spec = SlicingSpec(
        entire_dataset=True,
        by_class=True,
        by_percentiles=True,
        by_classification_correctness=True)
    n_classes = 10
    # One slice per class, the entire dataset slice, 10 percentile slices and
    # 2 classification correctness slices.
    expected_slices = n_classes + 1 + 10 + 2
    output = get_single_slice_specs(spec, n_classes)
    self.assertLen(output, expected_slices)
 class GetSliceTest(absltest.TestCase):
  """Tests for get_slice."""

  def setUp(self):
    """Creates fresh test data for a 3 class classification task."""
    super().setUp()
    logits_train = np.array([[0, 1, 0], [2, 0, 3], [4, 5, 0], [6, 7, 0]])
    logits_test = np.array([[10, 0, 11], [12, 13, 0], [14, 15, 0], [0, 16, 17]])
    labels_train = np.array([1, 0, 1, 2])
    labels_test = np.array([1, 2, 0, 2])
    loss_train = np.array([2, 0.25, 4, 3])
    loss_test = np.array([0.5, 3.5, 7, 4.5])
    self.input_data = AttackInputData(logits_train, logits_test, labels_train,
                                      labels_test, loss_train, loss_test)

  def test_slice_entire_dataset(self):
    entire_dataset_slice = SingleSliceSpec()
    output = get_slice(self.input_data, entire_dataset_slice)
    # get_slice tags only the returned object with the spec, so the fixture is
    # tagged as well before all fields are compared.
    expected = self.input_data
    expected.slice_spec = entire_dataset_slice
    self.assertIsNot(output, expected)
    self.assertTrue(_are_all_fields_equal(output, expected))

  def test_slice_by_class(self):
    class_index = 1
    class_slice = SingleSliceSpec(SlicingFeature.CLASS, class_index)
    output = get_slice(self.input_data, class_slice)

    # Check logits.
    self.assertLen(output.logits_train, 2)
    self.assertLen(output.logits_test, 1)
    self.assertTrue((output.logits_train[1] == [4, 5, 0]).all())

    # Check labels.
    self.assertLen(output.labels_train, 2)
    self.assertLen(output.labels_test, 1)
    self.assertTrue((output.labels_train == class_index).all())
    self.assertTrue((output.labels_test == class_index).all())

    # Check losses
    self.assertLen(output.loss_train, 2)
    self.assertLen(output.loss_test, 1)
    self.assertTrue((output.loss_train == [2, 4]).all())
    self.assertTrue((output.loss_test == [0.5]).all())

  def test_slice_by_percentile(self):
    percentile_slice = SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 50))
    output = get_slice(self.input_data, percentile_slice)

    # Check logits.
    self.assertLen(output.logits_train, 3)
    self.assertLen(output.logits_test, 1)
    self.assertTrue((output.logits_test[0] == [10, 0, 11]).all())

    # Check labels.
    self.assertLen(output.labels_train, 3)
    self.assertLen(output.labels_test, 1)
    self.assertTrue((output.labels_train == [1, 0, 2]).all())
    self.assertTrue((output.labels_test == [1]).all())

  def test_slice_by_correctness(self):
    misclassified_slice = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED,
                                          False)
    output = get_slice(self.input_data, misclassified_slice)

    # Check logits.
    self.assertLen(output.logits_train, 2)
    self.assertLen(output.logits_test, 3)
    self.assertTrue((output.logits_train[1] == [6, 7, 0]).all())
    self.assertTrue((output.logits_test[1] == [12, 13, 0]).all())

    # Check labels.
    self.assertLen(output.labels_train, 2)
    self.assertLen(output.labels_test, 3)
    self.assertTrue((output.labels_train == [0, 2]).all())
    self.assertTrue((output.labels_test == [1, 2, 0]).all())
 # Run the tests of this module when the file is executed as a script.
 if __name__ == '__main__':
  absltest.main()
--- a/tensorflow_privacy/privacy/membership_inference_attack/example.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/example.py
@ -0,0 +1,149 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """An example for the membership inference attacks.
 This is using a toy model based on classifying four spacial clusters of data.
 """
 import os
 import tempfile
 import numpy as np
 from tensorflow import keras
 from tensorflow.keras import layers
 from tensorflow.keras.utils import to_categorical
 from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
 def generate_random_cluster(center, scale, num_points):
  """Samples num_points normally distributed points around center.

  Standard normal noise is multiplied by scale and shifted by center; the
  result has one row per point and one column per coordinate of center.
  """
  shape = (num_points, len(center))
  unit_noise = np.random.normal(size=shape)
  return unit_noise * scale + center
 def generate_features_and_labels(samples_per_cluster=250, scale=0.1):
  """Generates four noised 3D clusters together with their labels.

  Returns:
    A (features, labels) tuple. The clusters are stacked in order, so the
    labels are samples_per_cluster zeros, then ones, twos and threes.
  """
  cluster_centers = [[0, 0, 0], [2, 0, 0], [0, 2, 0], [0, 0, 2]]

  # One cluster per center, sampled in the order of the centers.
  clusters = tuple(
      generate_random_cluster(
          center=cluster_center, scale=scale, num_points=samples_per_cluster)
      for cluster_center in cluster_centers)
  features = np.concatenate(clusters)

  # Cluster labels: 0, 1, 2 and 3
  labels = np.concatenate(
      tuple(
          np.full(samples_per_cluster, float(cluster_id))
          for cluster_id in range(len(cluster_centers))))

  return (features, labels)
 # Hint: Play with "noise_scale" for different levels of overlap between
 # the generated clusters. More noise makes the classification harder.
 noise_scale = 2
 # Train and test sets are generated independently with the same settings.
 training_features, training_labels = generate_features_and_labels(
    samples_per_cluster=250, scale=noise_scale)
 test_features, test_labels = generate_features_and_labels(
    samples_per_cluster=250, scale=noise_scale)
 # Labels start at 0, so the number of clusters is the largest label plus one.
 num_clusters = int(round(np.max(training_labels))) + 1
 # Hint: play with the number of layers to achieve different level of
 # over-fitting and observe its effects on membership inference performance.
 # NOTE(review): the last Dense layer applies ReLU before Softmax - confirm
 # that clipping negative pre-activations is intended here.
 model = keras.models.Sequential([
    layers.Dense(300, activation="relu"),
    layers.Dense(300, activation="relu"),
    layers.Dense(300, activation="relu"),
    layers.Dense(num_clusters, activation="relu"),
    layers.Softmax()
 ])
 model.compile(
    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
 # Labels are one-hot encoded to match the categorical cross-entropy loss.
 model.fit(
    training_features,
    to_categorical(training_labels, num_clusters),
    validation_data=(test_features, to_categorical(test_labels, num_clusters)),
    batch_size=64,
    epochs=10,
    shuffle=True)
 # Model outputs on both sets; the attack below derives its losses from them.
 training_pred = model.predict(training_features)
 test_pred = model.predict(test_features)
 def crossentropy(true_labels, predictions):
  """Evaluates the Keras binary cross-entropy of predictions against labels.

  The labels are one-hot encoded over num_clusters classes before the loss is
  computed.

  NOTE(review): the model is compiled with categorical_crossentropy, while
  this helper uses binary_crossentropy - confirm that this is intended.

  Args:
    true_labels: class indices of the examples.
    predictions: model outputs for the same examples.

  Returns:
    The evaluated loss values.
  """
  return keras.backend.eval(
      keras.losses.binary_crossentropy(
          keras.backend.variable(to_categorical(true_labels, num_clusters)),
          keras.backend.variable(predictions)))
 # Run the threshold and the logistic regression attack on the entire dataset
 # and on every class separately, using the losses computed above.
 attack_results = mia.run_attacks(
    AttackInputData(
        labels_train=training_labels,
        labels_test=test_labels,
        loss_train=crossentropy(training_labels, training_pred),
        loss_test=crossentropy(test_labels, test_pred)),
    SlicingSpec(entire_dataset=True, by_class=True),
    attack_types=(AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))
 # Example of saving the results to the file and loading them back.
 with tempfile.TemporaryDirectory() as tmpdirname:
  filepath = os.path.join(tmpdirname, "results.pickle")
  attack_results.save(filepath)
  loaded_results = AttackResults.load(filepath)
 # Print attack metrics
 for attack_result in attack_results.single_attack_results:
  print("Slice: %s" % attack_result.slice_spec)
  print("Attack type: %s" % attack_result.attack_type)
  print("AUC: %.2f" % attack_result.roc_curve.get_auc())
  print("Attacker advantage: %.2f\n" %
        attack_result.roc_curve.get_attacker_advantage())
 # The best AUC and the best advantage may belong to different attacks, so
 # each one is looked up with its own selector.
 max_auc_attacker = attack_results.get_result_with_max_auc()
 print("Attack type with max AUC: %s, AUC of %.2f" %
      (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))
 max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage()
 print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
      (max_advantage_attacker.attack_type,
       max_advantage_attacker.roc_curve.get_attacker_advantage()))
 # Print summary
 print("Summary without slices: \n")
 print(attack_results.summary(by_slices=False))
 print("Summary by slices: \n")
 print(attack_results.summary(by_slices=True))
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
@ -13,7 +13,12 @@
 # limitations under the License.
 # Lint as: python3
-"""Code that runs membership inference attacks based on the model outputs."""
+"""Code that runs membership inference attacks based on the model outputs.
 Warning: This file belongs to the old API for membership inference attacks. This
 file will be removed soon. membership_inference_attack_new.py contains the new
 API.
 """
 import collections
 import io
@ -354,6 +359,11 @@ def run_attack(loss_train: np.ndarray = None,
    results: Dictionary with the chosen vulnerability metric(s) for all ran
      attacks.
  """
  print(
      'Deprecation warning: function run_attack is '
      'deprecated and will be removed soon. '
      'Please use membership_inference_attack_new.run_attacks'
  )
  attacks = []
  features = {}
  # ---------- Check available data ----------
@ -529,6 +539,11 @@ def run_all_attacks(loss_train: np.ndarray = None,
  Returns:
    result: dictionary with all attack results
  """
  print(
      'Deprecation warning: function run_all_attacks is '
      'deprecated and will be removed soon. '
      'Please use membership_inference_attack_new.run_attacks'
  )
  metrics = ['auc', 'advantage']
  # Entire data
@ -631,6 +646,11 @@ def run_all_attacks_and_create_summary(
    result: a dictionary with all the distilled attack information summarized
      in the summarystring
  """
  print(
      'Deprecation warning: function run_all_attacks_and_create_summary is '
      'deprecated and will be removed soon. '
      'Please use membership_inference_attack_new.run_attacks'
  )
  summary = []
  metrics = ['auc', 'advantage']
  attack_classifiers = ['lr', 'knn']
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new.py
@ -0,0 +1,121 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Code that runs membership inference attacks based on the model outputs.
 This file belongs to the new API for membership inference attacks. This file
 will be renamed to membership_inference_attack.py after the old API is removed.
 """
 from typing import Iterable
 import numpy as np
 from sklearn import metrics
 from tensorflow_privacy.privacy.membership_inference_attack import models
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
 def _get_slice_spec(data: AttackInputData) -> SingleSliceSpec:
   """Returns the slice spec carried by `data`, or a default one.

   Args:
     data: attack input that may or may not have a `slice_spec` attribute.

   Returns:
     `data.slice_spec` when the attribute exists, otherwise a
     default-constructed SingleSliceSpec.
   """
   try:
     return data.slice_spec
   except AttributeError:
     return SingleSliceSpec()
 def run_trained_attack(attack_input: AttackInputData, attack_type: AttackType):
   """Runs a membership inference attack backed by a trained ML model.

   The attacker is trained on the train part of the split produced by
   models.create_attacker_data and evaluated on the test part of that split.

   Args:
     attack_input: data of the attacked model from which attacker features
       are built.
     attack_type: selects which attacker model is trained.

   Returns:
     SingleAttackResult with the slice spec of `attack_input`, the attack type
     and the ROC curve of the attacker on its held-out examples.

   Raises:
     NotImplementedError: if no attacker model exists for `attack_type`.
   """
   supported_attackers = (
       (AttackType.LOGISTIC_REGRESSION, models.LogisticRegressionAttacker),
       (AttackType.MULTI_LAYERED_PERCEPTRON,
        models.MultilayerPerceptronAttacker),
       (AttackType.RANDOM_FOREST, models.RandomForestAttacker),
       (AttackType.K_NEAREST_NEIGHBORS, models.KNearestNeighborsAttacker),
   )
   for supported_type, attacker_class in supported_attackers:
     if attack_type == supported_type:
       attacker = attacker_class()
       break
   else:
     raise NotImplementedError(
         'Attack type {} not implemented yet.'.format(attack_type))

   attacker_data = models.create_attacker_data(attack_input)
   attacker.train_model(attacker_data.features_train,
                        attacker_data.is_training_labels_train)

   # Score the held-out (shuffled) examples and turn the scores into a ROC
   # curve.
   held_out_predictions = attacker.predict(attacker_data.features_test)
   fpr, tpr, thresholds = metrics.roc_curve(
       attacker_data.is_training_labels_test, held_out_predictions)
   curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

   return SingleAttackResult(
       slice_spec=_get_slice_spec(attack_input),
       attack_type=attack_type,
       roc_curve=curve)
 def run_threshold_attack(attack_input: AttackInputData):
   """Runs the threshold attack, which uses the raw loss as the attack score.

   Train examples are labelled 0 and test examples are labelled 1; the
   concatenated train and test losses are the scores of the ROC curve.

   Args:
     attack_input: data of the attacked model; its sizes and losses are read.

   Returns:
     SingleAttackResult of type AttackType.THRESHOLD_ATTACK with the resulting
     ROC curve and the slice spec of `attack_input`.
   """
   membership_labels = np.concatenate(
       (np.zeros(attack_input.get_train_size()),
        np.ones(attack_input.get_test_size())))
   loss_scores = np.concatenate(
       (attack_input.get_loss_train(), attack_input.get_loss_test()))
   fpr, tpr, thresholds = metrics.roc_curve(membership_labels, loss_scores)
   curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
   return SingleAttackResult(
       slice_spec=_get_slice_spec(attack_input),
       attack_type=AttackType.THRESHOLD_ATTACK,
       roc_curve=curve)
 def run_attack(attack_input: AttackInputData, attack_type: AttackType):
   """Validates the input and runs a single attack on it.

   Args:
     attack_input: data of the attacked model; validated before the attack.
     attack_type: attack to run. Types flagged as `is_trained_attack` are
       dispatched to run_trained_attack, all others to run_threshold_attack.

   Returns:
     The result of the selected attack.
   """
   attack_input.validate()
   if not attack_type.is_trained_attack:
     return run_threshold_attack(attack_input)
   return run_trained_attack(attack_input, attack_type)
 def run_attacks(
     attack_input: AttackInputData,
     slicing_spec: SlicingSpec = None,
     attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)
 ) -> AttackResults:
   """Runs every requested attack on every requested slice of the data.

   Args:
     attack_input: data of the attacked model; validated before any attack.
     slicing_spec: describes the slices to attack. When None, only the entire
       dataset is attacked.
     attack_types: attacks to run on each slice. Any iterable is accepted,
       including one-shot iterators.

   Returns:
     AttackResults holding one SingleAttackResult per (slice, attack type)
     pair, ordered by slice first and attack type second.
   """
   attack_input.validate()

   # `attack_types` is only promised to be an Iterable. Materialize it once so
   # a one-shot iterator (e.g. a generator) is not exhausted after the first
   # slice, which would silently leave all later slices without attacks.
   attack_types = tuple(attack_types)

   if slicing_spec is None:
     slicing_spec = SlicingSpec(entire_dataset=True)
   input_slice_specs = get_single_slice_specs(slicing_spec,
                                              attack_input.num_classes)

   attack_results = []
   for single_slice_spec in input_slice_specs:
     # Slice once per spec and reuse the slice for all attack types.
     attack_input_slice = get_slice(attack_input, single_slice_spec)
     for attack_type in attack_types:
       attack_results.append(run_attack(attack_input_slice, attack_type))

   return AttackResults(single_attack_results=attack_results)
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new_test.py
@ -0,0 +1,77 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Tests for tensorflow_privacy.privacy.membership_inference_attack.membership_inference_attack_new."""
 from absl.testing import absltest
 import numpy as np
 from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
 def get_test_input(n_train, n_test):
   """Builds a reproducible AttackInputData fixture.

   A RandomState seeded with 4 draws two normal arrays of shape (n_train, 5)
   and (n_test, 5), each shifted by 0.2; two integer arrays cycling through
   0..4 accompany them. All four are passed positionally to AttackInputData
   (presumably train/test logits followed by train/test labels - confirm
   against the field order of AttackInputData).

   Args:
     n_train: number of rows generated for the train part.
     n_test: number of rows generated for the test part.

   Returns:
     The constructed AttackInputData.
   """
   rng = np.random.RandomState(4)
   # The two draws must stay in this order to keep the generated values stable.
   train_values = rng.randn(n_train, 5) + 0.2
   test_values = rng.randn(n_test, 5) + 0.2
   train_classes = np.array([index % 5 for index in range(n_train)])
   test_classes = np.array([index % 5 for index in range(n_test)])
   return AttackInputData(train_values, test_values, train_classes,
                          test_classes)
 class RunAttacksTest(absltest.TestCase):
   """Tests for run_attack and run_attacks of the new attack API."""

   def test_run_attacks_size(self):
     """Two attack types with the default SlicingSpec give two results."""
     attack_types = (AttackType.THRESHOLD_ATTACK,
                     AttackType.LOGISTIC_REGRESSION)
     result = mia.run_attacks(
         get_test_input(100, 100), SlicingSpec(), attack_types)

     self.assertLen(result.single_attack_results, 2)

   def test_run_attack_trained_sets_attack_type(self):
     """A trained attack reports its own attack type in the result."""
     attack_type = AttackType.LOGISTIC_REGRESSION
     result = mia.run_attack(get_test_input(100, 100), attack_type)

     self.assertEqual(result.attack_type, AttackType.LOGISTIC_REGRESSION)

   def test_run_attack_threshold_sets_attack_type(self):
     """The threshold attack reports its own attack type in the result."""
     attack_type = AttackType.THRESHOLD_ATTACK
     result = mia.run_attack(get_test_input(100, 100), attack_type)

     self.assertEqual(result.attack_type, AttackType.THRESHOLD_ATTACK)

   def test_run_attack_threshold_calculates_correct_auc(self):
     """The threshold attack on hand-picked losses reaches an AUC of ~0.83."""
     attack_input = AttackInputData(
         loss_train=np.array([0.1, 0.2, 1.3, 0.4, 0.5, 0.6]),
         loss_test=np.array([1.1, 1.2, 1.3, 0.4, 1.5, 1.6]))
     result = mia.run_attack(attack_input, AttackType.THRESHOLD_ATTACK)

     auc = result.roc_curve.get_auc()
     np.testing.assert_almost_equal(auc, 0.83, decimal=2)

   def test_run_attack_by_slice(self):
     """Slicing by class gives six results; the fourth one is for class 2."""
     result = mia.run_attacks(
         get_test_input(100, 100), SlicingSpec(by_class=True),
         (AttackType.THRESHOLD_ATTACK,))

     self.assertLen(result.single_attack_results, 6)
     expected_slice = SingleSliceSpec(SlicingFeature.CLASS, 2)
     fourth_result = result.single_attack_results[3]
     self.assertEqual(fourth_result.slice_spec, expected_slice)
 # Run the tests through absl's test runner when executed as a script.
 if __name__ == '__main__':
   absltest.main()
--- a/tensorflow_privacy/privacy/membership_inference_attack/models.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/models.py
@ -0,0 +1,207 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Trained models for membership inference attacks."""
 from dataclasses import dataclass
 import numpy as np
 from sklearn import ensemble
 from sklearn import linear_model
 from sklearn import model_selection
 from sklearn import neighbors
 from sklearn import neural_network
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
@dataclass
 class AttackerData:
   """Input data for an ML classifier attack.

   This includes only the data, and not configuration. The fields hold the
   two halves of a train/test split of the attacker's own dataset (see
   create_attacker_data); the field order is relied upon by positional
   construction.
   """
   # Features the attacker model is trained on.
   features_train: np.ndarray = None
   # Membership label for each row of features_train.
   # NOTE(review): despite the field name, create_attacker_data fills these
   # with 0 for examples of the attacked model's train set and 1 for examples
   # of its test set.
   is_training_labels_train: np.ndarray = None
   # Features the attacker model is evaluated on.
   features_test: np.ndarray = None
   # Membership label for each row of features_test, encoded the same way as
   # is_training_labels_train (0 = train example, 1 = test example).
   is_training_labels_test: np.ndarray = None
 def create_attacker_data(attack_input_data: AttackInputData,
                          test_fraction: float = 0.25) -> AttackerData:
   """Turns AttackInputData into a dataset for training ML attackers.

   Logits and losses are stacked into one feature matrix per example, the
   examples of the attacked model's train set are labelled 0 and those of its
   test set are labelled 1, and the result is randomly split into a part for
   training the attacker and a part for evaluating it.

   Args:
     attack_input_data: Original AttackInputData
     test_fraction: Fraction of the dataset to include in the test split.

   Returns:
     AttackerData with the features and labels of both parts of the split.
   """
   train_rows = _column_stack(attack_input_data.logits_train,
                              attack_input_data.get_loss_train())
   test_rows = _column_stack(attack_input_data.logits_test,
                             attack_input_data.get_loss_test())
   features_all = np.concatenate((train_rows, test_rows))

   train_labels = np.zeros(attack_input_data.get_train_size())
   test_labels = np.ones(attack_input_data.get_test_size())
   labels_all = np.concatenate((train_labels, test_labels))

   # Random split into the attacker's own train and test parts.
   split = model_selection.train_test_split(
       features_all, labels_all, test_size=test_fraction)
   features_train, features_test, labels_train, labels_test = split

   return AttackerData(
       features_train=features_train,
       is_training_labels_train=labels_train,
       features_test=features_test,
       is_training_labels_test=labels_test)
 def _column_stack(logits, loss):
   """Combines logits and losses into a single feature matrix.

   Args:
     logits: logits array, or None when no logits are available.
     loss: loss array, or None when no losses are available.

   Returns:
     The logits with the loss appended as a last column when both are given.
     With logits only, the logits unchanged; without logits, the loss with a
     trailing axis added.
   """
   if logits is not None and loss is not None:
     return np.column_stack((logits, loss))
   if logits is not None:
     return logits
   return np.expand_dims(loss, axis=-1)
 class TrainedAttacker:
   """Common interface of attackers that are backed by a trained model.

   Both methods only raise here; concrete attackers override them.
   """

   # Holds the trained model; None as long as nothing has been stored.
   model = None

   def train_model(self, input_features, is_training_labels):
     """Trains the attacker model on examples from train and test datasets.

     Args:
       input_features : array-like of shape (n_samples, n_features) with one
         feature vector per sample.
       is_training_labels : vector of shape (n_samples, ) with the membership
         label of each sample.

     Raises:
       NotImplementedError: always, in this base class.
     """
     raise NotImplementedError

   def predict(self, input_features):
     """Predicts whether input_features belongs to train or test.

     Args:
       input_features : features with the same semantics as the ones passed
         to train_model.

     Raises:
       NotImplementedError: always, in this base class.
     """
     raise NotImplementedError
 class LogisticRegressionAttacker(TrainedAttacker):
   """Logistic regression attacker."""

   def train_model(self, input_features, is_training_labels):
     """Fits a logistic regression, choosing `C` by 3-fold grid search.

     Args:
       input_features : array-like of shape (n_samples, n_features).
       is_training_labels : vector of shape (n_samples, ) with the membership
         label of each sample.
     """
     lr = linear_model.LogisticRegression(solver='lbfgs')
     param_grid = {
         'C': np.logspace(-4, 2, 10),
     }
     model = model_selection.GridSearchCV(
         lr, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
     model.fit(input_features, is_training_labels)
     self.model = model

   def predict(self, input_features):
     """Scores examples with the trained model.

     Args:
       input_features : features with the same semantics as the ones passed
         to train_model.

     Returns:
       Array of shape (n_samples, ) with the predicted probability of the
       second class in sorted label order (label 1 for 0/1 labels).

     Raises:
       AssertionError: if train_model has not been called yet.
     """
     if self.model is None:
       raise AssertionError(
           'Model not trained yet. Please call train_model first.')
     # Return continuous scores rather than hard labels: the predictions are
     # fed to a ROC computation, and hard 0/1 labels would collapse the curve
     # to a single operating point.
     return self.model.predict_proba(input_features)[:, 1]
 class MultilayerPerceptronAttacker(TrainedAttacker):
   """Multilayer perceptron attacker."""

   def train_model(self, input_features, is_training_labels):
     """Fits an MLP classifier, tuning it by 3-fold grid search.

     The grid covers the hidden layer sizes and the `alpha` regularization
     strength; the solver is fixed to adam.

     Args:
       input_features : array-like of shape (n_samples, n_features).
       is_training_labels : vector of shape (n_samples, ) with the membership
         label of each sample.
     """
     mlp_model = neural_network.MLPClassifier()
     param_grid = {
         'hidden_layer_sizes': [(64,), (32, 32)],
         'solver': ['adam'],
         'alpha': [0.0001, 0.001, 0.01],
     }
     model = model_selection.GridSearchCV(
         mlp_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
     model.fit(input_features, is_training_labels)
     self.model = model

   def predict(self, input_features):
     """Scores examples with the trained model.

     Args:
       input_features : features with the same semantics as the ones passed
         to train_model.

     Returns:
       Array of shape (n_samples, ) with the predicted probability of the
       second class in sorted label order (label 1 for 0/1 labels).

     Raises:
       AssertionError: if train_model has not been called yet.
     """
     if self.model is None:
       raise AssertionError(
           'Model not trained yet. Please call train_model first.')
     # Return continuous scores rather than hard labels: the predictions are
     # fed to a ROC computation, and hard 0/1 labels would collapse the curve
     # to a single operating point.
     return self.model.predict_proba(input_features)[:, 1]
 class RandomForestAttacker(TrainedAttacker):
   """Random forest attacker."""

   def train_model(self, input_features, is_training_labels):
     """Setup a random forest pipeline with cross-validation.

     Args:
       input_features : array-like of shape (n_samples, n_features).
       is_training_labels : vector of shape (n_samples, ) with the membership
         label of each sample.
     """
     rf_model = ensemble.RandomForestClassifier()
     param_grid = {
         'n_estimators': [100],
         # For classifiers 'auto' was an alias of 'sqrt', so listing both only
         # doubled the grid; newer scikit-learn releases reject 'auto'.
         'max_features': ['sqrt'],
         'max_depth': [5, 10, 20, None],
         'min_samples_split': [2, 5, 10],
         'min_samples_leaf': [1, 2, 4]
     }
     model = model_selection.GridSearchCV(
         rf_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
     model.fit(input_features, is_training_labels)
     self.model = model

   def predict(self, input_features):
     """Scores examples with the trained model.

     Args:
       input_features : features with the same semantics as the ones passed
         to train_model.

     Returns:
       Array of shape (n_samples, ) with the predicted probability of the
       second class in sorted label order (label 1 for 0/1 labels).

     Raises:
       AssertionError: if train_model has not been called yet.
     """
     if self.model is None:
       raise AssertionError(
           'Model not trained yet. Please call train_model first.')
     # Return continuous scores rather than hard labels: the predictions are
     # fed to a ROC computation, and hard 0/1 labels would collapse the curve
     # to a single operating point.
     return self.model.predict_proba(input_features)[:, 1]
 class KNearestNeighborsAttacker(TrainedAttacker):
   """K nearest neighbor attacker."""

   def train_model(self, input_features, is_training_labels):
     """Fits a k-NN classifier, choosing `n_neighbors` by 3-fold grid search.

     Args:
       input_features : array-like of shape (n_samples, n_features).
       is_training_labels : vector of shape (n_samples, ) with the membership
         label of each sample.
     """
     knn_model = neighbors.KNeighborsClassifier()
     param_grid = {
         'n_neighbors': [3, 5, 7],
     }
     model = model_selection.GridSearchCV(
         knn_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
     model.fit(input_features, is_training_labels)
     self.model = model

   def predict(self, input_features):
     """Scores examples with the trained model.

     Args:
       input_features : features with the same semantics as the ones passed
         to train_model.

     Returns:
       Array of shape (n_samples, ) with the predicted probability of the
       second class in sorted label order (label 1 for 0/1 labels).

     Raises:
       AssertionError: if train_model has not been called yet.
     """
     if self.model is None:
       raise AssertionError(
           'Model not trained yet. Please call train_model first.')
     # Return continuous scores rather than hard labels: the predictions are
     # fed to a ROC computation, and hard 0/1 labels would collapse the curve
     # to a single operating point.
     return self.model.predict_proba(input_features)[:, 1]
--- a/tensorflow_privacy/privacy/membership_inference_attack/models_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/models_test.py
@ -0,0 +1,59 @@
 # Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Lint as: python3
 """Tests for tensorflow_privacy.privacy.membership_inference_attack.models."""
 from absl.testing import absltest
 import numpy as np
 from tensorflow_privacy.privacy.membership_inference_attack import models
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
 class TrainedAttackerTest(absltest.TestCase):
   """Tests for the attacker classes and create_attacker_data in models."""

   def test_base_attacker_train_and_predict(self):
     """The base attacker implements neither training nor prediction."""
     base_attacker = models.TrainedAttacker()
     self.assertRaises(NotImplementedError, base_attacker.train_model, [], [])
     self.assertRaises(NotImplementedError, base_attacker.predict, [])

   def test_predict_before_training(self):
     """Predicting with an untrained attacker raises AssertionError."""
     lr_attacker = models.LogisticRegressionAttacker()
     self.assertRaises(AssertionError, lr_attacker.predict, [])

   def test_create_attacker_data_loss_only(self):
     """With losses only, two examples split evenly at test_fraction 0.5."""
     attack_input = AttackInputData(
         loss_train=np.array([1]), loss_test=np.array([2]))
     attacker_data = models.create_attacker_data(attack_input, 0.5)
     self.assertLen(attacker_data.features_test, 1)
     self.assertLen(attacker_data.features_train, 1)

   def test_create_attacker_data_loss_and_logits(self):
     """Logits and loss are stacked and each row keeps its matching label."""
     attack_input = AttackInputData(
         logits_train=np.array([[1, 2], [5, 6]]),
         logits_test=np.array([[10, 11], [14, 15]]),
         loss_train=np.array([3, 7]),
         loss_test=np.array([12, 16]))
     attacker_data = models.create_attacker_data(attack_input, 0.25)
     self.assertLen(attacker_data.features_test, 1)
     self.assertLen(attacker_data.features_train, 3)
     for i, feature in enumerate(attacker_data.features_train):
       self.assertLen(feature, 3)  # each feature has two logits and one loss
       # Compare whole rows: `x in ndarray` is true as soon as any single
       # element matches, which is not row membership.
       row_matches = (attack_input.logits_train == feature[:2]).all(axis=1)
       # Rows that come from the test logits are labelled 1, train rows 0.
       expected = not row_matches.any()
       self.assertEqual(attacker_data.is_training_labels_train[i], expected)
 # Run the tests through absl's test runner when executed as a script.
 if __name__ == '__main__':
   absltest.main()