The new API for the membership inference attack.

1. Colab and Keras/TF estimator integration still use the old API and will be updated in the subsequent CLs. 2. After dropping the old API in membership_inference_attack.py, membership_inference_attack_new.py will be renamed in membership_inference_attack.py. PiperOrigin-RevId: 325823046
2020-08-10 09:43:58 -07:00 · 2020-08-10 09:43:58 -07:00 · 43a0e4be8a
commit 43a0e4be8a
parent 68651eeddc
9 changed files with 1279 additions and 1 deletions
--- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
@ -0,0 +1,322 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Data structures representing attack inputs, configuration, outputs."""
+import enum
+import pickle
+from typing import Any, Iterable, Union
+
+from dataclasses import dataclass
+import numpy as np
+from sklearn import metrics
+
+ENTIRE_DATASET_SLICE_STR = 'SingleSliceSpec(Entire dataset)'
+
+
+class SlicingFeature(enum.Enum):
+  """Enum with features by which slicing is available."""
+  CLASS = 'class'
+  PERCENTILE = 'percentile'
+  CORRECTLY_CLASSIFIED = 'correctly_classfied'
+
+
+@dataclass
+class SingleSliceSpec:
+  """Specifies a slice.
+
+  The slice is defined by values in one feature - it might be a single value
+  (eg. slice of examples of the specific classification class) or some set of
+  values (eg. range of percentiles of the attacked model loss).
+
+  When feature is None, it means that the slice is the entire dataset.
+  """
+  feature: SlicingFeature = None
+  value: Any = None
+
+  @property
+  def entire_dataset(self):
+    return self.feature is None
+
+  def __str__(self):
+    if self.entire_dataset:
+      return 'Entire dataset'
+
+    if self.feature == SlicingFeature.PERCENTILE:
+      return 'Loss percentiles: %d-%d' % self.value
+
+    return f'{self.feature.name}={self.value}'
+
+
+@dataclass
+class SlicingSpec:
+  """Specification of a slicing procedure.
+
+  Each variable which is set specifies a slicing by different dimension.
+  """
+
+  # When is set to true, one of the slices is the whole dataset.
+  entire_dataset: bool = True
+
+  # Used in classification tasks for slicing by classes. It is assumed that
+  # classes are integers 0, 1, ... number of classes. When true one slice per
+  # each class is generated.
+  by_class: Union[bool, Iterable[int], int] = False
+
+  # if true, it generates 10 slices for percentiles of the loss - 0-10%, 10-20%,
+  # ... 90-100%.
+  by_percentiles: bool = False
+
+  # When true, a slice for correctly classifed and a slice for misclassifed
+  # examples will be generated.
+  by_classification_correctness: bool = False
+
+
+class AttackType(enum.Enum):
+  """An enum define attack types."""
+  LOGISTIC_REGRESSION = 'lr'
+  MULTI_LAYERED_PERCEPTRON = 'mlp'
+  RANDOM_FOREST = 'rf'
+  K_NEAREST_NEIGHBORS = 'knn'
+  THRESHOLD_ATTACK = 'threshold'
+
+  @property
+  def is_trained_attack(self):
+    """Returns whether this type of attack requires training a model."""
+    return self != AttackType.THRESHOLD_ATTACK
+
+  # Return LOGISTIC_REGRESSION instead of AttackType.LOGISTIC_REGRESSION
+  def __str__(self):
+    return f'{self.name}'
+
+
+@dataclass
+class AttackInputData:
+  """Input data for running an attack.
+
+  This includes only the data, and not configuration.
+  """
+
+  logits_train: np.ndarray = None
+  logits_test: np.ndarray = None
+
+  # Contains ground-truth classes. Classes are assumed to be integers starting
+  # from 0.
+  labels_train: np.ndarray = None
+  labels_test: np.ndarray = None
+
+  # Explicitly specified loss. If provided, this is used instead of deriving
+  # loss from logits and labels
+  loss_train: np.ndarray = None
+  loss_test: np.ndarray = None
+
+  @property
+  def num_classes(self):
+    if self.labels_train is None or self.labels_test is None:
+      raise ValueError(
+          "Can't identify the number of classes as no labels were provided. "
+          'Please set labels_train and labels_test')
+    return int(max(np.max(self.labels_train), np.max(self.labels_test))) + 1
+
+  @staticmethod
+  def _get_loss(logits: np.ndarray, true_labels: np.ndarray):
+    return logits[range(logits.shape[0]), true_labels]
+
+  def get_loss_train(self):
+    """Calculates cross-entropy losses for the training set."""
+    if self.loss_train is not None:
+      return self.loss_train
+    return self._get_loss(self.logits_train, self.labels_train)
+
+  def get_loss_test(self):
+    """Calculates cross-entropy losses for the test set."""
+    if self.loss_test is not None:
+      return self.loss_test
+    return self._get_loss(self.logits_test, self.labels_test)
+
+  def get_train_size(self):
+    """Returns size of the training set."""
+    if self.loss_train is not None:
+      return self.loss_train.size
+    return self.logits_train.shape[0]
+
+  def get_test_size(self):
+    """Returns size of the test set."""
+    if self.loss_test is not None:
+      return self.loss_test.size
+    return self.logits_test.shape[0]
+
+  def validate(self):
+    """Validates the inputs."""
+    if (self.loss_train is None) != (self.loss_test is None):
+      raise ValueError(
+          'loss_test and loss_train should both be either set or unset')
+
+    if (self.logits_train is None) != (self.logits_test is None):
+      raise ValueError(
+          'logits_train and logits_test should both be either set or unset')
+
+    if (self.labels_train is None) != (self.labels_test is None):
+      raise ValueError(
+          'labels_train and labels_test should both be either set or unset')
+
+    if (self.labels_train is None and self.loss_train is None and
+        self.logits_train is None):
+      raise ValueError('At least one of labels, logits or losses should be set')
+
+    # TODO(b/161366709): Add checks for equal sizes
+
+
+@dataclass
+class RocCurve:
+  """Represents ROC curve of a membership inference classifier."""
+  # Thresholds used to define points on ROC curve.
+  # Thresholds are not explicitly part of the curve, and are stored for
+  # debugging purposes.
+  thresholds: np.ndarray
+
+  # True positive rates based on thresholds
+  tpr: np.ndarray
+
+  # False positive rates based on thresholds
+  fpr: np.ndarray
+
+  def get_auc(self):
+    """Calculates area under curve (aka AUC)."""
+    return metrics.auc(self.fpr, self.tpr)
+
+  def get_attacker_advantage(self):
+    """Calculates membership attacker's (or adversary's) advantage.
+
+    This metric is inspired by https://arxiv.org/abs/1709.01604, specifically
+    by Definition 4. The difference here is that we calculate maximum advantage
+    over all available classifier thresholds.
+
+    Returns:
+      a single float number with membership attaker's advantage.
+    """
+    return max(np.abs(self.tpr - self.fpr))
+
+
+@dataclass
+class SingleAttackResult:
+  """Results from running a single attack."""
+
+  # Data slice this result was calculated for.
+  slice_spec: SingleSliceSpec
+
+  attack_type: AttackType
+  roc_curve: RocCurve  # for drawing and metrics calculation
+
+  # TODO(b/162693190): Add more metrics. Think which info we should store
+  #  to derive metrics like f1_score or accuracy. Should we store labels and
+  #  predictions, or rather some aggregate data?
+
+  def get_attacker_advantage(self):
+    return self.roc_curve.get_attacker_advantage()
+
+  def get_auc(self):
+    return self.roc_curve.get_auc()
+
+
+@dataclass
+class AttackResults:
+  """Results from running multiple attacks."""
+  # add metadata, such as parameters of attack evaluation, input data etc
+  single_attack_results: Iterable[SingleAttackResult]
+
+  def calculate_pd_dataframe(self):
+    # returns all metrics as a Pandas DataFrame
+    return
+
+  def summary(self, by_slices=False) -> str:
+    """Provides a summary of the metrics.
+
+    The summary provides the best-performing attacks for each requested data
+    slice.
+    Args:
+      by_slices : whether to prepare a per-slice summary.
+
+    Returns:
+      A string with a summary of all the metrics.
+    """
+    summary = []
+
+    # Summary over all slices
+    max_auc_result_all = self.get_result_with_max_attacker_advantage()
+    summary.append('Best-performing attacks over all slices')
+    summary.append(
+        '  %s achieved an AUC of %.2f on slice %s' %
+        (max_auc_result_all.attack_type, max_auc_result_all.get_auc(),
+         max_auc_result_all.slice_spec))
+
+    max_advantage_result_all = self.get_result_with_max_attacker_advantage()
+    summary.append('  %s achieved an advantage of %.2f on slice %s' %
+                   (max_advantage_result_all.attack_type,
+                    max_advantage_result_all.get_attacker_advantage(),
+                    max_advantage_result_all.slice_spec))
+
+    slice_dict = self._group_results_by_slice()
+
+    if len(slice_dict.keys()) > 1 and by_slices:
+      for slice_str in slice_dict:
+        results = slice_dict[slice_str]
+        summary.append('\nBest-performing attacks over slice: \"%s\"' %
+                       slice_str)
+        max_auc_result = results.get_result_with_max_auc()
+        summary.append('  %s achieved an AUC of %.2f' %
+                       (max_auc_result.attack_type, max_auc_result.get_auc()))
+        max_advantage_result = results.get_result_with_max_attacker_advantage()
+        summary.append('  %s achieved an advantage of %.2f' %
+                       (max_advantage_result.attack_type,
+                        max_advantage_result.get_attacker_advantage()))
+
+    return '\n'.join(summary)
+
+  def _group_results_by_slice(self):
+    """Groups AttackResults into a dictionary keyed by the slice."""
+    slice_dict = {}
+    for attack_result in self.single_attack_results:
+      slice_str = str(attack_result.slice_spec)
+      if slice_str not in slice_dict:
+        slice_dict[slice_str] = AttackResults([])
+      slice_dict[slice_str].single_attack_results.append(attack_result)
+    return slice_dict
+
+  def get_result_with_max_auc(self) -> SingleAttackResult:
+    """Get the result with maximum AUC for all attacks and slices."""
+    aucs = [result.get_auc() for result in self.single_attack_results]
+
+    if min(aucs) < 0.4:
+      print('Suspiciously low AUC detected: %.2f. ' +
+            'There might be a bug in the classifier' % min(aucs))
+
+    return self.single_attack_results[np.argmax(aucs)]
+
+  def get_result_with_max_attacker_advantage(self) -> SingleAttackResult:
+    """Get the result with maximum advantage for all attacks and slices."""
+    return self.single_attack_results[np.argmax([
+        result.get_attacker_advantage() for result in self.single_attack_results
+    ])]
+
+  def save(self, filepath):
+    """Saves self to a pickle file."""
+    with open(filepath, 'wb') as out:
+      pickle.dump(self, out)
+
+  @classmethod
+  def load(cls, filepath):
+    """Loads AttackResults from a pickle file."""
+    with open(filepath, 'rb') as inp:
+      return pickle.load(inp)
--- a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing.py
@ -0,0 +1,143 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Specifying and creating AttackInputData slices."""
+
+import collections
+import copy
+from typing import List
+
+import numpy as np
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
+
+
+def _slice_if_not_none(a, idx):
+  return None if a is None else a[idx]
+
+
+def _slice_data_by_indices(data: AttackInputData, idx_train,
+                           idx_test) -> AttackInputData:
+  """Slices train fields with with idx_train and test fields with and idx_test."""
+
+  result = AttackInputData()
+
+  # Slice train data.
+  result.logits_train = _slice_if_not_none(data.logits_train, idx_train)
+  result.labels_train = _slice_if_not_none(data.labels_train, idx_train)
+  result.loss_train = _slice_if_not_none(data.loss_train, idx_train)
+
+  # Slice test data.
+  result.logits_test = _slice_if_not_none(data.logits_test, idx_test)
+  result.labels_test = _slice_if_not_none(data.labels_test, idx_test)
+  result.loss_test = _slice_if_not_none(data.loss_test, idx_test)
+
+  return result
+
+
+def _slice_by_class(data: AttackInputData, class_value: int) -> AttackInputData:
+  idx_train = data.labels_train == class_value
+  idx_test = data.labels_test == class_value
+  return _slice_data_by_indices(data, idx_train, idx_test)
+
+
+def _slice_by_percentiles(data: AttackInputData, from_percentile: float,
+                          to_percentile: float):
+  """Slices samples by loss percentiles."""
+
+  # Find from_percentile and to_percentile percentiles in losses.
+  loss_train = data.get_loss_train()
+  loss_test = data.get_loss_test()
+  losses = np.concatenate((loss_train, loss_test))
+  from_loss = np.percentile(losses, from_percentile)
+  to_loss = np.percentile(losses, to_percentile)
+
+  idx_train = (from_loss <= loss_train) & (loss_train <= to_loss)
+  idx_test = (from_loss <= loss_test) & (loss_test <= to_loss)
+
+  return _slice_data_by_indices(data, idx_train, idx_test)
+
+
+def _indices_by_classification(logits, labels, correctly_classified):
+  idx_correct = labels == np.argmax(logits, axis=1)
+  return idx_correct if correctly_classified else np.invert(idx_correct)
+
+
+def _slice_by_classification_correctness(data: AttackInputData,
+                                         correctly_classified: bool):
+  idx_train = _indices_by_classification(data.logits_train, data.labels_train,
+                                         correctly_classified)
+  idx_test = _indices_by_classification(data.logits_test, data.labels_test,
+                                        correctly_classified)
+  return _slice_data_by_indices(data, idx_train, idx_test)
+
+
+def get_single_slice_specs(slicing_spec: SlicingSpec,
+                           num_classes: int = None) -> List[SingleSliceSpec]:
+  """Returns slices of data according to slicing_spec."""
+  result = []
+
+  if slicing_spec.entire_dataset:
+    result.append(SingleSliceSpec())
+
+  # Create slices by class.
+  by_class = slicing_spec.by_class
+  if isinstance(by_class, bool):
+    if by_class:
+      assert num_classes, "When by_class == True, num_classes should be given."
+      assert 0 <= num_classes <= 1000, (
+          f"Too much classes for slicing by classes. "
+          f"Found {num_classes}.")
+      for c in range(num_classes):
+        result.append(SingleSliceSpec(SlicingFeature.CLASS, c))
+  elif isinstance(by_class, int):
+    result.append(SingleSliceSpec(SlicingFeature.CLASS, by_class))
+  elif isinstance(by_class, collections.Iterable):
+    for c in by_class:
+      result.append(SingleSliceSpec(SlicingFeature.CLASS, c))
+
+  # Create slices by percentiles
+  if slicing_spec.by_percentiles:
+    for percent in range(0, 100, 10):
+      result.append(
+          SingleSliceSpec(SlicingFeature.PERCENTILE, (percent, percent + 10)))
+
+  # Create slices by correctness of the classifications.
+  if slicing_spec.by_classification_correctness:
+    result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True))
+    result.append(SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, False))
+
+  return result
+
+
+def get_slice(data: AttackInputData,
+              slice_spec: SingleSliceSpec) -> AttackInputData:
+  """Returns a single slice of data according to slice_spec."""
+  if slice_spec.entire_dataset:
+    data_slice = copy.copy(data)
+  elif slice_spec.feature == SlicingFeature.CLASS:
+    data_slice = _slice_by_class(data, slice_spec.value)
+  elif slice_spec.feature == SlicingFeature.PERCENTILE:
+    from_percentile, to_percentile = slice_spec.value
+    data_slice = _slice_by_percentiles(data, from_percentile, to_percentile)
+  elif slice_spec.feature == SlicingFeature.CORRECTLY_CLASSIFIED:
+    data_slice = _slice_by_classification_correctness(data, slice_spec.value)
+  else:
+    raise ValueError(f'Unknown slice spec feature "{slice_spec.feature}"')
+
+  data_slice.slice_spec = slice_spec
+  return data_slice
--- a/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/dataset_slicing_test.py
@ -0,0 +1,180 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing."""
+
+from absl.testing import absltest
+import numpy as np
+
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
+from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
+
+
+def _are_all_fields_equal(lhs, rhs) -> bool:
+  return vars(lhs) == vars(rhs)
+
+
+def _are_lists_equal(lhs, rhs) -> bool:
+  if len(lhs) != len(rhs):
+    return False
+  for l, r in zip(lhs, rhs):
+    if not _are_all_fields_equal(l, r):
+      return False
+  return True
+
+
+class SingleSliceSpecsTest(absltest.TestCase):
+  """Tests for get_single_slice_specs."""
+
+  ENTIRE_DATASET_SLICE = SingleSliceSpec()
+
+  def test_no_slices(self):
+    input_data = SlicingSpec(entire_dataset=False)
+    expected = []
+    output = get_single_slice_specs(input_data)
+    self.assertTrue(_are_lists_equal(output, expected))
+
+  def test_entire_dataset(self):
+    input_data = SlicingSpec()
+    expected = [self.ENTIRE_DATASET_SLICE]
+    output = get_single_slice_specs(input_data)
+    self.assertTrue(_are_lists_equal(output, expected))
+
+  def test_slice_by_classes(self):
+    input_data = SlicingSpec(by_class=True)
+    n_classes = 5
+    expected = [self.ENTIRE_DATASET_SLICE] + [
+        SingleSliceSpec(SlicingFeature.CLASS, c) for c in range(n_classes)
+    ]
+    output = get_single_slice_specs(input_data, n_classes)
+    self.assertTrue(_are_lists_equal(output, expected))
+
+  def test_slice_by_percentiles(self):
+    input_data = SlicingSpec(entire_dataset=False, by_percentiles=True)
+    expected0 = SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 10))
+    expected5 = SingleSliceSpec(SlicingFeature.PERCENTILE, (50, 60))
+    output = get_single_slice_specs(input_data)
+    self.assertLen(output, 10)
+    self.assertTrue(_are_all_fields_equal(output[0], expected0))
+    self.assertTrue(_are_all_fields_equal(output[5], expected5))
+
+  def test_slice_by_correcness(self):
+    input_data = SlicingSpec(
+        entire_dataset=False, by_classification_correctness=True)
+    expected = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)
+    output = get_single_slice_specs(input_data)
+    self.assertLen(output, 2)
+    self.assertTrue(_are_all_fields_equal(output[0], expected))
+
+  def test_slicing_by_multiple_features(self):
+    input_data = SlicingSpec(
+        entire_dataset=True,
+        by_class=True,
+        by_percentiles=True,
+        by_classification_correctness=True)
+    n_classes = 10
+    expected_slices = n_classes
+    expected_slices += 1  # entire dataset slice
+    expected_slices += 10  # percentiles slices
+    expected_slices += 2  # correcness classification slices
+    output = get_single_slice_specs(input_data, n_classes)
+    self.assertLen(output, expected_slices)
+
+
+class GetSliceTest(absltest.TestCase):
+
+  def __init__(self, methodname):
+    """Initialize the test class."""
+    super().__init__(methodname)
+
+    # Create test data for 3 class classification task.
+    logits_train = np.array([[0, 1, 0], [2, 0, 3], [4, 5, 0], [6, 7, 0]])
+    logits_test = np.array([[10, 0, 11], [12, 13, 0], [14, 15, 0], [0, 16, 17]])
+    labels_train = np.array([1, 0, 1, 2])
+    labels_test = np.array([1, 2, 0, 2])
+    loss_train = np.array([2, 0.25, 4, 3])
+    loss_test = np.array([0.5, 3.5, 7, 4.5])
+
+    self.input_data = AttackInputData(logits_train, logits_test, labels_train,
+                                      labels_test, loss_train, loss_test)
+
+  def test_slice_entire_dataset(self):
+    entire_dataset_slice = SingleSliceSpec()
+    output = get_slice(self.input_data, entire_dataset_slice)
+    expected = self.input_data
+    expected.slice_spec = entire_dataset_slice
+    self.assertTrue(_are_all_fields_equal(output, self.input_data))
+
+  def test_slice_by_class(self):
+    class_index = 1
+    class_slice = SingleSliceSpec(SlicingFeature.CLASS, class_index)
+    output = get_slice(self.input_data, class_slice)
+
+    # Check logits.
+    self.assertLen(output.logits_train, 2)
+    self.assertLen(output.logits_test, 1)
+    self.assertTrue((output.logits_train[1] == [4, 5, 0]).all())
+
+    # Check labels.
+    self.assertLen(output.labels_train, 2)
+    self.assertLen(output.labels_test, 1)
+    self.assertTrue((output.labels_train == class_index).all())
+    self.assertTrue((output.labels_test == class_index).all())
+
+    # Check losses
+    self.assertLen(output.loss_train, 2)
+    self.assertLen(output.loss_test, 1)
+    self.assertTrue((output.loss_train == [2, 4]).all())
+    self.assertTrue((output.loss_test == [0.5]).all())
+
+  def test_slice_by_percentile(self):
+    percentile_slice = SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 50))
+    output = get_slice(self.input_data, percentile_slice)
+
+    # Check logits.
+    self.assertLen(output.logits_train, 3)
+    self.assertLen(output.logits_test, 1)
+    self.assertTrue((output.logits_test[0] == [10, 0, 11]).all())
+
+    # Check labels.
+    self.assertLen(output.labels_train, 3)
+    self.assertLen(output.labels_test, 1)
+    self.assertTrue((output.labels_train == [1, 0, 2]).all())
+    self.assertTrue((output.labels_test == [1]).all())
+
+  def test_slice_by_correctness(self):
+    percentile_slice = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED,
+                                       False)
+    output = get_slice(self.input_data, percentile_slice)
+
+    # Check logits.
+    self.assertLen(output.logits_train, 2)
+    self.assertLen(output.logits_test, 3)
+    self.assertTrue((output.logits_train[1] == [6, 7, 0]).all())
+    self.assertTrue((output.logits_test[1] == [12, 13, 0]).all())
+
+    # Check labels.
+    self.assertLen(output.labels_train, 2)
+    self.assertLen(output.labels_test, 3)
+    self.assertTrue((output.labels_train == [0, 2]).all())
+    self.assertTrue((output.labels_test == [1, 2, 0]).all())
+
+
+if __name__ == '__main__':
+  absltest.main()
--- a/tensorflow_privacy/privacy/membership_inference_attack/example.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/example.py
@ -0,0 +1,149 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""An example for the membership inference attacks.
+
+This is using a toy model based on classifying four spacial clusters of data.
+"""
+import os
+import tempfile
+import numpy as np
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.utils import to_categorical
+from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
+
+
+def generate_random_cluster(center, scale, num_points):
+  return np.random.normal(size=(num_points, len(center))) * scale + center
+
+
+def generate_features_and_labels(samples_per_cluster=250, scale=0.1):
+  """Generates noised 3D clusters."""
+  cluster_centers = [[0, 0, 0], [2, 0, 0], [0, 2, 0], [0, 0, 2]]
+
+  features = np.concatenate((
+      generate_random_cluster(
+          center=cluster_centers[0],
+          scale=scale,
+          num_points=samples_per_cluster),
+      generate_random_cluster(
+          center=cluster_centers[1],
+          scale=scale,
+          num_points=samples_per_cluster),
+      generate_random_cluster(
+          center=cluster_centers[2],
+          scale=scale,
+          num_points=samples_per_cluster),
+      generate_random_cluster(
+          center=cluster_centers[3],
+          scale=scale,
+          num_points=samples_per_cluster),
+  ))
+
+  # Cluster labels: 0, 1, 2 and 3
+  labels = np.concatenate((
+      np.zeros(samples_per_cluster),
+      np.ones(samples_per_cluster),
+      np.ones(samples_per_cluster) * 2,
+      np.ones(samples_per_cluster) * 3,
+  ))
+
+  return (features, labels)
+
+
+# Hint: Play with "noise_scale" for different levels of overlap between
+# the generated clusters. More noise makes the classification harder.
+noise_scale = 2
+training_features, training_labels = generate_features_and_labels(
+    samples_per_cluster=250, scale=noise_scale)
+test_features, test_labels = generate_features_and_labels(
+    samples_per_cluster=250, scale=noise_scale)
+
+num_clusters = int(round(np.max(training_labels))) + 1
+
+# Hint: play with the number of layers to achieve different level of
+# over-fitting and observe its effects on membership inference performance.
+model = keras.models.Sequential([
+    layers.Dense(300, activation="relu"),
+    layers.Dense(300, activation="relu"),
+    layers.Dense(300, activation="relu"),
+    layers.Dense(num_clusters, activation="relu"),
+    layers.Softmax()
+])
+model.compile(
+    optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
+model.fit(
+    training_features,
+    to_categorical(training_labels, num_clusters),
+    validation_data=(test_features, to_categorical(test_labels, num_clusters)),
+    batch_size=64,
+    epochs=10,
+    shuffle=True)
+
+training_pred = model.predict(training_features)
+test_pred = model.predict(test_features)
+
+
+def crossentropy(true_labels, predictions):
+  return keras.backend.eval(
+      keras.losses.binary_crossentropy(
+          keras.backend.variable(to_categorical(true_labels, num_clusters)),
+          keras.backend.variable(predictions)))
+
+
+attack_results = mia.run_attacks(
+    AttackInputData(
+        labels_train=training_labels,
+        labels_test=test_labels,
+        loss_train=crossentropy(training_labels, training_pred),
+        loss_test=crossentropy(test_labels, test_pred)),
+    SlicingSpec(entire_dataset=True, by_class=True),
+    attack_types=(AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))
+
+# Example of saving the results to the file and loading them back.
+with tempfile.TemporaryDirectory() as tmpdirname:
+  filepath = os.path.join(tmpdirname, "results.pickle")
+  attack_results.save(filepath)
+  loaded_results = AttackResults.load(filepath)
+
+# Print attack metrics
+for attack_result in attack_results.single_attack_results:
+  print("Slice: %s" % attack_result.slice_spec)
+  print("Attack type: %s" % attack_result.attack_type)
+  print("AUC: %.2f" % attack_result.roc_curve.get_auc())
+
+  print("Attacker advantage: %.2f\n" %
+        attack_result.roc_curve.get_attacker_advantage())
+
+max_auc_attacker = attack_results.get_result_with_max_attacker_advantage()
+print("Attack type with max AUC: %s, AUC of %.2f" %
+      (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))
+
+max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage()
+print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
+      (max_advantage_attacker.attack_type,
+       max_advantage_attacker.roc_curve.get_attacker_advantage()))
+
+# Print summary
+print("Summary without slices: \n")
+print(attack_results.summary(by_slices=False))
+
+print("Summary by slices: \n")
+print(attack_results.summary(by_slices=True))
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
@ -13,7 +13,12 @@
 # limitations under the License.

 # Lint as: python3
-"""Code that runs membership inference attacks based on the model outputs."""
+"""Code that runs membership inference attacks based on the model outputs.
+
+Warning: This file belongs to the old API for membership inference attacks. This
+file will be removed soon. membership_inference_attack_new.py contains the new
+API.
+"""

 import collections
 import io
@ -354,6 +359,11 @@ def run_attack(loss_train: np.ndarray = None,
    results: Dictionary with the chosen vulnerability metric(s) for all ran
      attacks.
  """
+  print(
+      'Deprecation warning: function run_attack is '
+      'deprecated and will be removed soon. '
+      'Please use membership_inference_attack_new.run_attacks'
+  )
  attacks = []
  features = {}
  # ---------- Check available data ----------
@ -529,6 +539,11 @@ def run_all_attacks(loss_train: np.ndarray = None,
  Returns:
    result: dictionary with all attack results
  """
+  print(
+      'Deprecation warning: function run_all_attacks is '
+      'deprecated and will be removed soon. '
+      'Please use membership_inference_attack_new.run_attacks'
+  )
  metrics = ['auc', 'advantage']

  # Entire data
@ -631,6 +646,11 @@ def run_all_attacks_and_create_summary(
    result: a dictionary with all the distilled attack information summarized
      in the summarystring
  """
+  print(
+      'Deprecation warning: function run_all_attacks_and_create_summary is '
+      'deprecated and will be removed soon. '
+      'Please use membership_inference_attack_new.run_attacks'
+  )
  summary = []
  metrics = ['auc', 'advantage']
  attack_classifiers = ['lr', 'knn']
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new.py
@ -0,0 +1,121 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Code that runs membership inference attacks based on the model outputs.
+
+This file belongs to the new API for membership inference attacks. This file
+will be renamed to membership_inference_attack.py after the old API is removed.
+"""
+
+from typing import Iterable
+import numpy as np
+from sklearn import metrics
+
+from tensorflow_privacy.privacy.membership_inference_attack import models
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
+from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
+from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
+
+
+def _get_slice_spec(data: AttackInputData) -> SingleSliceSpec:
+  if hasattr(data, 'slice_spec'):
+    return data.slice_spec
+  return SingleSliceSpec()
+
+
+def run_trained_attack(attack_input: AttackInputData, attack_type: AttackType):
+  """Classification attack done by ML models."""
+  attacker = None
+
+  if attack_type == AttackType.LOGISTIC_REGRESSION:
+    attacker = models.LogisticRegressionAttacker()
+  elif attack_type == AttackType.MULTI_LAYERED_PERCEPTRON:
+    attacker = models.MultilayerPerceptronAttacker()
+  elif attack_type == AttackType.RANDOM_FOREST:
+    attacker = models.RandomForestAttacker()
+  elif attack_type == AttackType.K_NEAREST_NEIGHBORS:
+    attacker = models.KNearestNeighborsAttacker()
+  else:
+    raise NotImplementedError(
+        'Attack type {} not implemented yet.'.format(attack_type))
+
+  prepared_attacker_data = models.create_attacker_data(attack_input)
+
+  attacker.train_model(prepared_attacker_data.features_train,
+                       prepared_attacker_data.is_training_labels_train)
+
+  # Run the attacker on (permuted) test examples.
+  predictions_test = attacker.predict(prepared_attacker_data.features_test)
+
+  # Generate ROC curves with predictions.
+  fpr, tpr, thresholds = metrics.roc_curve(
+      prepared_attacker_data.is_training_labels_test, predictions_test)
+
+  roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
+
+  return SingleAttackResult(
+      slice_spec=_get_slice_spec(attack_input),
+      attack_type=attack_type,
+      roc_curve=roc_curve)
+
+
+def run_threshold_attack(attack_input: AttackInputData):
+  fpr, tpr, thresholds = metrics.roc_curve(
+      np.concatenate((np.zeros(attack_input.get_train_size()),
+                      np.ones(attack_input.get_test_size()))),
+      np.concatenate(
+          (attack_input.get_loss_train(), attack_input.get_loss_test())))
+
+  roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
+
+  return SingleAttackResult(
+      slice_spec=_get_slice_spec(attack_input),
+      attack_type=AttackType.THRESHOLD_ATTACK,
+      roc_curve=roc_curve)
+
+
+def run_attack(attack_input: AttackInputData, attack_type: AttackType):
+  attack_input.validate()
+  if attack_type.is_trained_attack:
+    return run_trained_attack(attack_input, attack_type)
+
+  return run_threshold_attack(attack_input)
+
+
+def run_attacks(
+    attack_input: AttackInputData,
+    slicing_spec: SlicingSpec = None,
+    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)
+) -> AttackResults:
+  """Run all attacks."""
+  attack_input.validate()
+  attack_results = []
+
+  if slicing_spec is None:
+    slicing_spec = SlicingSpec(entire_dataset=True)
+  input_slice_specs = get_single_slice_specs(slicing_spec,
+                                             attack_input.num_classes)
+  for single_slice_spec in input_slice_specs:
+    attack_input_slice = get_slice(attack_input, single_slice_spec)
+    for attack_type in attack_types:
+      attack_results.append(run_attack(attack_input_slice, attack_type))
+
+  return AttackResults(single_attack_results=attack_results)
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack_new_test.py
@ -0,0 +1,77 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for tensorflow_privacy.privacy.membership_inference_attack.utils."""
+from absl.testing import absltest
+import numpy as np
+from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
+
+
+def get_test_input(n_train, n_test):
+  """Get example inputs for attacks."""
+  rng = np.random.RandomState(4)
+  return AttackInputData(
+      rng.randn(n_train, 5) + 0.2,
+      rng.randn(n_test, 5) + 0.2, np.array([i % 5 for i in range(n_train)]),
+      np.array([i % 5 for i in range(n_test)]))
+
+
+class RunAttacksTest(absltest.TestCase):
+
+  def test_run_attacks_size(self):
+    result = mia.run_attacks(
+        get_test_input(100, 100), SlicingSpec(),
+        (AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))
+
+    self.assertLen(result.single_attack_results, 2)
+
+  def test_run_attack_trained_sets_attack_type(self):
+    result = mia.run_attack(
+        get_test_input(100, 100), AttackType.LOGISTIC_REGRESSION)
+
+    self.assertEqual(result.attack_type, AttackType.LOGISTIC_REGRESSION)
+
+  def test_run_attack_threshold_sets_attack_type(self):
+    result = mia.run_attack(
+        get_test_input(100, 100), AttackType.THRESHOLD_ATTACK)
+
+    self.assertEqual(result.attack_type, AttackType.THRESHOLD_ATTACK)
+
+  def test_run_attack_threshold_calculates_correct_auc(self):
+    result = mia.run_attack(
+        AttackInputData(
+            loss_train=np.array([0.1, 0.2, 1.3, 0.4, 0.5, 0.6]),
+            loss_test=np.array([1.1, 1.2, 1.3, 0.4, 1.5, 1.6])),
+        AttackType.THRESHOLD_ATTACK)
+
+    np.testing.assert_almost_equal(result.roc_curve.get_auc(), 0.83, decimal=2)
+
+  def test_run_attack_by_slice(self):
+    result = mia.run_attacks(
+        get_test_input(100, 100), SlicingSpec(by_class=True),
+        (AttackType.THRESHOLD_ATTACK,))
+
+    self.assertLen(result.single_attack_results, 6)
+    expected_slice = SingleSliceSpec(SlicingFeature.CLASS, 2)
+    self.assertEqual(result.single_attack_results[3].slice_spec, expected_slice)
+
+
+if __name__ == '__main__':
+  absltest.main()
--- a/tensorflow_privacy/privacy/membership_inference_attack/models.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/models.py
@ -0,0 +1,207 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Trained models for membership inference attacks."""
+
+from dataclasses import dataclass
+import numpy as np
+from sklearn import ensemble
+from sklearn import linear_model
+from sklearn import model_selection
+from sklearn import neighbors
+from sklearn import neural_network
+
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+
+
+@dataclass
+class AttackerData:
+  """Input data for an ML classifier attack.
+
+  This includes only the data, and not configuration.
+  """
+
+  features_train: np.ndarray = None
+  # element-wise boolean array denoting if the example was part of training.
+  is_training_labels_train: np.ndarray = None
+
+  features_test: np.ndarray = None
+  # element-wise boolean array denoting if the example was part of training.
+  is_training_labels_test: np.ndarray = None
+
+
+def create_attacker_data(attack_input_data: AttackInputData,
+                         test_fraction: float = 0.25) -> AttackerData:
+  """Prepare AttackInputData to train ML attackers.
+
+  Combines logits and losses and performs a random train-test split.
+
+  Args:
+    attack_input_data: Original AttackInputData
+    test_fraction: Fraction of the dataset to include in the test split.
+
+  Returns:
+    AttackerData.
+  """
+  attack_input_train = _column_stack(attack_input_data.logits_train,
+                                     attack_input_data.get_loss_train())
+  attack_input_test = _column_stack(attack_input_data.logits_test,
+                                    attack_input_data.get_loss_test())
+
+  features_all = np.concatenate((attack_input_train, attack_input_test))
+
+  labels_all = np.concatenate(((np.zeros(attack_input_data.get_train_size())),
+                               (np.ones(attack_input_data.get_test_size()))))
+
+  # Perform a train-test split
+  features_train, features_test, \
+  is_training_labels_train, is_training_labels_test = \
+    model_selection.train_test_split(
+        features_all, labels_all, test_size=test_fraction)
+  return AttackerData(features_train, is_training_labels_train, features_test,
+                      is_training_labels_test)
+
+
+def _column_stack(logits, loss):
+  """Stacks logits and losses.
+
+  In case that only one exists, returns that one.
+  Args:
+    logits: logits array
+    loss: loss array
+
+  Returns:
+    stacked logits and losses (or only one if both do not exist).
+  """
+  if logits is None:
+    return np.expand_dims(loss, axis=-1)
+  if loss is None:
+    return logits
+  return np.column_stack((logits, loss))
+
+
+class TrainedAttacker:
+  """Base class for training attack models."""
+  model = None
+
+  def train_model(self, input_features, is_training_labels):
+    """Train an attacker model.
+
+    This is trained on examples from train and test datasets.
+    Args:
+      input_features : array-like of shape (n_samples, n_features) Training
+        vector, where n_samples is the number of samples and n_features is the
+        number of features.
+      is_training_labels : a vector of booleans of shape (n_samples, )
+        representing whether the sample is in the training set or not.
+    """
+    raise NotImplementedError()
+
+  def predict(self, input_features):
+    """Predicts whether input_features belongs to train or test.
+
+    Args:
+      input_features : A vector of features with the same semantics as x_train
+        passed to train_model.
+    """
+    raise NotImplementedError()
+
+
+class LogisticRegressionAttacker(TrainedAttacker):
+  """Logistic regression attacker."""
+
+  def train_model(self, input_features, is_training_labels):
+    lr = linear_model.LogisticRegression(solver='lbfgs')
+    param_grid = {
+        'C': np.logspace(-4, 2, 10),
+    }
+    model = model_selection.GridSearchCV(
+        lr, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
+    model.fit(input_features, is_training_labels)
+    self.model = model
+
+  def predict(self, input_features):
+    if self.model is None:
+      raise AssertionError(
+          'Model not trained yet. Please call train_model first.')
+    return self.model.predict(input_features)
+
+
+class MultilayerPerceptronAttacker(TrainedAttacker):
+  """Multilayer perceptron attacker."""
+
+  def train_model(self, input_features, is_training_labels):
+    mlp_model = neural_network.MLPClassifier()
+    param_grid = {
+        'hidden_layer_sizes': [(64,), (32, 32)],
+        'solver': ['adam'],
+        'alpha': [0.0001, 0.001, 0.01],
+    }
+    model = model_selection.GridSearchCV(
+        mlp_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
+    model.fit(input_features, is_training_labels)
+    self.model = model
+
+  def predict(self, input_features):
+    if self.model is None:
+      raise AssertionError(
+          'Model not trained yet. Please call train_model first.')
+    return self.model.predict(input_features)
+
+
+class RandomForestAttacker(TrainedAttacker):
+  """Random forest attacker."""
+
+  def train_model(self, input_features, is_training_labels):
+    """Setup a random forest pipeline with cross-validation."""
+    rf_model = ensemble.RandomForestClassifier()
+
+    param_grid = {
+        'n_estimators': [100],
+        'max_features': ['auto', 'sqrt'],
+        'max_depth': [5, 10, 20, None],
+        'min_samples_split': [2, 5, 10],
+        'min_samples_leaf': [1, 2, 4]
+    }
+    model = model_selection.GridSearchCV(
+        rf_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
+    model.fit(input_features, is_training_labels)
+    self.model = model
+
+  def predict(self, input_features):
+    if self.model is None:
+      raise AssertionError(
+          'Model not trained yet. Please call train_model first.')
+    return self.model.predict(input_features)
+
+
+class KNearestNeighborsAttacker(TrainedAttacker):
+  """K nearest neighbor attacker."""
+
+  def train_model(self, input_features, is_training_labels):
+    knn_model = neighbors.KNeighborsClassifier()
+    param_grid = {
+        'n_neighbors': [3, 5, 7],
+    }
+    model = model_selection.GridSearchCV(
+        knn_model, param_grid=param_grid, cv=3, n_jobs=1, verbose=0)
+    model.fit(input_features, is_training_labels)
+    self.model = model
+
+  def predict(self, input_features):
+    if self.model is None:
+      raise AssertionError(
+          'Model not trained yet. Please call train_model first.')
+    return self.model.predict(input_features)
--- a/tensorflow_privacy/privacy/membership_inference_attack/models_test.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/models_test.py
@ -0,0 +1,59 @@
+# Copyright 2020, The TensorFlow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Tests for tensorflow_privacy.privacy.membership_inference_attack.data_structures."""
+from absl.testing import absltest
+import numpy as np
+
+from tensorflow_privacy.privacy.membership_inference_attack import models
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
+
+
+class TrainedAttackerTest(absltest.TestCase):
+
+  def test_base_attacker_train_and_predict(self):
+    base_attacker = models.TrainedAttacker()
+    self.assertRaises(NotImplementedError, base_attacker.train_model, [], [])
+    self.assertRaises(NotImplementedError, base_attacker.predict, [])
+
+  def test_predict_before_training(self):
+    lr_attacker = models.LogisticRegressionAttacker()
+    self.assertRaises(AssertionError, lr_attacker.predict, [])
+
+  def test_create_attacker_data_loss_only(self):
+    attack_input = AttackInputData(
+        loss_train=np.array([1]), loss_test=np.array([2]))
+    attacker_data = models.create_attacker_data(attack_input, 0.5)
+    self.assertLen(attacker_data.features_test, 1)
+    self.assertLen(attacker_data.features_train, 1)
+
+  def test_create_attacker_data_loss_and_logits(self):
+    attack_input = AttackInputData(
+        logits_train=np.array([[1, 2], [5, 6]]),
+        logits_test=np.array([[10, 11], [14, 15]]),
+        loss_train=np.array([3, 7]),
+        loss_test=np.array([12, 16]))
+    attacker_data = models.create_attacker_data(attack_input, 0.25)
+    self.assertLen(attacker_data.features_test, 1)
+    self.assertLen(attacker_data.features_train, 3)
+
+    for i, feature in enumerate(attacker_data.features_train):
+      self.assertLen(feature, 3)  # each feature has two logits and one loss
+      expected = feature[:2] not in attack_input.logits_train
+      self.assertEqual(attacker_data.is_training_labels_train[i], expected)
+
+
+if __name__ == '__main__':
+  absltest.main()