Remove old API.
PiperOrigin-RevId: 334406920
This commit is contained in:
parent
78d30a0424
commit
bca2baae8d
3 changed files with 0 additions and 1150 deletions
|
@ -1,737 +0,0 @@
|
||||||
# Copyright 2020, The TensorFlow Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# Lint as: python3
|
|
||||||
"""Code that runs membership inference attacks based on the model outputs.
|
|
||||||
|
|
||||||
Warning: This file belongs to the old API for membership inference attacks. This
|
|
||||||
file will be removed soon. membership_inference_attack_new.py contains the new
|
|
||||||
API.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import collections
import collections.abc
import io
import os
import re
from os import mkdir
from typing import Any, Dict, Iterable, Text, Tuple, Union

from absl import logging
import numpy as np
from scipy import special

from tensorflow_privacy.privacy.membership_inference_attack import plotting
from tensorflow_privacy.privacy.membership_inference_attack import trained_attack_models
from tensorflow_privacy.privacy.membership_inference_attack import utils
|
|
||||||
# Type aliases used throughout this module.
# Maps feature/result names (e.g. 'loss', 'is_train', 'thresh_loss_auc') to
# numpy arrays.
ArrayDict = Dict[Text, np.ndarray]
# Maps result names to scalar metric values.
FloatDict = Dict[Text, float]
AnyDict = Dict[Text, Any]
# ((x_train, y_train), (x_test, y_test)) split used by trained attack models.
Dataset = Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]
# Either a single metric name or an iterable of metric names.
MetricNames = Union[Text, Iterable[Text]]
|
|
||||||
|
|
||||||
|
|
||||||
def _get_vulnerabilities(result: ArrayDict, metrics: MetricNames) -> FloatDict:
|
|
||||||
"""Gets the vulnerabilities according to the chosen metrics for all attacks."""
|
|
||||||
vulns = {}
|
|
||||||
if isinstance(metrics, str):
|
|
||||||
metrics = [metrics]
|
|
||||||
for k in result:
|
|
||||||
for metric in metrics:
|
|
||||||
if k.endswith(metric.lower()) or k.endswith('n_examples'):
|
|
||||||
vulns[k] = float(result[k])
|
|
||||||
return vulns
|
|
||||||
|
|
||||||
|
|
||||||
def _get_maximum_vulnerability(
|
|
||||||
attack_result: FloatDict,
|
|
||||||
metrics: MetricNames,
|
|
||||||
filterby: Text = '') -> Dict[Text, Dict[Text, Union[Text, float]]]:
|
|
||||||
"""Returns the worst vulnerability according to the chosen metrics of all attacks."""
|
|
||||||
vulns = {}
|
|
||||||
if isinstance(metrics, str):
|
|
||||||
metrics = [metrics]
|
|
||||||
for metric in metrics:
|
|
||||||
best_attack_value = -np.inf
|
|
||||||
for k in attack_result:
|
|
||||||
if (k.startswith(filterby.lower()) and k.endswith(metric.lower()) and
|
|
||||||
'train' not in k):
|
|
||||||
if float(attack_result[k]) > best_attack_value:
|
|
||||||
best_attack_value = attack_result[k]
|
|
||||||
best_attacker = k
|
|
||||||
if best_attack_value > -np.inf:
|
|
||||||
newkey = filterby + '-' + metric if filterby else metric
|
|
||||||
vulns[newkey] = {'value': best_attack_value, 'attacker': best_attacker}
|
|
||||||
return vulns
|
|
||||||
|
|
||||||
|
|
||||||
def _get_maximum_class_gap_or_none(result: FloatDict,
|
|
||||||
metrics: MetricNames) -> FloatDict:
|
|
||||||
"""Returns the biggest and smallest vulnerability and the gap across classes."""
|
|
||||||
gaps = {}
|
|
||||||
if isinstance(metrics, str):
|
|
||||||
metrics = [metrics]
|
|
||||||
for metric in metrics:
|
|
||||||
hi = -np.inf
|
|
||||||
lo = np.inf
|
|
||||||
hi_idx, lo_idx = -1, -1
|
|
||||||
for k in result:
|
|
||||||
if (k.startswith('class') and k.endswith(metric.lower()) and
|
|
||||||
'train' not in k):
|
|
||||||
if float(result[k]) > hi:
|
|
||||||
hi = float(result[k])
|
|
||||||
hi_idx = int(re.findall(r'class_(\d+)_', k)[0])
|
|
||||||
if float(result[k]) < lo:
|
|
||||||
lo = float(result[k])
|
|
||||||
lo_idx = int(re.findall(r'class_(\d+)_', k)[0])
|
|
||||||
if lo - hi < np.inf:
|
|
||||||
gaps['max_class_gap_' + metric] = hi - lo
|
|
||||||
gaps[f'class_{hi_idx}_' + metric] = hi
|
|
||||||
gaps[f'class_{lo_idx}_' + metric] = lo
|
|
||||||
gaps['max_vuln_class_' + metric] = hi_idx
|
|
||||||
gaps['min_vuln_class_' + metric] = lo_idx
|
|
||||||
return gaps
|
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
|
||||||
# Attacks
|
|
||||||
# ------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
def _run_threshold_loss_attack(features: ArrayDict,
                               figure_file_prefix: Text = '',
                               figure_directory: Text = None) -> ArrayDict:
  """Runs a simple threshold attack using the per-example loss as score.

  Args:
    features: Dict with at least 'is_train' (0/1 membership labels) and
      'loss' (per-example losses).
    figure_file_prefix: Prefix for the names of stored figures.
    figure_directory: If not None, directory where the ROC curve and loss
      histograms are written.

  Returns:
    Performance metrics of the attack, keys prefixed with 'thresh_loss_'.
  """
  logging.info('Run threshold attack on loss...')
  membership = features['is_train']
  losses = features['loss']
  prefix = 'thresh_loss'
  # Negate the loss so that larger scores correspond to the positive
  # (member) class expected by the metric computation.
  metrics = utils.compute_performance_metrics(membership, -losses)
  if figure_directory is not None:
    roc_path = os.path.join(figure_directory,
                            figure_file_prefix + prefix + '.png')
    roc_figure = plotting.plot_curve_with_area(
        metrics['fpr'], metrics['tpr'], xlabel='FPR', ylabel='TPR')
    plotting.save_plot(roc_figure, roc_path)
    hist_path = os.path.join(figure_directory,
                             figure_file_prefix + prefix + '_hist.png')
    hist_figure = plotting.plot_histograms(
        losses[membership == 1], losses[membership == 0], xlabel='loss')
    plotting.save_plot(hist_figure, hist_path)
  return utils.prepend_to_keys(metrics, prefix + '_')
|
|
||||||
|
|
||||||
|
|
||||||
def _run_threshold_attack_maxlogit(features: ArrayDict,
                                   figure_file_prefix: Text = '',
                                   figure_directory: Text = None) -> ArrayDict:
  """Runs the threshold attack on the maximum logit.

  Args:
    features: Dict with at least 'is_train' (0/1 membership labels) and
      'logits' (per-example logits or probabilities, shape (n, n_classes)).
    figure_file_prefix: Prefix for the names of stored figures.
    figure_directory: If not None, directory where the ROC curve and
      max-logit histograms are written.

  Returns:
    Performance metrics of the attack, keys prefixed with 'thresh_maxlogit_'.
  """
  is_train = features['is_train']
  preds = np.max(features['logits'], axis=-1)
  tmp_results = utils.compute_performance_metrics(is_train, preds)
  attack_prefix = 'thresh_maxlogit'
  if figure_directory is not None:
    figpath = os.path.join(figure_directory,
                           figure_file_prefix + attack_prefix + '.png')
    plotting.save_plot(
        plotting.plot_curve_with_area(
            tmp_results['fpr'], tmp_results['tpr'], xlabel='FPR', ylabel='TPR'),
        figpath)
    figpath = os.path.join(figure_directory,
                           figure_file_prefix + attack_prefix + '_hist.png')
    # Bug fix: the histogram shows maximum logits, not losses, so label the
    # x-axis accordingly (it was previously mislabeled 'loss', copied from
    # the loss attack).
    plotting.save_plot(
        plotting.plot_histograms(
            preds[is_train == 1], preds[is_train == 0], xlabel='max logit'),
        figpath)
  return utils.prepend_to_keys(tmp_results, attack_prefix + '_')
|
|
||||||
|
|
||||||
|
|
||||||
def _run_trained_attack(attack_classifier: Text,
                        data: Dataset,
                        attack_prefix: Text,
                        figure_file_prefix: Text = '',
                        figure_directory: Text = None) -> ArrayDict:
  """Trains a binary attack classifier and evaluates it on both splits.

  Args:
    attack_classifier: Name of the classifier to train, passed to
      trained_attack_models.choose_model.
    data: ((x_train, y_train), (x_test, y_test)) split of attack features
      and membership labels.
    attack_prefix: Prefix (ending in '_') for all result keys.
    figure_file_prefix: Prefix for the names of stored figures.
    figure_directory: If not None, directory where the test ROC plot is
      written.

  Returns:
    Performance metrics on both splits, keyed '<attack_prefix>train_*' and
    '<attack_prefix>test_*'.
  """
  (x_train, y_train), (x_test, y_test) = data
  model = trained_attack_models.choose_model(attack_classifier)
  model.fit(x_train, y_train)

  def _membership_scores(x):
    # Probability the classifier assigns to the "member" class (label 1).
    return model.predict_proba(x)[:, model.classes_ == 1]

  results = utils.prepend_to_keys(
      utils.compute_performance_metrics(y_train, _membership_scores(x_train)),
      attack_prefix + 'train_')
  results.update(
      utils.prepend_to_keys(
          utils.compute_performance_metrics(y_test, _membership_scores(x_test)),
          attack_prefix + 'test_'))

  if figure_directory is not None:
    # attack_prefix ends in '_'; strip it for the figure file name.
    figpath = os.path.join(figure_directory,
                           figure_file_prefix + attack_prefix[:-1] + '.png')
    roc_figure = plotting.plot_curve_with_area(
        results[attack_prefix + 'test_fpr'],
        results[attack_prefix + 'test_tpr'],
        xlabel='FPR',
        ylabel='TPR')
    plotting.save_plot(roc_figure, figpath)
  return results
|
|
||||||
|
|
||||||
|
|
||||||
def _run_attacks_and_plot(features: ArrayDict,
                          attacks: Iterable[Text],
                          attack_classifiers: Iterable[Text],
                          balance: bool,
                          test_size: float,
                          random_state: int,
                          figure_file_prefix: Text = '',
                          figure_directory: Text = None) -> ArrayDict:
  """Runs the requested attacks on `features` and optionally stores plots.

  Args:
    features: Dict of per-example features ('is_train' plus 'loss' and/or
      'logits').
    attacks: Which attacks to run (subset of 'thresh_loss', 'thresh_maxlogit',
      'logits', 'logits_loss').
    attack_classifiers: Classifier names for the trained attacks.
    balance: Whether to subsample so members and non-members are equally
      represented.
    test_size: Fraction of data held out to evaluate trained attacks.
    random_state: Seed for the balancing subsample.
    figure_file_prefix: Prefix for the names of stored figures.
    figure_directory: If not None, directory where plots are written.

  Returns:
    Merged results of all attacks that were run; an empty dict if there was
    not enough data left after balancing.
  """
  if balance:
    try:
      features = utils.subsample_to_balance(features, random_state)
    except RuntimeError:
      logging.info('Not enough remaining data for attack: Empty results.')
      return {}

  result = {}
  # -------------------- Simple threshold attacks
  if 'thresh_loss' in attacks:
    result.update(
        _run_threshold_loss_attack(features, figure_file_prefix,
                                   figure_directory))
  if 'thresh_maxlogit' in attacks:
    result.update(
        _run_threshold_attack_maxlogit(features, figure_file_prefix,
                                       figure_directory))

  # -------------------- Run learned attacks
  # TODO(b/157632603): Add a prefix (for example 'trained_') for attacks which
  # use classifiers to distinguish from threshold attacks.
  if 'logits' in attacks:
    data = utils.get_train_test_split(
        features, add_loss=False, test_size=test_size)
    for clf in attack_classifiers:
      logging.info('Train %s on %d logits', clf, data[0][0].shape[1])
      result.update(
          _run_trained_attack(clf, data, f'{clf}_logits_',
                              figure_file_prefix, figure_directory))

  if 'logits_loss' in attacks:
    data = utils.get_train_test_split(
        features, add_loss=True, test_size=test_size)
    for clf in attack_classifiers:
      logging.info('Train %s on %d logits + loss', clf, data[0][0].shape[1])
      result.update(
          _run_trained_attack(clf, data, f'{clf}_logits_loss_',
                              figure_file_prefix, figure_directory))
  return result
|
|
||||||
|
|
||||||
|
|
||||||
def run_attack(loss_train: np.ndarray = None,
               loss_test: np.ndarray = None,
               logits_train: np.ndarray = None,
               logits_test: np.ndarray = None,
               labels_train: np.ndarray = None,
               labels_test: np.ndarray = None,
               attack_classifiers: Iterable[Text] = None,
               only_misclassified: bool = False,
               by_class: Union[bool, Iterable[int], int] = False,
               by_percentile: Union[bool, Iterable[int], int] = False,
               figure_directory: Text = None,
               output_directory: Text = None,
               metric: MetricNames = 'auc',
               balance: bool = True,
               test_size: float = 0.2,
               random_state: int = 0) -> FloatDict:
  """Run membership inference attack(s).

  Based only on specific outputs of a machine learning model on some examples
  used for training (train) and some examples not used for training (test),
  run membership inference attacks that try to discriminate training from
  test inputs based only on the model outputs.
  While all inputs are optional, at least one train/test pair is required to
  run any attacks (either losses or logits/probabilities).
  Note that one can equally provide output probabilities instead of logits in
  the logits_train / logits_test arguments.

  We measure the vulnerability of the model via the area under the ROC-curve
  (auc) or via max |fpr - tpr| (advantage) of the attack classifier. These
  measures are very closely related and may look almost indistinguishable.

  This function provides relatively fine grained control and outputs detailed
  results. For a higher-level wrapper with sane internal default settings and
  distilled output results, see `run_all_attacks`.

  Via the `figure_directory` argument and the `output_directory` argument more
  detailed information as well as roc-curve plots can optionally be stored to
  disk.

  If `loss_train` and `loss_test` are provided we run:
    - simple threshold attack on the loss

  If `logits_train` and `logits_test` are provided we run:
    - simple threshold attack on the top logit
    - if `attack_classifiers` is not None and no losses are provided: train
      the specified classifiers on the top 10 logits (or all logits if there
      are less than 10)
    - if `attack_classifiers` is not None and losses are provided: train the
      specified classifiers on the top 10 logits (or all logits if there are
      less than 10) and the loss

  Args:
    loss_train: A 1D array containing the individual scalar losses for
      examples used during training.
    loss_test: A 1D array containing the individual scalar losses for examples
      not used during training.
    logits_train: A 2D array (n_train, n_classes) of the individual logits or
      output probabilities of examples used during training.
    logits_test: A 2D array (n_test, n_classes) of the individual logits or
      output probabilities of examples not used during training.
    labels_train: The true labels of the training examples. Labels are only
      needed when `by_class` is specified (i.e., not False).
    labels_test: The true labels of the test examples. Labels are only needed
      when `by_class` is specified (i.e., not False).
    attack_classifiers: Attack classifiers to train beyond simple thresholding
      that require training a simple binary ML classifier. This argument is
      ignored if logits are not provided. Classifiers can be 'lr' for logistic
      regression, 'mlp' for multi-layered perceptron, 'rf' for random forests,
      or 'knn' for k-nearest-neighbors. If 'None', don't train classifiers
      beyond simple thresholding.
    only_misclassified: Run and evaluate attacks only on misclassified
      examples. Must specify `labels_train`, `labels_test`, `logits_train`
      and `logits_test` to use this. If this is True, `by_class` and
      `by_percentile` are ignored.
    by_class: This argument determines whether attacks are run on the entire
      data, or on examples grouped by their class label. If `True`, all
      attacks are run separately for each class. If `by_class` is a single
      integer, run attacks for this class only. If `by_class` is an iterable
      of integers, run all attacks for each of the specified class labels
      separately. Only used if `labels_train` and `labels_test` are specified.
      If `by_class` is specified (not False), `by_percentile` is ignored.
      Ignored if `only_misclassified` is True.
    by_percentile: This argument determines whether attacks are run on the
      entire data, or separately for examples where the most likely class
      prediction is within a given percentile of all maximum predictions. If
      `True`, all attacks are run separately for the examples with max
      probabilities within the ten deciles. If `by_percentile` is a single
      int between 0 and 100, run attacks only for examples with confidence
      within this percentile. If `by_percentile` is an iterable of ints
      between 0 and 100, run all attacks for each of the specified
      percentiles separately. Ignored if `by_class` is specified. Ignored if
      `logits_train` and `logits_test` are not specified. Ignored if
      `only_misclassified` is True.
    figure_directory: Where to store ROC-curve plots and histograms. If
      `None`, don't create plots.
    output_directory: Where to store detailed result data for all run
      attacks. If `None`, don't store detailed result data.
    metric: Available vulnerability metrics are 'auc' or 'advantage' for the
      area under the ROC curve or the advantage (max |tpr - fpr|). Specify
      either one of them or both.
    balance: Whether to use the same number of train and test samples (by
      randomly subsampling whichever happens to be larger).
    test_size: The fraction of the input data to use for the evaluation of
      trained ML attacks. This argument is ignored, if either
      attack_classifiers is None, or no logits are provided.
    random_state: Random seed for reproducibility. Only used if attack models
      are trained.

  Returns:
    results: Dictionary with the chosen vulnerability metric(s) for all
      attacks that were run.

  Raises:
    ValueError: If neither a loss nor a logits train/test pair is provided,
      if array ranks are wrong, or if the inputs required by
      `only_misclassified` / `by_class` / `by_percentile` are missing.
  """
  print(
      'Deprecation warning: function run_attack is '
      'deprecated and will be removed soon. '
      'Please use membership_inference_attack_new.run_attacks'
  )
  attacks = []
  features = {}
  # ---------- Check available data ----------
  if ((loss_train is None or loss_test is None) and
      (logits_train is None or logits_test is None)):
    raise ValueError(
        'Need at least train and test for loss or train and test for logits.')

  # ---------- If losses are provided ----------
  if loss_train is not None and loss_test is not None:
    if loss_train.ndim != 1 or loss_test.ndim != 1:
      raise ValueError('Losses must be 1D arrays.')
    features['is_train'] = np.concatenate(
        (np.ones(len(loss_train)), np.zeros(len(loss_test))),
        axis=0).astype(int)
    features['loss'] = np.concatenate((loss_train.ravel(), loss_test.ravel()),
                                      axis=0)
    attacks.append('thresh_loss')

  # ---------- If logits are provided ----------
  if logits_train is not None and logits_test is not None:
    assert logits_train.ndim == 2 and logits_test.ndim == 2, \
        'Logits must be 2D arrays.'
    assert logits_train.shape[1] == logits_test.shape[1], \
        'Train and test logits must agree along axis 1 (number of classes).'
    if 'is_train' in features:
      assert (loss_train.shape[0] == logits_train.shape[0] and
              loss_test.shape[0] == logits_test.shape[0]), \
          'Number of examples must match between loss and logits.'
    else:
      features['is_train'] = np.concatenate(
          (np.ones(logits_train.shape[0]), np.zeros(logits_test.shape[0])),
          axis=0).astype(int)
    attacks.append('thresh_maxlogit')
    features['logits'] = np.concatenate((logits_train, logits_test), axis=0)
    if attack_classifiers:
      attacks.append('logits')
      if 'loss' in features:
        attacks.append('logits_loss')

  # ---------- If labels are provided ----------
  if labels_train is not None and labels_test is not None:
    if labels_train.ndim != 1 or labels_test.ndim != 1:
      raise ValueError('Labels must be 1D arrays.')
    if 'loss' in features:
      assert (loss_train.shape[0] == labels_train.shape[0] and
              loss_test.shape[0] == labels_test.shape[0]), \
          'Number of examples must match between loss and labels.'
    else:
      assert (logits_train.shape[0] == labels_train.shape[0] and
              logits_test.shape[0] == labels_test.shape[0]), \
          'Number of examples must match between logits and labels.'
    features['label'] = np.concatenate((labels_train, labels_test), axis=0)

  # ---------- Data subsampling or filtering ----------
  filtertype = None
  filtervals = [None]
  if only_misclassified:
    if (labels_train is None or labels_test is None or logits_train is None or
        logits_test is None):
      raise ValueError('Must specify labels_train, labels_test, logits_train, '
                       'and logits_test for the only_misclassified option.')
    filtertype = 'misclassified'
  elif by_class:
    if labels_train is None or labels_test is None:
      raise ValueError('Must specify labels_train and labels_test when using '
                       'the by_class option.')
    # bool is a subclass of int, so the bool check must come first.
    if isinstance(by_class, bool):
      filtervals = list(set(labels_train) | set(labels_test))
    elif isinstance(by_class, int):
      filtervals = [by_class]
    # Bug fix: collections.Iterable was removed in Python 3.10; use the
    # collections.abc alias instead.
    elif isinstance(by_class, collections.abc.Iterable):
      filtervals = list(by_class)
    filtertype = 'class'
  elif by_percentile:
    if logits_train is None or logits_test is None:
      raise ValueError('Must specify logits_train and logits_test when using '
                       'the by_percentile option.')
    if isinstance(by_percentile, bool):
      filtervals = list(range(10, 101, 10))
    elif isinstance(by_percentile, int):
      filtervals = [by_percentile]
    elif isinstance(by_percentile, collections.abc.Iterable):
      filtervals = [int(percentile) for percentile in by_percentile]
    filtertype = 'percentile'

  # ---------- Need to create figure directory? ----------
  if figure_directory is not None:
    # Bug fix: plain mkdir raised FileExistsError on repeated runs.
    os.makedirs(figure_directory, exist_ok=True)

  # ---------- Actually run attacks and plot if required ----------
  logging.info('Selecting %s with values %s', filtertype, filtervals)
  num = None
  result = {}
  for filterval in filtervals:
    if filtertype is None:
      tmp_features = features
    elif filtertype == 'misclassified':
      idx = features['label'] != np.argmax(features['logits'], axis=-1)
      tmp_features = utils.select_indices(features, idx)
      num = np.sum(idx)
    elif filtertype == 'class':
      idx = features['label'] == filterval
      tmp_features = utils.select_indices(features, idx)
      num = np.sum(idx)
    elif filtertype == 'percentile':
      certainty = np.max(special.softmax(features['logits'], axis=-1), axis=-1)
      idx = certainty <= np.percentile(certainty, filterval)
      tmp_features = utils.select_indices(features, idx)
      # NOTE(review): `num` is not updated here, so percentile results carry
      # no 'n_examples' entry — presumably intentional; confirm.

    prefix = f'{filtertype}_' if filtertype is not None else ''
    prefix += f'{filterval}_' if filterval is not None else ''
    tmp_result = _run_attacks_and_plot(tmp_features, attacks,
                                       attack_classifiers, balance, test_size,
                                       random_state, prefix, figure_directory)
    if num is not None:
      tmp_result['n_examples'] = float(num)
    if tmp_result:
      result.update(utils.prepend_to_keys(tmp_result, prefix))

  # ---------- Store data ----------
  if output_directory is not None:
    # Bug fix: plain mkdir raised FileExistsError on repeated runs.
    os.makedirs(output_directory, exist_ok=True)
    resultpath = os.path.join(output_directory, 'attack_results.npz')
    logging.info('Store aggregate results at %s.', resultpath)
    with open(resultpath, 'wb') as fp:
      io_buffer = io.BytesIO()
      np.savez(io_buffer, **result)
      fp.write(io_buffer.getvalue())

  return _get_vulnerabilities(result, metric)
|
|
||||||
|
|
||||||
|
|
||||||
def run_all_attacks(loss_train: np.ndarray = None,
                    loss_test: np.ndarray = None,
                    logits_train: np.ndarray = None,
                    logits_test: np.ndarray = None,
                    labels_train: np.ndarray = None,
                    labels_test: np.ndarray = None,
                    attack_classifiers: Iterable[Text] = ('lr', 'mlp', 'rf',
                                                          'knn'),
                    decimals: Union[int, None] = 4) -> FloatDict:
  """Runs all possible membership inference attacks.

  Check 'run_attack' for detailed information of how attacks are performed
  and evaluated. This function internally chooses sane default settings and
  returns all possible output combinations: the full data, misclassified
  examples only (when labels and logits are available), a per-class split
  (when labels are available), and per-decile splits (when logits are
  available). For fine grained control and partial attacks, please see
  `run_attack`.

  Args:
    loss_train: A 1D array containing the individual scalar losses for
      examples used during training.
    loss_test: A 1D array containing the individual scalar losses for
      examples not used during training.
    logits_train: A 2D array (n_train, n_classes) of the individual logits or
      output probabilities of examples used during training.
    logits_test: A 2D array (n_test, n_classes) of the individual logits or
      output probabilities of examples not used during training.
    labels_train: The true labels of the training examples.
    labels_test: The true labels of the test examples.
    attack_classifiers: Which binary classifiers to train (in addition to
      simple threshold attacks). This can include 'lr' (logistic regression),
      'mlp' (multi-layered perceptron), 'rf' (random forests), 'knn'
      (k-nearest neighbors), which will be trained with cross validation to
      determine good hyperparameters.
    decimals: Round all float results to this number of decimals. If decimals
      is None, don't round.

  Returns:
    Dictionary with all attack results.
  """
  print(
      'Deprecation warning: function run_all_attacks is '
      'deprecated and will be removed soon. '
      'Please use membership_inference_attack_new.run_attacks'
  )
  metrics = ['auc', 'advantage']
  have_labels = labels_train is not None and labels_test is not None
  have_logits = logits_train is not None and logits_test is not None

  # Entire data
  result = utils.prepend_to_keys(
      run_attack(
          loss_train,
          loss_test,
          logits_train,
          logits_test,
          attack_classifiers=attack_classifiers,
          metric=metrics), 'all_')

  # Misclassified examples
  if have_labels and have_logits:
    result.update(
        run_attack(
            loss_train,
            loss_test,
            logits_train,
            logits_test,
            labels_train,
            labels_test,
            attack_classifiers=attack_classifiers,
            only_misclassified=True,
            metric=metrics))

  # Split per class
  if have_labels:
    result.update(
        run_attack(
            loss_train,
            loss_test,
            logits_train,
            logits_test,
            labels_train,
            labels_test,
            by_class=True,
            attack_classifiers=attack_classifiers,
            metric=metrics))

  # Different deciles
  if have_logits:
    result.update(
        run_attack(
            loss_train,
            loss_test,
            logits_train,
            logits_test,
            by_percentile=True,
            attack_classifiers=attack_classifiers,
            metric=metrics))

  if decimals is not None:
    result = {key: round(value, decimals) for key, value in result.items()}
  return result
|
|
||||||
|
|
||||||
|
|
||||||
def run_all_attacks_and_create_summary(
    loss_train: np.ndarray = None,
    loss_test: np.ndarray = None,
    logits_train: np.ndarray = None,
    logits_test: np.ndarray = None,
    labels_train: np.ndarray = None,
    labels_test: np.ndarray = None,
    return_dict: bool = True,
    decimals: Union[int, None] = 4) -> Union[Text, Tuple[Text, AnyDict]]:
  """Runs all possible membership inference attack(s) and distill results.

  Check 'run_attack' for detailed information of how attacks are performed
  and evaluated.

  This function internally chooses sane default settings for all attacks and
  returns all possible output combinations.
  For fine grained control and partial attacks, please see `run_attack`.

  Args:
    loss_train: A 1D array containing the individual scalar losses for examples
      used during training.
    loss_test: A 1D array containing the individual scalar losses for examples
      not used during training.
    logits_train: A 2D array (n_train, n_classes) of the individual logits or
      output probabilities of examples used during training.
    logits_test: A 2D array (n_test, n_classes) of the individual logits or
      output probabilities of examples not used during training.
    labels_train: The true labels of the training examples. Labels are only
      needed when `by_class` is specified (i.e., not False).
    labels_test: The true labels of the test examples. Labels are only needed
      when `by_class` is specified (i.e., not False).
    return_dict: Whether to also return a dictionary with the results summarized
      in the summary string.
    decimals: Round all float results to this number of decimals. If decimals is
      None, don't round.

  Returns:
    summarystring: A string with natural language summary of the attacks. In the
      summary string printed numbers will be rounded to `decimals` decimals if
      provided, otherwise will round to 4 digits by default for readability.
    result: a dictionary with all the distilled attack information summarized
      in the summarystring
  """
  print(
      'Deprecation warning: function run_all_attacks_and_create_summary is '
      'deprecated and will be removed soon. '
      'Please use membership_inference_attack_new.run_attacks'
  )
  summary = []
  metrics = ['auc', 'advantage']
  # Fixed default choice of trained attackers for the full sweep.
  attack_classifiers = ['lr', 'knn']
  # Ask for unrounded results here; rounding (if any) is applied at the end
  # so the summary text and the returned dict stay consistent.
  results = run_all_attacks(
      loss_train,
      loss_test,
      logits_train,
      logits_test,
      labels_train,
      labels_test,
      attack_classifiers=attack_classifiers,
      decimals=None)
  output = _get_maximum_vulnerability(results, metrics, filterby='all')

  # strdec only controls how many digits are printed in the summary text.
  if decimals is not None:
    strdec = decimals
  else:
    strdec = 4

  # Overall (whole-dataset) vulnerability, one section per metric.
  for metric in metrics:
    summary.append(f'========== {metric.upper()} ==========')
    best_value = output['all-' + metric]['value']
    best_attacker = output['all-' + metric]['attacker']
    summary.append(f'The best attack ({best_attacker}) achieved an {metric} of '
                   f'{best_value:.{strdec}f}.')
    summary.append('')

  # Per-class vulnerability gap (only present when labels were provided).
  classgap = _get_maximum_class_gap_or_none(results, metrics)
  if classgap:
    output.update(classgap)
    for metric in metrics:
      summary.append(f'========== {metric.upper()} per class ==========')
      hi_idx = output[f'max_vuln_class_{metric}']
      lo_idx = output[f'min_vuln_class_{metric}']
      hi = output[f'class_{hi_idx}_{metric}']
      lo = output[f'class_{lo_idx}_{metric}']
      gap = output[f'max_class_gap_{metric}']
      summary.append(f'The most vulnerable class {hi_idx} has {metric} of '
                     f'{hi:.{strdec}f}.')
      summary.append(f'The least vulnerable class {lo_idx} has {metric} of '
                     f'{lo:.{strdec}f}.')
      summary.append(f'=> The maximum gap between class vulnerabilities is '
                     f'{gap:.{strdec}f}.')
      summary.append('')

  # Vulnerability restricted to misclassified examples (only present when
  # both logits and labels were provided).
  misclassified = _get_maximum_vulnerability(
      results, metrics, filterby='misclassified')
  if misclassified:
    for metric in metrics:
      best_value = misclassified['misclassified-' + metric]['value']
      best_attacker = misclassified['misclassified-' + metric]['attacker']
      summary.append(f'========== {metric.upper()} for misclassified '
                     '==========')
      summary.append('Among misclassified examples, the best attack '
                     f'({best_attacker}) achieved an {metric} of '
                     f'{best_value:.{strdec}f}.')
      summary.append('')
    output.update(misclassified)

  # Carry over the raw example counts from the full results.
  n_examples = {k: v for k, v in results.items() if k.endswith('n_examples')}
  if n_examples:
    output.update(n_examples)

  # Flatten remaining dicts in output. 'all-*' entries are split into a
  # value and a best-attacker entry; dict-valued entries with any other
  # prefix are intentionally dropped.
  fresh_output = {}
  for k, v in output.items():
    if isinstance(v, dict):
      if k.startswith('all'):
        fresh_output[k[4:]] = v['value']
        fresh_output['best_attacker_' + k[4:]] = v['attacker']
    else:
      fresh_output[k] = v
  output = fresh_output

  # Round the returned dict (the summary text was already rounded via strdec).
  if decimals is not None:
    for k, v in output.items():
      if isinstance(v, float):
        output[k] = round(v, decimals)

  summary = '\n'.join(summary)
  if return_dict:
    return summary, output
  else:
    return summary
|
|
|
@ -1,307 +0,0 @@
|
||||||
# Copyright 2020, The TensorFlow Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# Lint as: python3
|
|
||||||
"""Tests for tensorflow_privacy.privacy.membership_inference_attack.utils."""
|
|
||||||
from absl.testing import absltest
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
|
|
||||||
|
|
||||||
|
|
||||||
def get_result_dict():
  """Build a small fake attack-result dictionary for the tests."""
  result = {}
  result['test_n_examples'] = np.ones(1)
  result['test_examples'] = np.zeros(1)
  result['test_auc'] = np.ones(1)
  result['test_advantage'] = np.ones(1)
  # Entries whose values encode their own rank; the extraction tests use
  # them to check that the maximum-vulnerability logic picks the right one.
  result['all_0-metric'] = np.array([1])
  result['all_1-metric'] = np.array([2])
  result['test_2-metric'] = np.array([3])
  result['test_score'] = np.array([4])
  return result
|
|
||||||
|
|
||||||
|
|
||||||
def get_test_inputs():
  """Generate synthetic attack inputs with a fixed random seed."""
  num_train = num_test = 500
  rng = np.random.RandomState(4)
  # Members (train) get lower losses and higher logits than non-members
  # (test), so the two splits are separable but overlapping.
  loss_train = rng.randn(num_train) - 0.4
  loss_test = rng.randn(num_test) + 0.4
  logits_train = rng.randn(num_train, 5) + 0.2
  logits_test = rng.randn(num_test, 5) - 0.2
  # Balanced labels over five classes with different orderings per split.
  labels_train = np.arange(num_train) % 5
  labels_test = (3 * np.arange(num_test)) % 5
  return (loss_train, loss_test, logits_train, logits_test, labels_train,
          labels_test)
|
|
||||||
|
|
||||||
|
|
||||||
class GetVulnerabilityTest(absltest.TestCase):
  """Tests for mia._get_vulnerabilities."""

  def test_get_vulnerabilities(self):
    """Test extraction of vulnerability scores."""
    results = get_result_dict()

    # A single metric yields that metric plus the example count.
    for metric in ('auc', 'advantage'):
      extracted = mia._get_vulnerabilities(results, metric)
      self.assertLen(extracted, 2)
      self.assertEqual(extracted[f'test_{metric}'], 1)
      self.assertEqual(extracted['test_n_examples'], 1)

    # Both metrics at once yield three entries.
    extracted = mia._get_vulnerabilities(results, ['auc', 'advantage'])
    self.assertLen(extracted, 3)
    self.assertEqual(extracted['test_auc'], 1)
    self.assertEqual(extracted['test_advantage'], 1)
    self.assertEqual(extracted['test_n_examples'], 1)
|
|
||||||
|
|
||||||
|
|
||||||
class GetMaximumVulnerabilityTest(absltest.TestCase):
  """Tests for mia._get_maximum_vulnerability."""

  def test_get_maximum_vulnerability(self):
    """Test extraction of maximum vulnerability score."""
    results = get_result_dict()

    # Each '{i}-metric' suffix matches exactly one entry in the fake dict.
    for idx in range(3):
      metric_key = f'{idx}-metric'
      out = mia._get_maximum_vulnerability(results, metric_key)
      self.assertLen(out, 1)
      self.assertEqual(out[metric_key]['value'], idx + 1)
      expected_attacker = f'all_{idx}-metric' if idx < 2 else 'test_2-metric'
      self.assertEqual(out[metric_key]['attacker'], expected_attacker)

    # 'metric' matches all three entries; the maximum value wins.
    out = mia._get_maximum_vulnerability(results, 'metric')
    self.assertLen(out, 1)
    self.assertEqual(out['metric']['value'], 3)

    # Filtering restricts the search to keys with the given prefix.
    out = mia._get_maximum_vulnerability(results, ['metric'], filterby='all')
    self.assertLen(out, 1)
    self.assertEqual(out['all-metric']['value'], 2)

    # Multiple metrics are extracted independently.
    out = mia._get_maximum_vulnerability(results, ['metric', 'score'])
    self.assertLen(out, 2)
    self.assertEqual(out['metric']['value'], 3)
    self.assertEqual(out['score']['value'], 4)
    self.assertEqual(out['score']['attacker'], 'test_score')
|
|
||||||
|
|
||||||
|
|
||||||
class ThresholdAttackLossTest(absltest.TestCase):
  """Tests for the simple loss-threshold attack."""

  def test_threshold_attack_loss(self):
    """Test simple threshold attack on loss."""
    # Identical losses for members and non-members: chance-level results.
    degenerate = {
        'loss': np.zeros(10),
        'is_train': np.concatenate((np.zeros(5), np.ones(5))),
    }
    out = mia._run_threshold_loss_attack(degenerate)
    for key in out:
      self.assertStartsWith(key, 'thresh_loss')
    self.assertEqual(out['thresh_loss_auc'], 0.5)
    self.assertEqual(out['thresh_loss_advantage'], 0.0)

    # Shifted Gaussians: member losses sit below non-member losses.
    rng = np.random.RandomState(4)
    num_train, num_test = 1000, 500
    separable = {
        'loss': np.concatenate(
            (rng.randn(num_train) - 0.4, rng.randn(num_test) + 0.4)),
        'is_train': np.concatenate((np.ones(num_train), np.zeros(num_test))),
    }
    out = mia._run_threshold_loss_attack(separable)
    self.assertBetween(out['thresh_loss_auc'], 0.7, 0.75)
    self.assertBetween(out['thresh_loss_advantage'], 0.3, 0.35)
|
|
||||||
|
|
||||||
|
|
||||||
class ThresholdAttackMaxlogitTest(absltest.TestCase):
  """Tests for the simple max-logit-threshold attack."""

  def test_threshold_attack_maxlogits(self):
    """Test simple threshold attack on maximum logit."""
    # Identical max-logit distributions: chance-level results.
    degenerate = {
        'logits': np.eye(10, 14),
        'is_train': np.concatenate((np.zeros(5), np.ones(5))),
    }
    out = mia._run_threshold_attack_maxlogit(degenerate)
    for key in out:
      self.assertStartsWith(key, 'thresh_maxlogit')
    self.assertEqual(out['thresh_maxlogit_auc'], 0.5)
    self.assertEqual(out['thresh_maxlogit_advantage'], 0.0)

    # Shifted Gaussians: member logits sit above non-member logits.
    rng = np.random.RandomState(4)
    num_train, num_test = 1000, 500
    separable = {
        'logits': np.concatenate(
            (rng.randn(num_train, 12) + 0.2, rng.randn(num_test, 12) - 0.2),
            axis=0),
        'is_train': np.concatenate((np.ones(num_train), np.zeros(num_test))),
    }
    out = mia._run_threshold_attack_maxlogit(separable)
    self.assertBetween(out['thresh_maxlogit_auc'], 0.7, 0.75)
    self.assertBetween(out['thresh_maxlogit_advantage'], 0.3, 0.35)
|
|
||||||
|
|
||||||
|
|
||||||
class TrainedAttackTrivialTest(absltest.TestCase):
  """Trained attacks on a trivially separable problem."""

  def test_trained_attack(self):
    """Test trained attacks."""
    # Feature sign perfectly determines the label, so every attack model
    # should reach an AUC and advantage of exactly 1.
    x_train, x_test = np.ones((500, 3)), np.ones((20, 3))
    x_train[:200] *= -1
    x_test[:8] *= -1
    y_train, y_test = np.ones(500).astype(int), np.ones(20).astype(int)
    y_train[:200] = 0
    y_test[:8] = 0
    data = (x_train, y_train), (x_test, y_test)
    for classifier in ['lr', 'rf', 'mlp', 'knn']:
      out = mia._run_trained_attack(classifier, data, attack_prefix='a-')
      for name in ['a-train_auc', 'a-test_auc',
                   'a-train_advantage', 'a-test_advantage']:
        self.assertEqual(out[name], 1)
|
|
||||||
|
|
||||||
|
|
||||||
class TrainedAttackRandomFeaturesTest(absltest.TestCase):
  """Trained attacks on pure noise."""

  def test_trained_attack(self):
    """Test trained attacks."""
    # Labels are independent of the features, so an attack can at best
    # overfit its training split; held-out metrics stay near chance.
    rng = np.random.RandomState(4)
    x_train, x_test = rng.randn(500, 3), rng.randn(500, 3)
    y_train = rng.binomial(1, 0.5, size=(500,))
    y_test = rng.binomial(1, 0.5, size=(500,))
    data = (x_train, y_train), (x_test, y_test)
    for classifier in ['lr', 'rf', 'mlp', 'knn']:
      out = mia._run_trained_attack(classifier, data, attack_prefix='a-')
      self.assertBetween(out['a-train_auc'], 0.5, 1.)
      self.assertBetween(out['a-test_auc'], 0.4, 0.6)
      self.assertBetween(out['a-train_advantage'], 0., 1.0)
      self.assertBetween(out['a-test_advantage'], 0., 0.2)
|
|
||||||
|
|
||||||
|
|
||||||
class AttackLossesTest(absltest.TestCase):
  """run_attack with losses only."""

  def test_attack(self):
    """Test individual attack function."""
    # Losses only, both metrics: only the loss-threshold attack applies.
    loss_train, loss_test = get_test_inputs()[:2]
    out = mia.run_attack(loss_train, loss_test, metric=('auc', 'advantage'))
    self.assertBetween(out['thresh_loss_auc'], 0.7, 0.75)
    self.assertBetween(out['thresh_loss_advantage'], 0.3, 0.35)
|
|
||||||
|
|
||||||
|
|
||||||
class AttackLossesLogitsTest(absltest.TestCase):
  """run_attack with losses and logits."""

  def test_attack(self):
    """Test individual attack function."""
    # Losses and logits, two trained classifiers, a single metric.
    loss_train, loss_test, logits_train, logits_test = get_test_inputs()[:4]
    out = mia.run_attack(
        loss_train,
        loss_test,
        logits_train,
        logits_test,
        attack_classifiers=('rf', 'knn'),
        metric='auc')
    # Both classifiers, on logits alone and on logits+loss, beat chance.
    for clf in ('rf', 'knn'):
      self.assertBetween(out[f'{clf}_logits_test_auc'], 0.7, 0.9)
    for clf in ('rf', 'knn'):
      self.assertBetween(out[f'{clf}_logits_loss_test_auc'], 0.7, 0.9)
|
|
||||||
|
|
||||||
|
|
||||||
class AttackLossesLabelsByClassTest(absltest.TestCase):
  """run_attack split per class."""

  def test_attack(self):
    """Test attack on losses and labels, split by class."""
    loss_train, loss_test, _, _, labels_train, labels_test = get_test_inputs()
    num_total = loss_train.shape[0] + loss_test.shape[0]
    out = mia.run_attack(
        loss_train,
        loss_test,
        labels_train=labels_train,
        labels_test=labels_test,
        by_class=True,
        metric='auc')
    # Five classes, two entries each (auc and example count).
    self.assertLen(out, 10)
    for key, value in out.items():
      self.assertStartsWith(key, 'class_')
      if key.endswith('n_examples'):
        # Labels are balanced, so each class holds a fifth of all examples.
        self.assertEqual(int(value), num_total // 5)
      else:
        self.assertBetween(value, 0.65, 0.75)
|
|
||||||
|
|
||||||
class AttackLossesLabelsSingleClassTest(absltest.TestCase):
  """run_attack restricted to a single class."""

  def test_attack(self):
    """Test attack on losses and labels for one class only."""
    loss_train, loss_test, _, _, labels_train, labels_test = get_test_inputs()
    num_total = loss_train.shape[0] + loss_test.shape[0]
    out = mia.run_attack(
        loss_train,
        loss_test,
        labels_train=labels_train,
        labels_test=labels_test,
        by_class=2,
        metric=('auc', 'advantage'))
    # One class only: auc, advantage, and the example count.
    self.assertLen(out, 3)
    for key, value in out.items():
      self.assertStartsWith(key, 'class_2')
      if key.endswith('n_examples'):
        self.assertEqual(int(value), num_total // 5)
      elif key.endswith('advantage'):
        self.assertBetween(value, 0.3, 0.5)
      elif key.endswith('auc'):
        self.assertBetween(value, 0.7, 0.75)
|
|
||||||
|
|
||||||
|
|
||||||
class AttackLogitsLabelsMisclassifiedTest(absltest.TestCase):
  """run_attack on misclassified examples only."""

  def test_attack(self):
    """Test attack restricted to misclassified examples."""
    # Logits and labels, single metric, single classifier.
    (_, _, logits_train, logits_test,
     labels_train, labels_test) = get_test_inputs()
    out = mia.run_attack(
        logits_train=logits_train,
        logits_test=logits_test,
        labels_train=labels_train,
        labels_test=labels_test,
        only_misclassified=True,
        attack_classifiers=('lr',),
        metric='advantage')
    self.assertBetween(out['misclassified_lr_logits_test_advantage'], 0.3, 0.8)
    # The fixed RNG seed makes the misclassified count deterministic.
    self.assertEqual(out['misclassified_n_examples'], 802)
|
|
||||||
|
|
||||||
|
|
||||||
class AttackLogitsByPrecentileTest(absltest.TestCase):
  """run_attack split by logit percentiles."""

  def test_attack(self):
    """Test attack on logits alone, split by deciles."""
    # Only logits, a single metric, no trained classifiers.
    logits_train, logits_test = get_test_inputs()[2:4]
    out = mia.run_attack(
        logits_train=logits_train,
        logits_test=logits_test,
        by_percentile=True,
        metric='auc')
    for key, value in out.items():
      self.assertStartsWith(key, 'percentile')
      self.assertBetween(value, 0.60, 0.75)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
  # Run all absltest test cases defined in this module.
  absltest.main()
|
|
|
@ -1,106 +0,0 @@
|
||||||
# Copyright 2020, The TensorFlow Authors.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# Lint as: python3
|
|
||||||
r"""A collection of sklearn models for binary classification.
|
|
||||||
|
|
||||||
This module contains some sklearn pipelines for finding models for binary
|
|
||||||
classification from a variable number of numerical input features.
|
|
||||||
These models are used to train binary classifiers for membership inference.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Text
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from sklearn import ensemble
|
|
||||||
from sklearn import linear_model
|
|
||||||
from sklearn import model_selection
|
|
||||||
from sklearn import neighbors
|
|
||||||
from sklearn import neural_network
|
|
||||||
|
|
||||||
|
|
||||||
def choose_model(attack_classifier: Text):
  """Choose a model based on a string classifier.

  Args:
    attack_classifier: One of 'lr', 'mlp', 'rf', or 'knn'.

  Returns:
    A freshly constructed cross-validated model pipeline.

  Raises:
    ValueError: If `attack_classifier` is not a known identifier.
  """
  builders = {
      'lr': logistic_regression,
      'mlp': mlp,
      'rf': random_forest,
      'knn': knn,
  }
  builder = builders.get(attack_classifier)
  if builder is None:
    raise ValueError(f'Unknown attack classifier {attack_classifier}.')
  return builder()
|
|
||||||
|
|
||||||
|
|
||||||
def logistic_regression(verbose: int = 0, n_jobs: int = 1):
  """Setup a logistic regression pipeline with cross-validation.

  Args:
    verbose: Verbosity level forwarded to the grid search.
    n_jobs: Number of parallel jobs for the grid search.

  Returns:
    A GridSearchCV that tunes the inverse regularization strength C over a
    log-spaced grid with 3-fold cross-validation.
  """
  lr = linear_model.LogisticRegression(solver='lbfgs')
  param_grid = {
      'C': np.logspace(-4, 2, 10),
  }
  # Fix: the `iid` parameter was deprecated in scikit-learn 0.22 and removed
  # in 0.24; passing it raises TypeError on modern versions, so it is dropped.
  pipe = model_selection.GridSearchCV(
      lr, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=verbose)
  return pipe
|
|
||||||
|
|
||||||
|
|
||||||
def random_forest(verbose: int = 0, n_jobs: int = 1):
  """Setup a random forest pipeline with cross-validation.

  Args:
    verbose: Verbosity level forwarded to the randomized search.
    n_jobs: Number of parallel jobs for the randomized search.

  Returns:
    A RandomizedSearchCV that samples 7 forest configurations and evaluates
    each with 3-fold cross-validation.
  """
  rf = ensemble.RandomForestClassifier()

  n_estimators = [100]
  # Fix: the original grid was ['auto', 'sqrt'], but 'auto' is an alias of
  # 'sqrt' for classifiers (and was removed in scikit-learn 1.3), so the
  # duplicate entry is dropped; the effective search space is unchanged.
  max_features = ['sqrt']
  max_depth = [5, 10, 20, None]
  min_samples_split = [2, 5, 10]
  min_samples_leaf = [1, 2, 4]
  random_grid = {
      'n_estimators': n_estimators,
      'max_features': max_features,
      'max_depth': max_depth,
      'min_samples_split': min_samples_split,
      'min_samples_leaf': min_samples_leaf,
  }

  # Fix: the `iid` parameter was deprecated in scikit-learn 0.22 and removed
  # in 0.24; passing it raises TypeError on modern versions, so it is dropped.
  pipe = model_selection.RandomizedSearchCV(
      rf, param_distributions=random_grid, n_iter=7, cv=3, n_jobs=n_jobs,
      verbose=verbose)
  return pipe
|
|
||||||
|
|
||||||
|
|
||||||
def mlp(verbose: int = 0, n_jobs: int = 1):
  """Setup a MLP pipeline with cross-validation.

  Args:
    verbose: Verbosity level forwarded to the grid search.
    n_jobs: Number of parallel jobs for the grid search.

  Returns:
    A GridSearchCV that tunes hidden layer sizes and L2 penalty with 3-fold
    cross-validation.
  """
  mlpmodel = neural_network.MLPClassifier()

  param_grid = {
      'hidden_layer_sizes': [(64,), (32, 32)],
      'solver': ['adam'],
      'alpha': [0.0001, 0.001, 0.01],
  }
  # Fix: the `iid` parameter was deprecated in scikit-learn 0.22 and removed
  # in 0.24; passing it raises TypeError on modern versions, so it is dropped.
  pipe = model_selection.GridSearchCV(
      mlpmodel, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=verbose)
  return pipe
|
|
||||||
|
|
||||||
|
|
||||||
def knn(verbose: int = 0, n_jobs: int = 1):
  """Setup a k-nearest neighbors pipeline with cross-validation.

  Args:
    verbose: Verbosity level forwarded to the grid search.
    n_jobs: Number of parallel jobs for the grid search.

  Returns:
    A GridSearchCV that tunes the neighbor count with 3-fold
    cross-validation.
  """
  knnmodel = neighbors.KNeighborsClassifier()

  param_grid = {
      'n_neighbors': [3, 5, 7],
  }
  # Fix: the `iid` parameter was deprecated in scikit-learn 0.22 and removed
  # in 0.24; passing it raises TypeError on modern versions, so it is dropped.
  pipe = model_selection.GridSearchCV(
      knnmodel, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=verbose)
  return pipe
|
|
Loading…
Reference in a new issue