Remove old API.

PiperOrigin-RevId: 334406920
Shuang Song 2020-09-29 10:19:15 -07:00 committed by A. Unique TensorFlower
parent 78d30a0424
commit bca2baae8d
3 changed files with 0 additions and 1150 deletions


@@ -1,737 +0,0 @@
# Copyright 2020, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Code that runs membership inference attacks based on the model outputs.
Warning: This file belongs to the old API for membership inference attacks and
will be removed soon. The new API lives in membership_inference_attack_new.py.
"""
import collections
import io
import os
import re
from typing import Text, Dict, Iterable, Tuple, Union, Any
from absl import logging
import numpy as np
from scipy import special
from tensorflow_privacy.privacy.membership_inference_attack import plotting
from tensorflow_privacy.privacy.membership_inference_attack import trained_attack_models
from tensorflow_privacy.privacy.membership_inference_attack import utils
from os import mkdir
ArrayDict = Dict[Text, np.ndarray]
FloatDict = Dict[Text, float]
AnyDict = Dict[Text, Any]
Dataset = Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]
MetricNames = Union[Text, Iterable[Text]]
def _get_vulnerabilities(result: ArrayDict, metrics: MetricNames) -> FloatDict:
"""Gets the vulnerabilities according to the chosen metrics for all attacks."""
vulns = {}
if isinstance(metrics, str):
metrics = [metrics]
for k in result:
for metric in metrics:
if k.endswith(metric.lower()) or k.endswith('n_examples'):
vulns[k] = float(result[k])
return vulns
def _get_maximum_vulnerability(
attack_result: FloatDict,
metrics: MetricNames,
filterby: Text = '') -> Dict[Text, Dict[Text, Union[Text, float]]]:
"""Returns the worst vulnerability according to the chosen metrics of all attacks."""
vulns = {}
if isinstance(metrics, str):
metrics = [metrics]
for metric in metrics:
best_attack_value = -np.inf
for k in attack_result:
if (k.startswith(filterby.lower()) and k.endswith(metric.lower()) and
'train' not in k):
if float(attack_result[k]) > best_attack_value:
best_attack_value = attack_result[k]
best_attacker = k
if best_attack_value > -np.inf:
newkey = filterby + '-' + metric if filterby else metric
vulns[newkey] = {'value': best_attack_value, 'attacker': best_attacker}
return vulns
def _get_maximum_class_gap_or_none(result: FloatDict,
metrics: MetricNames) -> FloatDict:
"""Returns the biggest and smallest vulnerability and the gap across classes."""
gaps = {}
if isinstance(metrics, str):
metrics = [metrics]
for metric in metrics:
hi = -np.inf
lo = np.inf
hi_idx, lo_idx = -1, -1
for k in result:
if (k.startswith('class') and k.endswith(metric.lower()) and
'train' not in k):
if float(result[k]) > hi:
hi = float(result[k])
hi_idx = int(re.findall(r'class_(\d+)_', k)[0])
if float(result[k]) < lo:
lo = float(result[k])
lo_idx = int(re.findall(r'class_(\d+)_', k)[0])
if lo - hi < np.inf:
gaps['max_class_gap_' + metric] = hi - lo
gaps[f'class_{hi_idx}_' + metric] = hi
gaps[f'class_{lo_idx}_' + metric] = lo
gaps['max_vuln_class_' + metric] = hi_idx
gaps['min_vuln_class_' + metric] = lo_idx
return gaps
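# Illustrative sketch (not part of the original API): how the two helpers
# above collapse a raw result dict into per-metric summaries. The toy dict is
# hypothetical; real dicts of this shape are produced by run_attack below.
def _example_vulnerability_summary():
  """Toy walk-through of _get_vulnerabilities / _get_maximum_vulnerability."""
  toy = {
      'all_thresh_loss_auc': 0.72,
      'all_lr_logits_test_auc': 0.81,
      'all_n_examples': 1000.0,
  }
  # Keeps every entry matching the metric suffix, plus example counts:
  # {'all_thresh_loss_auc': 0.72, 'all_lr_logits_test_auc': 0.81,
  #  'all_n_examples': 1000.0}
  per_attack = _get_vulnerabilities(toy, 'auc')
  # Picks the single strongest attack for the metric within the 'all' group:
  # {'all-auc': {'value': 0.81, 'attacker': 'all_lr_logits_test_auc'}}
  best = _get_maximum_vulnerability(toy, 'auc', filterby='all')
  return per_attack, best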
# ------------------------------------------------------------------------------
# Attacks
# ------------------------------------------------------------------------------
def _run_threshold_loss_attack(features: ArrayDict,
figure_file_prefix: Text = '',
figure_directory: Text = None) -> ArrayDict:
"""Runs the threshold attack on the loss."""
logging.info('Run threshold attack on loss...')
is_train = features['is_train']
attack_prefix = 'thresh_loss'
tmp_results = utils.compute_performance_metrics(is_train, -features['loss'])
if figure_directory is not None:
figpath = os.path.join(figure_directory,
figure_file_prefix + attack_prefix + '.png')
plotting.save_plot(
plotting.plot_curve_with_area(
tmp_results['fpr'], tmp_results['tpr'], xlabel='FPR', ylabel='TPR'),
figpath)
figpath = os.path.join(figure_directory,
figure_file_prefix + attack_prefix + '_hist.png')
plotting.save_plot(
plotting.plot_histograms(
features['loss'][is_train == 1],
features['loss'][is_train == 0],
xlabel='loss'), figpath)
return utils.prepend_to_keys(tmp_results, attack_prefix + '_')
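# Illustrative sketch (assumes scikit-learn, which this change already depends
# on elsewhere; this is not the library's utils.compute_performance_metrics):
# the threshold attack above boils down to scoring each example by its negated
# loss (members tend to have lower loss) and sweeping a threshold, i.e.
# computing an ROC curve against the is_train labels.
def _threshold_loss_attack_sketch(loss, is_train):
  """Returns (auc, advantage) of a loss-threshold attack on toy inputs."""
  from sklearn import metrics as sk_metrics
  fpr, tpr, _ = sk_metrics.roc_curve(is_train, -loss)
  auc = sk_metrics.auc(fpr, tpr)
  # 'advantage' is max |tpr - fpr|, as described in run_attack's docstring.
  advantage = float(np.max(np.abs(tpr - fpr)))
  return auc, advantage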
def _run_threshold_attack_maxlogit(features: ArrayDict,
figure_file_prefix: Text = '',
figure_directory: Text = None) -> ArrayDict:
"""Runs the threshold attack on the maximum logit."""
is_train = features['is_train']
preds = np.max(features['logits'], axis=-1)
tmp_results = utils.compute_performance_metrics(is_train, preds)
attack_prefix = 'thresh_maxlogit'
if figure_directory is not None:
figpath = os.path.join(figure_directory,
figure_file_prefix + attack_prefix + '.png')
plotting.save_plot(
plotting.plot_curve_with_area(
tmp_results['fpr'], tmp_results['tpr'], xlabel='FPR', ylabel='TPR'),
figpath)
figpath = os.path.join(figure_directory,
figure_file_prefix + attack_prefix + '_hist.png')
plotting.save_plot(
plotting.plot_histograms(
preds[is_train == 1], preds[is_train == 0], xlabel='loss'), figpath)
return utils.prepend_to_keys(tmp_results, attack_prefix + '_')
def _run_trained_attack(attack_classifier: Text,
data: Dataset,
attack_prefix: Text,
figure_file_prefix: Text = '',
figure_directory: Text = None) -> ArrayDict:
"""Train a classifier for attack and evaluate it."""
# Train the attack classifier
(x_train, y_train), (x_test, y_test) = data
clf_model = trained_attack_models.choose_model(attack_classifier)
clf_model.fit(x_train, y_train)
# Calculate training set metrics
pred_train = clf_model.predict_proba(x_train)[:, clf_model.classes_ == 1]
results = utils.prepend_to_keys(
utils.compute_performance_metrics(y_train, pred_train),
attack_prefix + 'train_')
# Calculate test set metrics
pred_test = clf_model.predict_proba(x_test)[:, clf_model.classes_ == 1]
results.update(
utils.prepend_to_keys(
utils.compute_performance_metrics(y_test, pred_test),
attack_prefix + 'test_'))
if figure_directory is not None:
figpath = os.path.join(figure_directory,
figure_file_prefix + attack_prefix[:-1] + '.png')
plotting.save_plot(
plotting.plot_curve_with_area(
results[attack_prefix + 'test_fpr'],
results[attack_prefix + 'test_tpr'],
xlabel='FPR',
ylabel='TPR'), figpath)
return results
def _run_attacks_and_plot(features: ArrayDict,
attacks: Iterable[Text],
attack_classifiers: Iterable[Text],
balance: bool,
test_size: float,
random_state: int,
figure_file_prefix: Text = '',
figure_directory: Text = None) -> ArrayDict:
"""Runs the specified attacks on the provided data."""
if balance:
try:
features = utils.subsample_to_balance(features, random_state)
except RuntimeError:
logging.info('Not enough remaining data for attack: Empty results.')
return {}
result = {}
# -------------------- Simple threshold attacks
if 'thresh_loss' in attacks:
result.update(
_run_threshold_loss_attack(features, figure_file_prefix,
figure_directory))
if 'thresh_maxlogit' in attacks:
result.update(
_run_threshold_attack_maxlogit(features, figure_file_prefix,
figure_directory))
# -------------------- Run learned attacks
# TODO(b/157632603): Add a prefix (for example 'trained_') for attacks which
# use classifiers to distinguish them from threshold attacks.
if 'logits' in attacks:
data = utils.get_train_test_split(
features, add_loss=False, test_size=test_size)
for clf in attack_classifiers:
logging.info('Train %s on %d logits', clf, data[0][0].shape[1])
attack_prefix = f'{clf}_logits_'
result.update(
_run_trained_attack(clf, data, attack_prefix, figure_file_prefix,
figure_directory))
if 'logits_loss' in attacks:
data = utils.get_train_test_split(
features, add_loss=True, test_size=test_size)
for clf in attack_classifiers:
logging.info('Train %s on %d logits + loss', clf, data[0][0].shape[1])
attack_prefix = f'{clf}_logits_loss_'
result.update(
_run_trained_attack(clf, data, attack_prefix, figure_file_prefix,
figure_directory))
return result
def run_attack(loss_train: np.ndarray = None,
loss_test: np.ndarray = None,
logits_train: np.ndarray = None,
logits_test: np.ndarray = None,
labels_train: np.ndarray = None,
labels_test: np.ndarray = None,
attack_classifiers: Iterable[Text] = None,
only_misclassified: bool = False,
by_class: Union[bool, Iterable[int], int] = False,
by_percentile: Union[bool, Iterable[int], int] = False,
figure_directory: Text = None,
output_directory: Text = None,
metric: MetricNames = 'auc',
balance: bool = True,
test_size: float = 0.2,
random_state: int = 0) -> FloatDict:
"""Run membership inference attack(s).
Based on specific outputs of a machine learning model on some examples used
for training (train) and some examples not used for training (test), run
membership inference attacks that try to discriminate training from test
inputs using only the model outputs.
While all inputs are optional, at least one train/test pair is required to run
any attacks (either losses or logits/probabilities).
Note that one can equally provide output probabilities instead of logits in
the logits_train / logits_test arguments.
We measure the vulnerability of the model via the area under the ROC-curve
(auc) or via max |fpr - tpr| (advantage) of the attack classifier. These
measures are very closely related and may look almost indistinguishable.
This function provides relatively fine-grained control and outputs detailed
results. For a higher-level wrapper with sane internal default settings and
distilled output results, see `run_all_attacks`.
Via the `figure_directory` argument and the `output_directory` argument more
detailed information as well as roc-curve plots can optionally be stored to
disk.
If `loss_train` and `loss_test` are provided we run:
- simple threshold attack on the loss
If `logits_train` and `logits_test` are provided we run:
- simple threshold attack on the top logit
- if `attack_classifiers` is not None and no losses are provided: train the
specified classifiers on the top 10 logits (or all logits if there are
less than 10)
- if `attack_classifiers` is not None and losses are provided: train the
specified classifiers on the top 10 logits (or all logits if there are
less than 10) and the loss
Args:
loss_train: A 1D array containing the individual scalar losses for examples
used during training.
loss_test: A 1D array containing the individual scalar losses for examples
not used during training.
logits_train: A 2D array (n_train, n_classes) of the individual logits or
output probabilities of examples used during training.
logits_test: A 2D array (n_test, n_classes) of the individual logits or
output probabilities of examples not used during training.
labels_train: The true labels of the training examples. Labels are only
needed when `by_class` is specified (i.e., not False).
labels_test: The true labels of the test examples. Labels are only needed
when `by_class` is specified (i.e., not False).
attack_classifiers: Binary ML classifiers to train as attacks beyond simple
thresholding. This argument is ignored if logits are not provided.
Classifiers can be 'lr' for logistic regression, 'mlp' for multi-layer
perceptron, 'rf' for random forests, or 'knn' for k-nearest neighbors. If
None, don't train classifiers beyond simple thresholding.
only_misclassified: Run and evaluate attacks only on misclassified examples.
Must specify `labels_train`, `labels_test`, `logits_train` and
`logits_test` to use this. If this is True, `by_class` and `by_percentile`
are ignored.
by_class: This argument determines whether attacks are run on the entire
data, or on examples grouped by their class label. If `True`, all attacks
are run separately for each class. If `by_class` is a single integer, run
attacks for this class only. If `by_class` is an iterable of integers, run
all attacks for each of the specified class labels separately. Only used
if `labels_train` and `labels_test` are specified. If `by_class` is
specified (not False), `by_percentile` is ignored. Ignored if
`only_misclassified` is True.
by_percentile: This argument determines whether attacks are run on the
entire data, or separately for examples where the most likely class
prediction is within a given percentile of all maximum predictions. If
`True`, all attacks are run separately for the examples with max
probabilities within the ten deciles. If `by_percentile` is a single int
between 0 and 100, run attacks only for examples with confidence within
this percentile. If `by_percentile` is an iterable of ints between 0 and
100, run all attacks for each of the specified percentiles separately.
Ignored if `by_class` is specified. Ignored if `logits_train` and
`logits_test` are not specified. Ignored if `only_misclassified` is True.
figure_directory: Where to store ROC-curve plots and histograms. If `None`,
don't create plots.
output_directory: Where to store detailed result data for all run attacks.
If `None`, don't store detailed result data.
metric: Available vulnerability metrics are 'auc' or 'advantage' for the
area under the ROC curve or the advantage (max |tpr - fpr|). Specify
either one of them or both.
balance: Whether to use the same number of train and test samples (by
randomly subsampling whichever happens to be larger).
test_size: The fraction of the input data to use for the evaluation of
trained ML attacks. This argument is ignored if attack_classifiers is None
or no logits are provided.
random_state: Random seed for reproducibility. Only used if attack models
are trained.
Returns:
results: Dictionary with the chosen vulnerability metric(s) for all attacks
that were run.
"""
print(
'Deprecation warning: function run_attack is '
'deprecated and will be removed soon. '
'Please use membership_inference_attack_new.run_attacks'
)
attacks = []
features = {}
# ---------- Check available data ----------
if ((loss_train is None or loss_test is None) and
(logits_train is None or logits_test is None)):
raise ValueError(
'Need at least train and test for loss or train and test for logits.')
# ---------- If losses are provided ----------
if loss_train is not None and loss_test is not None:
if loss_train.ndim != 1 or loss_test.ndim != 1:
raise ValueError('Losses must be 1D arrays.')
features['is_train'] = np.concatenate(
(np.ones(len(loss_train)), np.zeros(len(loss_test))),
axis=0).astype(int)
features['loss'] = np.concatenate((loss_train.ravel(), loss_test.ravel()),
axis=0)
attacks.append('thresh_loss')
# ---------- If logits are provided ----------
if logits_train is not None and logits_test is not None:
assert logits_train.ndim == 2 and logits_test.ndim == 2, \
'Logits must be 2D arrays.'
assert logits_train.shape[1] == logits_test.shape[1], \
'Train and test logits must agree along axis 1 (number of classes).'
if 'is_train' in features:
assert (loss_train.shape[0] == logits_train.shape[0] and
loss_test.shape[0] == logits_test.shape[0]), \
'Number of examples must match between loss and logits.'
else:
features['is_train'] = np.concatenate(
(np.ones(logits_train.shape[0]), np.zeros(logits_test.shape[0])),
axis=0).astype(int)
attacks.append('thresh_maxlogit')
features['logits'] = np.concatenate((logits_train, logits_test), axis=0)
if attack_classifiers:
attacks.append('logits')
if 'loss' in features:
attacks.append('logits_loss')
# ---------- If labels are provided ----------
if labels_train is not None and labels_test is not None:
if labels_train.ndim != 1 or labels_test.ndim != 1:
raise ValueError('Labels must be 1D arrays.')
if 'loss' in features:
assert (loss_train.shape[0] == labels_train.shape[0] and
loss_test.shape[0] == labels_test.shape[0]), \
'Number of examples must match between loss and labels.'
else:
assert (logits_train.shape[0] == labels_train.shape[0] and
logits_test.shape[0] == labels_test.shape[0]), \
'Number of examples must match between logits and labels.'
features['label'] = np.concatenate((labels_train, labels_test), axis=0)
# ---------- Data subsampling or filtering ----------
filtertype = None
filtervals = [None]
if only_misclassified:
if (labels_train is None or labels_test is None or logits_train is None or
logits_test is None):
raise ValueError('Must specify labels_train, labels_test, logits_train, '
'and logits_test for the only_misclassified option.')
filtertype = 'misclassified'
elif by_class:
if labels_train is None or labels_test is None:
raise ValueError('Must specify labels_train and labels_test when using '
'the by_class option.')
if isinstance(by_class, bool):
filtervals = list(set(labels_train) | set(labels_test))
elif isinstance(by_class, int):
filtervals = [by_class]
elif isinstance(by_class, collections.Iterable):
filtervals = list(by_class)
filtertype = 'class'
elif by_percentile:
if logits_train is None or logits_test is None:
raise ValueError('Must specify logits_train and logits_test when using '
'the by_percentile option.')
if isinstance(by_percentile, bool):
filtervals = list(range(10, 101, 10))
elif isinstance(by_percentile, int):
filtervals = [by_percentile]
elif isinstance(by_percentile, collections.Iterable):
filtervals = [int(percentile) for percentile in by_percentile]
filtertype = 'percentile'
# ---------- Need to create figure directory? ----------
if figure_directory is not None:
mkdir(figure_directory)
# ---------- Actually run attacks and plot if required ----------
logging.info('Selecting %s with values %s', filtertype, filtervals)
num = None
result = {}
for filterval in filtervals:
if filtertype is None:
tmp_features = features
elif filtertype == 'misclassified':
idx = features['label'] != np.argmax(features['logits'], axis=-1)
tmp_features = utils.select_indices(features, idx)
num = np.sum(idx)
elif filtertype == 'class':
idx = features['label'] == filterval
tmp_features = utils.select_indices(features, idx)
num = np.sum(idx)
elif filtertype == 'percentile':
certainty = np.max(special.softmax(features['logits'], axis=-1), axis=-1)
idx = certainty <= np.percentile(certainty, filterval)
tmp_features = utils.select_indices(features, idx)
prefix = f'{filtertype}_' if filtertype is not None else ''
prefix += f'{filterval}_' if filterval is not None else ''
tmp_result = _run_attacks_and_plot(tmp_features, attacks,
attack_classifiers, balance, test_size,
random_state, prefix, figure_directory)
if num is not None:
tmp_result['n_examples'] = float(num)
if tmp_result:
result.update(utils.prepend_to_keys(tmp_result, prefix))
# ---------- Store data ----------
if output_directory is not None:
mkdir(output_directory)
resultpath = os.path.join(output_directory, 'attack_results.npz')
logging.info('Store aggregate results at %s.', resultpath)
with open(resultpath, 'wb') as fp:
io_buffer = io.BytesIO()
np.savez(io_buffer, **result)
fp.write(io_buffer.getvalue())
return _get_vulnerabilities(result, metric)
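# Illustrative usage of run_attack (hypothetical synthetic data, similar to
# the unit tests in this change): run the threshold attacks plus two trained
# classifiers on losses and logits, and read back per-attack metrics.
def _run_attack_example():
  rng = np.random.RandomState(0)
  loss_train = rng.randn(500) - 0.4   # members tend to have lower loss
  loss_test = rng.randn(500) + 0.4
  logits_train = rng.randn(500, 5) + 0.2
  logits_test = rng.randn(500, 5) - 0.2
  # Returned keys look like 'thresh_loss_auc', 'thresh_maxlogit_advantage',
  # 'lr_logits_test_auc' or 'knn_logits_loss_test_auc'.
  return run_attack(
      loss_train,
      loss_test,
      logits_train,
      logits_test,
      attack_classifiers=('lr', 'knn'),
      metric=('auc', 'advantage'))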
def run_all_attacks(loss_train: np.ndarray = None,
loss_test: np.ndarray = None,
logits_train: np.ndarray = None,
logits_test: np.ndarray = None,
labels_train: np.ndarray = None,
labels_test: np.ndarray = None,
attack_classifiers: Iterable[Text] = ('lr', 'mlp', 'rf',
'knn'),
decimals: Union[int, None] = 4) -> FloatDict:
"""Runs all possible membership inference attacks.
Check 'run_attack' for detailed information on how attacks are performed
and evaluated.
This function internally chooses sane default settings for all attacks and
returns all possible output combinations.
For fine-grained control and partial attacks, please see `run_attack`.
Args:
loss_train: A 1D array containing the individual scalar losses for examples
used during training.
loss_test: A 1D array containing the individual scalar losses for examples
not used during training.
logits_train: A 2D array (n_train, n_classes) of the individual logits or
output probabilities of examples used during training.
logits_test: A 2D array (n_test, n_classes) of the individual logits or
output probabilities of examples not used during training.
labels_train: The true labels of the training examples. Labels are only
needed when `by_class` is specified (i.e., not False).
labels_test: The true labels of the test examples. Labels are only needed
when `by_class` is specified (i.e., not False).
attack_classifiers: Which binary classifiers to train (in addition to simple
threshold attacks). This can include 'lr' (logistic regression), 'mlp'
(multi-layered perceptron), 'rf' (random forests), 'knn' (k-nearest
neighbors), which will be trained with cross validation to determine good
hyperparameters.
decimals: Round all float results to this number of decimals. If decimals is
None, don't round.
Returns:
result: dictionary with all attack results
"""
print(
'Deprecation warning: function run_all_attacks is '
'deprecated and will be removed soon. '
'Please use membership_inference_attack_new.run_attacks'
)
metrics = ['auc', 'advantage']
# Entire data
result = run_attack(
loss_train,
loss_test,
logits_train,
logits_test,
attack_classifiers=attack_classifiers,
metric=metrics)
result = utils.prepend_to_keys(result, 'all_')
# Misclassified examples
if (labels_train is not None and labels_test is not None and
logits_train is not None and logits_test is not None):
result.update(
run_attack(
loss_train,
loss_test,
logits_train,
logits_test,
labels_train,
labels_test,
attack_classifiers=attack_classifiers,
only_misclassified=True,
metric=metrics))
# Split per class
if labels_train is not None and labels_test is not None:
result.update(
run_attack(
loss_train,
loss_test,
logits_train,
logits_test,
labels_train,
labels_test,
by_class=True,
attack_classifiers=attack_classifiers,
metric=metrics))
# Different deciles
if logits_train is not None and logits_test is not None:
result.update(
run_attack(
loss_train,
loss_test,
logits_train,
logits_test,
by_percentile=True,
attack_classifiers=attack_classifiers,
metric=metrics))
if decimals is not None:
result = {k: round(v, decimals) for k, v in result.items()}
return result
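# Illustrative usage of run_all_attacks (hypothetical synthetic data): the
# wrapper prefixes each result by the data slice it was computed on, e.g.
# 'all_', 'misclassified_', 'class_3_' or 'percentile_90_'.
def _run_all_attacks_example():
  rng = np.random.RandomState(0)
  logits_train = rng.randn(500, 5) + 0.2
  logits_test = rng.randn(500, 5) - 0.2
  labels_train = rng.randint(0, 5, size=500)
  labels_test = rng.randint(0, 5, size=500)
  return run_all_attacks(
      logits_train=logits_train,
      logits_test=logits_test,
      labels_train=labels_train,
      labels_test=labels_test,
      attack_classifiers=('lr',))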
def run_all_attacks_and_create_summary(
loss_train: np.ndarray = None,
loss_test: np.ndarray = None,
logits_train: np.ndarray = None,
logits_test: np.ndarray = None,
labels_train: np.ndarray = None,
labels_test: np.ndarray = None,
return_dict: bool = True,
decimals: Union[int, None] = 4) -> Union[Text, Tuple[Text, AnyDict]]:
"""Runs all possible membership inference attack(s) and distill results.
Check 'run_attack' for detailed information on how attacks are performed
and evaluated.
This function internally chooses sane default settings for all attacks and
returns all possible output combinations.
For fine-grained control and partial attacks, please see `run_attack`.
Args:
loss_train: A 1D array containing the individual scalar losses for examples
used during training.
loss_test: A 1D array containing the individual scalar losses for examples
not used during training.
logits_train: A 2D array (n_train, n_classes) of the individual logits or
output probabilities of examples used during training.
logits_test: A 2D array (n_test, n_classes) of the individual logits or
output probabilities of examples not used during training.
labels_train: The true labels of the training examples. Labels are only
needed when `by_class` is specified (i.e., not False).
labels_test: The true labels of the test examples. Labels are only needed
when `by_class` is specified (i.e., not False).
return_dict: Whether to also return a dictionary with the results summarized
in the summary string.
decimals: Round all float results to this number of decimals. If decimals is
None, don't round.
Returns:
summarystring: A string with natural language summary of the attacks. In the
summary string, printed numbers are rounded to `decimals` decimals if
provided, otherwise to 4 digits by default for readability.
result: a dictionary with all the distilled attack information summarized
in the summarystring
"""
print(
'Deprecation warning: function run_all_attacks_and_create_summary is '
'deprecated and will be removed soon. '
'Please use membership_inference_attack_new.run_attacks'
)
summary = []
metrics = ['auc', 'advantage']
attack_classifiers = ['lr', 'knn']
results = run_all_attacks(
loss_train,
loss_test,
logits_train,
logits_test,
labels_train,
labels_test,
attack_classifiers=attack_classifiers,
decimals=None)
output = _get_maximum_vulnerability(results, metrics, filterby='all')
if decimals is not None:
strdec = decimals
else:
strdec = 4
for metric in metrics:
summary.append(f'========== {metric.upper()} ==========')
best_value = output['all-' + metric]['value']
best_attacker = output['all-' + metric]['attacker']
summary.append(f'The best attack ({best_attacker}) achieved an {metric} of '
f'{best_value:.{strdec}f}.')
summary.append('')
classgap = _get_maximum_class_gap_or_none(results, metrics)
if classgap:
output.update(classgap)
for metric in metrics:
summary.append(f'========== {metric.upper()} per class ==========')
hi_idx = output[f'max_vuln_class_{metric}']
lo_idx = output[f'min_vuln_class_{metric}']
hi = output[f'class_{hi_idx}_{metric}']
lo = output[f'class_{lo_idx}_{metric}']
gap = output[f'max_class_gap_{metric}']
summary.append(f'The most vulnerable class {hi_idx} has {metric} of '
f'{hi:.{strdec}f}.')
summary.append(f'The least vulnerable class {lo_idx} has {metric} of '
f'{lo:.{strdec}f}.')
summary.append(f'=> The maximum gap between class vulnerabilities is '
f'{gap:.{strdec}f}.')
summary.append('')
misclassified = _get_maximum_vulnerability(
results, metrics, filterby='misclassified')
if misclassified:
for metric in metrics:
best_value = misclassified['misclassified-' + metric]['value']
best_attacker = misclassified['misclassified-' + metric]['attacker']
summary.append(f'========== {metric.upper()} for misclassified '
'==========')
summary.append('Among misclassified examples, the best attack '
f'({best_attacker}) achieved an {metric} of '
f'{best_value:.{strdec}f}.')
summary.append('')
output.update(misclassified)
n_examples = {k: v for k, v in results.items() if k.endswith('n_examples')}
if n_examples:
output.update(n_examples)
# Flatten remaining dicts in output
fresh_output = {}
for k, v in output.items():
if isinstance(v, dict):
if k.startswith('all'):
fresh_output[k[4:]] = v['value']
fresh_output['best_attacker_' + k[4:]] = v['attacker']
else:
fresh_output[k] = v
output = fresh_output
if decimals is not None:
for k, v in output.items():
if isinstance(v, float):
output[k] = round(v, decimals)
summary = '\n'.join(summary)
if return_dict:
return summary, output
else:
return summary
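# Illustrative usage of run_all_attacks_and_create_summary (hypothetical
# synthetic data): the wrapper returns a human-readable report and, because
# return_dict defaults to True, a flat dict of the distilled numbers with
# entries such as 'auc', 'advantage' and 'best_attacker_auc'.
def _summary_example():
  rng = np.random.RandomState(0)
  loss_train = rng.randn(500) - 0.4
  loss_test = rng.randn(500) + 0.4
  logits_train = rng.randn(500, 5) + 0.2
  logits_test = rng.randn(500, 5) - 0.2
  summary, distilled = run_all_attacks_and_create_summary(
      loss_train, loss_test, logits_train, logits_test)
  return summary, distilled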


@@ -1,307 +0,0 @@
# Copyright 2020, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for tensorflow_privacy.privacy.membership_inference_attack.utils."""
from absl.testing import absltest
import numpy as np
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
def get_result_dict():
"""Get an example result dictionary."""
return {
'test_n_examples': np.ones(1),
'test_examples': np.zeros(1),
'test_auc': np.ones(1),
'test_advantage': np.ones(1),
'all_0-metric': np.array([1]),
'all_1-metric': np.array([2]),
'test_2-metric': np.array([3]),
'test_score': np.array([4]),
}
def get_test_inputs():
"""Get example inputs for attacks."""
n_train = n_test = 500
rng = np.random.RandomState(4)
loss_train = rng.randn(n_train) - 0.4
loss_test = rng.randn(n_test) + 0.4
logits_train = rng.randn(n_train, 5) + 0.2
logits_test = rng.randn(n_test, 5) - 0.2
labels_train = np.array([i % 5 for i in range(n_train)])
labels_test = np.array([(3 * i) % 5 for i in range(n_test)])
return (loss_train, loss_test, logits_train, logits_test,
labels_train, labels_test)
class GetVulnerabilityTest(absltest.TestCase):
def test_get_vulnerabilities(self):
"""Test extraction of vulnerability scores."""
testdict = get_result_dict()
for key in ['auc', 'advantage']:
res = mia._get_vulnerabilities(testdict, key)
self.assertLen(res, 2)
self.assertEqual(res[f'test_{key}'], 1)
self.assertEqual(res['test_n_examples'], 1)
res = mia._get_vulnerabilities(testdict, ['auc', 'advantage'])
self.assertLen(res, 3)
self.assertEqual(res['test_auc'], 1)
self.assertEqual(res['test_advantage'], 1)
self.assertEqual(res['test_n_examples'], 1)
class GetMaximumVulnerabilityTest(absltest.TestCase):
def test_get_maximum_vulnerability(self):
"""Test extraction of maximum vulnerability score."""
testdict = get_result_dict()
for i in range(3):
key = f'{i}-metric'
res = mia._get_maximum_vulnerability(testdict, key)
self.assertLen(res, 1)
self.assertEqual(res[key]['value'], i + 1)
if i < 2:
self.assertEqual(res[key]['attacker'], f'all_{i}-metric')
else:
self.assertEqual(res[key]['attacker'], 'test_2-metric')
res = mia._get_maximum_vulnerability(testdict, 'metric')
self.assertLen(res, 1)
self.assertEqual(res['metric']['value'], 3)
res = mia._get_maximum_vulnerability(testdict, ['metric'],
filterby='all')
self.assertLen(res, 1)
self.assertEqual(res['all-metric']['value'], 2)
res = mia._get_maximum_vulnerability(testdict, ['metric', 'score'])
self.assertLen(res, 2)
self.assertEqual(res['metric']['value'], 3)
self.assertEqual(res['score']['value'], 4)
self.assertEqual(res['score']['attacker'], 'test_score')
class ThresholdAttackLossTest(absltest.TestCase):
def test_threshold_attack_loss(self):
"""Test simple threshold attack on loss."""
features = {
'loss': np.zeros(10),
'is_train': np.concatenate((np.zeros(5), np.ones(5))),
}
res = mia._run_threshold_loss_attack(features)
for k in res:
self.assertStartsWith(k, 'thresh_loss')
self.assertEqual(res['thresh_loss_auc'], 0.5)
self.assertEqual(res['thresh_loss_advantage'], 0.0)
rng = np.random.RandomState(4)
n_train = 1000
n_test = 500
loss_train = rng.randn(n_train) - 0.4
loss_test = rng.randn(n_test) + 0.4
features = {
'loss': np.concatenate((loss_train, loss_test)),
'is_train': np.concatenate((np.ones(n_train), np.zeros(n_test))),
}
res = mia._run_threshold_loss_attack(features)
self.assertBetween(res['thresh_loss_auc'], 0.7, 0.75)
self.assertBetween(res['thresh_loss_advantage'], 0.3, 0.35)
class ThresholdAttackMaxlogitTest(absltest.TestCase):
def test_threshold_attack_maxlogits(self):
"""Test simple threshold attack on maximum logit."""
features = {
'logits': np.eye(10, 14),
'is_train': np.concatenate((np.zeros(5), np.ones(5))),
}
res = mia._run_threshold_attack_maxlogit(features)
for k in res:
self.assertStartsWith(k, 'thresh_maxlogit')
self.assertEqual(res['thresh_maxlogit_auc'], 0.5)
self.assertEqual(res['thresh_maxlogit_advantage'], 0.0)
rng = np.random.RandomState(4)
n_train = 1000
n_test = 500
logits_train = rng.randn(n_train, 12) + 0.2
logits_test = rng.randn(n_test, 12) - 0.2
features = {
'logits': np.concatenate((logits_train, logits_test), axis=0),
'is_train': np.concatenate((np.ones(n_train), np.zeros(n_test))),
}
res = mia._run_threshold_attack_maxlogit(features)
self.assertBetween(res['thresh_maxlogit_auc'], 0.7, 0.75)
self.assertBetween(res['thresh_maxlogit_advantage'], 0.3, 0.35)
class TrainedAttackTrivialTest(absltest.TestCase):
def test_trained_attack(self):
"""Test trained attacks."""
# Trivially easy problem
x_train, x_test = np.ones((500, 3)), np.ones((20, 3))
x_train[:200] *= -1
x_test[:8] *= -1
y_train, y_test = np.ones(500).astype(int), np.ones(20).astype(int)
y_train[:200] = 0
y_test[:8] = 0
data = (x_train, y_train), (x_test, y_test)
for clf in ['lr', 'rf', 'mlp', 'knn']:
res = mia._run_trained_attack(clf, data, attack_prefix='a-')
self.assertEqual(res['a-train_auc'], 1)
self.assertEqual(res['a-test_auc'], 1)
self.assertEqual(res['a-train_advantage'], 1)
self.assertEqual(res['a-test_advantage'], 1)
class TrainedAttackRandomFeaturesTest(absltest.TestCase):
def test_trained_attack(self):
"""Test trained attacks."""
# Random labels and features
rng = np.random.RandomState(4)
x_train, x_test = rng.randn(500, 3), rng.randn(500, 3)
y_train = rng.binomial(1, 0.5, size=(500,))
y_test = rng.binomial(1, 0.5, size=(500,))
data = (x_train, y_train), (x_test, y_test)
for clf in ['lr', 'rf', 'mlp', 'knn']:
res = mia._run_trained_attack(clf, data, attack_prefix='a-')
self.assertBetween(res['a-train_auc'], 0.5, 1.)
self.assertBetween(res['a-test_auc'], 0.4, 0.6)
self.assertBetween(res['a-train_advantage'], 0., 1.0)
self.assertBetween(res['a-test_advantage'], 0., 0.2)
class AttackLossesTest(absltest.TestCase):
def test_attack(self):
"""Test individual attack function."""
# losses only, both metrics
loss_train, loss_test, _, _, _, _ = get_test_inputs()
res = mia.run_attack(loss_train, loss_test, metric=('auc', 'advantage'))
self.assertBetween(res['thresh_loss_auc'], 0.7, 0.75)
self.assertBetween(res['thresh_loss_advantage'], 0.3, 0.35)
class AttackLossesLogitsTest(absltest.TestCase):
def test_attack(self):
"""Test individual attack function."""
# losses and logits, two classifiers, single metric
loss_train, loss_test, logits_train, logits_test, _, _ = get_test_inputs()
res = mia.run_attack(
loss_train,
loss_test,
logits_train,
logits_test,
attack_classifiers=('rf', 'knn'),
metric='auc')
self.assertBetween(res['rf_logits_test_auc'], 0.7, 0.9)
self.assertBetween(res['knn_logits_test_auc'], 0.7, 0.9)
self.assertBetween(res['rf_logits_loss_test_auc'], 0.7, 0.9)
self.assertBetween(res['knn_logits_loss_test_auc'], 0.7, 0.9)
class AttackLossesLabelsByClassTest(absltest.TestCase):
def test_attack(self):
# losses and labels, single metric, split by class
loss_train, loss_test, _, _, labels_train, labels_test = get_test_inputs()
n_train = loss_train.shape[0]
n_test = loss_test.shape[0]
res = mia.run_attack(
loss_train,
loss_test,
labels_train=labels_train,
labels_test=labels_test,
by_class=True,
metric='auc')
self.assertLen(res, 10)
for k in res:
self.assertStartsWith(k, 'class_')
if k.endswith('n_examples'):
self.assertEqual(int(res[k]), (n_train + n_test) // 5)
else:
self.assertBetween(res[k], 0.65, 0.75)
class AttackLossesLabelsSingleClassTest(absltest.TestCase):
def test_attack(self):
# losses and labels, both metrics, single class
loss_train, loss_test, _, _, labels_train, labels_test = get_test_inputs()
n_train = loss_train.shape[0]
n_test = loss_test.shape[0]
res = mia.run_attack(
loss_train,
loss_test,
labels_train=labels_train,
labels_test=labels_test,
by_class=2,
metric=('auc', 'advantage'))
self.assertLen(res, 3)
for k in res:
self.assertStartsWith(k, 'class_2')
if k.endswith('n_examples'):
self.assertEqual(int(res[k]), (n_train + n_test) // 5)
elif k.endswith('advantage'):
self.assertBetween(res[k], 0.3, 0.5)
elif k.endswith('auc'):
self.assertBetween(res[k], 0.7, 0.75)
class AttackLogitsLabelsMisclassifiedTest(absltest.TestCase):
def test_attack(self):
# logits and labels, single metric, single classifier, misclassified only
(_, _, logits_train, logits_test,
labels_train, labels_test) = get_test_inputs()
res = mia.run_attack(
logits_train=logits_train,
logits_test=logits_test,
labels_train=labels_train,
labels_test=labels_test,
only_misclassified=True,
attack_classifiers=('lr',),
metric='advantage')
self.assertBetween(res['misclassified_lr_logits_test_advantage'], 0.3, 0.8)
self.assertEqual(res['misclassified_n_examples'], 802)
class AttackLogitsByPercentileTest(absltest.TestCase):
def test_attack(self):
# only logits, single metric, no classifiers, split by deciles
_, _, logits_train, logits_test, _, _ = get_test_inputs()
res = mia.run_attack(
logits_train=logits_train,
logits_test=logits_test,
by_percentile=True,
metric='auc')
for k in res:
self.assertStartsWith(k, 'percentile')
self.assertBetween(res[k], 0.60, 0.75)
if __name__ == '__main__':
absltest.main()


@@ -1,106 +0,0 @@
# Copyright 2020, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
r"""A collection of sklearn models for binary classification.
This module contains some sklearn pipelines for finding models for binary
classification from a variable number of numerical input features.
These models are used to train binary classifiers for membership inference.
"""
from typing import Text
import numpy as np
from sklearn import ensemble
from sklearn import linear_model
from sklearn import model_selection
from sklearn import neighbors
from sklearn import neural_network
def choose_model(attack_classifier: Text):
"""Choose a model based on a string classifier."""
if attack_classifier == 'lr':
return logistic_regression()
elif attack_classifier == 'mlp':
return mlp()
elif attack_classifier == 'rf':
return random_forest()
elif attack_classifier == 'knn':
return knn()
else:
raise ValueError(f'Unknown attack classifier {attack_classifier}.')
def logistic_regression(verbose: int = 0, n_jobs: int = 1):
"""Setup a logistic regression pipeline with cross-validation."""
lr = linear_model.LogisticRegression(solver='lbfgs')
param_grid = {
'C': np.logspace(-4, 2, 10),
}
pipe = model_selection.GridSearchCV(
lr, param_grid=param_grid, cv=3, n_jobs=n_jobs, iid=False,
verbose=verbose)
return pipe
def random_forest(verbose: int = 0, n_jobs: int = 1):
"""Setup a random forest pipeline with cross-validation."""
rf = ensemble.RandomForestClassifier()
n_estimators = [100]
max_features = ['auto', 'sqrt']
max_depth = [5, 10, 20]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
random_grid = {'n_estimators': n_estimators,
'max_features': max_features,
'max_depth': max_depth,
'min_samples_split': min_samples_split,
'min_samples_leaf': min_samples_leaf}
pipe = model_selection.RandomizedSearchCV(
rf, param_distributions=random_grid, n_iter=7, cv=3, n_jobs=n_jobs,
iid=False, verbose=verbose)
return pipe
def mlp(verbose: int = 0, n_jobs: int = 1):
"""Setup a MLP pipeline with cross-validation."""
mlpmodel = neural_network.MLPClassifier()
param_grid = {
'hidden_layer_sizes': [(64,), (32, 32)],
'solver': ['adam'],
'alpha': [0.0001, 0.001, 0.01],
}
pipe = model_selection.GridSearchCV(
mlpmodel, param_grid=param_grid, cv=3, n_jobs=n_jobs, iid=False,
verbose=verbose)
return pipe
def knn(verbose: int = 0, n_jobs: int = 1):
"""Setup a k-nearest neighbors pipeline with cross-validation."""
knnmodel = neighbors.KNeighborsClassifier()
param_grid = {
'n_neighbors': [3, 5, 7],
}
pipe = model_selection.GridSearchCV(
knnmodel, param_grid=param_grid, cv=3, n_jobs=n_jobs, iid=False,
verbose=verbose)
return pipe
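# Illustrative usage (hypothetical toy data): every helper above returns an
# sklearn search object, so the attack code can treat them uniformly via fit
# and predict_proba. Note that the pipelines pass iid=False, which only exists
# in scikit-learn releases contemporary with this code (it was later removed).
def _choose_model_example():
  rng = np.random.RandomState(0)
  x = rng.randn(200, 10)          # e.g. the top-10 logits per example
  y = rng.binomial(1, 0.5, 200)   # 1 = training member, 0 = non-member
  clf = choose_model('lr')        # cross-validated logistic regression
  clf.fit(x, y)
  # Membership scores for the positive ("member") class.
  return clf.predict_proba(x)[:, clf.classes_ == 1]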