forked from 626_privacy/tensorflow_privacy
Adds the TF Privacy Report codelab.
PiperOrigin-RevId: 338222024
This commit is contained in:
parent
4143957701
commit
1981ebe2f2
2 changed files with 404 additions and 2 deletions
|
@ -0,0 +1,401 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "1eiwVljWpzM7"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Copyright 2020 The TensorFlow Authors.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"cellView": "both",
|
||||||
|
"id": "4rmwPgXeptiS"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
|
||||||
|
"# you may not use this file except in compliance with the License.\n",
|
||||||
|
"# You may obtain a copy of the License at\n",
|
||||||
|
"#\n",
|
||||||
|
"# https://www.apache.org/licenses/LICENSE-2.0\n",
|
||||||
|
"#\n",
|
||||||
|
"# Unless required by applicable law or agreed to in writing, software\n",
|
||||||
|
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
|
||||||
|
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
|
||||||
|
"# See the License for the specific language governing permissions and\n",
|
||||||
|
"# limitations under the License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "YM2gRaJMqvMi"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Assess privacy risks with the TensorFlow Privacy Report"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "7oUAMMc6isck"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
|
||||||
|
" \u003ctd\u003e\n",
|
||||||
|
" \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs/privacy_report_codelab.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
|
||||||
|
" \u003c/td\u003e\n",
|
||||||
|
" \u003ctd\u003e\n",
|
||||||
|
" \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/membership_inference_attack/codelabs/privacy_report_codelab.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
|
||||||
|
" \u003c/td\u003e\n",
|
||||||
|
"\u003c/table\u003e"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "9rMuytY7Nn8P"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Overview\n",
|
||||||
|
"In this codelab we'll train a simple image classification model on the CIFAR10 dataset, and then use the \"membership inference attack\" against this model to assess if the attacker is able to \"guess\" whether a particular sample was present in the training set. We will use the TF Privacy Report to visualize results from multiple models and model checkpoints."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "FUWqArj_q8vs"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n",
|
||||||
|
"First, set this notebook's runtime to use a GPU, under Runtime \u003e Change runtime type \u003e Hardware accelerator. Then, begin importing the necessary libraries."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"cellView": "form",
|
||||||
|
"id": "Lr1pwHcbralz"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#@title Import statements.\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from typing import Tuple, Text\n",
|
||||||
|
"from scipy import special\n",
|
||||||
|
"from sklearn import metrics\n",
|
||||||
|
"\n",
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"import tensorflow_datasets as tfds\n",
|
||||||
|
"\n",
|
||||||
|
"# Set verbosity.\n",
|
||||||
|
"tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)\n",
|
||||||
|
"from warnings import simplefilter\n",
|
||||||
|
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||||
|
"simplefilter(action=\"ignore\", category=ConvergenceWarning)\n",
|
||||||
|
"simplefilter(action=\"ignore\", category=FutureWarning)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "ucw81ar6ru-6"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"### Install TensorFlow Privacy."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"cellView": "both",
|
||||||
|
"id": "zcqAmiGH90kl"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip3 install git+https://github.com/tensorflow/privacy\n",
|
||||||
|
"\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec\n",
|
||||||
|
"from tensorflow_privacy.privacy.membership_inference_attack import privacy_report"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "pBbcG86th_sW"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Train a model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"cellView": "both",
|
||||||
|
"id": "vCyOWyyhXLib"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#@markdown Train a simple model on CIFAR10 with Keras.\n",
|
||||||
|
"\n",
|
||||||
|
"dataset = 'cifar10'\n",
|
||||||
|
"num_classes = 10\n",
|
||||||
|
"activation = 'relu'\n",
|
||||||
|
"lr = 0.02\n",
|
||||||
|
"momentum = 0.9\n",
|
||||||
|
"batch_size = 250\n",
|
||||||
|
"epochs = 50 # Privacy risks are especially visible with lots of epochs.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def small_cnn(input_shape: Tuple[int],\n",
|
||||||
|
" num_classes: int,\n",
|
||||||
|
" num_conv: int,\n",
|
||||||
|
" activation: Text = 'relu') -\u003e tf.keras.models.Sequential:\n",
|
||||||
|
" \"\"\"Setup a small CNN for image classification.\n",
|
||||||
|
"\n",
|
||||||
|
" Args:\n",
|
||||||
|
" input_shape: Integer tuple for the shape of the images.\n",
|
||||||
|
" num_classes: Number of prediction classes.\n",
|
||||||
|
" num_conv: Number of convolutional layers.\n",
|
||||||
|
" activation: The activation function to use for conv and dense layers.\n",
|
||||||
|
"\n",
|
||||||
|
" Returns:\n",
|
||||||
|
" The Keras model.\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" model = tf.keras.models.Sequential()\n",
|
||||||
|
" model.add(tf.keras.layers.Input(shape=input_shape))\n",
|
||||||
|
"\n",
|
||||||
|
" # Conv layers\n",
|
||||||
|
" for _ in range(num_conv):\n",
|
||||||
|
" model.add(tf.keras.layers.Conv2D(32, (3, 3), activation=activation))\n",
|
||||||
|
" model.add(tf.keras.layers.MaxPooling2D())\n",
|
||||||
|
"\n",
|
||||||
|
" model.add(tf.keras.layers.Flatten())\n",
|
||||||
|
" model.add(tf.keras.layers.Dense(64, activation=activation))\n",
|
||||||
|
" model.add(tf.keras.layers.Dense(num_classes))\n",
|
||||||
|
" return model\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"print('Loading the dataset.')\n",
|
||||||
|
"train_ds = tfds.as_numpy(\n",
|
||||||
|
" tfds.load(dataset, split=tfds.Split.TRAIN, batch_size=-1))\n",
|
||||||
|
"test_ds = tfds.as_numpy(\n",
|
||||||
|
" tfds.load(dataset, split=tfds.Split.TEST, batch_size=-1))\n",
|
||||||
|
"x_train = train_ds['image'].astype('float32') / 255.\n",
|
||||||
|
"y_train_indices = train_ds['label'][:, np.newaxis]\n",
|
||||||
|
"x_test = test_ds['image'].astype('float32') / 255.\n",
|
||||||
|
"y_test_indices = test_ds['label'][:, np.newaxis]\n",
|
||||||
|
"\n",
|
||||||
|
"# Convert class vectors to binary class matrices.\n",
|
||||||
|
"y_train = tf.keras.utils.to_categorical(y_train_indices, num_classes)\n",
|
||||||
|
"y_test = tf.keras.utils.to_categorical(y_test_indices, num_classes)\n",
|
||||||
|
"\n",
|
||||||
|
"input_shape = x_train.shape[1:]\n",
|
||||||
|
"\n",
|
||||||
|
"three_layer_model = small_cnn(\n",
|
||||||
|
" input_shape, num_classes, num_conv=3, activation=activation)\n",
|
||||||
|
"optimizer = tf.keras.optimizers.SGD(lr=lr, momentum=momentum)\n",
|
||||||
|
"loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n",
|
||||||
|
"three_layer_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])\n",
|
||||||
|
"\n",
|
||||||
|
"two_layer_model = small_cnn(\n",
|
||||||
|
" input_shape, num_classes, num_conv=2, activation=activation)\n",
|
||||||
|
"two_layer_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])\n",
|
||||||
|
"\n",
|
||||||
|
"epoch_results = AttackResultsCollection([])\n",
|
||||||
|
"num_epochs = 5\n",
|
||||||
|
"models = {\n",
|
||||||
|
" 'two layer model': two_layer_model,\n",
|
||||||
|
" 'three layer model': three_layer_model,\n",
|
||||||
|
"}\n",
|
||||||
|
"for model_name, model in models.items():\n",
|
||||||
|
" # Incrementally train the model and store privacy metrics every num_epochs.\n",
|
||||||
|
" for i in range(10):\n",
|
||||||
|
" model.fit(\n",
|
||||||
|
" x_train,\n",
|
||||||
|
" y_train,\n",
|
||||||
|
" batch_size=batch_size,\n",
|
||||||
|
" epochs=num_epochs,\n",
|
||||||
|
" validation_data=(x_test, y_test),\n",
|
||||||
|
" shuffle=True)\n",
|
||||||
|
"\n",
|
||||||
|
" logits_train = model.predict(x_train, batch_size=batch_size)\n",
|
||||||
|
" logits_test = model.predict(x_test, batch_size=batch_size)\n",
|
||||||
|
"\n",
|
||||||
|
" prob_train = special.softmax(logits_train, axis=1)\n",
|
||||||
|
" prob_test = special.softmax(logits_test, axis=1)\n",
|
||||||
|
"\n",
|
||||||
|
" # Add metadata to generate a privacy report.\n",
|
||||||
|
" privacy_report_metadata = PrivacyReportMetadata(\n",
|
||||||
|
" accuracy_train=metrics.accuracy_score(y_train_indices,\n",
|
||||||
|
" np.argmax(prob_train, axis=1)),\n",
|
||||||
|
" accuracy_test=metrics.accuracy_score(y_test_indices,\n",
|
||||||
|
" np.argmax(prob_test, axis=1)),\n",
|
||||||
|
" epoch_num=num_epochs * i,\n",
|
||||||
|
" model_variant_label=model_name)\n",
|
||||||
|
"\n",
|
||||||
|
" attack_results = mia.run_attacks(\n",
|
||||||
|
" AttackInputData(\n",
|
||||||
|
" labels_train=np.asarray([x[0] for x in y_train_indices]),\n",
|
||||||
|
" labels_test=np.asarray([x[0] for x in y_test_indices]),\n",
|
||||||
|
" probs_train=prob_train,\n",
|
||||||
|
" probs_test=prob_test),\n",
|
||||||
|
" SlicingSpec(entire_dataset=True, by_class=True),\n",
|
||||||
|
" attack_types=(AttackType.THRESHOLD_ATTACK,\n",
|
||||||
|
" AttackType.LOGISTIC_REGRESSION),\n",
|
||||||
|
" privacy_report_metadata=privacy_report_metadata)\n",
|
||||||
|
" epoch_results.append(attack_results)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "0snqR0Gbv3qk"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Load attack results\n",
|
||||||
|
"We can load attack results from the model above or replace the filepath with our own results."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "UTZwiCAJt0R6"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loaded_results = epoch_results\n",
|
||||||
|
"# Or load your own via\n",
|
||||||
|
"#loaded_results = AttackResultsCollection.load(filepath)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "6mBEYh4utxiR"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Epoch Plots\n",
|
||||||
|
"\n",
|
||||||
|
"We can visualize how privacy risks evolve as we train models. By probing the model periodically (e.g. every 5 epochs, as in the training loop above), we can pick the point in time with the best performance / privacy trade-off.\n",
|
||||||
|
"\n",
|
||||||
|
"We use the TF Privacy Membership Inference Attack module to generate AttackResults. These AttackResults get combined into an AttackResultsCollection. The TF Privacy Report is designed to analyze the provided AttackResultsCollection."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"cellView": "both",
|
||||||
|
"id": "o7T8n0ffv3qo"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"privacy_metrics = (PrivacyMetric.AUC, PrivacyMetric.ATTACKER_ADVANTAGE)\n",
|
||||||
|
"epoch_plot = privacy_report.plot_by_epochs(\n",
|
||||||
|
" loaded_results, privacy_metrics=privacy_metrics)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "ijjwGgyixsFg"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"We see that as a rule, privacy vulnerability tends to increase as the number of epochs goes up. This is true across model variants as well as different attacker types.\n",
|
||||||
|
"\n",
|
||||||
|
"Two layer models (with fewer convolutional layers) are generally more vulnerable than their three layer model counterparts.\n",
|
||||||
|
"\n",
|
||||||
|
"Now let's see how model performance changes with respect to privacy risk."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "GbtlV-2Xu8s-"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## Privacy vs Utility"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "Lt6fXGoivLH1"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"privacy_metrics = (PrivacyMetric.AUC, PrivacyMetric.ATTACKER_ADVANTAGE)\n",
|
||||||
|
"utility_privacy_plot = privacy_report.plot_privacy_vs_accuracy(\n",
|
||||||
|
" loaded_results, privacy_metrics=privacy_metrics)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "m_6vg3pBPoyy"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Three layer models (perhaps due to too many parameters) only achieve a train accuracy of 0.85. The two layer models achieve roughly equal performance for that level of privacy risk but they continue to get better accuracy.\n",
|
||||||
|
"\n",
|
||||||
|
"We can also see how the line for two layer models gets steeper. This means that additional marginal gains in train accuracy come at the expense of vast privacy vulnerabilities."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"id": "7u3BAg87v3qv"
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"This is the end of the colab!\n",
|
||||||
|
"Feel free to analyze your own results."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"collapsed_sections": [],
|
||||||
|
"last_runtime": {
|
||||||
|
"build_target": "//learning/deepmind/public/tools/ml_python:ml_notebook",
|
||||||
|
"kind": "private"
|
||||||
|
},
|
||||||
|
"name": "TF Privacy Report codelab",
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0
|
||||||
|
}
|
|
@ -109,7 +109,7 @@ def _generate_subplots(all_results_df: pd.DataFrame, x_axis_metric: str,
|
||||||
privacy_metrics: Iterable[PrivacyMetric]):
|
privacy_metrics: Iterable[PrivacyMetric]):
|
||||||
"""Create one subplot per privacy metric for a specified x_axis_metric."""
|
"""Create one subplot per privacy metric for a specified x_axis_metric."""
|
||||||
fig, axes = plt.subplots(
|
fig, axes = plt.subplots(
|
||||||
1, len(privacy_metrics), figsize=(5 * len(privacy_metrics), 5))
|
1, len(privacy_metrics), figsize=(5 * len(privacy_metrics) + 3, 5))
|
||||||
# Set a title for the entire group of subplots.
|
# Set a title for the entire group of subplots.
|
||||||
fig.suptitle(figure_title)
|
fig.suptitle(figure_title)
|
||||||
if len(privacy_metrics) == 1:
|
if len(privacy_metrics) == 1:
|
||||||
|
@ -122,9 +122,10 @@ def _generate_subplots(all_results_df: pd.DataFrame, x_axis_metric: str,
|
||||||
sorted_label_results = single_label_results.sort_values(x_axis_metric)
|
sorted_label_results = single_label_results.sort_values(x_axis_metric)
|
||||||
axes[i].plot(sorted_label_results[x_axis_metric],
|
axes[i].plot(sorted_label_results[x_axis_metric],
|
||||||
sorted_label_results[str(privacy_metric)])
|
sorted_label_results[str(privacy_metric)])
|
||||||
axes[i].legend(legend_labels, loc='lower right')
|
|
||||||
axes[i].set_xlabel(x_axis_metric)
|
axes[i].set_xlabel(x_axis_metric)
|
||||||
axes[i].set_title('%s for %s' % (privacy_metric, ENTIRE_DATASET_SLICE_STR))
|
axes[i].set_title('%s for %s' % (privacy_metric, ENTIRE_DATASET_SLICE_STR))
|
||||||
|
plt.legend(legend_labels, loc='upper left', bbox_to_anchor=(1.02, 1))
|
||||||
|
fig.tight_layout(rect=[0, 0, 1, 0.93]) # Leave space for suptitle.
|
||||||
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue