From b19e0b197a01286fd552f6fffac13339691453e2 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 9 Aug 2021 15:38:17 -0700 Subject: [PATCH] Implement the membership inference attach using a keras-callback. PiperOrigin-RevId: 389741018 --- g3doc/tutorials/privacy_report.ipynb | 166 ++++++++++++++++----------- 1 file changed, 102 insertions(+), 64 deletions(-) diff --git a/g3doc/tutorials/privacy_report.ipynb b/g3doc/tutorials/privacy_report.ipynb index 1e89828..e6ff428 100644 --- a/g3doc/tutorials/privacy_report.ipynb +++ b/g3doc/tutorials/privacy_report.ipynb @@ -95,7 +95,6 @@ "from sklearn import metrics\n", "\n", "import tensorflow as tf\n", - "tf.compat.v1.disable_v2_behavior()\n", "\n", "import tensorflow_datasets as tfds\n", "\n", @@ -137,14 +136,25 @@ }, "outputs": [], "source": [ - "from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResultsCollection\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata\n", - "from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec\n", - "from tensorflow_privacy.privacy.membership_inference_attack import privacy_report" + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import 
AttackResultsCollection\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyMetric\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec\n", + "from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import privacy_report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VpOdtnbPbPXE" + }, + "outputs": [], + "source": [ + "import tensorflow_privacy" ] }, { @@ -171,13 +181,13 @@ "dataset = 'cifar10'\n", "num_classes = 10\n", "activation = 'relu'\n", - "lr = 0.02\n", - "momentum = 0.9\n", - "batch_size = 250\n", - "epochs_per_report = 5\n", - "num_reports = 10\n", - "# Privacy risks are especially visible with lots of epochs.\n", - "total_epochs = epochs_per_report*num_reports " + "num_conv = 3\n", + "\n", + "batch_size=50\n", + "epochs_per_report = 2\n", + "total_epochs = 50\n", + "\n", + "lr = 0.001" ] }, { @@ -197,7 +207,7 @@ }, "outputs": [], "source": [ - "#@title Load the data\n", + "#@title\n", "print('Loading the dataset.')\n", "train_ds = tfds.as_numpy(\n", " tfds.load(dataset, split=tfds.Split.TRAIN, batch_size=-1))\n", @@ -212,7 +222,9 @@ "y_train = tf.keras.utils.to_categorical(y_train_indices, num_classes)\n", "y_test = tf.keras.utils.to_categorical(y_test_indices, num_classes)\n", "\n", - "input_shape = x_train.shape[1:]" + "input_shape = x_train.shape[1:]\n", + "\n", + "assert x_train.shape[0] % batch_size == 0, \"The tensorflow_privacy optimizer doesn't handle partial batches\"" ] }, { @@ -232,7 +244,7 @@ }, "outputs": [], "source": [ - "#@title Define the models\n", + "#@title\n", "def small_cnn(input_shape: Tuple[int],\n", " num_classes: 
int,\n", " num_conv: int,\n", @@ -259,7 +271,13 @@ " model.add(tf.keras.layers.Flatten())\n", " model.add(tf.keras.layers.Dense(64, activation=activation))\n", " model.add(tf.keras.layers.Dense(num_classes))\n", - " return model\n" + " \n", + " model.compile(\n", + " loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=lr),\n", + " metrics=['accuracy'])\n", + "\n", + " return model" ] }, { @@ -268,7 +286,9 @@ "id": "hs0Smn24Dty-" }, "source": [ - "Build two-layer and a three-layer CNN models using that function. Again there's nothing provacy specific about this code. It uses standard models, layers, losses, and optimizers." + "Build two three-layer CNN models using that function.\n", + "\n", + "Configure the first to use a basic SGD optimizer, and the second to use a differentially private optimizer (`tf_privacy.DPKerasAdamOptimizer`), so you can compare the results." ] }, { @@ -279,16 +299,10 @@ }, "outputs": [], "source": [ - "optimizer = tf.keras.optimizers.SGD(lr=lr, momentum=momentum)\n", - "loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n", - "\n", - "three_layer_model = small_cnn(\n", - " input_shape, num_classes, num_conv=3, activation=activation)\n", - "three_layer_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])\n", - "\n", - "two_layer_model = small_cnn(\n", + "model_2layers = small_cnn(\n", " input_shape, num_classes, num_conv=2, activation=activation)\n", - "two_layer_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])" + "model_3layers = small_cnn(\n", + " input_shape, num_classes, num_conv=3, activation=activation)" ] }, { @@ -318,42 +332,42 @@ " def __init__(self, epochs_per_report, model_name):\n", " self.epochs_per_report = epochs_per_report\n", " self.model_name = model_name\n", - " self.epochs = []\n", - " self.attack_results = [] \n", + " self.attack_results = []\n", + "\n", + " def on_epoch_end(self, epoch, logs=None):\n", + 
" epoch = epoch+1\n", "\n", - " def on_epoch_end(self, n, logs=None):\n", - " epoch = n + 1\n", " if epoch % self.epochs_per_report != 0:\n", " return\n", - " \n", - " print(f\"\\nRunning privacy report for epoch: {epoch}\")\n", - " self.epochs.append(epoch)\n", "\n", - " logits_train = model.predict(x_train, batch_size=batch_size)\n", - " logits_test = model.predict(x_test, batch_size=batch_size)\n", + " print(f'\\nRunning privacy report for epoch: {epoch}\\n')\n", + "\n", + " logits_train = self.model.predict(x_train, batch_size=batch_size)\n", + " logits_test = self.model.predict(x_test, batch_size=batch_size)\n", "\n", " prob_train = special.softmax(logits_train, axis=1)\n", " prob_test = special.softmax(logits_test, axis=1)\n", "\n", " # Add metadata to generate a privacy report.\n", " privacy_report_metadata = PrivacyReportMetadata(\n", - " accuracy_train=metrics.accuracy_score(y_train_indices,\n", - " np.argmax(prob_train, axis=1)),\n", - " accuracy_test=metrics.accuracy_score(y_test_indices,\n", - " np.argmax(prob_test, axis=1)),\n", + " # Show the validation accuracy on the plot\n", + " # It's what you send to train_accuracy that gets plotted.\n", + " accuracy_train=logs['val_accuracy'], \n", + " accuracy_test=logs['val_accuracy'],\n", " epoch_num=epoch,\n", " model_variant_label=self.model_name)\n", "\n", " attack_results = mia.run_attacks(\n", " AttackInputData(\n", - " labels_train=np.asarray([x[0] for x in y_train_indices]),\n", - " labels_test=np.asarray([x[0] for x in y_test_indices]),\n", + " labels_train=y_train_indices[:, 0],\n", + " labels_test=y_test_indices[:, 0],\n", " probs_train=prob_train,\n", " probs_test=prob_test),\n", " SlicingSpec(entire_dataset=True, by_class=True),\n", " attack_types=(AttackType.THRESHOLD_ATTACK,\n", " AttackType.LOGISTIC_REGRESSION),\n", " privacy_report_metadata=privacy_report_metadata)\n", + "\n", " self.attack_results.append(attack_results)\n" ] }, @@ -365,7 +379,18 @@ "source": [ "### Train the models\n", "\n", 
- "The next code block trains the two models. The `all_reports` list is used to collect all the results from all the models' training runs. The individual reports are tagged witht the `model_name`, so there's no confusion about which model generated which report. " + "The next code block trains the two models. The `all_reports` list is used to collect all the results from all the models' training runs. The individual reports are tagged with the `model_name`, so there's no confusion about which model generated which report." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o3U76c2Y4irD" + }, + "outputs": [], + "source": [ + "all_reports = []" ] }, { @@ -376,19 +401,8 @@ }, "outputs": [], "source": [ - "all_reports = []\n", - "\n", - "models = {\n", - " 'two layer model': two_layer_model,\n", - " 'three layer model': three_layer_model,\n", - "}\n", - "\n", - "for model_name, model in models.items():\n", - " print(f\"\\n\\n\\nFitting {model_name}\\n\")\n", - " callback = PrivacyMetrics(epochs_per_report, \n", - " model_name)\n", - "\n", - " model.fit(\n", + "callback = PrivacyMetrics(epochs_per_report, \"2 Layers\")\n", + "history = model_2layers.fit(\n", " x_train,\n", " y_train,\n", " batch_size=batch_size,\n", @@ -396,8 +410,29 @@ " validation_data=(x_test, y_test),\n", " callbacks=[callback],\n", " shuffle=True)\n", - " \n", - " all_reports.extend(callback.attack_results)\n" + "\n", + "all_reports.extend(callback.attack_results)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "27qLElOR4y_i" + }, + "outputs": [], + "source": [ + "callback = PrivacyMetrics(epochs_per_report, \"3 Layers\")\n", + "history = model_3layers.fit(\n", + " x_train,\n", + " y_train,\n", + " batch_size=batch_size,\n", + " epochs=total_epochs,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[callback],\n", + " shuffle=True)\n", + "\n", + "all_reports.extend(callback.attack_results)" ] }, { @@ -470,7 
+505,10 @@ "source": [ "privacy_metrics = (PrivacyMetric.AUC, PrivacyMetric.ATTACKER_ADVANTAGE)\n", "utility_privacy_plot = privacy_report.plot_privacy_vs_accuracy(\n", - " results, privacy_metrics=privacy_metrics)" + " results, privacy_metrics=privacy_metrics)\n", + "\n", + "for axis in utility_privacy_plot.axes:\n", + " axis.set_xlabel('Validation accuracy')" ] }, { @@ -490,8 +528,7 @@ "id": "7u3BAg87v3qv" }, "source": [ - "This is the end of the colab!\n", - "Feel free to analyze your own results." + "This is the end of the tutorial. Feel free to analyze your own results." ] } ], @@ -500,6 +537,7 @@ "colab": { "collapsed_sections": [], "name": "privacy_report.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": {