From 70f9585a24eb1ba7c4baf7bff8fd0239cdb02944 Mon Sep 17 00:00:00 2001 From: David Marn Date: Tue, 15 Sep 2020 01:30:00 -0700 Subject: [PATCH] Adds Privacy vs Utility charts to the Privacy Report for a single model. PiperOrigin-RevId: 331720083 --- .../data_structures.py | 14 +++- .../membership_inference_attack/example.py | 16 ++-- .../privacy_report.py | 74 ++++++++++++++++--- .../privacy_report_test.py | 38 ++++++++++ 4 files changed, 126 insertions(+), 16 deletions(-) diff --git a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py index 23274ab..cfd67a6 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py @@ -123,6 +123,16 @@ class AttackType(enum.Enum): return '%s' % self.name +class PrivacyMetric(enum.Enum): + """An enum for the supported privacy risk metrics.""" + AUC = 'AUC' + ATTACKER_ADVANTAGE = 'Attacker advantage' + + def __str__(self): + """Returns 'AUC' instead of PrivacyMetric.AUC.""" + return '%s' % self.value + + def _is_integer_type_array(a): return np.issubdtype(a.dtype, np.integer) @@ -469,8 +479,8 @@ class AttackResults: 'slice feature': slice_features, 'slice value': slice_values, 'attack type': attack_types, - 'Attacker advantage': advantages, - 'AUC': aucs + str(PrivacyMetric.ATTACKER_ADVANTAGE): advantages, + str(PrivacyMetric.AUC): aucs }) return df diff --git a/tensorflow_privacy/privacy/membership_inference_attack/example.py b/tensorflow_privacy/privacy/membership_inference_attack/example.py index 5d4b6b5..bfb41c0 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/example.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/example.py @@ -28,9 +28,11 @@ from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras.utils import to_categorical from 
tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack_new as mia + from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackType +from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric from tensorflow_privacy.privacy.membership_inference_attack.data_structures import \ PrivacyReportMetadata from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec @@ -109,6 +111,7 @@ def crossentropy(true_labels, predictions): epoch_results = [] # Incrementally train the model and store privacy risk metrics every 10 epochs. +num_epochs = 2 for i in range(1, 6): model.fit( training_features, @@ -116,7 +119,7 @@ for i in range(1, 6): validation_data=(test_features, to_categorical(test_labels, num_clusters)), batch_size=64, - epochs=2, + epochs=num_epochs, shuffle=True) training_pred = model.predict(training_features) @@ -128,7 +131,7 @@ for i in range(1, 6): np.argmax(training_pred, axis=1)), accuracy_test=metrics.accuracy_score(test_labels, np.argmax(test_pred, axis=1)), - epoch_num=2 * i, + epoch_num=num_epochs * i, model_variant_label="default") attack_results = mia.run_attacks( @@ -145,10 +148,13 @@ for i in range(1, 6): privacy_report_metadata=privacy_report_metadata) epoch_results.append(attack_results) -# Generate privacy report -epoch_figure = privacy_report.plot_by_epochs(epoch_results, - ["Attacker advantage", "AUC"]) +# Generate privacy reports +epoch_figure = privacy_report.plot_by_epochs( + epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC]) epoch_figure.show() +privacy_utility_figure = privacy_report.plot_privacy_vs_accuracy_single_model( + epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC]) 
+privacy_utility_figure.show() # Example of saving the results to the file and loading them back. with tempfile.TemporaryDirectory() as tmpdirname: diff --git a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py b/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py index 0a0b48d..94dc47b 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/privacy_report.py @@ -19,12 +19,57 @@ import matplotlib.pyplot as plt import pandas as pd from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackResults +from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyMetric def plot_by_epochs(results: Iterable[AttackResults], - privacy_metrics: Iterable[str]) -> plt.Figure: - """Plots privacy vulnerabilities by epochs.""" + privacy_metrics: Iterable[PrivacyMetric]) -> plt.Figure: + """Plots privacy vulnerabilities vs epoch numbers for a single model variant. + + In case multiple privacy metrics are specified, the plot will feature + multiple subplots (one subplot per metric). + Args: + results: AttackResults for the plot + privacy_metrics: List of enumerated privacy metrics that should be plotted. + + Returns: + A pyplot figure with privacy vs epoch plots. + """ + _validate_results(results) + all_results_df = _calculate_combined_df_with_metadata(results) + return _generate_subplots( + all_results_df=all_results_df, + x_axis_metric='Epoch', + figure_title='Vulnerability per Epoch', + privacy_metrics=privacy_metrics) + + +def plot_privacy_vs_accuracy_single_model( + results: Iterable[AttackResults], privacy_metrics: Iterable[PrivacyMetric]): + """Plots privacy vulnerabilities vs accuracy for a single model variant. + + In case multiple privacy metrics are specified, the plot will feature + multiple subplots (one subplot per metric). 
+ Args: + results: AttackResults for the plot + privacy_metrics: List of enumerated privacy metrics that should be plotted. + + Returns: + A pyplot figure with privacy vs accuracy plots. + + """ + _validate_results(results) + all_results_df = _calculate_combined_df_with_metadata(results) + return _generate_subplots( + all_results_df=all_results_df, + x_axis_metric='Train accuracy', + figure_title='Privacy vs Utility Analysis', + privacy_metrics=privacy_metrics) + + +def _calculate_combined_df_with_metadata(results: Iterable[AttackResults]): + """Adds metadata to each results dataframe and concatenates them.""" all_results_df = None for attack_results in results: attack_results_df = attack_results.calculate_pd_dataframe() @@ -32,25 +77,36 @@ def plot_by_epochs(results: Iterable[AttackResults], == 'entire_dataset'] attack_results_df.insert(0, 'Epoch', attack_results.privacy_report_metadata.epoch_num) + attack_results_df.insert( + 0, 'Train accuracy', + attack_results.privacy_report_metadata.accuracy_train) if all_results_df is None: all_results_df = attack_results_df else: all_results_df = pd.concat([all_results_df, attack_results_df], ignore_index=True) + return all_results_df + +def _generate_subplots(all_results_df: pd.DataFrame, x_axis_metric: str, + figure_title: str, + privacy_metrics: Iterable[PrivacyMetric]): + """Creates one subplot per privacy metric for a specified x_axis_metric.""" fig, axes = plt.subplots(1, len(privacy_metrics)) + # Set a title for the entire group of subplots. 
+ fig.suptitle(figure_title) if len(privacy_metrics) == 1: axes = (axes,) for i, privacy_metric in enumerate(privacy_metrics): attack_types = all_results_df['attack type'].unique() for attack_type in attack_types: - axes[i].plot( - all_results_df.loc[all_results_df['attack type'] == attack_type] - ['Epoch'], all_results_df.loc[all_results_df['attack type'] == - attack_type][privacy_metric]) - axes[i].legend(attack_types) - axes[i].set_xlabel('Epoch') - axes[i].set_title('%s for Entire dataset' % privacy_metric) + attack_type_results = all_results_df.loc[all_results_df['attack type'] == + attack_type] + axes[i].plot(attack_type_results[x_axis_metric], + attack_type_results[str(privacy_metric)]) + axes[i].legend(attack_types) + axes[i].set_xlabel(x_axis_metric) + axes[i].set_title('%s for Entire dataset' % str(privacy_metric)) return fig diff --git a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py b/tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py index f6a193e..f7f7647 100644 --- a/tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py +++ b/tensorflow_privacy/privacy/membership_inference_attack/privacy_report_test.py @@ -84,6 +84,8 @@ class PrivacyReportTest(absltest.TestCase): np.testing.assert_array_equal(auc_data[0], [10, 15]) # Y axis lists AUC values np.testing.assert_array_equal(auc_data[1], [0.5, 1.0]) + # Check the title + self.assertEqual(fig._suptitle.get_text(), 'Vulnerability per Epoch') def test_multiple_metrics_plot_by_epochs(self): fig = privacy_report.plot_by_epochs( @@ -98,6 +100,42 @@ class PrivacyReportTest(absltest.TestCase): # Y axis lists privacy metrics np.testing.assert_array_equal(auc_data[1], [0.5, 1.0]) np.testing.assert_array_equal(attacker_advantage_data[1], [0, 1.0]) + # Check the title + self.assertEqual(fig._suptitle.get_text(), 'Vulnerability per Epoch') + + def test_plot_privacy_vs_accuracy_single_model_no_metadata(self): + # Raise error if metadata 
is missing + self.assertRaises(ValueError, + privacy_report.plot_privacy_vs_accuracy_single_model, + (self.attack_results_no_metadata,), ['AUC']) + + def test_single_metric_plot_privacy_vs_accuracy_single_model(self): + fig = privacy_report.plot_privacy_vs_accuracy_single_model( + (self.results_epoch_10, self.results_epoch_15), ['AUC']) + # extract data from figure. + auc_data = fig.gca().lines[0].get_data() + # X axis lists train accuracy values + np.testing.assert_array_equal(auc_data[0], [0.4, 0.5]) + # Y axis lists AUC values + np.testing.assert_array_equal(auc_data[1], [0.5, 1.0]) + # Check the title + self.assertEqual(fig._suptitle.get_text(), 'Privacy vs Utility Analysis') + + def test_multiple_metrics_plot_privacy_vs_accuracy_single_model(self): + fig = privacy_report.plot_privacy_vs_accuracy_single_model( + (self.results_epoch_10, self.results_epoch_15), + ['AUC', 'Attacker advantage']) + # extract data from figure. + auc_data = fig.axes[0].lines[0].get_data() + attacker_advantage_data = fig.axes[1].lines[0].get_data() + # X axis lists train accuracy values + np.testing.assert_array_equal(auc_data[0], [0.4, 0.5]) + np.testing.assert_array_equal(attacker_advantage_data[0], [0.4, 0.5]) + # Y axis lists privacy metrics + np.testing.assert_array_equal(auc_data[1], [0.5, 1.0]) + np.testing.assert_array_equal(attacker_advantage_data[1], [0, 1.0]) + # Check the title + self.assertEqual(fig._suptitle.get_text(), 'Privacy vs Utility Analysis') if __name__ == '__main__':