diff --git a/research/mi_lira_2021/README.md b/research/mi_lira_2021/README.md
index 9c025f0..72cd48f 100644
--- a/research/mi_lira_2021/README.md
+++ b/research/mi_lira_2021/README.md
@@ -94,15 +94,15 @@ which should give (something like) the following output
 
 ```
 Attack Ours (online)
-   AUC 0.6675, Accuracy 0.6074, TPR@0.1%FPR of 0.0104
+   AUC 0.6676, Accuracy 0.6077, TPR@0.1%FPR of 0.0169
 Attack Ours (online, fixed variance)
-   AUC 0.6831, Accuracy 0.6140, TPR@0.1%FPR of 0.0541
+   AUC 0.6856, Accuracy 0.6137, TPR@0.1%FPR of 0.0593
 Attack Ours (offline)
-   AUC 0.5465, Accuracy 0.5486, TPR@0.1%FPR of 0.0073
+   AUC 0.5488, Accuracy 0.5500, TPR@0.1%FPR of 0.0130
 Attack Ours (offline, fixed variance)
-   AUC 0.5518, Accuracy 0.5485, TPR@0.1%FPR of 0.0259
+   AUC 0.5549, Accuracy 0.5537, TPR@0.1%FPR of 0.0299
 Attack Global threshold
-   AUC 0.5900, Accuracy 0.6018, TPR@0.1%FPR of 0.0007
+   AUC 0.5921, Accuracy 0.6044, TPR@0.1%FPR of 0.0009
 ```
 
 where the global threshold attack is the baseline, and our online,
diff --git a/research/mi_lira_2021/fprtpr.png b/research/mi_lira_2021/fprtpr.png
index 5ad9dca..8419ca1 100644
Binary files a/research/mi_lira_2021/fprtpr.png and b/research/mi_lira_2021/fprtpr.png differ
diff --git a/research/mi_lira_2021/inference.py b/research/mi_lira_2021/inference.py
index fbe8170..9d78d0b 100644
--- a/research/mi_lira_2021/inference.py
+++ b/research/mi_lira_2021/inference.py
@@ -69,7 +69,7 @@ def main(argv):
             for dy in range(0, 2*shift+1, stride):
                 this_x = aug_pad[:, dx:dx+32, dy:dy+32, :].transpose((0,3,1,2))
 
-                logits = model.model(this_x, training=False)
+                logits = model.model(this_x, training=True)
                 outs.append(logits)
 
     print(np.array(outs).shape)
diff --git a/research/mi_poison_2022/README.md b/research/mi_poison_2022/README.md
index 5066046..a20444d 100644
--- a/research/mi_poison_2022/README.md
+++ b/research/mi_poison_2022/README.md
@@ -88,13 +88,13 @@ which should give (something like) the following output
 
 ```
 Attack No poison (LiRA)
-   AUC 0.6992, Accuracy 0.6240, TPR@0.1%FPR of 0.0529
+   AUC 0.7025, Accuracy 0.6258, TPR@0.1%FPR of 0.0544
 Attack No poison (Global threshold)
-   AUC 0.6200, Accuracy 0.6167, TPR@0.1%FPR of 0.0011
+   AUC 0.6191, Accuracy 0.6173, TPR@0.1%FPR of 0.0012
 Attack With poison (LiRA)
-   AUC 0.9904, Accuracy 0.9617, TPR@0.1%FPR of 0.3730
+   AUC 0.9943, Accuracy 0.9653, TPR@0.1%FPR of 0.4945
 Attack With poison (Global threshold)
-   AUC 0.9911, Accuracy 0.9580, TPR@0.1%FPR of 0.2130
+   AUC 0.9922, Accuracy 0.9603, TPR@0.1%FPR of 0.3930
 ```
 
 where the baselines are LiRA and a simple global threshold on the membership
diff --git a/research/mi_poison_2022/fprtpr.png b/research/mi_poison_2022/fprtpr.png
index 14fe1d0..a870cb9 100644
Binary files a/research/mi_poison_2022/fprtpr.png and b/research/mi_poison_2022/fprtpr.png differ
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
index 9c21131..988a50d 100644
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py
@@ -109,8 +109,7 @@ def _run_trained_attack(
           labels[train_indices],
           sample_weight=sample_weights_train,
       )
-      predictions = attacker.predict(features[test_indices])
-      scores[test_indices] = predictions
+      scores[test_indices] = attacker.predict(features[test_indices])
   except ValueError as ve:
     if 'cannot be greater than the number of members in each class.' in str(ve):
       logging.warning('kf.split in _run_trained_attack fails with: %s', str(ve))
@@ -200,8 +199,9 @@ def _run_threshold_attack(attack_input: AttackInputData):
       slice_spec=_get_slice_spec(attack_input),
       data_size=DataSize(ntrain=ntrain, ntest=ntest),
       attack_type=AttackType.THRESHOLD_ATTACK,
-      membership_scores_train=attack_input.get_loss_train(),
-      membership_scores_test=attack_input.get_loss_test(),
+      # Negate loss because training examples are expected to have lower loss.
+      membership_scores_train=-attack_input.get_loss_train(),
+      membership_scores_test=-attack_input.get_loss_test(),
       roc_curve=roc_curve,
       epsilon_lower_bound_value=epsilon_lower_bound_value,
   )
diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
index f74a9ef..cf45cd5 100644
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py
@@ -185,19 +185,19 @@ class TrainedAttacker(object):
     raise NotImplementedError()
 
   def predict(self, input_features):
-    """Predicts whether input_features belongs to train or test.
+    """Predicts the probability that input_features belongs to train.
 
     Args:
       input_features : A vector of features with the same semantics as x_train
         passed to train_model.
 
     Returns:
-      An array of probabilities denoting whether the example belongs to test.
+      An array of probabilities that the examples belong to train.
     """
     if self.model is None:
       raise AssertionError(
           'Model not trained yet. Please call train_model first.')
-    return self.model.predict_proba(input_features)[:, 1]
+    return self.model.predict_proba(input_features)[:, 1]  # Train has label 1
 
 
 class LogisticRegressionAttacker(TrainedAttacker):
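For context on the "Ours (online, fixed variance)" rows updated in the READMEs above: LiRA scores each example by comparing the target model's logit-scaled confidence against Gaussians fit to shadow models trained with and without that example. The sketch below is a minimal illustration of that idea following the paper's description; it is not the code in `plot.py`, and the data and names here are made up.

```python
import numpy as np
from scipy.stats import norm


def lira_online_score(target_conf, in_confs, out_confs, fixed_std):
  """Log-likelihood ratio of the target confidence under IN vs. OUT Gaussians.

  Higher score = more evidence the example was a training member. In the
  "fixed variance" variant a single std, estimated globally, is shared by
  both Gaussians instead of per-example standard deviations.
  """
  return (norm.logpdf(target_conf, loc=np.mean(in_confs), scale=fixed_std)
          - norm.logpdf(target_conf, loc=np.mean(out_confs), scale=fixed_std))


# Toy usage with made-up shadow-model confidences for one example.
rng = np.random.default_rng(0)
in_confs = rng.normal(3.0, 1.0, size=8)    # shadow models trained WITH the example
out_confs = rng.normal(1.0, 1.0, size=8)   # shadow models trained WITHOUT it
print(lira_online_score(2.8, in_confs, out_confs, fixed_std=1.0))
```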
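The `_run_threshold_attack` hunk negates the loss so that membership scores follow the convention "higher score = more likely a training member"; raw loss points the other way, since training members tend to have lower loss. Below is a self-contained sketch with synthetic losses (scikit-learn metrics only, variable names are illustrative) showing the effect on AUC, and how a fixed-low-FPR number like the READMEs' TPR@0.1%FPR can be read off a ROC curve.

```python
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

rng = np.random.default_rng(0)
loss_members = rng.normal(0.5, 0.3, size=5000)      # training members: lower loss
loss_nonmembers = rng.normal(1.0, 0.3, size=5000)   # non-members: higher loss
labels = np.concatenate([np.ones(5000), np.zeros(5000)])  # 1 = member

# Raw losses rank members below non-members, so AUC drops under 0.5;
# negating restores the "higher = more likely member" convention.
print(roc_auc_score(labels, np.concatenate([loss_members, loss_nonmembers])))
print(roc_auc_score(labels, np.concatenate([-loss_members, -loss_nonmembers])))

# TPR at a fixed low FPR (e.g. 0.1%) is read off the ROC curve of the scores.
fpr, tpr, _ = roc_curve(labels, np.concatenate([-loss_members, -loss_nonmembers]))
print("TPR@0.1%FPR:", np.interp(1e-3, fpr, tpr))
```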
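On the `TrainedAttacker.predict` docstring fix and the `# Train has label 1` comment: for scikit-learn classifiers, the columns of `predict_proba` follow `clf.classes_`, so with members labeled 1 the second column is the probability of belonging to the training set. A toy sketch under that assumption (data and names are illustrative only):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

features = np.array([[0.1], [0.3], [1.8], [2.1]])
is_member = np.array([0, 0, 1, 1])  # 1 = example was in the training set

clf = LogisticRegression().fit(features, is_member)
print(clf.classes_)                       # [0 1] -> column order of predict_proba
print(clf.predict_proba(features)[:, 1])  # probability of label 1, i.e. "train"
```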