From 21a891c569c3bd8de2fca10d190abebd29338913 Mon Sep 17 00:00:00 2001
From: Liwei Song
Date: Wed, 2 Dec 2020 18:57:35 -0500
Subject: [PATCH] Add privacy risk score

---
 .../data_structures.py                        | 46 ++++++++++++++
 .../membership_inference_attack.py            | 61 +++++++++++++++++
 2 files changed, 107 insertions(+)

diff --git a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
index ba58990..02f1c44 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
@@ -529,6 +529,52 @@ class SingleAttackResult:
   ])
 
 
+@dataclass
+class SingleRiskScoreResult:
+  """Results from computing privacy risk scores.
+
+  This part is preliminary: it shows how to use privacy risk scores to
+  perform a membership inference attack by thresholding on the score.
+  """
+
+  # Data slice this result was calculated for.
+  slice_spec: SingleSliceSpec
+
+  # Privacy risk scores of the training-set members.
+  train_risk_scores: np.ndarray
+
+  # Privacy risk scores of the test-set non-members.
+  test_risk_scores: np.ndarray
+
+  def attack_with_varied_thresholds(self, threshold_list):
+    """Computes precision and recall when thresholding on the risk score."""
+    precision_list = []
+    recall_list = []
+    meaningful_threshold_list = []
+    for threshold in threshold_list:
+      true_positive_rate = np.sum(
+          self.train_risk_scores >= threshold) / len(self.train_risk_scores)
+      false_positive_rate = np.sum(
+          self.test_risk_scores >= threshold) / len(self.test_risk_scores)
+      if true_positive_rate + false_positive_rate > 0:
+        meaningful_threshold_list.append(threshold)
+        # For equal-sized train and test sets, TPR / (TPR + FPR) equals
+        # the attack precision.
+        precision_list.append(
+            true_positive_rate / (true_positive_rate + false_positive_rate))
+        recall_list.append(true_positive_rate)
+    return meaningful_threshold_list, precision_list, recall_list
+
+  def print_results(self, threshold_list=(1, 0.9, 0.8, 0.7, 0.6, 0.5)):
+    """Prints the precision-recall pair for each meaningful threshold."""
+    meaningful_threshold_list, precision_list, recall_list = (
+        self.attack_with_varied_thresholds(threshold_list))
+    for i in range(len(meaningful_threshold_list)):
+      print(f'With {meaningful_threshold_list[i]} as the threshold on the'
+            f' privacy risk score, the precision-recall pair is'
+            f' ({precision_list[i]}, {recall_list[i]}).')
+
+
 @dataclass
 class PrivacyReportMetadata:
   """Metadata about the evaluated model.
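A minimal usage sketch of the new dataclass (the risk scores below are
illustrative values, not output from a real model; in practice an instance
is built by _compute_privacy_risk_score in the second diff below):

    import numpy as np

    from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
    from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec

    # Hand-built scores for illustration only.
    result = SingleRiskScoreResult(
        slice_spec=SingleSliceSpec(),  # no feature set: the entire dataset
        train_risk_scores=np.array([0.9, 0.8, 0.6, 0.4]),
        test_risk_scores=np.array([0.5, 0.3, 0.2, 0.1]))

    # At threshold 0.7, 2/4 members and 0/4 non-members are flagged,
    # giving precision 1.0 and recall 0.5.
    thresholds, precisions, recalls = result.attack_with_varied_thresholds(
        [0.7, 0.5])
    result.print_results()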
diff --git a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
index 4df75c8..da8381a 100644
--- a/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/membership_inference_attack.py
@@ -34,6 +34,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
 
@@ -221,6 +222,66 @@ def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData,
   return AttackResults(single_attack_results=attack_results)
 
 
+def _compute_privacy_risk_score(attack_input: AttackInputData,
+                                num_bins: int = 15) -> SingleRiskScoreResult:
+  """Computes each example's likelihood of being a member (privacy risk score).
+
+  For the definition of the privacy risk score, see
+  https://arxiv.org/abs/2003.10595.
+
+  Args:
+    attack_input: input data for computing privacy risk scores.
+    num_bins: the number of bins used to compute the training/test histograms.
+
+  Returns:
+    A SingleRiskScoreResult containing the per-example privacy risk scores.
+  """
+
+  # If the losses or entropies are provided, use them directly; otherwise,
+  # compute the losses (entropies could be used instead).
+  if (attack_input.loss_train is not None and
+      attack_input.loss_test is not None):
+    train_values, test_values = attack_input.loss_train, attack_input.loss_test
+  elif (attack_input.entropy_train is not None and
+        attack_input.entropy_test is not None):
+    train_values, test_values = (attack_input.entropy_train,
+                                 attack_input.entropy_test)
+  else:
+    train_values, test_values = (attack_input.get_loss_train(),
+                                 attack_input.get_loss_test())
+
+  # Compute the histograms of the values on a log scale.
+  small_value = 1e-10
+  train_log_values = np.log(np.maximum(train_values, small_value))
+  test_log_values = np.log(np.maximum(test_values, small_value))
+
+  min_log_value = np.amin(np.concatenate((train_log_values, test_log_values)))
+  max_log_value = np.amax(np.concatenate((train_log_values, test_log_values)))
+  bins_hist = np.linspace(min_log_value, max_log_value, num_bins + 1)
+
+  train_hist, _ = np.histogram(train_log_values, bins=bins_hist)
+  train_hist = train_hist / len(train_log_values)
+  train_hist_indices = np.fmin(
+      np.digitize(train_log_values, bins=bins_hist), num_bins) - 1
+
+  test_hist, _ = np.histogram(test_log_values, bins=bins_hist)
+  test_hist = test_hist / len(test_log_values)
+  test_hist_indices = np.fmin(
+      np.digitize(test_log_values, bins=bins_hist), num_bins) - 1
+
+  # A bin's privacy risk score is the fraction of its mass that comes from
+  # training examples.
+  combined_hist = train_hist + test_hist
+  combined_hist[combined_hist == 0] = small_value
+  privacy_risk_list = train_hist / combined_hist
+  train_risk_scores = privacy_risk_list[train_hist_indices]
+  test_risk_scores = privacy_risk_list[test_hist_indices]
+
+  return SingleRiskScoreResult(slice_spec=_get_slice_spec(attack_input),
+                               train_risk_scores=train_risk_scores,
+                               test_risk_scores=test_risk_scores)
+
+
 def _compute_missing_privacy_report_metadata(
     metadata: PrivacyReportMetadata,
     attack_input: AttackInputData) -> PrivacyReportMetadata:
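The score assigned to a bin is train_hist / (train_hist + test_hist), i.e.
the estimated probability that an example whose loss falls in that bin came
from the training set. A self-contained sketch of the same estimate as
_compute_privacy_risk_score above, on toy losses (all values here are made
up for illustration):

    import numpy as np

    # Toy per-example losses: members tend to have smaller loss.
    train_losses = np.array([0.05, 0.08, 0.2, 0.6])
    test_losses = np.array([0.4, 0.9, 1.5, 2.0])

    num_bins = 4
    small_value = 1e-10

    # Histogram the log-losses over a shared range, as in the patch.
    train_log = np.log(np.maximum(train_losses, small_value))
    test_log = np.log(np.maximum(test_losses, small_value))
    edges = np.linspace(min(train_log.min(), test_log.min()),
                        max(train_log.max(), test_log.max()), num_bins + 1)

    train_frac = np.histogram(train_log, bins=edges)[0] / len(train_log)
    test_frac = np.histogram(test_log, bins=edges)[0] / len(test_log)

    # Per-bin membership probability estimate.
    combined = train_frac + test_frac
    combined[combined == 0] = small_value
    risk_per_bin = train_frac / combined

    # Map each training example back to its bin; np.fmin folds a value
    # sitting exactly on the last edge into the final bin.
    train_bins = np.fmin(np.digitize(train_log, bins=edges), num_bins) - 1
    print(risk_per_bin[train_bins])  # per-example privacy risk scores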