add privacy risk score
This commit is contained in:
parent
e7c21abb09
commit
21a891c569
2 changed files with 84 additions and 0 deletions
|
@ -529,6 +529,39 @@ class SingleAttackResult:
|
|||
])
|
||||
|
||||
|
||||
@dataclass
class SingleRiskScoreResult:
  """Privacy risk scores for one data slice, plus a threshold-based attack.

  This part is preliminary: it shows how privacy risk scores can be turned
  into a membership inference attack by flagging every example whose score
  reaches a given threshold.
  """

  # Data slice this result was calculated for.
  slice_spec: SingleSliceSpec

  # Risk scores of the training examples (counted as positives by the attack).
  train_risk_scores: np.ndarray

  # Risk scores of the test examples (counted as negatives by the attack).
  test_risk_scores: np.ndarray

  def attack_with_varied_thresholds(self, threshold_list):
    """Evaluates the thresholding attack at each threshold.

    An example is flagged when its risk score is >= the threshold. Precision
    is computed from the per-set flagged fractions (train fraction divided by
    train fraction + test fraction), not from raw counts, so the two sets are
    weighted equally regardless of their sizes.

    Args:
      threshold_list: iterable of thresholds on the privacy risk score.

    Returns:
      A tuple `(meaningful_threshold_list, precision_list, recall_list)` of
      equally long lists. Thresholds that flag no example at all are omitted
      because precision is undefined for them. If either score array is empty
      all three lists are empty.
    """
    train_scores = np.asarray(self.train_risk_scores)
    test_scores = np.asarray(self.test_risk_scores)

    meaningful_threshold_list = []
    precision_list = []
    recall_list = []

    # With an empty set the flagged fraction is 0/0; bail out explicitly
    # instead of relying on NaN comparisons to drop every threshold.
    if len(train_scores) == 0 or len(test_scores) == 0:
      return meaningful_threshold_list, precision_list, recall_list

    for threshold in threshold_list:
      true_positive_rate = np.sum(train_scores >= threshold) / len(train_scores)
      false_positive_rate = np.sum(test_scores >= threshold) / len(test_scores)
      flagged_rate = true_positive_rate + false_positive_rate
      if flagged_rate > 0:
        meaningful_threshold_list.append(threshold)
        precision_list.append(true_positive_rate / flagged_rate)
        recall_list.append(true_positive_rate)

    return meaningful_threshold_list, precision_list, recall_list

  def print_results(self, threshold_list=(1, 0.9, 0.8, 0.7, 0.6, 0.5)):
    """Prints the precision-recall pair obtained at each meaningful threshold.

    Args:
      threshold_list: iterable of thresholds on the privacy risk score. The
        default is an immutable tuple so it cannot be altered between calls.
    """
    thresholds, precisions, recalls = self.attack_with_varied_thresholds(
        threshold_list)
    for threshold, precision, recall in zip(thresholds, precisions, recalls):
      print(f"with {threshold} as the threshold on privacy risk score, "
            f"the precision-recall pair is {(precision, recall)}")
|
||||
|
||||
|
||||
@dataclass
|
||||
class PrivacyReportMetadata:
|
||||
"""Metadata about the evaluated model.
|
||||
|
|
|
@ -34,6 +34,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
|
|||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
|
||||
|
||||
|
@ -221,6 +222,56 @@ def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData,
|
|||
return AttackResults(single_attack_results=attack_results)
|
||||
|
||||
|
||||
def _compute_privacy_risk_score(attack_input: AttackInputData,
                                num_bins: int = 15):
  """Computes each example's likelihood of being a training member.

  Implements the privacy risk score of https://arxiv.org/abs/2003.10595: the
  train and test values are histogrammed on a shared log-scale grid, and an
  example's score is the share of training mass in its bin, i.e.
  train_fraction / (train_fraction + test_fraction).

  Args:
    attack_input: input data for computing privacy risk scores. Precomputed
      losses are used if present for both train and test, otherwise
      precomputed entropies, otherwise the losses returned by
      `get_loss_train()` / `get_loss_test()`.
    num_bins: the number of bins used to compute the training/test
      histograms; defaults to 15.

  Returns:
    A SingleRiskScoreResult holding the risk scores of the train and test
    examples.

  Raises:
    ValueError: if `num_bins` is smaller than 1, or if the train or test
      values are empty (the histograms would then be undefined).
  """
  if num_bins < 1:
    raise ValueError(f'num_bins must be at least 1, got {num_bins}.')

  # If the loss or the entropy is provided, just use it; otherwise call the
  # function to compute the loss (entropy would be an equally valid choice).
  if attack_input.loss_train is not None and attack_input.loss_test is not None:
    train_values, test_values = attack_input.loss_train, attack_input.loss_test
  elif (attack_input.entropy_train is not None and
        attack_input.entropy_test is not None):
    train_values = attack_input.entropy_train
    test_values = attack_input.entropy_test
  else:
    train_values = attack_input.get_loss_train()
    test_values = attack_input.get_loss_test()

  train_values = np.asarray(train_values)
  test_values = np.asarray(test_values)
  # An empty side would normalise its histogram by zero and silently yield
  # NaN risk scores, so reject it up front.
  if train_values.size == 0 or test_values.size == 0:
    raise ValueError(
        'Both train and test values must be non-empty to compute privacy '
        'risk scores.')

  # Work in log scale; clip first so zero (or negative) values stay finite.
  small_value = 1e-10
  train_log_values = np.log(np.maximum(train_values, small_value))
  test_log_values = np.log(np.maximum(test_values, small_value))

  # Shared, equally spaced bin edges spanning all train and test values.
  all_log_values = np.concatenate((train_log_values, test_log_values))
  bins_hist = np.linspace(
      np.amin(all_log_values), np.amax(all_log_values), num_bins + 1)

  def _normalized_hist_and_indices(log_values):
    """Returns the per-bin fraction of `log_values` and each value's bin."""
    hist, _ = np.histogram(log_values, bins=bins_hist)
    # np.digitize is 1-based and puts the maximum value one past the last
    # bin; clamp it into the last bin, then shift to 0-based indices.
    indices = np.minimum(np.digitize(log_values, bins=bins_hist), num_bins) - 1
    return hist / len(log_values), indices

  train_hist, train_hist_indices = _normalized_hist_and_indices(
      train_log_values)
  test_hist, test_hist_indices = _normalized_hist_and_indices(test_log_values)

  # Bins holding no example are never looked up below; give them a tiny
  # denominator only to avoid a 0/0 division.
  combined_hist = train_hist + test_hist
  combined_hist[combined_hist == 0] = small_value
  privacy_risk_list = train_hist / combined_hist

  return SingleRiskScoreResult(
      slice_spec=_get_slice_spec(attack_input),
      train_risk_scores=privacy_risk_list[train_hist_indices],
      test_risk_scores=privacy_risk_list[test_hist_indices])
|
||||
|
||||
|
||||
def _compute_missing_privacy_report_metadata(
|
||||
metadata: PrivacyReportMetadata,
|
||||
attack_input: AttackInputData) -> PrivacyReportMetadata:
|
||||
|
|
Loading…
Reference in a new issue