create a summary string for privacy risk scores

This commit is contained in:
Liwei Song 2020-12-10 17:54:50 -05:00
parent d1dcf56c44
commit e72ff861a1
2 changed files with 55 additions and 4 deletions

View file

@ -559,15 +559,37 @@ class SingleRiskScoreResult:
recall_list.append(true_positive_normalized)
return np.array(meaningful_threshold_list), np.array(precision_list), np.array(recall_list)
def print_results(self, threshold_list=np.array([1,0.9,0.8,0.7,0.6,0.5])):
    """Print the attack precision and recall for each meaningful threshold.

    The privacy risk score (from 0 to 1) represents each sample's probability
    of being in the training set. The thresholds range from 0.5 (uncertain of
    training or test) to 1 (100% certain of training).

    Args:
      threshold_list: thresholds on the privacy risk score, forwarded to
        `attack_with_varied_thresholds`.
    """
    meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds(threshold_list)
    for i in range(len(meaningful_threshold_list)):
        print(f"with {meaningful_threshold_list[i]} as the threshold on privacy risk score, the precision-recall pair is {(precision_list[i], recall_list[i])}")


def collect_results(self, threshold_list=np.array([1,0.9,0.8,0.7,0.6,0.5])):
    """Collect the attack precision and recall per threshold as text lines.

    The privacy risk score (from 0 to 1) represents each sample's probability
    of being in the training set. Here, we choose a list of threshold values
    from 0.5 (uncertain of training or test) to 1 (100% certain of training)
    to compute corresponding attack precision and recall.

    Args:
      threshold_list: thresholds on the privacy risk score, forwarded to
        `attack_with_varied_thresholds`.

    Returns:
      A list of strings: a header line naming `self.slice_spec`, followed by
      one line per threshold returned by `attack_with_varied_thresholds`.
    """
    meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds(threshold_list)
    summary = ['\nPrivacy risk score analysis over slice: \"%s\"' %
               str(self.slice_spec)]
    # The three sequences are index-aligned, so walk them in lockstep.
    for threshold, precision, recall in zip(meaningful_threshold_list,
                                            precision_list, recall_list):
        summary.append(' with %.5f as the threshold on privacy risk score, the precision-recall pair is (%.5f, %.5f)' %
                       (threshold, precision, recall))
    return summary
@dataclass
class RiskScoreResults:
    """Privacy risk score results from multiple data slices.

    Attributes:
      risk_score_results: the per-slice results; `summary` calls
        `collect_results()` on each element.
    """

    risk_score_results: Iterable[SingleRiskScoreResult]

    def summary(self):
        """Return the privacy risk score analysis of all given data slices.

        Returns:
          A single string: every line produced by each result's
          `collect_results()`, in iteration order, joined with newlines.
        """
        # Flatten the per-slice line lists directly into the join instead of
        # accumulating them in an intermediate list.
        return '\n'.join(
            line
            for single_result in self.risk_score_results
            for line in single_result.collect_results())
@dataclass

View file

@ -35,6 +35,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RiskScoreResults
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
@ -223,7 +224,7 @@ def run_seq2seq_attack(attack_input: Seq2SeqAttackInputData,
def _compute_privacy_risk_score(attack_input: AttackInputData,
num_bins: int = 15):
num_bins: int = 15) -> SingleRiskScoreResult:
"""compute each individual point's likelihood of being a member (https://arxiv.org/abs/2003.10595)
Args:
attack_input: input data for compute privacy risk scores
@ -272,6 +273,34 @@ def _compute_privacy_risk_score(attack_input: AttackInputData,
test_risk_scores=test_risk_scores)
def privacy_risk_score_analysis(attack_input: AttackInputData,
                                slicing_spec: SlicingSpec = None) -> RiskScoreResults:
    """Compute privacy risk scores for every slice selected by `slicing_spec`.

    Args:
      attack_input: input data for computing privacy risk scores
      slicing_spec: specifies the attack_input slices; when None, a spec
        covering only the entire dataset is used

    Returns:
      the privacy risk score results, one entry per slice.
    """
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)

    # The class count is only read when slicing by class is requested.
    num_classes = attack_input.num_classes if slicing_spec.by_class else None

    per_slice_scores = [
        _compute_privacy_risk_score(get_slice(attack_input, spec))
        for spec in get_single_slice_specs(slicing_spec, num_classes)
    ]
    return RiskScoreResults(risk_score_results=per_slice_scores)
def _compute_missing_privacy_report_metadata(
metadata: PrivacyReportMetadata,
attack_input: AttackInputData) -> PrivacyReportMetadata: