change risk score to membership probability
This commit is contained in:
parent
b1993344cf
commit
abd8912e6c
4 changed files with 56 additions and 56 deletions
|
@ -445,35 +445,35 @@ class SingleAttackResult:
|
|||
|
||||
|
||||
@dataclass
|
||||
class SingleRiskScoreResult:
|
||||
"""Results from computing privacy risk scores.
|
||||
this part shows how to leverage privacy risk score to perform attacks with thresholding on risk score
|
||||
class SingleMembershipProbabilityResult:
|
||||
"""Results from computing membership probabilities (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
|
||||
This part shows how to use membership probabilities to perform attacks by thresholding on them.
|
||||
"""
|
||||
|
||||
# Data slice this result was calculated for.
|
||||
slice_spec: SingleSliceSpec
|
||||
|
||||
train_risk_scores: np.ndarray
|
||||
train_membership_probs: np.ndarray
|
||||
|
||||
test_risk_scores: np.ndarray
|
||||
test_membership_probs: np.ndarray
|
||||
|
||||
def attack_with_varied_thresholds(self, threshold_list):
|
||||
""" For each threshold value, we count how many training and test samples with privacy risk scores larger than the threshold
|
||||
""" For each threshold value, we count how many training and test samples with membership probabilities larger than the threshold
|
||||
and further compute precision and recall values.
|
||||
We skip the threshold value if it is larger than every sample's privacy risk score.
|
||||
We skip the threshold value if it is larger than every sample's membership probability.
|
||||
"""
|
||||
fpr, tpr, thresholds = metrics.roc_curve(
|
||||
np.concatenate((np.ones(len(self.train_risk_scores)),
|
||||
np.zeros(len(self.test_risk_scores)))),
|
||||
np.concatenate((self.train_risk_scores, self.test_risk_scores)),
|
||||
np.concatenate((np.ones(len(self.train_membership_probs)),
|
||||
np.zeros(len(self.test_membership_probs)))),
|
||||
np.concatenate((self.train_membership_probs, self.test_membership_probs)),
|
||||
drop_intermediate=False)
|
||||
|
||||
precision_list = []
|
||||
recall_list = []
|
||||
meaningful_threshold_list = []
|
||||
max_risk_score = max(self.train_risk_scores.max(), self.test_risk_scores.max())
|
||||
max_prob = max(self.train_membership_probs.max(), self.test_membership_probs.max())
|
||||
for threshold in threshold_list:
|
||||
if threshold <= max_risk_score:
|
||||
if threshold <= max_prob:
|
||||
idx = np.argwhere(thresholds>=threshold)[-1][0]
|
||||
meaningful_threshold_list.append(threshold)
|
||||
precision_list.append(tpr[idx]/(tpr[idx]+fpr[idx]))
|
||||
|
@ -482,40 +482,40 @@ class SingleRiskScoreResult:
|
|||
return np.array(meaningful_threshold_list), np.array(precision_list), np.array(recall_list)
|
||||
|
||||
def collect_results(self, threshold_list, return_roc_results=True):
|
||||
""" The privacy risk score (from 0 to 1) represents each sample's probability of being in the training set.
|
||||
""" The membership probability (from 0 to 1) represents each sample's probability of being in the training set.
|
||||
Usually, we choose a list of threshold values from 0.5 (uncertain of training or test) to 1 (100% certain of training)
|
||||
to compute corresponding attack precision and recall.
|
||||
"""
|
||||
meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds(threshold_list)
|
||||
summary = []
|
||||
summary.append('\nPrivacy risk score analysis over slice: \"%s\"' %
|
||||
summary.append('\nMembership probability analysis over slice: \"%s\"' %
|
||||
str(self.slice_spec))
|
||||
for i in range(len(meaningful_threshold_list)):
|
||||
summary.append(' with %.5f as the threshold on privacy risk score, the precision-recall pair is (%.5f, %.5f)' %
|
||||
summary.append(' with %.4f as the threshold on membership probability, the precision-recall pair is (%.4f, %.4f)' %
|
||||
(meaningful_threshold_list[i], precision_list[i], recall_list[i]))
|
||||
if return_roc_results:
|
||||
fpr, tpr, thresholds = metrics.roc_curve(
|
||||
np.concatenate((np.ones(len(self.train_risk_scores)),
|
||||
np.zeros(len(self.test_risk_scores)))),
|
||||
np.concatenate((self.train_risk_scores, self.test_risk_scores)))
|
||||
np.concatenate((np.ones(len(self.train_membership_probs)),
|
||||
np.zeros(len(self.test_membership_probs)))),
|
||||
np.concatenate((self.train_membership_probs, self.test_membership_probs)))
|
||||
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
|
||||
summary.append(' thresholding on privacy risk score achieved an AUC of %.2f' %(roc_curve.get_auc()))
|
||||
summary.append(' thresholding on privacy risk score achieved an advantage of %.2f' %(roc_curve.get_attacker_advantage()))
|
||||
summary.append(' thresholding on membership probability achieved an AUC of %.2f' %(roc_curve.get_auc()))
|
||||
summary.append(' thresholding on membership probability achieved an advantage of %.2f' %(roc_curve.get_attacker_advantage()))
|
||||
return summary
|
||||
|
||||
|
||||
@dataclass
|
||||
class RiskScoreResults:
|
||||
"""Privacy risk score results from multiple data slices.
|
||||
class MembershipProbabilityResults:
|
||||
"""Membership probability results from multiple data slices.
|
||||
"""
|
||||
|
||||
risk_score_results: Iterable[SingleRiskScoreResult]
|
||||
membership_prob_results: Iterable[SingleMembershipProbabilityResult]
|
||||
|
||||
def summary(self, threshold_list):
|
||||
""" return the summary of privacy risk score analysis on all given data slices
|
||||
""" return the summary of membership probability analysis on all given data slices
|
||||
"""
|
||||
summary = []
|
||||
for single_result in self.risk_score_results:
|
||||
for single_result in self.membership_prob_results:
|
||||
single_summary = single_result.collect_results(threshold_list)
|
||||
summary.extend(single_summary)
|
||||
return '\n'.join(summary)
|
||||
|
|
|
@ -28,7 +28,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
|
|||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
|
||||
|
||||
|
@ -219,15 +219,15 @@ class SingleAttackResultTest(absltest.TestCase):
|
|||
self.assertEqual(result.get_attacker_advantage(), 0.0)
|
||||
|
||||
|
||||
class SingleRiskScoreResultTest(absltest.TestCase):
|
||||
class SingleMembershipProbabilityResultTest(absltest.TestCase):
|
||||
|
||||
# Only a basic test to check the attack by setting a threshold on risk score.
|
||||
# Only a basic test to check the attack by setting a threshold on membership probability.
|
||||
def test_attack_with_varied_thresholds(self):
|
||||
|
||||
result = SingleRiskScoreResult(
|
||||
result = SingleMembershipProbabilityResult(
|
||||
slice_spec=SingleSliceSpec(None),
|
||||
train_risk_scores=np.array([0.91,1,0.92,0.82,0.75]),
|
||||
test_risk_scores=np.array([0.81,0.7,0.75,0.25,0.3]))
|
||||
train_membership_probs=np.array([0.91,1,0.92,0.82,0.75]),
|
||||
test_membership_probs=np.array([0.81,0.7,0.75,0.25,0.3]))
|
||||
|
||||
self.assertEqual(result.attack_with_varied_thresholds(threshold_list=np.array([0.8,0.7]))[1].tolist(), [0.8,0.625])
|
||||
self.assertEqual(result.attack_with_varied_thresholds(threshold_list=np.array([0.8,0.7]))[2].tolist(), [0.8,1])
|
||||
|
|
|
@ -33,8 +33,8 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
|
|||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RiskScoreResults
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import MembershipProbabilityResults
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
|
||||
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
|
||||
|
||||
|
@ -174,18 +174,18 @@ def run_attacks(attack_input: AttackInputData,
|
|||
privacy_report_metadata=privacy_report_metadata)
|
||||
|
||||
|
||||
def _compute_privacy_risk_score(attack_input: AttackInputData,
|
||||
num_bins: int = 15) -> SingleRiskScoreResult:
|
||||
"""Computes each individual point's likelihood of being a member (https://arxiv.org/abs/2003.10595).
|
||||
def _compute_membership_probability(attack_input: AttackInputData,
|
||||
num_bins: int = 15) -> SingleMembershipProbabilityResult:
|
||||
"""Computes each individual point's likelihood of being a member (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
|
||||
For an individual sample, its privacy risk score is computed as the posterior probability of being in the training set
|
||||
after observing its prediction output by the target machine learning model.
|
||||
|
||||
Args:
|
||||
attack_input: input data for compute privacy risk scores
|
||||
attack_input: input data for computing membership probabilities
|
||||
num_bins: the number of bins used to compute the training/test histogram
|
||||
|
||||
Returns:
|
||||
privacy risk score results
|
||||
membership probability results
|
||||
"""
|
||||
|
||||
# If the loss or the entropy is provided, just use it;
|
||||
|
@ -219,31 +219,31 @@ def _compute_privacy_risk_score(attack_input: AttackInputData,
|
|||
|
||||
combined_hist = train_hist+test_hist
|
||||
combined_hist[combined_hist==0] = small_value
|
||||
privacy_risk_list = train_hist/(combined_hist+0.0)
|
||||
train_risk_scores = privacy_risk_list[train_hist_indices]
|
||||
test_risk_scores = privacy_risk_list[test_hist_indices]
|
||||
membership_prob_list = train_hist/(combined_hist+0.0)
|
||||
train_membership_probs = membership_prob_list[train_hist_indices]
|
||||
test_membership_probs = membership_prob_list[test_hist_indices]
|
||||
|
||||
|
||||
|
||||
return SingleRiskScoreResult(slice_spec=_get_slice_spec(attack_input),
|
||||
train_risk_scores=train_risk_scores,
|
||||
test_risk_scores=test_risk_scores)
|
||||
return SingleMembershipProbabilityResult(slice_spec=_get_slice_spec(attack_input),
|
||||
train_membership_probs=train_membership_probs,
|
||||
test_membership_probs=test_membership_probs)
|
||||
|
||||
|
||||
def run_privacy_risk_score_analysis(attack_input: AttackInputData,
|
||||
slicing_spec: SlicingSpec = None) -> RiskScoreResults:
|
||||
def run_membership_probability_analysis(attack_input: AttackInputData,
|
||||
slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
|
||||
|
||||
"""Perform privacy risk score analysis on all given slice types
|
||||
"""Perform membership probability analysis on all given slice types
|
||||
|
||||
Args:
|
||||
attack_input: input data for compute privacy risk scores
|
||||
attack_input: input data for computing membership probabilities
|
||||
slicing_spec: specifies attack_input slices
|
||||
|
||||
Returns:
|
||||
the privacy risk score results.
|
||||
the membership probability results.
|
||||
"""
|
||||
attack_input.validate()
|
||||
risk_score_results = []
|
||||
membership_prob_results = []
|
||||
|
||||
if slicing_spec is None:
|
||||
slicing_spec = SlicingSpec(entire_dataset=True)
|
||||
|
@ -253,9 +253,9 @@ def run_privacy_risk_score_analysis(attack_input: AttackInputData,
|
|||
input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
|
||||
for single_slice_spec in input_slice_specs:
|
||||
attack_input_slice = get_slice(attack_input, single_slice_spec)
|
||||
risk_score_results.append(_compute_privacy_risk_score(attack_input_slice))
|
||||
membership_prob_results.append(_compute_membership_probability(attack_input_slice))
|
||||
|
||||
return RiskScoreResults(risk_score_results=risk_score_results)
|
||||
return MembershipProbabilityResults(membership_prob_results=membership_prob_results)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -100,14 +100,14 @@ class RunAttacksTest(absltest.TestCase):
|
|||
self.assertIsNone(mia._get_accuracy(None, labels))
|
||||
|
||||
|
||||
def test_run_compute_privacy_risk_score_correct_score(self):
|
||||
result = mia._compute_privacy_risk_score(
|
||||
def test_run_compute_membership_probability_correct_probs(self):
|
||||
result = mia._compute_membership_probability(
|
||||
AttackInputData(
|
||||
loss_train=np.array([1, 1, 1, 10, 100]),
|
||||
loss_test=np.array([10, 100, 100, 1000, 10000])))
|
||||
|
||||
np.testing.assert_almost_equal(result.train_risk_scores, [1,1,1,0.5,0.33], decimal=2)
|
||||
np.testing.assert_almost_equal(result.test_risk_scores, [0.5,0.33,0.33,0,0], decimal=2)
|
||||
np.testing.assert_almost_equal(result.train_membership_probs, [1,1,1,0.5,0.33], decimal=2)
|
||||
np.testing.assert_almost_equal(result.test_membership_probs, [0.5,0.33,0.33,0,0], decimal=2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in a new issue