change risk score to membership probability

This commit is contained in:
Liwei Song 2020-12-17 15:55:46 -05:00
parent b1993344cf
commit abd8912e6c
4 changed files with 56 additions and 56 deletions

View file

@ -445,35 +445,35 @@ class SingleAttackResult:
@dataclass
class SingleRiskScoreResult:
"""Results from computing privacy risk scores.
this part shows how to leverage privacy risk score to perform attacks with thresholding on risk score
class SingleMembershipProbabilityResult:
"""Results from computing membership probabilities (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
This part shows how to use membership probabilities to perform attacks by thresholding on them.
"""
# Data slice this result was calculated for.
slice_spec: SingleSliceSpec
train_risk_scores: np.ndarray
train_membership_probs: np.ndarray
test_risk_scores: np.ndarray
test_membership_probs: np.ndarray
def attack_with_varied_thresholds(self, threshold_list):
""" For each threshold value, we count how many training and test samples with privacy risk scores larger than the threshold
""" For each threshold value, we count how many training and test samples have membership probabilities greater than or equal to the threshold
and further compute precision and recall values.
We skip the threshold value if it is larger than every sample's privacy risk score.
We skip the threshold value if it is larger than every sample's membership probability.
"""
fpr, tpr, thresholds = metrics.roc_curve(
np.concatenate((np.ones(len(self.train_risk_scores)),
np.zeros(len(self.test_risk_scores)))),
np.concatenate((self.train_risk_scores, self.test_risk_scores)),
np.concatenate((np.ones(len(self.train_membership_probs)),
np.zeros(len(self.test_membership_probs)))),
np.concatenate((self.train_membership_probs, self.test_membership_probs)),
drop_intermediate=False)
precision_list = []
recall_list = []
meaningful_threshold_list = []
max_risk_score = max(self.train_risk_scores.max(), self.test_risk_scores.max())
max_prob = max(self.train_membership_probs.max(), self.test_membership_probs.max())
for threshold in threshold_list:
if threshold <= max_risk_score:
if threshold <= max_prob:
idx = np.argwhere(thresholds>=threshold)[-1][0]
meaningful_threshold_list.append(threshold)
precision_list.append(tpr[idx]/(tpr[idx]+fpr[idx]))
@ -482,40 +482,40 @@ class SingleRiskScoreResult:
return np.array(meaningful_threshold_list), np.array(precision_list), np.array(recall_list)
def collect_results(self, threshold_list, return_roc_results=True):
""" The privacy risk score (from 0 to 1) represents each sample's probability of being in the training set.
""" The membership probability (from 0 to 1) represents each sample's probability of being in the training set.
Usually, we choose a list of threshold values from 0.5 (uncertain of training or test) to 1 (100% certain of training)
to compute corresponding attack precision and recall.
"""
meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds(threshold_list)
summary = []
summary.append('\nPrivacy risk score analysis over slice: \"%s\"' %
summary.append('\nMembership probability analysis over slice: \"%s\"' %
str(self.slice_spec))
for i in range(len(meaningful_threshold_list)):
summary.append(' with %.5f as the threshold on privacy risk score, the precision-recall pair is (%.5f, %.5f)' %
summary.append(' with %.4f as the threshold on membership probability, the precision-recall pair is (%.4f, %.4f)' %
(meaningful_threshold_list[i], precision_list[i], recall_list[i]))
if return_roc_results:
fpr, tpr, thresholds = metrics.roc_curve(
np.concatenate((np.ones(len(self.train_risk_scores)),
np.zeros(len(self.test_risk_scores)))),
np.concatenate((self.train_risk_scores, self.test_risk_scores)))
np.concatenate((np.ones(len(self.train_membership_probs)),
np.zeros(len(self.test_membership_probs)))),
np.concatenate((self.train_membership_probs, self.test_membership_probs)))
roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
summary.append(' thresholding on privacy risk score achieved an AUC of %.2f' %(roc_curve.get_auc()))
summary.append(' thresholding on privacy risk score achieved an advantage of %.2f' %(roc_curve.get_attacker_advantage()))
summary.append(' thresholding on membership probability achieved an AUC of %.2f' %(roc_curve.get_auc()))
summary.append(' thresholding on membership probability achieved an advantage of %.2f' %(roc_curve.get_attacker_advantage()))
return summary
@dataclass
class RiskScoreResults:
"""Privacy risk score results from multiple data slices.
class MembershipProbabilityResults:
"""Membership probability results from multiple data slices.
"""
risk_score_results: Iterable[SingleRiskScoreResult]
membership_prob_results: Iterable[SingleMembershipProbabilityResult]
def summary(self, threshold_list):
""" return the summary of privacy risk score analysis on all given data slices
""" return the summary of membership probability analysis on all given data slices
"""
summary = []
for single_result in self.risk_score_results:
for single_result in self.membership_prob_results:
single_summary = single_result.collect_results(threshold_list)
summary.extend(single_summary)
return '\n'.join(summary)

View file

@ -28,7 +28,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
@ -219,15 +219,15 @@ class SingleAttackResultTest(absltest.TestCase):
self.assertEqual(result.get_attacker_advantage(), 0.0)
class SingleRiskScoreResultTest(absltest.TestCase):
class SingleMembershipProbabilityResultTest(absltest.TestCase):
# Only a basic test to check the attack by setting a threshold on risk score.
# Only a basic test to check the attack by setting a threshold on membership probability.
def test_attack_with_varied_thresholds(self):
result = SingleRiskScoreResult(
result = SingleMembershipProbabilityResult(
slice_spec=SingleSliceSpec(None),
train_risk_scores=np.array([0.91,1,0.92,0.82,0.75]),
test_risk_scores=np.array([0.81,0.7,0.75,0.25,0.3]))
train_membership_probs=np.array([0.91,1,0.92,0.82,0.75]),
test_membership_probs=np.array([0.81,0.7,0.75,0.25,0.3]))
self.assertEqual(result.attack_with_varied_thresholds(threshold_list=np.array([0.8,0.7]))[1].tolist(), [0.8,0.625])
self.assertEqual(result.attack_with_varied_thresholds(threshold_list=np.array([0.8,0.7]))[2].tolist(), [0.8,1])

View file

@ -33,8 +33,8 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RiskScoreResults
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import MembershipProbabilityResults
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
@ -174,18 +174,18 @@ def run_attacks(attack_input: AttackInputData,
privacy_report_metadata=privacy_report_metadata)
def _compute_privacy_risk_score(attack_input: AttackInputData,
num_bins: int = 15) -> SingleRiskScoreResult:
"""Computes each individual point's likelihood of being a member (https://arxiv.org/abs/2003.10595).
def _compute_membership_probability(attack_input: AttackInputData,
num_bins: int = 15) -> SingleMembershipProbabilityResult:
"""Computes each individual point's likelihood of being a member (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
For an individual sample, its membership probability is computed as the posterior probability of being in the training set
after observing its prediction output by the target machine learning model.
Args:
attack_input: input data for compute privacy risk scores
attack_input: input data for computing membership probabilities
num_bins: the number of bins used to compute the training/test histogram
Returns:
privacy risk score results
membership probability results
"""
# If the loss or the entropy is provided, just use it;
@ -219,31 +219,31 @@ def _compute_privacy_risk_score(attack_input: AttackInputData,
combined_hist = train_hist+test_hist
combined_hist[combined_hist==0] = small_value
privacy_risk_list = train_hist/(combined_hist+0.0)
train_risk_scores = privacy_risk_list[train_hist_indices]
test_risk_scores = privacy_risk_list[test_hist_indices]
membership_prob_list = train_hist/(combined_hist+0.0)
train_membership_probs = membership_prob_list[train_hist_indices]
test_membership_probs = membership_prob_list[test_hist_indices]
return SingleRiskScoreResult(slice_spec=_get_slice_spec(attack_input),
train_risk_scores=train_risk_scores,
test_risk_scores=test_risk_scores)
return SingleMembershipProbabilityResult(slice_spec=_get_slice_spec(attack_input),
train_membership_probs=train_membership_probs,
test_membership_probs=test_membership_probs)
def run_privacy_risk_score_analysis(attack_input: AttackInputData,
slicing_spec: SlicingSpec = None) -> RiskScoreResults:
def run_membership_probability_analysis(attack_input: AttackInputData,
slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
"""Perform privacy risk score analysis on all given slice types
"""Perform membership probability analysis on all given slice types
Args:
attack_input: input data for compute privacy risk scores
attack_input: input data for computing membership probabilities
slicing_spec: specifies attack_input slices
Returns:
the privacy risk score results.
the membership probability results.
"""
attack_input.validate()
risk_score_results = []
membership_prob_results = []
if slicing_spec is None:
slicing_spec = SlicingSpec(entire_dataset=True)
@ -253,9 +253,9 @@ def run_privacy_risk_score_analysis(attack_input: AttackInputData,
input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
for single_slice_spec in input_slice_specs:
attack_input_slice = get_slice(attack_input, single_slice_spec)
risk_score_results.append(_compute_privacy_risk_score(attack_input_slice))
membership_prob_results.append(_compute_membership_probability(attack_input_slice))
return RiskScoreResults(risk_score_results=risk_score_results)
return MembershipProbabilityResults(membership_prob_results=membership_prob_results)

View file

@ -100,14 +100,14 @@ class RunAttacksTest(absltest.TestCase):
self.assertIsNone(mia._get_accuracy(None, labels))
def test_run_compute_privacy_risk_score_correct_score(self):
result = mia._compute_privacy_risk_score(
def test_run_compute_membership_probability_correct_probs(self):
result = mia._compute_membership_probability(
AttackInputData(
loss_train=np.array([1, 1, 1, 10, 100]),
loss_test=np.array([10, 100, 100, 1000, 10000])))
np.testing.assert_almost_equal(result.train_risk_scores, [1,1,1,0.5,0.33], decimal=2)
np.testing.assert_almost_equal(result.test_risk_scores, [0.5,0.33,0.33,0,0], decimal=2)
np.testing.assert_almost_equal(result.train_membership_probs, [1,1,1,0.5,0.33], decimal=2)
np.testing.assert_almost_equal(result.test_membership_probs, [0.5,0.33,0.33,0,0], decimal=2)
if __name__ == '__main__':