change risk score to membership probability
parent b1993344cf
commit abd8912e6c
4 changed files with 56 additions and 56 deletions

@@ -445,35 +445,35 @@ class SingleAttackResult:

 @dataclass
-class SingleRiskScoreResult:
+class SingleMembershipProbabilityResult:
-  """Results from computing privacy risk scores.
+  """Results from computing membership probabilities (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
-  this part shows how to leverage privacy risk score to perform attacks with thresholding on risk score
+  this part shows how to leverage membership probabilities to perform attacks with thresholding on them.
   """

   # Data slice this result was calculated for.
   slice_spec: SingleSliceSpec

-  train_risk_scores: np.ndarray
+  train_membership_probs: np.ndarray

-  test_risk_scores: np.ndarray
+  test_membership_probs: np.ndarray

   def attack_with_varied_thresholds(self, threshold_list):
-    """ For each threshold value, we count how many training and test samples with privacy risk scores larger than the threshold
+    """ For each threshold value, we count how many training and test samples with membership probabilities larger than the threshold
     and further compute precision and recall values.
-    We skip the threshold value if it is larger than every sample's privacy risk score.
+    We skip the threshold value if it is larger than every sample's membership probability.
     """
     fpr, tpr, thresholds = metrics.roc_curve(
-        np.concatenate((np.ones(len(self.train_risk_scores)),
+        np.concatenate((np.ones(len(self.train_membership_probs)),
-                        np.zeros(len(self.test_risk_scores)))),
+                        np.zeros(len(self.test_membership_probs)))),
-        np.concatenate((self.train_risk_scores, self.test_risk_scores)),
+        np.concatenate((self.train_membership_probs, self.test_membership_probs)),
         drop_intermediate=False)

     precision_list = []
     recall_list = []
     meaningful_threshold_list = []
-    max_risk_score = max(self.train_risk_scores.max(), self.test_risk_scores.max())
+    max_prob = max(self.train_membership_probs.max(), self.test_membership_probs.max())
     for threshold in threshold_list:
-      if threshold <= max_risk_score:
+      if threshold <= max_prob:
         idx = np.argwhere(thresholds>=threshold)[-1][0]
         meaningful_threshold_list.append(threshold)
         precision_list.append(tpr[idx]/(tpr[idx]+fpr[idx]))
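For orientation, the thresholding attack above reduces to counting, per threshold, how many training and test samples have membership probability at or above the threshold. Below is a minimal standalone sketch of that counting in plain NumPy; the helper name and the sample probabilities are invented for illustration, and note that the library code computes tpr/(tpr+fpr) from the ROC curve, which equals this plain precision only when the training and test sets are the same size.

import numpy as np

def precision_recall_at_thresholds(train_probs, test_probs, thresholds):
  # Treat samples with membership probability >= threshold as predicted members,
  # then compute precision and recall against the true train/test split.
  results = []
  for t in thresholds:
    true_positives = np.sum(train_probs >= t)
    false_positives = np.sum(test_probs >= t)
    if true_positives + false_positives == 0:
      continue  # threshold above every sample's probability: skip it
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / len(train_probs)
    results.append((float(t), float(precision), float(recall)))
  return results

# Made-up probabilities, purely for illustration.
print(precision_recall_at_thresholds(
    np.array([0.9, 0.8, 0.6]), np.array([0.7, 0.4, 0.2]), [0.75, 0.5]))
# roughly [(0.75, 1.0, 0.67), (0.5, 0.75, 1.0)]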
@@ -482,40 +482,40 @@ class SingleRiskScoreResult:

     return np.array(meaningful_threshold_list), np.array(precision_list), np.array(recall_list)

   def collect_results(self, threshold_list, return_roc_results=True):
-    """ The privacy risk score (from 0 to 1) represents each sample's probability of being in the training set.
+    """ The membership probability (from 0 to 1) represents each sample's probability of being in the training set.
     Usually, we choose a list of threshold values from 0.5 (uncertain of training or test) to 1 (100% certain of training)
     to compute corresponding attack precision and recall.
     """
     meaningful_threshold_list, precision_list, recall_list = self.attack_with_varied_thresholds(threshold_list)
     summary = []
-    summary.append('\nPrivacy risk score analysis over slice: \"%s\"' %
+    summary.append('\nMembership probability analysis over slice: \"%s\"' %
                    str(self.slice_spec))
     for i in range(len(meaningful_threshold_list)):
-      summary.append(' with %.5f as the threshold on privacy risk score, the precision-recall pair is (%.5f, %.5f)' %
+      summary.append(' with %.4f as the threshold on membership probability, the precision-recall pair is (%.4f, %.4f)' %
                      (meaningful_threshold_list[i], precision_list[i], recall_list[i]))
     if return_roc_results:
       fpr, tpr, thresholds = metrics.roc_curve(
-          np.concatenate((np.ones(len(self.train_risk_scores)),
+          np.concatenate((np.ones(len(self.train_membership_probs)),
-                          np.zeros(len(self.test_risk_scores)))),
+                          np.zeros(len(self.test_membership_probs)))),
-          np.concatenate((self.train_risk_scores, self.test_risk_scores)))
+          np.concatenate((self.train_membership_probs, self.test_membership_probs)))
       roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)
-      summary.append(' thresholding on privacy risk score achieved an AUC of %.2f' %(roc_curve.get_auc()))
+      summary.append(' thresholding on membership probability achieved an AUC of %.2f' %(roc_curve.get_auc()))
-      summary.append(' thresholding on privacy risk score achieved an advantage of %.2f' %(roc_curve.get_attacker_advantage()))
+      summary.append(' thresholding on membership probability achieved an advantage of %.2f' %(roc_curve.get_attacker_advantage()))
     return summary


 @dataclass
-class RiskScoreResults:
+class MembershipProbabilityResults:
-  """Privacy risk score results from multiple data slices.
+  """Membership probability results from multiple data slices.
   """

-  risk_score_results: Iterable[SingleRiskScoreResult]
+  membership_prob_results: Iterable[SingleMembershipProbabilityResult]

   def summary(self, threshold_list):
-    """ return the summary of privacy risk score analysis on all given data slices
+    """ return the summary of membership probability analysis on all given data slices
     """
     summary = []
-    for single_result in self.risk_score_results:
+    for single_result in self.membership_prob_results:
       single_summary = single_result.collect_results(threshold_list)
       summary.extend(single_summary)
     return '\n'.join(summary)
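Since MembershipProbabilityResults.summary simply concatenates each slice's collect_results output, a caller can aggregate several slices into one report. A hedged usage sketch, assuming a tensorflow_privacy build that includes this commit; the probability arrays are invented:

import numpy as np
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import MembershipProbabilityResults
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec

# One result per data slice; here a single whole-dataset slice with invented probabilities.
single = SingleMembershipProbabilityResult(
    slice_spec=SingleSliceSpec(None),
    train_membership_probs=np.array([0.95, 0.9, 0.7, 0.6]),
    test_membership_probs=np.array([0.8, 0.5, 0.4, 0.1]))

results = MembershipProbabilityResults(membership_prob_results=[single])

# Thresholds from 0.5 (uncertain) to 1 (certain member), as the docstring suggests.
print(results.summary(threshold_list=np.array([0.9, 0.8, 0.7, 0.6, 0.5])))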
@@ -28,7 +28,7 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import PrivacyReportMetadata
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RocCurve
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingFeature
@@ -219,15 +219,15 @@ class SingleAttackResultTest(absltest.TestCase):
     self.assertEqual(result.get_attacker_advantage(), 0.0)


-class SingleRiskScoreResultTest(absltest.TestCase):
+class SingleMembershipProbabilityResultTest(absltest.TestCase):

-  # Only a basic test to check the attack by setting a threshold on risk score.
+  # Only a basic test to check the attack by setting a threshold on membership probability.
   def test_attack_with_varied_thresholds(self):

-    result = SingleRiskScoreResult(
+    result = SingleMembershipProbabilityResult(
         slice_spec=SingleSliceSpec(None),
-        train_risk_scores=np.array([0.91,1,0.92,0.82,0.75]),
+        train_membership_probs=np.array([0.91,1,0.92,0.82,0.75]),
-        test_risk_scores=np.array([0.81,0.7,0.75,0.25,0.3]))
+        test_membership_probs=np.array([0.81,0.7,0.75,0.25,0.3]))

     self.assertEqual(result.attack_with_varied_thresholds(threshold_list=np.array([0.8,0.7]))[1].tolist(), [0.8,0.625])
     self.assertEqual(result.attack_with_varied_thresholds(threshold_list=np.array([0.8,0.7]))[2].tolist(), [0.8,1])
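As a quick check on the expected values in this test: with threshold 0.8, four of the five training probabilities (0.91, 1, 0.92, 0.82) and one test probability (0.81) are at or above the threshold, giving precision 4/5 = 0.8 and recall 4/5 = 0.8; with threshold 0.7, all five training probabilities and three test probabilities (0.81, 0.7, 0.75) qualify, giving precision 5/8 = 0.625 and recall 5/5 = 1. Because the training and test sets are the same size here, the tpr/(tpr+fpr) expression in the diffed code reduces to that plain precision.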
@@ -33,8 +33,8 @@ from tensorflow_privacy.privacy.membership_inference_attack.data_structures impo
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleAttackResult
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleSliceSpec
 from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleRiskScoreResult
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SingleMembershipProbabilityResult
-from tensorflow_privacy.privacy.membership_inference_attack.data_structures import RiskScoreResults
+from tensorflow_privacy.privacy.membership_inference_attack.data_structures import MembershipProbabilityResults
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_single_slice_specs
 from tensorflow_privacy.privacy.membership_inference_attack.dataset_slicing import get_slice
@@ -174,18 +174,18 @@ def run_attacks(attack_input: AttackInputData,
       privacy_report_metadata=privacy_report_metadata)


-def _compute_privacy_risk_score(attack_input: AttackInputData,
+def _compute_membership_probability(attack_input: AttackInputData,
-                                num_bins: int = 15) -> SingleRiskScoreResult:
+                                    num_bins: int = 15) -> SingleMembershipProbabilityResult:
-  """Computes each individual point's likelihood of being a member (https://arxiv.org/abs/2003.10595).
+  """Computes each individual point's likelihood of being a member (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).
   For an individual sample, its privacy risk score is computed as the posterior probability of being in the training set
   after observing its prediction output by the target machine learning model.

   Args:
-    attack_input: input data for compute privacy risk scores
+    attack_input: input data for compute membership probability
     num_bins: the number of bins used to compute the training/test histogram

   Returns:
-    privacy risk score results
+    membership probability results
   """

   # If the loss or the entropy is provided, just use it;
@@ -219,31 +219,31 @@ def _compute_privacy_risk_score(attack_input: AttackInputData,

   combined_hist = train_hist+test_hist
   combined_hist[combined_hist==0] = small_value
-  privacy_risk_list = train_hist/(combined_hist+0.0)
+  membership_prob_list = train_hist/(combined_hist+0.0)
-  train_risk_scores = privacy_risk_list[train_hist_indices]
+  train_membership_probs = membership_prob_list[train_hist_indices]
-  test_risk_scores = privacy_risk_list[test_hist_indices]
+  test_membership_probs = membership_prob_list[test_hist_indices]

-  return SingleRiskScoreResult(slice_spec=_get_slice_spec(attack_input),
+  return SingleMembershipProbabilityResult(slice_spec=_get_slice_spec(attack_input),
-                               train_risk_scores=train_risk_scores,
+                                           train_membership_probs=train_membership_probs,
-                               test_risk_scores=test_risk_scores)
+                                           test_membership_probs=test_membership_probs)


-def run_privacy_risk_score_analysis(attack_input: AttackInputData,
+def run_membership_probability_analysis(attack_input: AttackInputData,
-                                    slicing_spec: SlicingSpec = None) -> RiskScoreResults:
+                                        slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:

-  """Perform privacy risk score analysis on all given slice types
+  """Perform membership probability analysis on all given slice types

   Args:
-    attack_input: input data for compute privacy risk scores
+    attack_input: input data for compute membership probabilities
     slicing_spec: specifies attack_input slices

   Returns:
-    the privacy risk score results.
+    the membership probability results.
   """
   attack_input.validate()
-  risk_score_results = []
+  membership_prob_results = []

   if slicing_spec is None:
     slicing_spec = SlicingSpec(entire_dataset=True)
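The start of the hunk above shows only the tail of the computation inside _compute_membership_probability, but the idea is a histogram ratio: training and test losses (or entropies) are binned on a common grid, and a sample's membership probability is the fraction of training examples in its bin. A simplified, self-contained sketch of that ratio idea follows; the function name, the equal-width binning, and the small_value constant are illustrative assumptions rather than the library's exact implementation.

import numpy as np

def membership_probs_from_scores(train_scores, test_scores, num_bins=15):
  # Bin train and test scores jointly; a sample's membership probability is
  # train_count / (train_count + test_count) in its bin.
  small_value = 1e-10  # assumed constant; avoids division by zero in empty bins
  all_scores = np.concatenate((train_scores, test_scores))
  bin_edges = np.linspace(all_scores.min(), all_scores.max(), num_bins + 1)

  train_hist, _ = np.histogram(train_scores, bins=bin_edges)
  test_hist, _ = np.histogram(test_scores, bins=bin_edges)

  # Index of the bin each sample falls into (clipped so the max value maps to the last bin).
  train_idx = np.clip(np.digitize(train_scores, bin_edges) - 1, 0, num_bins - 1)
  test_idx = np.clip(np.digitize(test_scores, bin_edges) - 1, 0, num_bins - 1)

  combined_hist = (train_hist + test_hist).astype(float)
  combined_hist[combined_hist == 0] = small_value
  membership_prob_list = train_hist / combined_hist

  return membership_prob_list[train_idx], membership_prob_list[test_idx]

train_probs, test_probs = membership_probs_from_scores(
    np.array([0.1, 0.2, 0.2, 0.9]), np.array([0.8, 0.9, 1.5, 2.0]))
print(train_probs)  # high for losses that occur mostly in the training data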
@@ -253,9 +253,9 @@ def run_privacy_risk_score_analysis(attack_input: AttackInputData,
   input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
   for single_slice_spec in input_slice_specs:
     attack_input_slice = get_slice(attack_input, single_slice_spec)
-    risk_score_results.append(_compute_privacy_risk_score(attack_input_slice))
+    membership_prob_results.append(_compute_membership_probability(attack_input_slice))

-  return RiskScoreResults(risk_score_results=risk_score_results)
+  return MembershipProbabilityResults(membership_prob_results=membership_prob_results)
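With the entry point renamed, an end-to-end call might look roughly like the following. This is a sketch assuming a build of tensorflow_privacy that contains this commit and assuming the membership_inference_attack module path matches the data_structures imports shown above; the loss arrays are made up.

import numpy as np
from tensorflow_privacy.privacy.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.membership_inference_attack.data_structures import SlicingSpec

# Made-up per-example losses from some target model; members tend to have lower loss.
attack_input = AttackInputData(
    loss_train=np.array([0.1, 0.2, 0.4, 0.3, 0.2]),
    loss_test=np.array([0.9, 1.5, 0.8, 1.1, 2.0]))

membership_prob_results = mia.run_membership_probability_analysis(
    attack_input, slicing_spec=SlicingSpec(entire_dataset=True))

# Thresholds from 0.5 (uncertain) to 1 (certain member), as the docstring recommends.
print(membership_prob_results.summary(threshold_list=np.array([0.9, 0.7, 0.5])))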
@@ -100,14 +100,14 @@ class RunAttacksTest(absltest.TestCase):
     self.assertIsNone(mia._get_accuracy(None, labels))

-  def test_run_compute_privacy_risk_score_correct_score(self):
+  def test_run_compute_membership_probability_correct_probs(self):
-    result = mia._compute_privacy_risk_score(
+    result = mia._compute_membership_probability(
         AttackInputData(
             loss_train=np.array([1, 1, 1, 10, 100]),
             loss_test=np.array([10, 100, 100, 1000, 10000])))

-    np.testing.assert_almost_equal(result.train_risk_scores, [1,1,1,0.5,0.33], decimal=2)
+    np.testing.assert_almost_equal(result.train_membership_probs, [1,1,1,0.5,0.33], decimal=2)
-    np.testing.assert_almost_equal(result.test_risk_scores, [0.5,0.33,0.33,0,0], decimal=2)
+    np.testing.assert_almost_equal(result.test_membership_probs, [0.5,0.33,0.33,0,0], decimal=2)


 if __name__ == '__main__':