124 lines
4.6 KiB
Python
124 lines
4.6 KiB
Python
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
|
||
|
"""Class for running auditing procedure."""
|
||
|
|
||
|
from __future__ import absolute_import
|
||
|
from __future__ import division
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import numpy as np
|
||
|
from statsmodels.stats import proportion
|
||
|
|
||
|
import attacks
|
||
|
|
||
|
def compute_results(poison_scores, unpois_scores, pois_ct,
                    alpha=0.05, threshold=None):
    """Searches over thresholds for the best epsilon lower bound and accuracy.

    Args:
      poison_scores: sequence (list, tuple or array) of scores from
        poisoned models
      unpois_scores: sequence (list, tuple or array) of scores from
        unpoisoned models
      pois_ct: number of poison points
      alpha: confidence parameter
      threshold: if None, search over all thresholds (every distinct
        observed score), else use given threshold

    Returns:
      A tuple (best_threshold, best_epsilon, best_acc). best_epsilon is the
      largest epsilon found and best_threshold the threshold that produced
      it; they stay at 0 and None when no threshold yields epsilon > 0.
      best_acc is the largest accuracy over all thresholds tried, which may
      come from a different threshold than best_threshold.
    """
    poison_arr = np.array(poison_scores)
    unpois_arr = np.array(unpois_scores)

    if threshold is None:  # search for best threshold
        # Concatenate explicitly: `+` only joins two plain lists. On numpy
        # arrays it adds element-wise (or fails to broadcast), which would
        # silently produce the wrong set of candidate thresholds.
        all_thresholds = np.unique(
            np.concatenate((poison_arr.ravel(), unpois_arr.ravel())))
    else:
        all_thresholds = [threshold]

    best_threshold, best_epsilon, best_acc = None, 0, 0
    for thresh in all_thresholds:
        epsilon, acc = compute_epsilon_and_acc(poison_arr, unpois_arr, thresh,
                                               alpha, pois_ct)
        if epsilon > best_epsilon:
            best_epsilon, best_threshold = epsilon, thresh
        # Accuracy is tracked independently of which threshold wins on epsilon.
        best_acc = max(best_acc, acc)
    return best_threshold, best_epsilon, best_acc
|
||
|
|
||
|
|
||
|
def compute_epsilon_and_acc(poison_arr, unpois_arr, threshold, alpha, pois_ct):
    """Compute the epsilon lower bound and accuracy at a single threshold.

    Args:
      poison_arr: array of scores from poisoned models
      unpois_arr: array of scores from unpoisoned models
      threshold: a score counts as a detection when it is strictly greater
        than this value
      alpha: confidence parameter handed to the interval computation
      pois_ct: number of poison points; the log-ratio is divided by it

    Returns:
      A tuple (epsilon, acc), or (0, 0) when the bounds are too close to
      0 or 1 for the ratio to be computed safely.
    """
    poison_hits = (poison_arr > threshold).sum()
    unpois_hits = (unpois_arr > threshold).sum()

    # clopper_pearson uses alpha/2 budget on upper and lower. Only the lower
    # bound of the poisoned rate and the upper bound of the unpoisoned rate
    # are kept, so total budget will be 2*alpha/2 = alpha
    poison_interval = proportion.proportion_confint(
        poison_hits, poison_arr.size, alpha, method='beta')
    unpois_interval = proportion.proportion_confint(
        unpois_hits, unpois_arr.size, alpha, method='beta')
    p1 = poison_interval[0]
    p0 = unpois_interval[1]

    # divide by zero issues
    degenerate = (p1 <= 1e-5) or (p0 >= 1 - 1e-5)
    if degenerate:
        return 0, 0

    if (p0 + p1) > 1:  # see Appendix A
        flipped_p0 = 1 - p1
        flipped_p1 = 1 - p0
        p0, p1 = flipped_p0, flipped_p1

    acc = (p1 + (1 - p0)) / 2  # this is not necessarily the best accuracy
    epsilon = np.log(p1 / p0) / pois_ct
    return epsilon, acc
|
||
|
|
||
|
|
||
|
class AuditAttack(object):
    """Audit attack class. Generates poisoning, then runs auditing algorithm."""

    def __init__(self, trn_x, trn_y, train_function):
        """Stores the training data and the scoring callback.

        Args:
          trn_x: training features
          trn_y: training labels
          train_function: function returning membership score. It is called
            with a single tuple (x, y, sample_x, sample_y, trial_index).
        """
        self.trn_x, self.trn_y = trn_x, trn_y
        self.train_function = train_function
        # Filled in by run() on first use, or assigned directly by the
        # caller. run_experiments() reads its 'data' and 'pois' entries.
        self.poisoning = None

    def make_poisoning(self, pois_ct, attack_type, l2_norm=10):
        """Get poisoning data."""
        return attacks.make_many_pois(self.trn_x, self.trn_y, [pois_ct],
                                      attack=attack_type, l2_norm=l2_norm)

    def run_experiments(self, num_trials):
        """Runs all training experiments sequentially.

        For each trial, train_function is called first on the poisoned
        dataset and then on the unpoisoned one. The last tuple element is
        `i` for the poisoned call and `num_trials + i` for the unpoisoned
        call, so every call receives a distinct index (presumably used as a
        seed -- confirm against the train_function implementation).

        Args:
          num_trials: number of poisoned/unpoisoned pairs to train

        Returns:
          A tuple (poison_scores, unpois_scores) of two lists, each holding
          num_trials return values of train_function.

        Raises:
          RuntimeError: if self.poisoning has not been set.
        """
        if self.poisoning is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object is not subscriptable".
            raise RuntimeError(
                'No poisoning data available: call run() or assign '
                'self.poisoning before run_experiments().')

        (pois_x1, pois_y1), (pois_x2, pois_y2) = self.poisoning['data']
        sample_x, sample_y = self.poisoning['pois']

        poison_scores = []
        unpois_scores = []

        for i in range(num_trials):
            poison_tuple = (pois_x1, pois_y1, sample_x, sample_y, i)
            unpois_tuple = (pois_x2, pois_y2, sample_x, sample_y,
                            num_trials + i)
            poison_scores.append(self.train_function(poison_tuple))
            unpois_scores.append(self.train_function(unpois_tuple))

        return poison_scores, unpois_scores

    def run(self, pois_ct, attack_type, num_trials, alpha=0.05,
            threshold=None, l2_norm=10):
        """Complete auditing algorithm. Generates poisoning if necessary.

        Args:
          pois_ct: number of poison points
          attack_type: attack identifier forwarded to make_poisoning
          num_trials: number of poisoned/unpoisoned model pairs to train
          alpha: confidence parameter forwarded to compute_results
          threshold: fixed threshold, or None to search over all thresholds
          l2_norm: forwarded to make_poisoning

        Returns:
          The (best_threshold, best_epsilon, best_acc) tuple produced by
          compute_results.
        """
        # NOTE(review): poisoning is generated only once. A later call with
        # a different pois_ct, attack_type or l2_norm reuses the cached data
        # while still passing the new pois_ct to compute_results.
        if self.poisoning is None:
            self.poisoning = self.make_poisoning(pois_ct, attack_type, l2_norm)
            self.poisoning['data'] = self.poisoning[pois_ct]

        poison_scores, unpois_scores = self.run_experiments(num_trials)

        results = compute_results(poison_scores, unpois_scores, pois_ct,
                                  alpha=alpha, threshold=threshold)
        return results
|