O1: allow audit to pull from training set
parent 99ba0b3f6d
commit 1c16496e61
1 changed file with 144 additions and 202 deletions
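
In short, this change lets the membership audit request more target points (m) than the test split contains: the shortfall is drawn from the training set, and those points are removed from training so they remain valid audit targets. A minimal sketch of that top-up step with stand-in numpy arrays (train_x, train_y, test_x, test_y mirror get_dataloaders_raw; the helper name is ours, not part of the commit):

# Sketch only: illustrates the top-up logic added in get_dataloaders3 / get_dataloaders_raw below.
import numpy as np

def top_up_targets(train_x, train_y, test_x, test_y, m):
    if m > len(test_x):
        k = m - len(test_x)                                  # how many extra target points are needed
        mask = np.full(len(train_x), False)
        mask[:k] = True
        mask = mask[np.random.permutation(len(train_x))]     # pick k training points at random

        test_x = np.concatenate([train_x[mask], test_x])     # make them auditable targets
        test_y = np.concatenate([train_y[mask], test_y])
        train_x, train_y = train_x[~mask], train_y[~mask]    # and keep them out of training
    return train_x, train_y, test_x, test_y

The diff applies this once to the transformed arrays in get_dataloaders3 and once to the raw tensors in get_dataloaders_raw.
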
@@ -34,118 +34,6 @@ DTYPE = None
DATADIR = Path("./data")


def get_k_audit(k, scores, hp):
    correct = np.sum(~scores[:k]) + np.sum(scores[-k:])

    eps_lb = get_eps_audit(
        hp['target_points'],
        2*k,
        correct,
        hp['delta'],
        hp['p_value']
    )
    return eps_lb, k, correct, len(scores)


def get_dataloaders(m=1000, train_batch_size=128, test_batch_size=10):
    seed = np.random.randint(0, 1e9)
    seed ^= int(time.time())
    pl.seed_everything(seed)

    train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: F.pad(x.unsqueeze(0),
                                          (4, 4, 4, 4), mode='reflect').squeeze()),
        transforms.ToPILImage(),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_ds = CIFAR10(root=DATADIR, train=True, download=True, transform=train_transform)
    test_ds = CIFAR10(root=DATADIR, train=False, download=True, transform=test_transform)

    # Original dataset
    x = np.stack([train_ds[i][0].numpy() for i in range(len(train_ds))])  # Applies transforms
    y = np.array(train_ds.targets).astype(np.int64)
    p = np.random.permutation(len(train_ds))

    # Choose m points to randomly exclude at chance
    S = np.full(len(train_ds), True)
    S[:m] = np.random.choice([True, False], size=m)  # Vector determining whether each candidate point is in or out
    S = S[p]

    # Store the m points which could have been included/excluded
    mask = np.full(len(train_ds), False)
    mask[:m] = True
    mask = mask[p]

    x_m = x[mask]  # These are the points being guessed at
    S_m = S[mask]  # Ground truth of inclusion/exclusion for x_m
    y_m = np.array(train_ds.targets)[mask].astype(np.int64)

    # Mislabel inclusion/exclusion examples intentionally!
    for i in range(len(y_m)):
        possible_values = np.array([v for v in range(10) if v != y_m[i]])
        y_m[i] = np.random.choice(possible_values)

    # Remove excluded points from dataset
    x_in = x[S]
    y_in = np.array(train_ds.targets).astype(np.int64)
    y_in = y_in[S]

    td = TensorDataset(torch.from_numpy(x_in), torch.from_numpy(y_in).long())
    train_dl = DataLoader(td, batch_size=train_batch_size, shuffle=True, num_workers=4)
    test_dl = DataLoader(test_ds, batch_size=test_batch_size, shuffle=True, num_workers=4)

    return train_dl, test_dl, x_in, x_m, y_m, S_m


def get_dataloaders2(m=1000, train_batch_size=128, test_batch_size=10):
    seed = np.random.randint(0, 1e9)
    seed ^= int(time.time())
    pl.seed_everything(seed)

    train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: F.pad(x.unsqueeze(0),
                                          (4, 4, 4, 4), mode='reflect').squeeze()),
        transforms.ToPILImage(),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_ds = CIFAR10(root=DATADIR, train=True, download=True, transform=train_transform)
    trainp_ds = CIFAR10(root=DATADIR, train=False, download=True, transform=test_transform)
    test_ds = CIFAR10(root=DATADIR, train=False, download=True, transform=test_transform)

    mask = random.sample(range(len(trainp_ds)), m)
    S = np.random.choice([True, False], size=m)
    S_mask = list(map(lambda x: x[1], filter(lambda x: S[x[0]], enumerate(mask))))

    x_adv = Subset(trainp_ds, mask)
    x_in_adv = Subset(trainp_ds, S_mask)

    train_ds = ConcatDataset([train_ds, x_in_adv])

    check_train_dl = DataLoader(train_ds, batch_size=1, shuffle=False, num_workers=1)
    train_dl = DataLoader(train_ds, batch_size=train_batch_size, shuffle=True, num_workers=4)
    x_adv_dl = DataLoader(x_adv, batch_size=1, shuffle=False, num_workers=1)
    test_dl = DataLoader(test_ds, batch_size=test_batch_size, shuffle=True, num_workers=4)

    return train_dl, test_dl, x_adv_dl, S, check_train_dl


def get_dataloaders3(m=1000, train_batch_size=128, test_batch_size=10):
    seed = np.random.randint(0, 1e9)
    seed ^= int(time.time())
@@ -175,10 +63,21 @@ def get_dataloaders3(m=1000, train_batch_size=128, test_batch_size=10):
    x = np.stack([test_ds[i][0].numpy() for i in range(len(test_ds))])  # Applies transforms
    y = np.array(test_ds.targets).astype(np.int64)

    # Pull points from training set when m > test set
    if m > len(x):
        k = m - len(x)
        mask = np.full(len(x_train), False)
        mask[:k] = True

        x = np.concatenate([x_train[mask], x])
        y = np.concatenate([y_train[mask], y])
        x_train = x_train[~mask]
        y_train = y_train[~mask]

    # Store the m points which could have been included/excluded
    mask = np.full(len(test_ds), False)
    mask = np.full(len(x), False)
    mask[:m] = True
    mask = mask[np.random.permutation(len(test_ds))]
    mask = mask[np.random.permutation(len(x))]

    adv_points = x[mask]
    adv_labels = y[mask]
@@ -227,9 +126,20 @@ def get_dataloaders_raw(m=1000, train_batch_size=512, test_batch_size=10):
    train_y = np.array(train_ds.targets)
    test_y = np.array(test_ds.targets)

    if m > len(test_x):
        k = m - len(test_x)
        mask = np.full(len(train_x), False)
        mask[:k] = True
        mask = mask[np.random.permutation(len(train_x))]

        test_x = np.concatenate([train_x[mask], test_x])
        test_y = np.concatenate([train_y[mask], test_y])
        train_y = train_y[~mask]
        train_x = train_x[~mask]

    mask = np.full(len(test_x), False)
    mask[:m] = True
    mask = mask[np.random.permutation(len(test_ds))]
    mask = mask[np.random.permutation(len(test_x))]
    S = np.random.choice([True, False], size=m)

    attack_x = test_x[mask][S]
@@ -267,6 +177,7 @@ def get_dataloaders_raw(m=1000, train_batch_size=512, test_batch_size=10):
    )
    return train_dl, test_dl, train_x, attack_x.numpy(), attack_y.numpy(), S


def evaluate_on(model, dataloader):
    correct = 0
    total = 0
@@ -339,7 +250,7 @@ def train_knowledge_distillation(teacher, train_dl, epochs, device, learning_rat
    return student_init, student


def train_no_cap(model, hp, train_dl, test_dl, optimizer, criterion, scheduler):
def train_no_cap(model, model_init, hp, train_dl, test_dl, optimizer, criterion, scheduler, adv_points, adv_labels, S):
    best_test_set_accuracy = 0

    for epoch in range(hp['epochs']):
@@ -365,7 +276,9 @@ def train_no_cap(model, hp, train_dl, test_dl, optimizer, criterion, scheduler):
        if epoch % 10 == 0 or epoch == hp['epochs'] - 1:
            correct, total = evaluate_on(model, test_dl)
            epoch_accuracy = round(100 * correct / total, 2)
            print(f"Epoch {epoch+1}/{hp['epochs']}: {epoch_accuracy}%")
            scores = score_model(model_init, model, adv_points, adv_labels, S)
            audits = audit_model(hp, scores)
            print(f"Epoch {epoch+1}/{hp['epochs']}: {epoch_accuracy}% | Audit : {audits[2]}/{2*audits[1]}/{audits[3]} | p[ε < {audits[0]}] < {hp['p_value']} @ ε={hp['epsilon']}")

    return best_test_set_accuracy
@@ -417,10 +330,9 @@ def load(hp, model_path, train_dl):
    return model_init, model, adv_points, adv_labels, S


def train_wrn2(hp, train_dl, test_dl):
def train_wrn2(hp, train_dl, test_dl, adv_points, adv_labels, S):
    model = wrn.WideResNet(16, 10, 4)
    model = model.to(DEVICE)
    #model = ModuleValidator.fix(model)
    ModuleValidator.validate(model, strict=True)
    model_init = copy.deepcopy(model)
@@ -461,28 +373,37 @@ def train_wrn2(hp, train_dl, test_dl):
        ) as memory_safe_data_loader:
            best_test_set_accuracy = train_no_cap(
                model,
                model_init,
                hp,
                memory_safe_data_loader,
                test_dl,
                optimizer,
                criterion,
                scheduler,
                adv_points,
                adv_labels,
                S,
            )
    else:
        print("Training without differential privacy")
        best_test_set_accuracy = train_no_cap(
            model,
            model_init,
            hp,
            train_dl,
            test_dl,
            optimizer,
            criterion,
            scheduler,
            adv_points,
            adv_labels,
            S,
        )

    return model_init, model

def train_small(hp, train_dl, test_dl):

def train_small(hp, train_dl, test_dl, adv_points, adv_labels, S):
    model = student_model.Model(num_classes=10).to(DEVICE)
    model = model.to(DEVICE)
    model = ModuleValidator.fix(model)
@@ -522,28 +443,37 @@ def train_small(hp, train_dl, test_dl):
        ) as memory_safe_data_loader:
            best_test_set_accuracy = train_no_cap(
                model,
                model_init,
                hp,
                memory_safe_data_loader,
                test_dl,
                optimizer,
                criterion,
                scheduler,
                adv_points,
                adv_labels,
                S,
            )
    else:
        print("Training without differential privacy")
        best_test_set_accuracy = train_no_cap(
            model,
            model_init,
            hp,
            train_dl,
            test_dl,
            optimizer,
            criterion,
            scheduler,
            adv_points,
            adv_labels,
            S,
        )

    return model_init, model

def train_fast(hp, train_dl, test_dl, train_x):

def train_fast(hp, train_dl, test_dl, train_x, adv_points, adv_labels, S):
    epochs = hp['epochs']
    momentum = 0.9
    weight_decay = 0.256
@@ -558,32 +488,9 @@ def train_fast(hp, train_dl, test_dl, train_x):
    weights = fast_model.patch_whitening(train_x[:10000, :, 4:-4, 4:-4])
    model = fast_model.Model(weights, c_in=3, c_out=10, scale_out=0.125)

    #train_model.to(DTYPE)
    #for module in train_model.modules():
    #    if isinstance(module, nn.BatchNorm2d):
    #        module.float()
    model.to(DEVICE)
    init_model = copy.deepcopy(model)

    # weights = [
    #     (w, torch.zeros_like(w))
    #     for w in train_model.parameters()
    #     if w.requires_grad and len(w.shape) > 1
    # ]
    # biases = [
    #     (w, torch.zeros_like(w))
    #     for w in train_model.parameters()
    #     if w.requires_grad and len(w.shape) <= 1
    # ]

    # lr_schedule = torch.cat(
    #     [
    #         torch.linspace(0e0, 2e-3, 194),
    #         torch.linspace(2e-3, 2e-4, 582),
    #     ]
    # )
    # lr_schedule_bias = 64.0 * lr_schedule

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
@@ -598,13 +505,13 @@ def train_fast(hp, train_dl, test_dl, train_x):
        gamma=0.2
    )

    train_no_cap(model, hp, train_dl, test_dl, optimizer, criterion, scheduler)
    train_no_cap(model, init_model, hp, train_dl, test_dl, optimizer, criterion, scheduler, adv_points, adv_labels, S)
    return init_model, model

def train_convnet(hp, train_dl, test_dl):

def train_convnet(hp, train_dl, test_dl, adv_points, adv_labels, S):
    model = convnet_classifier.ConvNet()
    model = model.to(DEVICE)
    #model = ModuleValidator.fix(model)
    ModuleValidator.validate(model, strict=True)
    model_init = copy.deepcopy(model)
@@ -636,28 +543,37 @@ def train_convnet(hp, train_dl, test_dl):
        ) as memory_safe_data_loader:
            best_test_set_accuracy = train_no_cap(
                model,
                model_init,
                hp,
                memory_safe_data_loader,
                test_dl,
                optimizer,
                criterion,
                scheduler,
                adv_points,
                adv_labels,
                S,
            )
    else:
        print("Training without differential privacy")
        best_test_set_accuracy = train_no_cap(
            model,
            model_init,
            hp,
            train_dl,
            test_dl,
            optimizer,
            criterion,
            scheduler,
            adv_points,
            adv_labels,
            S,
        )

    return model_init, model

def train(hp, train_dl, test_dl):

def train(hp, train_dl, test_dl, adv_points, adv_labels, S):
    model = WideResNet(
        d=hp["wrn_depth"],
        k=hp["wrn_width"],
@@ -710,28 +626,99 @@ def train(hp, train_dl, test_dl):
        ) as memory_safe_data_loader:
            best_test_set_accuracy = train_no_cap(
                model,
                model_init,
                hp,
                memory_safe_data_loader,
                test_dl,
                optimizer,
                criterion,
                scheduler,
                adv_points,
                adv_labels,
                S,
            )
    else:
        print("Training without differential privacy")
        best_test_set_accuracy = train_no_cap(
            model,
            model_init,
            hp,
            train_dl,
            test_dl,
            optimizer,
            criterion,
            scheduler,
            adv_points,
            adv_labels,
            S,
        )

    return model_init, model


def get_k_audit(k, scores, hp):
    correct = np.sum(~scores[:k]) + np.sum(scores[-k:])

    eps_lb = get_eps_audit(
        hp['target_points'],
        2*k,
        correct,
        hp['delta'],
        hp['p_value']
    )
    return eps_lb, k, correct, len(scores)


def score_model(model_init, model_trained, adv_points, adv_labels, S):
    scores = list()
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        model_init.eval()
        x_m = torch.from_numpy(adv_points).to(DEVICE)
        y_m = torch.from_numpy(adv_labels).long().to(DEVICE)

        for i in range(len(x_m)):
            x_point = x_m[i].unsqueeze(0).to(DEVICE)
            y_point = y_m[i].unsqueeze(0).to(DEVICE)
            is_in = S[i]

            wrn_outputs = model_init(x_point)
            outputs = wrn_outputs[0] if len(wrn_outputs) == 4 else wrn_outputs
            init_loss = criterion(outputs, y_point)

            wrn_outputs = model_trained(x_point)
            outputs = wrn_outputs[0] if len(wrn_outputs) == 4 else wrn_outputs
            trained_loss = criterion(outputs, y_point)

            scores.append(((init_loss - trained_loss).item(), is_in))

    scores = sorted(scores, key=lambda x: x[0])
    scores = np.array([x[1] for x in scores])
    return scores


def audit_model(hp, scores):
    audits = (0, 0, 0, 0)
    k_schedule = np.linspace(1, hp['target_points']//2, 40)
    k_schedule = np.floor(k_schedule).astype(int)

    with ProcessPoolExecutor() as executor:
        futures = {
            executor.submit(get_k_audit, k, scores, hp): k for k in k_schedule
        }

        for future in as_completed(futures):
            try:
                eps_lb, k, correct, total = future.result()
                if eps_lb > audits[0]:
                    audits = (eps_lb, k, correct, total)
            except Exception as exc:
                k = futures[future]
                print(f"'k={k}' generated an exception: {exc}")

    return audits


def main():
    global DEVICE
    global DTYPE
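
For intuition, here is a toy run of the counting rule used by get_k_audit above (the numbers are invented, not from this repo): scores is the membership vector sorted by the loss-drop score, the k lowest entries are guessed "out" and the k highest are guessed "in", and the resulting count of correct guesses is what get_eps_audit turns into a lower bound on ε.

# Toy illustration of the get_k_audit counting rule; the scores values are made up.
import numpy as np

scores = np.array([False, False, True, False, True, True, True, True])  # membership vector, already sorted by score
k = 2
correct = np.sum(~scores[:k]) + np.sum(scores[-k:])  # lowest k guessed "out", highest k guessed "in"
print(f"{correct}/{2*k} guesses correct")  # 4/4 here; this count is passed to get_eps_audit with hp['delta'], hp['p_value']
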
@@ -789,29 +776,29 @@ def main():
        test_dl = None
    elif args.fast:
        train_dl, test_dl, train_x, adv_points, adv_labels, S = get_dataloaders_raw(hp['target_points'])
        model_init, model_trained = train_fast(hp, train_dl, test_dl, train_x)
        model_init, model_trained = train_fast(hp, train_dl, test_dl, train_x, adv_points, adv_labels, S)
    else:
        train_dl, test_dl, pure_train_dl, adv_points, adv_labels, S = get_dataloaders3(hp['target_points'], hp['batch_size'])
        if args.wrn2:
            print("=========================")
            print("Training wrn2 model from meta")
            print("=========================")
            model_init, model_trained = train_wrn2(hp, train_dl, test_dl)
            model_init, model_trained = train_wrn2(hp, train_dl, test_dl, adv_points, adv_labels, S)
        elif args.convnet:
            print("=========================")
            print("Training a simple convnet")
            print("=========================")
            model_init, model_trained = train_convnet(hp, train_dl, test_dl)
            model_init, model_trained = train_convnet(hp, train_dl, test_dl, adv_points, adv_labels, S)
        elif args.studentraw:
            print("=========================")
            print("Training a raw student model")
            print("=========================")
            model_init, model_trained = train_small(hp, train_dl, test_dl)
            model_init, model_trained = train_small(hp, train_dl, test_dl, adv_points, adv_labels, S)
        elif args.distill:
            print("=========================")
            print("Training a distilled student model")
            print("=========================")
            teacher_init, teacher_trained = train(hp, train_dl, test_dl)
            teacher_init, teacher_trained = train(hp, train_dl, test_dl, adv_points, adv_labels, S)
            model_init, model_trained = train_knowledge_distillation(
                teacher=teacher_trained,
                train_dl=train_dl,
@@ -834,56 +821,11 @@ def main():
    torch.save(model_init.state_dict(), "data/init_model.pt")
    torch.save(model_trained.state_dict(), "data/trained_model.pt")

    scores = list()
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        model_init.eval()
        x_m = torch.from_numpy(adv_points).to(DEVICE)
        y_m = torch.from_numpy(adv_labels).long().to(DEVICE)
    # scores = score_model(model_init, model_trained, adv_points, adv_labels, S)
    # audits = audit_model(hp, scores)

        for i in range(len(x_m)):
            x_point = x_m[i].unsqueeze(0).to(DEVICE)
            y_point = y_m[i].unsqueeze(0).to(DEVICE)
            is_in = S[i]

            wrn_outputs = model_init(x_point)
            outputs = wrn_outputs[0] if len(wrn_outputs) == 4 else wrn_outputs
            init_loss = criterion(outputs, y_point)

            wrn_outputs = model_trained(x_point)
            outputs = wrn_outputs[0] if len(wrn_outputs) == 4 else wrn_outputs
            trained_loss = criterion(outputs, y_point)

            scores.append(((init_loss - trained_loss).item(), is_in))

    print(f"Top 10 unsorted scores: {scores[:10]}")
    print(f"Btm 10 unsorted scores: {scores[-10:]}")
    scores = sorted(scores, key=lambda x: x[0])
    print(f"Top 10 sorted scores: {scores[:10]}")
    print(f"Btm 10 sorted scores: {scores[-10:]}")
    scores = np.array([x[1] for x in scores])

    audits = (0, 0, 0, 0)
    k_schedule = np.linspace(1, hp['target_points']//2, 40)
    k_schedule = np.floor(k_schedule).astype(int)

    with ProcessPoolExecutor() as executor:
        futures = {
            executor.submit(get_k_audit, k, scores, hp): k for k in k_schedule
        }

        # Iterate through completed futures with a progress bar
        for future in tqdm(as_completed(futures), total=len(futures)):
            try:
                eps_lb, k, correct, total = future.result()
                if eps_lb > audits[0]:
                    audits = (eps_lb, k, correct, total)
            except Exception as exc:
                k = futures[future]
                print(f"'k={k}' generated an exception: {exc}")

    print(f"Audit total: {audits[2]}/{2*audits[1]}/{audits[3]}")
    print(f"p[ε < {audits[0]}] < {hp['p_value']} for true epsilon {hp['epsilon']}")
    # print(f"Audit total: {audits[2]}/{2*audits[1]}/{audits[3]}")
    # print(f"p[ε < {audits[0]}] < {hp['p_value']} for true epsilon {hp['epsilon']}")

    if test_dl is not None:
        correct, total = evaluate_on(model_init, test_dl)