diff --git a/one_run_audit/audit.py b/one_run_audit/audit.py
index d10c563..32d935d 100644
--- a/one_run_audit/audit.py
+++ b/one_run_audit/audit.py
@@ -49,11 +49,11 @@ def get_dataloaders(m=1000, train_batch_size=128, test_batch_size=10):
 
     # Original dataset
     x = np.stack(train_ds[i][0].numpy() for i in range(len(train_ds)))  # Applies transforms
+    p = np.random.permutation(len(train_ds))
 
     # Choose m points to randomly exclude at chance
     S = np.full(len(train_ds), True)
     S[:m] = np.random.choice([True, False], size=m)  # Vector of determining if each point is in or out
-    p = np.random.permutation(len(train_ds))
 
     # Store the m points which could have been included/excluded
     mask = np.full(len(train_ds), False)
@@ -62,6 +62,7 @@ def get_dataloaders(m=1000, train_batch_size=128, test_batch_size=10):
 
     x_m = x[mask]  # These are the points being guessed at
     y_m = np.array(train_ds.targets)[mask].astype(np.int64)
+    S_m = S[p][mask]  # Ground truth of inclusion/exclusion for x_m
 
     # Remove excluded points from dataset
     x_in = x[S[p]]
@@ -72,7 +73,7 @@ def get_dataloaders(m=1000, train_batch_size=128, test_batch_size=10):
     train_dl = DataLoader(td, batch_size=train_batch_size, shuffle=True, num_workers=4)
     test_dl = DataLoader(test_ds, batch_size=test_batch_size, shuffle=True, num_workers=4)
 
-    return train_dl, test_dl, x_in, x_m, y_m, S[p]
+    return train_dl, test_dl, x_in, x_m, y_m, S_m
 
 
 def evaluate_on(model, dataloader):
@@ -224,7 +225,9 @@ def main():
         "delta": 1e-5,
         "norm": args.norm,
         "batch_size": 4096,
-        "epochs": 20,
+        "epochs": 2,
+        "k+": 300,
+        "k-": 300,
     }
 
     hp['logfile'] = Path('WideResNet_{}_{}_{}_{}s_x{}_{}e_{}d_{}C.txt'.format(
@@ -238,18 +241,13 @@ def main():
         hp['norm'],
     ))
 
-    train_dl, test_dl, x_in, x_m, y_m, S = get_dataloaders(hp['target_points'], hp['batch_size'])
+    train_dl, test_dl, x_in, x_m, y_m, S_m = get_dataloaders(hp['target_points'], hp['batch_size'])
     print(f"len train: {len(train_dl)}")
-    print(f"Got vector S: {S.shape}, sum={np.sum(S)}, S[:{hp['target_points']}] = {S[:8]}")
+    print(f"Got vector Sm: {S_m.shape}, sum={np.sum(S_m)}")
     print(f"Got x_in: {x_in.shape}")
     print(f"Got x_m: {x_m.shape}")
     print(f"Got y_m: {y_m.shape}")
 
-    for x, y in train_dl:
-        print(f"dl x shape: {x.shape}")
-        print(f"dl y shape: {y.shape}")
-        break
-
     model_init, model_trained = train(hp, train_dl, test_dl)
 
     # torch.save(model_init.state_dict(), "data/init_model.pt")
@@ -265,15 +263,22 @@ def main():
     for i in range(len(x_m)):
         x_point = x_m[i].unsqueeze(0)
         y_point = y_m[i].unsqueeze(0)
+        is_in = S_m[i]
         init_loss = criterion(model_init(x_point)[0], y_point)
         trained_loss = criterion(model_trained(x_point)[0], y_point)
 
-        scores.append(init_loss - trained_loss)
+        scores.append(((init_loss - trained_loss).item(), is_in))
+
+    scores = sorted(scores, key=lambda x: x[0])
+    scores = np.array([x[1] for x in scores])
 
-    print(len(scores))
     print(scores[:10])
 
+    correct = np.sum(~scores[:hp['k-']]) + np.sum(scores[-hp['k+']:])
+    total = len(scores)
+    print(f"Audit total: {correct}/{total} = {round(correct/total*100, 2)}")
+
     correct, total = evaluate_on(model_init, train_dl)
     print(f"Init model accuracy: {correct}/{total} = {round(correct/total*100, 2)}")
 
     correct, total = evaluate_on(model_trained, test_dl)
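
Note on the get_dataloaders change: the old code returned the full-length S[p] as the ground truth for the m target points, but x_m holds only the masked rows, so the returned vector was misaligned with the targets; S_m = S[p][mask] restores the row-for-row correspondence. Below is a minimal sketch of that alignment with hypothetical sizes. The mask construction (mask = p < m) is an assumption, since the line that fills mask sits outside the hunks shown.

import numpy as np

rng = np.random.default_rng(0)
n, m = 10, 4                    # hypothetical sizes; the real run uses len(train_ds) and m=1000
x = np.arange(n)                # stand-in for the stacked images

p = rng.permutation(n)          # random relabeling of the dataset
S = np.full(n, True)
S[:m] = rng.choice([True, False], size=m)  # coin flip for the first m slots; the rest stay in

mask = p < m                    # assumed: positions whose permuted index falls in the first m slots
x_m = x[mask]                   # the m audit targets
S_m = S[p][mask]                # ground truth aligned with x_m (the fix in this diff)
x_in = x[S[p]]                  # training set with the "out" targets removed

# Every "in" target must survive the exclusion; every "out" target must not.
assert set(x_m[S_m]) <= set(x_in)
assert not (set(x_m[~S_m]) & set(x_in))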
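
Note on the audit computation added to main(): each target is scored by init_loss - trained_loss (a large score suggests training drove the loss down, i.e. the point was likely included), the scores are sorted, and the audit guesses "out" for the k- lowest and "in" for the k+ highest, abstaining on the rest. A minimal self-contained sketch of that rule on synthetic data follows; the names audit_accuracy, k_plus, and k_minus are hypothetical. One difference worth flagging: the diff normalizes correct by all m targets, which counts abstained points against the audit, whereas dividing by k+ + k- (as below) gives accuracy among confident guesses only.

import numpy as np

def audit_accuracy(scores, is_in, k_plus, k_minus):
    """Fraction of correct confident guesses: bottom k_minus guessed 'out',
    top k_plus guessed 'in', everything in between abstained."""
    order = np.argsort(scores)          # ascending: most "out"-looking first
    truth = np.asarray(is_in)[order]
    correct = np.sum(~truth[:k_minus]) + np.sum(truth[-k_plus:])
    return correct / (k_plus + k_minus)

rng = np.random.default_rng(1)
is_in = rng.choice([True, False], size=1000)
# Hypothetical scores: members tend to score higher (trained loss dropped more).
scores = rng.normal(loc=is_in.astype(float), scale=1.0)
print(audit_accuracy(scores, is_in, k_plus=300, k_minus=300))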