From bffecb459c36649fffa85d6365629bc80a3cc819 Mon Sep 17 00:00:00 2001
From: Akemi Izuko
Date: Sat, 23 Nov 2024 23:19:01 -0700
Subject: [PATCH] Lira: train and run shadow models

Add train_shadow(), which trains a model on a random half of the
training data and saves its state dict to a checkpoint path.

run_shadow_model() now takes the checkpoint path, device, dtype and
the data to evaluate on as arguments instead of loading CIFAR-10
itself. main() trains one shadow model and then evaluates it on both
the training and the validation data.
---
Reviewer notes (not part of the commit message):
- train_shadow() draws its random half of the data before patch
  whitening, so whitening only sees the sampled rows.
- Dropped the unused ema_* and timing locals, the commented-out
  load_cifar10 call and the duplicate index slice.
- Added docstrings; hunk line counts and diffstat updated to match.
- The blob ids on the "index" line are unchanged from the first
  version of this patch.

 cifar10-fast-simple/train.py | 138 ++++++++++++++++++++++++++++++++---
 1 file changed, 129 insertions(+), 9 deletions(-)

diff --git a/cifar10-fast-simple/train.py b/cifar10-fast-simple/train.py
index 07a3555..abde7a0 100644
--- a/cifar10-fast-simple/train.py
+++ b/cifar10-fast-simple/train.py
@@ -50,18 +50,130 @@ def eval_model(smodel, device, dtype, data, labels, batch_size):
     return eval_acc
 
 
-def run_shadow_model():
-    batch_size = 512
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    dtype = torch.float16 if device.type != "cpu" else torch.float32
-    train_data, train_targets, valid_data, valid_targets = load_cifar10(device, dtype)
-
-    smodel = load_model("shadow.pt", device, dtype, train_data)
-    eval_acc = eval_model(smodel, device, dtype, train_data, train_targets, batch_size)
+def run_shadow_model(shadow_path, device, dtype, data, labels, batch_size):
+    """Load the shadow model saved at ``shadow_path`` and print its accuracy.
+
+    The model is loaded with ``load_model`` and scored on ``data`` and
+    ``labels`` by ``eval_model``.
+    """
+    smodel = load_model(shadow_path, device, dtype, data)
+    eval_acc = eval_model(smodel, device, dtype, data, labels, batch_size)
     print(f"Evaluation Accuracy: {eval_acc:.4f}")
 
 
 
+def train_shadow(shadow_path, train_data, train_targets, valid_data, valid_targets, batch_size):
+    """Train one shadow model on a random half of the data and save it.
+
+    Half of the rows of ``train_data`` / ``train_targets`` are sampled at
+    random, a new ``model.Model`` is trained on that subset, and its
+    ``state_dict`` is saved to ``shadow_path``.
+
+    ``valid_data`` and ``valid_targets`` are accepted but not used here.
+    """
+    # Configurable parameters
+    epochs = 10
+    momentum = 0.9
+    weight_decay = 0.256
+    weight_decay_bias = 0.004
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    dtype = torch.float16 if device.type != "cpu" else torch.float32
+
+    # First, the learning rate rises from 0 to 0.002 for the first 194 batches.
+    # Next, the learning rate shrinks down to 0.0002 over the next 582 batches.
+    lr_schedule = torch.cat(
+        [
+            torch.linspace(0e0, 2e-3, 194),
+            torch.linspace(2e-3, 2e-4, 582),
+        ]
+    )
+
+    lr_schedule_bias = 64.0 * lr_schedule
+
+    torch.backends.cudnn.benchmark = True
+
+    # Randomly sample half the data per model. Sampling comes first so that
+    # the patch whitening below only sees the sampled rows.
+    # NOTE(review): the sampled indices are neither saved nor returned.
+    nb_rows = train_data.shape[0]
+    indices_in = torch.randperm(nb_rows)[: nb_rows // 2]
+    train_data = train_data[indices_in]
+    train_targets = train_targets[indices_in]
+
+    weights = model.patch_whitening(train_data[:10000, :, 4:-4, 4:-4])
+    train_model = model.Model(weights, c_in=3, c_out=10, scale_out=0.125)
+    train_model.to(dtype)
+
+    # BatchNorm2d layers are cast to float32 whatever dtype the rest uses
+    for module in train_model.modules():
+        if isinstance(module, nn.BatchNorm2d):
+            module.float()
+
+    train_model.to(device)
+
+    # Collect weights and biases and create nesterov velocity values
+    weights = [
+        (w, torch.zeros_like(w))
+        for w in train_model.parameters()
+        if w.requires_grad and len(w.shape) > 1
+    ]
+    biases = [
+        (w, torch.zeros_like(w))
+        for w in train_model.parameters()
+        if w.requires_grad and len(w.shape) <= 1
+    ]
+
+    # Number of batches processed so far; indexes the learning rate schedule
+    batch_count = 0
+
+    for _ in range(epochs):
+        # Randomly shuffle training data
+        indices = torch.randperm(len(train_data), device=device)
+        data = train_data[indices]
+        targets = train_targets[indices]
+
+        # Crop random 32x32 patches from 40x40 training data
+        data = [
+            random_crop(data[i : i + batch_size], crop_size=(32, 32))
+            for i in range(0, len(data), batch_size)
+        ]
+        data = torch.cat(data)
+
+        # Randomly flip half the training data
+        data[: len(data) // 2] = torch.flip(data[: len(data) // 2], [-1])
+
+        for i in range(0, len(data), batch_size):
+            # discard partial batches
+            if i + batch_size > len(data):
+                break
+
+            # Slice batch from data
+            inputs = data[i : i + batch_size]
+            target = targets[i : i + batch_size]
+            batch_count += 1
+
+            # Compute new gradients
+            train_model.zero_grad()
+            train_model.train(True)
+
+            logits = train_model(inputs)
+
+            loss = model.label_smoothing_loss(logits, target, alpha=0.2)
+
+            loss.sum().backward()
+
+            # Past the end of the schedule, keep using its last value
+            lr_index = min(batch_count, len(lr_schedule) - 1)
+            lr = lr_schedule[lr_index]
+            lr_bias = lr_schedule_bias[lr_index]
+
+            # Update weights and biases of training model
+            update_nesterov(weights, lr, weight_decay, momentum)
+            update_nesterov(biases, lr_bias, weight_decay_bias, momentum)
+
+    torch.save(train_model.state_dict(), shadow_path)
+
+
 def train(seed=0):
     # Configurable parameters
     epochs = 10
@@ -338,8 +450,16 @@ def main():
     print(f"Max accuracy: {max(accuracies)}")
     print(f"Mean accuracy: {mean} +- {std}")
     print()
+    batch_size = 512
+    shadow_path = "shadow.pt"
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    dtype = torch.float16 if device.type != "cpu" else torch.float32
+    train_data, train_targets, valid_data, valid_targets = load_cifar10(device, dtype)
 
-    run_shadow_model()
+    # Train one shadow model, then evaluate it on the train and validation data
+    train_shadow(shadow_path, train_data, train_targets, valid_data, valid_targets, batch_size)
+    run_shadow_model(shadow_path, device, dtype, train_data, train_targets, batch_size)
+    run_shadow_model(shadow_path, device, dtype, valid_data, valid_targets, batch_size)
 
 if __name__ == "__main__":
     main()