# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Auxiliary functions for running the optimization algorithms."""
# pylint: skip-file
# pyformat: disable

import time

from my_logistic_regression import MyLogisticRegression
import numpy as np


class CompareAlgs:
  """Class to run multiple iterative algorithms and compare the results."""

  def __init__(self, lrp, optimal_w, hyper_dict):
    """Initialize the problem.

    lrp = an instance of MyLogisticRegression
    optimal_w = optimal minimizer of the logistic loss on the dataset without privacy
    hyper_dict = dictionary of hyperparameters
    """
    self.w_opt = optimal_w
    self.lrp = lrp
    self.iters = hyper_dict["num_iteration"]
    self.hyper_params = hyper_dict
    self.clock_time = []
    self.params = []
    self.names = []

  def add_algo(self, update_rule, name):
    """Run an iterative update method and record its iterates."""
    _, dim = self.lrp.num_samples, self.lrp.dim
    wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
    w_int = wint_un / np.linalg.norm(wint_un)
    cutoff_norm = (
        100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
    )
    w_cur = w_int
    params = [w_cur]
    start_t = time.time()
    wall_clock = [0]
    for _ in range(self.iters):
      w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
      if np.linalg.norm(w_cur) > cutoff_norm:
        w_cur = w_int
        print("Stop Things Exploding!")
      params.append(w_cur)
      wall_clock.append(time.time() - start_t)
    self.clock_time.append(wall_clock)
    self.params.append(params)
    self.names.append(name)

  def wall_clock_alg(self):
    """Compute the wall-clock time of the different algorithms."""
    clock_time_dict = {}
    for time_alg, name in zip(self.clock_time, self.names):
      clock_time_dict[name] = [time_alg]
    return clock_time_dict

  def loss_vals(self):
    """Output the loss per iteration for the different methods."""
    baseline = self.lrp.loss_wor(self.w_opt)
    loss_dict = {}
    for params, name in zip(self.params, self.names):
      losses = [self.lrp.loss_wor(w) - baseline for w in params]
      loss_dict[name] = [losses]
    return loss_dict

  def accuracy_vals(self):
    """Output the accuracy per iteration for the different methods."""
    acc_dict = {}
    for params, name in zip(self.params, self.names):
      acc_vec = [self.lrp.accuracy(w) for w in params]
      acc_dict[name] = [acc_vec]
    return acc_dict

  def accuracy_np(self):
    """Output the accuracy of the optimal model without privacy."""
    return self.lrp.accuracy(self.w_opt)

  def gradnorm_vals(self):
    """Output the gradient norm per iteration for the different methods."""
    gradnorm_dict = {}
    for params, name in zip(self.params, self.names):
      grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
      gradnorm_dict[name] = [grad_norms]
    return gradnorm_dict
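

# Illustrative sketch, not part of the original library: every update rule
# handed to CompareAlgs.add_algo is called as update_rule(w_cur, lrp,
# hyper_dict) and must return the next iterate, as the loop in add_algo above
# shows.  The toy non-private rule below only illustrates that contract; the
# fixed step size 4.0 is a placeholder, not a value used elsewhere in this
# file.
def _example_update_rule(w_cur, lrp, hyper_dict):
  """Toy non-private gradient step with the signature add_algo expects."""
  del hyper_dict  # This illustrative rule does not read any hyperparameters.
  return w_cur - 4.0 * lrp.grad_wor(w_cur)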


def private_newton(w_cur, lrp, hyper_dict):
  """Implementation of the private Newton method from [ABL21].

  w_cur = current iterate
  lrp = an instance of MyLogisticRegression
  hyper_dict = privacy budget and other hyperparameters
  returns the next iterate
  """
  total = hyper_dict["total"]
  grad_frac = hyper_dict["grad_frac"]
  iters = hyper_dict["num_iteration"]
  hess = lrp.hess(w_cur)
  rho_grad = grad_frac * total / iters  # divide total privacy budget up.
  rho_hess = (1 - grad_frac) * total / iters
  hess_noise = np.random.normal(
      scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
      size=(lrp.dim, lrp.dim),
  )
  hess_noise = (hess_noise + hess_noise.T) / 2
  hess_noisy = eigenclip(hess + hess_noise)
  grad = lrp.grad(w_cur)
  grad_noisy = grad + np.random.normal(
      scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
  )
  dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
  dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
  return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy


def eigenclip(sym_mat, min_eval=1e-5):
  """Operation of the eigenclip.

  sym_mat = symmetric matrix
  min_eval = minimum eigenvalue for clipping
  returns the modified matrix
  """
  eig_val, eig_vec = np.linalg.eigh(sym_mat)
  eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
  clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
  return clipped_mat


def gd_priv(w_cur, lrp, hyper_dict):
  """Implementation of DP-GD.

  w_cur = current point
  lrp = an instance of MyLogisticRegression
  hyper_dict = hyperparameters, including the privacy budget
  returns the next iterate
  """
  iters = hyper_dict["num_iteration"]
  inv_lr_gd = 0.25  # inverse learning rate, based on the smoothness
  sens = 1 / (lrp.num_samples * (inv_lr_gd))  # sensitivity
  rho = hyper_dict["total"] / iters  # divide total privacy budget up
  noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
  return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise


def sgd_priv(w_cur, lrp, hyper_dict):
  """Implementation of DP-SGD.

  w_cur = current point
  lrp = an instance of MyLogisticRegression
  hyper_dict = hyperparameters, including the batch size and noise multiplier
  returns the next iterate
  """
  batch_size = hyper_dict["batch_size"]
  sigma_privacy = hyper_dict["noise_multiplier"]
  lr_sgd = 4  # learning rate based on the smoothness
  sample_rate = batch_size / lrp.num_samples  # sampling probability
  sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
  batch_idx = np.where(sample_vec == 1)[0]  # indices of the sampled batch
  batch_size_t = len(batch_idx)
  noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
  grad_minibatch = lrp.grad_wor(
      w_cur, batch_idx
  )  # average gradient over batch_idx
  return w_cur - lr_sgd * (
      batch_size_t / batch_size * grad_minibatch + noise / batch_size
  )
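

# Illustrative sketch, not part of the original library: a quick sanity check
# of eigenclip on an arbitrary 2x2 symmetric matrix.  The matrix below has one
# negative eigenvalue; after eigenclip every eigenvalue is floored at
# min_eval, so the result is positive definite and safe to pass to
# np.linalg.solve as in private_newton.
def _example_eigenclip_check():
  """Checks that eigenclip floors the spectrum of a symmetric matrix."""
  sym_mat = np.array([[1.0, 2.0], [2.0, -3.0]])  # eigenvalues ~ 1.83 and -3.83
  clipped = eigenclip(sym_mat, min_eval=1e-5)
  assert np.all(np.linalg.eigvalsh(clipped) >= 1e-5 - 1e-12)
  return clipped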


def gd_priv_optls(w_cur, lrp, hyper_dict):
  """Implementation of DP-GD with backtracking line search.

  !!! This method is not private. We only use it as a baseline.

  w_cur = current point
  lrp = an instance of MyLogisticRegression
  hyper_dict = hyperparameters, including the privacy budget
  returns the next iterate
  """
  iters = hyper_dict["num_iteration"]
  rho_grad = hyper_dict["total"] / iters  # divide total privacy budget up
  grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
  grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
  dir_srch = lrp.grad(w_cur) + grad_noise
  stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
  return w_cur - stepsize_opt * dir_srch


def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
  """Implementation of backtracking line search.

  lrp = an instance of MyLogisticRegression
  dir_srch = the "noisy" gradient direction
  w_start = current point
  alpha and beta trade off the precision and complexity of the line search
  returns a (close to) optimal stepsize
  """
  step_size = 100
  val_0 = lrp.loss(w_start)
  inner_prod = np.dot(dir_srch, lrp.grad(w_start))
  while (
      lrp.loss(w_start - step_size * dir_srch)
      >= val_0 - step_size * alpha * inner_prod
  ):
    step_size = beta * step_size
    if step_size < 1e-6:
      break
  return step_size


def newton(dataset, w_init, bias=True):
  """Newton's method with line search, without privacy constraints.

  dataset = dataset in the format (features, labels)
  w_init = initialization point
  returns the model parameters
  """
  feature_vecs, labels = dataset
  if bias is True:
    feature_vecs = np.hstack(
        (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
    )
  lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
  w_cur = w_init
  for _ in range(8):
    hess = lrp.hess(w_cur)
    dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
    step_size = backtracking_ls(lrp, dir_srch, w_cur)
    w_cur = w_cur - step_size * dir_srch
  if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
    w_out = w_cur
  else:
    w_out = w_init
  return w_out
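

# Illustrative sketch, not part of the original library: the update rules in
# this module read their settings from a single dictionary.  The key names
# below are the ones actually looked up by gd_priv, sgd_priv, private_newton
# and DoubleNoiseMech; the numerical values are placeholders only and would
# need to be chosen for a concrete privacy budget and dataset.
def _example_hyper_dict():
  """Returns a hypothetical hyperparameter dictionary for the update rules."""
  return {
      "num_iteration": 50,  # iterations run by CompareAlgs.add_algo
      "total": 1.0,  # total privacy budget (rho), split across iterations
      "grad_frac": 0.5,  # fraction of the per-iteration budget for the gradient
      "trace_frac": 0.1,  # DoubleNoiseMech: budget fraction for the noisy trace
      "trace_coeff": 1.0,  # DoubleNoiseMech: scaling of the noisy trace
      "batch_size": 256,  # sgd_priv: expected minibatch size
      "noise_multiplier": 1.0,  # sgd_priv: gradient noise multiplier
      "noise_multiplier_grad": 1.0,  # stochastic DoubleNoiseMech: gradient noise
      "batchsize_grad": 256,  # stochastic DoubleNoiseMech: gradient batch size
      "noise_multiplier_hess": 1.0,  # stochastic DoubleNoiseMech: Hessian noise
      "batchsize_hess": 256,  # stochastic DoubleNoiseMech: Hessian batch size
      "min_eval": 0.01,  # stochastic DoubleNoiseMech: eigenvalue floor
  }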


class DoubleNoiseMech:
  """Our method: the double noise mechanism."""

  def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
    """Initializer of the double noise mechanism.

    lrp = an instance of MyLogisticRegression
    type_reg = minimum-eigenvalue modification type, either 'add' or 'clip'
    curvature_info = type of second-order information, either 'hessian' or 'ub'
    """
    self.type_reg = type_reg
    self.curvature_info = curvature_info
    if self.curvature_info == "hessian":
      self.hess = lrp.hess_wor
    elif self.curvature_info == "ub":
      self.hess = lrp.upperbound_wor

  def update_rule(self, w_cur, lrp, hyper_dict):
    """Double noise mechanism update rule -- full batch."""
    noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
    w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
    return w_next

  def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
    """Double noise mechanism update rule -- stochastic minibatch."""
    noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
    w_next = self.noisy_direction_stochastic(
        w_cur, lrp, hyper_dict, noisy_grad_cur
    )
    return w_next

  def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
    """Computing the noisy gradient."""
    if batch is False:
      rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
          "num_iteration"
      ]
      noise_grad = np.random.normal(
          scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
      )
      noisy_grad = lrp.grad(w_cur) + noise_grad
    else:
      std_grad = hyper_dict["noise_multiplier_grad"]
      pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
      sample_vec = np.random.binomial(n=1, p=pgrad, size=lrp.num_samples)
      batch_idx_grad = np.where(sample_vec == 1)[0]
      grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad)
      noise_grad = np.random.normal(scale=std_grad, size=lrp.dim)
      noisy_grad = (
          len(batch_idx_grad) / (lrp.num_samples * pgrad)
      ) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad)
    return noisy_grad

  def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad):
    """Computing the noisy direction."""
    total = hyper_dict["total"]
    grad_frac = hyper_dict["grad_frac"]
    frac_trace = hyper_dict["trace_frac"]
    trace_coeff = hyper_dict["trace_coeff"]
    iters = hyper_dict["num_iteration"]
    rho_hess = (1 - grad_frac) * total / iters
    smooth_param = 0.25
    hess_cur = self.hess(w_cur)
    noisy_trace = trace_coeff * max(
        np.trace(hess_cur)
        + np.random.normal(
            scale=(0.25 / lrp.num_samples)
            * np.sqrt(0.5 / (frac_trace * rho_hess))
        ),
        0,
    )
    min_eval = max(
        (noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess))
        ** (1 / 3),
        1 / (lrp.num_samples),
    )
    grad_norm = np.linalg.norm(noisy_grad)
    if self.type_reg == "add":  # Sensitivity is different for add vs clip.
      sens2 = (
          grad_norm
          * smooth_param
          / (lrp.num_samples * min_eval**2 + smooth_param * min_eval)
      )
      noise2 = np.random.normal(
          scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)),
          size=lrp.dim,
      )
      return (
          w_cur
          - np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad)
          + noise2
      )
    # type_reg == 'clip'
    sens2 = (
        grad_norm
        * smooth_param
        / (lrp.num_samples * min_eval**2 - smooth_param * min_eval)
    )
    noise2 = np.random.normal(
        scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim
    )
    eval_hess, evec_hess = np.linalg.eigh(hess_cur)
    eval_trunc = eval_hess[eval_hess >= min_eval]
    num_eig = len(eval_trunc)
    if num_eig == 0:
      hess_modified_inv = 1 / min_eval * np.eye(lrp.dim)
    else:
      evec_trun = evec_hess[:, -num_eig:]
      hess_modified_inv = np.dot(
          evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)),
          evec_trun.T,
      ) + 1 / min_eval * np.eye(lrp.dim)
    return w_cur - (hess_modified_inv @ noisy_grad) + noise2

  def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad):
    """Noisy direction for the stochastic variant."""
    std_hess = hyper_dict["noise_multiplier_hess"]
    phess = hyper_dict["batchsize_hess"] / lrp.num_samples
    min_eval = hyper_dict["min_eval"]
    sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples)
    batch_idx_hess = np.where(sample_vec == 1)[0]
    batch_size_hess_t = len(batch_idx_hess)
    hess_cur = (
        (batch_size_hess_t)
        / (lrp.num_samples * phess)
        * self.hess(w_cur, batch_idx_hess)
    )
    smooth_param = 0.25  # smoothness parameter
    grad_norm = np.linalg.norm(noisy_grad)
    if self.type_reg == "add":  # Sensitivity is different for add vs clip.
      sens2 = (
          grad_norm
          * smooth_param
          / (
              (lrp.num_samples * phess) * min_eval**2
              + smooth_param * min_eval
          )
      )
      noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
      return (
          w_cur
          - np.linalg.solve(
              hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad
          )
          + noise2
      )
    # type_reg == 'clip'
    min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess)))
    sens2 = (
        grad_norm
        * smooth_param
        / (
            (lrp.num_samples * phess) * min_eval_c**2
            - smooth_param * min_eval_c
        )
    )
    noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
    eval_hess, evec_hess = np.linalg.eigh(hess_cur)
    eval_trunc = eval_hess[eval_hess >= min_eval_c]
    num_eig = len(eval_trunc)
    if num_eig == 0:
      hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim)
    else:
      evec_trun = evec_hess[:, -num_eig:]
      hess_modified_inv = np.dot(
          evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)),
          evec_trun.T,
      ) + 1 / min_eval_c * np.eye(lrp.dim)
    return w_cur - (hess_modified_inv @ noisy_grad) + noise2
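

# Illustrative sketch, not part of the original library: given an instance of
# MyLogisticRegression `lrp`, a non-private minimizer `optimal_w` (for example
# from `newton`), and a hyperparameter dictionary such as
# _example_hyper_dict(), the pieces above might be wired together as follows.
# The algorithm names passed to add_algo are arbitrary labels.
def _example_run_comparison(lrp, optimal_w, hyper_dict):
  """Runs DP-GD, private Newton, and the double noise mechanism on one problem."""
  dnm = DoubleNoiseMech(lrp, type_reg="clip", curvature_info="hessian")
  comparison = CompareAlgs(lrp, optimal_w, hyper_dict)
  comparison.add_algo(gd_priv, "DP-GD")
  comparison.add_algo(private_newton, "Private Newton")
  comparison.add_algo(dnm.update_rule, "Double Noise")
  return comparison.loss_vals(), comparison.gradnorm_vals()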