# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= # -*- coding: utf-8 -*- """Differentially Private Second-Order Methods for Logistic Regression. This script implements several algorithms for DP logistic regression and tests them on various datasets. It produces plots for our upcoming paper. Code exported from Colab. Written by Mahdi Haghifam. """ # pylint: disable=invalid-name # We use upper case to denote matrices and lower case for vectors. # This conflicts with pylint's variable naming rules. # pylint: disable=redefined-outer-name # This is a script hence we have "global" variables. # pylint: disable=unused-argument # The update rule functions are meant to have the same signature, # so cannot just remove arguments. Ideally this should have been # implemented as a class, but much easier to define a function. import math import urllib.request # from keras.utils.np_utils import to_categorical import matplotlib.pyplot as plt import numpy as np from sklearn import preprocessing import sklearn.datasets from sklearn.linear_model import LogisticRegression import tensorflow as tf # from tensorflow import keras class MyLogisticRegression: """Represents a logistic regression problem. There is a dataset consisting of features (vectors of norm <=1) and labels (+1,-1), represented as a numpy array. There is also an L2 regularizer. """ def __init__(self, X, y, reg=1e-8): """Initialize the data and the regularizer. Args: X: n x d numpy array representing features y: n x 1 numpy array representing labels reg: L2 regularizing coefficient (to ensure solution is finite) Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i. """ self.reg = float(reg) X = np.array(X) y = np.array(y) assert len(X.shape) == 2 assert len(y.shape) == 1 self.n, self.d = X.shape assert y.shape[0] == self.n signed_data = X * y[:, np.newaxis] norm = np.linalg.norm(signed_data, axis=1) scale = np.maximum(norm, np.ones_like(norm)) self.data = (1 / scale[:, None]) * signed_data def loss(self, w): """Computes the loss represented by this object at w. Args: w: weight vector Returns: If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n log(1+exp(-)) + (reg/2)||w||^2 """ data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w)))) reg_loss = 0.5 * self.reg * np.linalg.norm(w)**2 return data_loss + reg_loss def loss_wor(self, w): """Computes the loss represented by this object at w without regularizer. Args: w: weight vector Returns: If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n log(1+exp(-)) """ data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w)))) return data_loss def grad(self, w): """Computes the gradient of the logistic regression at a given point w. Args: w: weight vector Returns: If X,y is the data and reg is the regularizer, then the gradient is (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w """ coeff_grad = -1/(1+np.exp(np.dot(self.data, w))) data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0) return data_grad + self.reg * w def grad_wor(self, w): """Computes the gradient of the logistic regression at a given point w. Args: w: weight vector Returns: If X,y is the data and reg is the regularizer, then the gradient is (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w """ coeff_grad = -1/(1+np.exp(np.dot(self.data, w))) data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0) return data_grad def hess(self, w): """Computes the Hessian of the logistic regression at a given point w. Args: w: weight vector Returns: The Hessian is the matrix of second derivatives. If X,y is the data and reg is the regularizer, then the Hessian is (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(/2)*2)^2 where we assume y[i]^2==1. """ a = np.dot(self.data, w)/2 coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2 raw_hess = np.dot(self.data.T * coeff_hess, self.data) return raw_hess/self.n + self.reg * np.eye(self.d) def hess_wor(self, w): """Computes the Hessian of the logistic regression at a given point w. Args: w: weight vector Returns: The Hessian is the matrix of second derivatives. If X,y is the data, then the Hessian is (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(/2)*2)^2 where we assume y[i]^2==1. """ a = np.dot(self.data, w)/2 coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2 raw_hess = np.dot(self.data.T * coeff_hess, self.data) return raw_hess/self.n def upperbound(self, w): """Computes tightest universal quadratic upper bound on the loss function. log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a) Constant and linear terms are just first-order Taylor This function gives the quadratic term (which replaces the Hessian) https://twitter.com/shortstein/status/1557961202256318464 Args: w: weight vector Returns: Matrix H such that for all v loss(v) <= loss(w)+ + /2 """ a = -np.dot(self.data, w) # vector of y_i for i in [n] # v = 0.5*np.tanh(a/2)/a # But avoid 0/0 by special rule v = np.divide( 0.5 * np.tanh(a / 2), a, out=(np.ones(a.shape) * 0.25), where=(np.abs(a) > 1e-9)) H = np.dot(self.data.T * v, self.data) return H / self.n + self.reg * np.eye(self.d) def upperbound_wor(self, w): """Computes tightest quadratic upper bound on the unregularized loss. log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a) Constant and linear terms are just first-order Taylor This function gives the quadratic term (which replaces the Hessian) https://twitter.com/shortstein/status/1557961202256318464 Args: w: weight vector Returns: Matrix H such that for all v loss(v) <= loss(w)+ + /2 """ a = -np.dot(self.data, w) # vector of y_i for i in [n] # v = 0.5*np.tanh(a/2)/a # But avoid 0/0 by special rule v = np.divide( 0.5 * np.tanh(a / 2), a, out=(np.ones(a.shape) * 0.25), where=(np.abs(a) > 1e-9)) H = np.dot(self.data.T * v, self.data) return H / self.n class Mydatasets: """Represents datasets we use for testing the algorithms. """ def __init__(self): pass def find_optimal_classifier(self, dataset, reg=1e-9): """Find the optimal weight vector for the logistic regression. Args: dataset: training dataset reg: regularizer Returns: Optimal weight vector. """ X, y = dataset model_lr = LogisticRegression(max_iter=10000, C=1/reg).fit(X, y) w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)]) w_opt = newton(dataset, w_opt1) print("optimal weight vector norms", np.linalg.norm(w_opt)) return w_opt def mnist_binary(self): """Download and extract MNIST data. We also select only two labels for the binary classification task. Returns: Features, labels, and optimal weight vector. """ labels = [1, 7] label0, label1 = int(labels[0]), int(labels[1]) mnist = tf.keras.datasets.mnist (x_train, y_train), (_, _) = mnist.load_data() x_train = x_train.reshape(x_train.shape[0], -1) scaler = preprocessing.StandardScaler().fit(x_train) x_train = scaler.transform(x_train) nrm = np.linalg.norm(x_train, axis=1) x_train = x_train * 1/nrm[:, None] y_train = y_train.astype(float) indx0 = np.nonzero(y_train == label0)[0] indx1 = np.nonzero(y_train == label1)[0] y_train[indx0] = -1 y_train[indx1] = 1 indx = np.concatenate((indx0, indx1)) x_train = x_train[indx] labels = y_train[indx] dataset = x_train, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train)) # adding a dummy dimension for the bias term. return X, labels, w_opt def w8a_dataset(self): """w8a dataset for logistic regression. """ num_points = 15e3 w8a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/w8a" data_path = "./w8a" urllib.request.urlretrieve(w8a_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() selected_samples = np.random.choice(len(X), int(num_points)) X = X[selected_samples, :] labels = labels[selected_samples] scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def a1a_dataset(self): """Loads a1a dataset for logistic regression. """ a1a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a" data_path = "./a1a" urllib.request.urlretrieve(a1a_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def phishing(self): """phishing dataset for logistic regression. """ phishing_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/phishing" data_path = "./phishing" urllib.request.urlretrieve(phishing_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def a5a_dataset(self): """a5a dataset for logistic regression. """ a5a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a" data_path = "./a5a" urllib.request.urlretrieve(a5a_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def a6a_dataset(self): """a6a dataset for logistic regression. """ a6a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a" data_path = "./a6a" urllib.request.urlretrieve(a6a_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def madelon(self): """madelon dataset for logistic regression. """ madelon_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/madelon" data_path = "./madelon" urllib.request.urlretrieve(madelon_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def mushroom_dataset(self): """mushroom dataset for logistic regression. """ mushroom_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/mushrooms" data_path = "./mushrooms" urllib.request.urlretrieve(mushroom_url, data_path) X, labels = sklearn.datasets.load_svmlight_file(data_path) X = X.toarray() scaler = preprocessing.StandardScaler().fit(X) X = scaler.transform(X) nrm = np.linalg.norm(X, axis=1) X = X * 1/nrm[:, None] labels = labels.astype(float) dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt def synthetic_data(self, n=5000, d=50, cov=None, w=None): """Generates a synthetic dataset for logistic regression. Args: n: number of samples d: dimension cov: covariance of the data (optional, default: identity) w: true coefficient vector (optional, default:first standard basis vector) Returns: Synthetic dataset. Features are unit vectors (by default uniformly random). Labels are sampled from logistic distribution, where argument w is the "true" solution. """ mean = np.zeros(d) if cov is None: cov = np.eye(d) X_un = np.random.multivariate_normal(mean, cov, n) nrm = np.linalg.norm(X_un, axis=1) X = X_un * 1/nrm[:, None] if w is None: w = np.ones(d) w[0] = 1 inner_prod = np.dot(X, w) params = np.exp(inner_prod)/(1+np.exp(inner_prod)) labels = 2*np.random.binomial(1, params)-1 dataset = X, labels w_opt = self.find_optimal_classifier(dataset) X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) # adding a dummy dimension for the bias term. return X, labels, w_opt class CompareAlgs: """Class to run multiple iterative algorithms and compare the results.""" def __init__(self, lr, dataset, optimal_w, iters=10, w0=None, pb=None): """Initialize the problem.""" X, _ = dataset self.w_opt = optimal_w n, d = np.shape(X) print("dataset is created: (number of samples, dimension)=" + str(n) + "," + str(d)) if w0 is None: w0_un = np.random.multivariate_normal(np.zeros(d), np.eye(d)) w0 = w0_un/np.linalg.norm(w0_un) self.w0 = w0 # initial iterate self.iters = iters self.pb = pb self.lr = lr self.plots = [] # List of lists of iterates self.names = [] # List of names self.linestyles = [] # List of line styles for plotting self.cutoff = 20 * np.linalg.norm(self.w_opt) + 20 * np.linalg.norm( self.w0) + 10 # how do you set this value? is it problem-specific? def add_plot(self, update_rule, name, linestyle): """Run an iterative update method & add to plot. Args: update_rule: a function that takes 4 arguments: current iterate LogisticRegression problem index of current iterate total number of iterations pb = privacy budget or similar name: string to display in legend linestyle: line style for plot """ baseline = self.lr.loss_wor(self.w_opt) print(name) w = self.w0 plot = [w] for i in range(self.iters): w = update_rule(w, self.lr, i, self.iters, self.pb) if np.linalg.norm(w) > self.cutoff: w = self.w0 # Stop things exploding print("Stop Things Exploding!") plot.append(w) print( str(i) + ": ||grad||=" + str(np.linalg.norm(self.lr.grad_wor(w))) + " ex_loss=" + str(self.lr.loss_wor(w) - baseline)) self.plots.append(plot) self.names.append(name) self.linestyles.append(linestyle) print() def plot_grad_norms(self, legend=True): """Plot gradient norms for each iteration. """ plt.clf() for plot, name, ls in zip(self.plots, self.names, self.linestyles): grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot] plt.plot(range(self.iters+1), grad_norms, ls, label=name) plt.yscale("log") ymax = np.linalg.norm(self.lr.grad(self.plots[0][0])) plt.ylim(top=ymax) # plt.ylim((1e-3, 1e-1)) plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") if legend: plt.legend() plt.show() def loss_vals(self): """Outputs the loss vector for different methods. """ baseline = self.lr.loss_wor(self.w_opt) loss_dict = {} for plot, name in zip(self.plots, self.names): losses = [self.lr.loss_wor(w)-baseline for w in plot] loss_dict[name] = [losses] return loss_dict def gradnorm_vals(self): """Outputs the gradient norm for different methods. """ gradnorm_dict = {} for plot, name in zip(self.plots, self.names): grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot] gradnorm_dict[name] = [grad_norms] return gradnorm_dict def plot_losses(self): """Plots excess loss for each iteration. output is a dictionary where the keys are name of method and value is the loss for each iteration. """ baseline = self.lr.loss_wor(self.w_opt) plt.clf() for plot, name, ls in zip(self.plots, self.names, self.linestyles): losses = [self.lr.loss_wor(w)-baseline for w in plot] plt.plot(range(self.iters+1), losses, ls, label=name) # plt.yscale('log') plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.legend() plt.show() def gd_priv(w, lr, i, iters, pb): """Implementation of DP-GD. Args: w: current point lr: logistic regression i: iteration number iters: total number of iterations pb: auxillary information Returns: The next iterate. """ inv_lr_gd = 0.25 # We select the learning rate based on the smoothness sens = 1/(lr.n*(inv_lr_gd+lr.reg)) # Sensitivity rho = pb["total"] / iters # divide total privacy budget up noise = np.random.normal(scale=sens/math.sqrt(2*rho), size=lr.d) return w - lr.grad(w)/(inv_lr_gd+lr.reg) + noise def gd_priv_backtrackingls(w, lr, i, iters, pb): """Implementation of DP-GD with back-tracking line search. !!! this method is not private. We only use it as a baseline. Args: w: current point lr: logistic regression i: iteration number iters: total number of iterations pb: auxillary information Returns: The next iterate """ rho_grad = pb["total"] / iters # divide total privacy budget up grad_scale = (1/lr.n)*math.sqrt(0.5/rho_grad) grad_noise = np.random.normal(scale=grad_scale, size=lr.d) direction = lr.grad(w)+grad_noise stepsize_opt = backtracking_ls(lr, direction, w) return w - stepsize_opt * direction def backtracking_ls(lr, direction, w_0, alpha=0.4, beta=0.95): """Implementation of backtracking line search. Args: lr: logistic regression direction: the "noisy" gradient direction w_0: current point alpha: tradeoff the precision and complexity of the linesearch beta: tradeoff the precision and complexity of the linesearch Returns: A good stepsize """ t = 100 while lr.loss(w_0 - t * direction ) >= lr.loss(w_0) - t * alpha * np.dot(direction, lr.grad(w_0)): t = beta * t if t < 1e-10: break return t def newton(dataset, w_opt2): """Newton update rule. """ X, y = dataset X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) lr = MyLogisticRegression(X, y, reg=1e-9) w_opt = w_opt2 _, d = np.shape(X) w = np.zeros(d) for _ in range(30): H = lr.hess(w) direction = np.linalg.solve(H, lr.grad(w)) step_size = backtracking_ls(lr, direction, w) w = w - step_size * direction if lr.loss_wor(w) < lr.loss_wor(w_opt2): w_opt = w return w_opt def newton_ur(w, lr, i, iters, pb): H = lr.hess(w) direction = np.linalg.solve(H, lr.grad(w)) step_size = backtracking_ls(lr, direction, w) return w - step_size * direction class DoubleNoiseMech: """Our Method: Double Noise.""" def __init__(self, lr, type_reg="add", hyper_tuning=False, curvature_info="hessian", plot_eigen=False): """Initializes the algorithm. Args: lr: logistic regression problem we are solving. type_reg: "add" or "clip" -- how we regularize eigenvalues. hyper_tuning: do we tune the hyperparameters. curvature_info: "hessian" or "ub" -- what quadratic we use. plot_eigen: show eigenvalues for debugging purposes. """ self.type_reg = type_reg self.hyper_tuning = hyper_tuning self.curvature_info = curvature_info self.plot_eigen = plot_eigen if self.curvature_info == "hessian": self.H = lr.hess_wor elif self.curvature_info == "ub": self.H = lr.upperbound_wor def find_opt_reg_wop(self, w, lr, noisy_grad, rho_hess): """Implementation of finding the optimal lambda. Here, we don't pay for privacy of doing it. Args: w: current point lr: logistic regression problem noisy_grad: the gradient estimate rho_hess: the privacy budget Returns: The next iterate. """ increase_factor = 1.5 # at each step we increase the clipping if self.type_reg == "add": lambda_cur = 5e-6 # starting parameter elif self.type_reg == "clip": lambda_cur = 0.25/lr.n + 1e-5 # starting parameter, num_noise_sample = 5 # we want to estimate expected value over the noise grad_norm = np.linalg.norm(noisy_grad) H = self.H(w) best_loss = 1e6 # a large dummy number while lambda_cur <= 0.25: H = self.hess_mod(w, lambda_cur) if self.type_reg == "add": # Sensitivity is different for add vs clip sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 + 0.25*lambda_cur) elif self.type_reg == "clip": sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 - 0.25*lambda_cur) loss_ = 0 for _ in range(num_noise_sample): noise2 = np.random.normal(scale=sens2*math.sqrt(.5/rho_hess), size=lr.d) loss_ = loss_ + lr.loss_wor(w - np.linalg.solve(H, noisy_grad) + noise2) if loss_ < best_loss: best_loss = loss_ lambda_star = lambda_cur lambda_cur = lambda_cur * increase_factor return lambda_star def update_rule(self, w, lr, i, iters, pb): """update rule.""" total = pb["total"] grad_frac = pb["grad_frac4"] rho1 = grad_frac * total / iters # divide total privacy budget for gradient rho2 = (1-grad_frac) * total / iters # divide total privacy budget sc1 = (1/lr.n) * math.sqrt(0.5/rho1) noise1 = np.random.normal(scale=sc1, size=lr.d) noisy_grad = lr.grad(w)+noise1 grad_norm = np.linalg.norm(noisy_grad) m = 0.25 # smoothness parameter frac_trace = 0.1 # fraction of privacy budget for estimating the trace. H = self.H(w) if self.plot_eigen: val, _ = np.linalg.eigh(H) hist, bin_edges = np.histogram(val, bins=300, range=(0, 0.01)) cdf_vals = np.cumsum(hist) plt.clf() plt.plot(bin_edges[1:], cdf_vals) plt.show() if self.hyper_tuning: min_eval = self.find_opt_reg_wop(w, lr, noisy_grad, rho2) print("optimized min_eval", min_eval) else: noisy_trace = max( np.trace(H) + np.random.normal( scale=(0.25 / lr.n) * math.sqrt(0.5 / (frac_trace * rho2))), 0) min_eval = (noisy_trace / ((lr.n)**2 * (1 - frac_trace) * rho2))**(1 / 3) + 5e-4 print("approx min_eval ", min_eval) H = self.hess_mod(w, min_eval, lr.reg) if self.type_reg == "add": # Sensitivity is different for add vs clip sens2 = grad_norm * m/(lr.n * min_eval**2 + m * min_eval) elif self.type_reg == "clip": sens2 = grad_norm * m / (lr.n * min_eval**2 - m * min_eval) noise2 = np.random.normal( scale=sens2 * math.sqrt(0.5 / ((1 - frac_trace) * rho2)), size=lr.d) return w - np.linalg.solve(H, noisy_grad) + noise2 def hess_mod(self, w, min_eval, reg=1e-9): if self.type_reg == "clip": evals, evec = np.linalg.eigh(self.H(w)) # true_min = np.min(evals) evals = np.maximum(evals, min_eval*np.ones(evals.shape)) Hclipped = np.dot(evec * evals, evec.T) return Hclipped elif self.type_reg == "add": return self.H(w) + min_eval*np.eye(len(self.H(w))) def helper_fun(datasetname, pb, num_rep=5, Tuning=False, plot_eigen=False): """This function loads the data & runs the algorithms. Args: datasetname: name of the dataset pb: a dictionary with the parameters num_rep: number of times we repeat the optimization for reporting average Tuning: True or False exhustive search for fining the best min eigenval plot_eigen: Show eigenvalues Returns: losses and gradient norms """ datasets = Mydatasets() X, y, w_opt = getattr(datasets, datasetname)() dataset = X, y lr = MyLogisticRegression(X, y, reg=1e-8) dnm_hess_add = DoubleNoiseMech( lr, type_reg="add", hyper_tuning=False, curvature_info="hessian", plot_eigen=plot_eigen).update_rule dnm_ub_add = DoubleNoiseMech( lr, type_reg="add", hyper_tuning=False, curvature_info="ub", plot_eigen=plot_eigen).update_rule dnm_hess_clip = DoubleNoiseMech( lr, type_reg="clip", hyper_tuning=False, curvature_info="hessian", plot_eigen=plot_eigen).update_rule dnm_ub_clip = DoubleNoiseMech( lr, type_reg="clip", hyper_tuning=False, curvature_info="ub", plot_eigen=plot_eigen).update_rule if Tuning: # dnm_hess_add_ht = DoubleNoiseMech(lr,type_reg='add', # hyper_tuning=True,curvature_info='hessian').update_rule # dnm_ub_add_ht = DoubleNoiseMech(lr,type_reg='add', # hyper_tuning=True,curvature_info='ub').update_rule dnm_hess_clip_ht = DoubleNoiseMech( lr, type_reg="clip", hyper_tuning=True, curvature_info="hessian", plot_eigen=plot_eigen).update_rule # dnm_ub_clip_ht = DoubleNoiseMech(lr,type_reg='clip', # hyper_tuning=True,curvature_info='ub').update_rule c = CompareAlgs(lr, dataset, w_opt, iters=10, pb=pb) for rep in range(num_rep): c.add_plot(gd_priv, "DPGD", "y--") c.add_plot(dnm_hess_add, "DN-Hess-add", "k-") c.add_plot(dnm_ub_add, "DN-UB-add", "b-") c.add_plot(dnm_hess_clip, "DN-Hess-clip", "k*-") c.add_plot(dnm_ub_clip, "DN-UB-clip", "b*-") c.add_plot(gd_priv_backtrackingls, "DP-GD-Oracle", "m") if Tuning: c.add_plot(dnm_hess_clip_ht, "DN-Hess-clip-T", "r*-") # c.add_plot(dnm_hess_add_ht,"DN-Hess-add-T",'r-') # c.add_plot(dnm_ub_clip_ht,"DN-UB-clip-T",'g*-') # c.add_plot(dnm_ub_add_ht,"DN-UB-add-T",'g-') losses_dict = c.loss_vals() gradnorm_dict = c.gradnorm_vals() if rep == 0: losses_total = losses_dict gradnorm_total = gradnorm_dict else: for names in losses_total: losses_total[names].extend(losses_dict[names]) gradnorm_total[names].extend(gradnorm_dict[names]) return losses_total, gradnorm_total linestyle = { "DPGD": "y-", "DN-Hess-add": "k+-", "DN-UB-add": "b-", "DN-Hess-clip": "r*-", "DN-UB-clip": "g-", "DP-GD-Oracle": "c-" } facecolor = { "DPGD": "yellow", "DN-Hess-add": "black", "DN-UB-add": "blue", "DN-Hess-clip": "red", "DN-UB-clip": "green", "DP-GD-Oracle": "cyan" } alg_plt = [ "DPGD", "DN-Hess-add", "DN-UB-add", "DN-Hess-clip", "DN-UB-clip", "DP-GD-Oracle" ] # Synthethic Data pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity } num_rep = 30 losses_total, gradnorm_total = helper_fun( "synthetic_data", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(1) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("synth.pdf") plt.figure(2) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("synth-grad.pdf") # a5a dataset pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity } num_rep = 30 losses_total, gradnorm_total = helper_fun( "a5a_dataset", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(3) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("a5a.pdf") plt.figure(4) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("a5a-grad.pdf") # w8a dataset pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity } num_rep = 30 losses_total, gradnorm_total = helper_fun( "w8a_dataset", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(5) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("w8a.pdf") plt.figure(6) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("w8a-grad.pdf") # a1a dataset pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix } num_rep = 30 losses_total, gradnorm_total = helper_fun( "a1a_dataset", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(7) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("a1a.pdf") plt.figure(8) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("a1a-grad.pdf") # mushroom dataset pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix } num_rep = 30 losses_total, gradnorm_total = helper_fun( "mushroom_dataset", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(9) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("mushroom.pdf") plt.figure(10) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("mushroom-grad.pdf") # MNIST pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix } num_rep = 30 losses_total, gradnorm_total = helper_fun( "mnist_binary", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(11) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("mnist.pdf") plt.figure(12) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("mnist-grad.pdf") # Dataset: phishing pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix } num_rep = 30 losses_total, gradnorm_total = helper_fun( "phishing", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(13) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("phishing.pdf") plt.figure(14) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("phishing-grad.pdf") # Dataset: Madelon # pb = { # "total": 1, # Total privacy budget for zCDP # "min_eval4": 5e-3, # Min eigenvalue for clipping # "grad_frac4": 0.4 # Fraction of privacy budget for gradient vs matrix # } # num_rep = 1 # losses_total,gradnorm_total = helper_fun('madelon',pb,num_rep = num_rep, # Tuning=True,plot_eigen=True) # for alg in losses_total.keys(): # losses = np.array(losses_total[alg]) # gradnorm = np.array(gradnorm_total[alg]) # loss_avg, gradnorm_avg = np.mean(losses,axis=0), np.mean(gradnorm,axis=0) # loss_std, gradnorm_std = np.std(losses,axis=0)/np.sqrt(num_rep), # np.std(gradnorm,axis=0)/np.sqrt(num_rep) # print(str(alg)+ ':' + " ex_loss="+str(loss_avg[-1])+ ', # std='+str(loss_std[-1])) # if alg in alg_plt: # iters = len(loss_avg) # plt.figure(1) # plt.plot(range(iters),loss_avg,linestyle[alg],label=alg) # plt.fill_between(range(iters), loss_avg-loss_std, loss_avg+loss_std, # facecolor=facecolor[alg]) # plt.legend() # plt.xlabel("Iteration") # plt.ylabel("Excess Loss") # plt.savefig('madelon.pdf') # plt.figure(2) # plt.plot(range(iters),gradnorm_avg,linestyle[alg],label=alg) # plt.yscale('log') # plt.legend() # plt.xlabel("Iteration") # plt.ylabel("Norm of Gradient") # plt.savefig('madelon-grad.pdf') # Test) a6a Dataset pb = { "total": 1, # Total privacy budget for zCDP "min_eval4": 5e-3, # Min eigenvalue for clipping "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix } num_rep = 30 losses_total, gradnorm_total = helper_fun( "a6a_dataset", pb, num_rep=num_rep, Tuning=False) for alg in losses_total: losses = np.array(losses_total[alg]) gradnorm = np.array(gradnorm_total[alg]) loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) loss_std, gradnorm_std = np.std( losses, axis=0) / np.sqrt(num_rep), np.std( gradnorm, axis=0) / np.sqrt(num_rep) print( str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + str(loss_std[-1])) if alg in alg_plt: iters = len(loss_avg) plt.figure(15) plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) plt.fill_between( range(iters), loss_avg - loss_std, loss_avg + loss_std, facecolor=facecolor[alg]) plt.legend() plt.xlabel("Iteration") plt.ylabel("Excess Loss") plt.savefig("a6a.pdf") plt.figure(16) plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) plt.yscale("log") plt.legend() plt.xlabel("Iteration") plt.ylabel("Norm of Gradient") plt.savefig("a6a-grad.pdf")