diff --git a/research/dp_newton/README.md b/research/dp_newton/README.md new file mode 100644 index 0000000..8239933 --- /dev/null +++ b/research/dp_newton/README.md @@ -0,0 +1,32 @@ +# Faster Differentially Private Convex Optimization via Second-Order Methods + +Paper: +https://arxiv.org/abs/2305.13209
+by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, and Abhradeep Thakurta. + +## Description + +Implementation of the optimization algorithms proposed in +https://arxiv.org/abs/2305.13209
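+
+For reference, the simplest baseline implemented in this repository is DP-GD,
+a noisy gradient step (see `gd_priv` in `src/opt_algs.py`). The sketch below is
+a minimal illustration rather than the full implementation: the function and
+argument names are illustrative, and it assumes unit-norm features, smoothness
+constant 0.25 for the logistic loss, and a per-iteration zCDP budget `rho`.
+
+```python
+import numpy as np
+
+def dp_gd_step(w, grad_fn, num_samples, dim, rho, smoothness=0.25):
+  # Step size 1/smoothness, mirroring the gd_priv update in this repository.
+  sens = 1.0 / (num_samples * smoothness)  # sensitivity of the rescaled average gradient
+  noise = np.random.normal(scale=sens / np.sqrt(2.0 * rho), size=dim)
+  return w - grad_fn(w) / smoothness + noise
+```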
+ +## Getting Started + +You will need to install fairly standard dependencies + +run 'run_privacy_utility' to compare the convergence speed and excess loss of +different algorithms. + +### Citation + +You can cite this paper with + +``` +@article{ganesh2023faster, + title={Faster Differentially Private Convex Optimization + via Second-Order Methods}, + author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas + and Thakurta, Abhradeep}, + journal={arXiv preprint arXiv:2305.13209}, + year={2023} +} +``` diff --git a/research/dp_newton/dpso-logistic.py b/research/dp_newton/dpso-logistic.py deleted file mode 100644 index 3e5ac43..0000000 --- a/research/dp_newton/dpso-logistic.py +++ /dev/null @@ -1,1238 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -# -*- coding: utf-8 -*- -"""Differentially Private Second-Order Methods for Logistic Regression. - -This script implements several algorithms for DP logistic regression and -tests them on various datasets. It produces plots for our upcoming paper. - -Code exported from Colab. Written by Mahdi Haghifam. - -""" - -# pylint: disable=invalid-name -# We use upper case to denote matrices and lower case for vectors. -# This conflicts with pylint's variable naming rules. -# pylint: disable=redefined-outer-name -# This is a script hence we have "global" variables. -# pylint: disable=unused-argument -# The update rule functions are meant to have the same signature, -# so cannot just remove arguments. Ideally this should have been -# implemented as a class, but much easier to define a function. - -import math -import urllib.request - -# from keras.utils.np_utils import to_categorical -import matplotlib.pyplot as plt -import numpy as np -from sklearn import preprocessing -import sklearn.datasets -from sklearn.linear_model import LogisticRegression -import tensorflow as tf -# from tensorflow import keras - - -class MyLogisticRegression: - """Represents a logistic regression problem. - - There is a dataset consisting of features (vectors of norm <=1) - and labels (+1,-1), represented as a numpy array. - There is also an L2 regularizer. - """ - - def __init__(self, X, y, reg=1e-8): - """Initialize the data and the regularizer. - - Args: - X: n x d numpy array representing features - y: n x 1 numpy array representing labels - reg: L2 regularizing coefficient (to ensure solution is finite) - - Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i. - """ - self.reg = float(reg) - X = np.array(X) - y = np.array(y) - assert len(X.shape) == 2 - assert len(y.shape) == 1 - self.n, self.d = X.shape - assert y.shape[0] == self.n - signed_data = X * y[:, np.newaxis] - norm = np.linalg.norm(signed_data, axis=1) - scale = np.maximum(norm, np.ones_like(norm)) - self.data = (1 / scale[:, None]) * signed_data - - def loss(self, w): - """Computes the loss represented by this object at w. 
- - Args: - w: weight vector - - Returns: - If X,y is the data and reg is the regularizer, then the loss is - (1/n)sum_i^n log(1+exp(-)) + (reg/2)||w||^2 - """ - data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w)))) - reg_loss = 0.5 * self.reg * np.linalg.norm(w)**2 - return data_loss + reg_loss - - def loss_wor(self, w): - """Computes the loss represented by this object at w without regularizer. - - Args: - w: weight vector - - Returns: - If X,y is the data and reg is the regularizer, then the loss is - (1/n)sum_i^n log(1+exp(-)) - """ - data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w)))) - return data_loss - - def grad(self, w): - """Computes the gradient of the logistic regression at a given point w. - - Args: - w: weight vector - - Returns: - If X,y is the data and reg is the regularizer, then the gradient is - (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w - """ - coeff_grad = -1/(1+np.exp(np.dot(self.data, w))) - data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0) - return data_grad + self.reg * w - - def grad_wor(self, w): - """Computes the gradient of the logistic regression at a given point w. - - Args: - w: weight vector - - Returns: - If X,y is the data and reg is the regularizer, then the gradient is - (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w - """ - coeff_grad = -1/(1+np.exp(np.dot(self.data, w))) - data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0) - return data_grad - - def hess(self, w): - """Computes the Hessian of the logistic regression at a given point w. - - Args: - w: weight vector - - Returns: - The Hessian is the matrix of second derivatives. - If X,y is the data and reg is the regularizer, then the Hessian is - (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(/2)*2)^2 - where we assume y[i]^2==1. - """ - a = np.dot(self.data, w)/2 - coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2 - raw_hess = np.dot(self.data.T * coeff_hess, self.data) - return raw_hess/self.n + self.reg * np.eye(self.d) - - def hess_wor(self, w): - """Computes the Hessian of the logistic regression at a given point w. - - Args: - w: weight vector - - Returns: - The Hessian is the matrix of second derivatives. - If X,y is the data, then the Hessian is - (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(/2)*2)^2 - where we assume y[i]^2==1. - """ - a = np.dot(self.data, w)/2 - coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2 - raw_hess = np.dot(self.data.T * coeff_hess, self.data) - return raw_hess/self.n - - def upperbound(self, w): - """Computes tightest universal quadratic upper bound on the loss function. - - log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a) - Constant and linear terms are just first-order Taylor - This function gives the quadratic term (which replaces the Hessian) - https://twitter.com/shortstein/status/1557961202256318464 - - Args: - w: weight vector - - Returns: - Matrix H such that for all v - loss(v) <= loss(w)+ + /2 - """ - a = -np.dot(self.data, w) # vector of y_i for i in [n] - # v = 0.5*np.tanh(a/2)/a - # But avoid 0/0 by special rule - v = np.divide( - 0.5 * np.tanh(a / 2), - a, - out=(np.ones(a.shape) * 0.25), - where=(np.abs(a) > 1e-9)) - H = np.dot(self.data.T * v, self.data) - return H / self.n + self.reg * np.eye(self.d) - - def upperbound_wor(self, w): - """Computes tightest quadratic upper bound on the unregularized loss. 
- - log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a) - Constant and linear terms are just first-order Taylor - This function gives the quadratic term (which replaces the Hessian) - https://twitter.com/shortstein/status/1557961202256318464 - - Args: - w: weight vector - - Returns: - Matrix H such that for all v - loss(v) <= loss(w)+ + /2 - """ - a = -np.dot(self.data, w) # vector of y_i for i in [n] - # v = 0.5*np.tanh(a/2)/a - # But avoid 0/0 by special rule - v = np.divide( - 0.5 * np.tanh(a / 2), - a, - out=(np.ones(a.shape) * 0.25), - where=(np.abs(a) > 1e-9)) - H = np.dot(self.data.T * v, self.data) - return H / self.n - - -class Mydatasets: - """Represents datasets we use for testing the algorithms. - """ - - def __init__(self): - pass - - def find_optimal_classifier(self, dataset, reg=1e-9): - """Find the optimal weight vector for the logistic regression. - - Args: - dataset: training dataset - reg: regularizer - - Returns: - Optimal weight vector. - """ - X, y = dataset - model_lr = LogisticRegression(max_iter=10000, C=1/reg).fit(X, y) - w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)]) - w_opt = newton(dataset, w_opt1) - print("optimal weight vector norms", np.linalg.norm(w_opt)) - return w_opt - - def mnist_binary(self): - """Download and extract MNIST data. - - We also select only two labels for the binary classification task. - - Returns: - Features, labels, and optimal weight vector. - """ - labels = [1, 7] - label0, label1 = int(labels[0]), int(labels[1]) - mnist = tf.keras.datasets.mnist - (x_train, y_train), (_, _) = mnist.load_data() - x_train = x_train.reshape(x_train.shape[0], -1) - scaler = preprocessing.StandardScaler().fit(x_train) - x_train = scaler.transform(x_train) - nrm = np.linalg.norm(x_train, axis=1) - x_train = x_train * 1/nrm[:, None] - y_train = y_train.astype(float) - indx0 = np.nonzero(y_train == label0)[0] - indx1 = np.nonzero(y_train == label1)[0] - y_train[indx0] = -1 - y_train[indx1] = 1 - indx = np.concatenate((indx0, indx1)) - x_train = x_train[indx] - labels = y_train[indx] - dataset = x_train, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), - x_train)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def w8a_dataset(self): - """w8a dataset for logistic regression. - """ - num_points = 15e3 - w8a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/w8a" - data_path = "./w8a" - urllib.request.urlretrieve(w8a_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - selected_samples = np.random.choice(len(X), int(num_points)) - X = X[selected_samples, :] - labels = labels[selected_samples] - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def a1a_dataset(self): - """Loads a1a dataset for logistic regression. 
- """ - a1a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a" - data_path = "./a1a" - urllib.request.urlretrieve(a1a_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def phishing(self): - """phishing dataset for logistic regression. - """ - phishing_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/phishing" - data_path = "./phishing" - urllib.request.urlretrieve(phishing_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def a5a_dataset(self): - """a5a dataset for logistic regression. - """ - a5a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a" - data_path = "./a5a" - urllib.request.urlretrieve(a5a_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def a6a_dataset(self): - """a6a dataset for logistic regression. - """ - a6a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a" - data_path = "./a6a" - urllib.request.urlretrieve(a6a_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def madelon(self): - """madelon dataset for logistic regression. - """ - madelon_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/madelon" - data_path = "./madelon" - urllib.request.urlretrieve(madelon_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def mushroom_dataset(self): - """mushroom dataset for logistic regression. 
- """ - mushroom_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/mushrooms" - data_path = "./mushrooms" - urllib.request.urlretrieve(mushroom_url, data_path) - X, labels = sklearn.datasets.load_svmlight_file(data_path) - X = X.toarray() - scaler = preprocessing.StandardScaler().fit(X) - X = scaler.transform(X) - nrm = np.linalg.norm(X, axis=1) - X = X * 1/nrm[:, None] - labels = labels.astype(float) - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - def synthetic_data(self, n=5000, d=50, cov=None, w=None): - """Generates a synthetic dataset for logistic regression. - - Args: - n: number of samples - d: dimension - cov: covariance of the data (optional, default: identity) - w: true coefficient vector (optional, default:first standard basis vector) - - Returns: - Synthetic dataset. - Features are unit vectors (by default uniformly random). - Labels are sampled from logistic distribution, - where argument w is the "true" solution. - """ - mean = np.zeros(d) - if cov is None: - cov = np.eye(d) - X_un = np.random.multivariate_normal(mean, cov, n) - nrm = np.linalg.norm(X_un, axis=1) - X = X_un * 1/nrm[:, None] - if w is None: - w = np.ones(d) - w[0] = 1 - inner_prod = np.dot(X, w) - params = np.exp(inner_prod)/(1+np.exp(inner_prod)) - labels = 2*np.random.binomial(1, params)-1 - dataset = X, labels - w_opt = self.find_optimal_classifier(dataset) - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), - X)) # adding a dummy dimension for the bias term. - return X, labels, w_opt - - -class CompareAlgs: - """Class to run multiple iterative algorithms and compare the results.""" - - def __init__(self, - lr, - dataset, - optimal_w, - iters=10, - w0=None, - pb=None): - """Initialize the problem.""" - X, _ = dataset - self.w_opt = optimal_w - n, d = np.shape(X) - print("dataset is created: (number of samples, dimension)=" + str(n) + "," + - str(d)) - - if w0 is None: - w0_un = np.random.multivariate_normal(np.zeros(d), np.eye(d)) - w0 = w0_un/np.linalg.norm(w0_un) - self.w0 = w0 # initial iterate - self.iters = iters - self.pb = pb - self.lr = lr - self.plots = [] # List of lists of iterates - self.names = [] # List of names - self.linestyles = [] # List of line styles for plotting - self.cutoff = 20 * np.linalg.norm(self.w_opt) + 20 * np.linalg.norm( - self.w0) + 10 # how do you set this value? is it problem-specific? - - def add_plot(self, update_rule, name, linestyle): - """Run an iterative update method & add to plot. - - Args: - update_rule: a function that takes 4 arguments: - current iterate - LogisticRegression problem - index of current iterate - total number of iterations - pb = privacy budget or similar - name: string to display in legend - linestyle: line style for plot - """ - baseline = self.lr.loss_wor(self.w_opt) - print(name) - w = self.w0 - plot = [w] - for i in range(self.iters): - w = update_rule(w, self.lr, i, self.iters, self.pb) - if np.linalg.norm(w) > self.cutoff: - w = self.w0 # Stop things exploding - print("Stop Things Exploding!") - plot.append(w) - print( - str(i) + ": ||grad||=" + str(np.linalg.norm(self.lr.grad_wor(w))) + - " ex_loss=" + str(self.lr.loss_wor(w) - baseline)) - self.plots.append(plot) - self.names.append(name) - self.linestyles.append(linestyle) - print() - - def plot_grad_norms(self, legend=True): - """Plot gradient norms for each iteration. 
- """ - plt.clf() - for plot, name, ls in zip(self.plots, self.names, self.linestyles): - grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot] - plt.plot(range(self.iters+1), grad_norms, ls, label=name) - plt.yscale("log") - ymax = np.linalg.norm(self.lr.grad(self.plots[0][0])) - plt.ylim(top=ymax) - # plt.ylim((1e-3, 1e-1)) - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - if legend: plt.legend() - plt.show() - - def loss_vals(self): - """Outputs the loss vector for different methods. - """ - baseline = self.lr.loss_wor(self.w_opt) - loss_dict = {} - for plot, name in zip(self.plots, self.names): - losses = [self.lr.loss_wor(w)-baseline for w in plot] - loss_dict[name] = [losses] - return loss_dict - - def gradnorm_vals(self): - """Outputs the gradient norm for different methods. - """ - gradnorm_dict = {} - for plot, name in zip(self.plots, self.names): - grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot] - gradnorm_dict[name] = [grad_norms] - return gradnorm_dict - - def plot_losses(self): - """Plots excess loss for each iteration. - - output is a dictionary where the keys are name of method and value is - the loss for each iteration. - """ - baseline = self.lr.loss_wor(self.w_opt) - plt.clf() - for plot, name, ls in zip(self.plots, self.names, self.linestyles): - losses = [self.lr.loss_wor(w)-baseline for w in plot] - plt.plot(range(self.iters+1), losses, ls, label=name) - # plt.yscale('log') - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.legend() - plt.show() - - -def gd_priv(w, lr, i, iters, pb): - """Implementation of DP-GD. - - Args: - w: current point - lr: logistic regression - i: iteration number - iters: total number of iterations - pb: auxillary information - - Returns: - The next iterate. - """ - inv_lr_gd = 0.25 # We select the learning rate based on the smoothness - sens = 1/(lr.n*(inv_lr_gd+lr.reg)) # Sensitivity - rho = pb["total"] / iters # divide total privacy budget up - noise = np.random.normal(scale=sens/math.sqrt(2*rho), size=lr.d) - return w - lr.grad(w)/(inv_lr_gd+lr.reg) + noise - - -def gd_priv_backtrackingls(w, lr, i, iters, pb): - """Implementation of DP-GD with back-tracking line search. - - !!! this method is not private. We only use it as a baseline. - - Args: - w: current point - lr: logistic regression - i: iteration number - iters: total number of iterations - pb: auxillary information - - Returns: - The next iterate - """ - rho_grad = pb["total"] / iters # divide total privacy budget up - grad_scale = (1/lr.n)*math.sqrt(0.5/rho_grad) - grad_noise = np.random.normal(scale=grad_scale, size=lr.d) - direction = lr.grad(w)+grad_noise - stepsize_opt = backtracking_ls(lr, direction, w) - return w - stepsize_opt * direction - - -def backtracking_ls(lr, direction, w_0, alpha=0.4, beta=0.95): - """Implementation of backtracking line search. - - Args: - lr: logistic regression - direction: the "noisy" gradient direction - w_0: current point - alpha: tradeoff the precision and complexity of the linesearch - beta: tradeoff the precision and complexity of the linesearch - - Returns: - A good stepsize - """ - t = 100 - while lr.loss(w_0 - t * direction - ) >= lr.loss(w_0) - t * alpha * np.dot(direction, lr.grad(w_0)): - t = beta * t - if t < 1e-10: - break - return t - - -def newton(dataset, w_opt2): - """Newton update rule. 
- """ - X, y = dataset - X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X)) - lr = MyLogisticRegression(X, y, reg=1e-9) - w_opt = w_opt2 - _, d = np.shape(X) - w = np.zeros(d) - for _ in range(30): - H = lr.hess(w) - direction = np.linalg.solve(H, lr.grad(w)) - step_size = backtracking_ls(lr, direction, w) - w = w - step_size * direction - if lr.loss_wor(w) < lr.loss_wor(w_opt2): - w_opt = w - return w_opt - - -def newton_ur(w, lr, i, iters, pb): - H = lr.hess(w) - direction = np.linalg.solve(H, lr.grad(w)) - step_size = backtracking_ls(lr, direction, w) - return w - step_size * direction - - -class DoubleNoiseMech: - """Our Method: Double Noise.""" - - def __init__(self, - lr, - type_reg="add", - hyper_tuning=False, - curvature_info="hessian", - plot_eigen=False): - """Initializes the algorithm. - - Args: - lr: logistic regression problem we are solving. - type_reg: "add" or "clip" -- how we regularize eigenvalues. - hyper_tuning: do we tune the hyperparameters. - curvature_info: "hessian" or "ub" -- what quadratic we use. - plot_eigen: show eigenvalues for debugging purposes. - - """ - self.type_reg = type_reg - self.hyper_tuning = hyper_tuning - self.curvature_info = curvature_info - self.plot_eigen = plot_eigen - if self.curvature_info == "hessian": - self.H = lr.hess_wor - elif self.curvature_info == "ub": - self.H = lr.upperbound_wor - - def find_opt_reg_wop(self, w, lr, noisy_grad, rho_hess): - """Implementation of finding the optimal lambda. - - Here, we don't pay for privacy of doing it. - - Args: - w: current point - lr: logistic regression problem - noisy_grad: the gradient estimate - rho_hess: the privacy budget - - Returns: - The next iterate. - """ - increase_factor = 1.5 # at each step we increase the clipping - if self.type_reg == "add": - lambda_cur = 5e-6 # starting parameter - elif self.type_reg == "clip": - lambda_cur = 0.25/lr.n + 1e-5 # starting parameter, - num_noise_sample = 5 # we want to estimate expected value over the noise - grad_norm = np.linalg.norm(noisy_grad) - H = self.H(w) - best_loss = 1e6 # a large dummy number - while lambda_cur <= 0.25: - H = self.hess_mod(w, lambda_cur) - if self.type_reg == "add": # Sensitivity is different for add vs clip - sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 + 0.25*lambda_cur) - elif self.type_reg == "clip": - sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 - 0.25*lambda_cur) - loss_ = 0 - for _ in range(num_noise_sample): - noise2 = np.random.normal(scale=sens2*math.sqrt(.5/rho_hess), size=lr.d) - loss_ = loss_ + lr.loss_wor(w - np.linalg.solve(H, noisy_grad) + noise2) - if loss_ < best_loss: - best_loss = loss_ - lambda_star = lambda_cur - lambda_cur = lambda_cur * increase_factor - return lambda_star - - def update_rule(self, w, lr, i, iters, pb): - """update rule.""" - total = pb["total"] - grad_frac = pb["grad_frac4"] - rho1 = grad_frac * total / iters # divide total privacy budget for gradient - rho2 = (1-grad_frac) * total / iters # divide total privacy budget - sc1 = (1/lr.n) * math.sqrt(0.5/rho1) - noise1 = np.random.normal(scale=sc1, size=lr.d) - noisy_grad = lr.grad(w)+noise1 - grad_norm = np.linalg.norm(noisy_grad) - m = 0.25 # smoothness parameter - frac_trace = 0.1 # fraction of privacy budget for estimating the trace. 
- H = self.H(w) - if self.plot_eigen: - val, _ = np.linalg.eigh(H) - hist, bin_edges = np.histogram(val, bins=300, range=(0, 0.01)) - cdf_vals = np.cumsum(hist) - plt.clf() - plt.plot(bin_edges[1:], cdf_vals) - plt.show() - if self.hyper_tuning: - min_eval = self.find_opt_reg_wop(w, lr, noisy_grad, rho2) - print("optimized min_eval", min_eval) - else: - noisy_trace = max( - np.trace(H) + np.random.normal( - scale=(0.25 / lr.n) * math.sqrt(0.5 / (frac_trace * rho2))), 0) - min_eval = (noisy_trace / ((lr.n)**2 * - (1 - frac_trace) * rho2))**(1 / 3) + 5e-4 - print("approx min_eval ", min_eval) - - H = self.hess_mod(w, min_eval, lr.reg) - if self.type_reg == "add": # Sensitivity is different for add vs clip - sens2 = grad_norm * m/(lr.n * min_eval**2 + m * min_eval) - elif self.type_reg == "clip": - sens2 = grad_norm * m / (lr.n * min_eval**2 - m * min_eval) - noise2 = np.random.normal( - scale=sens2 * math.sqrt(0.5 / ((1 - frac_trace) * rho2)), size=lr.d) - return w - np.linalg.solve(H, noisy_grad) + noise2 - - def hess_mod(self, w, min_eval, reg=1e-9): - if self.type_reg == "clip": - evals, evec = np.linalg.eigh(self.H(w)) - # true_min = np.min(evals) - evals = np.maximum(evals, min_eval*np.ones(evals.shape)) - Hclipped = np.dot(evec * evals, evec.T) - return Hclipped - elif self.type_reg == "add": - return self.H(w) + min_eval*np.eye(len(self.H(w))) - - -def helper_fun(datasetname, pb, num_rep=5, Tuning=False, plot_eigen=False): - """This function loads the data & runs the algorithms. - - Args: - datasetname: name of the dataset - pb: a dictionary with the parameters - num_rep: number of times we repeat the optimization for reporting average - Tuning: True or False exhustive search for fining the best min eigenval - plot_eigen: Show eigenvalues - - Returns: - losses and gradient norms - """ - datasets = Mydatasets() - X, y, w_opt = getattr(datasets, datasetname)() - dataset = X, y - lr = MyLogisticRegression(X, y, reg=1e-8) - dnm_hess_add = DoubleNoiseMech( - lr, - type_reg="add", - hyper_tuning=False, - curvature_info="hessian", - plot_eigen=plot_eigen).update_rule - dnm_ub_add = DoubleNoiseMech( - lr, - type_reg="add", - hyper_tuning=False, - curvature_info="ub", - plot_eigen=plot_eigen).update_rule - dnm_hess_clip = DoubleNoiseMech( - lr, - type_reg="clip", - hyper_tuning=False, - curvature_info="hessian", - plot_eigen=plot_eigen).update_rule - dnm_ub_clip = DoubleNoiseMech( - lr, - type_reg="clip", - hyper_tuning=False, - curvature_info="ub", - plot_eigen=plot_eigen).update_rule - if Tuning: - # dnm_hess_add_ht = DoubleNoiseMech(lr,type_reg='add', - # hyper_tuning=True,curvature_info='hessian').update_rule - # dnm_ub_add_ht = DoubleNoiseMech(lr,type_reg='add', - # hyper_tuning=True,curvature_info='ub').update_rule - dnm_hess_clip_ht = DoubleNoiseMech( - lr, - type_reg="clip", - hyper_tuning=True, - curvature_info="hessian", - plot_eigen=plot_eigen).update_rule - # dnm_ub_clip_ht = DoubleNoiseMech(lr,type_reg='clip', - # hyper_tuning=True,curvature_info='ub').update_rule - c = CompareAlgs(lr, dataset, w_opt, iters=10, pb=pb) - for rep in range(num_rep): - c.add_plot(gd_priv, "DPGD", "y--") - c.add_plot(dnm_hess_add, "DN-Hess-add", "k-") - c.add_plot(dnm_ub_add, "DN-UB-add", "b-") - c.add_plot(dnm_hess_clip, "DN-Hess-clip", "k*-") - c.add_plot(dnm_ub_clip, "DN-UB-clip", "b*-") - c.add_plot(gd_priv_backtrackingls, "DP-GD-Oracle", "m") - if Tuning: - c.add_plot(dnm_hess_clip_ht, "DN-Hess-clip-T", "r*-") - # c.add_plot(dnm_hess_add_ht,"DN-Hess-add-T",'r-') - # 
c.add_plot(dnm_ub_clip_ht,"DN-UB-clip-T",'g*-') - # c.add_plot(dnm_ub_add_ht,"DN-UB-add-T",'g-') - losses_dict = c.loss_vals() - gradnorm_dict = c.gradnorm_vals() - if rep == 0: - losses_total = losses_dict - gradnorm_total = gradnorm_dict - else: - for names in losses_total: - losses_total[names].extend(losses_dict[names]) - gradnorm_total[names].extend(gradnorm_dict[names]) - return losses_total, gradnorm_total - -linestyle = { - "DPGD": "y-", - "DN-Hess-add": "k+-", - "DN-UB-add": "b-", - "DN-Hess-clip": "r*-", - "DN-UB-clip": "g-", - "DP-GD-Oracle": "c-" -} -facecolor = { - "DPGD": "yellow", - "DN-Hess-add": "black", - "DN-UB-add": "blue", - "DN-Hess-clip": "red", - "DN-UB-clip": "green", - "DP-GD-Oracle": "cyan" -} -alg_plt = [ - "DPGD", - "DN-Hess-add", - "DN-UB-add", - "DN-Hess-clip", - "DN-UB-clip", - "DP-GD-Oracle" -] - -# Synthethic Data - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": - 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "synthetic_data", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(1) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("synth.pdf") - plt.figure(2) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("synth-grad.pdf") - -# a5a dataset - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": - 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "a5a_dataset", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(3) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("a5a.pdf") - plt.figure(4) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("a5a-grad.pdf") - -# w8a dataset - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": - 0.75 # Fraction of privacy budget for gradient 
vs matrix sensitivity -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "w8a_dataset", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(5) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("w8a.pdf") - plt.figure(6) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("w8a-grad.pdf") - -# a1a dataset - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "a1a_dataset", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(7) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("a1a.pdf") - plt.figure(8) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("a1a-grad.pdf") - -# mushroom dataset - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "mushroom_dataset", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(9) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("mushroom.pdf") - plt.figure(10) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - 
plt.savefig("mushroom-grad.pdf") - -# MNIST - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "mnist_binary", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(11) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("mnist.pdf") - plt.figure(12) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("mnist-grad.pdf") - -# Dataset: phishing - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "phishing", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(13) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("phishing.pdf") - plt.figure(14) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("phishing-grad.pdf") - -# Dataset: Madelon - -# pb = { -# "total": 1, # Total privacy budget for zCDP -# "min_eval4": 5e-3, # Min eigenvalue for clipping -# "grad_frac4": 0.4 # Fraction of privacy budget for gradient vs matrix -# } -# num_rep = 1 -# losses_total,gradnorm_total = helper_fun('madelon',pb,num_rep = num_rep, -# Tuning=True,plot_eigen=True) -# for alg in losses_total.keys(): -# losses = np.array(losses_total[alg]) -# gradnorm = np.array(gradnorm_total[alg]) -# loss_avg, gradnorm_avg = np.mean(losses,axis=0), np.mean(gradnorm,axis=0) -# loss_std, gradnorm_std = np.std(losses,axis=0)/np.sqrt(num_rep), -# np.std(gradnorm,axis=0)/np.sqrt(num_rep) -# print(str(alg)+ ':' + " ex_loss="+str(loss_avg[-1])+ ', -# std='+str(loss_std[-1])) -# if alg in alg_plt: -# iters = len(loss_avg) -# plt.figure(1) -# plt.plot(range(iters),loss_avg,linestyle[alg],label=alg) -# plt.fill_between(range(iters), loss_avg-loss_std, loss_avg+loss_std, -# facecolor=facecolor[alg]) -# plt.legend() -# plt.xlabel("Iteration") -# plt.ylabel("Excess 
Loss") -# plt.savefig('madelon.pdf') -# plt.figure(2) -# plt.plot(range(iters),gradnorm_avg,linestyle[alg],label=alg) -# plt.yscale('log') -# plt.legend() -# plt.xlabel("Iteration") -# plt.ylabel("Norm of Gradient") -# plt.savefig('madelon-grad.pdf') - -# Test) a6a Dataset - -pb = { - "total": 1, # Total privacy budget for zCDP - "min_eval4": 5e-3, # Min eigenvalue for clipping - "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix -} -num_rep = 30 -losses_total, gradnorm_total = helper_fun( - "a6a_dataset", pb, num_rep=num_rep, Tuning=False) -for alg in losses_total: - losses = np.array(losses_total[alg]) - gradnorm = np.array(gradnorm_total[alg]) - loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0) - loss_std, gradnorm_std = np.std( - losses, axis=0) / np.sqrt(num_rep), np.std( - gradnorm, axis=0) / np.sqrt(num_rep) - print( - str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" + - str(loss_std[-1])) - if alg in alg_plt: - iters = len(loss_avg) - plt.figure(15) - plt.plot(range(iters), loss_avg, linestyle[alg], label=alg) - plt.fill_between( - range(iters), - loss_avg - loss_std, - loss_avg + loss_std, - facecolor=facecolor[alg]) - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Excess Loss") - plt.savefig("a6a.pdf") - plt.figure(16) - plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg) - plt.yscale("log") - plt.legend() - plt.xlabel("Iteration") - plt.ylabel("Norm of Gradient") - plt.savefig("a6a-grad.pdf") diff --git a/research/dp_newton/run_privacy_utility b/research/dp_newton/run_privacy_utility new file mode 100644 index 0000000..415d238 --- /dev/null +++ b/research/dp_newton/run_privacy_utility @@ -0,0 +1,28 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +rm -rf ./src/results +mkdir -p ./src/results +rm -rf ./src/datasets_directory +mkdir -p ./src/datasets_directory +dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset' +privacy_budget="3.0" # epsilon in DP +num_iteration_GD="100" # number of iterations for DP-GD +num_iteration_NT="15" # number of iterations for damped newton +num_iteration_our="15" # number of iterations for double noise (proposed method) +$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD +$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac $"0.7" +$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac $"0.7" --trace_frac $"0.1" --trace_coeff $"0.5" +$HOME/google-code/dpoptVenv/bin/python3 ./src/print_results.py diff --git a/research/dp_newton/src/dataset_loader.py b/research/dp_newton/src/dataset_loader.py new file mode 100644 index 0000000..9f060ad --- /dev/null +++ b/research/dp_newton/src/dataset_loader.py @@ -0,0 +1,236 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +"""dataset loader""" + +# pylint: skip-file +# pyformat: disable + +import os +import ssl +import tarfile +import urllib.request +from my_logistic_regression import MyLogisticRegression +import numpy as np +import requests +from sklearn import preprocessing +import sklearn.datasets +from sklearn.linear_model import LogisticRegression +import torch +from torchvision import datasets, transforms + + +PATH_PREFIX = './src/datasets_directory' +ssl._create_default_https_context = ssl._create_unverified_context + + +def normalize_fvec(x_train): + """normalize feature vectors""" + feature_mean = np.mean(x_train, axis=0) + feature_std = np.std(x_train, axis=0) + x_train = (x_train - feature_mean) / feature_std + return x_train + + +def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95): + """Implementation of backtracking line search + + lr = logistic regression + dir = the "noisy" gradient direction + w_start = current point + alpha and beta tradeoff the precision and complexity of the linesearch + + output is an (close to) optimal stepsize + """ + step_size = 100 + val_0 = lrp.loss(w_start) + inner_prod = np.dot(dir_srch, lrp.grad(w_start)) + while ( + lrp.loss(w_start - step_size * dir_srch) + >= val_0 - step_size * alpha * inner_prod + ): + step_size = beta * step_size + if step_size < 1e-6: + break + return step_size + + +def newton(dataset, w_init, bias=True): + """Implementation of the newton method with linesearch without privacy constraints + + dataset = dataset + w_init = initialization point + + output is the model parameter + """ + feature_vecs, labels = dataset + if bias is True: + feature_vecs = np.hstack( + (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs) + ) + lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9) + w_cur = w_init + for _ in range(8): + hess = lrp.hess(w_cur) + dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur)) + step_size = backtracking_ls(lrp, dir_srch, w_cur) + w_cur = w_cur - step_size * dir_srch + if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init): + w_out = w_cur + else: + w_out = w_init + return w_out + + +class Mydatasets: + """Represents datasets we use for expriments""" + + def __init__(self): + data_dir = PATH_PREFIX + '/data' + cache_dir = PATH_PREFIX + '/cache_datasets' + if not os.path.exists(data_dir): + os.mkdir(data_dir) + if not os.path.exists(cache_dir): + os.mkdir(cache_dir) + + def find_optimal_classifier(self, dataset, bias=True): + """find the optimal weight vector for the logistic regression + + for the problems with real datasets. 
+ + dataset = training dataset + bias = bias for the logistic model + """ + inputs_vec, labels = dataset + reg = 1e-9 + if bias is True: + model_lr = LogisticRegression(max_iter=200, C=1 / reg).fit( + inputs_vec, labels + ) + w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)]) + w_opt = newton(dataset, w_opt1, bias) + else: + model_lr = LogisticRegression( + max_iter=200, fit_intercept=False, C=1 / reg + ).fit(inputs_vec, labels) + w_opt1 = np.squeeze(model_lr.coef_) + w_opt = newton(dataset, w_opt1, bias) + return w_opt + + def fmnist_dataset(self): + """fmnist dataset""" + transform_data = transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.5), (0.5))] + ) + train_data_trans = datasets.FashionMNIST( + root=PATH_PREFIX + '/data', + download=True, + train=True, + transform=transform_data, + ) + train_loader = torch.utils.data.DataLoader( + train_data_trans, batch_size=len(train_data_trans) + ) + x_train = next(iter(train_loader))[0].numpy() + x_train = x_train.reshape(len(x_train), -1) + y_train = next(iter(train_loader))[1].numpy() + label0 = 0 + label1 = 3 + indx0 = np.nonzero(y_train == label0)[0] + indx1 = np.nonzero(y_train == label1)[0] + labels = y_train.copy() + labels[indx0] = -1 + labels[indx1] = 1 + indx = np.concatenate((indx0, indx1)) + x_train = x_train[indx] + labels = labels[indx] + dataset = x_train, labels + w_opt = self.find_optimal_classifier(dataset, bias=False) + return x_train, labels, w_opt + + def a1a_dataset(self): + """a1a dataset""" + a1a_url = ( + 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t' + ) + data_path = PATH_PREFIX + '/data/a1a' + if not os.path.exists(data_path): + _ = urllib.request.urlretrieve(a1a_url, data_path) + data = sklearn.datasets.load_svmlight_file(data_path) + inputs_vec, labels = data[0], data[1] + inputs_vec = inputs_vec.toarray() + scaler = preprocessing.StandardScaler().fit(inputs_vec) + inputs_vec = scaler.transform(inputs_vec) + labels = labels.astype(float) + dataset = inputs_vec, labels + w_opt = self.find_optimal_classifier(dataset) + inputs_vec = np.hstack( + (np.ones(shape=(np.shape(inputs_vec)[0], 1)), inputs_vec) + ) + return inputs_vec, labels, w_opt + + def protein_dataset(self): + """protein dataset""" + path_protein = PATH_PREFIX + '/data/protein/' + if not os.path.exists(path_protein): + os.mkdir(path_protein) + protein_url = ( + 'https://kdd.org/cupfiles/KDDCupData/2004/data_kddcup04.tar.gz' + ) + protein_file = PATH_PREFIX + '/data/protein/data_kddcup04.tar.gz' + response = requests.get(protein_url, stream=True, timeout=100) + if response.status_code == 200: + with open(protein_file, 'wb') as file_data: + file_data.write(response.raw.read()) + with tarfile.open(protein_file, 'r:gz') as tar: + tar.extractall(path_protein) + x_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 3:] + y_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 2] + indx0 = np.nonzero(y_train == 0)[0] + indx1 = np.nonzero(y_train == 1)[0] + labels = y_train.copy() + labels[indx0] = -1 + labels[indx1] = 1 + indx = np.arange(len(x_train)) + np.random.seed(3000) + indx_sample = np.random.choice(indx, 50000, replace=False) + np.random.seed(None) + x_train = x_train[indx_sample] + labels = labels[indx_sample] + x_train = normalize_fvec(x_train) + w_opt = self.find_optimal_classifier((x_train, labels)) + x_train = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train)) + return x_train, labels, w_opt + + def synthetic_dataset(self, num_samples=10000, 
dim=100): + """Generates a synthetic dataset for logistic regression. + + n = number of samples d = dimension Features are unit vectors (by default + uniformly random). Labels are sampled from logistic distribution, so w is + the "true" solution. + """ + mean = np.zeros(dim) + cov = np.eye(dim) + inputs_vec_un = np.random.multivariate_normal(mean, cov, num_samples) + nrm = np.linalg.norm(inputs_vec_un, axis=1) + inputs_vec = inputs_vec_un * 1 / nrm[:, None] + w_star = np.ones(dim) + w_star[0] = 1 + inner_prod = np.dot(inputs_vec, w_star) + params = np.exp(inner_prod) / (1 + np.exp(inner_prod)) + labels = 2 * np.random.binomial(1, params) - 1 + dataset = inputs_vec, labels + w_opt = self.find_optimal_classifier(dataset, bias=False) + return inputs_vec, labels, w_opt diff --git a/research/dp_newton/src/my_logistic_regression.py b/research/dp_newton/src/my_logistic_regression.py new file mode 100644 index 0000000..4def9dd --- /dev/null +++ b/research/dp_newton/src/my_logistic_regression.py @@ -0,0 +1,205 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""logistic regression class and its methods""" + +# pylint: skip-file +# pyformat: disable + +import numpy as np + + +class MyLogisticRegression: + """return a logistic regression problem + + There is a dataset consisting of features (vectors of norm <=1) + and labels (+1,-1), represented as a numpy array. + There is also an L2 regularizer. + """ + + def __init__(self, input_vecs, labels, reg=1e-8): + """Initialize the data and the regularizer. + + X = n x d numpy array representing features + y = n x 1 numpy array representing labels + reg = L2 regularizing coefficient (to ensure solution is finite) + + Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i. + """ + self.reg = float(reg) + input_vecs = np.array(input_vecs) + labels = np.array(labels) + assert len(input_vecs.shape) == 2 + assert len(labels.shape) == 1 + self.input_vecs = input_vecs + self.labels = labels + self.num_samples, self.dim = input_vecs.shape + assert labels.shape[0] == self.num_samples + signed_data = input_vecs * labels[:, np.newaxis] + norm = np.linalg.norm(signed_data, axis=1) + scale = np.maximum(norm, np.ones_like(norm)) + self.data = (1 / scale[:, None]) * signed_data + + def loss(self, param): + """Computes the loss represented by this object at w. + + If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n + log(1+exp(-)) + (reg/2)||w||^2 + """ + data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param)))) + reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2 + return data_loss + reg_loss + + def loss_wor(self, param): + """Computes the loss represented by this object at w without regularizer. 
+ + If X,y is the data and reg is the regularizer, then the loss is + (1/n)sum_i^n log(1+exp(-)) + """ + data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param)))) + return data_loss + + def accuracy(self, param): + """ " computes the accuracy of the model gievn by w""" + score_pred = np.dot(self.input_vecs, param) + label1_prob = np.where( + score_pred >= 0, + 1 / (1 + np.exp(-score_pred)), + np.exp(score_pred) / (1 + np.exp(score_pred)), + ) + return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels) + + def grad(self, param, batch_idx=None): + """Computes the gradient of the logistic regression at a given point w. + + If X,y is the data and reg is the regularizer, then the gradient is + (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w + """ + if batch_idx is not None: + data_batch = self.data[batch_idx] + else: + data_batch = self.data + + coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param))) + data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0) + return data_grad + self.reg * param + + def grad_wor(self, param, batch_idx=None): + """Computes the gradient of the logistic regression at a given point w. + + If X,y is the data and reg is the regularizer, then the gradient is + (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w + """ + if batch_idx is not None: + data_batch = self.data[batch_idx] + else: + data_batch = self.data + + coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param))) + data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0) + return data_grad + + def hess(self, param, batch_idx=None): + """Computes the Hessian of the logistic regression at a given point w. + + The Hessian is the matrix of second derivatives. + + If X,y is the data and reg is the regularizer, then the Hessian is + (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(/2)*2)^2 + where we assume y[i]^2==1. + """ + if batch_idx is not None: + data_batch = self.data[batch_idx] + batch_size = len(batch_idx) + else: + data_batch = self.data + batch_size = self.num_samples + + temp_var = np.dot(data_batch, param) / 2 + coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2 + raw_hess = np.dot(data_batch.T * coeff_hess, data_batch) + return raw_hess / batch_size + self.reg * np.eye(self.dim) + + def hess_wor(self, param, batch_idx=None): + """Computes the Hessian of the logistic regression at a given point w. + + The Hessian is the matrix of second derivatives. + + If X,y is the data, then the Hessian is + (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(/2)*2)^2 + where we assume y[i]^2==1. + """ + if batch_idx is not None: + data_batch = self.data[batch_idx] + batch_size = len(batch_idx) + else: + data_batch = self.data + batch_size = self.num_samples + + temp_var = np.dot(data_batch, param) / 2 + coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2 + raw_hess = np.dot(data_batch.T * coeff_hess, data_batch) + return raw_hess / batch_size + + def upperbound(self, param, batch_idx=None): + """Tightest universal quadratic upper bound on the loss function. 
+ + log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a) + Constant and linear terms are just first-order Taylor This function gives + the quadratic term (which replaces the Hessian) + https://twitter.com/shortstein/status/1557961202256318464 + """ + + if batch_idx is not None: + data_batch = self.data[batch_idx] + batch_size = len(batch_idx) + else: + data_batch = self.data + batch_size = self.num_samples + + temp_var = -np.dot(data_batch, param) # vector of y_i for i in [n] + # v = 0.5*np.tanh(a/2)/a, but, avoid 0/0 by special rule + temp_var2 = np.divide( + 0.5 * np.tanh(temp_var / 2), + temp_var, + out=np.ones(temp_var.shape) * 0.25, + where=np.abs(temp_var) > 1e-9, + ) + hess_non = np.dot(data_batch.T * temp_var2, data_batch) + return hess_non / batch_size + self.reg * np.eye(self.dim) + + def upperbound_wor(self, param, batch_idx=None): + """Tightest universal quadratic upper bound on the loss function. + + log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a) + Constant and linear terms are just first-order Taylor This function gives + the quadratic term (which replaces the Hessian) + """ + if batch_idx is not None: + data_batch = self.data[batch_idx] + batch_size = len(batch_idx) + else: + data_batch = self.data + batch_size = self.num_samples + + temp_var = -np.dot(data_batch, param) # vector of y_i for i in [n] + temp_var2 = np.divide( + 0.5 * np.tanh(temp_var / 2), + temp_var, + out=np.ones(temp_var.shape) * 0.25, + where=np.abs(temp_var) > 1e-9, + ) + hess_non = np.dot(data_batch.T * temp_var2, data_batch) + return hess_non / batch_size diff --git a/research/dp_newton/src/opt_algs.py b/research/dp_newton/src/opt_algs.py new file mode 100644 index 0000000..29cb74d --- /dev/null +++ b/research/dp_newton/src/opt_algs.py @@ -0,0 +1,434 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""file containing all auxillary functions for running the optimization algorithms""" + +# pylint: skip-file +# pyformat: disable + +import time +from my_logistic_regression import MyLogisticRegression +import numpy as np + + +class CompareAlgs: + """Class to run multiple iterative algorithms and compare the results.""" + + def __init__(self, lrp, optimal_w, hyper_dict): + """Initialize the problem. 
+
+    lrp = an instance of MyLogisticRegression
+    optimal_w = optimal minimizer of the logistic loss on the dataset without privacy
+    hyper_dict = dictionary of hyperparameters
+    """
+    self.w_opt = optimal_w
+    self.lrp = lrp
+    self.iters = hyper_dict["num_iteration"]
+    self.hyper_params = hyper_dict
+    self.clock_time = []
+    self.params = []
+    self.names = []
+
+  def add_algo(self, update_rule, name):
+    """Run an iterative update rule and record its iterates."""
+    _, dim = self.lrp.num_samples, self.lrp.dim
+    wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
+    w_int = wint_un / np.linalg.norm(wint_un)
+    cutoff_norm = (
+        100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
+    )
+    w_cur = w_int
+    params = [w_cur]
+    start_t = time.time()
+    wall_clock = [0]
+    for _ in range(self.iters):
+      w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
+      if np.linalg.norm(w_cur) > cutoff_norm:
+        w_cur = w_int
+        print("Iterate exploded; resetting to the initial point.")
+      params.append(w_cur)
+      wall_clock.append(time.time() - start_t)
+    self.clock_time.append(wall_clock)
+    self.params.append(params)
+    self.names.append(name)
+
+  def wall_clock_alg(self):
+    """Compute the wall-clock time of the different algorithms."""
+    clock_time_dict = {}
+    for time_alg, name in zip(self.clock_time, self.names):
+      clock_time_dict[name] = [time_alg]
+    return clock_time_dict
+
+  def loss_vals(self):
+    """Output the excess loss per iteration for the different methods."""
+    baseline = self.lrp.loss_wor(self.w_opt)
+    loss_dict = {}
+    for params, name in zip(self.params, self.names):
+      losses = [self.lrp.loss_wor(w) - baseline for w in params]
+      loss_dict[name] = [losses]
+    return loss_dict
+
+  def accuracy_vals(self):
+    """Output the accuracy per iteration for the different methods."""
+    acc_dict = {}
+    for params, name in zip(self.params, self.names):
+      acc_vec = [self.lrp.accuracy(w) for w in params]
+      acc_dict[name] = [acc_vec]
+    return acc_dict
+
+  def accuracy_np(self):
+    """Output the accuracy of the optimal model without privacy."""
+    return self.lrp.accuracy(self.w_opt)
+
+  def gradnorm_vals(self):
+    """Output the gradient norm per iteration for the different methods."""
+    gradnorm_dict = {}
+    for params, name in zip(self.params, self.names):
+      grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
+      gradnorm_dict[name] = [grad_norms]
+    return gradnorm_dict
+
+
+def private_newton(w_cur, lrp, hyper_dict):
+  """Implementation of the private Newton method from [ABL21].
+
+  w_cur = current iterate
+  lrp = an instance of MyLogisticRegression
+  hyper_dict = privacy budget and other hyperparameters
+
+  Returns the next iterate.
+  """
+  total = hyper_dict["total"]
+  grad_frac = hyper_dict["grad_frac"]
+  iters = hyper_dict["num_iteration"]
+  hess = lrp.hess(w_cur)
+  rho_grad = grad_frac * total / iters  # divide total privacy budget up.
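+  # grad_frac of each iteration's zCDP budget is spent on the noisy gradient;
+  # the remaining (1 - grad_frac) fraction is used to noise the Hessian below.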
+  rho_hess = (1 - grad_frac) * total / iters
+  hess_noise = np.random.normal(
+      scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
+      size=(lrp.dim, lrp.dim),
+  )
+  hess_noise = (hess_noise + hess_noise.T) / 2
+  hess_noisy = eigenclip(hess + hess_noise)
+  grad = lrp.grad(w_cur)
+  grad_noisy = grad + np.random.normal(
+      scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
+  )
+  dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
+  dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
+  return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy
+
+
+def eigenclip(sym_mat, min_eval=1e-5):
+  """Eigenvalue clipping operation.
+
+  sym_mat = symmetric matrix
+  min_eval = minimum eigenvalue for clipping
+
+  Returns the modified matrix.
+  """
+  eig_val, eig_vec = np.linalg.eigh(sym_mat)
+  eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
+  clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
+  return clipped_mat
+
+
+def gd_priv(w_cur, lrp, hyper_dict):
+  """Implementation of DP-GD.
+
+  w_cur = current point
+  lrp = logistic regression
+  hyper_dict = auxiliary information (privacy budget, number of iterations)
+
+  Output is the next iterate.
+  """
+  iters = hyper_dict["num_iteration"]
+  inv_lr_gd = 0.25  # inverse learning rate, set to the smoothness constant
+  sens = 1 / (lrp.num_samples * (inv_lr_gd))  # sensitivity
+  rho = hyper_dict["total"] / iters  # divide total privacy budget up
+  noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
+  return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise
+
+
+def sgd_priv(w_cur, lrp, hyper_dict):
+  """Implementation of DP-SGD.
+
+  w_cur = current point
+  lrp = logistic regression
+  hyper_dict = auxiliary information (batch size, noise multiplier)
+
+  Output is the next iterate.
+  """
+  batch_size = hyper_dict["batch_size"]
+  sigma_privacy = hyper_dict["noise_multiplier"]
+  lr_sgd = 4  # learning rate based on the smoothness
+  sample_rate = batch_size / lrp.num_samples  # sampling probability
+  sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
+  batch_idx = np.where(sample_vec == 1)[0]  # index of batch
+  batch_size_t = len(batch_idx)
+  noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
+  grad_minibatch = lrp.grad_wor(
+      w_cur, batch_idx
+  )  # average gradient over batch_idx
+  return w_cur - lr_sgd * (
+      batch_size_t / batch_size * grad_minibatch + noise / batch_size
+  )
+
+
+def gd_priv_optls(w_cur, lrp, hyper_dict):
+  """Implementation of DP-GD with backtracking line search.
+
+  Note: this method is not private. We only use it as a baseline.
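+  (The backtracking line search evaluates the exact, un-noised loss, so the
+  chosen step size leaks information about the data.)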
+
+  w_cur = current point
+  lrp = logistic regression
+  hyper_dict = auxiliary information (privacy budget, number of iterations)
+
+  Output is the next iterate.
+  """
+  iters = hyper_dict["num_iteration"]
+  rho_grad = hyper_dict["total"] / iters  # divide total privacy budget up
+  grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
+  grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
+  dir_srch = lrp.grad(w_cur) + grad_noise
+  stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
+  return w_cur - stepsize_opt * dir_srch
+
+
+def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
+  """Implementation of backtracking line search.
+
+  lrp = logistic regression
+  dir_srch = the "noisy" gradient direction
+  w_start = current point
+  alpha and beta trade off the precision and complexity of the line search
+
+  Output is a (close to) optimal step size.
+  """
+  step_size = 100
+  val_0 = lrp.loss(w_start)
+  inner_prod = np.dot(dir_srch, lrp.grad(w_start))
+  while (
+      lrp.loss(w_start - step_size * dir_srch)
+      >= val_0 - step_size * alpha * inner_prod
+  ):
+    step_size = beta * step_size
+    if step_size < 1e-6:
+      break
+  return step_size
+
+
+def newton(dataset, w_init, bias=True):
+  """Implementation of Newton's method with line search, without privacy constraints.
+
+  dataset = dataset in the format (features, labels)
+  w_init = initialization point
+
+  Output is the model parameter.
+  """
+  feature_vecs, labels = dataset
+  if bias is True:
+    feature_vecs = np.hstack(
+        (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
+    )
+  lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
+  w_cur = w_init
+  for _ in range(8):
+    hess = lrp.hess(w_cur)
+    dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
+    step_size = backtracking_ls(lrp, dir_srch, w_cur)
+    w_cur = w_cur - step_size * dir_srch
+  if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
+    w_out = w_cur
+  else:
+    w_out = w_init
+  return w_out
+
+
+class DoubleNoiseMech:
+  """Our Method: Double Noise Mechanism"""
+
+  def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
+    """Initializer of the double noise mechanism.
+
+    lrp = an instance of MyLogisticRegression
+    type_reg = minimum eigenvalue modification type, it can be either 'add' or
+      'clip'
+    curvature_info = type of the second-order information
+    """
+    self.type_reg = type_reg
+    self.curvature_info = curvature_info
+    if self.curvature_info == "hessian":
+      self.hess = lrp.hess_wor
+    elif self.curvature_info == "ub":
+      self.hess = lrp.upperbound_wor
+
+  def update_rule(self, w_cur, lrp, hyper_dict):
+    """Implementation of the double noise mechanism update rule--full batch"""
+    noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
+    w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
+    return w_next
+
+  def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
+    """Implementation of the double noise mechanism update rule--stochastic mini-batch variant"""
+    noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
+    w_next = self.noisy_direction_stochastic(
+        w_cur, lrp, hyper_dict, noisy_grad_cur
+    )
+    return w_next
+
+  def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
+    """Compute the noisy (optionally mini-batch) gradient."""
+    if batch is False:
+      rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
+          "num_iteration"
+      ]
+      noise_grad = np.random.normal(
+          scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
+      )
+      noisy_grad = lrp.grad(w_cur) + noise_grad
+    else:
+      std_grad = hyper_dict["noise_multiplier_grad"]
+      pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
+      sample_vec = np.random.binomial(n=1,
p=pgrad, size=lrp.num_samples) + batch_idx_grad = np.where(sample_vec == 1)[0] + grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad) + noise_grad = np.random.normal(scale=std_grad, size=lrp.dim) + noisy_grad = ( + len(batch_idx_grad) / (lrp.num_samples * pgrad) + ) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad) + return noisy_grad + + def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad): + """computing direction""" + total = hyper_dict["total"] + grad_frac = hyper_dict["grad_frac"] + frac_trace = hyper_dict["trace_frac"] + trace_coeff = hyper_dict["trace_coeff"] + iters = hyper_dict["num_iteration"] + rho_hess = (1 - grad_frac) * total / iters + smooth_param = 0.25 + hess_cur = self.hess(w_cur) + noisy_trace = trace_coeff * max( + np.trace(hess_cur) + + np.random.normal( + scale=(0.25 / lrp.num_samples) + * np.sqrt(0.5 / (frac_trace * rho_hess)) + ), + 0, + ) + min_eval = max( + (noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess)) + ** (1 / 3), + 1 / (lrp.num_samples), + ) + grad_norm = np.linalg.norm(noisy_grad) + if self.type_reg == "add": # Sensitivity is different for add vs clip + sens2 = ( + grad_norm + * smooth_param + / (lrp.num_samples * min_eval**2 + smooth_param * min_eval) + ) + noise2 = np.random.normal( + scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), + size=lrp.dim, + ) + return ( + w_cur + - np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad) + + noise2 + ) + # type_reg=clip + sens2 = ( + grad_norm + * smooth_param + / (lrp.num_samples * min_eval**2 - smooth_param * min_eval) + ) + noise2 = np.random.normal( + scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim + ) + eval_hess, evec_hess = np.linalg.eigh(hess_cur) + eval_trunc = eval_hess[eval_hess >= min_eval] + num_eig = len(eval_trunc) + if num_eig == 0: + hess_modified_inv = 1 / min_eval * np.eye(lrp.dim) + else: + evec_trun = evec_hess[:, -num_eig:] + hess_modified_inv = np.dot( + evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)), + evec_trun.T, + ) + 1 / min_eval * np.eye(lrp.dim) + return w_cur - (hess_modified_inv @ noisy_grad) + noise2 + + def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad): + """noisy direction for stochastic variant""" + std_hess = hyper_dict["noise_multiplier_hess"] + phess = hyper_dict["batchsize_hess"] / lrp.num_samples + min_eval = hyper_dict["min_eval"] + sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples) + batch_idx_hess = np.where(sample_vec == 1)[0] + batch_size_hess_t = len(batch_idx_hess) + hess_cur = ( + (batch_size_hess_t) + / (lrp.num_samples * phess) + * self.hess(w_cur, batch_idx_hess) + ) + smooth_param = 0.25 # smoothness parameter + grad_norm = np.linalg.norm(noisy_grad) + if self.type_reg == "add": # Sensitivity is different for add vs clip + sens2 = ( + grad_norm + * smooth_param + / ( + (lrp.num_samples * phess) * min_eval**2 + + smooth_param * min_eval + ) + ) + noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim) + return ( + w_cur + - np.linalg.solve( + hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad + ) + + noise2 + ) + # type_reg=clip + min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess))) + sens2 = ( + grad_norm + * smooth_param + / ( + (lrp.num_samples * phess) * min_eval_c**2 + - smooth_param * min_eval_c + ) + ) + noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim) + eval_hess, evec_hess = np.linalg.eigh(hess_cur) + eval_trunc = eval_hess[eval_hess >= min_eval_c] + num_eig = len(eval_trunc) + 
if num_eig == 0: + hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim) + else: + evec_trun = evec_hess[:, -num_eig:] + hess_modified_inv = np.dot( + evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)), + evec_trun.T, + ) + 1 / min_eval_c * np.eye(lrp.dim) + return w_cur - (hess_modified_inv @ noisy_grad) + noise2 diff --git a/research/dp_newton/src/print_results.py b/research/dp_newton/src/print_results.py new file mode 100644 index 0000000..5458ad3 --- /dev/null +++ b/research/dp_newton/src/print_results.py @@ -0,0 +1,47 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""print the achievable error of different algorithms""" + +# pylint: skip-file +# pyformat: disable + +import json +import os +import numpy as np + +RESULTS_PATH = './src/results/' +excess_loss = {} +opt_algs = [ + 'DPGD', + 'DN-Hess-add', + 'DN-UB-add', + 'DN-Hess-clip', + 'DN-UB-clip', + 'private-newton', +] +for filename in os.listdir(RESULTS_PATH): + f = os.path.join(RESULTS_PATH, filename) + with open(f, encoding='utf-8') as json_file: + data = json.load(json_file) + for alg in data.keys(): + if alg in opt_algs: + loss_avg = np.array(data[alg]['loss_avg']) + loss_std = np.array(data[alg]['loss_std']) + clock_time = np.array(data[alg]['clock_time_avg']) + print('optimization algorithm: ', alg) + print('excess loss: ' + str(loss_avg[-1])) + print('run time: ' + str(clock_time[-1]) + '(sec)') + print('-----') diff --git a/research/dp_newton/src/run.py b/research/dp_newton/src/run.py new file mode 100644 index 0000000..5d49910 --- /dev/null +++ b/research/dp_newton/src/run.py @@ -0,0 +1,264 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+
+"""Collection of helper functions to run and compare different algorithms."""
+
+# pylint: skip-file
+# pyformat: disable
+
+import argparse
+import json
+from dataset_loader import Mydatasets
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
+from scipy.optimize import fsolve
+
+
+def zcdp_to_eps(rho, delta):
+  """Convert a rho-zCDP guarantee to (eps, delta)-DP using Lemma 3.6 of [BS16].
+
+  rho : zCDP parameter
+  delta: delta in DP
+
+  return eps
+  """
+  return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))
+
+
+def eps_to_zcdp(eps, delta):
+  """Convert an (eps, delta)-DP guarantee to rho-zCDP.
+
+  eps : eps in DP
+  delta: delta in DP
+
+  return rho
+  """
+
+  def func_root(rho_zcdp):
+    return zcdp_to_eps(rho_zcdp, delta) - eps
+
+  root = fsolve(func_root, x0=0.001)[-1]
+  return root
+
+
+def helper_fun(datasetname, alg_type, params_exp):
+  """Helper function for running the different algorithms.
+
+  Args:
+    datasetname = dataset
+    alg_type = type of the optimization algorithm
+    params_exp = hyperparameters
+  """
+  feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
+  privacy_dp = params_exp["total"]
+  params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
+  log_reg = MyLogisticRegression(feature_vecs, labels)
+  alg_dict, filename_params = prepare_alg_dict(
+      alg_type, datasetname, privacy_dp, params_exp, log_reg
+  )
+  compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
+  result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
+  result["num-samples"] = len(labels)
+  with open(
+      "src/results/" + filename_params, "w", encoding="utf8"
+  ) as json_file:
+    json.dump(result, json_file)
+
+
+def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
+  """Prepare the update rules for the chosen algorithms and the output filename."""
+  alg_dict = None
+  filename_params = None
+  if alg_type == "double_noise":
+    filename_params = (
+        "so_" + datasetname + "_" + str(privacy_dp) + "_DP_"
+        + str(params_exp["num_iteration"]) + "_"
+        + str(params_exp["grad_frac"]) + "_"
+        + str(params_exp["trace_frac"]) + "_"
+        + str(params_exp["trace_coeff"]) + ".txt"
+    )
+    dnm_hess_add = DoubleNoiseMech(
+        log_reg, type_reg="add", curvature_info="hessian"
+    ).update_rule
+    dnm_ub_add = DoubleNoiseMech(
+        log_reg, type_reg="add", curvature_info="ub"
+    ).update_rule
+    dnm_hess_clip = DoubleNoiseMech(
+        log_reg, type_reg="clip", curvature_info="hessian"
+    ).update_rule
+    dnm_ub_clip = DoubleNoiseMech(
+        log_reg, type_reg="clip", curvature_info="ub"
+    ).update_rule
+    alg_dict = {
+        "DN-Hess-add": dnm_hess_add,
+        "DN-Hess-clip": dnm_hess_clip,
+        "DN-UB-clip": dnm_ub_clip,
+        "DN-UB-add": dnm_ub_add,
+    }
+  elif alg_type == "dp_gd":
+    filename_params = (
+        "gd_" + datasetname + "_" + str(privacy_dp) + "_DP_"
+        + str(params_exp["num_iteration"]) + ".txt"
+    )
+    alg_dict = {"DPGD": gd_priv}
+  elif alg_type == "damped_newton":
+    filename_params = (
+        "newton_" + datasetname + "_" + str(privacy_dp) + "_DP_"
+        + str(params_exp["num_iteration"]) + ".txt"
+    )
+    alg_dict = {"private-newton": private_newton}
+  return alg_dict, filename_params
+
+
+class RunReleaseStats:
+  """Helper class to run the different algorithms and store the results."""
+
+  def __init__(self, compare_algs, algs_dict, num_rep=10):
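+    """Store the comparison harness, the update rules to run, and the
+    number of repetitions used for averaging."""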
+    self.compare_algs = compare_algs
+    self.algs_dict = algs_dict
+    self.num_rep = num_rep
+    self.losses = 0
+    self.gradnorm = 0
+    self.accuracy = 0
+    self.wall_clock = 0
+
+  def run_algs(self):
+    """Run the different algorithms and store their statistics."""
+    for rep in range(self.num_rep):
+      for alg_name, alg_update_rule in self.algs_dict.items():
+        self.compare_algs.add_algo(alg_update_rule, alg_name)
+      losses_dict = self.compare_algs.loss_vals()
+      gradnorm_dict = self.compare_algs.gradnorm_vals()
+      accuracy_dict = self.compare_algs.accuracy_vals()
+      wall_clock_dict = self.compare_algs.wall_clock_alg()
+      if rep == 0:
+        self.losses = losses_dict
+        self.gradnorm = gradnorm_dict
+        self.accuracy = accuracy_dict
+        self.wall_clock = wall_clock_dict
+      else:
+        for alg in self.losses:
+          self.losses[alg].extend(losses_dict[alg])
+          self.gradnorm[alg].extend(gradnorm_dict[alg])
+          self.accuracy[alg].extend(accuracy_dict[alg])
+          self.wall_clock[alg].extend(wall_clock_dict[alg])
+
+  def summarize_stats(self):
+    """Summarize the results across repetitions."""
+    self.run_algs()
+    result = {}
+    result["acc-best"] = self.compare_algs.accuracy_np().tolist()
+    for alg in self.losses:
+      result[alg] = {}
+      loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
+      loss_std = np.std(np.array(self.losses[alg]), axis=0)
+      result[alg]["loss_avg"] = (loss_avg).tolist()
+      result[alg]["loss_std"] = (loss_std / np.sqrt(self.num_rep)).tolist()
+      gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
+      gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
+      result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
+      result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
+      acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
+      acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
+      result[alg]["acc_avg"] = (acc_avg).tolist()
+      result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
+      clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
+      clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
+      result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
+      result[alg]["clock_time_std"] = (
+          clocktime_std / np.sqrt(self.num_rep)
+      ).tolist()
+
+    return result
+
+
+def main():
+  """Main entry point."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument("--datasetname")
+  parser.add_argument("--alg_type")
+  parser.add_argument("--total")
+  parser.add_argument("--numiter")
+  # double noise and newton
+  parser.add_argument("--grad_frac")
+  parser.add_argument("--trace_frac")
+  parser.add_argument("--trace_coeff")
+  args = parser.parse_args()
+  datasetname = args.datasetname
+  alg_type = args.alg_type
+  total = float(args.total)
+  num_iter = int(args.numiter)
+  if alg_type == "double_noise":
+    grad_frac = float(args.grad_frac)
+    trace_frac = float(args.trace_frac)
+    trace_coeff = float(args.trace_coeff)
+    hyper_parameters = {
+        "total": total,
+        "grad_frac": grad_frac,
+        "trace_frac": trace_frac,
+        "trace_coeff": trace_coeff,
+        "num_iteration": num_iter,
+    }
+  elif alg_type == "dp_gd":
+    hyper_parameters = {"total": total, "num_iteration": num_iter}
+  elif alg_type == "damped_newton":
+    grad_frac = float(args.grad_frac)
+    hyper_parameters = {
+        "total": total,
+        "num_iteration": num_iter,
+        "grad_frac": grad_frac,
+    }
+  else:
+    raise ValueError("no such optimization algorithm exists")
+  print(
+      "optimization algorithm: " + alg_type + ", dataset name: " + datasetname
+  )
+  helper_fun(datasetname, alg_type, hyper_parameters)
+
+
+if __name__ == "__main__":
+  main()
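For reviewers who want a quick sanity check of the privacy-accounting step in `run.py`: the sketch below (standalone, not part of the patch) reproduces the `zcdp_to_eps` / `eps_to_zcdp` pair from the diff and verifies that the numerical inversion recovers the target eps. The values `eps = 1.0` and `num_samples = 10_000` are illustrative only; `helper_fun` sets `delta = 1/n^2` from the actual dataset size.

```python
# Standalone sanity check of the zCDP <-> (eps, delta) conversion used in run.py.
# The closed form is Lemma 3.6 of [BS16]; eps and num_samples below are made up.
import numpy as np
from scipy.optimize import fsolve


def zcdp_to_eps(rho, delta):
  """eps such that rho-zCDP implies (eps, delta)-DP."""
  return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))


def eps_to_zcdp(eps, delta):
  """Numerically invert zcdp_to_eps, as run.py does with fsolve."""
  return fsolve(lambda rho: zcdp_to_eps(rho, delta) - eps, x0=0.001)[-1]


if __name__ == "__main__":
  num_samples = 10_000                          # hypothetical dataset size
  eps, delta = 1.0, (1.0 / num_samples) ** 2    # delta = 1/n^2 as in helper_fun
  rho = eps_to_zcdp(eps, delta)
  print("rho =", rho)
  print("round-trip eps =", zcdp_to_eps(rho, delta))  # should be ~1.0
```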