# tensorflow_privacy/research/dp_newton/dpso-logistic.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
# -*- coding: utf-8 -*-
"""Differentially Private Second-Order Methods for Logistic Regression.
This script implements several algorithms for DP logistic regression and
tests them on various datasets. It produces plots for our upcoming paper.
Code exported from Colab. Written by Mahdi Haghifam.
"""
# pylint: disable=invalid-name
# We use upper case to denote matrices and lower case for vectors.
# This conflicts with pylint's variable naming rules.
# pylint: disable=redefined-outer-name
# This is a script hence we have "global" variables.
# pylint: disable=unused-argument
# The update rule functions are meant to share the same signature,
# so we cannot simply drop unused arguments. Ideally these would be
# implemented as classes, but plain functions are simpler here.
import math
import urllib.request
# from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
import sklearn.datasets
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
# from tensorflow import keras
class MyLogisticRegression:
"""Represents a logistic regression problem.
There is a dataset consisting of features (vectors of norm <=1)
and labels (+1,-1), represented as a numpy array.
There is also an L2 regularizer.
"""
def __init__(self, X, y, reg=1e-8):
"""Initialize the data and the regularizer.
Args:
X: n x d numpy array representing features
y: n x 1 numpy array representing labels
reg: L2 regularizing coefficient (to ensure solution is finite)
Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
"""
self.reg = float(reg)
X = np.array(X)
y = np.array(y)
assert len(X.shape) == 2
assert len(y.shape) == 1
self.n, self.d = X.shape
assert y.shape[0] == self.n
signed_data = X * y[:, np.newaxis]
norm = np.linalg.norm(signed_data, axis=1)
scale = np.maximum(norm, np.ones_like(norm))
self.data = (1 / scale[:, None]) * signed_data
def loss(self, w):
"""Computes the loss represented by this object at w.
Args:
w: weight vector
Returns:
If X,y is the data and reg is the regularizer, then the loss is
(1/n)sum_i^n log(1+exp(-<w,X[i,:]*y[i]>)) + (reg/2)||w||^2
"""
data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w))))
reg_loss = 0.5 * self.reg * np.linalg.norm(w)**2
return data_loss + reg_loss
def loss_wor(self, w):
"""Computes the loss represented by this object at w without regularizer.
Args:
w: weight vector
Returns:
      If X,y is the data, then the loss is
(1/n)sum_i^n log(1+exp(-<w,X[i,:]*y[i]>))
"""
data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w))))
return data_loss
def grad(self, w):
"""Computes the gradient of the logistic regression at a given point w.
Args:
w: weight vector
Returns:
If X,y is the data and reg is the regularizer, then the gradient is
(-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
"""
coeff_grad = -1/(1+np.exp(np.dot(self.data, w)))
data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0)
return data_grad + self.reg * w
def grad_wor(self, w):
"""Computes the gradient of the logistic regression at a given point w.
Args:
w: weight vector
Returns:
If X,y is the data and reg is the regularizer, then the gradient is
(-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
"""
coeff_grad = -1/(1+np.exp(np.dot(self.data, w)))
data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0)
return data_grad
def hess(self, w):
"""Computes the Hessian of the logistic regression at a given point w.
Args:
w: weight vector
Returns:
The Hessian is the matrix of second derivatives.
      If X,y is the data and reg is the regularizer, then the Hessian is
      (1/n)sum_i^n X[i,:]*X[i,:]^T / (2*cosh(<w,X[i,:]*y[i]>/2))^2 + reg*I
      where we assume y[i]^2==1.
"""
a = np.dot(self.data, w)/2
coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2
raw_hess = np.dot(self.data.T * coeff_hess, self.data)
return raw_hess/self.n + self.reg * np.eye(self.d)
def hess_wor(self, w):
"""Computes the Hessian of the logistic regression at a given point w.
Args:
w: weight vector
Returns:
The Hessian is the matrix of second derivatives.
If X,y is the data, then the Hessian is
      (1/n)sum_i^n X[i,:]*X[i,:]^T / (2*cosh(<w,X[i,:]*y[i]>/2))^2
where we assume y[i]^2==1.
"""
a = np.dot(self.data, w)/2
coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2
raw_hess = np.dot(self.data.T * coeff_hess, self.data)
return raw_hess/self.n
def upperbound(self, w):
"""Computes tightest universal quadratic upper bound on the loss function.
log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
Constant and linear terms are just first-order Taylor
This function gives the quadratic term (which replaces the Hessian)
https://twitter.com/shortstein/status/1557961202256318464
Args:
w: weight vector
Returns:
Matrix H such that for all v
      loss(v) <= loss(w) + <grad(w),v-w> + <H(v-w),v-w>/2
"""
a = -np.dot(self.data, w) # vector of y_i<x_i,w> for i in [n]
# v = 0.5*np.tanh(a/2)/a
# But avoid 0/0 by special rule
v = np.divide(
0.5 * np.tanh(a / 2),
a,
out=(np.ones(a.shape) * 0.25),
where=(np.abs(a) > 1e-9))
H = np.dot(self.data.T * v, self.data)
return H / self.n + self.reg * np.eye(self.d)
def upperbound_wor(self, w):
"""Computes tightest quadratic upper bound on the unregularized loss.
log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
Constant and linear terms are just first-order Taylor
This function gives the quadratic term (which replaces the Hessian)
https://twitter.com/shortstein/status/1557961202256318464
Args:
w: weight vector
Returns:
Matrix H such that for all v
      loss_wor(v) <= loss_wor(w) + <grad_wor(w),v-w> + <H(v-w),v-w>/2
"""
a = -np.dot(self.data, w) # vector of y_i<x_i,w> for i in [n]
# v = 0.5*np.tanh(a/2)/a
# But avoid 0/0 by special rule
v = np.divide(
0.5 * np.tanh(a / 2),
a,
out=(np.ones(a.shape) * 0.25),
where=(np.abs(a) > 1e-9))
H = np.dot(self.data.T * v, self.data)
return H / self.n
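# A minimal sanity-check sketch for MyLogisticRegression (added for
# illustration; it is never called below and the helper name is our own).
# It checks grad() against a central finite difference and spot-checks that
# upperbound() really majorizes the loss, as claimed in its docstring.
def _sanity_check_logistic_regression(n=20, d=5, seed=0):
  rng = np.random.default_rng(seed)
  X = rng.normal(size=(n, d))
  y = np.where(rng.normal(size=n) > 0, 1.0, -1.0)
  lr_check = MyLogisticRegression(X, y, reg=1e-6)
  w = rng.normal(size=d)
  # Central finite-difference approximation of the gradient.
  eps = 1e-5
  fd_grad = np.array([
      (lr_check.loss(w + eps * np.eye(d)[j]) -
       lr_check.loss(w - eps * np.eye(d)[j])) / (2 * eps) for j in range(d)
  ])
  assert np.allclose(fd_grad, lr_check.grad(w), atol=1e-6)
  # Quadratic bound: loss(v) <= loss(w) + <grad(w),v-w> + <H(v-w),v-w>/2.
  v = w + rng.normal(size=d)
  H = lr_check.upperbound(w)
  quad = (lr_check.loss(w) + np.dot(lr_check.grad(w), v - w) +
          0.5 * np.dot(v - w, np.dot(H, v - w)))
  assert lr_check.loss(v) <= quad + 1e-12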
class Mydatasets:
"""Represents datasets we use for testing the algorithms.
"""
def __init__(self):
pass
def find_optimal_classifier(self, dataset, reg=1e-9):
"""Find the optimal weight vector for the logistic regression.
Args:
dataset: training dataset
reg: regularizer
Returns:
Optimal weight vector.
"""
X, y = dataset
model_lr = LogisticRegression(max_iter=10000, C=1/reg).fit(X, y)
w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
w_opt = newton(dataset, w_opt1)
print("optimal weight vector norms", np.linalg.norm(w_opt))
return w_opt
def mnist_binary(self):
"""Download and extract MNIST data.
We also select only two labels for the binary classification task.
Returns:
Features, labels, and optimal weight vector.
"""
labels = [1, 7]
label0, label1 = int(labels[0]), int(labels[1])
mnist = tf.keras.datasets.mnist
(x_train, y_train), (_, _) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], -1)
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
nrm = np.linalg.norm(x_train, axis=1)
x_train = x_train * 1/nrm[:, None]
y_train = y_train.astype(float)
indx0 = np.nonzero(y_train == label0)[0]
indx1 = np.nonzero(y_train == label1)[0]
y_train[indx0] = -1
y_train[indx1] = 1
indx = np.concatenate((indx0, indx1))
x_train = x_train[indx]
labels = y_train[indx]
dataset = x_train, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)),
x_train)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def w8a_dataset(self):
"""w8a dataset for logistic regression.
"""
num_points = 15e3
w8a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/w8a"
data_path = "./w8a"
urllib.request.urlretrieve(w8a_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
selected_samples = np.random.choice(len(X), int(num_points))
X = X[selected_samples, :]
labels = labels[selected_samples]
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def a1a_dataset(self):
"""Loads a1a dataset for logistic regression.
"""
a1a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a"
data_path = "./a1a"
urllib.request.urlretrieve(a1a_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def phishing(self):
"""phishing dataset for logistic regression.
"""
phishing_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/phishing"
data_path = "./phishing"
urllib.request.urlretrieve(phishing_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def a5a_dataset(self):
"""a5a dataset for logistic regression.
"""
a5a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a"
data_path = "./a5a"
urllib.request.urlretrieve(a5a_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def a6a_dataset(self):
"""a6a dataset for logistic regression.
"""
    a6a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a6a"
data_path = "./a6a"
urllib.request.urlretrieve(a6a_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def madelon(self):
"""madelon dataset for logistic regression.
"""
madelon_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/madelon"
data_path = "./madelon"
urllib.request.urlretrieve(madelon_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def mushroom_dataset(self):
"""mushroom dataset for logistic regression.
"""
mushroom_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/mushrooms"
data_path = "./mushrooms"
urllib.request.urlretrieve(mushroom_url, data_path)
X, labels = sklearn.datasets.load_svmlight_file(data_path)
X = X.toarray()
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
nrm = np.linalg.norm(X, axis=1)
X = X * 1/nrm[:, None]
labels = labels.astype(float)
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
def synthetic_data(self, n=5000, d=50, cov=None, w=None):
"""Generates a synthetic dataset for logistic regression.
Args:
n: number of samples
d: dimension
cov: covariance of the data (optional, default: identity)
      w: true coefficient vector (optional, default: all-ones vector)
Returns:
Synthetic dataset.
Features are unit vectors (by default uniformly random).
Labels are sampled from logistic distribution,
where argument w is the "true" solution.
"""
mean = np.zeros(d)
if cov is None:
cov = np.eye(d)
X_un = np.random.multivariate_normal(mean, cov, n)
nrm = np.linalg.norm(X_un, axis=1)
X = X_un * 1/nrm[:, None]
if w is None:
w = np.ones(d)
w[0] = 1
inner_prod = np.dot(X, w)
params = np.exp(inner_prod)/(1+np.exp(inner_prod))
labels = 2*np.random.binomial(1, params)-1
dataset = X, labels
w_opt = self.find_optimal_classifier(dataset)
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
X)) # adding a dummy dimension for the bias term.
return X, labels, w_opt
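# The LIBSVM loaders above all repeat the same preprocessing pipeline. The
# sketch below (our own summary helper, not used by the experiments; the
# per-dataset methods are kept as exported from the Colab) shows the shared
# steps in one place: standardize, normalize rows to unit norm, fit the
# non-private baseline, then prepend a bias column.
def _load_libsvm_binary_sketch(url, path, reg=1e-9):
  urllib.request.urlretrieve(url, path)
  X, labels = sklearn.datasets.load_svmlight_file(path)
  X = preprocessing.StandardScaler().fit_transform(X.toarray())
  X = X / np.linalg.norm(X, axis=1)[:, None]
  labels = labels.astype(float)
  w_opt = Mydatasets().find_optimal_classifier((X, labels), reg)
  X = np.hstack((np.ones((X.shape[0], 1)), X))  # bias column
  return X, labels, w_opt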
class CompareAlgs:
"""Class to run multiple iterative algorithms and compare the results."""
def __init__(self,
lr,
dataset,
optimal_w,
iters=10,
w0=None,
pb=None):
"""Initialize the problem."""
X, _ = dataset
self.w_opt = optimal_w
n, d = np.shape(X)
print("dataset is created: (number of samples, dimension)=" + str(n) + "," +
str(d))
if w0 is None:
w0_un = np.random.multivariate_normal(np.zeros(d), np.eye(d))
w0 = w0_un/np.linalg.norm(w0_un)
self.w0 = w0 # initial iterate
self.iters = iters
self.pb = pb
self.lr = lr
self.plots = [] # List of lists of iterates
self.names = [] # List of names
self.linestyles = [] # List of line styles for plotting
    # Heuristic divergence threshold (problem-dependent); iterates whose norm
    # exceeds it are reset to the initial point in add_plot().
    self.cutoff = 20 * np.linalg.norm(self.w_opt) + 20 * np.linalg.norm(
        self.w0) + 10
def add_plot(self, update_rule, name, linestyle):
"""Run an iterative update method & add to plot.
Args:
      update_rule: a function that takes 5 arguments:
        current iterate
        LogisticRegression problem
        index of current iterate
        total number of iterations
        privacy budget (pb) or similar auxiliary information
name: string to display in legend
linestyle: line style for plot
"""
baseline = self.lr.loss_wor(self.w_opt)
print(name)
w = self.w0
plot = [w]
for i in range(self.iters):
w = update_rule(w, self.lr, i, self.iters, self.pb)
      if np.linalg.norm(w) > self.cutoff:  # reset diverging iterates
        w = self.w0
        print("Iterate exceeded cutoff; resetting to initial point.")
plot.append(w)
print(
str(i) + ": ||grad||=" + str(np.linalg.norm(self.lr.grad_wor(w))) +
" ex_loss=" + str(self.lr.loss_wor(w) - baseline))
self.plots.append(plot)
self.names.append(name)
self.linestyles.append(linestyle)
print()
def plot_grad_norms(self, legend=True):
"""Plot gradient norms for each iteration.
"""
plt.clf()
for plot, name, ls in zip(self.plots, self.names, self.linestyles):
grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot]
plt.plot(range(self.iters+1), grad_norms, ls, label=name)
plt.yscale("log")
ymax = np.linalg.norm(self.lr.grad(self.plots[0][0]))
plt.ylim(top=ymax)
# plt.ylim((1e-3, 1e-1))
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
    if legend:
      plt.legend()
plt.show()
def loss_vals(self):
"""Outputs the loss vector for different methods.
"""
baseline = self.lr.loss_wor(self.w_opt)
loss_dict = {}
for plot, name in zip(self.plots, self.names):
losses = [self.lr.loss_wor(w)-baseline for w in plot]
loss_dict[name] = [losses]
return loss_dict
def gradnorm_vals(self):
"""Outputs the gradient norm for different methods.
"""
gradnorm_dict = {}
for plot, name in zip(self.plots, self.names):
grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot]
gradnorm_dict[name] = [grad_norms]
return gradnorm_dict
def plot_losses(self):
"""Plots excess loss for each iteration.
output is a dictionary where the keys are name of method and value is
the loss for each iteration.
"""
baseline = self.lr.loss_wor(self.w_opt)
plt.clf()
for plot, name, ls in zip(self.plots, self.names, self.linestyles):
losses = [self.lr.loss_wor(w)-baseline for w in plot]
plt.plot(range(self.iters+1), losses, ls, label=name)
# plt.yscale('log')
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.legend()
plt.show()
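# Illustrative (and unused) driver for CompareAlgs on a small synthetic
# problem, assuming the update-rule signature used throughout this script:
# update_rule(w, lr, i, iters, pb). The plain gradient-descent rule and the
# crude reference optimum below are our own stand-ins, not part of the
# paper's experiments (helper_fun below shows the real usage).
def _compare_algs_smoke_test(n=200, d=10, seed=0):
  rng = np.random.default_rng(seed)
  X = rng.normal(size=(n, d))
  X = X / np.linalg.norm(X, axis=1)[:, None]
  y = np.where(X[:, 0] + 0.3 * rng.normal(size=n) > 0, 1.0, -1.0)
  lr_toy = MyLogisticRegression(X, y)
  # Crude non-private reference optimum via gradient descent, step = 1/smoothness.
  w_ref = np.zeros(d)
  for _ in range(500):
    w_ref = w_ref - 4.0 * lr_toy.grad(w_ref)
  def plain_gd(w, lr, i, iters, pb):  # same signature as gd_priv below
    return w - 4.0 * lr.grad(w)
  c = CompareAlgs(lr_toy, (X, y), w_ref, iters=10, pb=None)
  c.add_plot(plain_gd, "GD (non-private)", "k-")
  return c.loss_vals()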
def gd_priv(w, lr, i, iters, pb):
"""Implementation of DP-GD.
Args:
w: current point
lr: logistic regression
i: iteration number
iters: total number of iterations
    pb: auxiliary information
Returns:
The next iterate.
"""
  inv_lr_gd = 0.25  # inverse learning rate = smoothness constant of the logistic loss
  sens = 1/(lr.n*(inv_lr_gd+lr.reg))  # L2 sensitivity of the gradient step
rho = pb["total"] / iters # divide total privacy budget up
noise = np.random.normal(scale=sens/math.sqrt(2*rho), size=lr.d)
return w - lr.grad(w)/(inv_lr_gd+lr.reg) + noise
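# The update rules in this script calibrate Gaussian noise the same way: to
# release a quantity with L2 sensitivity `sens` under rho-zCDP, add
# N(0, sigma^2 I) with sigma = sens / sqrt(2 * rho). A small helper capturing
# this convention (our own naming; the update rules inline it directly):
def _zcdp_gaussian_noise(sens, rho, d):
  sigma = sens / math.sqrt(2 * rho)
  return np.random.normal(scale=sigma, size=d)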
def gd_priv_backtrackingls(w, lr, i, iters, pb):
"""Implementation of DP-GD with back-tracking line search.
!!! this method is not private. We only use it as a baseline.
Args:
w: current point
lr: logistic regression
i: iteration number
iters: total number of iterations
    pb: auxiliary information
Returns:
The next iterate
"""
rho_grad = pb["total"] / iters # divide total privacy budget up
grad_scale = (1/lr.n)*math.sqrt(0.5/rho_grad)
grad_noise = np.random.normal(scale=grad_scale, size=lr.d)
direction = lr.grad(w)+grad_noise
stepsize_opt = backtracking_ls(lr, direction, w)
return w - stepsize_opt * direction
def backtracking_ls(lr, direction, w_0, alpha=0.4, beta=0.95):
"""Implementation of backtracking line search.
Args:
lr: logistic regression
direction: the "noisy" gradient direction
w_0: current point
    alpha: sufficient-decrease (Armijo) parameter
    beta: multiplicative factor by which the step size is shrunk
Returns:
A good stepsize
"""
t = 100
while lr.loss(w_0 - t * direction
) >= lr.loss(w_0) - t * alpha * np.dot(direction, lr.grad(w_0)):
t = beta * t
if t < 1e-10:
break
return t
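# Usage sketch for backtracking_ls (illustration only, not called below as-is):
# `direction` is treated as a descent direction to be *subtracted*, so the
# Armijo test compares loss(w - t*direction) against
# loss(w) - t*alpha*<direction, grad(w)>.
def _backtracking_step_example(lr, w):
  direction = lr.grad(w)  # e.g. the (noisy) gradient itself
  t = backtracking_ls(lr, direction, w)
  return w - t * direction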
def newton(dataset, w_opt2):
"""Newton update rule.
"""
X, y = dataset
X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X))
lr = MyLogisticRegression(X, y, reg=1e-9)
w_opt = w_opt2
_, d = np.shape(X)
w = np.zeros(d)
for _ in range(30):
H = lr.hess(w)
direction = np.linalg.solve(H, lr.grad(w))
step_size = backtracking_ls(lr, direction, w)
w = w - step_size * direction
if lr.loss_wor(w) < lr.loss_wor(w_opt2):
w_opt = w
return w_opt
def newton_ur(w, lr, i, iters, pb):
  """Non-private Newton update rule with backtracking line search (baseline)."""
  H = lr.hess(w)
direction = np.linalg.solve(H, lr.grad(w))
step_size = backtracking_ls(lr, direction, w)
return w - step_size * direction
class DoubleNoiseMech:
"""Our Method: Double Noise."""
def __init__(self,
lr,
type_reg="add",
hyper_tuning=False,
curvature_info="hessian",
plot_eigen=False):
"""Initializes the algorithm.
Args:
lr: logistic regression problem we are solving.
type_reg: "add" or "clip" -- how we regularize eigenvalues.
hyper_tuning: do we tune the hyperparameters.
curvature_info: "hessian" or "ub" -- what quadratic we use.
plot_eigen: show eigenvalues for debugging purposes.
"""
self.type_reg = type_reg
self.hyper_tuning = hyper_tuning
self.curvature_info = curvature_info
self.plot_eigen = plot_eigen
if self.curvature_info == "hessian":
self.H = lr.hess_wor
elif self.curvature_info == "ub":
self.H = lr.upperbound_wor
def find_opt_reg_wop(self, w, lr, noisy_grad, rho_hess):
"""Implementation of finding the optimal lambda.
Here, we don't pay for privacy of doing it.
Args:
w: current point
lr: logistic regression problem
noisy_grad: the gradient estimate
rho_hess: the privacy budget
Returns:
The next iterate.
"""
increase_factor = 1.5 # at each step we increase the clipping
if self.type_reg == "add":
lambda_cur = 5e-6 # starting parameter
elif self.type_reg == "clip":
lambda_cur = 0.25/lr.n + 1e-5 # starting parameter,
num_noise_sample = 5 # we want to estimate expected value over the noise
grad_norm = np.linalg.norm(noisy_grad)
H = self.H(w)
    best_loss = np.inf
    lambda_star = lambda_cur  # fallback in case no candidate improves the loss
while lambda_cur <= 0.25:
H = self.hess_mod(w, lambda_cur)
if self.type_reg == "add": # Sensitivity is different for add vs clip
sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 + 0.25*lambda_cur)
elif self.type_reg == "clip":
sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 - 0.25*lambda_cur)
loss_ = 0
for _ in range(num_noise_sample):
noise2 = np.random.normal(scale=sens2*math.sqrt(.5/rho_hess), size=lr.d)
loss_ = loss_ + lr.loss_wor(w - np.linalg.solve(H, noisy_grad) + noise2)
if loss_ < best_loss:
best_loss = loss_
lambda_star = lambda_cur
lambda_cur = lambda_cur * increase_factor
return lambda_star
def update_rule(self, w, lr, i, iters, pb):
"""update rule."""
total = pb["total"]
grad_frac = pb["grad_frac4"]
rho1 = grad_frac * total / iters # divide total privacy budget for gradient
rho2 = (1-grad_frac) * total / iters # divide total privacy budget
sc1 = (1/lr.n) * math.sqrt(0.5/rho1)
noise1 = np.random.normal(scale=sc1, size=lr.d)
noisy_grad = lr.grad(w)+noise1
grad_norm = np.linalg.norm(noisy_grad)
m = 0.25 # smoothness parameter
frac_trace = 0.1 # fraction of privacy budget for estimating the trace.
H = self.H(w)
if self.plot_eigen:
val, _ = np.linalg.eigh(H)
hist, bin_edges = np.histogram(val, bins=300, range=(0, 0.01))
cdf_vals = np.cumsum(hist)
plt.clf()
plt.plot(bin_edges[1:], cdf_vals)
plt.show()
if self.hyper_tuning:
min_eval = self.find_opt_reg_wop(w, lr, noisy_grad, rho2)
print("optimized min_eval", min_eval)
else:
noisy_trace = max(
np.trace(H) + np.random.normal(
scale=(0.25 / lr.n) * math.sqrt(0.5 / (frac_trace * rho2))), 0)
min_eval = (noisy_trace / ((lr.n)**2 *
(1 - frac_trace) * rho2))**(1 / 3) + 5e-4
print("approx min_eval ", min_eval)
H = self.hess_mod(w, min_eval, lr.reg)
if self.type_reg == "add": # Sensitivity is different for add vs clip
sens2 = grad_norm * m/(lr.n * min_eval**2 + m * min_eval)
elif self.type_reg == "clip":
sens2 = grad_norm * m / (lr.n * min_eval**2 - m * min_eval)
noise2 = np.random.normal(
scale=sens2 * math.sqrt(0.5 / ((1 - frac_trace) * rho2)), size=lr.d)
return w - np.linalg.solve(H, noisy_grad) + noise2
  def hess_mod(self, w, min_eval, reg=1e-9):
    """Returns the curvature matrix with small eigenvalues regularized.
    type_reg "clip" raises eigenvalues below min_eval up to min_eval;
    "add" adds min_eval to every eigenvalue.
    """
    if self.type_reg == "clip":
evals, evec = np.linalg.eigh(self.H(w))
# true_min = np.min(evals)
evals = np.maximum(evals, min_eval*np.ones(evals.shape))
Hclipped = np.dot(evec * evals, evec.T)
return Hclipped
elif self.type_reg == "add":
return self.H(w) + min_eval*np.eye(len(self.H(w)))
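# The two eigenvalue-regularization modes in DoubleNoiseMech.hess_mod,
# illustrated on a small symmetric matrix (a sketch for intuition only, not
# used by helper_fun below): "add" shifts every eigenvalue up by min_eval,
# while "clip" only raises the eigenvalues that fall below min_eval.
def _eigenvalue_regularization_demo(min_eval=0.1, seed=0):
  rng = np.random.default_rng(seed)
  A = rng.normal(size=(4, 4))
  H = np.dot(A, A.T) / 10.0  # symmetric PSD test matrix
  evals, evecs = np.linalg.eigh(H)
  H_add = H + min_eval * np.eye(4)  # eigenvalues become evals + min_eval
  H_clip = np.dot(evecs * np.maximum(evals, min_eval), evecs.T)
  return H_add, H_clip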
def helper_fun(datasetname, pb, num_rep=5, Tuning=False, plot_eigen=False):
"""This function loads the data & runs the algorithms.
Args:
datasetname: name of the dataset
pb: a dictionary with the parameters
num_rep: number of times we repeat the optimization for reporting average
    Tuning: whether to run an exhaustive search for the best minimum eigenvalue
plot_eigen: Show eigenvalues
Returns:
losses and gradient norms
"""
datasets = Mydatasets()
X, y, w_opt = getattr(datasets, datasetname)()
dataset = X, y
lr = MyLogisticRegression(X, y, reg=1e-8)
dnm_hess_add = DoubleNoiseMech(
lr,
type_reg="add",
hyper_tuning=False,
curvature_info="hessian",
plot_eigen=plot_eigen).update_rule
dnm_ub_add = DoubleNoiseMech(
lr,
type_reg="add",
hyper_tuning=False,
curvature_info="ub",
plot_eigen=plot_eigen).update_rule
dnm_hess_clip = DoubleNoiseMech(
lr,
type_reg="clip",
hyper_tuning=False,
curvature_info="hessian",
plot_eigen=plot_eigen).update_rule
dnm_ub_clip = DoubleNoiseMech(
lr,
type_reg="clip",
hyper_tuning=False,
curvature_info="ub",
plot_eigen=plot_eigen).update_rule
if Tuning:
# dnm_hess_add_ht = DoubleNoiseMech(lr,type_reg='add',
# hyper_tuning=True,curvature_info='hessian').update_rule
# dnm_ub_add_ht = DoubleNoiseMech(lr,type_reg='add',
# hyper_tuning=True,curvature_info='ub').update_rule
dnm_hess_clip_ht = DoubleNoiseMech(
lr,
type_reg="clip",
hyper_tuning=True,
curvature_info="hessian",
plot_eigen=plot_eigen).update_rule
# dnm_ub_clip_ht = DoubleNoiseMech(lr,type_reg='clip',
# hyper_tuning=True,curvature_info='ub').update_rule
c = CompareAlgs(lr, dataset, w_opt, iters=10, pb=pb)
for rep in range(num_rep):
c.add_plot(gd_priv, "DPGD", "y--")
c.add_plot(dnm_hess_add, "DN-Hess-add", "k-")
c.add_plot(dnm_ub_add, "DN-UB-add", "b-")
c.add_plot(dnm_hess_clip, "DN-Hess-clip", "k*-")
c.add_plot(dnm_ub_clip, "DN-UB-clip", "b*-")
c.add_plot(gd_priv_backtrackingls, "DP-GD-Oracle", "m")
if Tuning:
c.add_plot(dnm_hess_clip_ht, "DN-Hess-clip-T", "r*-")
# c.add_plot(dnm_hess_add_ht,"DN-Hess-add-T",'r-')
# c.add_plot(dnm_ub_clip_ht,"DN-UB-clip-T",'g*-')
# c.add_plot(dnm_ub_add_ht,"DN-UB-add-T",'g-')
losses_dict = c.loss_vals()
gradnorm_dict = c.gradnorm_vals()
if rep == 0:
losses_total = losses_dict
gradnorm_total = gradnorm_dict
else:
for names in losses_total:
losses_total[names].extend(losses_dict[names])
gradnorm_total[names].extend(gradnorm_dict[names])
return losses_total, gradnorm_total
linestyle = {
"DPGD": "y-",
"DN-Hess-add": "k+-",
"DN-UB-add": "b-",
"DN-Hess-clip": "r*-",
"DN-UB-clip": "g-",
"DP-GD-Oracle": "c-"
}
facecolor = {
"DPGD": "yellow",
"DN-Hess-add": "black",
"DN-UB-add": "blue",
"DN-Hess-clip": "red",
"DN-UB-clip": "green",
"DP-GD-Oracle": "cyan"
}
alg_plt = [
"DPGD",
"DN-Hess-add",
"DN-UB-add",
"DN-Hess-clip",
"DN-UB-clip",
"DP-GD-Oracle"
]
# Synthetic data
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4":
0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"synthetic_data", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(1)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("synth.pdf")
plt.figure(2)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("synth-grad.pdf")
# a5a dataset
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4":
0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"a5a_dataset", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(3)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("a5a.pdf")
plt.figure(4)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("a5a-grad.pdf")
# w8a dataset
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4":
0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"w8a_dataset", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(5)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("w8a.pdf")
plt.figure(6)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("w8a-grad.pdf")
# a1a dataset
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"a1a_dataset", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(7)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("a1a.pdf")
plt.figure(8)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("a1a-grad.pdf")
# mushroom dataset
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"mushroom_dataset", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(9)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("mushroom.pdf")
plt.figure(10)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("mushroom-grad.pdf")
# MNIST
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"mnist_binary", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(11)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("mnist.pdf")
plt.figure(12)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("mnist-grad.pdf")
# Dataset: phishing
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"phishing", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(13)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("phishing.pdf")
plt.figure(14)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("phishing-grad.pdf")
# Dataset: Madelon
# pb = {
# "total": 1, # Total privacy budget for zCDP
# "min_eval4": 5e-3, # Min eigenvalue for clipping
# "grad_frac4": 0.4 # Fraction of privacy budget for gradient vs matrix
# }
# num_rep = 1
# losses_total,gradnorm_total = helper_fun('madelon',pb,num_rep = num_rep,
# Tuning=True,plot_eigen=True)
# for alg in losses_total.keys():
# losses = np.array(losses_total[alg])
# gradnorm = np.array(gradnorm_total[alg])
# loss_avg, gradnorm_avg = np.mean(losses,axis=0), np.mean(gradnorm,axis=0)
# loss_std, gradnorm_std = np.std(losses,axis=0)/np.sqrt(num_rep),
# np.std(gradnorm,axis=0)/np.sqrt(num_rep)
# print(str(alg)+ ':' + " ex_loss="+str(loss_avg[-1])+ ',
# std='+str(loss_std[-1]))
# if alg in alg_plt:
# iters = len(loss_avg)
# plt.figure(1)
# plt.plot(range(iters),loss_avg,linestyle[alg],label=alg)
# plt.fill_between(range(iters), loss_avg-loss_std, loss_avg+loss_std,
# facecolor=facecolor[alg])
# plt.legend()
# plt.xlabel("Iteration")
# plt.ylabel("Excess Loss")
# plt.savefig('madelon.pdf')
# plt.figure(2)
# plt.plot(range(iters),gradnorm_avg,linestyle[alg],label=alg)
# plt.yscale('log')
# plt.legend()
# plt.xlabel("Iteration")
# plt.ylabel("Norm of Gradient")
# plt.savefig('madelon-grad.pdf')
# a6a dataset
pb = {
"total": 1, # Total privacy budget for zCDP
"min_eval4": 5e-3, # Min eigenvalue for clipping
"grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
}
num_rep = 30
losses_total, gradnorm_total = helper_fun(
"a6a_dataset", pb, num_rep=num_rep, Tuning=False)
for alg in losses_total:
losses = np.array(losses_total[alg])
gradnorm = np.array(gradnorm_total[alg])
loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
loss_std, gradnorm_std = np.std(
losses, axis=0) / np.sqrt(num_rep), np.std(
gradnorm, axis=0) / np.sqrt(num_rep)
print(
str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
str(loss_std[-1]))
if alg in alg_plt:
iters = len(loss_avg)
plt.figure(15)
plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
plt.fill_between(
range(iters),
loss_avg - loss_std,
loss_avg + loss_std,
facecolor=facecolor[alg])
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Excess Loss")
plt.savefig("a6a.pdf")
plt.figure(16)
plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
plt.yscale("log")
plt.legend()
plt.xlabel("Iteration")
plt.ylabel("Norm of Gradient")
plt.savefig("a6a-grad.pdf")