COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/privacy/pull/489 from mhaghifam:dp-second-order-optimization 024904810a8f130d554cc3f04713d5562ccfe5df

PiperOrigin-RevId: 547312570
A. Unique TensorFlower 2023-07-11 15:19:53 -07:00 committed by Steve Chien
parent 6b8007ddde
commit cb6659d11b
8 changed files with 1246 additions and 1238 deletions

View file

@ -0,0 +1,32 @@
# Faster Differentially Private Convex Optimization via Second-Order Methods
https://arxiv.org/abs/2112.03570 <br>
by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, and Abhradeep Thakurta.
## Description
Implementation of the optimization algorithms proposed in
https://arxiv.org/abs/2112.03570 <br>
## Getting Started
You will need to install fairly standard dependencies (NumPy, SciPy,
scikit-learn, requests, and PyTorch with torchvision).
Then run `run_privacy_utility` to compare the convergence speed and excess loss
of the different algorithms; a programmatic sketch of a single run is shown below.
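For example, here is a minimal sketch of one experiment driven directly from Python (illustrative only; it assumes `src/` is on the Python path and that the `src/results` and `src/datasets_directory` folders already exist, as the shell script creates them):
```python
# Illustrative sketch mirroring one line of run_privacy_utility.
# helper_fun converts the (eps, delta)-DP budget to zCDP internally and
# writes a summary JSON into src/results/.
from run import helper_fun

helper_fun(
    "protein_dataset",  # or 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
    "double_noise",     # the proposed double-noise mechanism
    {
        "total": 3.0,  # epsilon in DP
        "num_iteration": 15,
        "grad_frac": 0.7,
        "trace_frac": 0.1,
        "trace_coeff": 0.5,
    },
)
```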
### Citation
You can cite this paper with
```
@article{ganesh2023faster,
title={Faster Differentially Private Convex Optimization
via Second-Order Methods},
author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas
and Thakurta, Abhradeep},
journal={arXiv preprint arXiv:2305.13209},
year={2023}
}
```

File diff suppressed because it is too large.

View file

@ -0,0 +1,28 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
rm -rf ./src/results
mkdir -p ./src/results
rm -rf ./src/datasets_directory
mkdir -p ./src/datasets_directory
dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
privacy_budget="3.0" # epsilon in DP
num_iteration_GD="100" # number of iterations for DP-GD
num_iteration_NT="15" # number of iterations for damped newton
num_iteration_our="15" # number of iterations for double noise (proposed method)
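# NOTE: the interpreter path below is specific to the author's setup; point it
# at your own Python environment (e.g. a local virtualenv) before running.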
$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD
$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac $"0.7"
$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac $"0.7" --trace_frac $"0.1" --trace_coeff $"0.5"
$HOME/google-code/dpoptVenv/bin/python3 ./src/print_results.py

View file

@ -0,0 +1,236 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""dataset loader"""
# pylint: skip-file
# pyformat: disable
import os
import ssl
import tarfile
import urllib.request
from my_logistic_regression import MyLogisticRegression
import numpy as np
import requests
from sklearn import preprocessing
import sklearn.datasets
from sklearn.linear_model import LogisticRegression
import torch
from torchvision import datasets, transforms
PATH_PREFIX = './src/datasets_directory'
ssl._create_default_https_context = ssl._create_unverified_context
def normalize_fvec(x_train):
"""normalize feature vectors"""
feature_mean = np.mean(x_train, axis=0)
feature_std = np.std(x_train, axis=0)
x_train = (x_train - feature_mean) / feature_std
return x_train
def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
"""Implementation of backtracking line search
lr = logistic regression
dir = the "noisy" gradient direction
w_start = current point
alpha and beta tradeoff the precision and complexity of the linesearch
output is an (close to) optimal stepsize
"""
step_size = 100
val_0 = lrp.loss(w_start)
inner_prod = np.dot(dir_srch, lrp.grad(w_start))
while (
lrp.loss(w_start - step_size * dir_srch)
>= val_0 - step_size * alpha * inner_prod
):
step_size = beta * step_size
if step_size < 1e-6:
break
return step_size
def newton(dataset, w_init, bias=True):
"""Implementation of the newton method with linesearch without privacy constraints
dataset = dataset
w_init = initialization point
output is the model parameter
"""
feature_vecs, labels = dataset
if bias is True:
feature_vecs = np.hstack(
(np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
)
lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
w_cur = w_init
for _ in range(8):
hess = lrp.hess(w_cur)
dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
step_size = backtracking_ls(lrp, dir_srch, w_cur)
w_cur = w_cur - step_size * dir_srch
if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
w_out = w_cur
else:
w_out = w_init
return w_out
class Mydatasets:
"""Represents datasets we use for expriments"""
def __init__(self):
data_dir = PATH_PREFIX + '/data'
cache_dir = PATH_PREFIX + '/cache_datasets'
if not os.path.exists(data_dir):
os.mkdir(data_dir)
if not os.path.exists(cache_dir):
os.mkdir(cache_dir)
def find_optimal_classifier(self, dataset, bias=True):
"""find the optimal weight vector for the logistic regression
for the problems with real datasets.
dataset = training dataset
bias = bias for the logistic model
"""
inputs_vec, labels = dataset
reg = 1e-9
if bias is True:
model_lr = LogisticRegression(max_iter=200, C=1 / reg).fit(
inputs_vec, labels
)
w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
w_opt = newton(dataset, w_opt1, bias)
else:
model_lr = LogisticRegression(
max_iter=200, fit_intercept=False, C=1 / reg
).fit(inputs_vec, labels)
w_opt1 = np.squeeze(model_lr.coef_)
w_opt = newton(dataset, w_opt1, bias)
return w_opt
def fmnist_dataset(self):
"""fmnist dataset"""
transform_data = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.5), (0.5))]
)
train_data_trans = datasets.FashionMNIST(
root=PATH_PREFIX + '/data',
download=True,
train=True,
transform=transform_data,
)
train_loader = torch.utils.data.DataLoader(
train_data_trans, batch_size=len(train_data_trans)
)
x_train = next(iter(train_loader))[0].numpy()
x_train = x_train.reshape(len(x_train), -1)
y_train = next(iter(train_loader))[1].numpy()
label0 = 0
label1 = 3
indx0 = np.nonzero(y_train == label0)[0]
indx1 = np.nonzero(y_train == label1)[0]
labels = y_train.copy()
labels[indx0] = -1
labels[indx1] = 1
indx = np.concatenate((indx0, indx1))
x_train = x_train[indx]
labels = labels[indx]
dataset = x_train, labels
w_opt = self.find_optimal_classifier(dataset, bias=False)
return x_train, labels, w_opt
def a1a_dataset(self):
"""a1a dataset"""
a1a_url = (
'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t'
)
data_path = PATH_PREFIX + '/data/a1a'
if not os.path.exists(data_path):
_ = urllib.request.urlretrieve(a1a_url, data_path)
data = sklearn.datasets.load_svmlight_file(data_path)
inputs_vec, labels = data[0], data[1]
inputs_vec = inputs_vec.toarray()
scaler = preprocessing.StandardScaler().fit(inputs_vec)
inputs_vec = scaler.transform(inputs_vec)
labels = labels.astype(float)
dataset = inputs_vec, labels
w_opt = self.find_optimal_classifier(dataset)
inputs_vec = np.hstack(
(np.ones(shape=(np.shape(inputs_vec)[0], 1)), inputs_vec)
)
return inputs_vec, labels, w_opt
def protein_dataset(self):
"""protein dataset"""
path_protein = PATH_PREFIX + '/data/protein/'
if not os.path.exists(path_protein):
os.mkdir(path_protein)
protein_url = (
'https://kdd.org/cupfiles/KDDCupData/2004/data_kddcup04.tar.gz'
)
protein_file = PATH_PREFIX + '/data/protein/data_kddcup04.tar.gz'
response = requests.get(protein_url, stream=True, timeout=100)
if response.status_code == 200:
with open(protein_file, 'wb') as file_data:
file_data.write(response.raw.read())
with tarfile.open(protein_file, 'r:gz') as tar:
tar.extractall(path_protein)
x_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 3:]
y_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 2]
indx0 = np.nonzero(y_train == 0)[0]
indx1 = np.nonzero(y_train == 1)[0]
labels = y_train.copy()
labels[indx0] = -1
labels[indx1] = 1
indx = np.arange(len(x_train))
np.random.seed(3000)
indx_sample = np.random.choice(indx, 50000, replace=False)
np.random.seed(None)
x_train = x_train[indx_sample]
labels = labels[indx_sample]
x_train = normalize_fvec(x_train)
w_opt = self.find_optimal_classifier((x_train, labels))
x_train = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train))
return x_train, labels, w_opt
def synthetic_dataset(self, num_samples=10000, dim=100):
"""Generates a synthetic dataset for logistic regression.
n = number of samples d = dimension Features are unit vectors (by default
uniformly random). Labels are sampled from logistic distribution, so w is
the "true" solution.
"""
mean = np.zeros(dim)
cov = np.eye(dim)
inputs_vec_un = np.random.multivariate_normal(mean, cov, num_samples)
nrm = np.linalg.norm(inputs_vec_un, axis=1)
inputs_vec = inputs_vec_un * 1 / nrm[:, None]
w_star = np.ones(dim)
w_star[0] = 1
inner_prod = np.dot(inputs_vec, w_star)
params = np.exp(inner_prod) / (1 + np.exp(inner_prod))
labels = 2 * np.random.binomial(1, params) - 1
dataset = inputs_vec, labels
w_opt = self.find_optimal_classifier(dataset, bias=False)
return inputs_vec, labels, w_opt
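# Example usage (illustrative; assumes ./src/datasets_directory already exists,
# as created by the run_privacy_utility script, and that this module is on the
# Python path):
#
#   from dataset_loader import Mydatasets
#   features, labels, w_opt = Mydatasets().synthetic_dataset(num_samples=1000, dim=20)
#   print(features.shape, labels.shape, w_opt.shape)  # (1000, 20) (1000,) (20,)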

View file

@ -0,0 +1,205 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""logistic regression class and its methods"""
# pylint: skip-file
# pyformat: disable
import numpy as np
class MyLogisticRegression:
"""return a logistic regression problem
There is a dataset consisting of features (vectors of norm <=1)
and labels (+1,-1), represented as a numpy array.
There is also an L2 regularizer.
"""
def __init__(self, input_vecs, labels, reg=1e-8):
"""Initialize the data and the regularizer.
X = n x d numpy array representing features
y = n x 1 numpy array representing labels
reg = L2 regularizing coefficient (to ensure solution is finite)
Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
"""
self.reg = float(reg)
input_vecs = np.array(input_vecs)
labels = np.array(labels)
assert len(input_vecs.shape) == 2
assert len(labels.shape) == 1
self.input_vecs = input_vecs
self.labels = labels
self.num_samples, self.dim = input_vecs.shape
assert labels.shape[0] == self.num_samples
signed_data = input_vecs * labels[:, np.newaxis]
norm = np.linalg.norm(signed_data, axis=1)
scale = np.maximum(norm, np.ones_like(norm))
self.data = (1 / scale[:, None]) * signed_data
def loss(self, param):
"""Computes the loss represented by this object at w.
If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n
log(1+exp(-<w,X[i,:]*y[i]>)) + (reg/2)||w||^2
"""
data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
return data_loss + reg_loss
def loss_wor(self, param):
"""Computes the loss represented by this object at w without regularizer.
If X,y is the data and reg is the regularizer, then the loss is
(1/n)sum_i^n log(1+exp(-<w,X[i,:]*y[i]>))
"""
data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
return data_loss
def accuracy(self, param):
""" " computes the accuracy of the model gievn by w"""
score_pred = np.dot(self.input_vecs, param)
label1_prob = np.where(
score_pred >= 0,
1 / (1 + np.exp(-score_pred)),
np.exp(score_pred) / (1 + np.exp(score_pred)),
)
return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)
def grad(self, param, batch_idx=None):
"""Computes the gradient of the logistic regression at a given point w.
If X,y is the data and reg is the regularizer, then the gradient is
(-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
"""
if batch_idx is not None:
data_batch = self.data[batch_idx]
else:
data_batch = self.data
coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
return data_grad + self.reg * param
def grad_wor(self, param, batch_idx=None):
"""Computes the gradient of the logistic regression at a given point w.
If X,y is the data and reg is the regularizer, then the gradient is
(-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
"""
if batch_idx is not None:
data_batch = self.data[batch_idx]
else:
data_batch = self.data
coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
return data_grad
def hess(self, param, batch_idx=None):
"""Computes the Hessian of the logistic regression at a given point w.
The Hessian is the matrix of second derivatives.
If X,y is the data and reg is the regularizer, then the Hessian is
(1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w,X[i,:]*y[i]>/2)*2)^2
where we assume y[i]^2==1.
"""
if batch_idx is not None:
data_batch = self.data[batch_idx]
batch_size = len(batch_idx)
else:
data_batch = self.data
batch_size = self.num_samples
temp_var = np.dot(data_batch, param) / 2
coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
return raw_hess / batch_size + self.reg * np.eye(self.dim)
def hess_wor(self, param, batch_idx=None):
"""Computes the Hessian of the logistic regression at a given point w.
The Hessian is the matrix of second derivatives.
If X,y is the data, then the Hessian is
(1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w,X[i,:]*y[i]>/2)*2)^2
where we assume y[i]^2==1.
"""
if batch_idx is not None:
data_batch = self.data[batch_idx]
batch_size = len(batch_idx)
else:
data_batch = self.data
batch_size = self.num_samples
temp_var = np.dot(data_batch, param) / 2
coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
return raw_hess / batch_size
def upperbound(self, param, batch_idx=None):
"""Tightest universal quadratic upper bound on the loss function.
log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
Constant and linear terms are just the first-order Taylor expansion; this
function gives the quadratic term (which replaces the Hessian).
https://twitter.com/shortstein/status/1557961202256318464
"""
if batch_idx is not None:
data_batch = self.data[batch_idx]
batch_size = len(batch_idx)
else:
data_batch = self.data
batch_size = self.num_samples
temp_var = -np.dot(data_batch, param) # vector of y_i<x_i,w> for i in [n]
# v = 0.5*np.tanh(a/2)/a, but, avoid 0/0 by special rule
temp_var2 = np.divide(
0.5 * np.tanh(temp_var / 2),
temp_var,
out=np.ones(temp_var.shape) * 0.25,
where=np.abs(temp_var) > 1e-9,
)
hess_non = np.dot(data_batch.T * temp_var2, data_batch)
return hess_non / batch_size + self.reg * np.eye(self.dim)
def upperbound_wor(self, param, batch_idx=None):
"""Tightest universal quadratic upper bound on the loss function.
log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
Constant and linear terms are just the first-order Taylor expansion; this
function gives the quadratic term (which replaces the Hessian).
"""
if batch_idx is not None:
data_batch = self.data[batch_idx]
batch_size = len(batch_idx)
else:
data_batch = self.data
batch_size = self.num_samples
temp_var = -np.dot(data_batch, param) # vector of y_i<x_i,w> for i in [n]
temp_var2 = np.divide(
0.5 * np.tanh(temp_var / 2),
temp_var,
out=np.ones(temp_var.shape) * 0.25,
where=np.abs(temp_var) > 1e-9,
)
hess_non = np.dot(data_batch.T * temp_var2, data_batch)
return hess_non / batch_size
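if __name__ == "__main__":
  # Illustrative sanity check (a sketch, not part of the library): on a small
  # random problem, the quadratic upper bound should dominate the Hessian in
  # the PSD order, so upperbound(w) - hess(w) has (numerically) nonnegative
  # eigenvalues.
  rng = np.random.default_rng(0)
  x_demo = rng.normal(size=(200, 5))
  y_demo = np.where(x_demo @ np.ones(5) >= 0, 1.0, -1.0)
  lrp_demo = MyLogisticRegression(x_demo, y_demo)
  w_demo = rng.normal(size=5)
  gap = lrp_demo.upperbound(w_demo) - lrp_demo.hess(w_demo)
  print("loss at w_demo:", lrp_demo.loss(w_demo))
  print("min eigenvalue of (upper bound - Hessian):", np.linalg.eigvalsh(gap).min())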

View file

@ -0,0 +1,434 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""file containing all auxillary functions for running the optimization algorithms"""
# pylint: skip-file
# pyformat: disable
import time
from my_logistic_regression import MyLogisticRegression
import numpy as np
class CompareAlgs:
"""Class to run multiple iterative algorithms and compare the results."""
def __init__(self, lrp, optimal_w, hyper_dict):
"""Initialize the problem.
lr = an instance of MyLogisticRegression
dataset = dataset in the format of (features,label)
optimal_w = optimal minimizer of logistic loss on dataset without privacy
pb = hyperparameters
"""
self.w_opt = optimal_w
self.lrp = lrp
self.iters = hyper_dict["num_iteration"]
self.hyper_params = hyper_dict
self.clock_time = []
self.params = []
self.names = []
def add_algo(self, update_rule, name):
"""Run an iterative update method"""
_, dim = self.lrp.num_samples, self.lrp.dim
wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
w_int = wint_un / np.linalg.norm(wint_un)
cutoff_norm = (
100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
)
w_cur = w_int
params = [w_cur]
start_t = time.time()
wall_clock = [0]
for _ in range(self.iters):
w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
if np.linalg.norm(w_cur) > cutoff_norm:
w_cur = w_int
print("Stop Things Exploding!")
params.append(w_cur)
wall_clock.append(time.time() - start_t)
self.clock_time.append(wall_clock)
self.params.append(params)
self.names.append(name)
def wall_clock_alg(self):
"""compute the wall clock of different algorithms"""
clock_time_dict = {}
for time_alg, name in zip(self.clock_time, self.names):
clock_time_dict[name] = [time_alg]
return clock_time_dict
def loss_vals(self):
"""output the loss per iteration for different methods"""
baseline = self.lrp.loss_wor(self.w_opt)
loss_dict = {}
for params, name in zip(self.params, self.names):
losses = [self.lrp.loss_wor(w) - baseline for w in params]
loss_dict[name] = [losses]
return loss_dict
def accuracy_vals(self):
"""output the accuracy per iteration for different methods"""
acc_dict = {}
for params, name in zip(self.params, self.names):
acc_vec = [self.lrp.accuracy(w) for w in params]
acc_dict[name] = [acc_vec]
return acc_dict
def accuracy_np(self):
"""output the accuracy of the optimal model without privacy"""
return self.lrp.accuracy(self.w_opt)
def gradnorm_vals(self):
"""output the gradient norm per iteration for different methods"""
gradnorm_dict = {}
for params, name in zip(self.params, self.names):
grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
gradnorm_dict[name] = [grad_norms]
return gradnorm_dict
def private_newton(w_cur, lrp, hyper_dict):
"""implementation of private newton method from [ABL21]
w = current iterate
lr = an instance of MyLogisticRegression
i = the index of current iterate
iters = total number of iterations
pb = privacy budget and other info
return the next iterate
"""
total = hyper_dict["total"]
grad_frac = hyper_dict["grad_frac"]
iters = hyper_dict["num_iteration"]
hess = lrp.hess(w_cur)
rho_grad = grad_frac * total / iters # divide total privacy budget up.
rho_hess = (1 - grad_frac) * total / iters
hess_noise = np.random.normal(
scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
size=(lrp.dim, lrp.dim),
)
hess_noise = (hess_noise + hess_noise.T) / 2
hess_noisy = eigenclip(hess + hess_noise)
grad = lrp.grad(w_cur)
grad_noisy = grad + np.random.normal(
scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
)
dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy
def eigenclip(sym_mat, min_eval=1e-5):
"""operation of the eigenclip
A = symmetric matrix
min_eval = minimum eigenvalue for clipping
return the modified matrix
"""
eig_val, eig_vec = np.linalg.eigh(sym_mat)
eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
return clipped_mat
def gd_priv(w_cur, lrp, hyper_dict):
"""Implementation of DP-GD.
w = current point
lr = logistic regression
i = iteration number
pb = auxillary information
output is the next iterate
"""
iters = hyper_dict["num_iteration"]
inv_lr_gd = 0.25 # learning rate based on the smoothness
sens = 1 / (lrp.num_samples * (inv_lr_gd)) # sensitivity
rho = hyper_dict["total"] / iters # divide total privacy budget up
noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise
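# For example (illustrative numbers): with lrp.num_samples = 50000 and a
# per-iteration budget rho = total / iters = 0.02, the step sensitivity is
# sens = 1 / (50000 * 0.25) = 8e-5, so the Gaussian noise added to each
# coordinate has standard deviation 8e-5 / sqrt(2 * 0.02) = 4e-4.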
def sgd_priv(w_cur, lrp, hyper_dict):
"""Implementation of DP-SGD.
w = current point
lr = logistic regression
i = iteration number
pb = auxillary information
output is the next iterate
"""
batch_size = hyper_dict["batch_size"]
sigma_privacy = hyper_dict["noise_multiplier"]
lr_sgd = 4 # learning rate based on the smoothness
sample_rate = batch_size / lrp.num_samples # sampling probability
sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
batch_idx = np.where(sample_vec == 1)[0] # index of batch
batch_size_t = len(batch_idx)
noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
grad_minibatch = lrp.grad_wor(
w_cur, batch_idx
) # average gradient over batch_idx
return w_cur - lr_sgd * (
batch_size_t / batch_size * grad_minibatch + noise / batch_size
)
def gd_priv_optls(w_cur, lrp, hyper_dict):
"""Implementation of DP-GD with back-tracking line search !!!
this method is not private. We only use it as a baseline.
w = current point
lr = logistic regression
i = iteration number
pb = auxillary information
output is the next iterate
"""
iters = hyper_dict["num_iteration"]
rho_grad = hyper_dict["total"] / iters # divide total privacy budget up
grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
dir_srch = lrp.grad(w_cur) + grad_noise
stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
return w_cur - stepsize_opt * dir_srch
def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
"""Implementation of backtracking line search
lr = logistic regression
dir = the "noisy" gradient direction
w_start = current point
alpha and beta tradeoff the precision and complexity of the linesearch
output is an (close to) optimal stepsize
"""
step_size = 100
val_0 = lrp.loss(w_start)
inner_prod = np.dot(dir_srch, lrp.grad(w_start))
while (
lrp.loss(w_start - step_size * dir_srch)
>= val_0 - step_size * alpha * inner_prod
):
step_size = beta * step_size
if step_size < 1e-6:
break
return step_size
def newton(dataset, w_init, bias=True):
"""Implementation of the newton method with linesearch without privacy constraints
dataset = dataset
w_init = initialization point
output is the model parameter
"""
feature_vecs, labels = dataset
if bias is True:
feature_vecs = np.hstack(
(np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
)
lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
w_cur = w_init
for _ in range(8):
hess = lrp.hess(w_cur)
dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
step_size = backtracking_ls(lrp, dir_srch, w_cur)
w_cur = w_cur - step_size * dir_srch
if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
w_out = w_cur
else:
w_out = w_init
return w_out
class DoubleNoiseMech:
"""Our Method: Double Noise Mechanism"""
def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
"""Initializer of the double noise mechanism
lr = an instance of MyLogisticRegression
type_reg = minimum eigenvalue modification type, it can be either 'add' or
'clip'
curvature_info = type of the second-order information
"""
self.type_reg = type_reg
self.curvature_info = curvature_info
if self.curvature_info == "hessian":
self.hess = lrp.hess_wor
elif self.curvature_info == "ub":
self.hess = lrp.upperbound_wor
def update_rule(self, w_cur, lrp, hyper_dict):
"""Implementation of the double noise mechanism update rule--full batch"""
noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
return w_next
def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
"""Implementation of the double noise mechanism update rule--full batch"""
noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
w_next = self.noisy_direction_stochastic(
w_cur, lrp, hyper_dict, noisy_grad_cur
)
return w_next
def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
"""computing gradient"""
if batch is False:
rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
"num_iteration"
]
noise_grad = np.random.normal(
scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
)
noisy_grad = lrp.grad(w_cur) + noise_grad
else:
std_grad = hyper_dict["noise_multiplier_grad"]
pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
sample_vec = np.random.binomial(n=1, p=pgrad, size=lrp.num_samples)
batch_idx_grad = np.where(sample_vec == 1)[0]
grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad)
noise_grad = np.random.normal(scale=std_grad, size=lrp.dim)
noisy_grad = (
len(batch_idx_grad) / (lrp.num_samples * pgrad)
) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad)
return noisy_grad
def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad):
"""computing direction"""
total = hyper_dict["total"]
grad_frac = hyper_dict["grad_frac"]
frac_trace = hyper_dict["trace_frac"]
trace_coeff = hyper_dict["trace_coeff"]
iters = hyper_dict["num_iteration"]
rho_hess = (1 - grad_frac) * total / iters
smooth_param = 0.25
hess_cur = self.hess(w_cur)
noisy_trace = trace_coeff * max(
np.trace(hess_cur)
+ np.random.normal(
scale=(0.25 / lrp.num_samples)
* np.sqrt(0.5 / (frac_trace * rho_hess))
),
0,
)
min_eval = max(
(noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess))
** (1 / 3),
1 / (lrp.num_samples),
)
grad_norm = np.linalg.norm(noisy_grad)
if self.type_reg == "add": # Sensitivity is different for add vs clip
sens2 = (
grad_norm
* smooth_param
/ (lrp.num_samples * min_eval**2 + smooth_param * min_eval)
)
noise2 = np.random.normal(
scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)),
size=lrp.dim,
)
return (
w_cur
- np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad)
+ noise2
)
# type_reg=clip
sens2 = (
grad_norm
* smooth_param
/ (lrp.num_samples * min_eval**2 - smooth_param * min_eval)
)
noise2 = np.random.normal(
scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim
)
eval_hess, evec_hess = np.linalg.eigh(hess_cur)
eval_trunc = eval_hess[eval_hess >= min_eval]
num_eig = len(eval_trunc)
if num_eig == 0:
hess_modified_inv = 1 / min_eval * np.eye(lrp.dim)
else:
evec_trun = evec_hess[:, -num_eig:]
hess_modified_inv = np.dot(
evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)),
evec_trun.T,
) + 1 / min_eval * np.eye(lrp.dim)
return w_cur - (hess_modified_inv @ noisy_grad) + noise2
def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad):
"""noisy direction for stochastic variant"""
std_hess = hyper_dict["noise_multiplier_hess"]
phess = hyper_dict["batchsize_hess"] / lrp.num_samples
min_eval = hyper_dict["min_eval"]
sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples)
batch_idx_hess = np.where(sample_vec == 1)[0]
batch_size_hess_t = len(batch_idx_hess)
hess_cur = (
(batch_size_hess_t)
/ (lrp.num_samples * phess)
* self.hess(w_cur, batch_idx_hess)
)
smooth_param = 0.25 # smoothness parameter
grad_norm = np.linalg.norm(noisy_grad)
if self.type_reg == "add": # Sensitivity is different for add vs clip
sens2 = (
grad_norm
* smooth_param
/ (
(lrp.num_samples * phess) * min_eval**2
+ smooth_param * min_eval
)
)
noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
return (
w_cur
- np.linalg.solve(
hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad
)
+ noise2
)
# type_reg=clip
min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess)))
sens2 = (
grad_norm
* smooth_param
/ (
(lrp.num_samples * phess) * min_eval_c**2
- smooth_param * min_eval_c
)
)
noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
eval_hess, evec_hess = np.linalg.eigh(hess_cur)
eval_trunc = eval_hess[eval_hess >= min_eval_c]
num_eig = len(eval_trunc)
if num_eig == 0:
hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim)
else:
evec_trun = evec_hess[:, -num_eig:]
hess_modified_inv = np.dot(
evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)),
evec_trun.T,
) + 1 / min_eval_c * np.eye(lrp.dim)
return w_cur - (hess_modified_inv @ noisy_grad) + noise2
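# Illustrative wiring of these pieces (a sketch of what run.py does; "features",
# "labels", and "w_opt" are assumed to come from dataset_loader, and "total" is
# already a zCDP budget):
#
#   lrp = MyLogisticRegression(features, labels)
#   hyper = {"total": 0.3, "num_iteration": 15, "grad_frac": 0.7,
#            "trace_frac": 0.1, "trace_coeff": 0.5}
#   algs = CompareAlgs(lrp, w_opt, hyper)
#   dnm = DoubleNoiseMech(lrp, type_reg="add", curvature_info="hessian")
#   algs.add_algo(dnm.update_rule, "DN-Hess-add")
#   algs.add_algo(gd_priv, "DPGD")
#   print(algs.loss_vals())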

View file

@ -0,0 +1,47 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""print the achievable error of different algorithms"""
# pylint: skip-file
# pyformat: disable
import json
import os
import numpy as np
RESULTS_PATH = './src/results/'
excess_loss = {}
opt_algs = [
'DPGD',
'DN-Hess-add',
'DN-UB-add',
'DN-Hess-clip',
'DN-UB-clip',
'private-newton',
]
for filename in os.listdir(RESULTS_PATH):
f = os.path.join(RESULTS_PATH, filename)
with open(f, encoding='utf-8') as json_file:
data = json.load(json_file)
for alg in data.keys():
if alg in opt_algs:
loss_avg = np.array(data[alg]['loss_avg'])
loss_std = np.array(data[alg]['loss_std'])
clock_time = np.array(data[alg]['clock_time_avg'])
print('optimization algorithm: ', alg)
print('excess loss: ' + str(loss_avg[-1]))
print('run time: ' + str(clock_time[-1]) + '(sec)')
print('-----')

View file

@ -0,0 +1,264 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""collections of helper function to run and compare different algorithms"""
# pylint: skip-file
# pyformat: disable
import argparse
import json
from dataset_loader import Mydatasets
from my_logistic_regression import MyLogisticRegression
import numpy as np
from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
from scipy.optimize import fsolve
def zcdp_to_eps(rho, delta):
""" "
conversion of zcdp gurantee to (eps,delta)-DP using the formula in Lemma 3.6
of [BS16]
rho : zCDP
delta: delta in DP
return eps
"""
return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))
def eps_to_zcdp(eps, delta):
""" "
conversion of (eps,delta) gurantee to rho-zCDP
eps : eps in DP
delta: delta in DP
return rho
"""
def func_root(rho_zcdp):
return zcdp_to_eps(rho_zcdp, delta) - eps
root = fsolve(func_root, x0=0.001)[-1]
return root
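# For example (illustrative): rho = eps_to_zcdp(3.0, 1e-10) gives the zCDP
# budget whose conversion back via zcdp_to_eps(rho, 1e-10) is (numerically)
# 3.0 again. helper_fun below calls eps_to_zcdp with delta = 1/n^2 for a
# dataset of n samples.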
def helper_fun(datasetname, alg_type, params_exp):
"""helper function for running different algorithms
args:
datasetname = dataset
alg_type = type of the optimization algorithm
params_exp = hyperparameters
"""
feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
privacy_dp = params_exp["total"]
params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
log_reg = MyLogisticRegression(feature_vecs, labels)
alg_dict, filename_params = prepare_alg_dict(
alg_type, datasetname, privacy_dp, params_exp, log_reg
)
compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
result["num-samples"] = len(labels)
with open(
"src/results/" + filename_params, "w", encoding="utf8"
) as json_file:
json.dump(result, json_file)
def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
"""prepare update rule for algorithms and filename"""
alg_dict = None
filename_params = None
if alg_type == "double_noise":
filename_params = (
"so_"
+ datasetname
+ "_"
+ str(privacy_dp)
+ "_"
+ "DP"
+ "_"
+ str(params_exp["num_iteration"])
+ "_"
+ str(params_exp["grad_frac"])
+ "_"
+ str(params_exp["trace_frac"])
+ "_"
+ str(params_exp["trace_coeff"])
+ ".txt"
)
dnm_hess_add = DoubleNoiseMech(
log_reg, type_reg="add", curvature_info="hessian"
).update_rule
dnm_ub_add = DoubleNoiseMech(
log_reg, type_reg="add", curvature_info="ub"
).update_rule
dnm_hess_clip = DoubleNoiseMech(
log_reg, type_reg="clip", curvature_info="hessian"
).update_rule
dnm_ub_clip = DoubleNoiseMech(
log_reg, type_reg="clip", curvature_info="ub"
).update_rule
alg_dict = {
"DN-Hess-add": dnm_hess_add,
"DN-Hess-clip": dnm_hess_clip,
"DN-UB-clip": dnm_ub_clip,
"DN-UB-add": dnm_ub_add,
}
elif alg_type == "dp_gd":
filename_params = (
"gd_"
+ datasetname
+ "_"
+ str(privacy_dp)
+ "_"
+ "DP"
+ "_"
+ str(params_exp["num_iteration"])
+ ".txt"
)
alg_dict = {"DPGD": gd_priv}
elif alg_type == "damped_newton":
filename_params = (
"newton_"
+ datasetname
+ "_"
+ str(privacy_dp)
+ "_"
+ "DP"
+ "_"
+ str(params_exp["num_iteration"])
+ ".txt"
)
alg_dict = {"private-newton": private_newton}
return alg_dict, filename_params
class RunReleaseStats:
"""Helpfer function to run different algorithms and store the results"""
def __init__(self, compare_algs, algs_dict, num_rep=10):
self.compare_algs = compare_algs
self.algs_dict = algs_dict
self.num_rep = num_rep
self.losses = 0
self.gradnorm = 0
self.accuracy = 0
self.wall_clock = 0
def run_algs(self):
"""method to run different algorithms and store different stats"""
for rep in range(self.num_rep):
for alg_name, alg_update_rule in self.algs_dict.items():
self.compare_algs.add_algo(alg_update_rule, alg_name)
losses_dict = self.compare_algs.loss_vals()
gradnorm_dict = self.compare_algs.gradnorm_vals()
accuracy_dict = self.compare_algs.accuracy_vals()
wall_clock_dict = self.compare_algs.wall_clock_alg()
if rep == 0:
self.losses = losses_dict
self.gradnorm = gradnorm_dict
self.accuracy = accuracy_dict
self.wall_clock = wall_clock_dict
else:
for alg in self.losses:
self.losses[alg].extend(losses_dict[alg])
self.gradnorm[alg].extend(gradnorm_dict[alg])
self.accuracy[alg].extend(accuracy_dict[alg])
self.wall_clock[alg].extend(wall_clock_dict[alg])
def summarize_stats(self):
"""method to summarize the results"""
self.run_algs()
result = {}
result["acc-best"] = self.compare_algs.accuracy_np().tolist()
for alg in self.losses:
result[alg] = {}
loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
loss_std = np.std(np.array(self.losses[alg]), axis=0)
result[alg]["loss_avg"] = (loss_avg).tolist()
result[alg]["loss_std"] = (loss_std / np.sqrt(self.num_rep)).tolist()
gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
result[alg]["acc_avg"] = (acc_avg).tolist()
result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
result[alg]["clock_time_std"] = (
clocktime_std / np.sqrt(self.num_rep)
).tolist()
return result
def main():
"""main function"""
parser = argparse.ArgumentParser()
parser.add_argument("--datasetname")
parser.add_argument("--alg_type")
parser.add_argument("--total")
parser.add_argument("--numiter")
# double noise and newton
parser.add_argument("--grad_frac")
parser.add_argument("--trace_frac")
parser.add_argument("--trace_coeff")
args = parser.parse_args()
datasetname = args.datasetname
alg_type = args.alg_type
total = float(args.total)
num_iter = int(args.numiter)
if alg_type == "double_noise":
grad_frac = float(args.grad_frac)
trace_frac = float(args.trace_frac)
trace_coeff = float(args.trace_coeff)
hyper_parameters = {
"total": total,
"grad_frac": grad_frac,
"trace_frac": trace_frac,
"trace_coeff": trace_coeff,
"num_iteration": num_iter,
}
elif alg_type == "dp_gd":
hyper_parameters = {"total": total, "num_iteration": num_iter}
elif alg_type == "damped_newton":
grad_frac = float(args.grad_frac)
hyper_parameters = {
"total": total,
"num_iteration": num_iter,
"grad_frac": grad_frac,
}
else:
raise ValueError("no such optimization algorithm exists")
print(
"optimization algorithm "
+ alg_type
+ ","
+ "dataset name: "
+ datasetname
)
helper_fun(datasetname, alg_type, hyper_parameters)
if __name__ == "__main__":
main()