COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/privacy/pull/489 from mhaghifam:dp-second-order-optimization 024904810a8f130d554cc3f04713d5562ccfe5df
PiperOrigin-RevId: 547312570
This commit is contained in:
parent
6b8007ddde
commit
cb6659d11b
8 changed files with 1246 additions and 1238 deletions
32
research/dp_newton/README.md
Normal file
32
research/dp_newton/README.md
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# Project Title
|
||||||
|
|
||||||
|
Faster Differentially Private Convex Optimization via Second-Order Methods
|
||||||
|
https://arxiv.org/abs/2112.03570 <br>
|
||||||
|
by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, Abhradeep Thakurta.
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Implementation of the optimizatoin algorithms proposed in
|
||||||
|
https://arxiv.org/abs/2112.03570 <br>
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
You will need to install fairly standard dependencies
|
||||||
|
|
||||||
|
run 'run_privacy_utility' to compare the convergence speed and excess loss of
|
||||||
|
different algorithms.
|
||||||
|
|
||||||
|
### Citation
|
||||||
|
|
||||||
|
You can cite this paper with
|
||||||
|
|
||||||
|
```
|
||||||
|
@article{ganesh2023faster,
|
||||||
|
title={Faster Differentially Private Convex Optimization
|
||||||
|
via Second-Order Methods},
|
||||||
|
author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas
|
||||||
|
and Thakurta, Abhradeep},
|
||||||
|
journal={arXiv preprint arXiv:2305.13209},
|
||||||
|
year={2023}
|
||||||
|
}
|
||||||
|
```
|
File diff suppressed because it is too large
Load diff
28
research/dp_newton/run_privacy_utility
Normal file
28
research/dp_newton/run_privacy_utility
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
rm -rf ./src/results
|
||||||
|
mkdir -p ./src/results
|
||||||
|
rm -rf ./src/datasets_directory
|
||||||
|
mkdir -p ./src/datasets_directory
|
||||||
|
dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
|
||||||
|
privacy_budget="3.0" # epsilon in DP
|
||||||
|
num_iteration_GD="100" # number of iterations for DP-GD
|
||||||
|
num_iteration_NT="15" # number of iterations for damped newton
|
||||||
|
num_iteration_our="15" # number of iterations for double noise (proposed method)
|
||||||
|
$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD
|
||||||
|
$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac $"0.7"
|
||||||
|
$HOME/google-code/dpoptVenv/bin/python3 ./src/run.py --alg_type $'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac $"0.7" --trace_frac $"0.1" --trace_coeff $"0.5"
|
||||||
|
$HOME/google-code/dpoptVenv/bin/python3 ./src/print_results.py
|
236
research/dp_newton/src/dataset_loader.py
Normal file
236
research/dp_newton/src/dataset_loader.py
Normal file
|
@ -0,0 +1,236 @@
|
||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
"""dataset loader"""
|
||||||
|
|
||||||
|
# pylint: skip-file
|
||||||
|
# pyformat: disable
|
||||||
|
|
||||||
|
import os
|
||||||
|
import ssl
|
||||||
|
import tarfile
|
||||||
|
import urllib.request
|
||||||
|
from my_logistic_regression import MyLogisticRegression
|
||||||
|
import numpy as np
|
||||||
|
import requests
|
||||||
|
from sklearn import preprocessing
|
||||||
|
import sklearn.datasets
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
import torch
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
|
||||||
|
|
||||||
|
PATH_PREFIX = './src/datasets_directory'
|
||||||
|
ssl._create_default_https_context = ssl._create_unverified_context
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_fvec(x_train):
|
||||||
|
"""normalize feature vectors"""
|
||||||
|
feature_mean = np.mean(x_train, axis=0)
|
||||||
|
feature_std = np.std(x_train, axis=0)
|
||||||
|
x_train = (x_train - feature_mean) / feature_std
|
||||||
|
return x_train
|
||||||
|
|
||||||
|
|
||||||
|
def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
|
||||||
|
"""Implementation of backtracking line search
|
||||||
|
|
||||||
|
lr = logistic regression
|
||||||
|
dir = the "noisy" gradient direction
|
||||||
|
w_start = current point
|
||||||
|
alpha and beta tradeoff the precision and complexity of the linesearch
|
||||||
|
|
||||||
|
output is an (close to) optimal stepsize
|
||||||
|
"""
|
||||||
|
step_size = 100
|
||||||
|
val_0 = lrp.loss(w_start)
|
||||||
|
inner_prod = np.dot(dir_srch, lrp.grad(w_start))
|
||||||
|
while (
|
||||||
|
lrp.loss(w_start - step_size * dir_srch)
|
||||||
|
>= val_0 - step_size * alpha * inner_prod
|
||||||
|
):
|
||||||
|
step_size = beta * step_size
|
||||||
|
if step_size < 1e-6:
|
||||||
|
break
|
||||||
|
return step_size
|
||||||
|
|
||||||
|
|
||||||
|
def newton(dataset, w_init, bias=True):
|
||||||
|
"""Implementation of the newton method with linesearch without privacy constraints
|
||||||
|
|
||||||
|
dataset = dataset
|
||||||
|
w_init = initialization point
|
||||||
|
|
||||||
|
output is the model parameter
|
||||||
|
"""
|
||||||
|
feature_vecs, labels = dataset
|
||||||
|
if bias is True:
|
||||||
|
feature_vecs = np.hstack(
|
||||||
|
(np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
|
||||||
|
)
|
||||||
|
lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
|
||||||
|
w_cur = w_init
|
||||||
|
for _ in range(8):
|
||||||
|
hess = lrp.hess(w_cur)
|
||||||
|
dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
|
||||||
|
step_size = backtracking_ls(lrp, dir_srch, w_cur)
|
||||||
|
w_cur = w_cur - step_size * dir_srch
|
||||||
|
if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
|
||||||
|
w_out = w_cur
|
||||||
|
else:
|
||||||
|
w_out = w_init
|
||||||
|
return w_out
|
||||||
|
|
||||||
|
|
||||||
|
class Mydatasets:
|
||||||
|
"""Represents datasets we use for expriments"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
data_dir = PATH_PREFIX + '/data'
|
||||||
|
cache_dir = PATH_PREFIX + '/cache_datasets'
|
||||||
|
if not os.path.exists(data_dir):
|
||||||
|
os.mkdir(data_dir)
|
||||||
|
if not os.path.exists(cache_dir):
|
||||||
|
os.mkdir(cache_dir)
|
||||||
|
|
||||||
|
def find_optimal_classifier(self, dataset, bias=True):
|
||||||
|
"""find the optimal weight vector for the logistic regression
|
||||||
|
|
||||||
|
for the problems with real datasets.
|
||||||
|
|
||||||
|
dataset = training dataset
|
||||||
|
bias = bias for the logistic model
|
||||||
|
"""
|
||||||
|
inputs_vec, labels = dataset
|
||||||
|
reg = 1e-9
|
||||||
|
if bias is True:
|
||||||
|
model_lr = LogisticRegression(max_iter=200, C=1 / reg).fit(
|
||||||
|
inputs_vec, labels
|
||||||
|
)
|
||||||
|
w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
|
||||||
|
w_opt = newton(dataset, w_opt1, bias)
|
||||||
|
else:
|
||||||
|
model_lr = LogisticRegression(
|
||||||
|
max_iter=200, fit_intercept=False, C=1 / reg
|
||||||
|
).fit(inputs_vec, labels)
|
||||||
|
w_opt1 = np.squeeze(model_lr.coef_)
|
||||||
|
w_opt = newton(dataset, w_opt1, bias)
|
||||||
|
return w_opt
|
||||||
|
|
||||||
|
def fmnist_dataset(self):
|
||||||
|
"""fmnist dataset"""
|
||||||
|
transform_data = transforms.Compose(
|
||||||
|
[transforms.ToTensor(), transforms.Normalize((0.5), (0.5))]
|
||||||
|
)
|
||||||
|
train_data_trans = datasets.FashionMNIST(
|
||||||
|
root=PATH_PREFIX + '/data',
|
||||||
|
download=True,
|
||||||
|
train=True,
|
||||||
|
transform=transform_data,
|
||||||
|
)
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
train_data_trans, batch_size=len(train_data_trans)
|
||||||
|
)
|
||||||
|
x_train = next(iter(train_loader))[0].numpy()
|
||||||
|
x_train = x_train.reshape(len(x_train), -1)
|
||||||
|
y_train = next(iter(train_loader))[1].numpy()
|
||||||
|
label0 = 0
|
||||||
|
label1 = 3
|
||||||
|
indx0 = np.nonzero(y_train == label0)[0]
|
||||||
|
indx1 = np.nonzero(y_train == label1)[0]
|
||||||
|
labels = y_train.copy()
|
||||||
|
labels[indx0] = -1
|
||||||
|
labels[indx1] = 1
|
||||||
|
indx = np.concatenate((indx0, indx1))
|
||||||
|
x_train = x_train[indx]
|
||||||
|
labels = labels[indx]
|
||||||
|
dataset = x_train, labels
|
||||||
|
w_opt = self.find_optimal_classifier(dataset, bias=False)
|
||||||
|
return x_train, labels, w_opt
|
||||||
|
|
||||||
|
def a1a_dataset(self):
|
||||||
|
"""a1a dataset"""
|
||||||
|
a1a_url = (
|
||||||
|
'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t'
|
||||||
|
)
|
||||||
|
data_path = PATH_PREFIX + '/data/a1a'
|
||||||
|
if not os.path.exists(data_path):
|
||||||
|
_ = urllib.request.urlretrieve(a1a_url, data_path)
|
||||||
|
data = sklearn.datasets.load_svmlight_file(data_path)
|
||||||
|
inputs_vec, labels = data[0], data[1]
|
||||||
|
inputs_vec = inputs_vec.toarray()
|
||||||
|
scaler = preprocessing.StandardScaler().fit(inputs_vec)
|
||||||
|
inputs_vec = scaler.transform(inputs_vec)
|
||||||
|
labels = labels.astype(float)
|
||||||
|
dataset = inputs_vec, labels
|
||||||
|
w_opt = self.find_optimal_classifier(dataset)
|
||||||
|
inputs_vec = np.hstack(
|
||||||
|
(np.ones(shape=(np.shape(inputs_vec)[0], 1)), inputs_vec)
|
||||||
|
)
|
||||||
|
return inputs_vec, labels, w_opt
|
||||||
|
|
||||||
|
def protein_dataset(self):
|
||||||
|
"""protein dataset"""
|
||||||
|
path_protein = PATH_PREFIX + '/data/protein/'
|
||||||
|
if not os.path.exists(path_protein):
|
||||||
|
os.mkdir(path_protein)
|
||||||
|
protein_url = (
|
||||||
|
'https://kdd.org/cupfiles/KDDCupData/2004/data_kddcup04.tar.gz'
|
||||||
|
)
|
||||||
|
protein_file = PATH_PREFIX + '/data/protein/data_kddcup04.tar.gz'
|
||||||
|
response = requests.get(protein_url, stream=True, timeout=100)
|
||||||
|
if response.status_code == 200:
|
||||||
|
with open(protein_file, 'wb') as file_data:
|
||||||
|
file_data.write(response.raw.read())
|
||||||
|
with tarfile.open(protein_file, 'r:gz') as tar:
|
||||||
|
tar.extractall(path_protein)
|
||||||
|
x_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 3:]
|
||||||
|
y_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 2]
|
||||||
|
indx0 = np.nonzero(y_train == 0)[0]
|
||||||
|
indx1 = np.nonzero(y_train == 1)[0]
|
||||||
|
labels = y_train.copy()
|
||||||
|
labels[indx0] = -1
|
||||||
|
labels[indx1] = 1
|
||||||
|
indx = np.arange(len(x_train))
|
||||||
|
np.random.seed(3000)
|
||||||
|
indx_sample = np.random.choice(indx, 50000, replace=False)
|
||||||
|
np.random.seed(None)
|
||||||
|
x_train = x_train[indx_sample]
|
||||||
|
labels = labels[indx_sample]
|
||||||
|
x_train = normalize_fvec(x_train)
|
||||||
|
w_opt = self.find_optimal_classifier((x_train, labels))
|
||||||
|
x_train = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train))
|
||||||
|
return x_train, labels, w_opt
|
||||||
|
|
||||||
|
def synthetic_dataset(self, num_samples=10000, dim=100):
|
||||||
|
"""Generates a synthetic dataset for logistic regression.
|
||||||
|
|
||||||
|
n = number of samples d = dimension Features are unit vectors (by default
|
||||||
|
uniformly random). Labels are sampled from logistic distribution, so w is
|
||||||
|
the "true" solution.
|
||||||
|
"""
|
||||||
|
mean = np.zeros(dim)
|
||||||
|
cov = np.eye(dim)
|
||||||
|
inputs_vec_un = np.random.multivariate_normal(mean, cov, num_samples)
|
||||||
|
nrm = np.linalg.norm(inputs_vec_un, axis=1)
|
||||||
|
inputs_vec = inputs_vec_un * 1 / nrm[:, None]
|
||||||
|
w_star = np.ones(dim)
|
||||||
|
w_star[0] = 1
|
||||||
|
inner_prod = np.dot(inputs_vec, w_star)
|
||||||
|
params = np.exp(inner_prod) / (1 + np.exp(inner_prod))
|
||||||
|
labels = 2 * np.random.binomial(1, params) - 1
|
||||||
|
dataset = inputs_vec, labels
|
||||||
|
w_opt = self.find_optimal_classifier(dataset, bias=False)
|
||||||
|
return inputs_vec, labels, w_opt
|
205
research/dp_newton/src/my_logistic_regression.py
Normal file
205
research/dp_newton/src/my_logistic_regression.py
Normal file
|
@ -0,0 +1,205 @@
|
||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
"""logistic regression class and its methods"""
|
||||||
|
|
||||||
|
# pylint: skip-file
|
||||||
|
# pyformat: disable
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class MyLogisticRegression:
|
||||||
|
"""return a logistic regression problem
|
||||||
|
|
||||||
|
There is a dataset consisting of features (vectors of norm <=1)
|
||||||
|
and labels (+1,-1), represented as a numpy array.
|
||||||
|
There is also an L2 regularizer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, input_vecs, labels, reg=1e-8):
|
||||||
|
"""Initialize the data and the regularizer.
|
||||||
|
|
||||||
|
X = n x d numpy array representing features
|
||||||
|
y = n x 1 numpy array representing labels
|
||||||
|
reg = L2 regularizing coefficient (to ensure solution is finite)
|
||||||
|
|
||||||
|
Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
|
||||||
|
"""
|
||||||
|
self.reg = float(reg)
|
||||||
|
input_vecs = np.array(input_vecs)
|
||||||
|
labels = np.array(labels)
|
||||||
|
assert len(input_vecs.shape) == 2
|
||||||
|
assert len(labels.shape) == 1
|
||||||
|
self.input_vecs = input_vecs
|
||||||
|
self.labels = labels
|
||||||
|
self.num_samples, self.dim = input_vecs.shape
|
||||||
|
assert labels.shape[0] == self.num_samples
|
||||||
|
signed_data = input_vecs * labels[:, np.newaxis]
|
||||||
|
norm = np.linalg.norm(signed_data, axis=1)
|
||||||
|
scale = np.maximum(norm, np.ones_like(norm))
|
||||||
|
self.data = (1 / scale[:, None]) * signed_data
|
||||||
|
|
||||||
|
def loss(self, param):
|
||||||
|
"""Computes the loss represented by this object at w.
|
||||||
|
|
||||||
|
If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n
|
||||||
|
log(1+exp(-<w,X[i,:]*y[i]>)) + (reg/2)||w||^2
|
||||||
|
"""
|
||||||
|
data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
|
||||||
|
reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
|
||||||
|
return data_loss + reg_loss
|
||||||
|
|
||||||
|
def loss_wor(self, param):
|
||||||
|
"""Computes the loss represented by this object at w without regularizer.
|
||||||
|
|
||||||
|
If X,y is the data and reg is the regularizer, then the loss is
|
||||||
|
(1/n)sum_i^n log(1+exp(-<w,X[i,:]*y[i]>))
|
||||||
|
"""
|
||||||
|
data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
|
||||||
|
return data_loss
|
||||||
|
|
||||||
|
def accuracy(self, param):
|
||||||
|
""" " computes the accuracy of the model gievn by w"""
|
||||||
|
score_pred = np.dot(self.input_vecs, param)
|
||||||
|
label1_prob = np.where(
|
||||||
|
score_pred >= 0,
|
||||||
|
1 / (1 + np.exp(-score_pred)),
|
||||||
|
np.exp(score_pred) / (1 + np.exp(score_pred)),
|
||||||
|
)
|
||||||
|
return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)
|
||||||
|
|
||||||
|
def grad(self, param, batch_idx=None):
|
||||||
|
"""Computes the gradient of the logistic regression at a given point w.
|
||||||
|
|
||||||
|
If X,y is the data and reg is the regularizer, then the gradient is
|
||||||
|
(-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
|
||||||
|
"""
|
||||||
|
if batch_idx is not None:
|
||||||
|
data_batch = self.data[batch_idx]
|
||||||
|
else:
|
||||||
|
data_batch = self.data
|
||||||
|
|
||||||
|
coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
|
||||||
|
data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
|
||||||
|
return data_grad + self.reg * param
|
||||||
|
|
||||||
|
def grad_wor(self, param, batch_idx=None):
|
||||||
|
"""Computes the gradient of the logistic regression at a given point w.
|
||||||
|
|
||||||
|
If X,y is the data and reg is the regularizer, then the gradient is
|
||||||
|
(-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
|
||||||
|
"""
|
||||||
|
if batch_idx is not None:
|
||||||
|
data_batch = self.data[batch_idx]
|
||||||
|
else:
|
||||||
|
data_batch = self.data
|
||||||
|
|
||||||
|
coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
|
||||||
|
data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
|
||||||
|
return data_grad
|
||||||
|
|
||||||
|
def hess(self, param, batch_idx=None):
|
||||||
|
"""Computes the Hessian of the logistic regression at a given point w.
|
||||||
|
|
||||||
|
The Hessian is the matrix of second derivatives.
|
||||||
|
|
||||||
|
If X,y is the data and reg is the regularizer, then the Hessian is
|
||||||
|
(1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w,W[i,:]*y[i]>/2)*2)^2
|
||||||
|
where we assume y[i]^2==1.
|
||||||
|
"""
|
||||||
|
if batch_idx is not None:
|
||||||
|
data_batch = self.data[batch_idx]
|
||||||
|
batch_size = len(batch_idx)
|
||||||
|
else:
|
||||||
|
data_batch = self.data
|
||||||
|
batch_size = self.num_samples
|
||||||
|
|
||||||
|
temp_var = np.dot(data_batch, param) / 2
|
||||||
|
coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
|
||||||
|
raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
|
||||||
|
return raw_hess / batch_size + self.reg * np.eye(self.dim)
|
||||||
|
|
||||||
|
def hess_wor(self, param, batch_idx=None):
|
||||||
|
"""Computes the Hessian of the logistic regression at a given point w.
|
||||||
|
|
||||||
|
The Hessian is the matrix of second derivatives.
|
||||||
|
|
||||||
|
If X,y is the data, then the Hessian is
|
||||||
|
(1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w,W[i,:]*y[i]>/2)*2)^2
|
||||||
|
where we assume y[i]^2==1.
|
||||||
|
"""
|
||||||
|
if batch_idx is not None:
|
||||||
|
data_batch = self.data[batch_idx]
|
||||||
|
batch_size = len(batch_idx)
|
||||||
|
else:
|
||||||
|
data_batch = self.data
|
||||||
|
batch_size = self.num_samples
|
||||||
|
|
||||||
|
temp_var = np.dot(data_batch, param) / 2
|
||||||
|
coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
|
||||||
|
raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
|
||||||
|
return raw_hess / batch_size
|
||||||
|
|
||||||
|
def upperbound(self, param, batch_idx=None):
|
||||||
|
"""Tightest universal quadratic upper bound on the loss function.
|
||||||
|
|
||||||
|
log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
|
||||||
|
Constant and linear terms are just first-order Taylor This function gives
|
||||||
|
the quadratic term (which replaces the Hessian)
|
||||||
|
https://twitter.com/shortstein/status/1557961202256318464
|
||||||
|
"""
|
||||||
|
|
||||||
|
if batch_idx is not None:
|
||||||
|
data_batch = self.data[batch_idx]
|
||||||
|
batch_size = len(batch_idx)
|
||||||
|
else:
|
||||||
|
data_batch = self.data
|
||||||
|
batch_size = self.num_samples
|
||||||
|
|
||||||
|
temp_var = -np.dot(data_batch, param) # vector of y_i<x_i,w> for i in [n]
|
||||||
|
# v = 0.5*np.tanh(a/2)/a, but, avoid 0/0 by special rule
|
||||||
|
temp_var2 = np.divide(
|
||||||
|
0.5 * np.tanh(temp_var / 2),
|
||||||
|
temp_var,
|
||||||
|
out=np.ones(temp_var.shape) * 0.25,
|
||||||
|
where=np.abs(temp_var) > 1e-9,
|
||||||
|
)
|
||||||
|
hess_non = np.dot(data_batch.T * temp_var2, data_batch)
|
||||||
|
return hess_non / batch_size + self.reg * np.eye(self.dim)
|
||||||
|
|
||||||
|
def upperbound_wor(self, param, batch_idx=None):
|
||||||
|
"""Tightest universal quadratic upper bound on the loss function.
|
||||||
|
|
||||||
|
log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
|
||||||
|
Constant and linear terms are just first-order Taylor This function gives
|
||||||
|
the quadratic term (which replaces the Hessian)
|
||||||
|
"""
|
||||||
|
if batch_idx is not None:
|
||||||
|
data_batch = self.data[batch_idx]
|
||||||
|
batch_size = len(batch_idx)
|
||||||
|
else:
|
||||||
|
data_batch = self.data
|
||||||
|
batch_size = self.num_samples
|
||||||
|
|
||||||
|
temp_var = -np.dot(data_batch, param) # vector of y_i<x_i,w> for i in [n]
|
||||||
|
temp_var2 = np.divide(
|
||||||
|
0.5 * np.tanh(temp_var / 2),
|
||||||
|
temp_var,
|
||||||
|
out=np.ones(temp_var.shape) * 0.25,
|
||||||
|
where=np.abs(temp_var) > 1e-9,
|
||||||
|
)
|
||||||
|
hess_non = np.dot(data_batch.T * temp_var2, data_batch)
|
||||||
|
return hess_non / batch_size
|
434
research/dp_newton/src/opt_algs.py
Normal file
434
research/dp_newton/src/opt_algs.py
Normal file
|
@ -0,0 +1,434 @@
|
||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
"""file containing all auxillary functions for running the optimization algorithms"""
|
||||||
|
|
||||||
|
# pylint: skip-file
|
||||||
|
# pyformat: disable
|
||||||
|
|
||||||
|
import time
|
||||||
|
from my_logistic_regression import MyLogisticRegression
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class CompareAlgs:
|
||||||
|
"""Class to run multiple iterative algorithms and compare the results."""
|
||||||
|
|
||||||
|
def __init__(self, lrp, optimal_w, hyper_dict):
|
||||||
|
"""Initialize the problem.
|
||||||
|
|
||||||
|
lr = an instance of MyLogisticRegression
|
||||||
|
dataset = dataset in the format of (features,label)
|
||||||
|
optimal_w = optimal minimizer of logistic loss on dataset without privacy
|
||||||
|
pb = hyperparameters
|
||||||
|
"""
|
||||||
|
self.w_opt = optimal_w
|
||||||
|
self.lrp = lrp
|
||||||
|
self.iters = hyper_dict["num_iteration"]
|
||||||
|
self.hyper_params = hyper_dict
|
||||||
|
self.clock_time = []
|
||||||
|
self.params = []
|
||||||
|
self.names = []
|
||||||
|
|
||||||
|
def add_algo(self, update_rule, name):
|
||||||
|
"""Run an iterative update method"""
|
||||||
|
_, dim = self.lrp.num_samples, self.lrp.dim
|
||||||
|
wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
|
||||||
|
w_int = wint_un / np.linalg.norm(wint_un)
|
||||||
|
cutoff_norm = (
|
||||||
|
100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
|
||||||
|
)
|
||||||
|
w_cur = w_int
|
||||||
|
params = [w_cur]
|
||||||
|
start_t = time.time()
|
||||||
|
wall_clock = [0]
|
||||||
|
for _ in range(self.iters):
|
||||||
|
w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
|
||||||
|
if np.linalg.norm(w_cur) > cutoff_norm:
|
||||||
|
w_cur = w_int
|
||||||
|
print("Stop Things Exploding!")
|
||||||
|
params.append(w_cur)
|
||||||
|
wall_clock.append(time.time() - start_t)
|
||||||
|
self.clock_time.append(wall_clock)
|
||||||
|
self.params.append(params)
|
||||||
|
self.names.append(name)
|
||||||
|
|
||||||
|
def wall_clock_alg(self):
|
||||||
|
"""compute the wall clock of different algorithms"""
|
||||||
|
clock_time_dict = {}
|
||||||
|
for time_alg, name in zip(self.clock_time, self.names):
|
||||||
|
clock_time_dict[name] = [time_alg]
|
||||||
|
return clock_time_dict
|
||||||
|
|
||||||
|
def loss_vals(self):
|
||||||
|
"""output the loss per iteration for different methods"""
|
||||||
|
baseline = self.lrp.loss_wor(self.w_opt)
|
||||||
|
loss_dict = {}
|
||||||
|
for params, name in zip(self.params, self.names):
|
||||||
|
losses = [self.lrp.loss_wor(w) - baseline for w in params]
|
||||||
|
loss_dict[name] = [losses]
|
||||||
|
return loss_dict
|
||||||
|
|
||||||
|
def accuracy_vals(self):
|
||||||
|
"""output the accuracy per iteration for different methods"""
|
||||||
|
acc_dict = {}
|
||||||
|
for params, name in zip(self.params, self.names):
|
||||||
|
acc_vec = [self.lrp.accuracy(w) for w in params]
|
||||||
|
acc_dict[name] = [acc_vec]
|
||||||
|
return acc_dict
|
||||||
|
|
||||||
|
def accuracy_np(self):
|
||||||
|
"""output the accuracy of the optimal model without privacy"""
|
||||||
|
return self.lrp.accuracy(self.w_opt)
|
||||||
|
|
||||||
|
def gradnorm_vals(self):
|
||||||
|
"""output the gradient norm per iteration for different methods"""
|
||||||
|
gradnorm_dict = {}
|
||||||
|
for params, name in zip(self.params, self.names):
|
||||||
|
grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
|
||||||
|
gradnorm_dict[name] = [grad_norms]
|
||||||
|
return gradnorm_dict
|
||||||
|
|
||||||
|
|
||||||
|
def private_newton(w_cur, lrp, hyper_dict):
|
||||||
|
"""implementation of private newton method from [ABL21]
|
||||||
|
|
||||||
|
w = current iterate
|
||||||
|
lr = an instance of MyLogisticRegression
|
||||||
|
i = the index of current iterate
|
||||||
|
iters = total number of iterations
|
||||||
|
pb = privacy budget and other info
|
||||||
|
return the next iterate
|
||||||
|
"""
|
||||||
|
total = hyper_dict["total"]
|
||||||
|
grad_frac = hyper_dict["grad_frac"]
|
||||||
|
iters = hyper_dict["num_iteration"]
|
||||||
|
hess = lrp.hess(w_cur)
|
||||||
|
rho_grad = grad_frac * total / iters # divide total privacy budget up.
|
||||||
|
rho_hess = (1 - grad_frac) * total / iters
|
||||||
|
hess_noise = np.random.normal(
|
||||||
|
scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
|
||||||
|
size=(lrp.dim, lrp.dim),
|
||||||
|
)
|
||||||
|
hess_noise = (hess_noise + hess_noise.T) / 2
|
||||||
|
hess_noisy = eigenclip(hess + hess_noise)
|
||||||
|
grad = lrp.grad(w_cur)
|
||||||
|
grad_noisy = grad + np.random.normal(
|
||||||
|
scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
|
||||||
|
)
|
||||||
|
dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
|
||||||
|
dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
|
||||||
|
return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy
|
||||||
|
|
||||||
|
|
||||||
|
def eigenclip(sym_mat, min_eval=1e-5):
|
||||||
|
"""operation of the eigenclip
|
||||||
|
|
||||||
|
A = symmetric matrix
|
||||||
|
min_eval = minimum eigenvalue for clipping
|
||||||
|
|
||||||
|
return the modified matrix
|
||||||
|
"""
|
||||||
|
eig_val, eig_vec = np.linalg.eigh(sym_mat)
|
||||||
|
eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
|
||||||
|
clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
|
||||||
|
return clipped_mat
|
||||||
|
|
||||||
|
|
||||||
|
def gd_priv(w_cur, lrp, hyper_dict):
|
||||||
|
"""Implementation of DP-GD.
|
||||||
|
|
||||||
|
w = current point
|
||||||
|
lr = logistic regression
|
||||||
|
i = iteration number
|
||||||
|
pb = auxillary information
|
||||||
|
|
||||||
|
output is the next iterate
|
||||||
|
"""
|
||||||
|
iters = hyper_dict["num_iteration"]
|
||||||
|
inv_lr_gd = 0.25 # learning rate based on the smoothness
|
||||||
|
sens = 1 / (lrp.num_samples * (inv_lr_gd)) # sensitivity
|
||||||
|
rho = hyper_dict["total"] / iters # divide total privacy budget up
|
||||||
|
noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
|
||||||
|
return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise
|
||||||
|
|
||||||
|
|
||||||
|
def sgd_priv(w_cur, lrp, hyper_dict):
|
||||||
|
"""Implementation of DP-SGD.
|
||||||
|
|
||||||
|
w = current point
|
||||||
|
lr = logistic regression
|
||||||
|
i = iteration number
|
||||||
|
pb = auxillary information
|
||||||
|
|
||||||
|
output is the next iterate
|
||||||
|
"""
|
||||||
|
batch_size = hyper_dict["batch_size"]
|
||||||
|
sigma_privacy = hyper_dict["noise_multiplier"]
|
||||||
|
lr_sgd = 4 # learning rate based on the smoothness
|
||||||
|
sample_rate = batch_size / lrp.num_samples # sampling probability
|
||||||
|
sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
|
||||||
|
batch_idx = np.where(sample_vec == 1)[0] # index of batch
|
||||||
|
batch_size_t = len(batch_idx)
|
||||||
|
noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
|
||||||
|
grad_minibatch = lrp.grad_wor(
|
||||||
|
w_cur, batch_idx
|
||||||
|
) # average gradient over batch_idx
|
||||||
|
return w_cur - lr_sgd * (
|
||||||
|
batch_size_t / batch_size * grad_minibatch + noise / batch_size
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def gd_priv_optls(w_cur, lrp, hyper_dict):
|
||||||
|
"""Implementation of DP-GD with back-tracking line search !!!
|
||||||
|
|
||||||
|
this method is not private. We only use it as a baseline.
|
||||||
|
|
||||||
|
w = current point
|
||||||
|
lr = logistic regression
|
||||||
|
i = iteration number
|
||||||
|
pb = auxillary information
|
||||||
|
|
||||||
|
output is the next iterate
|
||||||
|
"""
|
||||||
|
iters = hyper_dict["num_iteration"]
|
||||||
|
rho_grad = hyper_dict["total"] / iters # divide total privacy budget up
|
||||||
|
grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
|
||||||
|
grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
|
||||||
|
dir_srch = lrp.grad(w_cur) + grad_noise
|
||||||
|
stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
|
||||||
|
return w_cur - stepsize_opt * dir_srch
|
||||||
|
|
||||||
|
|
||||||
|
def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
|
||||||
|
"""Implementation of backtracking line search
|
||||||
|
|
||||||
|
lr = logistic regression
|
||||||
|
dir = the "noisy" gradient direction
|
||||||
|
w_start = current point
|
||||||
|
alpha and beta tradeoff the precision and complexity of the linesearch
|
||||||
|
|
||||||
|
output is an (close to) optimal stepsize
|
||||||
|
"""
|
||||||
|
step_size = 100
|
||||||
|
val_0 = lrp.loss(w_start)
|
||||||
|
inner_prod = np.dot(dir_srch, lrp.grad(w_start))
|
||||||
|
while (
|
||||||
|
lrp.loss(w_start - step_size * dir_srch)
|
||||||
|
>= val_0 - step_size * alpha * inner_prod
|
||||||
|
):
|
||||||
|
step_size = beta * step_size
|
||||||
|
if step_size < 1e-6:
|
||||||
|
break
|
||||||
|
return step_size
|
||||||
|
|
||||||
|
|
||||||
|
def newton(dataset, w_init, bias=True):
|
||||||
|
"""Implementation of the newton method with linesearch without privacy constraints
|
||||||
|
|
||||||
|
dataset = dataset
|
||||||
|
w_init = initialization point
|
||||||
|
|
||||||
|
output is the model parameter
|
||||||
|
"""
|
||||||
|
feature_vecs, labels = dataset
|
||||||
|
if bias is True:
|
||||||
|
feature_vecs = np.hstack(
|
||||||
|
(np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
|
||||||
|
)
|
||||||
|
lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
|
||||||
|
w_cur = w_init
|
||||||
|
for _ in range(8):
|
||||||
|
hess = lrp.hess(w_cur)
|
||||||
|
dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
|
||||||
|
step_size = backtracking_ls(lrp, dir_srch, w_cur)
|
||||||
|
w_cur = w_cur - step_size * dir
|
||||||
|
if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
|
||||||
|
w_out = w_cur
|
||||||
|
else:
|
||||||
|
w_out = w_init
|
||||||
|
return w_out
|
||||||
|
|
||||||
|
|
||||||
|
class DoubleNoiseMech:
|
||||||
|
"""Our Method: Double Noise Mechanism"""
|
||||||
|
|
||||||
|
def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
|
||||||
|
"""Initializer of the double noise mechanism
|
||||||
|
|
||||||
|
lr = an instance of MyLogisticRegression
|
||||||
|
type_reg = minimum eigenvalue modification type, it can be either 'add' or
|
||||||
|
'clip'
|
||||||
|
curvature_info = type of the second-order information
|
||||||
|
"""
|
||||||
|
self.type_reg = type_reg
|
||||||
|
self.curvature_info = curvature_info
|
||||||
|
if self.curvature_info == "hessian":
|
||||||
|
self.hess = lrp.hess_wor
|
||||||
|
elif self.curvature_info == "ub":
|
||||||
|
self.hess = lrp.upperbound_wor
|
||||||
|
|
||||||
|
def update_rule(self, w_cur, lrp, hyper_dict):
|
||||||
|
"""Implementation of the double noise mechanism update rule--full batch"""
|
||||||
|
noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
|
||||||
|
w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
|
||||||
|
return w_next
|
||||||
|
|
||||||
|
def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
|
||||||
|
"""Implementation of the double noise mechanism update rule--full batch"""
|
||||||
|
noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
|
||||||
|
w_next = self.noisy_direction_stochastic(
|
||||||
|
w_cur, lrp, hyper_dict, noisy_grad_cur
|
||||||
|
)
|
||||||
|
return w_next
|
||||||
|
|
||||||
|
def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
|
||||||
|
"""computing gradient"""
|
||||||
|
if batch is False:
|
||||||
|
rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
|
||||||
|
"num_iteration"
|
||||||
|
]
|
||||||
|
noise_grad = np.random.normal(
|
||||||
|
scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
|
||||||
|
)
|
||||||
|
noisy_grad = lrp.grad(w_cur) + noise_grad
|
||||||
|
else:
|
||||||
|
std_grad = hyper_dict["noise_multiplier_grad"]
|
||||||
|
pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
|
||||||
|
sample_vec = np.random.binomial(n=1, p=pgrad, size=lrp.num_samples)
|
||||||
|
batch_idx_grad = np.where(sample_vec == 1)[0]
|
||||||
|
grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad)
|
||||||
|
noise_grad = np.random.normal(scale=std_grad, size=lrp.dim)
|
||||||
|
noisy_grad = (
|
||||||
|
len(batch_idx_grad) / (lrp.num_samples * pgrad)
|
||||||
|
) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad)
|
||||||
|
return noisy_grad
|
||||||
|
|
||||||
|
def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad):
|
||||||
|
"""computing direction"""
|
||||||
|
total = hyper_dict["total"]
|
||||||
|
grad_frac = hyper_dict["grad_frac"]
|
||||||
|
frac_trace = hyper_dict["trace_frac"]
|
||||||
|
trace_coeff = hyper_dict["trace_coeff"]
|
||||||
|
iters = hyper_dict["num_iteration"]
|
||||||
|
rho_hess = (1 - grad_frac) * total / iters
|
||||||
|
smooth_param = 0.25
|
||||||
|
hess_cur = self.hess(w_cur)
|
||||||
|
noisy_trace = trace_coeff * max(
|
||||||
|
np.trace(hess_cur)
|
||||||
|
+ np.random.normal(
|
||||||
|
scale=(0.25 / lrp.num_samples)
|
||||||
|
* np.sqrt(0.5 / (frac_trace * rho_hess))
|
||||||
|
),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
min_eval = max(
|
||||||
|
(noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess))
|
||||||
|
** (1 / 3),
|
||||||
|
1 / (lrp.num_samples),
|
||||||
|
)
|
||||||
|
grad_norm = np.linalg.norm(noisy_grad)
|
||||||
|
if self.type_reg == "add": # Sensitivity is different for add vs clip
|
||||||
|
sens2 = (
|
||||||
|
grad_norm
|
||||||
|
* smooth_param
|
||||||
|
/ (lrp.num_samples * min_eval**2 + smooth_param * min_eval)
|
||||||
|
)
|
||||||
|
noise2 = np.random.normal(
|
||||||
|
scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)),
|
||||||
|
size=lrp.dim,
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
w_cur
|
||||||
|
- np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad)
|
||||||
|
+ noise2
|
||||||
|
)
|
||||||
|
# type_reg=clip
|
||||||
|
sens2 = (
|
||||||
|
grad_norm
|
||||||
|
* smooth_param
|
||||||
|
/ (lrp.num_samples * min_eval**2 - smooth_param * min_eval)
|
||||||
|
)
|
||||||
|
noise2 = np.random.normal(
|
||||||
|
scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim
|
||||||
|
)
|
||||||
|
eval_hess, evec_hess = np.linalg.eigh(hess_cur)
|
||||||
|
eval_trunc = eval_hess[eval_hess >= min_eval]
|
||||||
|
num_eig = len(eval_trunc)
|
||||||
|
if num_eig == 0:
|
||||||
|
hess_modified_inv = 1 / min_eval * np.eye(lrp.dim)
|
||||||
|
else:
|
||||||
|
evec_trun = evec_hess[:, -num_eig:]
|
||||||
|
hess_modified_inv = np.dot(
|
||||||
|
evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)),
|
||||||
|
evec_trun.T,
|
||||||
|
) + 1 / min_eval * np.eye(lrp.dim)
|
||||||
|
return w_cur - (hess_modified_inv @ noisy_grad) + noise2
|
||||||
|
|
||||||
|
def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad):
|
||||||
|
"""noisy direction for stochastic variant"""
|
||||||
|
std_hess = hyper_dict["noise_multiplier_hess"]
|
||||||
|
phess = hyper_dict["batchsize_hess"] / lrp.num_samples
|
||||||
|
min_eval = hyper_dict["min_eval"]
|
||||||
|
sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples)
|
||||||
|
batch_idx_hess = np.where(sample_vec == 1)[0]
|
||||||
|
batch_size_hess_t = len(batch_idx_hess)
|
||||||
|
hess_cur = (
|
||||||
|
(batch_size_hess_t)
|
||||||
|
/ (lrp.num_samples * phess)
|
||||||
|
* self.hess(w_cur, batch_idx_hess)
|
||||||
|
)
|
||||||
|
smooth_param = 0.25 # smoothness parameter
|
||||||
|
grad_norm = np.linalg.norm(noisy_grad)
|
||||||
|
if self.type_reg == "add": # Sensitivity is different for add vs clip
|
||||||
|
sens2 = (
|
||||||
|
grad_norm
|
||||||
|
* smooth_param
|
||||||
|
/ (
|
||||||
|
(lrp.num_samples * phess) * min_eval**2
|
||||||
|
+ smooth_param * min_eval
|
||||||
|
)
|
||||||
|
)
|
||||||
|
noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
|
||||||
|
return (
|
||||||
|
w_cur
|
||||||
|
- np.linalg.solve(
|
||||||
|
hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad
|
||||||
|
)
|
||||||
|
+ noise2
|
||||||
|
)
|
||||||
|
# type_reg=clip
|
||||||
|
min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess)))
|
||||||
|
sens2 = (
|
||||||
|
grad_norm
|
||||||
|
* smooth_param
|
||||||
|
/ (
|
||||||
|
(lrp.num_samples * phess) * min_eval_c**2
|
||||||
|
- smooth_param * min_eval_c
|
||||||
|
)
|
||||||
|
)
|
||||||
|
noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
|
||||||
|
eval_hess, evec_hess = np.linalg.eigh(hess_cur)
|
||||||
|
eval_trunc = eval_hess[eval_hess >= min_eval_c]
|
||||||
|
num_eig = len(eval_trunc)
|
||||||
|
if num_eig == 0:
|
||||||
|
hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim)
|
||||||
|
else:
|
||||||
|
evec_trun = evec_hess[:, -num_eig:]
|
||||||
|
hess_modified_inv = np.dot(
|
||||||
|
evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)),
|
||||||
|
evec_trun.T,
|
||||||
|
) + 1 / min_eval_c * np.eye(lrp.dim)
|
||||||
|
return w_cur - (hess_modified_inv @ noisy_grad) + noise2
|
47
research/dp_newton/src/print_results.py
Normal file
47
research/dp_newton/src/print_results.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
"""print the achievable error of different algorithms"""
|
||||||
|
|
||||||
|
# pylint: skip-file
|
||||||
|
# pyformat: disable
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
RESULTS_PATH = './src/results/'
|
||||||
|
excess_loss = {}
|
||||||
|
opt_algs = [
|
||||||
|
'DPGD',
|
||||||
|
'DN-Hess-add',
|
||||||
|
'DN-UB-add',
|
||||||
|
'DN-Hess-clip',
|
||||||
|
'DN-UB-clip',
|
||||||
|
'private-newton',
|
||||||
|
]
|
||||||
|
for filename in os.listdir(RESULTS_PATH):
|
||||||
|
f = os.path.join(RESULTS_PATH, filename)
|
||||||
|
with open(f, encoding='utf-8') as json_file:
|
||||||
|
data = json.load(json_file)
|
||||||
|
for alg in data.keys():
|
||||||
|
if alg in opt_algs:
|
||||||
|
loss_avg = np.array(data[alg]['loss_avg'])
|
||||||
|
loss_std = np.array(data[alg]['loss_std'])
|
||||||
|
clock_time = np.array(data[alg]['clock_time_avg'])
|
||||||
|
print('optimization algorithm: ', alg)
|
||||||
|
print('excess loss: ' + str(loss_avg[-1]))
|
||||||
|
print('run time: ' + str(clock_time[-1]) + '(sec)')
|
||||||
|
print('-----')
|
264
research/dp_newton/src/run.py
Normal file
264
research/dp_newton/src/run.py
Normal file
|
@ -0,0 +1,264 @@
|
||||||
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
"""collections of helper function to run and compare different algorithms"""
|
||||||
|
|
||||||
|
# pylint: skip-file
|
||||||
|
# pyformat: disable
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from dataset_loader import Mydatasets
|
||||||
|
from my_logistic_regression import MyLogisticRegression
|
||||||
|
import numpy as np
|
||||||
|
from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
|
||||||
|
from scipy.optimize import fsolve
|
||||||
|
|
||||||
|
|
||||||
|
def zcdp_to_eps(rho, delta):
|
||||||
|
""" "
|
||||||
|
|
||||||
|
conversion of zcdp gurantee to (eps,delta)-DP using the formula in Lemma 3.6
|
||||||
|
of [BS16]
|
||||||
|
rho : zCDP
|
||||||
|
delta: delta in DP
|
||||||
|
|
||||||
|
return eps
|
||||||
|
"""
|
||||||
|
return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))
|
||||||
|
|
||||||
|
|
||||||
|
def eps_to_zcdp(eps, delta):
|
||||||
|
""" "
|
||||||
|
|
||||||
|
conversion of (eps,delta) gurantee to rho-zCDP
|
||||||
|
eps : eps in DP
|
||||||
|
delta: delta in DP
|
||||||
|
|
||||||
|
return rho
|
||||||
|
"""
|
||||||
|
|
||||||
|
def func_root(rho_zcdp):
|
||||||
|
return zcdp_to_eps(rho_zcdp, delta) - eps
|
||||||
|
|
||||||
|
root = fsolve(func_root, x0=0.001)[-1]
|
||||||
|
return root
|
||||||
|
|
||||||
|
|
||||||
|
def helper_fun(datasetname, alg_type, params_exp):
|
||||||
|
"""helper function for running different algorithms
|
||||||
|
|
||||||
|
args:
|
||||||
|
datasetname = dataset
|
||||||
|
alg_type = type of the optimization algorithm
|
||||||
|
params_exp = hyperparameters
|
||||||
|
"""
|
||||||
|
feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
|
||||||
|
privacy_dp = params_exp["total"]
|
||||||
|
params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
|
||||||
|
log_reg = MyLogisticRegression(feature_vecs, labels)
|
||||||
|
alg_dict, filename_params = prepare_alg_dict(
|
||||||
|
alg_type, datasetname, privacy_dp, params_exp, log_reg
|
||||||
|
)
|
||||||
|
compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
|
||||||
|
result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
|
||||||
|
result["num-samples"] = len(labels)
|
||||||
|
with open(
|
||||||
|
"src/results/" + filename_params, "w", encoding="utf8"
|
||||||
|
) as json_file:
|
||||||
|
json.dump(result, json_file)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
|
||||||
|
"""prepare update rule for algorithms and filename"""
|
||||||
|
alg_dict = None
|
||||||
|
filename_params = None
|
||||||
|
if alg_type == "double_noise":
|
||||||
|
filename_params = (
|
||||||
|
"so_"
|
||||||
|
+ datasetname
|
||||||
|
+ "_"
|
||||||
|
+ str(privacy_dp)
|
||||||
|
+ "_"
|
||||||
|
+ "DP"
|
||||||
|
+ "_"
|
||||||
|
+ str(params_exp["num_iteration"])
|
||||||
|
+ "_"
|
||||||
|
+ str(params_exp["grad_frac"])
|
||||||
|
+ "_"
|
||||||
|
+ str(params_exp["trace_frac"])
|
||||||
|
+ "_"
|
||||||
|
+ str(params_exp["trace_coeff"])
|
||||||
|
+ ".txt"
|
||||||
|
)
|
||||||
|
dnm_hess_add = DoubleNoiseMech(
|
||||||
|
log_reg, type_reg="add", curvature_info="hessian"
|
||||||
|
).update_rule
|
||||||
|
dnm_ub_add = DoubleNoiseMech(
|
||||||
|
log_reg, type_reg="add", curvature_info="ub"
|
||||||
|
).update_rule
|
||||||
|
dnm_hess_clip = DoubleNoiseMech(
|
||||||
|
log_reg, type_reg="clip", curvature_info="hessian"
|
||||||
|
).update_rule
|
||||||
|
dnm_ub_clip = DoubleNoiseMech(
|
||||||
|
log_reg, type_reg="clip", curvature_info="ub"
|
||||||
|
).update_rule
|
||||||
|
alg_dict = {
|
||||||
|
"DN-Hess-add": dnm_hess_add,
|
||||||
|
"DN-Hess-clip": dnm_hess_clip,
|
||||||
|
"DN-UB-clip": dnm_ub_clip,
|
||||||
|
"DN-UB-add": dnm_ub_add,
|
||||||
|
}
|
||||||
|
elif alg_type == "dp_gd":
|
||||||
|
filename_params = (
|
||||||
|
"gd_"
|
||||||
|
+ datasetname
|
||||||
|
+ "_"
|
||||||
|
+ str(privacy_dp)
|
||||||
|
+ "_"
|
||||||
|
+ "DP"
|
||||||
|
+ "_"
|
||||||
|
+ str(params_exp["num_iteration"])
|
||||||
|
+ ".txt"
|
||||||
|
)
|
||||||
|
alg_dict = {"DPGD": gd_priv}
|
||||||
|
elif alg_type == "damped_newton":
|
||||||
|
filename_params = (
|
||||||
|
"newton_"
|
||||||
|
+ datasetname
|
||||||
|
+ "_"
|
||||||
|
+ str(privacy_dp)
|
||||||
|
+ "_"
|
||||||
|
+ "DP"
|
||||||
|
+ "_"
|
||||||
|
+ str(params_exp["num_iteration"])
|
||||||
|
+ ".txt"
|
||||||
|
)
|
||||||
|
alg_dict = {"private-newton": private_newton}
|
||||||
|
return alg_dict, filename_params
|
||||||
|
|
||||||
|
|
||||||
|
class RunReleaseStats:
|
||||||
|
"""Helpfer function to run different algorithms and store the results"""
|
||||||
|
|
||||||
|
def __init__(self, compare_algs, algs_dict, num_rep=10):
|
||||||
|
self.compare_algs = compare_algs
|
||||||
|
self.algs_dict = algs_dict
|
||||||
|
self.num_rep = num_rep
|
||||||
|
self.losses = 0
|
||||||
|
self.gradnorm = 0
|
||||||
|
self.accuracy = 0
|
||||||
|
self.wall_clock = 0
|
||||||
|
|
||||||
|
def run_algs(self):
|
||||||
|
"""method to run different algorithms and store different stats"""
|
||||||
|
for rep in range(self.num_rep):
|
||||||
|
for alg_name, alg_update_rule in self.algs_dict.items():
|
||||||
|
self.compare_algs.add_algo(alg_update_rule, alg_name)
|
||||||
|
losses_dict = self.compare_algs.loss_vals()
|
||||||
|
gradnorm_dict = self.compare_algs.gradnorm_vals()
|
||||||
|
accuracy_dict = self.compare_algs.accuracy_vals()
|
||||||
|
wall_clock_dict = self.compare_algs.wall_clock_alg()
|
||||||
|
if rep == 0:
|
||||||
|
self.losses = losses_dict
|
||||||
|
self.gradnorm = gradnorm_dict
|
||||||
|
self.accuracy = accuracy_dict
|
||||||
|
self.wall_clock = wall_clock_dict
|
||||||
|
else:
|
||||||
|
for alg in self.losses:
|
||||||
|
self.losses[alg].extend(losses_dict[alg])
|
||||||
|
self.gradnorm[alg].extend(gradnorm_dict[alg])
|
||||||
|
self.accuracy[alg].extend(accuracy_dict[alg])
|
||||||
|
self.wall_clock[alg].extend(wall_clock_dict[alg])
|
||||||
|
|
||||||
|
def summarize_stats(self):
|
||||||
|
"""method to summarize the results"""
|
||||||
|
self.run_algs()
|
||||||
|
result = {}
|
||||||
|
result["acc-best"] = self.compare_algs.accuracy_np().tolist()
|
||||||
|
for alg in self.losses:
|
||||||
|
result[alg] = {}
|
||||||
|
loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
|
||||||
|
loss_std = np.std(np.array(self.losses[alg]), axis=0)
|
||||||
|
result[alg]["loss_avg"] = (loss_avg).tolist()
|
||||||
|
result[alg]["loss_std"] = (loss_std / np.sqrt(self.num_rep)).tolist()
|
||||||
|
gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
|
||||||
|
gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
|
||||||
|
result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
|
||||||
|
result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
|
||||||
|
acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
|
||||||
|
acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
|
||||||
|
result[alg]["acc_avg"] = (acc_avg).tolist()
|
||||||
|
result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
|
||||||
|
clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
|
||||||
|
clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
|
||||||
|
result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
|
||||||
|
result[alg]["clock_time_std"] = (
|
||||||
|
clocktime_std / np.sqrt(self.num_rep)
|
||||||
|
).tolist()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""main function"""
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--datasetname")
|
||||||
|
parser.add_argument("--alg_type")
|
||||||
|
parser.add_argument("--total")
|
||||||
|
parser.add_argument("--numiter")
|
||||||
|
# double noise and newton
|
||||||
|
parser.add_argument("--grad_frac")
|
||||||
|
parser.add_argument("--trace_frac")
|
||||||
|
parser.add_argument("--trace_coeff")
|
||||||
|
args = parser.parse_args()
|
||||||
|
datasetname = args.datasetname
|
||||||
|
alg_type = args.alg_type
|
||||||
|
total = float(args.total)
|
||||||
|
num_iter = int(args.numiter)
|
||||||
|
if alg_type == "double_noise":
|
||||||
|
grad_frac = float(args.grad_frac)
|
||||||
|
trace_frac = float(args.trace_frac)
|
||||||
|
trace_coeff = float(args.trace_coeff)
|
||||||
|
hyper_parameters = {
|
||||||
|
"total": total,
|
||||||
|
"grad_frac": grad_frac,
|
||||||
|
"trace_frac": trace_frac,
|
||||||
|
"trace_coeff": trace_coeff,
|
||||||
|
"num_iteration": num_iter,
|
||||||
|
}
|
||||||
|
elif alg_type == "dp_gd":
|
||||||
|
hyper_parameters = {"total": total, "num_iteration": num_iter}
|
||||||
|
elif alg_type == "damped_newton":
|
||||||
|
grad_frac = float(args.grad_frac)
|
||||||
|
hyper_parameters = {
|
||||||
|
"total": total,
|
||||||
|
"num_iteration": num_iter,
|
||||||
|
"grad_frac": grad_frac,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
raise ValueError("no such optmization algorithm exists")
|
||||||
|
print(
|
||||||
|
"optimization algorithm "
|
||||||
|
+ alg_type
|
||||||
|
+ ","
|
||||||
|
+ "dataset name: "
|
||||||
|
+ datasetname
|
||||||
|
)
|
||||||
|
helper_fun(datasetname, alg_type, hyper_parameters)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
Loading…
Reference in a new issue