diff --git a/research/dp_newton/README.md b/research/dp_newton/README.md
new file mode 100644
index 0000000..8239933
--- /dev/null
+++ b/research/dp_newton/README.md
@@ -0,0 +1,32 @@
+# Faster Differentially Private Convex Optimization via Second-Order Methods
+
+Code accompanying the paper
+https://arxiv.org/abs/2305.13209
+by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, and Abhradeep Thakurta.
+
+## Description
+
+Implementation of the optimization algorithms proposed in
+https://arxiv.org/abs/2305.13209
+
+## Getting Started
+
+You will need to install fairly standard dependencies: `numpy`, `scikit-learn`,
+`requests`, `torch`, and `torchvision` (see the imports under `src/`).
+
+Run `run_privacy_utility` to compare the convergence speed and excess loss of
+the different algorithms.
+
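+The script drives `src/run.py` and `src/print_results.py`. If you prefer to
+experiment with the building blocks directly, the following is a minimal,
+illustrative sketch that uses the `MyLogisticRegression` class from
+`src/my_logistic_regression.py` on made-up toy data:
+
+```python
+# Run from research/dp_newton/src so that the import below resolves.
+import numpy as np
+
+from my_logistic_regression import MyLogisticRegression
+
+# Toy problem: 100 random unit-norm feature vectors in 5 dimensions with
+# random +/-1 labels (purely illustrative).
+rng = np.random.default_rng(0)
+features = rng.normal(size=(100, 5))
+features /= np.linalg.norm(features, axis=1, keepdims=True)
+labels = np.where(rng.random(100) < 0.5, -1.0, 1.0)
+
+lrp = MyLogisticRegression(features, labels, reg=1e-8)
+w = np.zeros(5)
+print("loss:", lrp.loss(w), "grad norm:", np.linalg.norm(lrp.grad(w)))
+```
+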
+### Citation
+
+You can cite the paper as follows:
+
+```
+@article{ganesh2023faster,
+ title={Faster Differentially Private Convex Optimization
+ via Second-Order Methods},
+ author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas
+ and Thakurta, Abhradeep},
+ journal={arXiv preprint arXiv:2305.13209},
+ year={2023}
+}
+```
diff --git a/research/dp_newton/dpso-logistic.py b/research/dp_newton/dpso-logistic.py
deleted file mode 100644
index 3e5ac43..0000000
--- a/research/dp_newton/dpso-logistic.py
+++ /dev/null
@@ -1,1238 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-# -*- coding: utf-8 -*-
-"""Differentially Private Second-Order Methods for Logistic Regression.
-
-This script implements several algorithms for DP logistic regression and
-tests them on various datasets. It produces plots for our upcoming paper.
-
-Code exported from Colab. Written by Mahdi Haghifam.
-
-"""
-
-# pylint: disable=invalid-name
-# We use upper case to denote matrices and lower case for vectors.
-# This conflicts with pylint's variable naming rules.
-# pylint: disable=redefined-outer-name
-# This is a script hence we have "global" variables.
-# pylint: disable=unused-argument
-# The update rule functions are meant to have the same signature,
-# so cannot just remove arguments. Ideally this should have been
-# implemented as a class, but much easier to define a function.
-
-import math
-import urllib.request
-
-# from keras.utils.np_utils import to_categorical
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn import preprocessing
-import sklearn.datasets
-from sklearn.linear_model import LogisticRegression
-import tensorflow as tf
-# from tensorflow import keras
-
-
-class MyLogisticRegression:
- """Represents a logistic regression problem.
-
- There is a dataset consisting of features (vectors of norm <=1)
- and labels (+1,-1), represented as a numpy array.
- There is also an L2 regularizer.
- """
-
- def __init__(self, X, y, reg=1e-8):
- """Initialize the data and the regularizer.
-
- Args:
- X: n x d numpy array representing features
- y: n x 1 numpy array representing labels
- reg: L2 regularizing coefficient (to ensure solution is finite)
-
- Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
- """
- self.reg = float(reg)
- X = np.array(X)
- y = np.array(y)
- assert len(X.shape) == 2
- assert len(y.shape) == 1
- self.n, self.d = X.shape
- assert y.shape[0] == self.n
- signed_data = X * y[:, np.newaxis]
- norm = np.linalg.norm(signed_data, axis=1)
- scale = np.maximum(norm, np.ones_like(norm))
- self.data = (1 / scale[:, None]) * signed_data
-
- def loss(self, w):
- """Computes the loss represented by this object at w.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the loss is
-      (1/n)sum_i^n log(1+exp(-<X[i,:]*y[i],w>)) + (reg/2)||w||^2
- """
- data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w))))
- reg_loss = 0.5 * self.reg * np.linalg.norm(w)**2
- return data_loss + reg_loss
-
- def loss_wor(self, w):
- """Computes the loss represented by this object at w without regularizer.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the loss is
-      (1/n)sum_i^n log(1+exp(-<X[i,:]*y[i],w>))
- """
- data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w))))
- return data_loss
-
- def grad(self, w):
- """Computes the gradient of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the gradient is
-      (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<X[i,:]*y[i],w>)) + reg*w
- """
- coeff_grad = -1/(1+np.exp(np.dot(self.data, w)))
- data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0)
- return data_grad + self.reg * w
-
- def grad_wor(self, w):
- """Computes the gradient of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the gradient is
-      (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<X[i,:]*y[i],w>)) + reg*w
- """
- coeff_grad = -1/(1+np.exp(np.dot(self.data, w)))
- data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0)
- return data_grad
-
- def hess(self, w):
- """Computes the Hessian of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- The Hessian is the matrix of second derivatives.
- If X,y is the data and reg is the regularizer, then the Hessian is
-      (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
- where we assume y[i]^2==1.
- """
- a = np.dot(self.data, w)/2
- coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2
- raw_hess = np.dot(self.data.T * coeff_hess, self.data)
- return raw_hess/self.n + self.reg * np.eye(self.d)
-
- def hess_wor(self, w):
- """Computes the Hessian of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- The Hessian is the matrix of second derivatives.
- If X,y is the data, then the Hessian is
-      (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
- where we assume y[i]^2==1.
- """
- a = np.dot(self.data, w)/2
- coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2
- raw_hess = np.dot(self.data.T * coeff_hess, self.data)
- return raw_hess/self.n
-
- def upperbound(self, w):
- """Computes tightest universal quadratic upper bound on the loss function.
-
- log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
- Constant and linear terms are just first-order Taylor
- This function gives the quadratic term (which replaces the Hessian)
- https://twitter.com/shortstein/status/1557961202256318464
-
- Args:
- w: weight vector
-
- Returns:
- Matrix H such that for all v
-      loss(v) <= loss(w)+<grad(w),v-w>+<v-w,H(v-w)>/2
- """
-    a = -np.dot(self.data, w)  # vector of -y_i*<x_i,w> for i in [n]
- # v = 0.5*np.tanh(a/2)/a
- # But avoid 0/0 by special rule
- v = np.divide(
- 0.5 * np.tanh(a / 2),
- a,
- out=(np.ones(a.shape) * 0.25),
- where=(np.abs(a) > 1e-9))
- H = np.dot(self.data.T * v, self.data)
- return H / self.n + self.reg * np.eye(self.d)
-
- def upperbound_wor(self, w):
- """Computes tightest quadratic upper bound on the unregularized loss.
-
- log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
- Constant and linear terms are just first-order Taylor
- This function gives the quadratic term (which replaces the Hessian)
- https://twitter.com/shortstein/status/1557961202256318464
-
- Args:
- w: weight vector
-
- Returns:
- Matrix H such that for all v
-      loss(v) <= loss(w)+<grad(w),v-w>+<v-w,H(v-w)>/2
- """
-    a = -np.dot(self.data, w)  # vector of -y_i*<x_i,w> for i in [n]
- # v = 0.5*np.tanh(a/2)/a
- # But avoid 0/0 by special rule
- v = np.divide(
- 0.5 * np.tanh(a / 2),
- a,
- out=(np.ones(a.shape) * 0.25),
- where=(np.abs(a) > 1e-9))
- H = np.dot(self.data.T * v, self.data)
- return H / self.n
-
-
-class Mydatasets:
- """Represents datasets we use for testing the algorithms.
- """
-
- def __init__(self):
- pass
-
- def find_optimal_classifier(self, dataset, reg=1e-9):
- """Find the optimal weight vector for the logistic regression.
-
- Args:
- dataset: training dataset
- reg: regularizer
-
- Returns:
- Optimal weight vector.
- """
- X, y = dataset
- model_lr = LogisticRegression(max_iter=10000, C=1/reg).fit(X, y)
- w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
- w_opt = newton(dataset, w_opt1)
- print("optimal weight vector norms", np.linalg.norm(w_opt))
- return w_opt
-
- def mnist_binary(self):
- """Download and extract MNIST data.
-
- We also select only two labels for the binary classification task.
-
- Returns:
- Features, labels, and optimal weight vector.
- """
- labels = [1, 7]
- label0, label1 = int(labels[0]), int(labels[1])
- mnist = tf.keras.datasets.mnist
- (x_train, y_train), (_, _) = mnist.load_data()
- x_train = x_train.reshape(x_train.shape[0], -1)
- scaler = preprocessing.StandardScaler().fit(x_train)
- x_train = scaler.transform(x_train)
- nrm = np.linalg.norm(x_train, axis=1)
- x_train = x_train * 1/nrm[:, None]
- y_train = y_train.astype(float)
- indx0 = np.nonzero(y_train == label0)[0]
- indx1 = np.nonzero(y_train == label1)[0]
- y_train[indx0] = -1
- y_train[indx1] = 1
- indx = np.concatenate((indx0, indx1))
- x_train = x_train[indx]
- labels = y_train[indx]
- dataset = x_train, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)),
- x_train)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def w8a_dataset(self):
- """w8a dataset for logistic regression.
- """
- num_points = 15e3
- w8a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/w8a"
- data_path = "./w8a"
- urllib.request.urlretrieve(w8a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- selected_samples = np.random.choice(len(X), int(num_points))
- X = X[selected_samples, :]
- labels = labels[selected_samples]
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def a1a_dataset(self):
- """Loads a1a dataset for logistic regression.
- """
- a1a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a"
- data_path = "./a1a"
- urllib.request.urlretrieve(a1a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def phishing(self):
- """phishing dataset for logistic regression.
- """
- phishing_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/phishing"
- data_path = "./phishing"
- urllib.request.urlretrieve(phishing_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def a5a_dataset(self):
- """a5a dataset for logistic regression.
- """
- a5a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a"
- data_path = "./a5a"
- urllib.request.urlretrieve(a5a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def a6a_dataset(self):
- """a6a dataset for logistic regression.
- """
- a6a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a"
- data_path = "./a6a"
- urllib.request.urlretrieve(a6a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def madelon(self):
- """madelon dataset for logistic regression.
- """
- madelon_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/madelon"
- data_path = "./madelon"
- urllib.request.urlretrieve(madelon_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def mushroom_dataset(self):
- """mushroom dataset for logistic regression.
- """
- mushroom_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/mushrooms"
- data_path = "./mushrooms"
- urllib.request.urlretrieve(mushroom_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def synthetic_data(self, n=5000, d=50, cov=None, w=None):
- """Generates a synthetic dataset for logistic regression.
-
- Args:
- n: number of samples
- d: dimension
- cov: covariance of the data (optional, default: identity)
- w: true coefficient vector (optional, default:first standard basis vector)
-
- Returns:
- Synthetic dataset.
- Features are unit vectors (by default uniformly random).
- Labels are sampled from logistic distribution,
- where argument w is the "true" solution.
- """
- mean = np.zeros(d)
- if cov is None:
- cov = np.eye(d)
- X_un = np.random.multivariate_normal(mean, cov, n)
- nrm = np.linalg.norm(X_un, axis=1)
- X = X_un * 1/nrm[:, None]
- if w is None:
- w = np.ones(d)
- w[0] = 1
- inner_prod = np.dot(X, w)
- params = np.exp(inner_prod)/(1+np.exp(inner_prod))
- labels = 2*np.random.binomial(1, params)-1
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
-
-class CompareAlgs:
- """Class to run multiple iterative algorithms and compare the results."""
-
- def __init__(self,
- lr,
- dataset,
- optimal_w,
- iters=10,
- w0=None,
- pb=None):
- """Initialize the problem."""
- X, _ = dataset
- self.w_opt = optimal_w
- n, d = np.shape(X)
- print("dataset is created: (number of samples, dimension)=" + str(n) + "," +
- str(d))
-
- if w0 is None:
- w0_un = np.random.multivariate_normal(np.zeros(d), np.eye(d))
- w0 = w0_un/np.linalg.norm(w0_un)
- self.w0 = w0 # initial iterate
- self.iters = iters
- self.pb = pb
- self.lr = lr
- self.plots = [] # List of lists of iterates
- self.names = [] # List of names
- self.linestyles = [] # List of line styles for plotting
- self.cutoff = 20 * np.linalg.norm(self.w_opt) + 20 * np.linalg.norm(
- self.w0) + 10 # how do you set this value? is it problem-specific?
-
- def add_plot(self, update_rule, name, linestyle):
- """Run an iterative update method & add to plot.
-
- Args:
- update_rule: a function that takes 4 arguments:
- current iterate
- LogisticRegression problem
- index of current iterate
- total number of iterations
- pb = privacy budget or similar
- name: string to display in legend
- linestyle: line style for plot
- """
- baseline = self.lr.loss_wor(self.w_opt)
- print(name)
- w = self.w0
- plot = [w]
- for i in range(self.iters):
- w = update_rule(w, self.lr, i, self.iters, self.pb)
- if np.linalg.norm(w) > self.cutoff:
- w = self.w0 # Stop things exploding
- print("Stop Things Exploding!")
- plot.append(w)
- print(
- str(i) + ": ||grad||=" + str(np.linalg.norm(self.lr.grad_wor(w))) +
- " ex_loss=" + str(self.lr.loss_wor(w) - baseline))
- self.plots.append(plot)
- self.names.append(name)
- self.linestyles.append(linestyle)
- print()
-
- def plot_grad_norms(self, legend=True):
- """Plot gradient norms for each iteration.
- """
- plt.clf()
- for plot, name, ls in zip(self.plots, self.names, self.linestyles):
- grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot]
- plt.plot(range(self.iters+1), grad_norms, ls, label=name)
- plt.yscale("log")
- ymax = np.linalg.norm(self.lr.grad(self.plots[0][0]))
- plt.ylim(top=ymax)
- # plt.ylim((1e-3, 1e-1))
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- if legend: plt.legend()
- plt.show()
-
- def loss_vals(self):
- """Outputs the loss vector for different methods.
- """
- baseline = self.lr.loss_wor(self.w_opt)
- loss_dict = {}
- for plot, name in zip(self.plots, self.names):
- losses = [self.lr.loss_wor(w)-baseline for w in plot]
- loss_dict[name] = [losses]
- return loss_dict
-
- def gradnorm_vals(self):
- """Outputs the gradient norm for different methods.
- """
- gradnorm_dict = {}
- for plot, name in zip(self.plots, self.names):
- grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot]
- gradnorm_dict[name] = [grad_norms]
- return gradnorm_dict
-
- def plot_losses(self):
- """Plots excess loss for each iteration.
-
- output is a dictionary where the keys are name of method and value is
- the loss for each iteration.
- """
- baseline = self.lr.loss_wor(self.w_opt)
- plt.clf()
- for plot, name, ls in zip(self.plots, self.names, self.linestyles):
- losses = [self.lr.loss_wor(w)-baseline for w in plot]
- plt.plot(range(self.iters+1), losses, ls, label=name)
- # plt.yscale('log')
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.legend()
- plt.show()
-
-
-def gd_priv(w, lr, i, iters, pb):
- """Implementation of DP-GD.
-
- Args:
- w: current point
- lr: logistic regression
- i: iteration number
- iters: total number of iterations
- pb: auxillary information
-
- Returns:
- The next iterate.
- """
- inv_lr_gd = 0.25 # We select the learning rate based on the smoothness
- sens = 1/(lr.n*(inv_lr_gd+lr.reg)) # Sensitivity
- rho = pb["total"] / iters # divide total privacy budget up
- noise = np.random.normal(scale=sens/math.sqrt(2*rho), size=lr.d)
- return w - lr.grad(w)/(inv_lr_gd+lr.reg) + noise
-
-
-def gd_priv_backtrackingls(w, lr, i, iters, pb):
- """Implementation of DP-GD with back-tracking line search.
-
- !!! this method is not private. We only use it as a baseline.
-
- Args:
- w: current point
- lr: logistic regression
- i: iteration number
- iters: total number of iterations
- pb: auxillary information
-
- Returns:
- The next iterate
- """
- rho_grad = pb["total"] / iters # divide total privacy budget up
- grad_scale = (1/lr.n)*math.sqrt(0.5/rho_grad)
- grad_noise = np.random.normal(scale=grad_scale, size=lr.d)
- direction = lr.grad(w)+grad_noise
- stepsize_opt = backtracking_ls(lr, direction, w)
- return w - stepsize_opt * direction
-
-
-def backtracking_ls(lr, direction, w_0, alpha=0.4, beta=0.95):
- """Implementation of backtracking line search.
-
- Args:
- lr: logistic regression
- direction: the "noisy" gradient direction
- w_0: current point
- alpha: tradeoff the precision and complexity of the linesearch
- beta: tradeoff the precision and complexity of the linesearch
-
- Returns:
- A good stepsize
- """
- t = 100
- while lr.loss(w_0 - t * direction
- ) >= lr.loss(w_0) - t * alpha * np.dot(direction, lr.grad(w_0)):
- t = beta * t
- if t < 1e-10:
- break
- return t
-
-
-def newton(dataset, w_opt2):
- """Newton update rule.
- """
- X, y = dataset
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X))
- lr = MyLogisticRegression(X, y, reg=1e-9)
- w_opt = w_opt2
- _, d = np.shape(X)
- w = np.zeros(d)
- for _ in range(30):
- H = lr.hess(w)
- direction = np.linalg.solve(H, lr.grad(w))
- step_size = backtracking_ls(lr, direction, w)
- w = w - step_size * direction
- if lr.loss_wor(w) < lr.loss_wor(w_opt2):
- w_opt = w
- return w_opt
-
-
-def newton_ur(w, lr, i, iters, pb):
- H = lr.hess(w)
- direction = np.linalg.solve(H, lr.grad(w))
- step_size = backtracking_ls(lr, direction, w)
- return w - step_size * direction
-
-
-class DoubleNoiseMech:
- """Our Method: Double Noise."""
-
- def __init__(self,
- lr,
- type_reg="add",
- hyper_tuning=False,
- curvature_info="hessian",
- plot_eigen=False):
- """Initializes the algorithm.
-
- Args:
- lr: logistic regression problem we are solving.
- type_reg: "add" or "clip" -- how we regularize eigenvalues.
- hyper_tuning: do we tune the hyperparameters.
- curvature_info: "hessian" or "ub" -- what quadratic we use.
- plot_eigen: show eigenvalues for debugging purposes.
-
- """
- self.type_reg = type_reg
- self.hyper_tuning = hyper_tuning
- self.curvature_info = curvature_info
- self.plot_eigen = plot_eigen
- if self.curvature_info == "hessian":
- self.H = lr.hess_wor
- elif self.curvature_info == "ub":
- self.H = lr.upperbound_wor
-
- def find_opt_reg_wop(self, w, lr, noisy_grad, rho_hess):
- """Implementation of finding the optimal lambda.
-
- Here, we don't pay for privacy of doing it.
-
- Args:
- w: current point
- lr: logistic regression problem
- noisy_grad: the gradient estimate
- rho_hess: the privacy budget
-
- Returns:
- The next iterate.
- """
- increase_factor = 1.5 # at each step we increase the clipping
- if self.type_reg == "add":
- lambda_cur = 5e-6 # starting parameter
- elif self.type_reg == "clip":
- lambda_cur = 0.25/lr.n + 1e-5 # starting parameter,
- num_noise_sample = 5 # we want to estimate expected value over the noise
- grad_norm = np.linalg.norm(noisy_grad)
- H = self.H(w)
- best_loss = 1e6 # a large dummy number
- while lambda_cur <= 0.25:
- H = self.hess_mod(w, lambda_cur)
- if self.type_reg == "add": # Sensitivity is different for add vs clip
- sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 + 0.25*lambda_cur)
- elif self.type_reg == "clip":
- sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 - 0.25*lambda_cur)
- loss_ = 0
- for _ in range(num_noise_sample):
- noise2 = np.random.normal(scale=sens2*math.sqrt(.5/rho_hess), size=lr.d)
- loss_ = loss_ + lr.loss_wor(w - np.linalg.solve(H, noisy_grad) + noise2)
- if loss_ < best_loss:
- best_loss = loss_
- lambda_star = lambda_cur
- lambda_cur = lambda_cur * increase_factor
- return lambda_star
-
- def update_rule(self, w, lr, i, iters, pb):
- """update rule."""
- total = pb["total"]
- grad_frac = pb["grad_frac4"]
- rho1 = grad_frac * total / iters # divide total privacy budget for gradient
- rho2 = (1-grad_frac) * total / iters # divide total privacy budget
- sc1 = (1/lr.n) * math.sqrt(0.5/rho1)
- noise1 = np.random.normal(scale=sc1, size=lr.d)
- noisy_grad = lr.grad(w)+noise1
- grad_norm = np.linalg.norm(noisy_grad)
- m = 0.25 # smoothness parameter
- frac_trace = 0.1 # fraction of privacy budget for estimating the trace.
- H = self.H(w)
- if self.plot_eigen:
- val, _ = np.linalg.eigh(H)
- hist, bin_edges = np.histogram(val, bins=300, range=(0, 0.01))
- cdf_vals = np.cumsum(hist)
- plt.clf()
- plt.plot(bin_edges[1:], cdf_vals)
- plt.show()
- if self.hyper_tuning:
- min_eval = self.find_opt_reg_wop(w, lr, noisy_grad, rho2)
- print("optimized min_eval", min_eval)
- else:
- noisy_trace = max(
- np.trace(H) + np.random.normal(
- scale=(0.25 / lr.n) * math.sqrt(0.5 / (frac_trace * rho2))), 0)
- min_eval = (noisy_trace / ((lr.n)**2 *
- (1 - frac_trace) * rho2))**(1 / 3) + 5e-4
- print("approx min_eval ", min_eval)
-
- H = self.hess_mod(w, min_eval, lr.reg)
- if self.type_reg == "add": # Sensitivity is different for add vs clip
- sens2 = grad_norm * m/(lr.n * min_eval**2 + m * min_eval)
- elif self.type_reg == "clip":
- sens2 = grad_norm * m / (lr.n * min_eval**2 - m * min_eval)
- noise2 = np.random.normal(
- scale=sens2 * math.sqrt(0.5 / ((1 - frac_trace) * rho2)), size=lr.d)
- return w - np.linalg.solve(H, noisy_grad) + noise2
-
- def hess_mod(self, w, min_eval, reg=1e-9):
- if self.type_reg == "clip":
- evals, evec = np.linalg.eigh(self.H(w))
- # true_min = np.min(evals)
- evals = np.maximum(evals, min_eval*np.ones(evals.shape))
- Hclipped = np.dot(evec * evals, evec.T)
- return Hclipped
- elif self.type_reg == "add":
- return self.H(w) + min_eval*np.eye(len(self.H(w)))
-
-
-def helper_fun(datasetname, pb, num_rep=5, Tuning=False, plot_eigen=False):
- """This function loads the data & runs the algorithms.
-
- Args:
- datasetname: name of the dataset
- pb: a dictionary with the parameters
- num_rep: number of times we repeat the optimization for reporting average
- Tuning: True or False exhustive search for fining the best min eigenval
- plot_eigen: Show eigenvalues
-
- Returns:
- losses and gradient norms
- """
- datasets = Mydatasets()
- X, y, w_opt = getattr(datasets, datasetname)()
- dataset = X, y
- lr = MyLogisticRegression(X, y, reg=1e-8)
- dnm_hess_add = DoubleNoiseMech(
- lr,
- type_reg="add",
- hyper_tuning=False,
- curvature_info="hessian",
- plot_eigen=plot_eigen).update_rule
- dnm_ub_add = DoubleNoiseMech(
- lr,
- type_reg="add",
- hyper_tuning=False,
- curvature_info="ub",
- plot_eigen=plot_eigen).update_rule
- dnm_hess_clip = DoubleNoiseMech(
- lr,
- type_reg="clip",
- hyper_tuning=False,
- curvature_info="hessian",
- plot_eigen=plot_eigen).update_rule
- dnm_ub_clip = DoubleNoiseMech(
- lr,
- type_reg="clip",
- hyper_tuning=False,
- curvature_info="ub",
- plot_eigen=plot_eigen).update_rule
- if Tuning:
- # dnm_hess_add_ht = DoubleNoiseMech(lr,type_reg='add',
- # hyper_tuning=True,curvature_info='hessian').update_rule
- # dnm_ub_add_ht = DoubleNoiseMech(lr,type_reg='add',
- # hyper_tuning=True,curvature_info='ub').update_rule
- dnm_hess_clip_ht = DoubleNoiseMech(
- lr,
- type_reg="clip",
- hyper_tuning=True,
- curvature_info="hessian",
- plot_eigen=plot_eigen).update_rule
- # dnm_ub_clip_ht = DoubleNoiseMech(lr,type_reg='clip',
- # hyper_tuning=True,curvature_info='ub').update_rule
- c = CompareAlgs(lr, dataset, w_opt, iters=10, pb=pb)
- for rep in range(num_rep):
- c.add_plot(gd_priv, "DPGD", "y--")
- c.add_plot(dnm_hess_add, "DN-Hess-add", "k-")
- c.add_plot(dnm_ub_add, "DN-UB-add", "b-")
- c.add_plot(dnm_hess_clip, "DN-Hess-clip", "k*-")
- c.add_plot(dnm_ub_clip, "DN-UB-clip", "b*-")
- c.add_plot(gd_priv_backtrackingls, "DP-GD-Oracle", "m")
- if Tuning:
- c.add_plot(dnm_hess_clip_ht, "DN-Hess-clip-T", "r*-")
- # c.add_plot(dnm_hess_add_ht,"DN-Hess-add-T",'r-')
- # c.add_plot(dnm_ub_clip_ht,"DN-UB-clip-T",'g*-')
- # c.add_plot(dnm_ub_add_ht,"DN-UB-add-T",'g-')
- losses_dict = c.loss_vals()
- gradnorm_dict = c.gradnorm_vals()
- if rep == 0:
- losses_total = losses_dict
- gradnorm_total = gradnorm_dict
- else:
- for names in losses_total:
- losses_total[names].extend(losses_dict[names])
- gradnorm_total[names].extend(gradnorm_dict[names])
- return losses_total, gradnorm_total
-
-linestyle = {
- "DPGD": "y-",
- "DN-Hess-add": "k+-",
- "DN-UB-add": "b-",
- "DN-Hess-clip": "r*-",
- "DN-UB-clip": "g-",
- "DP-GD-Oracle": "c-"
-}
-facecolor = {
- "DPGD": "yellow",
- "DN-Hess-add": "black",
- "DN-UB-add": "blue",
- "DN-Hess-clip": "red",
- "DN-UB-clip": "green",
- "DP-GD-Oracle": "cyan"
-}
-alg_plt = [
- "DPGD",
- "DN-Hess-add",
- "DN-UB-add",
- "DN-Hess-clip",
- "DN-UB-clip",
- "DP-GD-Oracle"
-]
-
-# Synthethic Data
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4":
- 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "synthetic_data", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(1)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("synth.pdf")
- plt.figure(2)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("synth-grad.pdf")
-
-# a5a dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4":
- 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "a5a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(3)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("a5a.pdf")
- plt.figure(4)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("a5a-grad.pdf")
-
-# w8a dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4":
- 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "w8a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(5)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("w8a.pdf")
- plt.figure(6)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("w8a-grad.pdf")
-
-# a1a dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "a1a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(7)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("a1a.pdf")
- plt.figure(8)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("a1a-grad.pdf")
-
-# mushroom dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "mushroom_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(9)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("mushroom.pdf")
- plt.figure(10)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("mushroom-grad.pdf")
-
-# MNIST
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "mnist_binary", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(11)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("mnist.pdf")
- plt.figure(12)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("mnist-grad.pdf")
-
-# Dataset: phishing
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "phishing", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(13)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("phishing.pdf")
- plt.figure(14)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("phishing-grad.pdf")
-
-# Dataset: Madelon
-
-# pb = {
-# "total": 1, # Total privacy budget for zCDP
-# "min_eval4": 5e-3, # Min eigenvalue for clipping
-# "grad_frac4": 0.4 # Fraction of privacy budget for gradient vs matrix
-# }
-# num_rep = 1
-# losses_total,gradnorm_total = helper_fun('madelon',pb,num_rep = num_rep,
-# Tuning=True,plot_eigen=True)
-# for alg in losses_total.keys():
-# losses = np.array(losses_total[alg])
-# gradnorm = np.array(gradnorm_total[alg])
-# loss_avg, gradnorm_avg = np.mean(losses,axis=0), np.mean(gradnorm,axis=0)
-# loss_std, gradnorm_std = np.std(losses,axis=0)/np.sqrt(num_rep),
-# np.std(gradnorm,axis=0)/np.sqrt(num_rep)
-# print(str(alg)+ ':' + " ex_loss="+str(loss_avg[-1])+ ',
-# std='+str(loss_std[-1]))
-# if alg in alg_plt:
-# iters = len(loss_avg)
-# plt.figure(1)
-# plt.plot(range(iters),loss_avg,linestyle[alg],label=alg)
-# plt.fill_between(range(iters), loss_avg-loss_std, loss_avg+loss_std,
-# facecolor=facecolor[alg])
-# plt.legend()
-# plt.xlabel("Iteration")
-# plt.ylabel("Excess Loss")
-# plt.savefig('madelon.pdf')
-# plt.figure(2)
-# plt.plot(range(iters),gradnorm_avg,linestyle[alg],label=alg)
-# plt.yscale('log')
-# plt.legend()
-# plt.xlabel("Iteration")
-# plt.ylabel("Norm of Gradient")
-# plt.savefig('madelon-grad.pdf')
-
-# Test) a6a Dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "a6a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(15)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("a6a.pdf")
- plt.figure(16)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("a6a-grad.pdf")
diff --git a/research/dp_newton/run_privacy_utility b/research/dp_newton/run_privacy_utility
new file mode 100644
index 0000000..415d238
--- /dev/null
+++ b/research/dp_newton/run_privacy_utility
@@ -0,0 +1,28 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+rm -rf ./src/results
+mkdir -p ./src/results
+rm -rf ./src/datasets_directory
+mkdir -p ./src/datasets_directory
+dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
+privacy_budget="3.0" # epsilon in DP
+num_iteration_GD="100" # number of iterations for DP-GD
+num_iteration_NT="15" # number of iterations for damped newton
+num_iteration_our="15" # number of iterations for double noise (proposed method)
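+# Each run below is expected to write its metrics under ./src/results,
+# which ./src/print_results.py then summarizes.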
+python3 ./src/run.py --alg_type 'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD
+python3 ./src/run.py --alg_type 'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac "0.7"
+python3 ./src/run.py --alg_type 'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac "0.7" --trace_frac "0.1" --trace_coeff "0.5"
+python3 ./src/print_results.py
diff --git a/research/dp_newton/src/dataset_loader.py b/research/dp_newton/src/dataset_loader.py
new file mode 100644
index 0000000..9f060ad
--- /dev/null
+++ b/research/dp_newton/src/dataset_loader.py
@@ -0,0 +1,236 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""dataset loader"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import os
+import ssl
+import tarfile
+import urllib.request
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+import requests
+from sklearn import preprocessing
+import sklearn.datasets
+from sklearn.linear_model import LogisticRegression
+import torch
+from torchvision import datasets, transforms
+
+
+PATH_PREFIX = './src/datasets_directory'
+ssl._create_default_https_context = ssl._create_unverified_context
+
+
+def normalize_fvec(x_train):
+ """normalize feature vectors"""
+ feature_mean = np.mean(x_train, axis=0)
+ feature_std = np.std(x_train, axis=0)
+ x_train = (x_train - feature_mean) / feature_std
+ return x_train
+
+
+def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
+ """Implementation of backtracking line search
+
+ lr = logistic regression
+ dir = the "noisy" gradient direction
+ w_start = current point
+ alpha and beta tradeoff the precision and complexity of the linesearch
+
+ output is an (close to) optimal stepsize
+ """
+ step_size = 100
+ val_0 = lrp.loss(w_start)
+ inner_prod = np.dot(dir_srch, lrp.grad(w_start))
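+  # Shrink the step until the Armijo-style sufficient-decrease condition
+  # loss(w - t*dir) < loss(w) - alpha * t * <dir, grad(w)> holds, or the step
+  # becomes negligibly small.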
+ while (
+ lrp.loss(w_start - step_size * dir_srch)
+ >= val_0 - step_size * alpha * inner_prod
+ ):
+ step_size = beta * step_size
+ if step_size < 1e-6:
+ break
+ return step_size
+
+
+def newton(dataset, w_init, bias=True):
+ """Implementation of the newton method with linesearch without privacy constraints
+
+ dataset = dataset
+ w_init = initialization point
+
+ output is the model parameter
+ """
+ feature_vecs, labels = dataset
+ if bias is True:
+ feature_vecs = np.hstack(
+ (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
+ )
+ lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
+ w_cur = w_init
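+  # A few damped Newton steps: solve hess * dir = grad for the Newton
+  # direction and move along it with a backtracking step size.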
+ for _ in range(8):
+ hess = lrp.hess(w_cur)
+ dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
+ step_size = backtracking_ls(lrp, dir_srch, w_cur)
+ w_cur = w_cur - step_size * dir_srch
+ if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
+ w_out = w_cur
+ else:
+ w_out = w_init
+ return w_out
+
+
+class Mydatasets:
+ """Represents datasets we use for expriments"""
+
+ def __init__(self):
+ data_dir = PATH_PREFIX + '/data'
+ cache_dir = PATH_PREFIX + '/cache_datasets'
+ if not os.path.exists(data_dir):
+ os.mkdir(data_dir)
+ if not os.path.exists(cache_dir):
+ os.mkdir(cache_dir)
+
+ def find_optimal_classifier(self, dataset, bias=True):
+ """find the optimal weight vector for the logistic regression
+
+ for the problems with real datasets.
+
+ dataset = training dataset
+ bias = bias for the logistic model
+ """
+ inputs_vec, labels = dataset
+ reg = 1e-9
+ if bias is True:
+ model_lr = LogisticRegression(max_iter=200, C=1 / reg).fit(
+ inputs_vec, labels
+ )
+ w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
+ w_opt = newton(dataset, w_opt1, bias)
+ else:
+ model_lr = LogisticRegression(
+ max_iter=200, fit_intercept=False, C=1 / reg
+ ).fit(inputs_vec, labels)
+ w_opt1 = np.squeeze(model_lr.coef_)
+ w_opt = newton(dataset, w_opt1, bias)
+ return w_opt
+
+ def fmnist_dataset(self):
+ """fmnist dataset"""
+ transform_data = transforms.Compose(
+ [transforms.ToTensor(), transforms.Normalize((0.5), (0.5))]
+ )
+ train_data_trans = datasets.FashionMNIST(
+ root=PATH_PREFIX + '/data',
+ download=True,
+ train=True,
+ transform=transform_data,
+ )
+ train_loader = torch.utils.data.DataLoader(
+ train_data_trans, batch_size=len(train_data_trans)
+ )
+ x_train = next(iter(train_loader))[0].numpy()
+ x_train = x_train.reshape(len(x_train), -1)
+ y_train = next(iter(train_loader))[1].numpy()
+ label0 = 0
+ label1 = 3
+ indx0 = np.nonzero(y_train == label0)[0]
+ indx1 = np.nonzero(y_train == label1)[0]
+ labels = y_train.copy()
+ labels[indx0] = -1
+ labels[indx1] = 1
+ indx = np.concatenate((indx0, indx1))
+ x_train = x_train[indx]
+ labels = labels[indx]
+ dataset = x_train, labels
+ w_opt = self.find_optimal_classifier(dataset, bias=False)
+ return x_train, labels, w_opt
+
+ def a1a_dataset(self):
+ """a1a dataset"""
+ a1a_url = (
+ 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t'
+ )
+ data_path = PATH_PREFIX + '/data/a1a'
+ if not os.path.exists(data_path):
+ _ = urllib.request.urlretrieve(a1a_url, data_path)
+ data = sklearn.datasets.load_svmlight_file(data_path)
+ inputs_vec, labels = data[0], data[1]
+ inputs_vec = inputs_vec.toarray()
+ scaler = preprocessing.StandardScaler().fit(inputs_vec)
+ inputs_vec = scaler.transform(inputs_vec)
+ labels = labels.astype(float)
+ dataset = inputs_vec, labels
+ w_opt = self.find_optimal_classifier(dataset)
+ inputs_vec = np.hstack(
+ (np.ones(shape=(np.shape(inputs_vec)[0], 1)), inputs_vec)
+ )
+ return inputs_vec, labels, w_opt
+
+ def protein_dataset(self):
+ """protein dataset"""
+ path_protein = PATH_PREFIX + '/data/protein/'
+ if not os.path.exists(path_protein):
+ os.mkdir(path_protein)
+ protein_url = (
+ 'https://kdd.org/cupfiles/KDDCupData/2004/data_kddcup04.tar.gz'
+ )
+ protein_file = PATH_PREFIX + '/data/protein/data_kddcup04.tar.gz'
+ response = requests.get(protein_url, stream=True, timeout=100)
+ if response.status_code == 200:
+ with open(protein_file, 'wb') as file_data:
+ file_data.write(response.raw.read())
+ with tarfile.open(protein_file, 'r:gz') as tar:
+ tar.extractall(path_protein)
+ x_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 3:]
+ y_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 2]
+ indx0 = np.nonzero(y_train == 0)[0]
+ indx1 = np.nonzero(y_train == 1)[0]
+ labels = y_train.copy()
+ labels[indx0] = -1
+ labels[indx1] = 1
+ indx = np.arange(len(x_train))
+ np.random.seed(3000)
+ indx_sample = np.random.choice(indx, 50000, replace=False)
+ np.random.seed(None)
+ x_train = x_train[indx_sample]
+ labels = labels[indx_sample]
+ x_train = normalize_fvec(x_train)
+ w_opt = self.find_optimal_classifier((x_train, labels))
+ x_train = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train))
+ return x_train, labels, w_opt
+
+ def synthetic_dataset(self, num_samples=10000, dim=100):
+ """Generates a synthetic dataset for logistic regression.
+
+    num_samples = number of samples, dim = dimension.
+    Features are uniformly random unit vectors. Labels are sampled from the
+    logistic model, so w_star is the "true" solution.
+ """
+ mean = np.zeros(dim)
+ cov = np.eye(dim)
+ inputs_vec_un = np.random.multivariate_normal(mean, cov, num_samples)
+ nrm = np.linalg.norm(inputs_vec_un, axis=1)
+ inputs_vec = inputs_vec_un * 1 / nrm[:, None]
+ w_star = np.ones(dim)
+ w_star[0] = 1
+ inner_prod = np.dot(inputs_vec, w_star)
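+    # P(label = +1 | x) = sigmoid(<w_star, x>); draw labels in {-1,+1} accordingly.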
+ params = np.exp(inner_prod) / (1 + np.exp(inner_prod))
+ labels = 2 * np.random.binomial(1, params) - 1
+ dataset = inputs_vec, labels
+ w_opt = self.find_optimal_classifier(dataset, bias=False)
+ return inputs_vec, labels, w_opt
diff --git a/research/dp_newton/src/my_logistic_regression.py b/research/dp_newton/src/my_logistic_regression.py
new file mode 100644
index 0000000..4def9dd
--- /dev/null
+++ b/research/dp_newton/src/my_logistic_regression.py
@@ -0,0 +1,205 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""logistic regression class and its methods"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import numpy as np
+
+
+class MyLogisticRegression:
+ """return a logistic regression problem
+
+ There is a dataset consisting of features (vectors of norm <=1)
+ and labels (+1,-1), represented as a numpy array.
+ There is also an L2 regularizer.
+ """
+
+ def __init__(self, input_vecs, labels, reg=1e-8):
+ """Initialize the data and the regularizer.
+
+ X = n x d numpy array representing features
+ y = n x 1 numpy array representing labels
+ reg = L2 regularizing coefficient (to ensure solution is finite)
+
+ Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
+ """
+ self.reg = float(reg)
+ input_vecs = np.array(input_vecs)
+ labels = np.array(labels)
+ assert len(input_vecs.shape) == 2
+ assert len(labels.shape) == 1
+ self.input_vecs = input_vecs
+ self.labels = labels
+ self.num_samples, self.dim = input_vecs.shape
+ assert labels.shape[0] == self.num_samples
+ signed_data = input_vecs * labels[:, np.newaxis]
+ norm = np.linalg.norm(signed_data, axis=1)
+ scale = np.maximum(norm, np.ones_like(norm))
+ self.data = (1 / scale[:, None]) * signed_data
+
+ def loss(self, param):
+ """Computes the loss represented by this object at w.
+
+ If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n
+    log(1+exp(-<X[i,:]*y[i],w>)) + (reg/2)||w||^2
+ """
+ data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
+ reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
+ return data_loss + reg_loss
+
+ def loss_wor(self, param):
+ """Computes the loss represented by this object at w without regularizer.
+
+ If X,y is the data and reg is the regularizer, then the loss is
+    (1/n)sum_i^n log(1+exp(-<X[i,:]*y[i],w>))
+ """
+ data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
+ return data_loss
+
+ def accuracy(self, param):
+ """ " computes the accuracy of the model gievn by w"""
+ score_pred = np.dot(self.input_vecs, param)
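+    # Evaluate the sigmoid of the score in a branch chosen by sign so that the
+    # selected expression is numerically stable: 1/(1+exp(-s)) for s >= 0,
+    # exp(s)/(1+exp(s)) for s < 0.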
+ label1_prob = np.where(
+ score_pred >= 0,
+ 1 / (1 + np.exp(-score_pred)),
+ np.exp(score_pred) / (1 + np.exp(score_pred)),
+ )
+ return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)
+
+ def grad(self, param, batch_idx=None):
+ """Computes the gradient of the logistic regression at a given point w.
+
+ If X,y is the data and reg is the regularizer, then the gradient is
+    (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<X[i,:]*y[i],w>)) + reg*w
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ else:
+ data_batch = self.data
+
+ coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
+ data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
+ return data_grad + self.reg * param
+
+ def grad_wor(self, param, batch_idx=None):
+ """Computes the gradient of the logistic regression at a given point w.
+
+ If X,y is the data and reg is the regularizer, then the gradient is
+ (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ else:
+ data_batch = self.data
+
+ coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
+ data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
+ return data_grad
+
+ def hess(self, param, batch_idx=None):
+ """Computes the Hessian of the logistic regression at a given point w.
+
+ The Hessian is the matrix of second derivatives.
+
+ If X,y is the data and reg is the regularizer, then the Hessian is
+    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
+ where we assume y[i]^2==1.
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+ temp_var = np.dot(data_batch, param) / 2
+ coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
+ raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
+ return raw_hess / batch_size + self.reg * np.eye(self.dim)
+
+ def hess_wor(self, param, batch_idx=None):
+ """Computes the Hessian of the logistic regression at a given point w.
+
+ The Hessian is the matrix of second derivatives.
+
+ If X,y is the data, then the Hessian is
+    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
+ where we assume y[i]^2==1.
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+ temp_var = np.dot(data_batch, param) / 2
+ coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
+ raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
+ return raw_hess / batch_size
+
+ def upperbound(self, param, batch_idx=None):
+ """Tightest universal quadratic upper bound on the loss function.
+
+ log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
+    The constant and linear terms are just the first-order Taylor expansion;
+    this function gives the quadratic term (which replaces the Hessian).
+ https://twitter.com/shortstein/status/1557961202256318464
+ """
+
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+    temp_var = -np.dot(data_batch, param)  # vector of -y_i*<x_i,w> for i in [n]
+ # v = 0.5*np.tanh(a/2)/a, but, avoid 0/0 by special rule
+ temp_var2 = np.divide(
+ 0.5 * np.tanh(temp_var / 2),
+ temp_var,
+ out=np.ones(temp_var.shape) * 0.25,
+ where=np.abs(temp_var) > 1e-9,
+ )
+ hess_non = np.dot(data_batch.T * temp_var2, data_batch)
+ return hess_non / batch_size + self.reg * np.eye(self.dim)
+
+ def upperbound_wor(self, param, batch_idx=None):
+ """Tightest universal quadratic upper bound on the loss function.
+
+ log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
+    The constant and linear terms are just the first-order Taylor expansion;
+    this function gives the quadratic term (which replaces the Hessian).
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+    temp_var = -np.dot(data_batch, param)  # vector of -y_i*<x_i,w> for i in [n]
+ temp_var2 = np.divide(
+ 0.5 * np.tanh(temp_var / 2),
+ temp_var,
+ out=np.ones(temp_var.shape) * 0.25,
+ where=np.abs(temp_var) > 1e-9,
+ )
+ hess_non = np.dot(data_batch.T * temp_var2, data_batch)
+ return hess_non / batch_size
diff --git a/research/dp_newton/src/opt_algs.py b/research/dp_newton/src/opt_algs.py
new file mode 100644
index 0000000..29cb74d
--- /dev/null
+++ b/research/dp_newton/src/opt_algs.py
@@ -0,0 +1,434 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""file containing all auxillary functions for running the optimization algorithms"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import time
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+
+
+class CompareAlgs:
+ """Class to run multiple iterative algorithms and compare the results."""
+
+ def __init__(self, lrp, optimal_w, hyper_dict):
+ """Initialize the problem.
+
+    lrp = an instance of MyLogisticRegression
+    optimal_w = minimizer of the logistic loss on the dataset (without privacy)
+    hyper_dict = dictionary of hyperparameters, including "num_iteration"
+ """
+ self.w_opt = optimal_w
+ self.lrp = lrp
+ self.iters = hyper_dict["num_iteration"]
+ self.hyper_params = hyper_dict
+ self.clock_time = []
+ self.params = []
+ self.names = []
+
+ def add_algo(self, update_rule, name):
+ """Run an iterative update method"""
+ _, dim = self.lrp.num_samples, self.lrp.dim
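+    # draw a random direction and normalize it to get a unit-norm initial iterate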
+ wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
+ w_int = wint_un / np.linalg.norm(wint_un)
+ cutoff_norm = (
+ 100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
+ )
+ w_cur = w_int
+ params = [w_cur]
+ start_t = time.time()
+ wall_clock = [0]
+ for _ in range(self.iters):
+ w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
+ if np.linalg.norm(w_cur) > cutoff_norm:
+ w_cur = w_int
+ print("Stop Things Exploding!")
+ params.append(w_cur)
+ wall_clock.append(time.time() - start_t)
+ self.clock_time.append(wall_clock)
+ self.params.append(params)
+ self.names.append(name)
+
+ def wall_clock_alg(self):
+ """compute the wall clock of different algorithms"""
+ clock_time_dict = {}
+ for time_alg, name in zip(self.clock_time, self.names):
+ clock_time_dict[name] = [time_alg]
+ return clock_time_dict
+
+ def loss_vals(self):
+ """output the loss per iteration for different methods"""
+ baseline = self.lrp.loss_wor(self.w_opt)
+ loss_dict = {}
+ for params, name in zip(self.params, self.names):
+ losses = [self.lrp.loss_wor(w) - baseline for w in params]
+ loss_dict[name] = [losses]
+ return loss_dict
+
+ def accuracy_vals(self):
+ """output the accuracy per iteration for different methods"""
+ acc_dict = {}
+ for params, name in zip(self.params, self.names):
+ acc_vec = [self.lrp.accuracy(w) for w in params]
+ acc_dict[name] = [acc_vec]
+ return acc_dict
+
+ def accuracy_np(self):
+ """output the accuracy of the optimal model without privacy"""
+ return self.lrp.accuracy(self.w_opt)
+
+ def gradnorm_vals(self):
+ """output the gradient norm per iteration for different methods"""
+ gradnorm_dict = {}
+ for params, name in zip(self.params, self.names):
+ grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
+ gradnorm_dict[name] = [grad_norms]
+ return gradnorm_dict
+
+
+def private_newton(w_cur, lrp, hyper_dict):
+ """implementation of private newton method from [ABL21]
+
+ w = current iterate
+ lr = an instance of MyLogisticRegression
+ i = the index of current iterate
+ iters = total number of iterations
+ pb = privacy budget and other info
+ return the next iterate
+ """
+ total = hyper_dict["total"]
+ grad_frac = hyper_dict["grad_frac"]
+ iters = hyper_dict["num_iteration"]
+ hess = lrp.hess(w_cur)
+ rho_grad = grad_frac * total / iters # divide total privacy budget up.
+ rho_hess = (1 - grad_frac) * total / iters
+ hess_noise = np.random.normal(
+ scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
+ size=(lrp.dim, lrp.dim),
+ )
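+  # symmetrize the noise so the perturbed Hessian stays symmetric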
+ hess_noise = (hess_noise + hess_noise.T) / 2
+ hess_noisy = eigenclip(hess + hess_noise)
+ grad = lrp.grad(w_cur)
+ grad_noisy = grad + np.random.normal(
+ scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
+ )
+ dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
+ dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
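+  # damped step: scale the noisy direction by min(log(1 + ||d||)/||d||, 1)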
+ return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy
+
+
+def eigenclip(sym_mat, min_eval=1e-5):
+ """operation of the eigenclip
+
+ A = symmetric matrix
+ min_eval = minimum eigenvalue for clipping
+
+ return the modified matrix
+ """
+ eig_val, eig_vec = np.linalg.eigh(sym_mat)
+ eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
+ clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
+ return clipped_mat
+
+
+def gd_priv(w_cur, lrp, hyper_dict):
+ """Implementation of DP-GD.
+
+  w_cur = current iterate
+  lrp = an instance of MyLogisticRegression
+  hyper_dict = total privacy budget and number of iterations
+
+ output is the next iterate
+ """
+ iters = hyper_dict["num_iteration"]
+ inv_lr_gd = 0.25 # learning rate based on the smoothness
+ sens = 1 / (lrp.num_samples * (inv_lr_gd)) # sensitivity
+ rho = hyper_dict["total"] / iters # divide total privacy budget up
+ noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
+ return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise
+
+
+def sgd_priv(w_cur, lrp, hyper_dict):
+ """Implementation of DP-SGD.
+
+  w_cur = current iterate
+  lrp = an instance of MyLogisticRegression
+  hyper_dict = batch size and noise multiplier
+
+ output is the next iterate
+ """
+ batch_size = hyper_dict["batch_size"]
+ sigma_privacy = hyper_dict["noise_multiplier"]
+ lr_sgd = 4 # learning rate based on the smoothness
+ sample_rate = batch_size / lrp.num_samples # sampling probability
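+  # Poisson subsampling: include each example independently with probability sample_rate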
+ sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
+ batch_idx = np.where(sample_vec == 1)[0] # index of batch
+ batch_size_t = len(batch_idx)
+ noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
+ grad_minibatch = lrp.grad_wor(
+ w_cur, batch_idx
+ ) # average gradient over batch_idx
+ return w_cur - lr_sgd * (
+ batch_size_t / batch_size * grad_minibatch + noise / batch_size
+ )
+
+
+def gd_priv_optls(w_cur, lrp, hyper_dict):
+ """Implementation of DP-GD with back-tracking line search !!!
+
+ this method is not private. We only use it as a baseline.
+
+ w = current point
+ lr = logistic regression
+ i = iteration number
+ pb = auxillary information
+
+ output is the next iterate
+ """
+ iters = hyper_dict["num_iteration"]
+ rho_grad = hyper_dict["total"] / iters # divide total privacy budget up
+ grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
+ grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
+ dir_srch = lrp.grad(w_cur) + grad_noise
+ stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
+ return w_cur - stepsize_opt * dir_srch
+
+
+def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
+ """Implementation of backtracking line search
+
+  lrp = an instance of MyLogisticRegression
+  dir_srch = the (possibly noisy) descent direction
+  w_start = current iterate
+  alpha, beta = parameters trading off the precision and cost of the line search
+
+  output is a (close to) optimal step size
+ """
+ step_size = 100
+ val_0 = lrp.loss(w_start)
+ inner_prod = np.dot(dir_srch, lrp.grad(w_start))
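+  # shrink the step until the Armijo sufficient-decrease condition holds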
+ while (
+ lrp.loss(w_start - step_size * dir_srch)
+ >= val_0 - step_size * alpha * inner_prod
+ ):
+ step_size = beta * step_size
+ if step_size < 1e-6:
+ break
+ return step_size
+
+
+def newton(dataset, w_init, bias=True):
+ """Implementation of the newton method with linesearch without privacy constraints
+
+ dataset = dataset
+ w_init = initialization point
+
+ output is the model parameter
+ """
+ feature_vecs, labels = dataset
+ if bias is True:
+ feature_vecs = np.hstack(
+ (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
+ )
+ lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
+ w_cur = w_init
+ for _ in range(8):
+ hess = lrp.hess(w_cur)
+ dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
+ step_size = backtracking_ls(lrp, dir_srch, w_cur)
+    w_cur = w_cur - step_size * dir_srch
+ if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
+ w_out = w_cur
+ else:
+ w_out = w_init
+ return w_out
+
+
+class DoubleNoiseMech:
+ """Our Method: Double Noise Mechanism"""
+
+ def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
+ """Initializer of the double noise mechanism
+
+    lrp = an instance of MyLogisticRegression
+ type_reg = minimum eigenvalue modification type, it can be either 'add' or
+ 'clip'
+ curvature_info = type of the second-order information
+ """
+ self.type_reg = type_reg
+ self.curvature_info = curvature_info
+ if self.curvature_info == "hessian":
+ self.hess = lrp.hess_wor
+ elif self.curvature_info == "ub":
+ self.hess = lrp.upperbound_wor
+
+ def update_rule(self, w_cur, lrp, hyper_dict):
+ """Implementation of the double noise mechanism update rule--full batch"""
+ noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
+ w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
+ return w_next
+
+ def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
+ """Implementation of the double noise mechanism update rule--full batch"""
+ noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
+ w_next = self.noisy_direction_stochastic(
+ w_cur, lrp, hyper_dict, noisy_grad_cur
+ )
+ return w_next
+
+ def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
+ """computing gradient"""
+ if batch is False:
+ rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
+ "num_iteration"
+ ]
+ noise_grad = np.random.normal(
+ scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
+ )
+ noisy_grad = lrp.grad(w_cur) + noise_grad
+ else:
+ std_grad = hyper_dict["noise_multiplier_grad"]
+ pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
+ sample_vec = np.random.binomial(n=1, p=pgrad, size=lrp.num_samples)
+ batch_idx_grad = np.where(sample_vec == 1)[0]
+ grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad)
+ noise_grad = np.random.normal(scale=std_grad, size=lrp.dim)
+ noisy_grad = (
+ len(batch_idx_grad) / (lrp.num_samples * pgrad)
+ ) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad)
+ return noisy_grad
+
+ def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad):
+ """computing direction"""
+ total = hyper_dict["total"]
+ grad_frac = hyper_dict["grad_frac"]
+ frac_trace = hyper_dict["trace_frac"]
+ trace_coeff = hyper_dict["trace_coeff"]
+ iters = hyper_dict["num_iteration"]
+ rho_hess = (1 - grad_frac) * total / iters
+ smooth_param = 0.25
+ hess_cur = self.hess(w_cur)
+ noisy_trace = trace_coeff * max(
+ np.trace(hess_cur)
+ + np.random.normal(
+ scale=(0.25 / lrp.num_samples)
+ * np.sqrt(0.5 / (frac_trace * rho_hess))
+ ),
+ 0,
+ )
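+    # eigenvalue floor derived from the noisy trace, never below 1/num_samples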
+ min_eval = max(
+ (noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess))
+ ** (1 / 3),
+ 1 / (lrp.num_samples),
+ )
+ grad_norm = np.linalg.norm(noisy_grad)
+ if self.type_reg == "add": # Sensitivity is different for add vs clip
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (lrp.num_samples * min_eval**2 + smooth_param * min_eval)
+ )
+ noise2 = np.random.normal(
+ scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)),
+ size=lrp.dim,
+ )
+ return (
+ w_cur
+ - np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad)
+ + noise2
+ )
+ # type_reg=clip
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (lrp.num_samples * min_eval**2 - smooth_param * min_eval)
+ )
+ noise2 = np.random.normal(
+ scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim
+ )
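+    # clip: invert eigenvalues above min_eval exactly; treat all smaller ones as min_eval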
+ eval_hess, evec_hess = np.linalg.eigh(hess_cur)
+ eval_trunc = eval_hess[eval_hess >= min_eval]
+ num_eig = len(eval_trunc)
+ if num_eig == 0:
+ hess_modified_inv = 1 / min_eval * np.eye(lrp.dim)
+ else:
+ evec_trun = evec_hess[:, -num_eig:]
+ hess_modified_inv = np.dot(
+ evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)),
+ evec_trun.T,
+ ) + 1 / min_eval * np.eye(lrp.dim)
+ return w_cur - (hess_modified_inv @ noisy_grad) + noise2
+
+ def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad):
+ """noisy direction for stochastic variant"""
+ std_hess = hyper_dict["noise_multiplier_hess"]
+ phess = hyper_dict["batchsize_hess"] / lrp.num_samples
+ min_eval = hyper_dict["min_eval"]
+ sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples)
+ batch_idx_hess = np.where(sample_vec == 1)[0]
+ batch_size_hess_t = len(batch_idx_hess)
+ hess_cur = (
+ (batch_size_hess_t)
+ / (lrp.num_samples * phess)
+ * self.hess(w_cur, batch_idx_hess)
+ )
+ smooth_param = 0.25 # smoothness parameter
+ grad_norm = np.linalg.norm(noisy_grad)
+ if self.type_reg == "add": # Sensitivity is different for add vs clip
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (
+ (lrp.num_samples * phess) * min_eval**2
+ + smooth_param * min_eval
+ )
+ )
+ noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
+ return (
+ w_cur
+ - np.linalg.solve(
+ hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad
+ )
+ + noise2
+ )
+ # type_reg=clip
+ min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess)))
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (
+ (lrp.num_samples * phess) * min_eval_c**2
+ - smooth_param * min_eval_c
+ )
+ )
+ noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
+ eval_hess, evec_hess = np.linalg.eigh(hess_cur)
+ eval_trunc = eval_hess[eval_hess >= min_eval_c]
+ num_eig = len(eval_trunc)
+ if num_eig == 0:
+ hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim)
+ else:
+ evec_trun = evec_hess[:, -num_eig:]
+ hess_modified_inv = np.dot(
+ evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)),
+ evec_trun.T,
+ ) + 1 / min_eval_c * np.eye(lrp.dim)
+ return w_cur - (hess_modified_inv @ noisy_grad) + noise2
diff --git a/research/dp_newton/src/print_results.py b/research/dp_newton/src/print_results.py
new file mode 100644
index 0000000..5458ad3
--- /dev/null
+++ b/research/dp_newton/src/print_results.py
@@ -0,0 +1,47 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""print the achievable error of different algorithms"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import json
+import os
+import numpy as np
+
+RESULTS_PATH = './src/results/'
+excess_loss = {}
+opt_algs = [
+ 'DPGD',
+ 'DN-Hess-add',
+ 'DN-UB-add',
+ 'DN-Hess-clip',
+ 'DN-UB-clip',
+ 'private-newton',
+]
+for filename in os.listdir(RESULTS_PATH):
+ f = os.path.join(RESULTS_PATH, filename)
+ with open(f, encoding='utf-8') as json_file:
+ data = json.load(json_file)
+ for alg in data.keys():
+ if alg in opt_algs:
+ loss_avg = np.array(data[alg]['loss_avg'])
+ loss_std = np.array(data[alg]['loss_std'])
+ clock_time = np.array(data[alg]['clock_time_avg'])
+ print('optimization algorithm: ', alg)
+ print('excess loss: ' + str(loss_avg[-1]))
+ print('run time: ' + str(clock_time[-1]) + '(sec)')
+ print('-----')
diff --git a/research/dp_newton/src/run.py b/research/dp_newton/src/run.py
new file mode 100644
index 0000000..5d49910
--- /dev/null
+++ b/research/dp_newton/src/run.py
@@ -0,0 +1,264 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""collections of helper function to run and compare different algorithms"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import argparse
+import json
+from dataset_loader import Mydatasets
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
+from scipy.optimize import fsolve
+
+
+def zcdp_to_eps(rho, delta):
+ """ "
+
+ conversion of zcdp gurantee to (eps,delta)-DP using the formula in Lemma 3.6
+ of [BS16]
+ rho : zCDP
+ delta: delta in DP
+
+ return eps
+ """
+ return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))
+
+
+def eps_to_zcdp(eps, delta):
+ """ "
+
+ conversion of (eps,delta) gurantee to rho-zCDP
+ eps : eps in DP
+ delta: delta in DP
+
+ return rho
+ """
+
+ def func_root(rho_zcdp):
+ return zcdp_to_eps(rho_zcdp, delta) - eps
+
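+  # numerically invert zcdp_to_eps: find rho with zcdp_to_eps(rho, delta) == eps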
+ root = fsolve(func_root, x0=0.001)[-1]
+ return root
+
+
+def helper_fun(datasetname, alg_type, params_exp):
+ """helper function for running different algorithms
+
+ args:
+    datasetname = name of the dataset
+ alg_type = type of the optimization algorithm
+ params_exp = hyperparameters
+ """
+ feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
+ privacy_dp = params_exp["total"]
+ params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
+ log_reg = MyLogisticRegression(feature_vecs, labels)
+ alg_dict, filename_params = prepare_alg_dict(
+ alg_type, datasetname, privacy_dp, params_exp, log_reg
+ )
+ compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
+ result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
+ result["num-samples"] = len(labels)
+ with open(
+ "src/results/" + filename_params, "w", encoding="utf8"
+ ) as json_file:
+ json.dump(result, json_file)
+
+
+def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
+ """prepare update rule for algorithms and filename"""
+ alg_dict = None
+ filename_params = None
+ if alg_type == "double_noise":
+ filename_params = (
+ "so_"
+ + datasetname
+ + "_"
+ + str(privacy_dp)
+ + "_"
+ + "DP"
+ + "_"
+ + str(params_exp["num_iteration"])
+ + "_"
+ + str(params_exp["grad_frac"])
+ + "_"
+ + str(params_exp["trace_frac"])
+ + "_"
+ + str(params_exp["trace_coeff"])
+ + ".txt"
+ )
+ dnm_hess_add = DoubleNoiseMech(
+ log_reg, type_reg="add", curvature_info="hessian"
+ ).update_rule
+ dnm_ub_add = DoubleNoiseMech(
+ log_reg, type_reg="add", curvature_info="ub"
+ ).update_rule
+ dnm_hess_clip = DoubleNoiseMech(
+ log_reg, type_reg="clip", curvature_info="hessian"
+ ).update_rule
+ dnm_ub_clip = DoubleNoiseMech(
+ log_reg, type_reg="clip", curvature_info="ub"
+ ).update_rule
+ alg_dict = {
+ "DN-Hess-add": dnm_hess_add,
+ "DN-Hess-clip": dnm_hess_clip,
+ "DN-UB-clip": dnm_ub_clip,
+ "DN-UB-add": dnm_ub_add,
+ }
+ elif alg_type == "dp_gd":
+ filename_params = (
+ "gd_"
+ + datasetname
+ + "_"
+ + str(privacy_dp)
+ + "_"
+ + "DP"
+ + "_"
+ + str(params_exp["num_iteration"])
+ + ".txt"
+ )
+ alg_dict = {"DPGD": gd_priv}
+ elif alg_type == "damped_newton":
+ filename_params = (
+ "newton_"
+ + datasetname
+ + "_"
+ + str(privacy_dp)
+ + "_"
+ + "DP"
+ + "_"
+ + str(params_exp["num_iteration"])
+ + ".txt"
+ )
+ alg_dict = {"private-newton": private_newton}
+ return alg_dict, filename_params
+
+
+class RunReleaseStats:
+ """Helpfer function to run different algorithms and store the results"""
+
+ def __init__(self, compare_algs, algs_dict, num_rep=10):
+ self.compare_algs = compare_algs
+ self.algs_dict = algs_dict
+ self.num_rep = num_rep
+ self.losses = 0
+ self.gradnorm = 0
+ self.accuracy = 0
+ self.wall_clock = 0
+
+ def run_algs(self):
+ """method to run different algorithms and store different stats"""
+ for rep in range(self.num_rep):
+ for alg_name, alg_update_rule in self.algs_dict.items():
+ self.compare_algs.add_algo(alg_update_rule, alg_name)
+ losses_dict = self.compare_algs.loss_vals()
+ gradnorm_dict = self.compare_algs.gradnorm_vals()
+ accuracy_dict = self.compare_algs.accuracy_vals()
+ wall_clock_dict = self.compare_algs.wall_clock_alg()
+ if rep == 0:
+ self.losses = losses_dict
+ self.gradnorm = gradnorm_dict
+ self.accuracy = accuracy_dict
+ self.wall_clock = wall_clock_dict
+ else:
+ for alg in self.losses:
+ self.losses[alg].extend(losses_dict[alg])
+ self.gradnorm[alg].extend(gradnorm_dict[alg])
+ self.accuracy[alg].extend(accuracy_dict[alg])
+ self.wall_clock[alg].extend(wall_clock_dict[alg])
+
+ def summarize_stats(self):
+ """method to summarize the results"""
+ self.run_algs()
+ result = {}
+ result["acc-best"] = self.compare_algs.accuracy_np().tolist()
+ for alg in self.losses:
+ result[alg] = {}
+ loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
+ loss_std = np.std(np.array(self.losses[alg]), axis=0)
+ result[alg]["loss_avg"] = (loss_avg).tolist()
+ result[alg]["loss_std"] = (loss_std / np.sqrt(self.num_rep)).tolist()
+ gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
+ gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
+ result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
+ result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
+ acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
+ acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
+ result[alg]["acc_avg"] = (acc_avg).tolist()
+ result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
+ clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
+ clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
+ result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
+ result[alg]["clock_time_std"] = (
+ clocktime_std / np.sqrt(self.num_rep)
+ ).tolist()
+
+ return result
+
+
+def main():
+ """main function"""
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--datasetname")
+ parser.add_argument("--alg_type")
+ parser.add_argument("--total")
+ parser.add_argument("--numiter")
+ # double noise and newton
+ parser.add_argument("--grad_frac")
+ parser.add_argument("--trace_frac")
+ parser.add_argument("--trace_coeff")
+ args = parser.parse_args()
+ datasetname = args.datasetname
+ alg_type = args.alg_type
+ total = float(args.total)
+ num_iter = int(args.numiter)
+ if alg_type == "double_noise":
+ grad_frac = float(args.grad_frac)
+ trace_frac = float(args.trace_frac)
+ trace_coeff = float(args.trace_coeff)
+ hyper_parameters = {
+ "total": total,
+ "grad_frac": grad_frac,
+ "trace_frac": trace_frac,
+ "trace_coeff": trace_coeff,
+ "num_iteration": num_iter,
+ }
+ elif alg_type == "dp_gd":
+ hyper_parameters = {"total": total, "num_iteration": num_iter}
+ elif alg_type == "damped_newton":
+ grad_frac = float(args.grad_frac)
+ hyper_parameters = {
+ "total": total,
+ "num_iteration": num_iter,
+ "grad_frac": grad_frac,
+ }
+ else:
+ raise ValueError("no such optmization algorithm exists")
+ print(
+ "optimization algorithm "
+ + alg_type
+ + ","
+ + "dataset name: "
+ + datasetname
+ )
+ helper_fun(datasetname, alg_type, hyper_parameters)
+
+
+if __name__ == "__main__":
+ main()
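+
+# Example invocations (a sketch, not part of the original script; the dataset
+# name must match one of the loaders exposed by dataset_loader.Mydatasets, and
+# the numeric values below are illustrative only):
+#   python src/run.py --datasetname=<dataset> --alg_type=dp_gd --total=1.0 --numiter=50
+#   python src/run.py --datasetname=<dataset> --alg_type=double_noise --total=1.0 \
+#     --numiter=50 --grad_frac=0.5 --trace_frac=0.1 --trace_coeff=1.0
+#   python src/run.py --datasetname=<dataset> --alg_type=damped_newton --total=1.0 \
+#     --numiter=50 --grad_frac=0.5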