diff --git a/research/dp_newton/README.md b/research/dp_newton/README.md
new file mode 100644
index 0000000..8239933
--- /dev/null
+++ b/research/dp_newton/README.md
@@ -0,0 +1,32 @@
+# Faster Differentially Private Convex Optimization via Second-Order Methods
+
+Code accompanying the paper
+https://arxiv.org/abs/2305.13209
+by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, and Abhradeep Thakurta.
+
+## Description
+
+Implementation of the optimization algorithms proposed in
+https://arxiv.org/abs/2305.13209
+
+## Getting Started
+
+You will need to install fairly standard dependencies: `numpy`, `scikit-learn`,
+`requests`, `torch`, and `torchvision` (see the imports under `src/`).
+
+Run `run_privacy_utility` to compare the convergence speed and excess loss of
+the different algorithms.
+
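+The script drives `src/run.py` and `src/print_results.py`. If you prefer to
+experiment with the building blocks directly, the following is a minimal,
+illustrative sketch that uses the `MyLogisticRegression` class from
+`src/my_logistic_regression.py` on made-up toy data:
+
+```python
+# Run from research/dp_newton/src so that the import below resolves.
+import numpy as np
+
+from my_logistic_regression import MyLogisticRegression
+
+# Toy problem: 100 random unit-norm feature vectors in 5 dimensions with
+# random +/-1 labels (purely illustrative).
+rng = np.random.default_rng(0)
+features = rng.normal(size=(100, 5))
+features /= np.linalg.norm(features, axis=1, keepdims=True)
+labels = np.where(rng.random(100) < 0.5, -1.0, 1.0)
+
+lrp = MyLogisticRegression(features, labels, reg=1e-8)
+w = np.zeros(5)
+print("loss:", lrp.loss(w), "grad norm:", np.linalg.norm(lrp.grad(w)))
+```
+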
+### Citation
+
+You can cite the paper as follows:
+
+```
+@article{ganesh2023faster,
+ title={Faster Differentially Private Convex Optimization
+ via Second-Order Methods},
+ author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas
+ and Thakurta, Abhradeep},
+ journal={arXiv preprint arXiv:2305.13209},
+ year={2023}
+}
+```
diff --git a/research/dp_newton/dpso-logistic.py b/research/dp_newton/dpso-logistic.py
deleted file mode 100644
index 3e5ac43..0000000
--- a/research/dp_newton/dpso-logistic.py
+++ /dev/null
@@ -1,1238 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-# -*- coding: utf-8 -*-
-"""Differentially Private Second-Order Methods for Logistic Regression.
-
-This script implements several algorithms for DP logistic regression and
-tests them on various datasets. It produces plots for our upcoming paper.
-
-Code exported from Colab. Written by Mahdi Haghifam.
-
-"""
-
-# pylint: disable=invalid-name
-# We use upper case to denote matrices and lower case for vectors.
-# This conflicts with pylint's variable naming rules.
-# pylint: disable=redefined-outer-name
-# This is a script hence we have "global" variables.
-# pylint: disable=unused-argument
-# The update rule functions are meant to have the same signature,
-# so cannot just remove arguments. Ideally this should have been
-# implemented as a class, but much easier to define a function.
-
-import math
-import urllib.request
-
-# from keras.utils.np_utils import to_categorical
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn import preprocessing
-import sklearn.datasets
-from sklearn.linear_model import LogisticRegression
-import tensorflow as tf
-# from tensorflow import keras
-
-
-class MyLogisticRegression:
- """Represents a logistic regression problem.
-
- There is a dataset consisting of features (vectors of norm <=1)
- and labels (+1,-1), represented as a numpy array.
- There is also an L2 regularizer.
- """
-
- def __init__(self, X, y, reg=1e-8):
- """Initialize the data and the regularizer.
-
- Args:
- X: n x d numpy array representing features
- y: n x 1 numpy array representing labels
- reg: L2 regularizing coefficient (to ensure solution is finite)
-
- Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
- """
- self.reg = float(reg)
- X = np.array(X)
- y = np.array(y)
- assert len(X.shape) == 2
- assert len(y.shape) == 1
- self.n, self.d = X.shape
- assert y.shape[0] == self.n
- signed_data = X * y[:, np.newaxis]
- norm = np.linalg.norm(signed_data, axis=1)
- scale = np.maximum(norm, np.ones_like(norm))
- self.data = (1 / scale[:, None]) * signed_data
-
- def loss(self, w):
- """Computes the loss represented by this object at w.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the loss is
-      (1/n)sum_i^n log(1+exp(-<X[i,:]*y[i],w>)) + (reg/2)||w||^2
- """
- data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w))))
- reg_loss = 0.5 * self.reg * np.linalg.norm(w)**2
- return data_loss + reg_loss
-
- def loss_wor(self, w):
- """Computes the loss represented by this object at w without regularizer.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the loss is
-      (1/n)sum_i^n log(1+exp(-<X[i,:]*y[i],w>))
- """
- data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, w))))
- return data_loss
-
- def grad(self, w):
- """Computes the gradient of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the gradient is
-      (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<X[i,:]*y[i],w>)) + reg*w
- """
- coeff_grad = -1/(1+np.exp(np.dot(self.data, w)))
- data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0)
- return data_grad + self.reg * w
-
- def grad_wor(self, w):
- """Computes the gradient of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- If X,y is the data and reg is the regularizer, then the gradient is
-      (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<X[i,:]*y[i],w>)) + reg*w
- """
- coeff_grad = -1/(1+np.exp(np.dot(self.data, w)))
- data_grad = np.mean(self.data * coeff_grad[:, np.newaxis], axis=0)
- return data_grad
-
- def hess(self, w):
- """Computes the Hessian of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- The Hessian is the matrix of second derivatives.
- If X,y is the data and reg is the regularizer, then the Hessian is
-      (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
- where we assume y[i]^2==1.
- """
- a = np.dot(self.data, w)/2
- coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2
- raw_hess = np.dot(self.data.T * coeff_hess, self.data)
- return raw_hess/self.n + self.reg * np.eye(self.d)
-
- def hess_wor(self, w):
- """Computes the Hessian of the logistic regression at a given point w.
-
- Args:
- w: weight vector
-
- Returns:
- The Hessian is the matrix of second derivatives.
- If X,y is the data, then the Hessian is
-      (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
- where we assume y[i]^2==1.
- """
- a = np.dot(self.data, w)/2
- coeff_hess = 1 / (np.exp(a)+np.exp(-a))**2
- raw_hess = np.dot(self.data.T * coeff_hess, self.data)
- return raw_hess/self.n
-
- def upperbound(self, w):
- """Computes tightest universal quadratic upper bound on the loss function.
-
- log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
- Constant and linear terms are just first-order Taylor
- This function gives the quadratic term (which replaces the Hessian)
- https://twitter.com/shortstein/status/1557961202256318464
-
- Args:
- w: weight vector
-
- Returns:
- Matrix H such that for all v
-      loss(v) <= loss(w)+<grad(w),v-w>+<v-w,H(v-w)>/2
- """
-    a = -np.dot(self.data, w)  # vector of -y_i*<x_i,w> for i in [n]
- # v = 0.5*np.tanh(a/2)/a
- # But avoid 0/0 by special rule
- v = np.divide(
- 0.5 * np.tanh(a / 2),
- a,
- out=(np.ones(a.shape) * 0.25),
- where=(np.abs(a) > 1e-9))
- H = np.dot(self.data.T * v, self.data)
- return H / self.n + self.reg * np.eye(self.d)
-
- def upperbound_wor(self, w):
- """Computes tightest quadratic upper bound on the unregularized loss.
-
- log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
- Constant and linear terms are just first-order Taylor
- This function gives the quadratic term (which replaces the Hessian)
- https://twitter.com/shortstein/status/1557961202256318464
-
- Args:
- w: weight vector
-
- Returns:
- Matrix H such that for all v
-      loss(v) <= loss(w)+<grad(w),v-w>+<v-w,H(v-w)>/2
- """
-    a = -np.dot(self.data, w)  # vector of -y_i*<x_i,w> for i in [n]
- # v = 0.5*np.tanh(a/2)/a
- # But avoid 0/0 by special rule
- v = np.divide(
- 0.5 * np.tanh(a / 2),
- a,
- out=(np.ones(a.shape) * 0.25),
- where=(np.abs(a) > 1e-9))
- H = np.dot(self.data.T * v, self.data)
- return H / self.n
-
-
-class Mydatasets:
- """Represents datasets we use for testing the algorithms.
- """
-
- def __init__(self):
- pass
-
- def find_optimal_classifier(self, dataset, reg=1e-9):
- """Find the optimal weight vector for the logistic regression.
-
- Args:
- dataset: training dataset
- reg: regularizer
-
- Returns:
- Optimal weight vector.
- """
- X, y = dataset
- model_lr = LogisticRegression(max_iter=10000, C=1/reg).fit(X, y)
- w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
- w_opt = newton(dataset, w_opt1)
- print("optimal weight vector norms", np.linalg.norm(w_opt))
- return w_opt
-
- def mnist_binary(self):
- """Download and extract MNIST data.
-
- We also select only two labels for the binary classification task.
-
- Returns:
- Features, labels, and optimal weight vector.
- """
- labels = [1, 7]
- label0, label1 = int(labels[0]), int(labels[1])
- mnist = tf.keras.datasets.mnist
- (x_train, y_train), (_, _) = mnist.load_data()
- x_train = x_train.reshape(x_train.shape[0], -1)
- scaler = preprocessing.StandardScaler().fit(x_train)
- x_train = scaler.transform(x_train)
- nrm = np.linalg.norm(x_train, axis=1)
- x_train = x_train * 1/nrm[:, None]
- y_train = y_train.astype(float)
- indx0 = np.nonzero(y_train == label0)[0]
- indx1 = np.nonzero(y_train == label1)[0]
- y_train[indx0] = -1
- y_train[indx1] = 1
- indx = np.concatenate((indx0, indx1))
- x_train = x_train[indx]
- labels = y_train[indx]
- dataset = x_train, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)),
- x_train)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def w8a_dataset(self):
- """w8a dataset for logistic regression.
- """
- num_points = 15e3
- w8a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/w8a"
- data_path = "./w8a"
- urllib.request.urlretrieve(w8a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- selected_samples = np.random.choice(len(X), int(num_points))
- X = X[selected_samples, :]
- labels = labels[selected_samples]
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def a1a_dataset(self):
- """Loads a1a dataset for logistic regression.
- """
- a1a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a"
- data_path = "./a1a"
- urllib.request.urlretrieve(a1a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def phishing(self):
- """phishing dataset for logistic regression.
- """
- phishing_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/phishing"
- data_path = "./phishing"
- urllib.request.urlretrieve(phishing_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def a5a_dataset(self):
- """a5a dataset for logistic regression.
- """
- a5a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a"
- data_path = "./a5a"
- urllib.request.urlretrieve(a5a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def a6a_dataset(self):
- """a6a dataset for logistic regression.
- """
- a6a_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a5a"
- data_path = "./a6a"
- urllib.request.urlretrieve(a6a_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def madelon(self):
- """madelon dataset for logistic regression.
- """
- madelon_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/madelon"
- data_path = "./madelon"
- urllib.request.urlretrieve(madelon_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def mushroom_dataset(self):
- """mushroom dataset for logistic regression.
- """
- mushroom_url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/mushrooms"
- data_path = "./mushrooms"
- urllib.request.urlretrieve(mushroom_url, data_path)
- X, labels = sklearn.datasets.load_svmlight_file(data_path)
- X = X.toarray()
- scaler = preprocessing.StandardScaler().fit(X)
- X = scaler.transform(X)
- nrm = np.linalg.norm(X, axis=1)
- X = X * 1/nrm[:, None]
- labels = labels.astype(float)
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
- def synthetic_data(self, n=5000, d=50, cov=None, w=None):
- """Generates a synthetic dataset for logistic regression.
-
- Args:
- n: number of samples
- d: dimension
- cov: covariance of the data (optional, default: identity)
- w: true coefficient vector (optional, default:first standard basis vector)
-
- Returns:
- Synthetic dataset.
- Features are unit vectors (by default uniformly random).
- Labels are sampled from logistic distribution,
- where argument w is the "true" solution.
- """
- mean = np.zeros(d)
- if cov is None:
- cov = np.eye(d)
- X_un = np.random.multivariate_normal(mean, cov, n)
- nrm = np.linalg.norm(X_un, axis=1)
- X = X_un * 1/nrm[:, None]
- if w is None:
- w = np.ones(d)
- w[0] = 1
- inner_prod = np.dot(X, w)
- params = np.exp(inner_prod)/(1+np.exp(inner_prod))
- labels = 2*np.random.binomial(1, params)-1
- dataset = X, labels
- w_opt = self.find_optimal_classifier(dataset)
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)),
- X)) # adding a dummy dimension for the bias term.
- return X, labels, w_opt
-
-
-class CompareAlgs:
- """Class to run multiple iterative algorithms and compare the results."""
-
- def __init__(self,
- lr,
- dataset,
- optimal_w,
- iters=10,
- w0=None,
- pb=None):
- """Initialize the problem."""
- X, _ = dataset
- self.w_opt = optimal_w
- n, d = np.shape(X)
- print("dataset is created: (number of samples, dimension)=" + str(n) + "," +
- str(d))
-
- if w0 is None:
- w0_un = np.random.multivariate_normal(np.zeros(d), np.eye(d))
- w0 = w0_un/np.linalg.norm(w0_un)
- self.w0 = w0 # initial iterate
- self.iters = iters
- self.pb = pb
- self.lr = lr
- self.plots = [] # List of lists of iterates
- self.names = [] # List of names
- self.linestyles = [] # List of line styles for plotting
- self.cutoff = 20 * np.linalg.norm(self.w_opt) + 20 * np.linalg.norm(
- self.w0) + 10 # how do you set this value? is it problem-specific?
-
- def add_plot(self, update_rule, name, linestyle):
- """Run an iterative update method & add to plot.
-
- Args:
- update_rule: a function that takes 4 arguments:
- current iterate
- LogisticRegression problem
- index of current iterate
- total number of iterations
- pb = privacy budget or similar
- name: string to display in legend
- linestyle: line style for plot
- """
- baseline = self.lr.loss_wor(self.w_opt)
- print(name)
- w = self.w0
- plot = [w]
- for i in range(self.iters):
- w = update_rule(w, self.lr, i, self.iters, self.pb)
- if np.linalg.norm(w) > self.cutoff:
- w = self.w0 # Stop things exploding
- print("Stop Things Exploding!")
- plot.append(w)
- print(
- str(i) + ": ||grad||=" + str(np.linalg.norm(self.lr.grad_wor(w))) +
- " ex_loss=" + str(self.lr.loss_wor(w) - baseline))
- self.plots.append(plot)
- self.names.append(name)
- self.linestyles.append(linestyle)
- print()
-
- def plot_grad_norms(self, legend=True):
- """Plot gradient norms for each iteration.
- """
- plt.clf()
- for plot, name, ls in zip(self.plots, self.names, self.linestyles):
- grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot]
- plt.plot(range(self.iters+1), grad_norms, ls, label=name)
- plt.yscale("log")
- ymax = np.linalg.norm(self.lr.grad(self.plots[0][0]))
- plt.ylim(top=ymax)
- # plt.ylim((1e-3, 1e-1))
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- if legend: plt.legend()
- plt.show()
-
- def loss_vals(self):
- """Outputs the loss vector for different methods.
- """
- baseline = self.lr.loss_wor(self.w_opt)
- loss_dict = {}
- for plot, name in zip(self.plots, self.names):
- losses = [self.lr.loss_wor(w)-baseline for w in plot]
- loss_dict[name] = [losses]
- return loss_dict
-
- def gradnorm_vals(self):
- """Outputs the gradient norm for different methods.
- """
- gradnorm_dict = {}
- for plot, name in zip(self.plots, self.names):
- grad_norms = [np.linalg.norm(self.lr.grad_wor(w)) for w in plot]
- gradnorm_dict[name] = [grad_norms]
- return gradnorm_dict
-
- def plot_losses(self):
- """Plots excess loss for each iteration.
-
- output is a dictionary where the keys are name of method and value is
- the loss for each iteration.
- """
- baseline = self.lr.loss_wor(self.w_opt)
- plt.clf()
- for plot, name, ls in zip(self.plots, self.names, self.linestyles):
- losses = [self.lr.loss_wor(w)-baseline for w in plot]
- plt.plot(range(self.iters+1), losses, ls, label=name)
- # plt.yscale('log')
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.legend()
- plt.show()
-
-
-def gd_priv(w, lr, i, iters, pb):
- """Implementation of DP-GD.
-
- Args:
- w: current point
- lr: logistic regression
- i: iteration number
- iters: total number of iterations
- pb: auxillary information
-
- Returns:
- The next iterate.
- """
- inv_lr_gd = 0.25 # We select the learning rate based on the smoothness
- sens = 1/(lr.n*(inv_lr_gd+lr.reg)) # Sensitivity
- rho = pb["total"] / iters # divide total privacy budget up
- noise = np.random.normal(scale=sens/math.sqrt(2*rho), size=lr.d)
- return w - lr.grad(w)/(inv_lr_gd+lr.reg) + noise
-
-
-def gd_priv_backtrackingls(w, lr, i, iters, pb):
- """Implementation of DP-GD with back-tracking line search.
-
- !!! this method is not private. We only use it as a baseline.
-
- Args:
- w: current point
- lr: logistic regression
- i: iteration number
- iters: total number of iterations
- pb: auxillary information
-
- Returns:
- The next iterate
- """
- rho_grad = pb["total"] / iters # divide total privacy budget up
- grad_scale = (1/lr.n)*math.sqrt(0.5/rho_grad)
- grad_noise = np.random.normal(scale=grad_scale, size=lr.d)
- direction = lr.grad(w)+grad_noise
- stepsize_opt = backtracking_ls(lr, direction, w)
- return w - stepsize_opt * direction
-
-
-def backtracking_ls(lr, direction, w_0, alpha=0.4, beta=0.95):
- """Implementation of backtracking line search.
-
- Args:
- lr: logistic regression
- direction: the "noisy" gradient direction
- w_0: current point
- alpha: tradeoff the precision and complexity of the linesearch
- beta: tradeoff the precision and complexity of the linesearch
-
- Returns:
- A good stepsize
- """
- t = 100
- while lr.loss(w_0 - t * direction
- ) >= lr.loss(w_0) - t * alpha * np.dot(direction, lr.grad(w_0)):
- t = beta * t
- if t < 1e-10:
- break
- return t
-
-
-def newton(dataset, w_opt2):
- """Newton update rule.
- """
- X, y = dataset
- X = np.hstack((np.ones(shape=(np.shape(X)[0], 1)), X))
- lr = MyLogisticRegression(X, y, reg=1e-9)
- w_opt = w_opt2
- _, d = np.shape(X)
- w = np.zeros(d)
- for _ in range(30):
- H = lr.hess(w)
- direction = np.linalg.solve(H, lr.grad(w))
- step_size = backtracking_ls(lr, direction, w)
- w = w - step_size * direction
- if lr.loss_wor(w) < lr.loss_wor(w_opt2):
- w_opt = w
- return w_opt
-
-
-def newton_ur(w, lr, i, iters, pb):
- H = lr.hess(w)
- direction = np.linalg.solve(H, lr.grad(w))
- step_size = backtracking_ls(lr, direction, w)
- return w - step_size * direction
-
-
-class DoubleNoiseMech:
- """Our Method: Double Noise."""
-
- def __init__(self,
- lr,
- type_reg="add",
- hyper_tuning=False,
- curvature_info="hessian",
- plot_eigen=False):
- """Initializes the algorithm.
-
- Args:
- lr: logistic regression problem we are solving.
- type_reg: "add" or "clip" -- how we regularize eigenvalues.
- hyper_tuning: do we tune the hyperparameters.
- curvature_info: "hessian" or "ub" -- what quadratic we use.
- plot_eigen: show eigenvalues for debugging purposes.
-
- """
- self.type_reg = type_reg
- self.hyper_tuning = hyper_tuning
- self.curvature_info = curvature_info
- self.plot_eigen = plot_eigen
- if self.curvature_info == "hessian":
- self.H = lr.hess_wor
- elif self.curvature_info == "ub":
- self.H = lr.upperbound_wor
-
- def find_opt_reg_wop(self, w, lr, noisy_grad, rho_hess):
- """Implementation of finding the optimal lambda.
-
- Here, we don't pay for privacy of doing it.
-
- Args:
- w: current point
- lr: logistic regression problem
- noisy_grad: the gradient estimate
- rho_hess: the privacy budget
-
- Returns:
- The next iterate.
- """
- increase_factor = 1.5 # at each step we increase the clipping
- if self.type_reg == "add":
- lambda_cur = 5e-6 # starting parameter
- elif self.type_reg == "clip":
- lambda_cur = 0.25/lr.n + 1e-5 # starting parameter,
- num_noise_sample = 5 # we want to estimate expected value over the noise
- grad_norm = np.linalg.norm(noisy_grad)
- H = self.H(w)
- best_loss = 1e6 # a large dummy number
- while lambda_cur <= 0.25:
- H = self.hess_mod(w, lambda_cur)
- if self.type_reg == "add": # Sensitivity is different for add vs clip
- sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 + 0.25*lambda_cur)
- elif self.type_reg == "clip":
- sens2 = grad_norm * 0.25/(lr.n*lambda_cur**2 - 0.25*lambda_cur)
- loss_ = 0
- for _ in range(num_noise_sample):
- noise2 = np.random.normal(scale=sens2*math.sqrt(.5/rho_hess), size=lr.d)
- loss_ = loss_ + lr.loss_wor(w - np.linalg.solve(H, noisy_grad) + noise2)
- if loss_ < best_loss:
- best_loss = loss_
- lambda_star = lambda_cur
- lambda_cur = lambda_cur * increase_factor
- return lambda_star
-
- def update_rule(self, w, lr, i, iters, pb):
- """update rule."""
- total = pb["total"]
- grad_frac = pb["grad_frac4"]
- rho1 = grad_frac * total / iters # divide total privacy budget for gradient
- rho2 = (1-grad_frac) * total / iters # divide total privacy budget
- sc1 = (1/lr.n) * math.sqrt(0.5/rho1)
- noise1 = np.random.normal(scale=sc1, size=lr.d)
- noisy_grad = lr.grad(w)+noise1
- grad_norm = np.linalg.norm(noisy_grad)
- m = 0.25 # smoothness parameter
- frac_trace = 0.1 # fraction of privacy budget for estimating the trace.
- H = self.H(w)
- if self.plot_eigen:
- val, _ = np.linalg.eigh(H)
- hist, bin_edges = np.histogram(val, bins=300, range=(0, 0.01))
- cdf_vals = np.cumsum(hist)
- plt.clf()
- plt.plot(bin_edges[1:], cdf_vals)
- plt.show()
- if self.hyper_tuning:
- min_eval = self.find_opt_reg_wop(w, lr, noisy_grad, rho2)
- print("optimized min_eval", min_eval)
- else:
- noisy_trace = max(
- np.trace(H) + np.random.normal(
- scale=(0.25 / lr.n) * math.sqrt(0.5 / (frac_trace * rho2))), 0)
- min_eval = (noisy_trace / ((lr.n)**2 *
- (1 - frac_trace) * rho2))**(1 / 3) + 5e-4
- print("approx min_eval ", min_eval)
-
- H = self.hess_mod(w, min_eval, lr.reg)
- if self.type_reg == "add": # Sensitivity is different for add vs clip
- sens2 = grad_norm * m/(lr.n * min_eval**2 + m * min_eval)
- elif self.type_reg == "clip":
- sens2 = grad_norm * m / (lr.n * min_eval**2 - m * min_eval)
- noise2 = np.random.normal(
- scale=sens2 * math.sqrt(0.5 / ((1 - frac_trace) * rho2)), size=lr.d)
- return w - np.linalg.solve(H, noisy_grad) + noise2
-
- def hess_mod(self, w, min_eval, reg=1e-9):
- if self.type_reg == "clip":
- evals, evec = np.linalg.eigh(self.H(w))
- # true_min = np.min(evals)
- evals = np.maximum(evals, min_eval*np.ones(evals.shape))
- Hclipped = np.dot(evec * evals, evec.T)
- return Hclipped
- elif self.type_reg == "add":
- return self.H(w) + min_eval*np.eye(len(self.H(w)))
-
-
-def helper_fun(datasetname, pb, num_rep=5, Tuning=False, plot_eigen=False):
- """This function loads the data & runs the algorithms.
-
- Args:
- datasetname: name of the dataset
- pb: a dictionary with the parameters
- num_rep: number of times we repeat the optimization for reporting average
- Tuning: True or False exhustive search for fining the best min eigenval
- plot_eigen: Show eigenvalues
-
- Returns:
- losses and gradient norms
- """
- datasets = Mydatasets()
- X, y, w_opt = getattr(datasets, datasetname)()
- dataset = X, y
- lr = MyLogisticRegression(X, y, reg=1e-8)
- dnm_hess_add = DoubleNoiseMech(
- lr,
- type_reg="add",
- hyper_tuning=False,
- curvature_info="hessian",
- plot_eigen=plot_eigen).update_rule
- dnm_ub_add = DoubleNoiseMech(
- lr,
- type_reg="add",
- hyper_tuning=False,
- curvature_info="ub",
- plot_eigen=plot_eigen).update_rule
- dnm_hess_clip = DoubleNoiseMech(
- lr,
- type_reg="clip",
- hyper_tuning=False,
- curvature_info="hessian",
- plot_eigen=plot_eigen).update_rule
- dnm_ub_clip = DoubleNoiseMech(
- lr,
- type_reg="clip",
- hyper_tuning=False,
- curvature_info="ub",
- plot_eigen=plot_eigen).update_rule
- if Tuning:
- # dnm_hess_add_ht = DoubleNoiseMech(lr,type_reg='add',
- # hyper_tuning=True,curvature_info='hessian').update_rule
- # dnm_ub_add_ht = DoubleNoiseMech(lr,type_reg='add',
- # hyper_tuning=True,curvature_info='ub').update_rule
- dnm_hess_clip_ht = DoubleNoiseMech(
- lr,
- type_reg="clip",
- hyper_tuning=True,
- curvature_info="hessian",
- plot_eigen=plot_eigen).update_rule
- # dnm_ub_clip_ht = DoubleNoiseMech(lr,type_reg='clip',
- # hyper_tuning=True,curvature_info='ub').update_rule
- c = CompareAlgs(lr, dataset, w_opt, iters=10, pb=pb)
- for rep in range(num_rep):
- c.add_plot(gd_priv, "DPGD", "y--")
- c.add_plot(dnm_hess_add, "DN-Hess-add", "k-")
- c.add_plot(dnm_ub_add, "DN-UB-add", "b-")
- c.add_plot(dnm_hess_clip, "DN-Hess-clip", "k*-")
- c.add_plot(dnm_ub_clip, "DN-UB-clip", "b*-")
- c.add_plot(gd_priv_backtrackingls, "DP-GD-Oracle", "m")
- if Tuning:
- c.add_plot(dnm_hess_clip_ht, "DN-Hess-clip-T", "r*-")
- # c.add_plot(dnm_hess_add_ht,"DN-Hess-add-T",'r-')
- # c.add_plot(dnm_ub_clip_ht,"DN-UB-clip-T",'g*-')
- # c.add_plot(dnm_ub_add_ht,"DN-UB-add-T",'g-')
- losses_dict = c.loss_vals()
- gradnorm_dict = c.gradnorm_vals()
- if rep == 0:
- losses_total = losses_dict
- gradnorm_total = gradnorm_dict
- else:
- for names in losses_total:
- losses_total[names].extend(losses_dict[names])
- gradnorm_total[names].extend(gradnorm_dict[names])
- return losses_total, gradnorm_total
-
-linestyle = {
- "DPGD": "y-",
- "DN-Hess-add": "k+-",
- "DN-UB-add": "b-",
- "DN-Hess-clip": "r*-",
- "DN-UB-clip": "g-",
- "DP-GD-Oracle": "c-"
-}
-facecolor = {
- "DPGD": "yellow",
- "DN-Hess-add": "black",
- "DN-UB-add": "blue",
- "DN-Hess-clip": "red",
- "DN-UB-clip": "green",
- "DP-GD-Oracle": "cyan"
-}
-alg_plt = [
- "DPGD",
- "DN-Hess-add",
- "DN-UB-add",
- "DN-Hess-clip",
- "DN-UB-clip",
- "DP-GD-Oracle"
-]
-
-# Synthethic Data
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4":
- 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "synthetic_data", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(1)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("synth.pdf")
- plt.figure(2)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("synth-grad.pdf")
-
-# a5a dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4":
- 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "a5a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(3)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("a5a.pdf")
- plt.figure(4)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("a5a-grad.pdf")
-
-# w8a dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4":
- 0.75 # Fraction of privacy budget for gradient vs matrix sensitivity
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "w8a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(5)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("w8a.pdf")
- plt.figure(6)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("w8a-grad.pdf")
-
-# a1a dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "a1a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(7)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("a1a.pdf")
- plt.figure(8)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("a1a-grad.pdf")
-
-# mushroom dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "mushroom_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(9)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("mushroom.pdf")
- plt.figure(10)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("mushroom-grad.pdf")
-
-# MNIST
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "mnist_binary", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(11)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("mnist.pdf")
- plt.figure(12)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("mnist-grad.pdf")
-
-# Dataset: phishing
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "phishing", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(13)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("phishing.pdf")
- plt.figure(14)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("phishing-grad.pdf")
-
-# Dataset: Madelon
-
-# pb = {
-# "total": 1, # Total privacy budget for zCDP
-# "min_eval4": 5e-3, # Min eigenvalue for clipping
-# "grad_frac4": 0.4 # Fraction of privacy budget for gradient vs matrix
-# }
-# num_rep = 1
-# losses_total,gradnorm_total = helper_fun('madelon',pb,num_rep = num_rep,
-# Tuning=True,plot_eigen=True)
-# for alg in losses_total.keys():
-# losses = np.array(losses_total[alg])
-# gradnorm = np.array(gradnorm_total[alg])
-# loss_avg, gradnorm_avg = np.mean(losses,axis=0), np.mean(gradnorm,axis=0)
-# loss_std, gradnorm_std = np.std(losses,axis=0)/np.sqrt(num_rep),
-# np.std(gradnorm,axis=0)/np.sqrt(num_rep)
-# print(str(alg)+ ':' + " ex_loss="+str(loss_avg[-1])+ ',
-# std='+str(loss_std[-1]))
-# if alg in alg_plt:
-# iters = len(loss_avg)
-# plt.figure(1)
-# plt.plot(range(iters),loss_avg,linestyle[alg],label=alg)
-# plt.fill_between(range(iters), loss_avg-loss_std, loss_avg+loss_std,
-# facecolor=facecolor[alg])
-# plt.legend()
-# plt.xlabel("Iteration")
-# plt.ylabel("Excess Loss")
-# plt.savefig('madelon.pdf')
-# plt.figure(2)
-# plt.plot(range(iters),gradnorm_avg,linestyle[alg],label=alg)
-# plt.yscale('log')
-# plt.legend()
-# plt.xlabel("Iteration")
-# plt.ylabel("Norm of Gradient")
-# plt.savefig('madelon-grad.pdf')
-
-# Test) a6a Dataset
-
-pb = {
- "total": 1, # Total privacy budget for zCDP
- "min_eval4": 5e-3, # Min eigenvalue for clipping
- "grad_frac4": 0.75 # Fraction of privacy budget for gradient vs matrix
-}
-num_rep = 30
-losses_total, gradnorm_total = helper_fun(
- "a6a_dataset", pb, num_rep=num_rep, Tuning=False)
-for alg in losses_total:
- losses = np.array(losses_total[alg])
- gradnorm = np.array(gradnorm_total[alg])
- loss_avg, gradnorm_avg = np.mean(losses, axis=0), np.mean(gradnorm, axis=0)
- loss_std, gradnorm_std = np.std(
- losses, axis=0) / np.sqrt(num_rep), np.std(
- gradnorm, axis=0) / np.sqrt(num_rep)
- print(
- str(alg) + ":" + " ex_loss=" + str(loss_avg[-1]) + ", std=" +
- str(loss_std[-1]))
- if alg in alg_plt:
- iters = len(loss_avg)
- plt.figure(15)
- plt.plot(range(iters), loss_avg, linestyle[alg], label=alg)
- plt.fill_between(
- range(iters),
- loss_avg - loss_std,
- loss_avg + loss_std,
- facecolor=facecolor[alg])
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Excess Loss")
- plt.savefig("a6a.pdf")
- plt.figure(16)
- plt.plot(range(iters), gradnorm_avg, linestyle[alg], label=alg)
- plt.yscale("log")
- plt.legend()
- plt.xlabel("Iteration")
- plt.ylabel("Norm of Gradient")
- plt.savefig("a6a-grad.pdf")
diff --git a/research/dp_newton/run_privacy_utility b/research/dp_newton/run_privacy_utility
new file mode 100644
index 0000000..415d238
--- /dev/null
+++ b/research/dp_newton/run_privacy_utility
@@ -0,0 +1,28 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+rm -rf ./src/results
+mkdir -p ./src/results
+rm -rf ./src/datasets_directory
+mkdir -p ./src/datasets_directory
+dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
+privacy_budget="3.0" # epsilon in DP
+num_iteration_GD="100" # number of iterations for DP-GD
+num_iteration_NT="15" # number of iterations for damped newton
+num_iteration_our="15" # number of iterations for double noise (proposed method)
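+# Each run below is expected to write its metrics under ./src/results,
+# which ./src/print_results.py then summarizes.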
+python3 ./src/run.py --alg_type 'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD
+python3 ./src/run.py --alg_type 'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac "0.7"
+python3 ./src/run.py --alg_type 'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac "0.7" --trace_frac "0.1" --trace_coeff "0.5"
+python3 ./src/print_results.py
diff --git a/research/dp_newton/src/dataset_loader.py b/research/dp_newton/src/dataset_loader.py
new file mode 100644
index 0000000..9f060ad
--- /dev/null
+++ b/research/dp_newton/src/dataset_loader.py
@@ -0,0 +1,236 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""dataset loader"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import os
+import ssl
+import tarfile
+import urllib.request
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+import requests
+from sklearn import preprocessing
+import sklearn.datasets
+from sklearn.linear_model import LogisticRegression
+import torch
+from torchvision import datasets, transforms
+
+
+PATH_PREFIX = './src/datasets_directory'
+ssl._create_default_https_context = ssl._create_unverified_context
+
+
+def normalize_fvec(x_train):
+ """normalize feature vectors"""
+ feature_mean = np.mean(x_train, axis=0)
+ feature_std = np.std(x_train, axis=0)
+ x_train = (x_train - feature_mean) / feature_std
+ return x_train
+
+
+def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
+ """Implementation of backtracking line search
+
+ lr = logistic regression
+ dir = the "noisy" gradient direction
+ w_start = current point
+ alpha and beta tradeoff the precision and complexity of the linesearch
+
+ output is an (close to) optimal stepsize
+ """
+ step_size = 100
+ val_0 = lrp.loss(w_start)
+ inner_prod = np.dot(dir_srch, lrp.grad(w_start))
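+  # Shrink the step until the Armijo-style sufficient-decrease condition
+  # loss(w - t*dir) < loss(w) - alpha * t * <dir, grad(w)> holds, or the step
+  # becomes negligibly small.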
+ while (
+ lrp.loss(w_start - step_size * dir_srch)
+ >= val_0 - step_size * alpha * inner_prod
+ ):
+ step_size = beta * step_size
+ if step_size < 1e-6:
+ break
+ return step_size
+
+
+def newton(dataset, w_init, bias=True):
+ """Implementation of the newton method with linesearch without privacy constraints
+
+ dataset = dataset
+ w_init = initialization point
+
+ output is the model parameter
+ """
+ feature_vecs, labels = dataset
+ if bias is True:
+ feature_vecs = np.hstack(
+ (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
+ )
+ lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
+ w_cur = w_init
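+  # A few damped Newton steps: solve hess * dir = grad for the Newton
+  # direction and move along it with a backtracking step size.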
+ for _ in range(8):
+ hess = lrp.hess(w_cur)
+ dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
+ step_size = backtracking_ls(lrp, dir_srch, w_cur)
+ w_cur = w_cur - step_size * dir_srch
+ if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
+ w_out = w_cur
+ else:
+ w_out = w_init
+ return w_out
+
+
+class Mydatasets:
+ """Represents datasets we use for expriments"""
+
+ def __init__(self):
+ data_dir = PATH_PREFIX + '/data'
+ cache_dir = PATH_PREFIX + '/cache_datasets'
+ if not os.path.exists(data_dir):
+ os.mkdir(data_dir)
+ if not os.path.exists(cache_dir):
+ os.mkdir(cache_dir)
+
+ def find_optimal_classifier(self, dataset, bias=True):
+ """find the optimal weight vector for the logistic regression
+
+ for the problems with real datasets.
+
+ dataset = training dataset
+ bias = bias for the logistic model
+ """
+ inputs_vec, labels = dataset
+ reg = 1e-9
+ if bias is True:
+ model_lr = LogisticRegression(max_iter=200, C=1 / reg).fit(
+ inputs_vec, labels
+ )
+ w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
+ w_opt = newton(dataset, w_opt1, bias)
+ else:
+ model_lr = LogisticRegression(
+ max_iter=200, fit_intercept=False, C=1 / reg
+ ).fit(inputs_vec, labels)
+ w_opt1 = np.squeeze(model_lr.coef_)
+ w_opt = newton(dataset, w_opt1, bias)
+ return w_opt
+
+ def fmnist_dataset(self):
+ """fmnist dataset"""
+ transform_data = transforms.Compose(
+ [transforms.ToTensor(), transforms.Normalize((0.5), (0.5))]
+ )
+ train_data_trans = datasets.FashionMNIST(
+ root=PATH_PREFIX + '/data',
+ download=True,
+ train=True,
+ transform=transform_data,
+ )
+ train_loader = torch.utils.data.DataLoader(
+ train_data_trans, batch_size=len(train_data_trans)
+ )
+ x_train = next(iter(train_loader))[0].numpy()
+ x_train = x_train.reshape(len(x_train), -1)
+ y_train = next(iter(train_loader))[1].numpy()
+ label0 = 0
+ label1 = 3
+ indx0 = np.nonzero(y_train == label0)[0]
+ indx1 = np.nonzero(y_train == label1)[0]
+ labels = y_train.copy()
+ labels[indx0] = -1
+ labels[indx1] = 1
+ indx = np.concatenate((indx0, indx1))
+ x_train = x_train[indx]
+ labels = labels[indx]
+ dataset = x_train, labels
+ w_opt = self.find_optimal_classifier(dataset, bias=False)
+ return x_train, labels, w_opt
+
+ def a1a_dataset(self):
+ """a1a dataset"""
+ a1a_url = (
+ 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t'
+ )
+ data_path = PATH_PREFIX + '/data/a1a'
+ if not os.path.exists(data_path):
+ _ = urllib.request.urlretrieve(a1a_url, data_path)
+ data = sklearn.datasets.load_svmlight_file(data_path)
+ inputs_vec, labels = data[0], data[1]
+ inputs_vec = inputs_vec.toarray()
+ scaler = preprocessing.StandardScaler().fit(inputs_vec)
+ inputs_vec = scaler.transform(inputs_vec)
+ labels = labels.astype(float)
+ dataset = inputs_vec, labels
+ w_opt = self.find_optimal_classifier(dataset)
+ inputs_vec = np.hstack(
+ (np.ones(shape=(np.shape(inputs_vec)[0], 1)), inputs_vec)
+ )
+ return inputs_vec, labels, w_opt
+
+ def protein_dataset(self):
+ """protein dataset"""
+ path_protein = PATH_PREFIX + '/data/protein/'
+ if not os.path.exists(path_protein):
+ os.mkdir(path_protein)
+ protein_url = (
+ 'https://kdd.org/cupfiles/KDDCupData/2004/data_kddcup04.tar.gz'
+ )
+ protein_file = PATH_PREFIX + '/data/protein/data_kddcup04.tar.gz'
+ response = requests.get(protein_url, stream=True, timeout=100)
+ if response.status_code == 200:
+ with open(protein_file, 'wb') as file_data:
+ file_data.write(response.raw.read())
+ with tarfile.open(protein_file, 'r:gz') as tar:
+ tar.extractall(path_protein)
+ x_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 3:]
+ y_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 2]
+ indx0 = np.nonzero(y_train == 0)[0]
+ indx1 = np.nonzero(y_train == 1)[0]
+ labels = y_train.copy()
+ labels[indx0] = -1
+ labels[indx1] = 1
+ indx = np.arange(len(x_train))
+ np.random.seed(3000)
+ indx_sample = np.random.choice(indx, 50000, replace=False)
+ np.random.seed(None)
+ x_train = x_train[indx_sample]
+ labels = labels[indx_sample]
+ x_train = normalize_fvec(x_train)
+ w_opt = self.find_optimal_classifier((x_train, labels))
+ x_train = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train))
+ return x_train, labels, w_opt
+
+ def synthetic_dataset(self, num_samples=10000, dim=100):
+ """Generates a synthetic dataset for logistic regression.
+
+    num_samples = number of samples, dim = dimension.
+    Features are uniformly random unit vectors. Labels are sampled from the
+    logistic model, so w_star is the "true" solution.
+ """
+ mean = np.zeros(dim)
+ cov = np.eye(dim)
+ inputs_vec_un = np.random.multivariate_normal(mean, cov, num_samples)
+ nrm = np.linalg.norm(inputs_vec_un, axis=1)
+ inputs_vec = inputs_vec_un * 1 / nrm[:, None]
+ w_star = np.ones(dim)
+ w_star[0] = 1
+ inner_prod = np.dot(inputs_vec, w_star)
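+    # P(label = +1 | x) = sigmoid(<w_star, x>); draw labels in {-1,+1} accordingly.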
+ params = np.exp(inner_prod) / (1 + np.exp(inner_prod))
+ labels = 2 * np.random.binomial(1, params) - 1
+ dataset = inputs_vec, labels
+ w_opt = self.find_optimal_classifier(dataset, bias=False)
+ return inputs_vec, labels, w_opt
diff --git a/research/dp_newton/src/my_logistic_regression.py b/research/dp_newton/src/my_logistic_regression.py
new file mode 100644
index 0000000..4def9dd
--- /dev/null
+++ b/research/dp_newton/src/my_logistic_regression.py
@@ -0,0 +1,205 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""logistic regression class and its methods"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import numpy as np
+
+
+class MyLogisticRegression:
+ """return a logistic regression problem
+
+ There is a dataset consisting of features (vectors of norm <=1)
+ and labels (+1,-1), represented as a numpy array.
+ There is also an L2 regularizer.
+ """
+
+ def __init__(self, input_vecs, labels, reg=1e-8):
+ """Initialize the data and the regularizer.
+
+ X = n x d numpy array representing features
+ y = n x 1 numpy array representing labels
+ reg = L2 regularizing coefficient (to ensure solution is finite)
+
+ Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
+ """
+ self.reg = float(reg)
+ input_vecs = np.array(input_vecs)
+ labels = np.array(labels)
+ assert len(input_vecs.shape) == 2
+ assert len(labels.shape) == 1
+ self.input_vecs = input_vecs
+ self.labels = labels
+ self.num_samples, self.dim = input_vecs.shape
+ assert labels.shape[0] == self.num_samples
+ signed_data = input_vecs * labels[:, np.newaxis]
+ norm = np.linalg.norm(signed_data, axis=1)
+ scale = np.maximum(norm, np.ones_like(norm))
+ self.data = (1 / scale[:, None]) * signed_data
+
+ def loss(self, param):
+ """Computes the loss represented by this object at w.
+
+ If X,y is the data and reg is the regularizer, then the loss is (1/n)sum_i^n
+    log(1+exp(-<X[i,:]*y[i],w>)) + (reg/2)||w||^2
+ """
+ data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
+ reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
+ return data_loss + reg_loss
+
+ def loss_wor(self, param):
+ """Computes the loss represented by this object at w without regularizer.
+
+ If X,y is the data and reg is the regularizer, then the loss is
+    (1/n)sum_i^n log(1+exp(-<X[i,:]*y[i],w>))
+ """
+ data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
+ return data_loss
+
+ def accuracy(self, param):
+ """ " computes the accuracy of the model gievn by w"""
+ score_pred = np.dot(self.input_vecs, param)
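+    # Evaluate the sigmoid of the score in a branch chosen by sign so that the
+    # selected expression is numerically stable: 1/(1+exp(-s)) for s >= 0,
+    # exp(s)/(1+exp(s)) for s < 0.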
+ label1_prob = np.where(
+ score_pred >= 0,
+ 1 / (1 + np.exp(-score_pred)),
+ np.exp(score_pred) / (1 + np.exp(score_pred)),
+ )
+ return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)
+
+ def grad(self, param, batch_idx=None):
+ """Computes the gradient of the logistic regression at a given point w.
+
+ If X,y is the data and reg is the regularizer, then the gradient is
+    (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<X[i,:]*y[i],w>)) + reg*w
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ else:
+ data_batch = self.data
+
+ coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
+ data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
+ return data_grad + self.reg * param
+
+ def grad_wor(self, param, batch_idx=None):
+ """Computes the gradient of the logistic regression at a given point w.
+
+ If X,y is the data and reg is the regularizer, then the gradient is
+ (-1/n)sum_i^n X[i,:]*y[i]/(1+exp()) + reg*w
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ else:
+ data_batch = self.data
+
+ coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
+ data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
+ return data_grad
+
+ def hess(self, param, batch_idx=None):
+ """Computes the Hessian of the logistic regression at a given point w.
+
+ The Hessian is the matrix of second derivatives.
+
+ If X,y is the data and reg is the regularizer, then the Hessian is
+    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
+ where we assume y[i]^2==1.
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+ temp_var = np.dot(data_batch, param) / 2
+ coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
+ raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
+ return raw_hess / batch_size + self.reg * np.eye(self.dim)
+
+ def hess_wor(self, param, batch_idx=None):
+ """Computes the Hessian of the logistic regression at a given point w.
+
+ The Hessian is the matrix of second derivatives.
+
+ If X,y is the data, then the Hessian is
+    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<X[i,:]*y[i],w>/2)*2)^2
+ where we assume y[i]^2==1.
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+ temp_var = np.dot(data_batch, param) / 2
+ coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
+ raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
+ return raw_hess / batch_size
+
+ def upperbound(self, param, batch_idx=None):
+ """Tightest universal quadratic upper bound on the loss function.
+
+ log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
+    The constant and linear terms are just the first-order Taylor expansion;
+    this function gives the quadratic term (which replaces the Hessian).
+ https://twitter.com/shortstein/status/1557961202256318464
+ """
+
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+    temp_var = -np.dot(data_batch, param)  # vector of -y_i*<x_i,w> for i in [n]
+ # v = 0.5*np.tanh(a/2)/a, but, avoid 0/0 by special rule
+ temp_var2 = np.divide(
+ 0.5 * np.tanh(temp_var / 2),
+ temp_var,
+ out=np.ones(temp_var.shape) * 0.25,
+ where=np.abs(temp_var) > 1e-9,
+ )
+ hess_non = np.dot(data_batch.T * temp_var2, data_batch)
+ return hess_non / batch_size + self.reg * np.eye(self.dim)
+
+ def upperbound_wor(self, param, batch_idx=None):
+ """Tightest universal quadratic upper bound on the loss function.
+
+ log(1+exp(x))<=log(1+exp(a))+(x-a)/(1+exp(-a))+(x-a)^2*tanh(a/2)/(4*a)
+    The constant and linear terms are just the first-order Taylor expansion;
+    this function gives the quadratic term (which replaces the Hessian).
+ """
+ if batch_idx is not None:
+ data_batch = self.data[batch_idx]
+ batch_size = len(batch_idx)
+ else:
+ data_batch = self.data
+ batch_size = self.num_samples
+
+    temp_var = -np.dot(data_batch, param)  # vector of -y_i*<x_i,w> for i in [n]
+ temp_var2 = np.divide(
+ 0.5 * np.tanh(temp_var / 2),
+ temp_var,
+ out=np.ones(temp_var.shape) * 0.25,
+ where=np.abs(temp_var) > 1e-9,
+ )
+ hess_non = np.dot(data_batch.T * temp_var2, data_batch)
+ return hess_non / batch_size
diff --git a/research/dp_newton/src/opt_algs.py b/research/dp_newton/src/opt_algs.py
new file mode 100644
index 0000000..29cb74d
--- /dev/null
+++ b/research/dp_newton/src/opt_algs.py
@@ -0,0 +1,434 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""file containing all auxillary functions for running the optimization algorithms"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import time
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+
+
+class CompareAlgs:
+ """Class to run multiple iterative algorithms and compare the results."""
+
+ def __init__(self, lrp, optimal_w, hyper_dict):
+ """Initialize the problem.
+
+    lrp = an instance of MyLogisticRegression
+    optimal_w = minimizer of the logistic loss on the dataset (without privacy)
+    hyper_dict = dictionary of hyperparameters, including "num_iteration"
+ """
+ self.w_opt = optimal_w
+ self.lrp = lrp
+ self.iters = hyper_dict["num_iteration"]
+ self.hyper_params = hyper_dict
+ self.clock_time = []
+ self.params = []
+ self.names = []
+
+ def add_algo(self, update_rule, name):
+ """Run an iterative update method"""
+ _, dim = self.lrp.num_samples, self.lrp.dim
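+    # draw a random direction and normalize it to get a unit-norm initial iterate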
+ wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
+ w_int = wint_un / np.linalg.norm(wint_un)
+ cutoff_norm = (
+ 100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
+ )
+ w_cur = w_int
+ params = [w_cur]
+ start_t = time.time()
+ wall_clock = [0]
+ for _ in range(self.iters):
+ w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
+ if np.linalg.norm(w_cur) > cutoff_norm:
+ w_cur = w_int
+ print("Stop Things Exploding!")
+ params.append(w_cur)
+ wall_clock.append(time.time() - start_t)
+ self.clock_time.append(wall_clock)
+ self.params.append(params)
+ self.names.append(name)
+
+ def wall_clock_alg(self):
+ """compute the wall clock of different algorithms"""
+ clock_time_dict = {}
+ for time_alg, name in zip(self.clock_time, self.names):
+ clock_time_dict[name] = [time_alg]
+ return clock_time_dict
+
+ def loss_vals(self):
+ """output the loss per iteration for different methods"""
+ baseline = self.lrp.loss_wor(self.w_opt)
+ loss_dict = {}
+ for params, name in zip(self.params, self.names):
+ losses = [self.lrp.loss_wor(w) - baseline for w in params]
+ loss_dict[name] = [losses]
+ return loss_dict
+
+ def accuracy_vals(self):
+ """output the accuracy per iteration for different methods"""
+ acc_dict = {}
+ for params, name in zip(self.params, self.names):
+ acc_vec = [self.lrp.accuracy(w) for w in params]
+ acc_dict[name] = [acc_vec]
+ return acc_dict
+
+ def accuracy_np(self):
+ """output the accuracy of the optimal model without privacy"""
+ return self.lrp.accuracy(self.w_opt)
+
+ def gradnorm_vals(self):
+ """output the gradient norm per iteration for different methods"""
+ gradnorm_dict = {}
+ for params, name in zip(self.params, self.names):
+ grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
+ gradnorm_dict[name] = [grad_norms]
+ return gradnorm_dict
+
+
+def private_newton(w_cur, lrp, hyper_dict):
+ """implementation of private newton method from [ABL21]
+
+ w = current iterate
+ lr = an instance of MyLogisticRegression
+ i = the index of current iterate
+ iters = total number of iterations
+ pb = privacy budget and other info
+ return the next iterate
+ """
+ total = hyper_dict["total"]
+ grad_frac = hyper_dict["grad_frac"]
+ iters = hyper_dict["num_iteration"]
+ hess = lrp.hess(w_cur)
+ rho_grad = grad_frac * total / iters # divide total privacy budget up.
+ rho_hess = (1 - grad_frac) * total / iters
+ hess_noise = np.random.normal(
+ scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
+ size=(lrp.dim, lrp.dim),
+ )
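+  # symmetrize the noise so the perturbed Hessian stays symmetric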
+ hess_noise = (hess_noise + hess_noise.T) / 2
+ hess_noisy = eigenclip(hess + hess_noise)
+ grad = lrp.grad(w_cur)
+ grad_noisy = grad + np.random.normal(
+ scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
+ )
+ dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
+ dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
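+  # damped step: scale the noisy direction by min(log(1 + ||d||)/||d||, 1)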
+ return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy
+
+
+def eigenclip(sym_mat, min_eval=1e-5):
+ """operation of the eigenclip
+
+ A = symmetric matrix
+ min_eval = minimum eigenvalue for clipping
+
+ return the modified matrix
+ """
+ eig_val, eig_vec = np.linalg.eigh(sym_mat)
+ eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
+ clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
+ return clipped_mat
+
+
+def gd_priv(w_cur, lrp, hyper_dict):
+ """Implementation of DP-GD.
+
+  w_cur = current iterate
+  lrp = an instance of MyLogisticRegression
+  hyper_dict = total privacy budget and number of iterations
+
+ output is the next iterate
+ """
+ iters = hyper_dict["num_iteration"]
+ inv_lr_gd = 0.25 # learning rate based on the smoothness
+ sens = 1 / (lrp.num_samples * (inv_lr_gd)) # sensitivity
+ rho = hyper_dict["total"] / iters # divide total privacy budget up
+ noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
+ return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise
+
+
+def sgd_priv(w_cur, lrp, hyper_dict):
+ """Implementation of DP-SGD.
+
+  w_cur = current iterate
+  lrp = an instance of MyLogisticRegression
+  hyper_dict = batch size and noise multiplier
+
+ output is the next iterate
+ """
+ batch_size = hyper_dict["batch_size"]
+ sigma_privacy = hyper_dict["noise_multiplier"]
+ lr_sgd = 4 # learning rate based on the smoothness
+ sample_rate = batch_size / lrp.num_samples # sampling probability
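+  # Poisson subsampling: include each example independently with probability sample_rate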
+ sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
+ batch_idx = np.where(sample_vec == 1)[0] # index of batch
+ batch_size_t = len(batch_idx)
+ noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
+ grad_minibatch = lrp.grad_wor(
+ w_cur, batch_idx
+ ) # average gradient over batch_idx
+ return w_cur - lr_sgd * (
+ batch_size_t / batch_size * grad_minibatch + noise / batch_size
+ )
+
+
+def gd_priv_optls(w_cur, lrp, hyper_dict):
+ """Implementation of DP-GD with back-tracking line search !!!
+
+ this method is not private. We only use it as a baseline.
+
+ w = current point
+ lr = logistic regression
+ i = iteration number
+ pb = auxillary information
+
+ output is the next iterate
+ """
+ iters = hyper_dict["num_iteration"]
+ rho_grad = hyper_dict["total"] / iters # divide total privacy budget up
+ grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
+ grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
+ dir_srch = lrp.grad(w_cur) + grad_noise
+ stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
+ return w_cur - stepsize_opt * dir_srch
+
+
+def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
+ """Implementation of backtracking line search
+
+  lrp = an instance of MyLogisticRegression
+  dir_srch = the (possibly noisy) descent direction
+  w_start = current iterate
+  alpha, beta = parameters trading off the precision and cost of the line search
+
+  output is a (close to) optimal step size
+ """
+ step_size = 100
+ val_0 = lrp.loss(w_start)
+ inner_prod = np.dot(dir_srch, lrp.grad(w_start))
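+  # shrink the step until the Armijo sufficient-decrease condition holds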
+ while (
+ lrp.loss(w_start - step_size * dir_srch)
+ >= val_0 - step_size * alpha * inner_prod
+ ):
+ step_size = beta * step_size
+ if step_size < 1e-6:
+ break
+ return step_size
+
+
+def newton(dataset, w_init, bias=True):
+ """Implementation of the newton method with linesearch without privacy constraints
+
+ dataset = dataset
+ w_init = initialization point
+
+ output is the model parameter
+ """
+ feature_vecs, labels = dataset
+ if bias is True:
+ feature_vecs = np.hstack(
+ (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
+ )
+ lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
+ w_cur = w_init
+ for _ in range(8):
+ hess = lrp.hess(w_cur)
+ dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
+ step_size = backtracking_ls(lrp, dir_srch, w_cur)
+    w_cur = w_cur - step_size * dir_srch
+ if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
+ w_out = w_cur
+ else:
+ w_out = w_init
+ return w_out
+
+
+class DoubleNoiseMech:
+ """Our Method: Double Noise Mechanism"""
+
+ def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
+ """Initializer of the double noise mechanism
+
+    lrp = an instance of MyLogisticRegression
+ type_reg = minimum eigenvalue modification type, it can be either 'add' or
+ 'clip'
+ curvature_info = type of the second-order information
+ """
+ self.type_reg = type_reg
+ self.curvature_info = curvature_info
+ if self.curvature_info == "hessian":
+ self.hess = lrp.hess_wor
+ elif self.curvature_info == "ub":
+ self.hess = lrp.upperbound_wor
+
+ def update_rule(self, w_cur, lrp, hyper_dict):
+ """Implementation of the double noise mechanism update rule--full batch"""
+ noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
+ w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
+ return w_next
+
+ def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
+ """Implementation of the double noise mechanism update rule--full batch"""
+ noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
+ w_next = self.noisy_direction_stochastic(
+ w_cur, lrp, hyper_dict, noisy_grad_cur
+ )
+ return w_next
+
+ def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
+ """computing gradient"""
+ if batch is False:
+ rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
+ "num_iteration"
+ ]
+ noise_grad = np.random.normal(
+ scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
+ )
+ noisy_grad = lrp.grad(w_cur) + noise_grad
+ else:
+ std_grad = hyper_dict["noise_multiplier_grad"]
+ pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
+ sample_vec = np.random.binomial(n=1, p=pgrad, size=lrp.num_samples)
+ batch_idx_grad = np.where(sample_vec == 1)[0]
+ grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad)
+ noise_grad = np.random.normal(scale=std_grad, size=lrp.dim)
+ noisy_grad = (
+ len(batch_idx_grad) / (lrp.num_samples * pgrad)
+ ) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad)
+ return noisy_grad
+
+ def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad):
+ """computing direction"""
+ total = hyper_dict["total"]
+ grad_frac = hyper_dict["grad_frac"]
+ frac_trace = hyper_dict["trace_frac"]
+ trace_coeff = hyper_dict["trace_coeff"]
+ iters = hyper_dict["num_iteration"]
+ rho_hess = (1 - grad_frac) * total / iters
+ smooth_param = 0.25
+ hess_cur = self.hess(w_cur)
+ noisy_trace = trace_coeff * max(
+ np.trace(hess_cur)
+ + np.random.normal(
+ scale=(0.25 / lrp.num_samples)
+ * np.sqrt(0.5 / (frac_trace * rho_hess))
+ ),
+ 0,
+ )
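+    # eigenvalue floor derived from the noisy trace, never below 1/num_samples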
+ min_eval = max(
+ (noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess))
+ ** (1 / 3),
+ 1 / (lrp.num_samples),
+ )
+ grad_norm = np.linalg.norm(noisy_grad)
+ if self.type_reg == "add": # Sensitivity is different for add vs clip
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (lrp.num_samples * min_eval**2 + smooth_param * min_eval)
+ )
+ noise2 = np.random.normal(
+ scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)),
+ size=lrp.dim,
+ )
+ return (
+ w_cur
+ - np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad)
+ + noise2
+ )
+ # type_reg=clip
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (lrp.num_samples * min_eval**2 - smooth_param * min_eval)
+ )
+ noise2 = np.random.normal(
+ scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim
+ )
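+    # clip: invert eigenvalues above min_eval exactly; treat all smaller ones as min_eval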
+ eval_hess, evec_hess = np.linalg.eigh(hess_cur)
+ eval_trunc = eval_hess[eval_hess >= min_eval]
+ num_eig = len(eval_trunc)
+ if num_eig == 0:
+ hess_modified_inv = 1 / min_eval * np.eye(lrp.dim)
+ else:
+ evec_trun = evec_hess[:, -num_eig:]
+ hess_modified_inv = np.dot(
+ evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)),
+ evec_trun.T,
+ ) + 1 / min_eval * np.eye(lrp.dim)
+ return w_cur - (hess_modified_inv @ noisy_grad) + noise2
+
+ def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad):
+ """noisy direction for stochastic variant"""
+ std_hess = hyper_dict["noise_multiplier_hess"]
+ phess = hyper_dict["batchsize_hess"] / lrp.num_samples
+ min_eval = hyper_dict["min_eval"]
+ sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples)
+ batch_idx_hess = np.where(sample_vec == 1)[0]
+ batch_size_hess_t = len(batch_idx_hess)
+ hess_cur = (
+ (batch_size_hess_t)
+ / (lrp.num_samples * phess)
+ * self.hess(w_cur, batch_idx_hess)
+ )
+ smooth_param = 0.25 # smoothness parameter
+ grad_norm = np.linalg.norm(noisy_grad)
+ if self.type_reg == "add": # Sensitivity is different for add vs clip
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (
+ (lrp.num_samples * phess) * min_eval**2
+ + smooth_param * min_eval
+ )
+ )
+ noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
+ return (
+ w_cur
+ - np.linalg.solve(
+ hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad
+ )
+ + noise2
+ )
+ # type_reg=clip
+ min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess)))
+ sens2 = (
+ grad_norm
+ * smooth_param
+ / (
+ (lrp.num_samples * phess) * min_eval_c**2
+ - smooth_param * min_eval_c
+ )
+ )
+ noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
+ eval_hess, evec_hess = np.linalg.eigh(hess_cur)
+ eval_trunc = eval_hess[eval_hess >= min_eval_c]
+ num_eig = len(eval_trunc)
+ if num_eig == 0:
+ hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim)
+ else:
+ evec_trun = evec_hess[:, -num_eig:]
+ hess_modified_inv = np.dot(
+ evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)),
+ evec_trun.T,
+ ) + 1 / min_eval_c * np.eye(lrp.dim)
+ return w_cur - (hess_modified_inv @ noisy_grad) + noise2
diff --git a/research/dp_newton/src/print_results.py b/research/dp_newton/src/print_results.py
new file mode 100644
index 0000000..5458ad3
--- /dev/null
+++ b/research/dp_newton/src/print_results.py
@@ -0,0 +1,47 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""print the achievable error of different algorithms"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import json
+import os
+import numpy as np
+
+RESULTS_PATH = './src/results/'
+excess_loss = {}
+opt_algs = [
+ 'DPGD',
+ 'DN-Hess-add',
+ 'DN-UB-add',
+ 'DN-Hess-clip',
+ 'DN-UB-clip',
+ 'private-newton',
+]
+for filename in os.listdir(RESULTS_PATH):
+ f = os.path.join(RESULTS_PATH, filename)
+ with open(f, encoding='utf-8') as json_file:
+ data = json.load(json_file)
+ for alg in data.keys():
+ if alg in opt_algs:
+ loss_avg = np.array(data[alg]['loss_avg'])
+ loss_std = np.array(data[alg]['loss_std'])
+ clock_time = np.array(data[alg]['clock_time_avg'])
+ print('optimization algorithm: ', alg)
+ print('excess loss: ' + str(loss_avg[-1]))
+ print('run time: ' + str(clock_time[-1]) + '(sec)')
+ print('-----')
diff --git a/research/dp_newton/src/run.py b/research/dp_newton/src/run.py
new file mode 100644
index 0000000..5d49910
--- /dev/null
+++ b/research/dp_newton/src/run.py
@@ -0,0 +1,264 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+"""collections of helper function to run and compare different algorithms"""
+
+# pylint: skip-file
+# pyformat: disable
+
+import argparse
+import json
+from dataset_loader import Mydatasets
+from my_logistic_regression import MyLogisticRegression
+import numpy as np
+from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
+from scipy.optimize import fsolve
+
+
+def zcdp_to_eps(rho, delta):
+ """ "
+
+ conversion of zcdp gurantee to (eps,delta)-DP using the formula in Lemma 3.6
+ of [BS16]
+ rho : zCDP
+ delta: delta in DP
+
+ return eps
+ """
+ return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))
+
+
+def eps_to_zcdp(eps, delta):
+ """ "
+
+ conversion of (eps,delta) gurantee to rho-zCDP
+ eps : eps in DP
+ delta: delta in DP
+
+ return rho
+ """
+
+ def func_root(rho_zcdp):
+ return zcdp_to_eps(rho_zcdp, delta) - eps
+
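+  # numerically invert zcdp_to_eps: find rho with zcdp_to_eps(rho, delta) == eps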
+ root = fsolve(func_root, x0=0.001)[-1]
+ return root
+
+
+def helper_fun(datasetname, alg_type, params_exp):
+ """helper function for running different algorithms
+
+ args:
+    datasetname = name of the dataset
+ alg_type = type of the optimization algorithm
+ params_exp = hyperparameters
+ """
+ feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
+ privacy_dp = params_exp["total"]
+ params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
+ log_reg = MyLogisticRegression(feature_vecs, labels)
+ alg_dict, filename_params = prepare_alg_dict(
+ alg_type, datasetname, privacy_dp, params_exp, log_reg
+ )
+ compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
+ result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
+ result["num-samples"] = len(labels)
+ with open(
+ "src/results/" + filename_params, "w", encoding="utf8"
+ ) as json_file:
+ json.dump(result, json_file)
+
+
+def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
+ """prepare update rule for algorithms and filename"""
+ alg_dict = None
+ filename_params = None
+ if alg_type == "double_noise":
+ filename_params = (
+ "so_"
+ + datasetname
+ + "_"
+ + str(privacy_dp)
+ + "_"
+ + "DP"
+ + "_"
+ + str(params_exp["num_iteration"])
+ + "_"
+ + str(params_exp["grad_frac"])
+ + "_"
+ + str(params_exp["trace_frac"])
+ + "_"
+ + str(params_exp["trace_coeff"])
+ + ".txt"
+ )
+ dnm_hess_add = DoubleNoiseMech(
+ log_reg, type_reg="add", curvature_info="hessian"
+ ).update_rule
+ dnm_ub_add = DoubleNoiseMech(
+ log_reg, type_reg="add", curvature_info="ub"
+ ).update_rule
+ dnm_hess_clip = DoubleNoiseMech(
+ log_reg, type_reg="clip", curvature_info="hessian"
+ ).update_rule
+ dnm_ub_clip = DoubleNoiseMech(
+ log_reg, type_reg="clip", curvature_info="ub"
+ ).update_rule
+ alg_dict = {
+ "DN-Hess-add": dnm_hess_add,
+ "DN-Hess-clip": dnm_hess_clip,
+ "DN-UB-clip": dnm_ub_clip,
+ "DN-UB-add": dnm_ub_add,
+ }
+ elif alg_type == "dp_gd":
+ filename_params = (
+ "gd_"
+ + datasetname
+ + "_"
+ + str(privacy_dp)
+ + "_"
+ + "DP"
+ + "_"
+ + str(params_exp["num_iteration"])
+ + ".txt"
+ )
+ alg_dict = {"DPGD": gd_priv}
+ elif alg_type == "damped_newton":
+ filename_params = (
+ "newton_"
+ + datasetname
+ + "_"
+ + str(privacy_dp)
+ + "_"
+ + "DP"
+ + "_"
+ + str(params_exp["num_iteration"])
+ + ".txt"
+ )
+ alg_dict = {"private-newton": private_newton}
+ return alg_dict, filename_params
+
+
+class RunReleaseStats:
+ """Helpfer function to run different algorithms and store the results"""
+
+ def __init__(self, compare_algs, algs_dict, num_rep=10):
+ self.compare_algs = compare_algs
+ self.algs_dict = algs_dict
+ self.num_rep = num_rep
+ self.losses = 0
+ self.gradnorm = 0
+ self.accuracy = 0
+ self.wall_clock = 0
+
+ def run_algs(self):
+ """method to run different algorithms and store different stats"""
+ for rep in range(self.num_rep):
+ for alg_name, alg_update_rule in self.algs_dict.items():
+ self.compare_algs.add_algo(alg_update_rule, alg_name)
+ losses_dict = self.compare_algs.loss_vals()
+ gradnorm_dict = self.compare_algs.gradnorm_vals()
+ accuracy_dict = self.compare_algs.accuracy_vals()
+ wall_clock_dict = self.compare_algs.wall_clock_alg()
+ if rep == 0:
+ self.losses = losses_dict
+ self.gradnorm = gradnorm_dict
+ self.accuracy = accuracy_dict
+ self.wall_clock = wall_clock_dict
+ else:
+ for alg in self.losses:
+ self.losses[alg].extend(losses_dict[alg])
+ self.gradnorm[alg].extend(gradnorm_dict[alg])
+ self.accuracy[alg].extend(accuracy_dict[alg])
+ self.wall_clock[alg].extend(wall_clock_dict[alg])
+
+ def summarize_stats(self):
+ """method to summarize the results"""
+ self.run_algs()
+ result = {}
+ result["acc-best"] = self.compare_algs.accuracy_np().tolist()
+ for alg in self.losses:
+ result[alg] = {}
+ loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
+ loss_std = np.std(np.array(self.losses[alg]), axis=0)
+ result[alg]["loss_avg"] = (loss_avg).tolist()
+ result[alg]["loss_std"] = (loss_std / np.sqrt(self.num_rep)).tolist()
+ gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
+ gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
+ result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
+ result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
+ acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
+ acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
+ result[alg]["acc_avg"] = (acc_avg).tolist()
+ result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
+ clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
+ clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
+ result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
+ result[alg]["clock_time_std"] = (
+ clocktime_std / np.sqrt(self.num_rep)
+ ).tolist()
+
+ return result
+
+
+def main():
+ """main function"""
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--datasetname")
+ parser.add_argument("--alg_type")
+ parser.add_argument("--total")
+ parser.add_argument("--numiter")
+ # double noise and newton
+ parser.add_argument("--grad_frac")
+ parser.add_argument("--trace_frac")
+ parser.add_argument("--trace_coeff")
+ args = parser.parse_args()
+ datasetname = args.datasetname
+ alg_type = args.alg_type
+ total = float(args.total)
+ num_iter = int(args.numiter)
+ if alg_type == "double_noise":
+ grad_frac = float(args.grad_frac)
+ trace_frac = float(args.trace_frac)
+ trace_coeff = float(args.trace_coeff)
+ hyper_parameters = {
+ "total": total,
+ "grad_frac": grad_frac,
+ "trace_frac": trace_frac,
+ "trace_coeff": trace_coeff,
+ "num_iteration": num_iter,
+ }
+ elif alg_type == "dp_gd":
+ hyper_parameters = {"total": total, "num_iteration": num_iter}
+ elif alg_type == "damped_newton":
+ grad_frac = float(args.grad_frac)
+ hyper_parameters = {
+ "total": total,
+ "num_iteration": num_iter,
+ "grad_frac": grad_frac,
+ }
+ else:
+ raise ValueError("no such optmization algorithm exists")
+ print(
+ "optimization algorithm "
+ + alg_type
+ + ","
+ + "dataset name: "
+ + datasetname
+ )
+ helper_fun(datasetname, alg_type, hyper_parameters)
+
+
+if __name__ == "__main__":
+ main()
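+
+# Example invocations (a sketch, not part of the original script; the dataset
+# name must match one of the loaders exposed by dataset_loader.Mydatasets, and
+# the numeric values below are illustrative only):
+#   python src/run.py --datasetname=<dataset> --alg_type=dp_gd --total=1.0 --numiter=50
+#   python src/run.py --datasetname=<dataset> --alg_type=double_noise --total=1.0 \
+#     --numiter=50 --grad_frac=0.5 --trace_frac=0.1 --trace_coeff=1.0
+#   python src/run.py --datasetname=<dataset> --alg_type=damped_newton --total=1.0 \
+#     --numiter=50 --grad_frac=0.5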