forked from 626_privacy/tensorflow_privacy
COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/privacy/pull/489 from mhaghifam:dp-second-order-optimization 024904810a8f130d554cc3f04713d5562ccfe5df
PiperOrigin-RevId: 547312570
This commit is contained in:
parent 6b8007ddde
commit cb6659d11b
8 changed files with 1246 additions and 1238 deletions
32
research/dp_newton/README.md
Normal file
@@ -0,0 +1,32 @@
# Faster Differentially Private Convex Optimization via Second-Order Methods

https://arxiv.org/abs/2112.03570 <br>
by Arun Ganesh, Mahdi Haghifam, Thomas Steinke, Abhradeep Thakurta.

## Description

Implementation of the optimization algorithms proposed in
https://arxiv.org/abs/2112.03570 <br>

## Getting Started

You will need to install fairly standard dependencies: `numpy`, `scipy`,
`scikit-learn`, `torch`, `torchvision`, and `requests`.

Run `run_privacy_utility` to compare the convergence speed and excess loss of
the different algorithms.

### Citation

You can cite this paper with

```
@article{ganesh2023faster,
  title={Faster Differentially Private Convex Optimization
         via Second-Order Methods},
  author={Ganesh, Arun and Haghifam, Mahdi and Steinke, Thomas
          and Thakurta, Abhradeep},
  journal={arXiv preprint arXiv:2305.13209},
  year={2023}
}
```
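For orientation, the core update implemented by the `add` variant of the double noise mechanism in `src/opt_algs.py` can be summarized as follows; this is a paraphrase of the code in this commit, not a statement taken from the paper:

$$w_{t+1} = w_t - \big(H_t + \lambda_t I\big)^{-1}\big(\nabla \hat L(w_t) + z_{\mathrm{grad}}\big) + z_{\mathrm{dir}},$$

where $H_t$ is the empirical Hessian (or its quadratic upper bound) at $w_t$, $\lambda_t$ is a minimum-eigenvalue shift chosen from a noisy trace estimate, and $z_{\mathrm{grad}}$, $z_{\mathrm{dir}}$ are Gaussian noise vectors whose scales are calibrated to the per-iteration zCDP budget.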
File diff suppressed because it is too large
28
research/dp_newton/run_privacy_utility
Normal file
@@ -0,0 +1,28 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

rm -rf ./src/results
mkdir -p ./src/results
rm -rf ./src/datasets_directory
mkdir -p ./src/datasets_directory
dataset="protein_dataset" # 'a1a_dataset', 'synthetic_dataset', 'fmnist_dataset'
privacy_budget="3.0" # epsilon in DP
num_iteration_GD="100" # number of iterations for DP-GD
num_iteration_NT="15" # number of iterations for damped newton
num_iteration_our="15" # number of iterations for double noise (proposed method)
python3 ./src/run.py --alg_type 'dp_gd' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_GD
python3 ./src/run.py --alg_type 'damped_newton' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_NT --grad_frac "0.7"
python3 ./src/run.py --alg_type 'double_noise' --datasetname $dataset --total $privacy_budget --numiter $num_iteration_our --grad_frac "0.7" --trace_frac "0.1" --trace_coeff "0.5"
python3 ./src/print_results.py
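The shell script simply invokes `src/run.py` once per algorithm. A minimal sketch of the equivalent direct call from Python, assuming it is executed from the `research/dp_newton` directory after the script above has created `./src/results` and `./src/datasets_directory`, might look like:

```python
import sys

sys.path.append("./src")  # make run.py, opt_algs.py, etc. importable
from run import helper_fun

# Same hyperparameters that main() in run.py builds for DP-GD:
# "total" is the epsilon target, converted internally to a zCDP budget.
hyper_parameters = {"total": 3.0, "num_iteration": 100}
helper_fun("synthetic_dataset", "dp_gd", hyper_parameters)
# Results are written as JSON under ./src/results/.
```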
236
research/dp_newton/src/dataset_loader.py
Normal file
@@ -0,0 +1,236 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""dataset loader"""

# pylint: skip-file
# pyformat: disable

import os
import ssl
import tarfile
import urllib.request
from my_logistic_regression import MyLogisticRegression
import numpy as np
import requests
from sklearn import preprocessing
import sklearn.datasets
from sklearn.linear_model import LogisticRegression
import torch
from torchvision import datasets, transforms


PATH_PREFIX = './src/datasets_directory'
ssl._create_default_https_context = ssl._create_unverified_context


def normalize_fvec(x_train):
  """normalize feature vectors"""
  feature_mean = np.mean(x_train, axis=0)
  feature_std = np.std(x_train, axis=0)
  x_train = (x_train - feature_mean) / feature_std
  return x_train


def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
  """Implementation of backtracking line search.

  lrp = an instance of MyLogisticRegression
  dir_srch = the "noisy" gradient direction
  w_start = current point
  alpha and beta trade off the precision and complexity of the line search

  output is a (close to) optimal step size
  """
  step_size = 100
  val_0 = lrp.loss(w_start)
  inner_prod = np.dot(dir_srch, lrp.grad(w_start))
  while (
      lrp.loss(w_start - step_size * dir_srch)
      >= val_0 - step_size * alpha * inner_prod
  ):
    step_size = beta * step_size
    if step_size < 1e-6:
      break
  return step_size


def newton(dataset, w_init, bias=True):
  """Newton's method with line search, without privacy constraints.

  dataset = dataset in the format (features, labels)
  w_init = initialization point

  output is the model parameter
  """
  feature_vecs, labels = dataset
  if bias is True:
    feature_vecs = np.hstack(
        (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
    )
  lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
  w_cur = w_init
  for _ in range(8):
    hess = lrp.hess(w_cur)
    dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
    step_size = backtracking_ls(lrp, dir_srch, w_cur)
    w_cur = w_cur - step_size * dir_srch
  if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
    w_out = w_cur
  else:
    w_out = w_init
  return w_out


class Mydatasets:
  """Represents the datasets we use for experiments"""

  def __init__(self):
    data_dir = PATH_PREFIX + '/data'
    cache_dir = PATH_PREFIX + '/cache_datasets'
    if not os.path.exists(data_dir):
      os.mkdir(data_dir)
    if not os.path.exists(cache_dir):
      os.mkdir(cache_dir)

  def find_optimal_classifier(self, dataset, bias=True):
    """find the optimal weight vector of the logistic regression problem

    dataset = training dataset
    bias = whether the logistic model includes a bias term
    """
    inputs_vec, labels = dataset
    reg = 1e-9
    if bias is True:
      model_lr = LogisticRegression(max_iter=200, C=1 / reg).fit(
          inputs_vec, labels
      )
      w_opt1 = np.concatenate([model_lr.intercept_, np.squeeze(model_lr.coef_)])
      w_opt = newton(dataset, w_opt1, bias)
    else:
      model_lr = LogisticRegression(
          max_iter=200, fit_intercept=False, C=1 / reg
      ).fit(inputs_vec, labels)
      w_opt1 = np.squeeze(model_lr.coef_)
      w_opt = newton(dataset, w_opt1, bias)
    return w_opt

  def fmnist_dataset(self):
    """fmnist dataset"""
    transform_data = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5), (0.5))]
    )
    train_data_trans = datasets.FashionMNIST(
        root=PATH_PREFIX + '/data',
        download=True,
        train=True,
        transform=transform_data,
    )
    train_loader = torch.utils.data.DataLoader(
        train_data_trans, batch_size=len(train_data_trans)
    )
    x_train = next(iter(train_loader))[0].numpy()
    x_train = x_train.reshape(len(x_train), -1)
    y_train = next(iter(train_loader))[1].numpy()
    label0 = 0
    label1 = 3
    indx0 = np.nonzero(y_train == label0)[0]
    indx1 = np.nonzero(y_train == label1)[0]
    labels = y_train.copy()
    labels[indx0] = -1
    labels[indx1] = 1
    indx = np.concatenate((indx0, indx1))
    x_train = x_train[indx]
    labels = labels[indx]
    dataset = x_train, labels
    w_opt = self.find_optimal_classifier(dataset, bias=False)
    return x_train, labels, w_opt

  def a1a_dataset(self):
    """a1a dataset"""
    a1a_url = (
        'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t'
    )
    data_path = PATH_PREFIX + '/data/a1a'
    if not os.path.exists(data_path):
      _ = urllib.request.urlretrieve(a1a_url, data_path)
    data = sklearn.datasets.load_svmlight_file(data_path)
    inputs_vec, labels = data[0], data[1]
    inputs_vec = inputs_vec.toarray()
    scaler = preprocessing.StandardScaler().fit(inputs_vec)
    inputs_vec = scaler.transform(inputs_vec)
    labels = labels.astype(float)
    dataset = inputs_vec, labels
    w_opt = self.find_optimal_classifier(dataset)
    inputs_vec = np.hstack(
        (np.ones(shape=(np.shape(inputs_vec)[0], 1)), inputs_vec)
    )
    return inputs_vec, labels, w_opt

  def protein_dataset(self):
    """protein dataset"""
    path_protein = PATH_PREFIX + '/data/protein/'
    if not os.path.exists(path_protein):
      os.mkdir(path_protein)
      protein_url = (
          'https://kdd.org/cupfiles/KDDCupData/2004/data_kddcup04.tar.gz'
      )
      protein_file = PATH_PREFIX + '/data/protein/data_kddcup04.tar.gz'
      response = requests.get(protein_url, stream=True, timeout=100)
      if response.status_code == 200:
        with open(protein_file, 'wb') as file_data:
          file_data.write(response.raw.read())
      with tarfile.open(protein_file, 'r:gz') as tar:
        tar.extractall(path_protein)
    x_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 3:]
    y_train = np.loadtxt(PATH_PREFIX + '/data/protein/bio_train.dat')[:, 2]
    indx0 = np.nonzero(y_train == 0)[0]
    indx1 = np.nonzero(y_train == 1)[0]
    labels = y_train.copy()
    labels[indx0] = -1
    labels[indx1] = 1
    indx = np.arange(len(x_train))
    np.random.seed(3000)
    indx_sample = np.random.choice(indx, 50000, replace=False)
    np.random.seed(None)
    x_train = x_train[indx_sample]
    labels = labels[indx_sample]
    x_train = normalize_fvec(x_train)
    w_opt = self.find_optimal_classifier((x_train, labels))
    x_train = np.hstack((np.ones(shape=(np.shape(x_train)[0], 1)), x_train))
    return x_train, labels, w_opt

  def synthetic_dataset(self, num_samples=10000, dim=100):
    """Generates a synthetic dataset for logistic regression.

    num_samples = number of samples, dim = dimension. Features are unit
    vectors (by default uniformly random). Labels are sampled from the
    logistic distribution, so w_star is the "true" solution.
    """
    mean = np.zeros(dim)
    cov = np.eye(dim)
    inputs_vec_un = np.random.multivariate_normal(mean, cov, num_samples)
    nrm = np.linalg.norm(inputs_vec_un, axis=1)
    inputs_vec = inputs_vec_un * 1 / nrm[:, None]
    w_star = np.ones(dim)
    w_star[0] = 1
    inner_prod = np.dot(inputs_vec, w_star)
    params = np.exp(inner_prod) / (1 + np.exp(inner_prod))
    labels = 2 * np.random.binomial(1, params) - 1
    dataset = inputs_vec, labels
    w_opt = self.find_optimal_classifier(dataset, bias=False)
    return inputs_vec, labels, w_opt
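The non-private pieces of this loader can be exercised on their own as a quick sanity check. The following is a small usage sketch of my own (not part of the original commit), assuming it runs from `research/dp_newton` with `./src/datasets_directory` already created (the `run_privacy_utility` script creates it) so that `PATH_PREFIX` and the imports resolve:

```python
import sys

sys.path.append("./src")
import numpy as np
from dataset_loader import Mydatasets, newton

# Small synthetic problem: unit-norm features, +/-1 labels, non-private optimum.
features, labels, w_opt = Mydatasets().synthetic_dataset(num_samples=1000, dim=10)

# Re-run the unconstrained damped Newton baseline from a zero start and check
# that it lands near the reported optimum.
w_newton = newton((features, labels), np.zeros(10), bias=False)
print(np.linalg.norm(w_newton - w_opt))  # should be small
```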
205
research/dp_newton/src/my_logistic_regression.py
Normal file
@@ -0,0 +1,205 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""logistic regression class and its methods"""

# pylint: skip-file
# pyformat: disable

import numpy as np


class MyLogisticRegression:
  """Represents a logistic regression problem.

  There is a dataset consisting of features (vectors of norm <= 1)
  and labels (+1, -1), represented as a numpy array.
  There is also an L2 regularizer.
  """

  def __init__(self, input_vecs, labels, reg=1e-8):
    """Initialize the data and the regularizer.

    input_vecs = n x d numpy array representing features
    labels = n x 1 numpy array representing labels
    reg = L2 regularization coefficient (to ensure the solution is finite)

    Data will be rescaled so that ||input_vecs[i,:]|| * |labels[i]| <= 1 for all i.
    """
    self.reg = float(reg)
    input_vecs = np.array(input_vecs)
    labels = np.array(labels)
    assert len(input_vecs.shape) == 2
    assert len(labels.shape) == 1
    self.input_vecs = input_vecs
    self.labels = labels
    self.num_samples, self.dim = input_vecs.shape
    assert labels.shape[0] == self.num_samples
    signed_data = input_vecs * labels[:, np.newaxis]
    norm = np.linalg.norm(signed_data, axis=1)
    scale = np.maximum(norm, np.ones_like(norm))
    self.data = (1 / scale[:, None]) * signed_data

  def loss(self, param):
    """Computes the loss represented by this object at w = param.

    If X, y is the data and reg is the regularizer, then the loss is
    (1/n) sum_i^n log(1 + exp(-<w, X[i,:]*y[i]>)) + (reg/2) ||w||^2
    """
    data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
    reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
    return data_loss + reg_loss

  def loss_wor(self, param):
    """Computes the loss at w = param without the regularizer.

    If X, y is the data, then the loss is
    (1/n) sum_i^n log(1 + exp(-<w, X[i,:]*y[i]>))
    """
    data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
    return data_loss

  def accuracy(self, param):
    """Computes the accuracy of the model given by w = param."""
    score_pred = np.dot(self.input_vecs, param)
    label1_prob = np.where(
        score_pred >= 0,
        1 / (1 + np.exp(-score_pred)),
        np.exp(score_pred) / (1 + np.exp(score_pred)),
    )
    return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)

  def grad(self, param, batch_idx=None):
    """Computes the gradient of the logistic regression at w = param.

    If X, y is the data and reg is the regularizer, then the gradient is
    (-1/n) sum_i^n X[i,:]*y[i] / (1 + exp(<w, X[i,:]*y[i]>)) + reg*w
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data

    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad + self.reg * param

  def grad_wor(self, param, batch_idx=None):
    """Computes the gradient at w = param without the regularizer.

    If X, y is the data, then the gradient is
    (-1/n) sum_i^n X[i,:]*y[i] / (1 + exp(<w, X[i,:]*y[i]>))
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data

    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad

  def hess(self, param, batch_idx=None):
    """Computes the Hessian of the logistic regression at w = param.

    The Hessian is the matrix of second derivatives.

    If X, y is the data and reg is the regularizer, then the Hessian is
    (1/n) sum_i^n X[i,:]*X[i,:]^T / (cosh(<w, X[i,:]*y[i]>/2)*2)^2 + reg*I
    where we assume y[i]^2 == 1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = np.dot(data_batch, param) / 2
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size + self.reg * np.eye(self.dim)

  def hess_wor(self, param, batch_idx=None):
    """Computes the Hessian at w = param without the regularizer.

    The Hessian is the matrix of second derivatives.

    If X, y is the data, then the Hessian is
    (1/n) sum_i^n X[i,:]*X[i,:]^T / (cosh(<w, X[i,:]*y[i]>/2)*2)^2
    where we assume y[i]^2 == 1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = np.dot(data_batch, param) / 2
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size

  def upperbound(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound on the loss function.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)
    Constant and linear terms are just the first-order Taylor expansion; this
    function gives the quadratic term (which replaces the Hessian).
    https://twitter.com/shortstein/status/1557961202256318464
    """

    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = -np.dot(data_batch, param)  # vector of -y_i<x_i,w> for i in [n]
    # v = 0.5*np.tanh(a/2)/a, but avoid 0/0 by a special rule
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size + self.reg * np.eye(self.dim)

  def upperbound_wor(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound on the loss, without regularizer.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)
    Constant and linear terms are just the first-order Taylor expansion; this
    function gives the quadratic term (which replaces the Hessian).
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = -np.dot(data_batch, param)  # vector of -y_i<x_i,w> for i in [n]
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size
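Since every optimizer in this commit consumes the oracle calls above, it is cheap to sanity-check them numerically. A small finite-difference check of `grad` against `loss` (my own sketch, assuming `my_logistic_regression.py` is importable from the working directory) could look like:

```python
import numpy as np
from my_logistic_regression import MyLogisticRegression

rng = np.random.default_rng(0)
n, d = 200, 5
features = rng.normal(size=(n, d))
labels = rng.choice([-1.0, 1.0], size=n)
lrp = MyLogisticRegression(features, labels, reg=1e-3)

w = rng.normal(size=d)
grad = lrp.grad(w)
eps = 1e-6
# Central finite differences along each coordinate should match lrp.grad.
num_grad = np.array([
    (lrp.loss(w + eps * e) - lrp.loss(w - eps * e)) / (2 * eps)
    for e in np.eye(d)
])
print(np.max(np.abs(grad - num_grad)))  # should be close to zero
```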
434
research/dp_newton/src/opt_algs.py
Normal file
@@ -0,0 +1,434 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""file containing all auxiliary functions for running the optimization algorithms"""

# pylint: skip-file
# pyformat: disable

import time
from my_logistic_regression import MyLogisticRegression
import numpy as np


class CompareAlgs:
  """Class to run multiple iterative algorithms and compare the results."""

  def __init__(self, lrp, optimal_w, hyper_dict):
    """Initialize the problem.

    lrp = an instance of MyLogisticRegression
    optimal_w = optimal minimizer of the logistic loss on the dataset without privacy
    hyper_dict = hyperparameters
    """
    self.w_opt = optimal_w
    self.lrp = lrp
    self.iters = hyper_dict["num_iteration"]
    self.hyper_params = hyper_dict
    self.clock_time = []
    self.params = []
    self.names = []

  def add_algo(self, update_rule, name):
    """Run an iterative update method"""
    _, dim = self.lrp.num_samples, self.lrp.dim
    wint_un = np.random.multivariate_normal(np.zeros(dim), np.eye(dim))
    w_int = wint_un / np.linalg.norm(wint_un)
    cutoff_norm = (
        100 * np.linalg.norm(self.w_opt) + 100 * np.linalg.norm(w_int) + 100
    )
    w_cur = w_int
    params = [w_cur]
    start_t = time.time()
    wall_clock = [0]
    for _ in range(self.iters):
      w_cur = update_rule(w_cur, self.lrp, self.hyper_params)
      if np.linalg.norm(w_cur) > cutoff_norm:
        w_cur = w_int
        print("Stop Things Exploding!")
      params.append(w_cur)
      wall_clock.append(time.time() - start_t)
    self.clock_time.append(wall_clock)
    self.params.append(params)
    self.names.append(name)

  def wall_clock_alg(self):
    """compute the wall-clock time of the different algorithms"""
    clock_time_dict = {}
    for time_alg, name in zip(self.clock_time, self.names):
      clock_time_dict[name] = [time_alg]
    return clock_time_dict

  def loss_vals(self):
    """output the loss per iteration for the different methods"""
    baseline = self.lrp.loss_wor(self.w_opt)
    loss_dict = {}
    for params, name in zip(self.params, self.names):
      losses = [self.lrp.loss_wor(w) - baseline for w in params]
      loss_dict[name] = [losses]
    return loss_dict

  def accuracy_vals(self):
    """output the accuracy per iteration for the different methods"""
    acc_dict = {}
    for params, name in zip(self.params, self.names):
      acc_vec = [self.lrp.accuracy(w) for w in params]
      acc_dict[name] = [acc_vec]
    return acc_dict

  def accuracy_np(self):
    """output the accuracy of the optimal model without privacy"""
    return self.lrp.accuracy(self.w_opt)

  def gradnorm_vals(self):
    """output the gradient norm per iteration for the different methods"""
    gradnorm_dict = {}
    for params, name in zip(self.params, self.names):
      grad_norms = [np.linalg.norm(self.lrp.grad_wor(w)) for w in params]
      gradnorm_dict[name] = [grad_norms]
    return gradnorm_dict


def private_newton(w_cur, lrp, hyper_dict):
  """implementation of the private Newton method from [ABL21]

  w_cur = current iterate
  lrp = an instance of MyLogisticRegression
  hyper_dict = privacy budget and other hyperparameters

  return the next iterate
  """
  total = hyper_dict["total"]
  grad_frac = hyper_dict["grad_frac"]
  iters = hyper_dict["num_iteration"]
  hess = lrp.hess(w_cur)
  rho_grad = grad_frac * total / iters  # divide total privacy budget up.
  rho_hess = (1 - grad_frac) * total / iters
  hess_noise = np.random.normal(
      scale=(0.25 / lrp.num_samples) * np.sqrt(0.5 / rho_hess),
      size=(lrp.dim, lrp.dim),
  )
  hess_noise = (hess_noise + hess_noise.T) / 2
  hess_noisy = eigenclip(hess + hess_noise)
  grad = lrp.grad(w_cur)
  grad_noisy = grad + np.random.normal(
      scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
  )
  dir_noisy = np.linalg.solve(hess_noisy, grad_noisy)
  dir_size = np.linalg.norm(np.linalg.solve(hess, grad))
  return w_cur - min(np.log(1 + dir_size) * (1 / dir_size), 1) * dir_noisy


def eigenclip(sym_mat, min_eval=1e-5):
  """eigenvalue clipping operation

  sym_mat = symmetric matrix
  min_eval = minimum eigenvalue for clipping

  return the modified matrix
  """
  eig_val, eig_vec = np.linalg.eigh(sym_mat)
  eval_mod = np.maximum(eig_val, min_eval * np.ones(eig_val.shape))
  clipped_mat = np.dot(eig_vec * eval_mod, eig_vec.T)
  return clipped_mat


def gd_priv(w_cur, lrp, hyper_dict):
  """Implementation of DP-GD.

  w_cur = current point
  lrp = logistic regression
  hyper_dict = auxiliary information (privacy budget, number of iterations)

  output is the next iterate
  """
  iters = hyper_dict["num_iteration"]
  inv_lr_gd = 0.25  # inverse learning rate, set to the smoothness constant
  sens = 1 / (lrp.num_samples * (inv_lr_gd))  # sensitivity
  rho = hyper_dict["total"] / iters  # divide total privacy budget up
  noise = np.random.normal(scale=sens / np.sqrt(2 * rho), size=lrp.dim)
  return w_cur - lrp.grad_wor(w_cur) / (inv_lr_gd) + noise


def sgd_priv(w_cur, lrp, hyper_dict):
  """Implementation of DP-SGD.

  w_cur = current point
  lrp = logistic regression
  hyper_dict = auxiliary information (batch size, noise multiplier)

  output is the next iterate
  """
  batch_size = hyper_dict["batch_size"]
  sigma_privacy = hyper_dict["noise_multiplier"]
  lr_sgd = 4  # learning rate based on the smoothness
  sample_rate = batch_size / lrp.num_samples  # sampling probability
  sample_vec = np.random.binomial(n=1, p=sample_rate, size=lrp.num_samples)
  batch_idx = np.where(sample_vec == 1)[0]  # index of batch
  batch_size_t = len(batch_idx)
  noise = np.random.normal(scale=sigma_privacy, size=lrp.dim)
  grad_minibatch = lrp.grad_wor(
      w_cur, batch_idx
  )  # average gradient over batch_idx
  return w_cur - lr_sgd * (
      batch_size_t / batch_size * grad_minibatch + noise / batch_size
  )


def gd_priv_optls(w_cur, lrp, hyper_dict):
  """Implementation of DP-GD with backtracking line search.

  Note: this method is not private. We only use it as a baseline.

  w_cur = current point
  lrp = logistic regression
  hyper_dict = auxiliary information

  output is the next iterate
  """
  iters = hyper_dict["num_iteration"]
  rho_grad = hyper_dict["total"] / iters  # divide total privacy budget up
  grad_scale = (1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad)
  grad_noise = np.random.normal(scale=grad_scale, size=lrp.dim)
  dir_srch = lrp.grad(w_cur) + grad_noise
  stepsize_opt = backtracking_ls(lrp, dir_srch, w_cur)
  return w_cur - stepsize_opt * dir_srch


def backtracking_ls(lrp, dir_srch, w_start, alpha=0.4, beta=0.95):
  """Implementation of backtracking line search.

  lrp = logistic regression
  dir_srch = the "noisy" gradient direction
  w_start = current point
  alpha and beta trade off the precision and complexity of the line search

  output is a (close to) optimal step size
  """
  step_size = 100
  val_0 = lrp.loss(w_start)
  inner_prod = np.dot(dir_srch, lrp.grad(w_start))
  while (
      lrp.loss(w_start - step_size * dir_srch)
      >= val_0 - step_size * alpha * inner_prod
  ):
    step_size = beta * step_size
    if step_size < 1e-6:
      break
  return step_size


def newton(dataset, w_init, bias=True):
  """Newton's method with line search, without privacy constraints.

  dataset = dataset in the format (features, labels)
  w_init = initialization point

  output is the model parameter
  """
  feature_vecs, labels = dataset
  if bias is True:
    feature_vecs = np.hstack(
        (np.ones(shape=(np.shape(feature_vecs)[0], 1)), feature_vecs)
    )
  lrp = MyLogisticRegression(feature_vecs, labels, reg=1e-9)
  w_cur = w_init
  for _ in range(8):
    hess = lrp.hess(w_cur)
    dir_srch = np.linalg.solve(hess, lrp.grad_wor(w_cur))
    step_size = backtracking_ls(lrp, dir_srch, w_cur)
    w_cur = w_cur - step_size * dir_srch
  if lrp.loss_wor(w_cur) < lrp.loss_wor(w_init):
    w_out = w_cur
  else:
    w_out = w_init
  return w_out


class DoubleNoiseMech:
  """Our Method: Double Noise Mechanism"""

  def __init__(self, lrp, type_reg="add", curvature_info="hessian"):
    """Initializer of the double noise mechanism

    lrp = an instance of MyLogisticRegression
    type_reg = minimum-eigenvalue modification type, either 'add' or 'clip'
    curvature_info = type of the second-order information, either 'hessian' or 'ub'
    """
    self.type_reg = type_reg
    self.curvature_info = curvature_info
    if self.curvature_info == "hessian":
      self.hess = lrp.hess_wor
    elif self.curvature_info == "ub":
      self.hess = lrp.upperbound_wor

  def update_rule(self, w_cur, lrp, hyper_dict):
    """Implementation of the double noise mechanism update rule -- full batch"""
    noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict)
    w_next = self.noisy_direction(w_cur, lrp, hyper_dict, noisy_grad_cur)
    return w_next

  def update_rule_stochastic(self, w_cur, lrp, hyper_dict):
    """Implementation of the double noise mechanism update rule -- minibatch"""
    noisy_grad_cur = self.noisy_grad(w_cur, lrp, hyper_dict, True)
    w_next = self.noisy_direction_stochastic(
        w_cur, lrp, hyper_dict, noisy_grad_cur
    )
    return w_next

  def noisy_grad(self, w_cur, lrp, hyper_dict, batch=False):
    """computing the noisy gradient"""
    if batch is False:
      rho_grad = (hyper_dict["grad_frac"] * hyper_dict["total"]) / hyper_dict[
          "num_iteration"
      ]
      noise_grad = np.random.normal(
          scale=(1 / lrp.num_samples) * np.sqrt(0.5 / rho_grad), size=lrp.dim
      )
      noisy_grad = lrp.grad(w_cur) + noise_grad
    else:
      std_grad = hyper_dict["noise_multiplier_grad"]
      pgrad = hyper_dict["batchsize_grad"] / lrp.num_samples
      sample_vec = np.random.binomial(n=1, p=pgrad, size=lrp.num_samples)
      batch_idx_grad = np.where(sample_vec == 1)[0]
      grad_minibatch = lrp.grad_wor(w_cur, batch_idx_grad)
      noise_grad = np.random.normal(scale=std_grad, size=lrp.dim)
      noisy_grad = (
          len(batch_idx_grad) / (lrp.num_samples * pgrad)
      ) * grad_minibatch + (noise_grad) / (lrp.num_samples * pgrad)
    return noisy_grad

  def noisy_direction(self, w_cur, lrp, hyper_dict, noisy_grad):
    """computing the noisy direction"""
    total = hyper_dict["total"]
    grad_frac = hyper_dict["grad_frac"]
    frac_trace = hyper_dict["trace_frac"]
    trace_coeff = hyper_dict["trace_coeff"]
    iters = hyper_dict["num_iteration"]
    rho_hess = (1 - grad_frac) * total / iters
    smooth_param = 0.25
    hess_cur = self.hess(w_cur)
    noisy_trace = trace_coeff * max(
        np.trace(hess_cur)
        + np.random.normal(
            scale=(0.25 / lrp.num_samples)
            * np.sqrt(0.5 / (frac_trace * rho_hess))
        ),
        0,
    )
    min_eval = max(
        (noisy_trace / ((lrp.num_samples) ** 2 * (1 - frac_trace) * rho_hess))
        ** (1 / 3),
        1 / (lrp.num_samples),
    )
    grad_norm = np.linalg.norm(noisy_grad)
    if self.type_reg == "add":  # Sensitivity is different for add vs clip
      sens2 = (
          grad_norm
          * smooth_param
          / (lrp.num_samples * min_eval**2 + smooth_param * min_eval)
      )
      noise2 = np.random.normal(
          scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)),
          size=lrp.dim,
      )
      return (
          w_cur
          - np.linalg.solve(hess_cur + min_eval * np.eye(lrp.dim), noisy_grad)
          + noise2
      )
    # type_reg=clip
    sens2 = (
        grad_norm
        * smooth_param
        / (lrp.num_samples * min_eval**2 - smooth_param * min_eval)
    )
    noise2 = np.random.normal(
        scale=sens2 * np.sqrt(0.5 / ((1 - frac_trace) * rho_hess)), size=lrp.dim
    )
    eval_hess, evec_hess = np.linalg.eigh(hess_cur)
    eval_trunc = eval_hess[eval_hess >= min_eval]
    num_eig = len(eval_trunc)
    if num_eig == 0:
      hess_modified_inv = 1 / min_eval * np.eye(lrp.dim)
    else:
      evec_trun = evec_hess[:, -num_eig:]
      hess_modified_inv = np.dot(
          evec_trun * (1 / eval_trunc - 1 / min_eval * np.ones(num_eig)),
          evec_trun.T,
      ) + 1 / min_eval * np.eye(lrp.dim)
    return w_cur - (hess_modified_inv @ noisy_grad) + noise2

  def noisy_direction_stochastic(self, w_cur, lrp, hyper_dict, noisy_grad):
    """noisy direction for the stochastic variant"""
    std_hess = hyper_dict["noise_multiplier_hess"]
    phess = hyper_dict["batchsize_hess"] / lrp.num_samples
    min_eval = hyper_dict["min_eval"]
    sample_vec = np.random.binomial(n=1, p=phess, size=lrp.num_samples)
    batch_idx_hess = np.where(sample_vec == 1)[0]
    batch_size_hess_t = len(batch_idx_hess)
    hess_cur = (
        (batch_size_hess_t)
        / (lrp.num_samples * phess)
        * self.hess(w_cur, batch_idx_hess)
    )
    smooth_param = 0.25  # smoothness parameter
    grad_norm = np.linalg.norm(noisy_grad)
    if self.type_reg == "add":  # Sensitivity is different for add vs clip
      sens2 = (
          grad_norm
          * smooth_param
          / (
              (lrp.num_samples * phess) * min_eval**2
              + smooth_param * min_eval
          )
      )
      noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
      return (
          w_cur
          - np.linalg.solve(
              hess_cur + min_eval * np.eye(len(hess_cur)), noisy_grad
          )
          + noise2
      )
    # type_reg=clip
    min_eval_c = max(min_eval, 1 / ((lrp.num_samples * phess)))
    sens2 = (
        grad_norm
        * smooth_param
        / (
            (lrp.num_samples * phess) * min_eval_c**2
            - smooth_param * min_eval_c
        )
    )
    noise2 = np.random.normal(scale=sens2 * std_hess, size=lrp.dim)
    eval_hess, evec_hess = np.linalg.eigh(hess_cur)
    eval_trunc = eval_hess[eval_hess >= min_eval_c]
    num_eig = len(eval_trunc)
    if num_eig == 0:
      hess_modified_inv = 1 / min_eval_c * np.eye(lrp.dim)
    else:
      evec_trun = evec_hess[:, -num_eig:]
      hess_modified_inv = np.dot(
          evec_trun * (1 / eval_trunc - 1 / min_eval_c * np.ones(num_eig)),
          evec_trun.T,
      ) + 1 / min_eval_c * np.eye(lrp.dim)
    return w_cur - (hess_modified_inv @ noisy_grad) + noise2
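To see how these pieces fit together outside of `run.py`, here is a small usage sketch of my own (not from the repository), assuming the `src` modules are importable and `./src/datasets_directory` exists so `Mydatasets()` can create its subfolders:

```python
import sys

sys.path.append("./src")
from dataset_loader import Mydatasets
from my_logistic_regression import MyLogisticRegression
from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv

features, labels, w_opt = Mydatasets().synthetic_dataset(num_samples=2000, dim=20)
lrp = MyLogisticRegression(features, labels)

# rho-zCDP budget per run; run.py derives this value from an (eps, delta) target.
hyper = {"total": 0.5, "num_iteration": 15,
         "grad_frac": 0.7, "trace_frac": 0.1, "trace_coeff": 0.5}
cmp_algs = CompareAlgs(lrp, w_opt, hyper)
cmp_algs.add_algo(gd_priv, "DPGD")
cmp_algs.add_algo(
    DoubleNoiseMech(lrp, type_reg="add", curvature_info="hessian").update_rule,
    "DN-Hess-add")
print(cmp_algs.loss_vals())  # excess loss per iteration for each algorithm
```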
47
research/dp_newton/src/print_results.py
Normal file
@@ -0,0 +1,47 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""print the achievable error of different algorithms"""

# pylint: skip-file
# pyformat: disable

import json
import os
import numpy as np

RESULTS_PATH = './src/results/'
excess_loss = {}
opt_algs = [
    'DPGD',
    'DN-Hess-add',
    'DN-UB-add',
    'DN-Hess-clip',
    'DN-UB-clip',
    'private-newton',
]
for filename in os.listdir(RESULTS_PATH):
  f = os.path.join(RESULTS_PATH, filename)
  with open(f, encoding='utf-8') as json_file:
    data = json.load(json_file)
  for alg in data.keys():
    if alg in opt_algs:
      loss_avg = np.array(data[alg]['loss_avg'])
      loss_std = np.array(data[alg]['loss_std'])
      clock_time = np.array(data[alg]['clock_time_avg'])
      print('optimization algorithm: ', alg)
      print('excess loss: ' + str(loss_avg[-1]))
      print('run time: ' + str(clock_time[-1]) + '(sec)')
      print('-----')
264
research/dp_newton/src/run.py
Normal file
@@ -0,0 +1,264 @@
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""collection of helper functions to run and compare different algorithms"""

# pylint: skip-file
# pyformat: disable

import argparse
import json
from dataset_loader import Mydatasets
from my_logistic_regression import MyLogisticRegression
import numpy as np
from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
from scipy.optimize import fsolve


def zcdp_to_eps(rho, delta):
  """Conversion of a zCDP guarantee to (eps, delta)-DP using the formula in
  Lemma 3.6 of [BS16].

  rho: zCDP parameter
  delta: delta in DP

  return eps
  """
  return rho + np.sqrt(4 * rho * np.log(np.sqrt(np.pi * rho) / delta))


def eps_to_zcdp(eps, delta):
  """Conversion of an (eps, delta)-DP guarantee to rho-zCDP.

  eps: eps in DP
  delta: delta in DP

  return rho
  """

  def func_root(rho_zcdp):
    return zcdp_to_eps(rho_zcdp, delta) - eps

  root = fsolve(func_root, x0=0.001)[-1]
  return root


def helper_fun(datasetname, alg_type, params_exp):
  """helper function for running the different algorithms

  args:
    datasetname = dataset
    alg_type = type of the optimization algorithm
    params_exp = hyperparameters
  """
  feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
  privacy_dp = params_exp["total"]
  params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
  log_reg = MyLogisticRegression(feature_vecs, labels)
  alg_dict, filename_params = prepare_alg_dict(
      alg_type, datasetname, privacy_dp, params_exp, log_reg
  )
  compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
  result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
  result["num-samples"] = len(labels)
  with open(
      "src/results/" + filename_params, "w", encoding="utf8"
  ) as json_file:
    json.dump(result, json_file)


def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
  """prepare the update rules for the algorithms and the output filename"""
  alg_dict = None
  filename_params = None
  if alg_type == "double_noise":
    filename_params = (
        "so_"
        + datasetname
        + "_"
        + str(privacy_dp)
        + "_"
        + "DP"
        + "_"
        + str(params_exp["num_iteration"])
        + "_"
        + str(params_exp["grad_frac"])
        + "_"
        + str(params_exp["trace_frac"])
        + "_"
        + str(params_exp["trace_coeff"])
        + ".txt"
    )
    dnm_hess_add = DoubleNoiseMech(
        log_reg, type_reg="add", curvature_info="hessian"
    ).update_rule
    dnm_ub_add = DoubleNoiseMech(
        log_reg, type_reg="add", curvature_info="ub"
    ).update_rule
    dnm_hess_clip = DoubleNoiseMech(
        log_reg, type_reg="clip", curvature_info="hessian"
    ).update_rule
    dnm_ub_clip = DoubleNoiseMech(
        log_reg, type_reg="clip", curvature_info="ub"
    ).update_rule
    alg_dict = {
        "DN-Hess-add": dnm_hess_add,
        "DN-Hess-clip": dnm_hess_clip,
        "DN-UB-clip": dnm_ub_clip,
        "DN-UB-add": dnm_ub_add,
    }
  elif alg_type == "dp_gd":
    filename_params = (
        "gd_"
        + datasetname
        + "_"
        + str(privacy_dp)
        + "_"
        + "DP"
        + "_"
        + str(params_exp["num_iteration"])
        + ".txt"
    )
    alg_dict = {"DPGD": gd_priv}
  elif alg_type == "damped_newton":
    filename_params = (
        "newton_"
        + datasetname
        + "_"
        + str(privacy_dp)
        + "_"
        + "DP"
        + "_"
        + str(params_exp["num_iteration"])
        + ".txt"
    )
    alg_dict = {"private-newton": private_newton}
  return alg_dict, filename_params


class RunReleaseStats:
  """Helper class to run the different algorithms and store the results"""

  def __init__(self, compare_algs, algs_dict, num_rep=10):
    self.compare_algs = compare_algs
    self.algs_dict = algs_dict
    self.num_rep = num_rep
    self.losses = 0
    self.gradnorm = 0
    self.accuracy = 0
    self.wall_clock = 0

  def run_algs(self):
    """method to run the different algorithms and store their stats"""
    for rep in range(self.num_rep):
      for alg_name, alg_update_rule in self.algs_dict.items():
        self.compare_algs.add_algo(alg_update_rule, alg_name)
      losses_dict = self.compare_algs.loss_vals()
      gradnorm_dict = self.compare_algs.gradnorm_vals()
      accuracy_dict = self.compare_algs.accuracy_vals()
      wall_clock_dict = self.compare_algs.wall_clock_alg()
      if rep == 0:
        self.losses = losses_dict
        self.gradnorm = gradnorm_dict
        self.accuracy = accuracy_dict
        self.wall_clock = wall_clock_dict
      else:
        for alg in self.losses:
          self.losses[alg].extend(losses_dict[alg])
          self.gradnorm[alg].extend(gradnorm_dict[alg])
          self.accuracy[alg].extend(accuracy_dict[alg])
          self.wall_clock[alg].extend(wall_clock_dict[alg])

  def summarize_stats(self):
    """method to summarize the results"""
    self.run_algs()
    result = {}
    result["acc-best"] = self.compare_algs.accuracy_np().tolist()
    for alg in self.losses:
      result[alg] = {}
      loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
      loss_std = np.std(np.array(self.losses[alg]), axis=0)
      result[alg]["loss_avg"] = (loss_avg).tolist()
      result[alg]["loss_std"] = (loss_std / np.sqrt(self.num_rep)).tolist()
      gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
      gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
      result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
      result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
      acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
      acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
      result[alg]["acc_avg"] = (acc_avg).tolist()
      result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
      clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
      clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
      result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
      result[alg]["clock_time_std"] = (
          clocktime_std / np.sqrt(self.num_rep)
      ).tolist()

    return result


def main():
  """main function"""
  parser = argparse.ArgumentParser()
  parser.add_argument("--datasetname")
  parser.add_argument("--alg_type")
  parser.add_argument("--total")
  parser.add_argument("--numiter")
  # double noise and newton
  parser.add_argument("--grad_frac")
  parser.add_argument("--trace_frac")
  parser.add_argument("--trace_coeff")
  args = parser.parse_args()
  datasetname = args.datasetname
  alg_type = args.alg_type
  total = float(args.total)
  num_iter = int(args.numiter)
  if alg_type == "double_noise":
    grad_frac = float(args.grad_frac)
    trace_frac = float(args.trace_frac)
    trace_coeff = float(args.trace_coeff)
    hyper_parameters = {
        "total": total,
        "grad_frac": grad_frac,
        "trace_frac": trace_frac,
        "trace_coeff": trace_coeff,
        "num_iteration": num_iter,
    }
  elif alg_type == "dp_gd":
    hyper_parameters = {"total": total, "num_iteration": num_iter}
  elif alg_type == "damped_newton":
    grad_frac = float(args.grad_frac)
    hyper_parameters = {
        "total": total,
        "num_iteration": num_iter,
        "grad_frac": grad_frac,
    }
  else:
    raise ValueError("no such optimization algorithm exists")
  print(
      "optimization algorithm "
      + alg_type
      + ","
      + "dataset name: "
      + datasetname
  )
  helper_fun(datasetname, alg_type, hyper_parameters)


if __name__ == "__main__":
  main()
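The privacy accounting in `run.py` works in zCDP internally and converts the user-facing epsilon with `eps_to_zcdp`. A tiny round-trip check of these two helpers (my own sketch, assuming the dependencies listed in the README are installed and `./src` is on the path) is:

```python
import sys

sys.path.append("./src")
from run import eps_to_zcdp, zcdp_to_eps

eps, delta = 3.0, 1e-6
rho = eps_to_zcdp(eps, delta)        # rho-zCDP budget used inside the optimizers
print(rho, zcdp_to_eps(rho, delta))  # the second value should be close to eps
```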