tensorflow_privacy/research/dp_newton/src/my_logistic_regression.py

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""logistic regression class and its methods"""
# pylint: skip-file
# pyformat: disable
import numpy as np
class MyLogisticRegression:
"""return a logistic regression problem
There is a dataset consisting of features (vectors of norm <=1)
and labels (+1,-1), represented as a numpy array.
There is also an L2 regularizer.
"""
  def __init__(self, input_vecs, labels, reg=1e-8):
    """Initialize the data and the regularizer.

    X = n x d numpy array representing features
    y = n x 1 numpy array representing labels
    reg = L2 regularizing coefficient (to ensure solution is finite)

    Data will be rescaled so that ||X[i,:]|| * |y[i]| <= 1 for all i.
    """
    self.reg = float(reg)
    input_vecs = np.array(input_vecs)
    labels = np.array(labels)
    assert len(input_vecs.shape) == 2
    assert len(labels.shape) == 1
    self.input_vecs = input_vecs
    self.labels = labels
    self.num_samples, self.dim = input_vecs.shape
    assert labels.shape[0] == self.num_samples
    signed_data = input_vecs * labels[:, np.newaxis]
    norm = np.linalg.norm(signed_data, axis=1)
    scale = np.maximum(norm, np.ones_like(norm))
    self.data = (1 / scale[:, None]) * signed_data
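
  # Note on the preprocessing above: every row of self.data is the signed,
  # rescaled example y[i] * X[i, :] / max(1, ||X[i, :] * y[i]||), so its norm
  # is at most 1.  E.g. a row X[i, :] = [3, 4] with y[i] = -1 becomes
  # [-0.6, -0.8] (norm 5 scaled down to 1), while a row of norm <= 1 is kept
  # as is.  The loss/gradient/Hessian methods below consume self.data, while
  # accuracy() uses the raw self.input_vecs.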

  def loss(self, param):
    """Computes the loss represented by this object at w.

    If X,y is the data and reg is the regularizer, then the loss is
    (1/n)sum_i^n log(1+exp(-<w,X[i,:]*y[i]>)) + (reg/2)||w||^2
    """
    data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
    reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
    return data_loss + reg_loss

  def loss_wor(self, param):
    """Computes the loss represented by this object at w without regularizer.

    If X,y is the data, then the loss is
    (1/n)sum_i^n log(1+exp(-<w,X[i,:]*y[i]>))
    """
    data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
    return data_loss

  def accuracy(self, param):
    """Computes the accuracy of the model given by w."""
    score_pred = np.dot(self.input_vecs, param)
    # Numerically stable sigmoid: each branch only exponentiates a value whose
    # sign keeps exp() from overflowing on the rows where that branch is used.
    label1_prob = np.where(
        score_pred >= 0,
        1 / (1 + np.exp(-score_pred)),
        np.exp(score_pred) / (1 + np.exp(score_pred)),
    )
    return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)

  def grad(self, param, batch_idx=None):
    """Computes the gradient of the logistic regression at a given point w.

    If X,y is the data and reg is the regularizer, then the gradient is
    (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>)) + reg*w
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data
    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad + self.reg * param
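
  # A quick way to sanity-check grad() is a central finite difference of
  # loss(), e.g. (loss(w + eps*e_j) - loss(w - eps*e_j)) / (2*eps) per
  # coordinate j; the illustrative __main__ sketch at the bottom of this file
  # does exactly that on synthetic data.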

  def grad_wor(self, param, batch_idx=None):
    """Computes the gradient at a given point w, without the regularizer term.

    If X,y is the data, then the gradient is
    (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w,X[i,:]*y[i]>))
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data
    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad

  def hess(self, param, batch_idx=None):
    """Computes the Hessian of the logistic regression at a given point w.

    The Hessian is the matrix of second derivatives.
    If X,y is the data and reg is the regularizer, then the Hessian is
    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w,X[i,:]*y[i]>/2)*2)^2 + reg*I
    where we assume y[i]^2==1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = np.dot(data_batch, param) / 2
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size + self.reg * np.eye(self.dim)
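
  # Note: 1/(exp(t) + exp(-t))^2 == 1/(2*cosh(t))^2, with t the halved score
  # <w, X[i,:]*y[i]>/2, so coeff_hess above matches the cosh factor in the
  # docstring.  Each outer product is weighted by at most 1/4, attained when
  # the score is 0.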

  def hess_wor(self, param, batch_idx=None):
    """Computes the Hessian at a given point w, without the regularizer term.

    The Hessian is the matrix of second derivatives.
    If X,y is the data, then the Hessian is
    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w,X[i,:]*y[i]>/2)*2)^2
    where we assume y[i]^2==1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = np.dot(data_batch, param) / 2
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size

  def upperbound(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound on the loss function.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)
    The constant and linear terms are just the first-order Taylor expansion;
    this function gives the quadratic term (which replaces the Hessian).
    https://twitter.com/shortstein/status/1557961202256318464
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = -np.dot(data_batch, param)  # vector of -y_i<x_i,w> for i in [n]
    # v = 0.5*tanh(a/2)/a, but avoid 0/0 at a=0 by using the limit value 0.25.
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size + self.reg * np.eye(self.dim)
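
  # Since tanh(t/2) <= t/2 for t >= 0 and the weight is even in t, the factor
  # 0.5*tanh(t/2)/t is at most 0.25 (its limit at t = 0).  The bound is thus
  # never looser than the generic 1/4-smoothness quadratic, and it is strictly
  # tighter whenever the score <w, X[i,:]*y[i]> is nonzero.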

  def upperbound_wor(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound, without the regularizer term.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)
    The constant and linear terms are just the first-order Taylor expansion;
    this function gives the quadratic term (which replaces the Hessian).
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = -np.dot(data_batch, param)  # vector of -y_i<x_i,w> for i in [n]
    # v = 0.5*tanh(a/2)/a, but avoid 0/0 at a=0 by using the limit value 0.25.
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size
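

# Minimal usage sketch (illustrative only; the synthetic data, seed, and
# tolerances below are made-up choices, not part of the original module).
# It checks grad() against a central finite difference of loss() and verifies
# that upperbound() really majorizes the loss around a random point.
if __name__ == "__main__":
  rng = np.random.default_rng(0)
  num_samples, dim = 50, 3
  features = rng.normal(size=(num_samples, dim))
  true_param = rng.normal(size=dim)
  labels = np.where(np.dot(features, true_param) >= 0, 1, -1)
  problem = MyLogisticRegression(features, labels, reg=1e-4)

  w = rng.normal(size=dim)

  # Central finite-difference check of the gradient, coordinate by coordinate.
  eps = 1e-6
  fd_grad = np.zeros(dim)
  for j in range(dim):
    basis = np.zeros(dim)
    basis[j] = eps
    fd_grad[j] = (problem.loss(w + basis) - problem.loss(w - basis)) / (2 * eps)
  assert np.allclose(fd_grad, problem.grad(w), atol=1e-5)

  # Quadratic upper bound check: for any step v,
  # loss(w + v) <= loss(w) + <grad(w), v> + 0.5 * v^T upperbound(w) v.
  v = 0.1 * rng.normal(size=dim)
  quad = (
      problem.loss(w)
      + np.dot(problem.grad(w), v)
      + 0.5 * np.dot(v, np.dot(problem.upperbound(w), v))
  )
  assert problem.loss(w + v) <= quad + 1e-12
  print("accuracy at w:", problem.accuracy(w))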