# Forked from 626_privacy/tensorflow_privacy
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""logistic regression class and its methods"""
|
||
|
|
||
|
# pylint: skip-file
|
||
|
# pyformat: disable
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
class MyLogisticRegression:
  """Represents a logistic regression problem.

  There is a dataset consisting of features (vectors of norm <= 1) and
  labels (+1, -1), each represented as a numpy array. There is also an
  L2 regularizer.
  """

  def __init__(self, input_vecs, labels, reg=1e-8):
    """Initialize the data and the regularizer.

    input_vecs: X = n x d numpy array of features.
    labels: y = length-n numpy array of labels (+1 or -1).
    reg: L2 regularization coefficient (to ensure the solution is finite).

    The data will be rescaled so that ||X[i, :]|| * |y[i]| <= 1 for all i.
    """
    self.reg = float(reg)
    input_vecs = np.array(input_vecs)
    labels = np.array(labels)
    assert len(input_vecs.shape) == 2
    assert len(labels.shape) == 1
    self.input_vecs = input_vecs
    self.labels = labels
    self.num_samples, self.dim = input_vecs.shape
    assert labels.shape[0] == self.num_samples
    signed_data = input_vecs * labels[:, np.newaxis]
    norm = np.linalg.norm(signed_data, axis=1)
    scale = np.maximum(norm, np.ones_like(norm))
    self.data = (1 / scale[:, None]) * signed_data

  def loss(self, param):
    """Computes the loss represented by this object at w.

    If X, y is the data and reg is the regularizer, then the loss is
    (1/n)sum_i^n log(1+exp(-<w, X[i,:]*y[i]>)) + (reg/2)||w||^2
    """
    data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
    reg_loss = 0.5 * self.reg * np.linalg.norm(param) ** 2
    return data_loss + reg_loss

  def loss_wor(self, param):
    """Computes the loss represented by this object at w without regularizer.

    If X, y is the data, then the loss is
    (1/n)sum_i^n log(1+exp(-<w, X[i,:]*y[i]>))
    """
    data_loss = np.mean(np.log1p(np.exp(-np.dot(self.data, param))))
    return data_loss

  def accuracy(self, param):
    """Computes the accuracy of the model given by w."""
    score_pred = np.dot(self.input_vecs, param)
    # Sigmoid of the scores, written in a form that is numerically stable for
    # each sign of the score.
    label1_prob = np.where(
        score_pred >= 0,
        1 / (1 + np.exp(-score_pred)),
        np.exp(score_pred) / (1 + np.exp(score_pred)),
    )
    return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)

  def grad(self, param, batch_idx=None):
    """Computes the gradient of the logistic regression at a given point w.

    If X, y is the data and reg is the regularizer, then the gradient is
    (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w, X[i,:]*y[i]>)) + reg*w
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data

    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad + self.reg * param

  def grad_wor(self, param, batch_idx=None):
    """Computes the unregularized gradient of the logistic regression at w.

    If X, y is the data, then the gradient is
    (-1/n)sum_i^n X[i,:]*y[i]/(1+exp(<w, X[i,:]*y[i]>))
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data

    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad

  def hess(self, param, batch_idx=None):
    """Computes the Hessian of the logistic regression at a given point w.

    The Hessian is the matrix of second derivatives.

    If X, y is the data and reg is the regularizer, then the Hessian is
    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w, X[i,:]*y[i]>/2)*2)^2 + reg*I
    where we assume y[i]^2 == 1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = np.dot(data_batch, param) / 2
    # 1 / (2*cosh(t))^2 with t = <w, X[i,:]*y[i]>/2.
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size + self.reg * np.eye(self.dim)

  def hess_wor(self, param, batch_idx=None):
    """Computes the unregularized Hessian of the logistic regression at w.

    The Hessian is the matrix of second derivatives.

    If X, y is the data, then the Hessian is
    (1/n)sum_i^n X[i,:]*X[i,:]^T / (cosh(<w, X[i,:]*y[i]>/2)*2)^2
    where we assume y[i]^2 == 1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = np.dot(data_batch, param) / 2
    # 1 / (2*cosh(t))^2 with t = <w, X[i,:]*y[i]>/2.
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var)) ** 2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size

  def upperbound(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound on the loss function.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)

    The constant and linear terms are just the first-order Taylor expansion;
    this function gives the quadratic term (which replaces the Hessian).
    https://twitter.com/shortstein/status/1557961202256318464
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = -np.dot(data_batch, param)  # vector of -y_i*<x_i,w> for i in [n]
    # v = 0.5*tanh(a/2)/a, but avoid 0/0 by using the limit 0.25 near a = 0.
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size + self.reg * np.eye(self.dim)

  def upperbound_wor(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound on the unregularized loss.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)

    The constant and linear terms are just the first-order Taylor expansion;
    this function gives the quadratic term (which replaces the Hessian).
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples

    temp_var = -np.dot(data_batch, param)  # vector of -y_i*<x_i,w> for i in [n]
    # v = 0.5*tanh(a/2)/a, but avoid 0/0 by using the limit 0.25 near a = 0.
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size
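

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It shows, on
# synthetic data, how the class above is typically exercised: a few gradient
# steps, an accuracy check, a finite-difference test of grad(), and a check of
# the quadratic upper bound from upperbound_wor(). The synthetic data, step
# size, and tolerances below are assumptions made only for this demonstration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
  rng = np.random.default_rng(0)
  n, d = 200, 5
  features = rng.normal(size=(n, d))
  true_w = rng.normal(size=d)
  labels = np.where(features @ true_w >= 0, 1, -1)

  problem = MyLogisticRegression(features, labels, reg=1e-4)
  w = np.zeros(d)

  # Plain gradient descent; step size 1.0 is an arbitrary but safe choice
  # because the rescaled data has per-example norm <= 1.
  for _ in range(200):
    w -= 1.0 * problem.grad(w)

  print("loss:", problem.loss(w))
  print("accuracy:", problem.accuracy(w))

  # Central finite-difference check of the gradient at w.
  eps = 1e-6
  fd_grad = np.array([
      (problem.loss(w + eps * np.eye(d)[i]) - problem.loss(w - eps * np.eye(d)[i]))
      / (2 * eps)
      for i in range(d)
  ])
  assert np.allclose(fd_grad, problem.grad(w), atol=1e-4)

  # The quadratic form from upperbound_wor() should upper-bound the
  # unregularized loss at a nearby point w2 when expanded around w.
  w2 = w + 0.1 * rng.normal(size=d)
  delta = w2 - w
  quad = (problem.loss_wor(w) + problem.grad_wor(w) @ delta
          + 0.5 * delta @ problem.upperbound_wor(w) @ delta)
  assert problem.loss_wor(w2) <= quad + 1e-9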