# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Logistic regression class and its methods."""

# pylint: skip-file
# pyformat: disable

import numpy as np


class MyLogisticRegression:
  """Represents a logistic regression problem.

  There is a dataset consisting of features (vectors of norm <= 1) and
  labels (+1, -1), represented as numpy arrays. There is also an L2
  regularizer.
  """

  def __init__(self, input_vecs, labels, reg=1e-8):
    """Initializes the data and the regularizer.

    X = n x d numpy array representing features.
    y = n x 1 numpy array representing labels.
    reg = L2 regularization coefficient (to ensure the solution is finite).

    Data will be rescaled so that ||X[i, :]|| * |y[i]| <= 1 for all i.
    """
    self.reg = float(reg)
    input_vecs = np.array(input_vecs)
    labels = np.array(labels)
    assert len(input_vecs.shape) == 2
    assert len(labels.shape) == 1
    self.input_vecs = input_vecs
    self.labels = labels
    self.num_samples, self.dim = input_vecs.shape
    assert labels.shape[0] == self.num_samples
    signed_data = input_vecs * labels[:, np.newaxis]
    norm = np.linalg.norm(signed_data, axis=1)
    scale = np.maximum(norm, np.ones_like(norm))
    self.data = (1 / scale[:, None]) * signed_data

  def loss(self, param):
    """Computes the loss represented by this object at w.

    If X, y is the data and reg is the regularizer, then the loss is
    (1/n) sum_i^n log(1 + exp(-<X[i, :] * y[i], w>)) + (reg/2) ||w||^2
    """
    # log(1 + exp(-z)) computed as logaddexp(0, -z) to avoid overflow
    # for large ||w||.
    data_loss = np.mean(np.logaddexp(0, -np.dot(self.data, param)))
    reg_loss = 0.5 * self.reg * np.linalg.norm(param)**2
    return data_loss + reg_loss

  def loss_wor(self, param):
    """Computes the loss represented by this object at w without regularizer.

    If X, y is the data, then the loss is
    (1/n) sum_i^n log(1 + exp(-<X[i, :] * y[i], w>))
    """
    data_loss = np.mean(np.logaddexp(0, -np.dot(self.data, param)))
    return data_loss

  def accuracy(self, param):
    """Computes the accuracy of the model given by w."""
    score_pred = np.dot(self.input_vecs, param)
    # Numerically stable sigmoid: use the formulation whose exponent is
    # non-positive on each branch.
    label1_prob = np.where(
        score_pred >= 0,
        1 / (1 + np.exp(-score_pred)),
        np.exp(score_pred) / (1 + np.exp(score_pred)),
    )
    return np.mean(np.where(label1_prob >= 0.5, 1, -1) == self.labels)

  def grad(self, param, batch_idx=None):
    """Computes the gradient of the logistic regression at a given point w.

    If X, y is the data and reg is the regularizer, then the gradient is
    (-1/n) sum_i^n X[i, :] * y[i] / (1 + exp(<X[i, :] * y[i], w>)) + reg * w
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data
    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad + self.reg * param
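
  # Note: the "_wor" ("without regularizer") variants below return the same
  # quantities minus the regularizer term. Illustrative identities (assumed
  # usage, not part of the original API):
  #   model.grad_wor(w) + model.reg * w                 == model.grad(w)
  #   model.hess_wor(w) + model.reg * np.eye(model.dim) == model.hess(w)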

  def grad_wor(self, param, batch_idx=None):
    """Computes the gradient of the logistic regression, without regularizer.

    If X, y is the data, then the gradient is
    (-1/n) sum_i^n X[i, :] * y[i] / (1 + exp(<X[i, :] * y[i], w>))
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
    else:
      data_batch = self.data
    coeff_grad = -1 / (1 + np.exp(np.dot(data_batch, param)))
    data_grad = np.mean(data_batch * coeff_grad[:, np.newaxis], axis=0)
    return data_grad

  def hess(self, param, batch_idx=None):
    """Computes the Hessian of the logistic regression at a given point w.

    The Hessian is the matrix of second derivatives.

    If X, y is the data and reg is the regularizer, then the Hessian is
    (1/n) sum_i^n X[i, :] * X[i, :]^T / (2 * cosh(<X[i, :] * y[i], w> / 2))^2
      + reg * I
    where we assume y[i]^2 == 1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = np.dot(data_batch, param) / 2
    # 1 / (exp(t) + exp(-t))^2 == 1 / (2 * cosh(t))^2 with t = <x_i, w> / 2.
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var))**2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size + self.reg * np.eye(self.dim)

  def hess_wor(self, param, batch_idx=None):
    """Computes the Hessian of the logistic regression, without regularizer.

    The Hessian is the matrix of second derivatives.

    If X, y is the data, then the Hessian is
    (1/n) sum_i^n X[i, :] * X[i, :]^T / (2 * cosh(<X[i, :] * y[i], w> / 2))^2
    where we assume y[i]^2 == 1.
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = np.dot(data_batch, param) / 2
    coeff_hess = 1 / (np.exp(temp_var) + np.exp(-temp_var))**2
    raw_hess = np.dot(data_batch.T * coeff_hess, data_batch)
    return raw_hess / batch_size

  def upperbound(self, param, batch_idx=None):
    """Tightest universal quadratic upper bound on the loss function.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)

    The constant and linear terms are just the first-order Taylor expansion;
    this function gives the quadratic term (which replaces the Hessian).
    https://twitter.com/shortstein/status/1557961202256318464
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = -np.dot(data_batch, param)  # a_i = -<X[i, :] * y[i], w>
    # Coefficient 0.5 * tanh(a/2) / a; the np.divide guard avoids 0/0 at
    # a == 0, where the limit is 0.25.
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size + self.reg * np.eye(self.dim)

  def upperbound_wor(self, param, batch_idx=None):
    """Tightest quadratic upper bound on the loss, without regularizer.

    log(1+exp(x)) <= log(1+exp(a)) + (x-a)/(1+exp(-a)) + (x-a)^2*tanh(a/2)/(4*a)

    The constant and linear terms are just the first-order Taylor expansion;
    this function gives the quadratic term (which replaces the Hessian).
    """
    if batch_idx is not None:
      data_batch = self.data[batch_idx]
      batch_size = len(batch_idx)
    else:
      data_batch = self.data
      batch_size = self.num_samples
    temp_var = -np.dot(data_batch, param)  # a_i = -<X[i, :] * y[i], w>
    temp_var2 = np.divide(
        0.5 * np.tanh(temp_var / 2),
        temp_var,
        out=np.ones(temp_var.shape) * 0.25,
        where=np.abs(temp_var) > 1e-9,
    )
    hess_non = np.dot(data_batch.T * temp_var2, data_batch)
    return hess_non / batch_size
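

# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the original module):
# builds a small synthetic dataset, checks grad() against a central
# finite-difference estimate, then runs Newton-style steps that use
# upperbound() as the curvature matrix. Because upperbound() majorizes the
# loss, each full step is guaranteed not to increase the loss.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
  rng = np.random.default_rng(0)
  n, d = 200, 5
  true_w = rng.normal(size=d)
  features = rng.normal(size=(n, d))
  labels = np.where(
      np.dot(features, true_w) + 0.1 * rng.normal(size=n) >= 0, 1, -1)
  model = MyLogisticRegression(features, labels, reg=1e-4)

  # Finite-difference sanity check of the gradient at a random point.
  w = rng.normal(size=d)
  eps = 1e-6
  g_fd = np.array([
      (model.loss(w + eps * np.eye(d)[i]) - model.loss(w - eps * np.eye(d)[i]))
      / (2 * eps)
      for i in range(d)
  ])
  assert np.allclose(model.grad(w), g_fd, atol=1e-4)

  # Majorize-minimize: solve the quadratic upper bound exactly at each step.
  w = np.zeros(d)
  for step in range(20):
    direction = np.linalg.solve(model.upperbound(w), model.grad(w))
    w = w - direction
    print(f"step {step:2d}  loss {model.loss(w):.6f}  "
          f"accuracy {model.accuracy(w):.3f}")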