From 392c506c6255624eb848b44e9651a453e8c7bac5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 28 Jun 2021 11:08:19 -0700 Subject: [PATCH] Implementation of Differentially Private Logistic Regression. PiperOrigin-RevId: 381904153 --- .../privacy/logistic_regression/datasets.py | 125 +++++++++++ .../logistic_regression/datasets_test.py | 77 +++++++ .../multinomial_logistic.py | 208 ++++++++++++++++++ .../multinomial_logistic_test.py | 81 +++++++ .../single_layer_softmax.py | 59 +++++ .../single_layer_softmax_test.py | 39 ++++ 6 files changed, 589 insertions(+) create mode 100644 tensorflow_privacy/privacy/logistic_regression/datasets.py create mode 100644 tensorflow_privacy/privacy/logistic_regression/datasets_test.py create mode 100644 tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py create mode 100644 tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py create mode 100644 tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py create mode 100644 tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py diff --git a/tensorflow_privacy/privacy/logistic_regression/datasets.py b/tensorflow_privacy/privacy/logistic_regression/datasets.py new file mode 100644 index 0000000..92ea4a3 --- /dev/null +++ b/tensorflow_privacy/privacy/logistic_regression/datasets.py @@ -0,0 +1,125 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Functions for generating train and test data for logistic regression models. + +Includes two types of datasets: +- Synthetic linearly separable labeled examples. + Here, in the binary classification case, we generate training examples by + first sampling a random weight vector w from a multivariate Gaussian + distribution. Then, for each training example, we randomly sample a point x, + also from a multivariate Gaussian distribution, and then set the label y equal + to 1 if the inner product of w and x is positive, and equal to 0 otherwise. As + such, the training data is linearly separable. + More generally, in the case where there are num_classes many classes, we + sample num_classes different w vectors. After sampling x, we will set its + class label y to the class for which the corresponding w vector has the + largest inner product with x. +- MNIST 10-class classification dataset. +""" + +from typing import Tuple +import dataclasses +import numpy as np +from sklearn import preprocessing +import tensorflow as tf + + +@dataclasses.dataclass +class RegressionDataset: + """Class for storing labeled examples for a regression dataset. + + Attributes: + points: array of shape (num_examples, dimension) containing the points to + be classified. + labels: array of shape (num_examples,) containing the corresponding labels, + each belonging to the set {0,1,...,num_classes-1}, where num_classes is + the number of classes. + """ + points: np.ndarray + labels: np.ndarray + + +def linearly_separable_labeled_examples( + num_examples: int, weights: np.ndarray)-> RegressionDataset: + """Generates num_examples labeled examples using separator given by weights. + + Args: + num_examples: number of labeled examples to generate. + weights: dimension by num_classes matrix containing coefficients of linear + separator, where dimension is the dimension and num_classes is the number + of classes. + + Returns: + RegressionDataset consisting of points and labels. 
Each point has unit + l2-norm. + """ + dimension = weights.shape[0] + # Generate points and normalize each to have unit l2-norm. + points_non_normalized = np.random.normal(size=(num_examples, dimension)) + points = preprocessing.normalize(points_non_normalized) + # Compute labels. + labels = np.argmax(np.matmul(points, weights), axis=1) + return RegressionDataset(points, labels) + + +def synthetic_linearly_separable_data( + num_train: int, num_test: int, dimension: int, + num_classes: int)-> Tuple[RegressionDataset, RegressionDataset]: + """Generates synthetic train and test data for logistic regression. + + Args: + num_train: number of training data points. + num_test: number of test data points. + dimension: the dimension of the classification problem. + num_classes: number of classes, assumed to be at least 2. + + Returns: + train_dataset: num_train labeled examples, with unit l2-norm points. + test_dataset: num_test labeled examples, with unit l2-norm points. + """ + if num_classes < 2: + raise ValueError(f'num_classes must be at least 2. It is {num_classes}.') + + # Generate weight vector. + weights = np.random.normal(size=(dimension, num_classes)) + + # Generate train labeled examples. + train_dataset = linearly_separable_labeled_examples(num_train, weights) + + # Generate test labeled examples. + test_dataset = linearly_separable_labeled_examples(num_test, weights) + + return (train_dataset, test_dataset) + + +def mnist_dataset()-> Tuple[RegressionDataset, RegressionDataset]: + """Generates (normalized) train and test data for MNIST. + + Returns: + train_dataset: MNIST labeled examples, with unit l2-norm points. + test_dataset: MNIST labeled examples, with unit l2-norm points. 
+ """ + train_data, test_data = tf.keras.datasets.mnist.load_data() + train_points_non_normalized, train_labels = train_data + test_points_non_normalized, test_labels = test_data + num_train = train_points_non_normalized.shape[0] + num_test = test_points_non_normalized.shape[0] + train_points_non_normalized = train_points_non_normalized.reshape( + (num_train, -1)) + test_points_non_normalized = test_points_non_normalized.reshape( + (num_test, -1)) + train_points = preprocessing.normalize(train_points_non_normalized) + test_points = preprocessing.normalize(test_points_non_normalized) + return (RegressionDataset(train_points, train_labels), + RegressionDataset(test_points, test_labels)) diff --git a/tensorflow_privacy/privacy/logistic_regression/datasets_test.py b/tensorflow_privacy/privacy/logistic_regression/datasets_test.py new file mode 100644 index 0000000..7d9b27f --- /dev/null +++ b/tensorflow_privacy/privacy/logistic_regression/datasets_test.py @@ -0,0 +1,77 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for tensorflow_privacy.privacy.logistic_regression.datasets.""" + +import unittest +from absl.testing import parameterized +import numpy as np +from tensorflow_privacy.privacy.logistic_regression import datasets + + +class DatasetsTest(parameterized.TestCase): + + @parameterized.parameters( + (1, np.array([[1],])), + (2, np.array([[1],])), + (5, np.array([[-1, 1], [1, -1]])), + (15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]]))) + def test_linearly_separable_labeled_examples(self, num_examples, weights): + dimension, num_classes = weights.shape + dataset = datasets.linearly_separable_labeled_examples(num_examples, + weights) + self.assertEqual(dataset.points.shape, (num_examples, dimension)) + self.assertEqual(dataset.labels.shape, (num_examples,)) + product = np.matmul(dataset.points, weights) + for i in range(num_examples): + for j in range(num_classes): + self.assertGreaterEqual(product[i, dataset.labels[i]], product[i, j]) + + @parameterized.parameters( + (1, 1, 1, 2), + (20, 5, 1, 2), + (20, 5, 2, 2), + (1000, 10, 15, 10)) + def test_synthetic(self, num_train, num_test, dimension, num_classes): + (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( + num_train, num_test, dimension, num_classes) + self.assertEqual(train_dataset.points.shape, (num_train, dimension)) + self.assertEqual(train_dataset.labels.shape, (num_train,)) + self.assertEqual(test_dataset.points.shape, (num_test, dimension)) + self.assertEqual(test_dataset.labels.shape, (num_test,)) + # Check that each train and test point has unit l2-norm. + for i in range(num_train): + self.assertAlmostEqual(np.linalg.norm(train_dataset.points[i, :]), 1) + for i in range(num_test): + self.assertAlmostEqual(np.linalg.norm(test_dataset.points[i, :]), 1) + # Check that each train and test label is in {0,...,num_classes-1}. 
+ self.assertTrue(np.all(np.isin(train_dataset.labels, range(num_classes)))) + self.assertTrue(np.all(np.isin(test_dataset.labels, range(num_classes)))) + + def test_mnist_dataset(self): + (train_dataset, test_dataset) = datasets.mnist_dataset() + self.assertEqual(train_dataset.points.shape, (60000, 784)) + self.assertEqual(train_dataset.labels.shape, (60000,)) + self.assertEqual(test_dataset.points.shape, (10000, 784)) + self.assertEqual(test_dataset.labels.shape, (10000,)) + # Check that each train and test point has unit l2-norm. + for i in range(train_dataset.points.shape[0]): + self.assertAlmostEqual(np.linalg.norm(train_dataset.points[i, :]), 1) + for i in range(test_dataset.points.shape[0]): + self.assertAlmostEqual(np.linalg.norm(test_dataset.points[i, :]), 1) + # Check that each train and test label is in {0,...,9}. + self.assertTrue(np.all(np.isin(train_dataset.labels, range(10)))) + self.assertTrue(np.all(np.isin(test_dataset.labels, range(10)))) + +if __name__ == '__main__': + unittest.main() diff --git a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py new file mode 100644 index 0000000..fd80485 --- /dev/null +++ b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic.py @@ -0,0 +1,208 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Implementation of differentially private multinomial logistic regression. + +Algorithms include: + +- Based on the differentially private objective perturbation method of Kifer et +al. (COLT 2012): http://proceedings.mlr.press/v23/kifer12/kifer12.pdf +Their algorithm can be used for convex optimization problems in general, and in +the case of multinomial logistic regression in particular. + +- Training procedure based on the Differentially Private Stochastic Gradient +Descent (DP-SGD) implementation in TensorFlow Privacy, which is itself based on +the algorithm of Abadi et al.: https://arxiv.org/pdf/1607.00133.pdf. +""" + +import math +from typing import List, Optional, Tuple +import numpy as np +import tensorflow as tf +from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy as compute_epsilon +from tensorflow_privacy.privacy.logistic_regression import datasets +from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax +from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras +from differential_privacy.python.accounting import common + + +@tf.keras.utils.register_keras_serializable(package='Custom', name='Kifer') +class KiferRegularizer(tf.keras.regularizers.Regularizer): + """Class corresponding to the regularizer in Algorithm 1 of Kifer et al. + + Attributes: + l2_regularizer: scalar coefficient for l2-regularization term. + num_train: number of training examples. + b: tensor of shape (d,num_classes) linearly translating the objective. 
+ """ + + def __init__(self, num_train: int, dimension: int, epsilon: float, + delta: float, num_classes: int, input_clipping_norm: float): + self._num_train = num_train + (self._l2_regularizer, + variance) = self.logistic_objective_perturbation_parameters( + num_train, epsilon, delta, num_classes, input_clipping_norm) + self._b = tf.random.normal(shape=[dimension, num_classes], mean=0.0, + stddev=math.sqrt(variance), + dtype=tf.dtypes.float32) + + def __call__(self, x): + return (tf.reduce_sum(self._l2_regularizer*tf.square(x)) + + (1/self._num_train)*tf.reduce_sum(tf.multiply(x, self._b))) + + def get_config(self): + return {'l2_regularizer': self._l2_regularizer, + 'num_train': self._num_train, 'b': self._b} + + def logistic_objective_perturbation_parameters( + self, num_train: int, epsilon: float, delta: float, num_classes: int, + input_clipping_norm: float)-> Tuple[float, float]: + """Computes l2-regularization coefficient and Gaussian noise variance. + + The setting is based on Algorithm 1 of Kifer et al. + + Args: + num_train: number of input training points. + epsilon: epsilon parameter in (epsilon, delta)-DP. + delta: delta parameter in (epsilon, delta)-DP. + num_classes: number of classes. + input_clipping_norm: l2-norm according to which input points are clipped. + + Returns: + l2-regularization coefficient and variance of Gaussian noise added in + Algorithm 1 of Kifer et al. + """ + # zeta is an upper bound on the l2-norm of the loss function gradient. + zeta = input_clipping_norm + # variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6): + variance = zeta*zeta*(8*np.log(2/delta)+4*epsilon)/(epsilon*epsilon) + # lambda_coefficient is an upper bound on the spectral norm of the Hessian + # of the loss function. 
+ lambda_coefficient = math.sqrt(2*num_classes)*(input_clipping_norm**2)/4 + l2_regularizer = lambda_coefficient/(epsilon*num_train) + return (l2_regularizer, variance) + + +def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset, + test_dataset: datasets.RegressionDataset, + epsilon: float, delta: float, + epochs: int, num_classes: int, + input_clipping_norm: float)-> List[float]: + """Trains and validates differentially private logistic regression model. + + The training is based on the Algorithm 1 of Kifer et al. + + Args: + train_dataset: consists of num_train many labeled examples, where the labels + are in {0,1,...,num_classes-1}. + test_dataset: consists of num_test many labeled examples, where the labels + are in {0,1,...,num_classes-1}. + epsilon: epsilon parameter in (epsilon, delta)-DP. + delta: delta parameter in (epsilon, delta)-DP. + epochs: number of training epochs. + num_classes: number of classes. + input_clipping_norm: l2-norm according to which input points are clipped. + + Returns: + List of test accuracies (one for each epoch) on test_dataset of model + trained on train_dataset. + """ + num_train, dimension = train_dataset.points.shape + # Normalize each training point (i.e., row of train_dataset.points) to have + # l2-norm at most input_clipping_norm. + train_dataset.points = tf.clip_by_norm(train_dataset.points, + input_clipping_norm, [1]).numpy() + optimizer = 'sgd' + loss = 'categorical_crossentropy' + kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta, + num_classes, input_clipping_norm) + return single_layer_softmax.single_layer_softmax_classifier( + train_dataset, test_dataset, epochs, num_classes, optimizer, loss, + kernel_regularizer=kernel_regularizer) + + +def compute_dpsgd_noise_multiplier( + num_train: int, epsilon: float, delta: float, epochs: int, + batch_size: int, tolerance: float = 1e-2) -> Optional[float]: + """Computes the noise multiplier for DP-SGD given privacy parameters. 
+ + The algorithm performs binary search on the values of the noise multiplier. + + Args: + num_train: number of input training points. + epsilon: epsilon parameter in (epsilon, delta)-DP. + delta: delta parameter in (epsilon, delta)-DP. + epochs: number of training epochs. + batch_size: the number of examples in each batch of gradient descent. + tolerance: an upper bound on the absolute difference between the input + (desired) epsilon and the epsilon value corresponding to the + noise_multiplier that is output. + + Returns: + noise_multiplier: the smallest noise multiplier value (within plus or minus + the given tolerance) for which using DPKerasAdamOptimizer will result in an + (epsilon, delta)-differentially private trained model. + """ + search_parameters = common.BinarySearchParameters(lower_bound=0, + upper_bound=math.inf, + initial_guess=1, + tolerance=tolerance) + return common.inverse_monotone_function( + lambda x: compute_epsilon(num_train, batch_size, x, epochs, delta)[0], + epsilon, search_parameters) + + +def logistic_dpsgd(train_dataset: datasets.RegressionDataset, + test_dataset: datasets.RegressionDataset, + epsilon: float, delta: float, epochs: int, num_classes: int, + batch_size: int, num_microbatches: int, + clipping_norm: float)-> List[float]: + """Trains and validates private logistic regression model via DP-SGD. + + The training is based on the differentially private stochastic gradient + descent algorithm implemented in TensorFlow Privacy. + + Args: + train_dataset: consists of num_train many labeled examples, where the labels + are in {0,1,...,num_classes-1}. + test_dataset: consists of num_test many labeled examples, where the labels + are in {0,1,...,num_classes-1}. + epsilon: epsilon parameter in (epsilon, delta)-DP. + delta: delta parameter in (epsilon, delta)-DP. + epochs: number of training epochs. + num_classes: number of classes. + batch_size: the number of examples in each batch of gradient descent. 
+ num_microbatches: the number of microbatches in gradient descent. + clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer + to have l2-norm at most clipping_norm. + + Returns: + List of test accuracies (one for each epoch) on test_dataset of model + trained on train_dataset. + """ + num_train = train_dataset.points.shape[0] + remainder = num_train % batch_size + if remainder != 0: + train_dataset.points = train_dataset.points[:-remainder, :] + train_dataset.labels = train_dataset.labels[:-remainder] + num_train -= remainder + noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta, + epochs, batch_size) + optimizer = dp_optimizer_keras.DPKerasAdamOptimizer( + l2_norm_clip=clipping_norm, noise_multiplier=noise_multiplier, + num_microbatches=num_microbatches) + loss = tf.keras.losses.CategoricalCrossentropy( + reduction=tf.losses.Reduction.NONE) + return single_layer_softmax.single_layer_softmax_classifier( + train_dataset, test_dataset, epochs, num_classes, optimizer, loss, + batch_size) diff --git a/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py new file mode 100644 index 0000000..eaa428c --- /dev/null +++ b/tensorflow_privacy/privacy/logistic_regression/multinomial_logistic_test.py @@ -0,0 +1,81 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for tensorflow_privacy.privacy.logistic_regression.multinomial_logistic.""" + +import unittest +from absl.testing import parameterized +from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy +from tensorflow_privacy.privacy.logistic_regression import datasets +from tensorflow_privacy.privacy.logistic_regression import multinomial_logistic + + +class MultinomialLogisticRegressionTest(parameterized.TestCase): + + @parameterized.parameters( + (5000, 500, 3, 1, 1e-5, 40, 2, 0.05), + (5000, 500, 4, 1, 1e-5, 40, 2, 0.05), + (10000, 1000, 3, 1, 1e-5, 40, 4, 0.1), + (10000, 1000, 4, 1, 1e-5, 40, 4, 0.1), + ) + def test_logistic_objective_perturbation(self, num_train, num_test, dimension, + epsilon, delta, epochs, num_classes, + tolerance): + (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( + num_train, num_test, dimension, num_classes) + accuracy = multinomial_logistic.logistic_objective_perturbation( + train_dataset, test_dataset, epsilon, delta, epochs, num_classes, 1) + # Since the synthetic data is linearly separable, we expect the test + # accuracy to come arbitrarily close to 1 as the number of training examples + # grows. 
+ self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) + + @parameterized.parameters( + (1, 1, 1e-5, 40, 1, 1e-2), + (500, 0.1, 1e-5, 40, 50, 1e-2), + (5000, 10, 1e-5, 40, 10, 1e-3), + ) + def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta, + epochs, batch_size, tolerance): + noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier( + num_train, epsilon, delta, epochs, batch_size, tolerance) + epsilon_lower_bound = compute_dp_sgd_privacy(num_train, batch_size, + noise_multiplier + tolerance, + epochs, delta)[0] + epsilon_upper_bound = compute_dp_sgd_privacy(num_train, batch_size, + noise_multiplier - tolerance, + epochs, delta)[0] + self.assertLess(epsilon_lower_bound, epsilon) + self.assertLess(epsilon, epsilon_upper_bound) + + @parameterized.parameters( + (5000, 500, 3, 1, 1e-5, 40, 2, 0.05, 10, 10, 1), + (5000, 500, 4, 1, 1e-5, 40, 2, 0.05, 10, 10, 1), + (5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1), + (5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1), + ) + def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon, + delta, epochs, num_classes, tolerance, + batch_size, num_microbatches, clipping_norm): + (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( + num_train, num_test, dimension, num_classes) + accuracy = multinomial_logistic.logistic_dpsgd( + train_dataset, test_dataset, epsilon, delta, epochs, num_classes, + batch_size, num_microbatches, clipping_norm) + # Since the synthetic data is linearly separable, we expect the test + # accuracy to come arbitrarily close to 1 as the number of training examples + # grows. 
+ self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) + +if __name__ == '__main__': + unittest.main() diff --git a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py new file mode 100644 index 0000000..a8dcac3 --- /dev/null +++ b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax.py @@ -0,0 +1,59 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Implementation of a single-layer softmax classifier. +""" + +from typing import List +import tensorflow as tf +from tensorflow_privacy.privacy.logistic_regression import datasets + + +def single_layer_softmax_classifier( + train_dataset: datasets.RegressionDataset, + test_dataset: datasets.RegressionDataset, + epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer, + loss: tf.keras.losses.Loss = 'categorical_crossentropy', + batch_size: int = 32, + kernel_regularizer: tf.keras.regularizers.Regularizer = None)-> List[float]: + """Trains a single layer neural network classifier with softmax activation. + + Args: + train_dataset: consists of num_train many labeled examples, where the labels + are in {0,1,...,num_classes-1}. + test_dataset: consists of num_test many labeled examples, where the labels + are in {0,1,...,num_classes-1}. + epochs: the number of epochs. + num_classes: the number of classes. + optimizer: a tf.keras optimizer. 
+ loss: a tf.keras loss function. + batch_size: a positive integer. + kernel_regularizer: a regularization function. + + Returns: + List of test accuracies (one for each epoch) on test_dataset of model + trained on train_dataset. + """ + one_hot_train_labels = tf.one_hot(train_dataset.labels, num_classes) + one_hot_test_labels = tf.one_hot(test_dataset.labels, num_classes) + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense(units=num_classes, + activation='softmax', + kernel_regularizer=kernel_regularizer)) + model.compile(optimizer, loss=loss, metrics=['accuracy']) + history = model.fit(train_dataset.points, one_hot_train_labels, + batch_size=batch_size, epochs=epochs, + validation_data=(test_dataset.points, + one_hot_test_labels), + verbose=0) + return history.history['val_accuracy'] diff --git a/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py new file mode 100644 index 0000000..7c0af27 --- /dev/null +++ b/tensorflow_privacy/privacy/logistic_regression/single_layer_softmax_test.py @@ -0,0 +1,39 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for tensorflow_privacy.privacy.logistic_regression.single_layer_softmax.""" + +import unittest +from absl.testing import parameterized +from tensorflow_privacy.privacy.logistic_regression import datasets +from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax + + +class SingleLayerSoftmaxTest(parameterized.TestCase): + + @parameterized.parameters( + (5000, 500, 3, 40, 2, 0.05), + (5000, 500, 4, 40, 2, 0.05), + (10000, 1000, 3, 40, 4, 0.1), + (10000, 1000, 4, 40, 4, 0.1), + ) + def test_single_layer_softmax(self, num_train, num_test, dimension, epochs, + num_classes, tolerance): + (train_dataset, test_dataset) = datasets.synthetic_linearly_separable_data( + num_train, num_test, dimension, num_classes) + accuracy = single_layer_softmax.single_layer_softmax_classifier( + train_dataset, test_dataset, epochs, num_classes, 'sgd') + self.assertAlmostEqual(accuracy[-1], 1, delta=tolerance) + +if __name__ == '__main__': + unittest.main()