Implementation of Differentially Private Logistic Regression.
PiperOrigin-RevId: 381904153
This commit is contained in:
parent
af87581387
commit
392c506c62
6 changed files with 589 additions and 0 deletions
125
tensorflow_privacy/privacy/logistic_regression/datasets.py
Normal file
125
tensorflow_privacy/privacy/logistic_regression/datasets.py
Normal file
|
@ -0,0 +1,125 @@
|
|||
# Copyright 2021, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Functions for generating train and test data for logistic regression models.
|
||||
|
||||
Includes two types of datasets:
|
||||
- Synthetic linearly separable labeled examples.
|
||||
Here, in the binary classification case, we generate training examples by
|
||||
first sampling a random weight vector w from a multivariate Gaussian
|
||||
distribution. Then, for each training example, we randomly sample a point x,
|
||||
also from a multivariate Gaussian distribution, and then set the label y equal
|
||||
to 1 if the inner product of w and x is positive, and equal to 0 otherwise. As
|
||||
such, the training data is linearly separable.
|
||||
More generally, in the case where there are num_classes many classes, we
|
||||
sample num_classes different w vectors. After sampling x, we will set its
|
||||
class label y to the class for which the corresponding w vector has the
|
||||
largest inner product with x.
|
||||
- MNIST 10-class classification dataset.
|
||||
"""
|
||||
|
||||
from typing import Tuple
|
||||
import dataclasses
|
||||
import numpy as np
|
||||
from sklearn import preprocessing
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
@dataclasses.dataclass
class RegressionDataset:
  """Container pairing the points of a dataset with their class labels.

  Attributes:
    points: array of shape (num_examples, dimension); each row is one point to
      be classified.
    labels: array of shape (num_examples,); entry i is the label of row i of
      points and lies in {0,1,...,num_classes-1}, where num_classes is the
      number of classes.
  """
  points: np.ndarray
  labels: np.ndarray
|
||||
|
||||
|
||||
def linearly_separable_labeled_examples(
    num_examples: int, weights: np.ndarray) -> RegressionDataset:
  """Samples Gaussian points, normalizes them and labels them linearly.

  Args:
    num_examples: how many labeled examples to produce.
    weights: matrix of shape (dimension, num_classes) holding the coefficients
      of the linear separator, one column per class.

  Returns:
    RegressionDataset of points and labels. The points are the rows returned by
    preprocessing.normalize (unit l2-norm each), and the label of a point is
    the index of the column of weights with the largest inner product with it.
  """
  # The number of rows of weights fixes the dimension of the sampled points.
  gaussian_samples = np.random.normal(size=(num_examples, weights.shape[0]))
  unit_points = preprocessing.normalize(gaussian_samples)
  # Score every point against every class and keep the best-scoring class.
  class_scores = unit_points @ weights
  return RegressionDataset(points=unit_points,
                           labels=class_scores.argmax(axis=1))
|
||||
|
||||
|
||||
def synthetic_linearly_separable_data(
    num_train: int, num_test: int, dimension: int,
    num_classes: int) -> Tuple[RegressionDataset, RegressionDataset]:
  """Builds synthetic train and test sets that share one linear separator.

  Args:
    num_train: number of training data points.
    num_test: number of test data points.
    dimension: the dimension of the classification problem.
    num_classes: number of classes; must be at least 2.

  Returns:
    train_dataset: num_train labeled examples, with unit l2-norm points.
    test_dataset: num_test labeled examples, with unit l2-norm points.

  Raises:
    ValueError: if num_classes is smaller than 2.
  """
  if num_classes < 2:
    raise ValueError(f'num_classes must be at least 2. It is {num_classes}.')

  # A single random separator is drawn once and used for both splits, so the
  # train and test labels follow the same rule.
  separator = np.random.normal(size=(dimension, num_classes))
  train_dataset, test_dataset = [
      linearly_separable_labeled_examples(count, separator)
      for count in (num_train, num_test)
  ]
  return (train_dataset, test_dataset)
|
||||
|
||||
|
||||
def mnist_dataset() -> Tuple[RegressionDataset, RegressionDataset]:
  """Loads MNIST and returns flattened, normalized train and test data.

  Returns:
    train_dataset: MNIST labeled examples, with unit l2-norm points.
    test_dataset: MNIST labeled examples, with unit l2-norm points.
  """
  (train_images, train_labels), (test_images, test_labels) = (
      tf.keras.datasets.mnist.load_data())

  def flatten_and_normalize(images):
    # Collapse each example into a single row, then rescale every row.
    flat = images.reshape((images.shape[0], -1))
    return preprocessing.normalize(flat)

  return (RegressionDataset(flatten_and_normalize(train_images), train_labels),
          RegressionDataset(flatten_and_normalize(test_images), test_labels))
|
|
@ -0,0 +1,77 @@
|
|||
# Copyright 2021, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for tensorflow_privacy.privacy.logistic_regression.datasets."""
|
||||
|
||||
import unittest
|
||||
from absl.testing import parameterized
|
||||
import numpy as np
|
||||
from tensorflow_privacy.privacy.logistic_regression import datasets
|
||||
|
||||
|
||||
class DatasetsTest(parameterized.TestCase):
  """Checks shapes, norms and label ranges of the generated datasets."""

  def _assert_unit_norm_rows(self, points):
    """Asserts that each row of points has l2-norm almost equal to 1."""
    for row in points:
      self.assertAlmostEqual(np.linalg.norm(row), 1)

  def _assert_labels_below(self, labels, num_classes):
    """Asserts that every label belongs to {0,...,num_classes-1}."""
    self.assertTrue(np.all(np.isin(labels, range(num_classes))))

  @parameterized.parameters(
      (1, np.array([[1],])),
      (2, np.array([[1],])),
      (5, np.array([[-1, 1], [1, -1]])),
      (15, np.array([[-1, 1.5, 2.1], [1.3, -3.3, -7.1], [1.3, -3.3, -7.1]])))
  def test_linearly_separable_labeled_examples(self, num_examples, weights):
    dimension, _ = weights.shape
    dataset = datasets.linearly_separable_labeled_examples(
        num_examples, weights)
    self.assertEqual(dataset.points.shape, (num_examples, dimension))
    self.assertEqual(dataset.labels.shape, (num_examples,))
    # The assigned class must score at least as high as every class.
    scores = np.matmul(dataset.points, weights)
    for row, label in zip(scores, dataset.labels):
      for score in row:
        self.assertGreaterEqual(row[label], score)

  @parameterized.parameters(
      (1, 1, 1, 2),
      (20, 5, 1, 2),
      (20, 5, 2, 2),
      (1000, 10, 15, 10))
  def test_synthetic(self, num_train, num_test, dimension, num_classes):
    train_dataset, test_dataset = datasets.synthetic_linearly_separable_data(
        num_train, num_test, dimension, num_classes)
    self.assertEqual(train_dataset.points.shape, (num_train, dimension))
    self.assertEqual(train_dataset.labels.shape, (num_train,))
    self.assertEqual(test_dataset.points.shape, (num_test, dimension))
    self.assertEqual(test_dataset.labels.shape, (num_test,))
    # Check that each train and test point has unit l2-norm.
    self._assert_unit_norm_rows(train_dataset.points)
    self._assert_unit_norm_rows(test_dataset.points)
    # Check that each train and test label is in {0,...,num_classes-1}.
    self._assert_labels_below(train_dataset.labels, num_classes)
    self._assert_labels_below(test_dataset.labels, num_classes)

  def test_mnist_dataset(self):
    train_dataset, test_dataset = datasets.mnist_dataset()
    self.assertEqual(train_dataset.points.shape, (60000, 784))
    self.assertEqual(train_dataset.labels.shape, (60000,))
    self.assertEqual(test_dataset.points.shape, (10000, 784))
    self.assertEqual(test_dataset.labels.shape, (10000,))
    # Check that each train and test point has unit l2-norm.
    self._assert_unit_norm_rows(train_dataset.points)
    self._assert_unit_norm_rows(test_dataset.points)
    # Check that each train and test label is in {0,...,9}.
    self._assert_labels_below(train_dataset.labels, 10)
    self._assert_labels_below(test_dataset.labels, 10)
|
||||
|
||||
if __name__ == '__main__':
  # Run the tests of this module when it is executed as a script.
  unittest.main()
|
|
@ -0,0 +1,208 @@
|
|||
# Copyright 2021, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Implementation of differentially private multinomial logistic regression.
|
||||
|
||||
Algorithms include:
|
||||
|
||||
- Based on the differentially private objective perturbation method of Kifer et
|
||||
al. (COLT 2012): http://proceedings.mlr.press/v23/kifer12/kifer12.pdf
|
||||
Their algorithm can be used for convex optimization problems in general, and in
|
||||
the case of multinomial logistic regression in particular.
|
||||
|
||||
- Training procedure based on the Differentially Private Stochastic Gradient
|
||||
Descent (DP-SGD) implementation in TensorFlow Privacy, which is itself based on
|
||||
the algorithm of Abadi et al. (https://arxiv.org/pdf/1607.00133.pdf).
|
||||
"""
|
||||
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy as compute_epsilon
|
||||
from tensorflow_privacy.privacy.logistic_regression import datasets
|
||||
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
|
||||
from tensorflow_privacy.privacy.optimizers import dp_optimizer_keras
|
||||
from differential_privacy.python.accounting import common
|
||||
|
||||
|
||||
@tf.keras.utils.register_keras_serializable(package='Custom', name='Kifer')
class KiferRegularizer(tf.keras.regularizers.Regularizer):
  """Class corresponding to the regularizer in Algorithm 1 of Kifer et al.

  For a kernel x the penalty is
    l2_regularizer * sum(x ** 2) + (1 / num_train) * sum(x * b),
  where b is Gaussian noise drawn once, when the regularizer is constructed.

  Attributes:
    l2_regularizer: scalar coefficient for l2-regularization term.
    num_train: number of training examples.
    b: tensor of shape (d,num_classes) linearly translating the objective.
  """

  def __init__(self, num_train: int, dimension: int, epsilon: float,
               delta: float, num_classes: int, input_clipping_norm: float):
    """Computes the regularization coefficient and samples the noise b.

    Args:
      num_train: number of input training points.
      dimension: number of rows of the noise tensor b.
      epsilon: epsilon parameter in (epsilon, delta)-DP.
      delta: delta parameter in (epsilon, delta)-DP.
      num_classes: number of classes (number of columns of b).
      input_clipping_norm: l2-norm according to which input points are clipped.
    """
    self._num_train = num_train
    (self._l2_regularizer,
     variance) = self.logistic_objective_perturbation_parameters(
         num_train, epsilon, delta, num_classes, input_clipping_norm)
    # The noise is sampled exactly once here; every later call reuses it.
    self._b = tf.random.normal(shape=[dimension, num_classes], mean=0.0,
                               stddev=math.sqrt(variance),
                               dtype=tf.dtypes.float32)

  def __call__(self, x):
    """Returns the l2 penalty plus the noise-weighted linear term for x."""
    return (tf.reduce_sum(self._l2_regularizer*tf.square(x)) +
            (1/self._num_train)*tf.reduce_sum(tf.multiply(x, self._b)))

  def get_config(self):
    """Returns the state of the regularizer as a dictionary."""
    # NOTE(review): 'b' is a tf.Tensor, so this dictionary is presumably not
    # JSON-serializable as is -- confirm before relying on model.to_json().
    return {'l2_regularizer': self._l2_regularizer,
            'num_train': self._num_train, 'b': self._b}

  @classmethod
  def from_config(cls, config):
    """Rebuilds a regularizer from a dictionary produced by get_config.

    The keys emitted by get_config are the derived state (l2_regularizer,
    num_train, b), not the constructor arguments, so the inherited
    from_config, which calls cls(**config), cannot rebuild an instance. The
    state is restored directly instead, which also keeps the same noise b
    rather than drawing fresh noise.

    Args:
      config: dictionary with keys 'l2_regularizer', 'num_train' and 'b'.

    Returns:
      A KiferRegularizer carrying the given state.
    """
    # __init__ is bypassed on purpose: it would resample b.
    regularizer = cls.__new__(cls)
    regularizer._num_train = config['num_train']
    regularizer._l2_regularizer = config['l2_regularizer']
    regularizer._b = tf.convert_to_tensor(config['b'],
                                          dtype=tf.dtypes.float32)
    return regularizer

  def logistic_objective_perturbation_parameters(
      self, num_train: int, epsilon: float, delta: float, num_classes: int,
      input_clipping_norm: float) -> Tuple[float, float]:
    """Computes l2-regularization coefficient and Gaussian noise variance.

    The setting is based on Algorithm 1 of Kifer et al.

    Args:
      num_train: number of input training points.
      epsilon: epsilon parameter in (epsilon, delta)-DP.
      delta: delta parameter in (epsilon, delta)-DP.
      num_classes: number of classes.
      input_clipping_norm: l2-norm according to which input points are clipped.

    Returns:
      l2-regularization coefficient and variance of Gaussian noise added in
      Algorithm 1 of Kifer et al.
    """
    # zeta is an upper bound on the l2-norm of the loss function gradient.
    zeta = input_clipping_norm
    # variance is based on line 5 from Algorithm 1 of Kifer et al. (page 6):
    variance = zeta*zeta*(8*np.log(2/delta)+4*epsilon)/(epsilon*epsilon)
    # lambda_coefficient is an upper bound on the spectral norm of the Hessian
    # of the loss function.
    lambda_coefficient = math.sqrt(2*num_classes)*(input_clipping_norm**2)/4
    l2_regularizer = lambda_coefficient/(epsilon*num_train)
    return (l2_regularizer, variance)
|
||||
|
||||
|
||||
def logistic_objective_perturbation(train_dataset: datasets.RegressionDataset,
                                    test_dataset: datasets.RegressionDataset,
                                    epsilon: float, delta: float,
                                    epochs: int, num_classes: int,
                                    input_clipping_norm: float) -> List[float]:
  """Trains and validates differentially private logistic regression model.

  The training is based on the Algorithm 1 of Kifer et al. The training points
  are clipped before training; train_dataset itself is not modified.

  Args:
    train_dataset: consists of num_train many labeled examples, where the labels
      are in {0,1,...,num_classes-1}.
    test_dataset: consists of num_test many labeled examples, where the labels
      are in {0,1,...,num_classes-1}.
    epsilon: epsilon parameter in (epsilon, delta)-DP.
    delta: delta parameter in (epsilon, delta)-DP.
    epochs: number of training epochs.
    num_classes: number of classes.
    input_clipping_norm: l2-norm according to which input points are clipped.

  Returns:
    List of test accuracies (one for each epoch) on test_dataset of model
    trained on train_dataset.
  """
  num_train, dimension = train_dataset.points.shape
  # Clip each training point (i.e., row of train_dataset.points) to have
  # l2-norm at most input_clipping_norm. The clipped points go into a new
  # dataset so that the caller's train_dataset is left untouched.
  clipped_points = tf.clip_by_norm(train_dataset.points, input_clipping_norm,
                                   [1]).numpy()
  clipped_train_dataset = datasets.RegressionDataset(clipped_points,
                                                     train_dataset.labels)
  optimizer = 'sgd'
  loss = 'categorical_crossentropy'
  kernel_regularizer = KiferRegularizer(num_train, dimension, epsilon, delta,
                                        num_classes, input_clipping_norm)
  return single_layer_softmax.single_layer_softmax_classifier(
      clipped_train_dataset, test_dataset, epochs, num_classes, optimizer,
      loss, kernel_regularizer=kernel_regularizer)
|
||||
|
||||
|
||||
def compute_dpsgd_noise_multiplier(
    num_train: int, epsilon: float, delta: float, epochs: int,
    batch_size: int, tolerance: float = 1e-2) -> Optional[float]:
  """Computes the noise multiplier for DP-SGD given privacy parameters.

  The search runs over candidate noise multipliers: each candidate is mapped to
  the epsilon reported by compute_dp_sgd_privacy, and
  common.inverse_monotone_function inverts that map at the requested epsilon.

  Args:
    num_train: number of input training points.
    epsilon: epsilon parameter in (epsilon, delta)-DP.
    delta: delta parameter in (epsilon, delta)-DP.
    epochs: number of training epochs.
    batch_size: the number of examples in each batch of gradient descent.
    tolerance: tolerance handed to common.BinarySearchParameters. It presumably
      bounds the error on the returned noise multiplier rather than on epsilon
      -- confirm against the accounting library.

  Returns:
    noise_multiplier: the value found by the search, i.e. (up to the given
    tolerance) the smallest noise multiplier for which the reported privacy
    guarantee is (epsilon, delta). May presumably be None when the search finds
    no suitable value -- confirm against common.inverse_monotone_function.
  """
  def epsilon_for(noise_multiplier):
    # compute_epsilon returns a sequence; its first entry is the epsilon.
    return compute_epsilon(num_train, batch_size, noise_multiplier, epochs,
                           delta)[0]

  # The multiplier is searched over [0, inf), starting from a guess of 1.
  search_parameters = common.BinarySearchParameters(
      lower_bound=0, upper_bound=math.inf, initial_guess=1,
      tolerance=tolerance)
  return common.inverse_monotone_function(epsilon_for, epsilon,
                                          search_parameters)
|
||||
|
||||
|
||||
def logistic_dpsgd(train_dataset: datasets.RegressionDataset,
                   test_dataset: datasets.RegressionDataset,
                   epsilon: float, delta: float, epochs: int, num_classes: int,
                   batch_size: int, num_microbatches: int,
                   clipping_norm: float) -> List[float]:
  """Trains and validates private logistic regression model via DP-SGD.

  The training is based on the differentially private stochastic gradient
  descent algorithm implemented in TensorFlow Privacy. Only whole batches are
  used: the trailing num_train % batch_size training examples are dropped.
  train_dataset itself is not modified.

  Args:
    train_dataset: consists of num_train many labeled examples, where the labels
      are in {0,1,...,num_classes-1}.
    test_dataset: consists of num_test many labeled examples, where the labels
      are in {0,1,...,num_classes-1}.
    epsilon: epsilon parameter in (epsilon, delta)-DP.
    delta: delta parameter in (epsilon, delta)-DP.
    epochs: number of training epochs.
    num_classes: number of classes.
    batch_size: the number of examples in each batch of gradient descent.
    num_microbatches: the number of microbatches in gradient descent.
    clipping_norm: the gradients will be normalized by DPKerasAdamOptimizer
      to have l2-norm at most clipping_norm.

  Returns:
    List of test accuracies (one for each epoch) on test_dataset of model
    trained on train_dataset.

  Raises:
    ValueError: if train_dataset holds fewer than batch_size examples, so that
      no complete batch remains after truncation.
  """
  num_available = train_dataset.points.shape[0]
  # Keep only as many examples as fill complete batches.
  num_train = num_available - num_available % batch_size
  if num_train == 0:
    raise ValueError(
        f'train_dataset has {num_available} examples, which is fewer than '
        f'batch_size ({batch_size}); no complete batch can be formed.')
  # Slice into a new dataset so that the caller's train_dataset is left
  # untouched.
  truncated_train_dataset = datasets.RegressionDataset(
      train_dataset.points[:num_train, :], train_dataset.labels[:num_train])
  noise_multiplier = compute_dpsgd_noise_multiplier(num_train, epsilon, delta,
                                                    epochs, batch_size)
  optimizer = dp_optimizer_keras.DPKerasAdamOptimizer(
      l2_norm_clip=clipping_norm, noise_multiplier=noise_multiplier,
      num_microbatches=num_microbatches)
  # The loss is left unreduced (one value per example); presumably the DP
  # optimizer needs per-example losses to form microbatches -- confirm.
  loss = tf.keras.losses.CategoricalCrossentropy(
      reduction=tf.losses.Reduction.NONE)
  return single_layer_softmax.single_layer_softmax_classifier(
      truncated_train_dataset, test_dataset, epochs, num_classes, optimizer,
      loss, batch_size)
|
|
@ -0,0 +1,81 @@
|
|||
# Copyright 2021, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for tensorflow_privacy.privacy.logistic_regression.multinomial_logistic."""
|
||||
|
||||
import unittest
|
||||
from absl.testing import parameterized
|
||||
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy import compute_dp_sgd_privacy
|
||||
from tensorflow_privacy.privacy.logistic_regression import datasets
|
||||
from tensorflow_privacy.privacy.logistic_regression import multinomial_logistic
|
||||
|
||||
|
||||
class MultinomialLogisticRegressionTest(parameterized.TestCase):
  """Tests the private logistic regression trainers on synthetic data."""

  @parameterized.parameters(
      (5000, 500, 3, 1, 1e-5, 40, 2, 0.05),
      (5000, 500, 4, 1, 1e-5, 40, 2, 0.05),
      (10000, 1000, 3, 1, 1e-5, 40, 4, 0.1),
      (10000, 1000, 4, 1, 1e-5, 40, 4, 0.1),
  )
  def test_logistic_objective_perturbation(self, num_train, num_test, dimension,
                                           epsilon, delta, epochs, num_classes,
                                           tolerance):
    train_dataset, test_dataset = datasets.synthetic_linearly_separable_data(
        num_train, num_test, dimension, num_classes)
    accuracies = multinomial_logistic.logistic_objective_perturbation(
        train_dataset, test_dataset, epsilon, delta, epochs, num_classes,
        input_clipping_norm=1)
    # Since the synthetic data is linearly separable, we expect the test
    # accuracy to come arbitrarily close to 1 as the number of training examples
    # grows.
    final_accuracy = accuracies[-1]
    self.assertAlmostEqual(final_accuracy, 1, delta=tolerance)

  @parameterized.parameters(
      (1, 1, 1e-5, 40, 1, 1e-2),
      (500, 0.1, 1e-5, 40, 50, 1e-2),
      (5000, 10, 1e-5, 40, 10, 1e-3),
  )
  def test_compute_dpsgd_noise_multiplier(self, num_train, epsilon, delta,
                                          epochs, batch_size, tolerance):
    noise_multiplier = multinomial_logistic.compute_dpsgd_noise_multiplier(
        num_train, epsilon, delta, epochs, batch_size, tolerance)

    def epsilon_at(multiplier):
      return compute_dp_sgd_privacy(num_train, batch_size, multiplier, epochs,
                                    delta)[0]

    # The requested epsilon must lie strictly between the epsilons obtained at
    # noise_multiplier plus and minus tolerance.
    epsilon_lower_bound = epsilon_at(noise_multiplier + tolerance)
    epsilon_upper_bound = epsilon_at(noise_multiplier - tolerance)
    self.assertLess(epsilon_lower_bound, epsilon)
    self.assertLess(epsilon, epsilon_upper_bound)

  @parameterized.parameters(
      (5000, 500, 3, 1, 1e-5, 40, 2, 0.05, 10, 10, 1),
      (5000, 500, 4, 1, 1e-5, 40, 2, 0.05, 10, 10, 1),
      (5000, 500, 3, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
      (5000, 500, 4, 2, 1e-4, 40, 4, 0.1, 10, 10, 1),
  )
  def test_logistic_dpsgd(self, num_train, num_test, dimension, epsilon,
                          delta, epochs, num_classes, tolerance,
                          batch_size, num_microbatches, clipping_norm):
    train_dataset, test_dataset = datasets.synthetic_linearly_separable_data(
        num_train, num_test, dimension, num_classes)
    accuracies = multinomial_logistic.logistic_dpsgd(
        train_dataset, test_dataset, epsilon, delta, epochs, num_classes,
        batch_size, num_microbatches, clipping_norm)
    # Since the synthetic data is linearly separable, we expect the test
    # accuracy to come arbitrarily close to 1 as the number of training examples
    # grows.
    final_accuracy = accuracies[-1]
    self.assertAlmostEqual(final_accuracy, 1, delta=tolerance)
|
||||
|
||||
if __name__ == '__main__':
  # Run the tests of this module when it is executed as a script.
  unittest.main()
|
|
@ -0,0 +1,59 @@
|
|||
# Copyright 2021, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Implementation of a single-layer softmax classifier.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
import tensorflow as tf
|
||||
from tensorflow_privacy.privacy.logistic_regression import datasets
|
||||
|
||||
|
||||
def single_layer_softmax_classifier(
    train_dataset: datasets.RegressionDataset,
    test_dataset: datasets.RegressionDataset,
    epochs: int, num_classes: int, optimizer: tf.keras.optimizers.Optimizer,
    loss: tf.keras.losses.Loss = 'categorical_crossentropy',
    batch_size: int = 32,
    kernel_regularizer: tf.keras.regularizers.Regularizer = None
) -> List[float]:
  """Fits a one-layer softmax model and reports its per-epoch test accuracy.

  Args:
    train_dataset: consists of num_train many labeled examples, where the labels
      are in {0,1,...,num_classes-1}.
    test_dataset: consists of num_test many labeled examples, where the labels
      are in {0,1,...,num_classes-1}.
    epochs: the number of epochs.
    num_classes: the number of classes.
    optimizer: a tf.keras optimizer.
    loss: a tf.keras loss function.
    batch_size: a positive integer.
    kernel_regularizer: a regularization function applied to the kernel of the
      dense layer.

  Returns:
    List of test accuracies (one for each epoch) on test_dataset of model
    trained on train_dataset.
  """
  # The model is a single dense layer with one softmax output per class.
  model = tf.keras.Sequential([
      tf.keras.layers.Dense(units=num_classes,
                            activation='softmax',
                            kernel_regularizer=kernel_regularizer),
  ])
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Integer labels are expanded into one-hot rows for both splits.
  train_targets = tf.one_hot(train_dataset.labels, num_classes)
  test_targets = tf.one_hot(test_dataset.labels, num_classes)

  # The test set serves as validation data, so the training history records
  # its accuracy after every epoch.
  history = model.fit(
      x=train_dataset.points,
      y=train_targets,
      batch_size=batch_size,
      epochs=epochs,
      validation_data=(test_dataset.points, test_targets),
      verbose=0)
  return history.history['val_accuracy']
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright 2021, The TensorFlow Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for tensorflow_privacy.privacy.logistic_regression.single_layer_softmax."""
|
||||
|
||||
import unittest
|
||||
from absl.testing import parameterized
|
||||
from tensorflow_privacy.privacy.logistic_regression import datasets
|
||||
from tensorflow_privacy.privacy.logistic_regression import single_layer_softmax
|
||||
|
||||
|
||||
class SingleLayerSoftmaxTest(parameterized.TestCase):
  """Tests the non-private softmax classifier on synthetic separable data."""

  @parameterized.parameters(
      (5000, 500, 3, 40, 2, 0.05),
      (5000, 500, 4, 40, 2, 0.05),
      (10000, 1000, 3, 40, 4, 0.1),
      (10000, 1000, 4, 40, 4, 0.1),
  )
  def test_single_layer_softmax(self, num_train, num_test, dimension, epochs,
                                num_classes, tolerance):
    train_dataset, test_dataset = datasets.synthetic_linearly_separable_data(
        num_train, num_test, dimension, num_classes)
    accuracies = single_layer_softmax.single_layer_softmax_classifier(
        train_dataset, test_dataset, epochs, num_classes, 'sgd')
    # The accuracy after the last epoch must be within tolerance of 1.
    final_accuracy = accuracies[-1]
    self.assertAlmostEqual(final_accuracy, 1, delta=tolerance)
|
||||
|
||||
if __name__ == '__main__':
  # Run the tests of this module when it is executed as a script.
  unittest.main()
|
Loading…
Reference in a new issue