2019-07-18 13:04:35 -06:00
|
|
|
# Copyright 2019, The TensorFlow Authors.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2019-07-31 08:55:25 -06:00
|
|
|
"""Tutorial for bolt_on module, the model and the optimizer."""
|
2022-01-27 11:36:54 -07:00
|
|
|
|
2020-01-17 16:39:43 -07:00
|
|
|
import tensorflow.compat.v1 as tf # pylint: disable=wrong-import-position
|
2019-10-14 16:29:21 -06:00
|
|
|
from tensorflow_privacy.privacy.bolt_on import losses # pylint: disable=wrong-import-position
|
|
|
|
from tensorflow_privacy.privacy.bolt_on import models # pylint: disable=wrong-import-position
|
|
|
|
from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn # pylint: disable=wrong-import-position
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
|
|
|
# First, we will create a binary classification dataset with a single output
|
|
|
|
# dimension. The samples for each label are repeated data points at different
|
|
|
|
# points in space.
|
|
|
|
# -------
|
2019-07-16 08:33:57 -06:00
|
|
|
# Parameters for dataset
|
|
|
|
n_samples = 10
input_dim = 2
n_outputs = 1
# Create binary classification dataset: n_samples rows filled with -1 and
# labelled 0, followed by n_samples rows filled with 1 and labelled 1.
x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)),
           tf.constant(1, tf.float32, (n_samples, input_dim))]
y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),
           tf.constant(1, tf.float32, (n_samples, 1))]
x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
print(x.shape, y.shape)
generator = tf.data.Dataset.from_tensor_slices((x, y))
# Shuffle individual samples *before* batching. The rows are ordered by label,
# so batching first would yield single-class batches and a later shuffle would
# only permute those whole batches. The buffer spans the full dataset
# (2 * n_samples rows) so any sample can land in any batch.
generator = generator.shuffle(2 * n_samples)
generator = generator.batch(n_samples)
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
2019-07-30 13:12:22 -06:00
|
|
|
# Next, we will explore using the pre-built BoltOnModel, which is a thin
|
2019-07-18 13:04:35 -06:00
|
|
|
# wrapper around a Keras Model using a single-layer neural network.
|
2019-07-30 13:12:22 -06:00
|
|
|
# It automatically uses the BoltOn Optimizer which encompasses all the logic
|
|
|
|
# required for the BoltOn Differential Privacy method.
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
2019-07-30 13:12:22 -06:00
|
|
|
# Tell the model how many outputs we have.
bolt = models.BoltOnModel(n_outputs)
# -------
# Now, we will pick our optimizer and strongly convex loss function. The loss
# must extend from StrongConvexMixin and implement the associated methods. Some
# existing loss functions are pre-implemented in bolt_on.losses.
# -------
optimizer = tf.optimizers.SGD()
reg_lambda = C = radius_constant = 1
loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
# -------
# For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy
# to be 1; these are all tunable and their impact can be read in
# losses.StrongConvexBinaryCrossentropy. We then compile the model with the
# chosen optimizer and loss, which will automatically wrap the chosen optimizer
# with the BoltOn Optimizer, ensuring the required components function as
# required for privacy guarantees.
# -------
bolt.compile(optimizer, loss)
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
|
|
|
# To fit the model, the optimizer will require additional information about
|
|
|
|
# the dataset and model. These parameters are:
|
|
|
|
# 1. the class_weights used
|
|
|
|
# 2. the number of samples in the dataset
|
2019-07-25 09:37:54 -06:00
|
|
|
# 3. the batch size which the model will try to infer, if possible. If not,
|
|
|
|
# you will be required to pass these explicitly to the fit method.
|
2019-07-18 13:04:35 -06:00
|
|
|
#
|
|
|
|
# As well, there are two privacy parameters that can be altered:
|
|
|
|
# 1. epsilon, a float
|
2019-07-25 09:37:54 -06:00
|
|
|
# 2. noise_distribution, a valid string indicating the distribution to use (must
|
|
|
|
# be implemented)
|
2019-07-18 13:04:35 -06:00
|
|
|
#
|
2019-07-30 13:12:22 -06:00
|
|
|
# The BoltOnModel offers a helper method, .calculate_class_weight, to aid in
|
2019-07-18 13:04:35 -06:00
|
|
|
# class_weight calculation.
|
2019-07-16 08:33:57 -06:00
|
|
|
# required parameters
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
|
|
|
class_weight = None  # default; use .calculate_class_weight for other values
batch_size = None  # default; specify this if it cannot be inferred
n_samples = None  # default; specify this if it cannot be inferred
# Privacy parameters.
epsilon = 2
noise_distribution = 'laplace'

bolt.fit(
    x,
    y,
    epsilon=epsilon,
    class_weight=class_weight,
    batch_size=batch_size,
    n_samples=n_samples,
    noise_distribution=noise_distribution,
    epochs=2,
)
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
|
|
|
# We may also train a generator object, or try different optimizers and loss
|
|
|
|
# functions. Below, we will see that we must pass the number of samples as the
|
|
|
|
# fit method is unable to infer it for a generator.
|
|
|
|
# -------
|
2019-07-16 08:33:57 -06:00
|
|
|
optimizer2 = tf.optimizers.Adam()
bolt.compile(optimizer2, loss)
# Required parameters.
class_weight = None  # default; use .calculate_class_weight for other values
batch_size = None  # default; specify this if it cannot be inferred
n_samples = None  # default; specify this if it cannot be inferred
# Privacy parameters.
epsilon = 2
noise_distribution = 'laplace'
# n_samples is deliberately left as None here: the tutorial expects fit to
# reject a generator whose sample count was not given, so the resulting
# ValueError is printed rather than propagated.
try:
  bolt.fit(
      generator,
      epsilon=epsilon,
      class_weight=class_weight,
      batch_size=batch_size,
      n_samples=n_samples,
      noise_distribution=noise_distribution,
      verbose=0,
  )
except ValueError as err:
  print(err)
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
|
|
|
# And now, re-running with the parameter set.
|
|
|
|
# -------
|
2019-07-16 08:33:57 -06:00
|
|
|
# The generator holds two classes of 10 samples each.
n_samples = 20
bolt.fit_generator(
    generator,
    epsilon=epsilon,
    class_weight=class_weight,
    n_samples=n_samples,
    noise_distribution=noise_distribution,
    verbose=0,
)
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
2019-07-31 08:55:25 -06:00
|
|
|
# You don't have to use the BoltOn model to use the BoltOn method.
|
2019-07-18 13:04:35 -06:00
|
|
|
# There are only a few requirements:
|
|
|
|
# 1. make sure any requirements from the loss are implemented in the model.
|
|
|
|
# 2. instantiate the optimizer and use it as a context around the fit operation.
|
|
|
|
# -------
|
|
|
|
# -------------------- Part 2, using the Optimizer
|
2019-07-25 09:37:54 -06:00
|
|
|
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
2019-07-30 13:12:22 -06:00
|
|
|
# Here, we create our own model and set up the BoltOn optimizer.
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
2019-07-25 09:37:54 -06:00
|
|
|
|
|
|
|
|
2019-07-18 13:04:35 -06:00
|
|
|
class TestModel(tf.keras.Model):  # pylint: disable=abstract-method
  """Minimal Keras model made of a single regularized Dense layer.

  Used by the tutorial to show the BoltOn optimizer working with a
  user-defined model instead of the pre-built BoltOnModel.
  """

  def __init__(self, reg_layer, number_of_outputs=1):
    """Builds the model's only layer.

    Args:
      reg_layer: kernel regularizer handed to the Dense output layer.
      number_of_outputs: number of units in the Dense output layer.
    """
    super().__init__(name='test')
    dense = tf.keras.layers.Dense(
        number_of_outputs,
        kernel_regularizer=reg_layer,
    )
    self.output_layer = dense

  def call(self, inputs):  # pylint: disable=arguments-differ
    """Returns the output layer applied to `inputs`."""
    return self.output_layer(inputs)
|
|
|
|
|
|
|
|
|
|
|
|
loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
# Wrap a plain SGD optimizer, together with the loss, in the BoltOn optimizer.
optimizer = BoltOn(tf.optimizers.SGD(), loss)
# -------
# Now, we instantiate our model and check for 1. Since our loss requires L2
# regularization over the kernel, we will pass it to the model.
# -------
n_outputs = 1  # parameter for model and optimizer context.
test_model = TestModel(loss.kernel_regularizer(), n_outputs)
test_model.compile(optimizer, loss)
|
2019-07-18 13:04:35 -06:00
|
|
|
# -------
|
2019-07-30 13:12:22 -06:00
|
|
|
# We comply with 2., and use the BoltOn Optimizer as a context around the fit
|
2019-07-18 13:04:35 -06:00
|
|
|
# method.
|
|
|
|
# -------
|
2019-07-16 08:33:57 -06:00
|
|
|
# parameters for context
|
|
|
|
noise_distribution = 'laplace'
epsilon = 2
# Previously, the fit method auto-detected the class_weights. Here, we need to
# pass the class_weights explicitly. 1 is the same as None.
class_weights = 1
n_samples = 20
batch_size = 5

# The value yielded by the context manager is not needed, so it is not bound.
with optimizer(noise_distribution=noise_distribution,
               epsilon=epsilon,
               layers=test_model.layers,
               class_weights=class_weights,
               n_samples=n_samples,
               batch_size=batch_size):
  test_model.fit(x, y, batch_size=batch_size, epochs=2)
|