155 lines
5.9 KiB
Python
155 lines
5.9 KiB
Python
import sys
|
|
|
|
sys.path.append('..')
|
|
import tensorflow as tf
|
|
from privacy.bolton import losses
|
|
from privacy.bolton import models
|
|
|
|
"""First, we will create a binary classification dataset with a single output
|
|
dimension. The samples for each label are repeated data points at different
|
|
points in space."""
|
|
# Parameters for dataset
n_samples = 10  # number of samples generated per class
input_dim = 2
n_outputs = 1
# Create binary classification dataset: label 0 is a block of rows filled with
# -1 and label 1 is a block of rows filled with 1.
x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)),
           tf.constant(1, tf.float32, (n_samples, input_dim))]
y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),
           tf.constant(1, tf.float32, (n_samples, 1))]
x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
print(x.shape, y.shape)
generator = tf.data.Dataset.from_tensor_slices((x, y))
# Shuffle individual samples *before* batching. The rows are laid out class by
# class, so batching first would produce single-class batches and a later
# shuffle would only permute those whole batches. The buffer spans all
# 2 * n_samples rows so that every sample can land in any batch.
generator = generator.shuffle(2 * n_samples)
generator = generator.batch(10)
|
"""First, we will explore using the pre-built BoltonModel, which is a thin
wrapper around a Keras Model using a single-layer neural network.
It automatically uses the Bolton Optimizer which encompasses all the logic
required for the Bolton Differential Privacy method."""
# Tell the model how many outputs we have.
bolt = models.BoltonModel(n_outputs)
"""Now, we will pick our optimizer and Strongly Convex Loss function. The loss
must extend from StrongConvexMixin and implement the associated methods. Some
existing loss functions are pre-implemented in bolton.losses."""
# Loss hyperparameters first, then the loss itself, then the base optimizer.
reg_lambda = 1
C = 1
radius_constant = 1
loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
optimizer = tf.optimizers.SGD()
"""For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy
to be 1; these are all tunable and their impact can be read in
losses.StrongConvexBinaryCrossentropy. We then compile the model with the
chosen optimizer and loss, which will automatically wrap the chosen optimizer
with the Bolton Optimizer, ensuring the required components function as
required for privacy guarantees."""
bolt.compile(optimizer, loss)
|
"""To fit the model, the optimizer will require additional information about
the dataset and model. These parameters are:
1. the class_weights used
2. the number of samples in the dataset
3. the batch size
The model will try to infer these, if possible. If not, you will be required
to pass these explicitly to the fit method.

As well, there are two privacy parameters that can be altered:
1. epsilon, a float
2. noise_distribution, a valid string indicating the distribution to use (must
be implemented)

The BoltonModel offers a helper method, .calculate_class_weight, to aid in
class_weight calculation."""
# required parameters
class_weight = None  # default, use .calculate_class_weight to specify other values
batch_size = None  # default, if it cannot be inferred, specify this
n_samples = None  # default, if it cannot be inferred, specify this
# privacy parameters
epsilon = 2
noise_distribution = 'laplace'

# Train directly on the in-memory tensors for two epochs.
bolt.fit(
    x,
    y,
    epsilon=epsilon,
    class_weight=class_weight,
    batch_size=batch_size,
    n_samples=n_samples,
    noise_distribution=noise_distribution,
    epochs=2,
)
|
"""We may also train on a generator object, or try different optimizers and
loss functions. Below, we will see that we must pass the number of samples, as
the fit method is unable to infer it for a generator."""
optimizer2 = tf.optimizers.Adam()
bolt.compile(optimizer2, loss)
# required parameters
class_weight = None  # default, use .calculate_class_weight to specify other values
batch_size = None  # default, if it cannot be inferred, specify this
n_samples = None  # default, if it cannot be inferred, specify this
# privacy parameters
epsilon = 2
noise_distribution = 'laplace'
# n_samples is deliberately left as None here: this call demonstrates the
# failure, so the ValueError is printed instead of aborting the script.
try:
    bolt.fit(
        generator,
        epsilon=epsilon,
        class_weight=class_weight,
        batch_size=batch_size,
        n_samples=n_samples,
        noise_distribution=noise_distribution,
        verbose=0,
    )
except ValueError as err:
    print(err)
|
"""And now, re-running with the parameter set."""
# Total number of rows in the generator, passed explicitly this time.
n_samples = 20
bolt.fit(
    generator,
    epsilon=epsilon,
    class_weight=class_weight,
    batch_size=batch_size,
    n_samples=n_samples,
    noise_distribution=noise_distribution,
    verbose=0,
)
|
|
"""You don't have to use the bolton model to use the Bolton method.
|
|
There are only a few requirements:
|
|
1. make sure any requirements from the loss are implemented in the model.
|
|
2. instantiate the optimizer and use it as a context around your fit operation.
|
|
"""
|
|
|
|
from privacy.bolton.optimizers import Bolton
|
|
|
|
"""Here, we create our own model and setup the Bolton optimizer."""
|
|
|
|
class TestModel(tf.keras.Model):
    """Keras model made of a single Dense layer with a kernel regularizer."""

    def __init__(self, reg_layer, n_outputs=1):
        """Build the model's only layer.

        Args:
            reg_layer: regularizer passed to the Dense layer as its
                kernel_regularizer.
            n_outputs: number of units of the Dense output layer.
        """
        super(TestModel, self).__init__(name='test')
        dense = tf.keras.layers.Dense(n_outputs, kernel_regularizer=reg_layer)
        self.output_layer = dense

    def call(self, inputs):
        """Return the output layer applied to inputs."""
        return self.output_layer(inputs)
|
|
|
|
|
|
# Build the loss first, then wrap a plain SGD optimizer in the Bolton optimizer.
loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
optimizer = Bolton(tf.optimizers.SGD(), loss)
"""Now, we instantiate our model and satisfy requirement 1. Since our loss
requires L2 regularization over the kernel, we will pass it to the model."""
n_outputs = 1  # parameter for model and optimizer context.
kernel_regularizer = loss.kernel_regularizer()
test_model = TestModel(kernel_regularizer, n_outputs)
test_model.compile(optimizer, loss)
|
"""We comply with 2., and use the Bolton Optimizer as a context around the fit
method."""
# parameters for context
noise_distribution = 'laplace'
epsilon = 2
# Previously, the fit method auto-detected the class_weights. Here, we need to
# pass the class_weights explicitly. 1 is the equivalent of None.
class_weights = 1
n_samples = 20
batch_size = 5

# The fit call has to run inside the optimizer's context.
with optimizer(
    noise_distribution=noise_distribution,
    epsilon=epsilon,
    layers=test_model.layers,
    class_weights=class_weights,
    n_samples=n_samples,
    batch_size=batch_size,
) as _:
    test_model.fit(x, y, batch_size=batch_size, epochs=2)
|