# Copyright 2018, The TensorFlow Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Bolton model for bolton method of differentially private ML""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf from tensorflow.python.keras.models import Model from tensorflow.python.keras import optimizers from tensorflow.python.framework import ops as _ops from privacy.bolton.loss import StrongConvexMixin from privacy.bolton.optimizer import Bolton _accepted_distributions = ['laplace'] class BoltonModel(Model): """ Bolton episilon-delta model Uses 4 key steps to achieve privacy guarantees: 1. Adds noise to weights after training (output perturbation). 2. Projects weights to R after each batch 3. Limits learning rate 4. Use a strongly convex loss function (see compile) For more details on the strong convexity requirements, see: Bolt-on Differential Privacy for Scalable Stochastic Gradient Descent-based Analytics by Xi Wu et. al. """ def __init__(self, n_classes, # noise_distribution='laplace', seed=1, dtype=tf.float32 ): """ private constructor. Args: n_classes: number of output classes to predict. 
epsilon: level of privacy guarantee noise_distribution: distribution to pull weight perturbations from weights_initializer: initializer for weights seed: random seed to use dtype: data type to use for tensors """ # if noise_distribution not in _accepted_distributions: # raise ValueError('Detected noise distribution: {0} not one of: {1} valid' # 'distributions'.format(noise_distribution, # _accepted_distributions)) # if epsilon <= 0: # raise ValueError('Detected epsilon: {0}. ' # 'Valid range is 0 < epsilon R-ball and only normalize then. # # Returns: # # """ # for layer in self.layers: # weight_norm = tf.norm(layer.kernel, axis=0) # if force: # layer.kernel = layer.kernel / (weight_norm / r) # elif tf.reduce_sum(tf.cast(weight_norm > r, dtype=self._dtype)) > 0: # layer.kernel = layer.kernel / (weight_norm / r) # def _get_noise(self, distribution, data_size): # """Sample noise to be added to weights for privacy guarantee # # Args: # distribution: the distribution type to pull noise from # data_size: the number of samples # # Returns: noise in shape of layer's weights to be added to the weights. 
#
#     """
#     distribution = distribution.lower()
#     input_dim = self.layers[0].kernel.numpy().shape[0]
#     loss = self.loss
#     if distribution == _accepted_distributions[0]:  # laplace
#       per_class_epsilon = self.epsilon / (self.n_classes)
#       l2_sensitivity = (2 *
#                         loss.lipchitz_constant(self.class_weight)) / \
#                        (loss.gamma() * data_size)
#       unit_vector = tf.random.normal(shape=(input_dim, self.n_classes),
#                                      mean=0,
#                                      seed=1,
#                                      stddev=1.0,
#                                      dtype=self._dtype)
#       unit_vector = unit_vector / tf.math.sqrt(
#           tf.reduce_sum(tf.math.square(unit_vector), axis=0)
#       )
#
#       beta = l2_sensitivity / per_class_epsilon
#       alpha = input_dim  # input_dim
#       gamma = tf.random.gamma([self.n_classes],
#                               alpha,
#                               beta=1 / beta,
#                               seed=1,
#                               dtype=self._dtype
#                               )
#       return unit_vector * gamma
#     raise NotImplementedError('Noise distribution: {0} is not '
#                               'a valid distribution'.format(distribution))


if __name__ == '__main__':
    # Manual smoke test: download the pix2pix "facades" image pairs, build a
    # tf.data input pipeline from them, and run a single training step of a
    # BoltonModel on that pipeline.
    import tensorflow as tf

    import os
    import time
    import matplotlib.pyplot as plt

    _URL = 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/facades.tar.gz'

    path_to_zip = tf.keras.utils.get_file('facades.tar.gz',
                                          origin=_URL,
                                          extract=True)

    PATH = os.path.join(os.path.dirname(path_to_zip), 'facades/')

    BUFFER_SIZE = 400
    BATCH_SIZE = 1
    IMG_WIDTH = 256
    IMG_HEIGHT = 256

    def load(image_file):
        """Read a side-by-side JPEG and split it into two float32 images.

        Args:
            image_file: path of the JPEG file to read.

        Returns:
            Tuple `(input_image, real_image)`: the right half and the left
            half of the decoded picture, both cast to `tf.float32`.
        """
        image = tf.io.read_file(image_file)
        image = tf.image.decode_jpeg(image)

        # The picture holds both images next to each other; cut it at the
        # horizontal midpoint.
        half_width = tf.shape(image)[1] // 2
        real_image = image[:, :half_width, :]
        input_image = image[:, half_width:, :]

        input_image = tf.cast(input_image, tf.float32)
        real_image = tf.cast(real_image, tf.float32)

        return input_image, real_image

    sample_input, sample_real = load(os.path.join(PATH, 'train', '100.jpg'))
    # Scale the float pixel values into [0, 1] so matplotlib can render them.
    plt.figure()
    plt.imshow(sample_input / 255.0)
    plt.figure()
    plt.imshow(sample_real / 255.0)

    def resize(input_image, real_image, height, width):
        """Resize both images to `height` x `width` (nearest neighbour).

        Args:
            input_image: first image of the pair.
            real_image: second image of the pair.
            height: target height in pixels.
            width: target width in pixels.

        Returns:
            Tuple `(input_image, real_image)` of the resized images.
        """
        input_image = tf.image.resize(
            input_image, [height, width],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        real_image = tf.image.resize(
            real_image, [height, width],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        return input_image, real_image

    def random_crop(input_image, real_image):
        """Take the same random IMG_HEIGHT x IMG_WIDTH crop from both images.

        The images are stacked first so that a single random offset is
        applied to the pair.

        Returns:
            Tuple `(input_image, real_image)` of the cropped images.
        """
        stacked_image = tf.stack([input_image, real_image], axis=0)
        cropped_image = tf.image.random_crop(
            stacked_image, size=[2, IMG_HEIGHT, IMG_WIDTH, 3])

        return cropped_image[0], cropped_image[1]

    def normalize(input_image, real_image):
        """Map pixel values of both images through `x / 127.5 - 1`.

        Returns:
            Tuple `(input_image, real_image)` of the rescaled images.
        """
        input_image = (input_image / 127.5) - 1
        real_image = (real_image / 127.5) - 1

        return input_image, real_image

    @tf.function()
    def random_jitter(input_image, real_image):
        """Augment a pair: upscale, random crop, random horizontal flip.

        Returns:
            Tuple `(input_image, real_image)` of the augmented images.
        """
        # resizing to 286 x 286 x 3
        input_image, real_image = resize(input_image, real_image, 286, 286)

        # randomly cropping to 256 x 256 x 3
        input_image, real_image = random_crop(input_image, real_image)

        if tf.random.uniform(()) > 0.5:
            # random mirroring
            input_image = tf.image.flip_left_right(input_image)
            real_image = tf.image.flip_left_right(real_image)

        return input_image, real_image

    def load_image_train(image_file):
        """Load one training pair: split, jitter, then normalize."""
        input_image, real_image = load(image_file)
        input_image, real_image = random_jitter(input_image, real_image)
        input_image, real_image = normalize(input_image, real_image)

        return input_image, real_image

    def load_image_test(image_file):
        """Load one test pair: split, resize to the model size, normalize."""
        input_image, real_image = load(image_file)
        input_image, real_image = resize(input_image, real_image,
                                         IMG_HEIGHT, IMG_WIDTH)
        input_image, real_image = normalize(input_image, real_image)

        return input_image, real_image

    train_dataset = tf.data.Dataset.list_files(
        os.path.join(PATH, 'train', '*.jpg'))
    train_dataset = train_dataset.shuffle(BUFFER_SIZE)
    train_dataset = train_dataset.map(
        load_image_train,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    train_dataset = train_dataset.batch(BATCH_SIZE)
    # steps_per_epoch = training_utils.infer_steps_for_dataset(
    #     train_dataset, None, epochs=1, steps_name='steps')

    # for batch in train_dataset:
    #   print(batch[1].shape)
    test_dataset = tf.data.Dataset.list_files(
        os.path.join(PATH, 'test', '*.jpg'))
    # shuffling so that for every epoch a different image is generated
    # to predict and display the progress of our model.
    # (This previously re-shuffled `train_dataset`, leaving the test files
    # in listing order despite the comment above.)
    test_dataset = test_dataset.shuffle(BUFFER_SIZE)
    test_dataset = test_dataset.map(load_image_test)
    test_dataset = test_dataset.batch(BATCH_SIZE)

    from tensorflow.python.keras.optimizer_v2 import adam
    from privacy.bolton import loss

    # Positional arguments are passed through unchanged from the original
    # script: BoltonModel(3, 2) and StrongConvexBinaryCrossentropy(1, 2, 1).
    model = BoltonModel(3, 2)
    optimizer = adam.Adam()
    strong_convex_loss = loss.StrongConvexBinaryCrossentropy(1, 2, 1)
    model.compile(optimizer, strong_convex_loss)
    print("Eager execution: {0}".format(tf.executing_eagerly()))
    model.fit(train_dataset, verbose=0, steps_per_epoch=1, n_samples=1)