From 5bac796ee684023f852b1ba3b7a460d356cdfc66 Mon Sep 17 00:00:00 2001
From: an1006634493 <35823777+an1006634493@users.noreply.github.com>
Date: Mon, 18 Mar 2019 22:09:46 +0800
Subject: [PATCH 01/11] Record one change for Python3

In Python 3, cPickle is replaced by pickle.
---
 research/pate_2017/input.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index 0a1d89f..c2d669e 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function

 import cPickle
+#import pickle #for python 3
 import gzip
 import math
 import numpy as np
@@ -156,6 +157,7 @@ def unpickle_cifar_dic(file):
   """
   fo = open(file, 'rb')
   dict = cPickle.load(fo)
+  #dict = pickle.load(fo) #for python 3
   fo.close()
   return dict['data'], dict['labels']

From d139d94f32ca6e150220f8fc560b81cd1e9ca61a Mon Sep 17 00:00:00 2001
From: Nicolas Papernot
Date: Mon, 18 Mar 2019 07:29:18 -0700
Subject: [PATCH 02/11] Update input.py

---
 research/pate_2017/input.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index c2d669e..bcdb6d0 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -17,8 +17,10 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import cPickle
-#import pickle #for python 3
+try:
+  import cPickle as pickle
+except:
+  import pickle
 import gzip
 import math
 import numpy as np
@@ -156,8 +158,7 @@ def unpickle_cifar_dic(file):
   :return: tuple of (images, labels)
   """
   fo = open(file, 'rb')
-  dict = cPickle.load(fo)
-  #dict = pickle.load(fo) #for python 3
+  dict = pickle.load(fo)
   fo.close()
   return dict['data'], dict['labels']

From 469c2da8c4efdf91983e24a1f600f0898637979c Mon Sep 17 00:00:00 2001
From: Nicolas Papernot
Date: Mon, 18 Mar 2019 09:21:04 -0700
Subject: [PATCH 03/11] Update input.py

---
 research/pate_2017/input.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index bcdb6d0..382787e 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -17,10 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-try:
-  import cPickle as pickle
-except:
-  import pickle
+from six.moves import pickle
 import gzip
 import math
 import numpy as np

From 0b5780b4e6751863cc948b3fb16c764dc9cbbbfb Mon Sep 17 00:00:00 2001
From: Nicolas Papernot
Date: Mon, 18 Mar 2019 09:22:38 -0700
Subject: [PATCH 04/11] Update input.py

---
 research/pate_2017/input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index 382787e..ac59b9c 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from six.moves import pickle
+from six.moves import cPickle as pickle
 import gzip
 import math
 import numpy as np
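Patches 01-04 converge, step by step, on a single import that works under both Python 2 and Python 3: the commented-out hint becomes a try/except (whose bare `except:` would itself draw a lint warning), then a `six.moves` import; patch 04 fixes the name, since `six.moves` exposes this module as `cPickle`, not `pickle`. A minimal sketch of what the final import line resolves to on each interpreter (illustrative only, not part of the series):

    import sys

    if sys.version_info[0] == 2:
      import cPickle as pickle  # C-accelerated pickler, Python 2 only
    else:
      import pickle  # Python 3 folded the C implementation into `pickle`

    # Either way the module is used under the single name `pickle`:
    roundtrip = pickle.loads(pickle.dumps({'data': [1, 2, 3]}))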
From 38ae6ce3b3586a046202590f32d902813e3890e2 Mon Sep 17 00:00:00 2001
From: Nicolas Papernot
Date: Mon, 18 Mar 2019 09:23:59 -0700
Subject: [PATCH 05/11] reorder imports

---
 research/pate_2017/input.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index ac59b9c..c27ff42 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -17,17 +17,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from six.moves import cPickle as pickle
 import gzip
 import math
-import numpy as np
 import os
-from scipy.io import loadmat as loadmat
-from six.moves import urllib
-from six.moves import xrange
 import sys
 import tarfile

+import numpy as np
+from scipy.io import loadmat as loadmat
+from six.moves import cPickle as pickle
+from six.moves import urllib
+from six.moves import xrange
 import tensorflow as tf

 FLAGS = tf.flags.FLAGS

From ec2204ac97ee2e1a7fb945eae514890be61b99dd Mon Sep 17 00:00:00 2001
From: Nicolas Papernot
Date: Mon, 18 Mar 2019 09:27:57 -0700
Subject: [PATCH 06/11] rename var

---
 research/pate_2017/input.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index c27ff42..c325941 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -127,10 +127,10 @@ def extract_svhn(local_url):

   with tf.gfile.Open(local_url, mode='r') as file_obj:
     # Load MATLAB matrix using scipy IO
-    dict = loadmat(file_obj)
+    data_dict = loadmat(file_obj)

     # Extract each dictionary (one for data, one for labels)
-    data, labels = dict["X"], dict["y"]
+    data, labels = data_dict["X"], data_dict["y"]

     # Set np type
     data = np.asarray(data, dtype=np.float32)
@@ -155,9 +155,9 @@ def unpickle_cifar_dic(file):
   :return: tuple of (images, labels)
   """
   fo = open(file, 'rb')
-  dict = pickle.load(fo)
+  data_dict = pickle.load(fo)
   fo.close()
-  return dict['data'], dict['labels']
+  return data_dict['data'], data_dict['labels']
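The rename in patch 06 is more than style: binding the name `dict` shadows the builtin for the rest of the scope, so any later call to `dict(...)` there would fail. A small illustration (hypothetical code, not from the patch):

    dict = {'X': [0.0], 'y': [1]}  # shadows the builtin `dict`
    try:
      dict(a=1)  # TypeError: 'dict' object is not callable
    except TypeError as err:
      print(err)
    del dict  # the builtin becomes reachable again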
From 4784b0f31eeba332d779e41526f0cd20a6be8967 Mon Sep 17 00:00:00 2001
From: npapernot
Date: Mon, 18 Mar 2019 16:42:59 +0000
Subject: [PATCH 07/11] pylint edits

---
 research/pate_2017/input.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index c325941..65c300b 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -100,14 +100,14 @@ def image_whitening(data):
   nb_pixels = np.shape(data)[1] * np.shape(data)[2] * np.shape(data)[3]

   # Subtract mean
-  mean = np.mean(data, axis=(1,2,3))
+  mean = np.mean(data, axis=(1, 2, 3))

   ones = np.ones(np.shape(data)[1:4], dtype=np.float32)
   for i in xrange(len(data)):
     data[i, :, :, :] -= mean[i] * ones

   # Compute adjusted standard variance
-  adj_std_var = np.maximum(np.ones(len(data), dtype=np.float32) / math.sqrt(nb_pixels), np.std(data, axis=(1,2,3))) #NOLINT(long-line)
+  adj_std_var = np.maximum(np.ones(len(data), dtype=np.float32) / math.sqrt(nb_pixels), np.std(data, axis=(1, 2, 3)))  # pylint: disable=line-too-long

   # Divide image
   for i in xrange(len(data)):
@@ -148,15 +148,15 @@ def extract_svhn(local_url):
   return data, labels


-def unpickle_cifar_dic(file):
+def unpickle_cifar_dic(file):  # pylint: disable=redefined-builtin
   """
   Helper function: unpickles a dictionary (used for loading CIFAR)
   :param file: filename of the pickle
   :return: tuple of (images, labels)
   """
-  fo = open(file, 'rb')
-  data_dict = pickle.load(fo)
-  fo.close()
+  file_obj = open(file, 'rb')
+  data_dict = pickle.load(file_obj)
+  file_obj.close()
   return data_dict['data'], data_dict['labels']
@@ -176,8 +176,8 @@ def extract_cifar10(local_url, data_dir):
                         '/cifar10_test_labels.npy']

   all_preprocessed = True
-  for file in preprocessed_files:
-    if not tf.gfile.Exists(data_dir + file):
+  for file_name in preprocessed_files:
+    if not tf.gfile.Exists(data_dir + file_name):
       all_preprocessed = False
       break
@@ -197,7 +197,7 @@ def extract_cifar10(local_url, data_dir):
   else:
     # Do everything from scratch
     # Define lists of all files we should extract
-    train_files = ["data_batch_" + str(i) for i in xrange(1,6)]
+    train_files = ["data_batch_" + str(i) for i in xrange(1, 6)]
     test_file = ["test_batch"]
     cifar10_files = train_files + test_file
@@ -227,7 +227,7 @@ def extract_cifar10(local_url, data_dir):
       labels.append(labels_tmp)

     # Convert to numpy arrays and reshape in the expected format
-    train_data = np.asarray(images, dtype=np.float32).reshape((50000,3,32,32))
+    train_data = np.asarray(images, dtype=np.float32).reshape((50000, 3, 32, 32))
     train_data = np.swapaxes(train_data, 1, 3)
     train_labels = np.asarray(labels, dtype=np.int32).reshape(50000)
@@ -242,7 +242,7 @@ def extract_cifar10(local_url, data_dir):
     test_data, test_images = unpickle_cifar_dic(filename)

     # Convert to numpy arrays and reshape in the expected format
-    test_data = np.asarray(test_data,dtype=np.float32).reshape((10000,3,32,32))
+    test_data = np.asarray(test_data, dtype=np.float32).reshape((10000, 3, 32, 32))
     test_data = np.swapaxes(test_data, 1, 3)
     test_labels = np.asarray(test_images, dtype=np.int32).reshape(10000)
@@ -332,7 +332,7 @@ def ld_svhn(extended=False, test_only=False):
     return train_data, train_labels, test_data, test_labels
   else:
     # Return training and extended training data separately
-    return train_data,train_labels, test_data,test_labels, ext_data,ext_labels
+    return train_data, train_labels, test_data, test_labels, ext_data, ext_labels
@@ -377,7 +377,7 @@ def ld_mnist(test_only=False):
                'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
-              ]
+               ]

   # Maybe download data and retrieve local storage urls
   local_urls = maybe_download(file_urls, FLAGS.data_dir)
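The two long whitening lines reformatted in patch 07 implement per-image standardization: subtract each image's mean, then divide by max(std, 1/sqrt(nb_pixels)) so near-constant images do not trigger a division by (almost) zero. A vectorized numpy sketch of the same computation (illustrative; the patched code loops per image instead):

    import math

    import numpy as np

    data = np.random.rand(2, 32, 32, 3).astype(np.float32)  # (ID, H, W, C)
    nb_pixels = data.shape[1] * data.shape[2] * data.shape[3]
    mean = data.mean(axis=(1, 2, 3), keepdims=True)
    adj_std = np.maximum(data.std(axis=(1, 2, 3), keepdims=True),
                         1.0 / math.sqrt(nb_pixels))
    whitened = (data - mean) / adj_std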
From b6c932ec661caf0c7d248d79e4e62739ae0e228c Mon Sep 17 00:00:00 2001
From: npapernot
Date: Mon, 18 Mar 2019 16:47:36 +0000
Subject: [PATCH 08/11] quotes

---
 research/pate_2017/input.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index 65c300b..fd7a78d 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -130,7 +130,7 @@ def extract_svhn(local_url):
     data_dict = loadmat(file_obj)

     # Extract each dictionary (one for data, one for labels)
-    data, labels = data_dict["X"], data_dict["y"]
+    data, labels = data_dict['X'], data_dict['y']

     # Set np type
     data = np.asarray(data, dtype=np.float32)
@@ -197,8 +197,8 @@ def extract_cifar10(local_url, data_dir):
   else:
     # Do everything from scratch
     # Define lists of all files we should extract
-    train_files = ["data_batch_" + str(i) for i in xrange(1, 6)]
-    test_file = ["test_batch"]
+    train_files = ['data_batch_' + str(i) for i in xrange(1, 6)]
+    test_file = ['test_batch']
     cifar10_files = train_files + test_file

     # Check if all files have already been extracted
@@ -217,7 +217,7 @@ def extract_cifar10(local_url, data_dir):
     labels = []
     for file in train_files:
       # Construct filename
-      filename = data_dir + "/cifar-10-batches-py/" + file
+      filename = data_dir + '/cifar-10-batches-py/' + file

       # Unpickle dictionary and extract images and labels
       images_tmp, labels_tmp = unpickle_cifar_dic(filename)
@@ -236,7 +236,7 @@ def extract_cifar10(local_url, data_dir):
       np.save(data_dir + preprocessed_files[1], train_labels)

     # Construct filename for test file
-    filename = data_dir + "/cifar-10-batches-py/" + test_file[0]
+    filename = data_dir + '/cifar-10-batches-py/' + test_file[0]

     # Load test images and labels
     test_data, test_images = unpickle_cifar_dic(filename)
@@ -260,7 +260,7 @@ def extract_mnist_data(filename, num_images, image_size, pixel_depth):
   Values are rescaled from [0, 255] down to [-0.5, 0.5].
   """
   # if not os.path.exists(file):
-  if not tf.gfile.Exists(filename+".npy"):
+  if not tf.gfile.Exists(filename+'.npy'):
     with gzip.open(filename) as bytestream:
       bytestream.read(16)
       buf = bytestream.read(image_size * image_size * num_images)
@@ -270,7 +270,7 @@ def extract_mnist_data(filename, num_images, image_size, pixel_depth):
       np.save(filename, data)
     return data
   else:
-    with tf.gfile.Open(filename+".npy", mode='r') as file_obj:
+    with tf.gfile.Open(filename+'.npy', mode='r') as file_obj:
       return np.load(file_obj)
@@ -279,7 +279,7 @@ def extract_mnist_labels(filename, num_images):
   """
   Extract the labels into a vector of int64 label IDs.
   """
   # if not os.path.exists(file):
-  if not tf.gfile.Exists(filename+".npy"):
+  if not tf.gfile.Exists(filename+'.npy'):
     with gzip.open(filename) as bytestream:
       bytestream.read(8)
       buf = bytestream.read(1 * num_images)
@@ -287,7 +287,7 @@ def extract_mnist_labels(filename, num_images):
       np.save(filename, labels)
     return labels
   else:
-    with tf.gfile.Open(filename+".npy", mode='r') as file_obj:
+    with tf.gfile.Open(filename+'.npy', mode='r') as file_obj:
       return np.load(file_obj)
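The `bytestream.read(16)` and `bytestream.read(8)` calls touched by these quote fixes skip the MNIST IDX headers: four 4-byte big-endian fields (magic number, image count, rows, columns) for images, and two for labels. A self-contained sketch of the image path, using the same [0, 255] -> [-0.5, 0.5] rescaling as the file (`load_mnist_images` is a hypothetical name, not the committed function):

    import gzip

    import numpy as np

    def load_mnist_images(path, num_images, image_size=28, pixel_depth=255):
      """Parse a gzipped IDX image file, as extract_mnist_data does (sketch)."""
      with gzip.open(path) as bytestream:
        bytestream.read(16)  # skip magic number, image count, rows, columns
        buf = bytestream.read(image_size * image_size * num_images)
      data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
      data = (data - pixel_depth / 2.0) / pixel_depth  # [0, 255] -> [-0.5, 0.5]
      return data.reshape(num_images, image_size, image_size, 1)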
""" - # if not os.path.exists(file): if not tf.gfile.Exists(filename+'.npy'): with gzip.open(filename) as bytestream: bytestream.read(8) From a209988d870489ebc17725ca6427c6d5d8ea9bcc Mon Sep 17 00:00:00 2001 From: npapernot Date: Mon, 18 Mar 2019 16:58:06 +0000 Subject: [PATCH 10/11] docstrings --- research/pate_2017/input.py | 72 ++++++++++++------------------------- 1 file changed, 22 insertions(+), 50 deletions(-) diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py index 14c8023..86f302c 100644 --- a/research/pate_2017/input.py +++ b/research/pate_2017/input.py @@ -34,11 +34,7 @@ FLAGS = tf.flags.FLAGS def create_dir_if_needed(dest_directory): - """ - Create directory if doesn't exist - :param dest_directory: - :return: True if everything went well - """ + """Create directory if doesn't exist.""" if not tf.gfile.IsDirectory(dest_directory): tf.gfile.MakeDirs(dest_directory) @@ -46,11 +42,8 @@ def create_dir_if_needed(dest_directory): def maybe_download(file_urls, directory): - """ - Download a set of files in temporary local folder - :param directory: the directory where to download - :return: a tuple of filepaths corresponding to the files given as input - """ + """Download a set of files in temporary local folder.""" + # Create directory if doesn't exist assert create_dir_if_needed(directory) @@ -91,8 +84,6 @@ def image_whitening(data): """ Subtracts mean of image and divides by adjusted standard variance (for stability). Operations are per image but performed for the entire array. - :param image: 4D array (ID, Height, Weight, Channel) - :return: 4D array (ID, Height, Weight, Channel) """ assert len(np.shape(data)) == 4 @@ -119,11 +110,7 @@ def image_whitening(data): def extract_svhn(local_url): - """ - Extract a MATLAB matrix into two numpy arrays with data and labels - :param local_url: - :return: - """ + """Extract a MATLAB matrix into two numpy arrays with data and labels""" with tf.gfile.Open(local_url, mode='r') as file_obj: # Load MATLAB matrix using scipy IO @@ -149,11 +136,7 @@ def extract_svhn(local_url): def unpickle_cifar_dic(file_path): - """ - Helper function: unpickles a dictionary (used for loading CIFAR) - :param file_path: filename of the pickle - :return: tuple of (images, labels) - """ + """Helper function: unpickles a dictionary (used for loading CIFAR)""" file_obj = open(file_path, 'rb') data_dict = pickle.load(file_obj) file_obj.close() @@ -161,12 +144,8 @@ def unpickle_cifar_dic(file_path): def extract_cifar10(local_url, data_dir): - """ - Extracts the CIFAR-10 dataset and return numpy arrays with the different sets - :param local_url: where the tar.gz archive is located locally - :param data_dir: where to extract the archive's file - :return: a tuple (train data, train labels, test data, test labels) - """ + """Extracts CIFAR-10 and return numpy arrays with the different sets""" + # These numpy dumps can be reloaded to avoid performing the pre-processing # if they exist in the working directory. # Changing the order of this list will ruin the indices below. 
From a209988d870489ebc17725ca6427c6d5d8ea9bcc Mon Sep 17 00:00:00 2001
From: npapernot
Date: Mon, 18 Mar 2019 16:58:06 +0000
Subject: [PATCH 10/11] docstrings

---
 research/pate_2017/input.py | 72 ++++++++++++------------------------
 1 file changed, 22 insertions(+), 50 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index 14c8023..86f302c 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -34,11 +34,7 @@ FLAGS = tf.flags.FLAGS


 def create_dir_if_needed(dest_directory):
-  """
-  Create directory if doesn't exist
-  :param dest_directory:
-  :return: True if everything went well
-  """
+  """Create directory if doesn't exist."""
   if not tf.gfile.IsDirectory(dest_directory):
     tf.gfile.MakeDirs(dest_directory)

@@ -46,11 +42,8 @@ def create_dir_if_needed(dest_directory):


 def maybe_download(file_urls, directory):
-  """
-  Download a set of files in temporary local folder
-  :param directory: the directory where to download
-  :return: a tuple of filepaths corresponding to the files given as input
-  """
+  """Download a set of files in temporary local folder."""
+
   # Create directory if doesn't exist
   assert create_dir_if_needed(directory)

@@ -91,8 +84,6 @@ def image_whitening(data):
   """
   Subtracts mean of image and divides by adjusted standard variance (for
   stability). Operations are per image but performed for the entire array.
-  :param image: 4D array (ID, Height, Weight, Channel)
-  :return: 4D array (ID, Height, Weight, Channel)
   """
   assert len(np.shape(data)) == 4

@@ -119,11 +110,7 @@ def image_whitening(data):


 def extract_svhn(local_url):
-  """
-  Extract a MATLAB matrix into two numpy arrays with data and labels
-  :param local_url:
-  :return:
-  """
+  """Extract a MATLAB matrix into two numpy arrays with data and labels"""

   with tf.gfile.Open(local_url, mode='r') as file_obj:
     # Load MATLAB matrix using scipy IO
@@ -149,11 +136,7 @@ def extract_svhn(local_url):


 def unpickle_cifar_dic(file_path):
-  """
-  Helper function: unpickles a dictionary (used for loading CIFAR)
-  :param file_path: filename of the pickle
-  :return: tuple of (images, labels)
-  """
+  """Helper function: unpickles a dictionary (used for loading CIFAR)"""
   file_obj = open(file_path, 'rb')
   data_dict = pickle.load(file_obj)
   file_obj.close()
@@ -161,12 +144,8 @@ def unpickle_cifar_dic(file_path):


 def extract_cifar10(local_url, data_dir):
-  """
-  Extracts the CIFAR-10 dataset and return numpy arrays with the different sets
-  :param local_url: where the tar.gz archive is located locally
-  :param data_dir: where to extract the archive's file
-  :return: a tuple (train data, train labels, test data, test labels)
-  """
+  """Extracts CIFAR-10 and return numpy arrays with the different sets"""
+
   # These numpy dumps can be reloaded to avoid performing the pre-processing
   # if they exist in the working directory.
   # Changing the order of this list will ruin the indices below.
@@ -203,8 +182,8 @@ def extract_cifar10(local_url, data_dir):

     # Check if all files have already been extracted
     need_to_unpack = False
-    for file in cifar10_files:
-      if not tf.gfile.Exists(file):
+    for file_name in cifar10_files:
+      if not tf.gfile.Exists(file_name):
         need_to_unpack = True
         break
@@ -292,9 +271,10 @@ def extract_mnist_labels(filename, num_images):
 def ld_svhn(extended=False, test_only=False):
   """
   Load the original SVHN data
-  :param extended: include extended training data in the returned array
-  :param test_only: disables loading of both train and extra -> large speed up
-  :return: tuple of arrays which depend on the parameters
+
+  Args:
+    extended: include extended training data in the returned array
+    test_only: disables loading of both train and extra -> large speed up
   """
   # Define files to be downloaded
   # WARNING: changing the order of this list will break indices (cf. below)
@@ -334,12 +314,8 @@ def ld_svhn(extended=False, test_only=False):


 def ld_cifar10(test_only=False):
-  """
-  Load the original CIFAR10 data
-  :param extended: include extended training data in the returned array
-  :param test_only: disables loading of both train and extra -> large speed up
-  :return: tuple of arrays which depend on the parameters
-  """
+  """Load the original CIFAR10 data"""
+
   # Define files to be downloaded
   file_urls = ['https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz']
@@ -363,12 +339,7 @@ def ld_cifar10(test_only=False):


 def ld_mnist(test_only=False):
-  """
-  Load the MNIST dataset
-  :param extended: include extended training data in the returned array
-  :param test_only: disables loading of both train and extra -> large speed up
-  :return: tuple of arrays which depend on the parameters
-  """
+  """Load the MNIST dataset."""
  # Define files to be downloaded
  # WARNING: changing the order of this list will break indices (cf. below)
  file_urls = ['http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
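Patch 10 rewrites the reST-style `:param:`/`:return:` docstrings into the Google style used across TensorFlow code: a one-line summary, then indented `Args:` and `Returns:` sections. For example, `maybe_download` could carry its old return description forward like this (a sketch of the convention, not what the patch commits):

    def maybe_download(file_urls, directory):
      """Download a set of files in temporary local folder.

      Args:
        file_urls: URLs of the files to download.
        directory: the directory where to download.

      Returns:
        A tuple of filepaths corresponding to the files given as input.
      """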
@@ -396,12 +367,13 @@ def partition_dataset(data, labels, nb_teachers, teacher_id):
   """
   Simple partitioning algorithm that returns the right portion of the data
   needed by a given teacher out of a certain nb of teachers
-  :param data: input data to be partitioned
-  :param labels: output data to be partitioned
-  :param nb_teachers: number of teachers in the ensemble (affects size of each
-                      partition)
-  :param teacher_id: id of partition to retrieve
-  :return:
+
+  Args:
+    data: input data to be partitioned
+    labels: output data to be partitioned
+    nb_teachers: number of teachers in the ensemble (affects size of each
+      partition)
+    teacher_id: id of partition to retrieve
   """

   # Sanity check

From 2aa9debb91732b7025788ca923aa1001032b86e4 Mon Sep 17 00:00:00 2001
From: npapernot
Date: Mon, 18 Mar 2019 17:01:25 +0000
Subject: [PATCH 11/11] glint

---
 research/pate_2017/input.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/research/pate_2017/input.py b/research/pate_2017/input.py
index 86f302c..d838806 100644
--- a/research/pate_2017/input.py
+++ b/research/pate_2017/input.py
@@ -110,7 +110,7 @@ def image_whitening(data):


 def extract_svhn(local_url):
-  """Extract a MATLAB matrix into two numpy arrays with data and labels"""
+  """Extract a MATLAB matrix into two numpy arrays with data and labels."""

   with tf.gfile.Open(local_url, mode='r') as file_obj:
     # Load MATLAB matrix using scipy IO
@@ -136,7 +136,7 @@ def extract_svhn(local_url):


 def unpickle_cifar_dic(file_path):
-  """Helper function: unpickles a dictionary (used for loading CIFAR)"""
+  """Helper function: unpickles a dictionary (used for loading CIFAR)."""
   file_obj = open(file_path, 'rb')
   data_dict = pickle.load(file_obj)
   file_obj.close()
@@ -144,7 +144,7 @@ def unpickle_cifar_dic(file_path):


 def extract_cifar10(local_url, data_dir):
-  """Extracts CIFAR-10 and return numpy arrays with the different sets"""
+  """Extracts CIFAR-10 and return numpy arrays with the different sets."""

   # These numpy dumps can be reloaded to avoid performing the pre-processing
   # if they exist in the working directory.
@@ -206,7 +206,8 @@ def extract_cifar10(local_url, data_dir):
       labels.append(labels_tmp)

     # Convert to numpy arrays and reshape in the expected format
-    train_data = np.asarray(images, dtype=np.float32).reshape((50000, 3, 32, 32))
+    train_data = np.asarray(images, dtype=np.float32)
+    train_data = train_data.reshape((50000, 3, 32, 32))
     train_data = np.swapaxes(train_data, 1, 3)
     train_labels = np.asarray(labels, dtype=np.int32).reshape(50000)
@@ -221,7 +222,8 @@ def extract_cifar10(local_url, data_dir):
     test_data, test_images = unpickle_cifar_dic(filename)

     # Convert to numpy arrays and reshape in the expected format
-    test_data = np.asarray(test_data, dtype=np.float32).reshape((10000, 3, 32, 32))
+    test_data = np.asarray(test_data, dtype=np.float32)
+    test_data = test_data.reshape((10000, 3, 32, 32))
     test_data = np.swapaxes(test_data, 1, 3)
     test_labels = np.asarray(test_images, dtype=np.int32).reshape(10000)
@@ -314,7 +316,7 @@ def ld_svhn(extended=False, test_only=False):


 def ld_cifar10(test_only=False):
-  """Load the original CIFAR10 data"""
+  """Load the original CIFAR10 data."""

   # Define files to be downloaded
   file_urls = ['https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz']
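The `partition_dataset` docstring reworked in patch 10 describes a contiguous split: teacher `teacher_id` out of `nb_teachers` receives the slice of length `len(data) // nb_teachers` starting at `teacher_id * batch_len`. A standalone sketch consistent with that docstring (illustrative; the committed implementation may differ in details):

    def partition_dataset(data, labels, nb_teachers, teacher_id):
      """Return the (data, labels) slice assigned to one teacher (sketch)."""
      assert len(data) == len(labels)
      assert 0 <= teacher_id < nb_teachers
      batch_len = len(data) // nb_teachers
      start = teacher_id * batch_len
      end = (teacher_id + 1) * batch_len
      return data[start:end], labels[start:end]

    # Example: with 5000 samples and 10 teachers, teacher 3 gets samples 1500-1999.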