From d435fcbf9af0d1ffd41ebc15ae23595124f7aa73 Mon Sep 17 00:00:00 2001
From: Ilya Mironov
Date: Wed, 6 Feb 2019 11:06:10 -0800
Subject: [PATCH] Updating README.md to reflect ReLU activation function. +
 clean-up of mnist_dpsgd_tutorial

PiperOrigin-RevId: 232707393
---
 tutorials/README.md               | 27 ++++++++++++-----------
 tutorials/mnist_dpsgd_tutorial.py | 36 +++++++++++++++----------------
 2 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/tutorials/README.md b/tutorials/README.md
index 2f8589a..6328c2f 100644
--- a/tutorials/README.md
+++ b/tutorials/README.md
@@ -60,14 +60,14 @@ When the script is run with the default parameters, the output will contain the
 following lines (leaving out a lot of diagnostic info):
 ```
 ...
-Test accuracy after 1 epochs is: 0.743
-For delta=1e-5, the current epsilon is: 1.00
+Test accuracy after 1 epochs is: 0.774
+For delta=1e-5, the current epsilon is: 1.03
 ...
-Test accuracy after 2 epochs is: 0.839
-For delta=1e-5, the current epsilon is: 1.04
+Test accuracy after 2 epochs is: 0.877
+For delta=1e-5, the current epsilon is: 1.11
 ...
 Test accuracy after 60 epochs is: 0.966
-For delta=1e-5, the current epsilon is: 2.92
+For delta=1e-5, the current epsilon is: 3.01
 ```
 
 ## Using Command-Line Interface for Privacy Budgeting
@@ -77,22 +77,23 @@
 to compute, quickly and accurately, privacy loss at any point of the training.
 To do so, run the script `privacy/analysis/compute_dp_sgd_privacy.py`, which
 does not have any TensorFlow dependencies. For example, executing
 ```
-compute_dp_sgd_privacy.py --N=60000 --batch_size=256 --noise_multiplier=1.12 --epochs=60 --delta=1e-5
+compute_dp_sgd_privacy.py --N=60000 --batch_size=256 --noise_multiplier=1.1 --epochs=60 --delta=1e-5
 ```
 allows us to conclude, in a matter of seconds, that DP-SGD run with default
-parameters satisfies differential privacy with eps = 2.92 and delta = 1e-05.
+parameters satisfies differential privacy with eps = 3.01 and delta = 1e-05.
 
 ## Select Parameters
 
 The table below has a few sample parameters illustrating various accuracy/privacy
 tradeoffs (default parameters are in __bold__; privacy epsilon is reported
-at delta=1e-5; accuracy is averaged over 10 runs).
+at delta=1e-5; accuracy is averaged over 10 runs; its standard deviation is
+less than .3% in all cases).
 
 | Learning rate | Noise multiplier | Clipping threshold | Number of microbatches | Number of epochs | Privacy eps | Accuracy |
-| ------------- | ---------------- | ----------------- | --------------------- | ---------------- | ----------- | -------- |
-| 0.1 | | | __256__ | 10 | no privacy | 98.8% |
-| 0.32 | 1.2 | __1.0__ | __256__ | 10 | 1.20 | 95.0% |
-| __0.08__ | __1.12__ | __1.0__ | __256__ | __60__ | 2.92 | 96.6% |
-| 0.4 | 0.6 | __1.0__ | __256__ | 30 | 9.74 | 97.3% |
+| ------------- | ---------------- | ----------------- | ---------------------- | ---------------- | ----------- | -------- |
+| 0.1 | | | __256__ | 20 | no privacy | 99.0% |
+| 0.25 | 1.3 | 1.5 | __256__ | 15 | 1.19 | 95.0% |
+| __0.15__ | __1.1__ | __1.0__ | __256__ | __60__ | 3.01 | 96.6% |
+| 0.25 | 0.7 | 1.5 | __256__ | 45 | 7.10 | 97.0% |
diff --git a/tutorials/mnist_dpsgd_tutorial.py b/tutorials/mnist_dpsgd_tutorial.py
index 4e00853..9d182a1 100644
--- a/tutorials/mnist_dpsgd_tutorial.py
+++ b/tutorials/mnist_dpsgd_tutorial.py
@@ -27,8 +27,8 @@ from privacy.optimizers import dp_optimizer
 
 tf.flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, '
                         'train with vanilla SGD.')
-tf.flags.DEFINE_float('learning_rate', 0.08, 'Learning rate for training')
-tf.flags.DEFINE_float('noise_multiplier', 1.12,
+tf.flags.DEFINE_float('learning_rate', .15, 'Learning rate for training')
+tf.flags.DEFINE_float('noise_multiplier', 1.1,
                       'Ratio of the standard deviation to the clipping norm')
 tf.flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
 tf.flags.DEFINE_integer('batch_size', 256, 'Batch size')
@@ -121,12 +121,26 @@ def load_mnist():
   assert train_data.max() == 1.
   assert test_data.min() == 0.
   assert test_data.max() == 1.
-  assert len(train_labels.shape) == 1
-  assert len(test_labels.shape) == 1
+  assert train_labels.ndim == 1
+  assert test_labels.ndim == 1
 
   return train_data, train_labels, test_data, test_labels
 
 
+def compute_epsilon(steps):
+  """Computes epsilon value for given hyperparameters."""
+  if FLAGS.noise_multiplier == 0.0:
+    return float('inf')
+  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+  sampling_probability = FLAGS.batch_size / 60000
+  rdp = compute_rdp(q=sampling_probability,
+                    noise_multiplier=FLAGS.noise_multiplier,
+                    steps=steps,
+                    orders=orders)
+  # Delta is set to 1e-5 because MNIST has 60000 training points.
+  return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
+
+
 def main(unused_argv):
   tf.logging.set_verbosity(tf.logging.INFO)
   if FLAGS.batch_size % FLAGS.microbatches != 0:
@@ -152,20 +166,6 @@ def main(unused_argv):
       num_epochs=1,
       shuffle=False)
 
-  # Define a function that computes privacy budget expended so far.
-  def compute_epsilon(steps):
-    """Computes epsilon value for given hyperparameters."""
-    if FLAGS.noise_multiplier == 0.0:
-      return float('inf')
-    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
-    sampling_probability = FLAGS.batch_size / 60000
-    rdp = compute_rdp(q=sampling_probability,
-                      noise_multiplier=FLAGS.noise_multiplier,
-                      steps=steps,
-                      orders=orders)
-    # Delta is set to 1e-5 because MNIST has 60000 training points.
-    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
-
   # Training loop.
   steps_per_epoch = 60000 // FLAGS.batch_size
   for epoch in range(1, FLAGS.epochs + 1):
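
Note (not part of the patch): the updated eps = 3.01 figure can be sanity-checked without running the full tutorial. Below is a minimal sketch, assuming the `privacy` package from this repository is importable and that `privacy.analysis.rdp_accountant` exposes `compute_rdp` and `get_privacy_spent` as the tutorial's calls suggest; it hard-codes the new defaults (batch size 256, noise multiplier 1.1, 60 epochs, 60000 training points) instead of reading FLAGS.

```
# Sketch only: recompute the epsilon reported in the README using the same
# RDP accountant calls as compute_epsilon() in mnist_dpsgd_tutorial.py.
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent

# Same RDP orders as the tutorial.
orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
q = 256 / 60000.0            # sampling probability = batch_size / N
steps = 60 * (60000 // 256)  # epochs * steps_per_epoch
rdp = compute_rdp(q=q, noise_multiplier=1.1, steps=steps, orders=orders)
eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
print('For delta=1e-5, the current epsilon is: %.2f' % eps)  # expect ~3.01
```

This should agree with the `compute_dp_sgd_privacy.py` invocation quoted in the README, since both go through the same accountant.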