Renaming stddev_to_sensitivity_ratio to noise_multiplier in rdp_accountant.

PiperOrigin-RevId: 227552068
2019-01-02 11:26:30 -08:00 · 2019-01-02 11:26:30 -08:00 · 01ab549902
commit 01ab549902
parent 205e005f60
4 changed files with 36 additions and 37 deletions
--- a/privacy/analysis/rdp_accountant.py
+++ b/privacy/analysis/rdp_accountant.py
@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""RDP analysis of the Sampled Gaussian mechanism.
+"""RDP analysis of the Sampled Gaussian Mechanism.

 Functionality for computing Renyi differential privacy (RDP) of an additive
-Sampled Gaussian mechanism (SGM). Its public interface consists of two methods:
-  compute_rdp(q, stddev_to_sensitivity_ratio, T, orders) computes RDP with for
-                                   SGM iterated T times.
+Sampled Gaussian Mechanism (SGM). Its public interface consists of two methods:
+  compute_rdp(q, noise_multiplier, T, orders) computes RDP for SGM iterated
+                                   T times.
  get_privacy_spent(orders, rdp, target_eps, target_delta) computes delta
                                   (or eps) given RDP at multiple orders and
                                   a target value for eps (or delta).
@ -63,7 +63,7 @@ def _log_add(logx, logy):
 def _log_sub(logx, logy):
  """Subtract two numbers in the log space. Answer must be non-negative."""
  if logx < logy:
-    raise ValueError("The result of subtraction must be non-negative .")
+    raise ValueError("The result of subtraction must be non-negative.")
  if logy == -np.inf:  # subtracting 0
    return logx
  if logx == logy:
@ -104,7 +104,7 @@ def _compute_log_a_int(q, sigma, alpha):

 def _compute_log_a_frac(q, sigma, alpha):
  """Compute log(A_alpha) for fractional alpha. 0 < q < 1."""
-  # The two parts of A_alpha, integrals over (-inf,z0] and (z0, +inf), are
+  # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
  # initialized to 0 in the log space:
  log_a0, log_a1 = -np.inf, -np.inf
  i = 0
@ -148,6 +148,7 @@ def _compute_log_a(q, sigma, alpha):


 def _log_erfc(x):
+  """Compute log(erfc(x)) with high accuracy for large x."""
  try:
    return math.log(2) + special.log_ndtr(-x * 2**.5)
  except NameError:
@ -165,7 +166,7 @@ def _log_erfc(x):


 def _compute_delta(orders, rdp, eps):
-  """Compute delta given an RDP curve and target epsilon.
+  """Compute delta given a list of RDP values and target epsilon.

  Args:
    orders: An array (or a scalar) of orders.
@ -191,7 +192,7 @@ def _compute_delta(orders, rdp, eps):


 def _compute_eps(orders, rdp, delta):
-  """Compute epsilon given an RDP curve and target delta.
+  """Compute epsilon given a list of RDP values and target delta.

  Args:
    orders: An array (or a scalar) of orders.
@ -240,31 +241,30 @@ def _compute_rdp(q, sigma, alpha):
  return _compute_log_a(q, sigma, alpha) / (alpha - 1)


-def compute_rdp(q, stddev_to_sensitivity_ratio, steps, orders):
-  """Compute RDP of the Sampled Gaussian Mechanism for given parameters.
+def compute_rdp(q, noise_multiplier, steps, orders):
+  """Compute RDP of the Sampled Gaussian Mechanism.

  Args:
    q: The sampling rate.
-    stddev_to_sensitivity_ratio: The ratio of std of the Gaussian noise to the
-      l2-sensitivity of the function to which it is added.
+    noise_multiplier: The ratio of the standard deviation of the Gaussian noise
+        to the l2-sensitivity of the function to which it is added.
    steps: The number of steps.
    orders: An array (or a scalar) of RDP orders.

  Returns:
    The RDPs at all orders, can be np.inf.
  """
-
  if np.isscalar(orders):
-    rdp = _compute_rdp(q, stddev_to_sensitivity_ratio, orders)
+    rdp = _compute_rdp(q, noise_multiplier, orders)
  else:
-    rdp = np.array([_compute_rdp(q, stddev_to_sensitivity_ratio, order)
+    rdp = np.array([_compute_rdp(q, noise_multiplier, order)
                    for order in orders])

  return rdp * steps


 def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
-  """Compute delta (or eps) for given eps (or delta) from the RDP curve.
+  """Compute delta (or eps) for given eps (or delta) from RDP values.

  Args:
    orders: An array (or a scalar) of RDP orders.
@ -273,6 +273,7 @@ def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
              delta.
    target_delta: If not None, the delta for which we compute the corresponding
              epsilon. Exactly one of target_eps and target_delta must be None.
+
  Returns:
    eps, delta, opt_order.

--- a/privacy/analysis/rdp_accountant_test.py
+++ b/privacy/analysis/rdp_accountant_test.py
@ -1,4 +1,4 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -27,7 +27,6 @@ import numpy as np

 from privacy.analysis import rdp_accountant

-
 class TestGaussianMoments(parameterized.TestCase):
  #################################
  # HELPER FUNCTIONS:             #
@ -134,7 +133,7 @@ class TestGaussianMoments(parameterized.TestCase):
              16., 20., 24., 28., 32., 64., 256.)

    rdp = rdp_accountant.compute_rdp(q=1e-4,
-                                     stddev_to_sensitivity_ratio=.4,
+                                     noise_multiplier=.4,
                                     steps=40000,
                                     orders=orders)

@ -142,7 +141,7 @@ class TestGaussianMoments(parameterized.TestCase):
                                                         target_delta=1e-6)

    rdp += rdp_accountant.compute_rdp(q=0.1,
-                                      stddev_to_sensitivity_ratio=2,
+                                      noise_multiplier=2,
                                      steps=100,
                                      orders=orders)
    eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp,
--- a/tutorials/README.md
+++ b/tutorials/README.md
@ -1,6 +1,6 @@
 # Tutorials

-As demonstrated on MNIST in `mnist_dpsgd_tutorial.py`, the easiest to use
+As demonstrated on MNIST in `mnist_dpsgd_tutorial.py`, the easiest way to use
 a differentially private optimizer is to modify an existing training loop
 to replace an existing vanilla optimizer with its differentially private
 counterpart implemented in the library.
@ -26,26 +26,25 @@ be tuned in addition to any existing hyperparameter. There are currently three:

 ## Measuring Privacy

-Differential privacy is measured by two values, epsilon and delta. Roughly 
-speaking, they mean the following:
+Differential privacy can be expressed using two values, epsilon and delta.
+Roughly speaking, they mean the following:

-* epsilon gives a ceiling on how much the probability of a change in model
-  behavior can increase by including a single extra training example. This is 
-  the far more sensitive value, and we usually want it to be at most 10.0 or 
-  so. However, note that this is only an upper bound, and a large value of 
-  epsilon may still mean good practical privacy.
-* delta bounds the probability of an "unconditional" change in model behavior. 
-  We can usually set this to a very small number (1e-7 or so) without 
-  compromising utility. A rule of thumb is to set it to the inverse of the
-  order of magnitude of the training data size.
+* epsilon gives a ceiling on how much the probability of a particular output
+  can increase by including (or removing) a single training example. We usually
+  want it to be a small constant (less than 10, or, for more stringent privacy
+  guarantees, less than 1). However, this is only an upper bound, and a large
+  value of epsilon may still mean good practical privacy.
+* delta bounds the probability of an arbitrary change in model behavior.
+  We can usually set this to a very small number (1e-7 or so) without
+  compromising utility. A rule of thumb is to set it to be less than the inverse
+  of the training data size.

 To find out the epsilon given a fixed delta value for your model, follow the
 approach demonstrated in the `compute_epsilon` of the `mnist_dpsgd_tutorial.py`
 where the arguments used to call the RDP accountant (i.e., the tool used to
 compute the privacy guarantee) are:

-* q : The sampling ratio, defined as (number of examples consumed in one 
+* q : The sampling ratio, defined as (number of examples consumed in one
  step) / (total training examples).
-* stddev_to_sensitivity_ratio : The noise_multiplier from your parameters above.
+* noise_multiplier : The noise_multiplier from your parameters above.
 * steps : The number of global steps taken.
-
--- a/tutorials/mnist_dpsgd_tutorial.py
+++ b/tutorials/mnist_dpsgd_tutorial.py
@ -156,7 +156,7 @@ def main(unused_argv):
    orders = [1 + x / 10. for x in range(1, 100)] + range(12, 64)
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
-                      stddev_to_sensitivity_ratio=FLAGS.noise_multiplier,
+                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.