tensorflow_privacy/privacy/analysis/rdp_accountant.py
Galen Andrew d5dcfec745 Remove set_denominator functions from DPQuery and make QueryWithLedger easier to use.
set_denominator was added so that the batch size doesn't need to be specified before constructing the optimizer, but it breaks the DPQuery abstraction. Now the optimizer uses a GaussianSumQuery instead of GaussianAverageQuery, and normalization by batch size is done inside the optimizer.

Also instead of creating all DPQueries with a PrivacyLedger and then wrapping with QueryWithLedger, it is now sufficient to create the queries with no ledger and QueryWithLedger will construct the ledger and pass it to all inner queries.

PiperOrigin-RevId: 251462353
2019-06-04 10:14:32 -07:00

318 lines
9.6 KiB
Python

# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RDP analysis of the Sampled Gaussian Mechanism.
Functionality for computing Renyi differential privacy (RDP) of an additive
Sampled Gaussian Mechanism (SGM). Its public interface consists of two methods:
compute_rdp(q, noise_multiplier, T, orders) computes RDP for SGM iterated
T times.
get_privacy_spent(orders, rdp, target_eps, target_delta) computes delta
(or eps) given RDP at multiple orders and
a target value for eps (or delta).
Example use:
Suppose that we have run an SGM applied to a function with l2-sensitivity 1.
Its parameters are given as a list of tuples (q1, sigma1, T1), ...,
(qk, sigma_k, Tk), and we wish to compute eps for a given delta.
The example code would be:
max_order = 32
orders = range(2, max_order + 1)
rdp = np.zeros_like(orders, dtype=float)
for q, sigma, T in parameters:
rdp += rdp_accountant.compute_rdp(q, sigma, T, orders)
eps, _, opt_order = rdp_accountant.get_privacy_spent(rdp, target_delta=delta)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import sys
import numpy as np
from scipy import special
import six
########################
# LOG-SPACE ARITHMETIC #
########################
def _log_add(logx, logy):
"""Add two numbers in the log space."""
a, b = min(logx, logy), max(logx, logy)
if a == -np.inf: # adding 0
return b
# Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
return math.log1p(math.exp(a - b)) + b # log1p(x) = log(x + 1)
def _log_sub(logx, logy):
"""Subtract two numbers in the log space. Answer must be non-negative."""
if logx < logy:
raise ValueError("The result of subtraction must be non-negative.")
if logy == -np.inf: # subtracting 0
return logx
if logx == logy:
return -np.inf # 0 is represented as -np.inf in the log space.
try:
# Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
return math.log(math.expm1(logx - logy)) + logy # expm1(x) = exp(x) - 1
except OverflowError:
return logx
def _log_print(logx):
"""Pretty print."""
if logx < math.log(sys.float_info.max):
return "{}".format(math.exp(logx))
else:
return "exp({})".format(logx)
def _compute_log_a_int(q, sigma, alpha):
"""Compute log(A_alpha) for integer alpha. 0 < q < 1."""
assert isinstance(alpha, six.integer_types)
# Initialize with 0 in the log space.
log_a = -np.inf
for i in range(alpha + 1):
log_coef_i = (
math.log(special.binom(alpha, i)) + i * math.log(q) +
(alpha - i) * math.log(1 - q))
s = log_coef_i + (i * i - i) / (2 * (sigma**2))
log_a = _log_add(log_a, s)
return float(log_a)
def _compute_log_a_frac(q, sigma, alpha):
"""Compute log(A_alpha) for fractional alpha. 0 < q < 1."""
# The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
# initialized to 0 in the log space:
log_a0, log_a1 = -np.inf, -np.inf
i = 0
z0 = sigma**2 * math.log(1 / q - 1) + .5
while True: # do ... until loop
coef = special.binom(alpha, i)
log_coef = math.log(abs(coef))
j = alpha - i
log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q)
log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q)
log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
log_s0 = log_t0 + (i * i - i) / (2 * (sigma**2)) + log_e0
log_s1 = log_t1 + (j * j - j) / (2 * (sigma**2)) + log_e1
if coef > 0:
log_a0 = _log_add(log_a0, log_s0)
log_a1 = _log_add(log_a1, log_s1)
else:
log_a0 = _log_sub(log_a0, log_s0)
log_a1 = _log_sub(log_a1, log_s1)
i += 1
if max(log_s0, log_s1) < -30:
break
return _log_add(log_a0, log_a1)
def _compute_log_a(q, sigma, alpha):
"""Compute log(A_alpha) for any positive finite alpha."""
if float(alpha).is_integer():
return _compute_log_a_int(q, sigma, int(alpha))
else:
return _compute_log_a_frac(q, sigma, alpha)
def _log_erfc(x):
"""Compute log(erfc(x)) with high accuracy for large x."""
try:
return math.log(2) + special.log_ndtr(-x * 2**.5)
except NameError:
# If log_ndtr is not available, approximate as follows:
r = special.erfc(x)
if r == 0.0:
# Using the Laurent series at infinity for the tail of the erfc function:
# erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5)
# To verify in Mathematica:
# Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}]
return (-math.log(math.pi) / 2 - math.log(x) - x**2 - .5 * x**-2 +
.625 * x**-4 - 37. / 24. * x**-6 + 353. / 64. * x**-8)
else:
return math.log(r)
def _compute_delta(orders, rdp, eps):
"""Compute delta given a list of RDP values and target epsilon.
Args:
orders: An array (or a scalar) of orders.
rdp: A list (or a scalar) of RDP guarantees.
eps: The target epsilon.
Returns:
Pair of (delta, optimal_order).
Raises:
ValueError: If input is malformed.
"""
orders_vec = np.atleast_1d(orders)
rdp_vec = np.atleast_1d(rdp)
if len(orders_vec) != len(rdp_vec):
raise ValueError("Input lists must have the same length.")
deltas = np.exp((rdp_vec - eps) * (orders_vec - 1))
idx_opt = np.argmin(deltas)
return min(deltas[idx_opt], 1.), orders_vec[idx_opt]
def _compute_eps(orders, rdp, delta):
"""Compute epsilon given a list of RDP values and target delta.
Args:
orders: An array (or a scalar) of orders.
rdp: A list (or a scalar) of RDP guarantees.
delta: The target delta.
Returns:
Pair of (eps, optimal_order).
Raises:
ValueError: If input is malformed.
"""
orders_vec = np.atleast_1d(orders)
rdp_vec = np.atleast_1d(rdp)
if len(orders_vec) != len(rdp_vec):
raise ValueError("Input lists must have the same length.")
eps = rdp_vec - math.log(delta) / (orders_vec - 1)
idx_opt = np.nanargmin(eps) # Ignore NaNs
return eps[idx_opt], orders_vec[idx_opt]
def _compute_rdp(q, sigma, alpha):
"""Compute RDP of the Sampled Gaussian mechanism at order alpha.
Args:
q: The sampling rate.
sigma: The std of the additive Gaussian noise.
alpha: The order at which RDP is computed.
Returns:
RDP at alpha, can be np.inf.
"""
if q == 0:
return 0
if q == 1.:
return alpha / (2 * sigma**2)
if np.isinf(alpha):
return np.inf
return _compute_log_a(q, sigma, alpha) / (alpha - 1)
def compute_rdp(q, noise_multiplier, steps, orders):
"""Compute RDP of the Sampled Gaussian Mechanism.
Args:
q: The sampling rate.
noise_multiplier: The ratio of the standard deviation of the Gaussian noise
to the l2-sensitivity of the function to which it is added.
steps: The number of steps.
orders: An array (or a scalar) of RDP orders.
Returns:
The RDPs at all orders, can be np.inf.
"""
if np.isscalar(orders):
rdp = _compute_rdp(q, noise_multiplier, orders)
else:
rdp = np.array([_compute_rdp(q, noise_multiplier, order)
for order in orders])
return rdp * steps
def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None):
"""Compute delta (or eps) for given eps (or delta) from RDP values.
Args:
orders: An array (or a scalar) of RDP orders.
rdp: An array of RDP values. Must be of the same length as the orders list.
target_eps: If not None, the epsilon for which we compute the corresponding
delta.
target_delta: If not None, the delta for which we compute the corresponding
epsilon. Exactly one of target_eps and target_delta must be None.
Returns:
eps, delta, opt_order.
Raises:
ValueError: If target_eps and target_delta are messed up.
"""
if target_eps is None and target_delta is None:
raise ValueError(
"Exactly one out of eps and delta must be None. (Both are).")
if target_eps is not None and target_delta is not None:
raise ValueError(
"Exactly one out of eps and delta must be None. (None is).")
if target_eps is not None:
delta, opt_order = _compute_delta(orders, rdp, target_eps)
return target_eps, delta, opt_order
else:
eps, opt_order = _compute_eps(orders, rdp, target_delta)
return eps, target_delta, opt_order
def compute_rdp_from_ledger(ledger, orders):
"""Compute RDP of Sampled Gaussian Mechanism from ledger.
Args:
ledger: A formatted privacy ledger.
orders: An array (or a scalar) of RDP orders.
Returns:
RDP at all orders, can be np.inf.
"""
total_rdp = np.zeros_like(orders, dtype=float)
for sample in ledger:
# Compute equivalent z from l2_clip_bounds and noise stddevs in sample.
# See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula.
effective_z = sum([
(q.noise_stddev / q.l2_norm_bound)**-2 for q in sample.queries])**-0.5
total_rdp += compute_rdp(
sample.selection_probability, effective_z, 1, orders)
return total_rdp