# Copyright 2022, The TensorFlow Privacy Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Code for reproducing Figure 7 of paper.""" import math import matplotlib.pyplot as plt import numpy as np import rdp_accountant # pylint: disable=bare-except # pylint: disable=g-import-not-at-top # pylint: disable=g-multiple-import # pylint: disable=missing-function-docstring # pylint: disable=redefined-outer-name #################################################### # This file loads default values to reproduce # figure 7 from the paper. If you'd like to # provide your own value, modify the variables # in the if statement controlled by this variable. #################################################### load_values_to_reproduce_paper_fig = True def repeat_logarithmic_rdp(orders, rdp, gamma): n = len(orders) assert len(rdp) == n assert min(orders) >= 1 rdp_out = [None] * n for i in range(n): if orders[i] == 1: continue # unfortunately the formula doesn't work in this case for j in range(n): # Compute (orders[i],eps)-RDP bound on A_gamma given that Q satisfies # (orders[i],rdp[i])-RDP and (orders[j],rdp[j])-RDP eps = rdp[i] + ( 1 - 1 / orders[j]) * rdp[j] + math.log(1 / gamma - 1) / orders[j] + ( math.log(1 / gamma - 1) - math.log(math.log(1 / gamma))) / ( orders[i] - 1) if rdp_out[i] is None or eps < rdp_out[i]: rdp_out[i] = eps return rdp_out def repeat_geometric_rdp(orders, rdp, gamma): n = len(orders) assert len(rdp) == n assert min(orders) >= 1 rdp_out = [None] * n for i in range(n): if orders[i] == 1: continue # formula doesn't work in this case for j in range(n): eps = rdp[i] + 2 * (1 - 1 / orders[j]) * rdp[j] + ( 2 / orders[j] + 1 / (orders[i] - 1)) * math.log(1 / gamma) if rdp_out[i] is None or eps < rdp_out[i]: rdp_out[i] = eps return rdp_out def repeat_negativebinomial_rdp(orders, rdp, gamma, eta): n = len(orders) assert len(rdp) == n assert min(orders) >= 1 assert 0 < gamma < 1 assert eta > 0 rdp_out = [None] * n # foo = log(eta/(1-gamma^eta)) foo = math.log(eta) - math.log1p(-math.pow(gamma, eta)) for i in range(n): if orders[i] == 1: continue # forumla doesn't work for lambda=1 for j in range(n): eps = rdp[i] + (1 + eta) * (1 - 1 / orders[j]) * rdp[j] - ( (1 + eta) / orders[j] + 1 / (orders[i] - 1)) * math.log(gamma) + foo / (orders[i] - 1) + ( 1 + eta) * math.log1p(-gamma) / orders[j] if rdp_out[i] is None or eps < rdp_out[i]: rdp_out[i] = eps return rdp_out def repeat_poisson_rdp(orders, rdp, tau): n = len(orders) assert len(rdp) == n assert min(orders) >= 1 rdp_out = [None] * n for i in range(n): if orders[i] == 1: continue # forumula doesn't work with lambda=1 _, delta, _ = rdp_accountant.get_privacy_spent( orders, rdp, target_eps=math.log1p(1 / (orders[i] - 1))) rdp_out[i] = rdp[i] + tau * delta + math.log(tau) / (orders[i] - 1) return rdp_out if load_values_to_reproduce_paper_fig: from figure7_default_values import orders, rdp, lr_acc, num_trials, lr_rates, gammas, non_private_acc else: orders = [] # Complete with the list of orders rdp = [] # Complete with the list of RDP lr_acc = {} # Complete with a dictionary such that keys # are learning rates and values are the # corresponding model's accuracy num_trials = 1000 # num_trials to average results over lr_rates = np.asarray([]) # 1D array of learning rate candidates gammas = np.asarray( []) # 1D array of gamma parameters to the random distributions. non_private_acc = 1. # accuracy of a non-private run (for plotting only) for dist_id in range(4): res_x = np.zeros_like(gammas) res_y = np.zeros_like(res_x) res_y_max = non_private_acc * np.ones_like(res_x) for gamma_id, gamma in enumerate(gammas): expected = (1 / gamma - 1) / np.log(1 / gamma) best_acc_trials = [] for trial in range(num_trials): if dist_id == 0: K = np.random.logseries(1 - gamma) label = 'logarithmic distribution $\\eta=0$' color = 'b' eps = repeat_logarithmic_rdp(orders, rdp, gamma) elif dist_id == 1: if load_values_to_reproduce_paper_fig and gamma < 1e-4: continue K = np.random.geometric(gamma) label = 'geometric distribution $\\eta=1$' color = 'g' eps = repeat_geometric_rdp(orders, rdp, gamma) elif dist_id == 2: if load_values_to_reproduce_paper_fig and gamma < 1e-07: continue eta = 0.5 K = 0 while K == 0: K = np.random.negative_binomial(eta, gamma) label = 'negative binomial $\\eta=0.5$' color = 'k' eps = repeat_negativebinomial_rdp(orders, rdp, gamma, eta) elif dist_id == 3: if load_values_to_reproduce_paper_fig and gamma < 0.0015: continue gamma_factor = 100 K = np.random.poisson(gamma * gamma_factor) label = 'poisson distribution' color = 'm' eps = repeat_poisson_rdp(orders, rdp, gamma * gamma_factor) best_acc = 0. best_lr = -1. for k in range(K): # pick a hyperparam candidate uniformly at random j = np.random.randint(0, len(lr_rates)) lr_candidate = lr_rates[j] try: acc = lr_acc[str(lr_candidate)] except: print('lr - acc pair missing for ' + str(lr_candidate)) acc = 0. if best_acc < acc: best_acc = acc best_lr = lr_candidate best_acc_trials.append(best_acc) try: res_x[gamma_id] = np.min(eps) res_y[gamma_id] = np.mean(best_acc_trials) except: print('skipping ' + str(gamma_id)) if dist_id == 0: plt.hlines( res_y_max[0], xmin=-1., xmax=20., color='r', label='baseline (non-private search)') if dist_id >= 1: res_x = res_x[2:] res_y = res_y[2:] plt.plot(res_x, res_y, label=label, color=color) if load_values_to_reproduce_paper_fig: plt.xlim([0.5, 8.]) plt.ylim([0.85, 0.97]) plt.xlabel('Privacy budget') plt.ylabel('Model Accuracy for Best Hyperparameter') plt.legend(loc='lower right') plt.savefig('rdp_hyper_search.pdf', bbox_inches='tight')