265 lines
8 KiB
Python
265 lines
8 KiB
Python
|
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
# =============================================================================
|
||
|
|
||
|
"""Collection of helper functions to run and compare different algorithms."""
|
||
|
|
||
|
# pylint: skip-file
|
||
|
# pyformat: disable
|
||
|
|
||
|
import argparse
|
||
|
import json
|
||
|
from dataset_loader import Mydatasets
|
||
|
from my_logistic_regression import MyLogisticRegression
|
||
|
import numpy as np
|
||
|
from opt_algs import CompareAlgs, DoubleNoiseMech, gd_priv, private_newton
|
||
|
from scipy.optimize import fsolve
|
||
|
|
||
|
|
||
|
def zcdp_to_eps(rho, delta):
    """Convert a rho-zCDP guarantee into the eps of (eps, delta)-DP.

    Uses the formula in Lemma 3.6 of [BS16]:
        eps = rho + sqrt(4 * rho * log(sqrt(pi * rho) / delta))

    Args:
      rho: zCDP parameter.
      delta: delta in DP.

    Returns:
      eps of the corresponding (eps, delta)-DP guarantee.
    """
    log_term = np.log(np.sqrt(np.pi * rho) / delta)
    return rho + np.sqrt(4 * rho * log_term)
|
||
|
|
||
|
|
||
|
def eps_to_zcdp(eps, delta):
    """Convert an (eps, delta)-DP guarantee into a rho-zCDP parameter.

    Numerically inverts zcdp_to_eps: solves zcdp_to_eps(rho, delta) == eps
    for rho with scipy's fsolve, starting the search at rho = 0.001.

    Args:
      eps: eps in DP.
      delta: delta in DP.

    Returns:
      rho, the last entry of the solution array returned by fsolve.
    """
    solution = fsolve(
        lambda rho_zcdp: zcdp_to_eps(rho_zcdp, delta) - eps, x0=0.001
    )
    return solution[-1]
|
||
|
|
||
|
|
||
|
def helper_fun(datasetname, alg_type, params_exp):
    """Run one experiment and write its summary statistics to a JSON file.

    Loads the dataset through the Mydatasets method named `datasetname`,
    converts the (eps, delta)-DP budget in params_exp["total"] to zCDP using
    delta = (1 / num_samples) ** 2, runs the algorithms selected by
    `alg_type`, and dumps the summarized statistics (plus "num-samples") to
    "src/results/<filename>".

    Note: params_exp is modified in place. On return params_exp["total"]
    holds the zCDP budget rather than the eps that was passed in.

    Args:
      datasetname: name of the Mydatasets method that loads the dataset.
      alg_type: type of the optimization algorithm.
      params_exp: dict of hyperparameters; must contain "total".

    Raises:
      ValueError: if prepare_alg_dict does not recognise `alg_type`.
    """
    import os  # local import: only needed to create the output directory

    feature_vecs, labels, w_opt = getattr(Mydatasets(), datasetname)()
    privacy_dp = params_exp["total"]
    params_exp["total"] = eps_to_zcdp(privacy_dp, (1.0 / len(labels)) ** 2)
    log_reg = MyLogisticRegression(feature_vecs, labels)
    alg_dict, filename_params = prepare_alg_dict(
        alg_type, datasetname, privacy_dp, params_exp, log_reg
    )
    # prepare_alg_dict returns (None, None) for an unknown alg_type; fail
    # here with a clear message instead of an AttributeError further down.
    if alg_dict is None or filename_params is None:
        raise ValueError(f"unknown alg_type: {alg_type!r}")
    # Make sure the output directory exists *before* the (long) run so the
    # results are not lost to a FileNotFoundError at the very end.
    results_dir = "src/results/"
    os.makedirs(results_dir, exist_ok=True)
    compare_algs = CompareAlgs(log_reg, w_opt, params_exp)
    result = RunReleaseStats(compare_algs, alg_dict).summarize_stats()
    result["num-samples"] = len(labels)
    with open(
        results_dir + filename_params, "w", encoding="utf8"
    ) as json_file:
        json.dump(result, json_file)
|
||
|
|
||
|
|
||
|
def prepare_alg_dict(alg_type, datasetname, privacy_dp, params_exp, log_reg):
    """Build the algorithm update rules and the name of the results file.

    Args:
      alg_type: "double_noise", "dp_gd" or "damped_newton".
      datasetname: dataset name, embedded in the filename.
      privacy_dp: privacy value, embedded in the filename via str().
      params_exp: hyperparameter dict. "num_iteration" is read for every
        recognised alg_type; "grad_frac", "trace_frac" and "trace_coeff" are
        additionally read for "double_noise".
      log_reg: object handed to DoubleNoiseMech ("double_noise" only).

    Returns:
      (alg_dict, filename_params): a dict mapping algorithm name to update
      rule, and the output filename. Both are None when alg_type is not one
      of the recognised values.
    """
    if alg_type == "double_noise":
        name_fields = [
            "so",
            datasetname,
            str(privacy_dp),
            "DP",
            str(params_exp["num_iteration"]),
            str(params_exp["grad_frac"]),
            str(params_exp["trace_frac"]),
            str(params_exp["trace_coeff"]),
        ]
        filename_params = "_".join(name_fields) + ".txt"
        # Instantiate the four variants in a fixed order, then register them
        # under their display names.
        rules = {}
        for reg, curvature in (
            ("add", "hessian"),
            ("add", "ub"),
            ("clip", "hessian"),
            ("clip", "ub"),
        ):
            mechanism = DoubleNoiseMech(
                log_reg, type_reg=reg, curvature_info=curvature
            )
            rules[(reg, curvature)] = mechanism.update_rule
        alg_dict = {
            "DN-Hess-add": rules[("add", "hessian")],
            "DN-Hess-clip": rules[("clip", "hessian")],
            "DN-UB-clip": rules[("clip", "ub")],
            "DN-UB-add": rules[("add", "ub")],
        }
        return alg_dict, filename_params

    if alg_type == "dp_gd":
        name_fields = [
            "gd",
            datasetname,
            str(privacy_dp),
            "DP",
            str(params_exp["num_iteration"]),
        ]
        return {"DPGD": gd_priv}, "_".join(name_fields) + ".txt"

    if alg_type == "damped_newton":
        name_fields = [
            "newton",
            datasetname,
            str(privacy_dp),
            "DP",
            str(params_exp["num_iteration"]),
        ]
        return (
            {"private-newton": private_newton},
            "_".join(name_fields) + ".txt",
        )

    # Unrecognised algorithm type: nothing to run, no file to write.
    return None, None
|
||
|
|
||
|
|
||
|
class RunReleaseStats:
    """Helper class to run different algorithms and store the results.

    `compare_algs` must provide add_algo(update_rule, name), loss_vals(),
    gradnorm_vals(), accuracy_vals(), wall_clock_alg() and accuracy_np().
    The four *_vals / wall_clock_alg methods are expected to return dicts
    keyed by algorithm name whose values are lists, because the per-repetition
    results are merged with list.extend.
    """

    def __init__(self, compare_algs, algs_dict, num_rep=10):
        """Store the runner, the algorithms and the number of repetitions.

        Args:
          compare_algs: object that runs the algorithms and reports stats.
          algs_dict: dict mapping algorithm name to its update rule.
          num_rep: number of times every algorithm is run.
        """
        self.compare_algs = compare_algs
        self.algs_dict = algs_dict
        self.num_rep = num_rep
        # Per-algorithm samples keyed by algorithm name. They start as empty
        # dicts so summarize_stats() can iterate them even when num_rep == 0
        # and run_algs() never fills them in.
        self.losses = {}
        self.gradnorm = {}
        self.accuracy = {}
        self.wall_clock = {}

    def run_algs(self):
        """Run every algorithm num_rep times and accumulate the stats."""
        for rep in range(self.num_rep):
            for alg_name, alg_update_rule in self.algs_dict.items():
                self.compare_algs.add_algo(alg_update_rule, alg_name)
            losses_dict = self.compare_algs.loss_vals()
            gradnorm_dict = self.compare_algs.gradnorm_vals()
            accuracy_dict = self.compare_algs.accuracy_vals()
            wall_clock_dict = self.compare_algs.wall_clock_alg()
            if rep == 0:
                # First repetition: adopt the returned dicts as the stores.
                self.losses = losses_dict
                self.gradnorm = gradnorm_dict
                self.accuracy = accuracy_dict
                self.wall_clock = wall_clock_dict
            else:
                # Later repetitions: append the new samples per algorithm.
                for alg in self.losses:
                    self.losses[alg].extend(losses_dict[alg])
                    self.gradnorm[alg].extend(gradnorm_dict[alg])
                    self.accuracy[alg].extend(accuracy_dict[alg])
                    self.wall_clock[alg].extend(wall_clock_dict[alg])

    def summarize_stats(self):
        """Run the algorithms and summarize the collected statistics.

        Returns:
          A dict with "acc-best" (compare_algs.accuracy_np() as a list) and,
          for every algorithm name, a dict with the mean over axis 0
          ("*_avg") and a spread value ("*_std") of its loss, gradient norm,
          accuracy and wall-clock samples.
        """
        self.run_algs()
        result = {}
        result["acc-best"] = self.compare_algs.accuracy_np().tolist()
        for alg in self.losses:
            result[alg] = {}
            loss_avg = np.mean(np.array(self.losses[alg]), axis=0)
            loss_std = np.std(np.array(self.losses[alg]), axis=0)
            result[alg]["loss_avg"] = (loss_avg).tolist()
            # Standard deviation scaled by 1 / sqrt(num_rep).
            result[alg]["loss_std"] = (
                loss_std / np.sqrt(self.num_rep)
            ).tolist()
            gradnorm_avg = np.mean(np.array(self.gradnorm[alg]), axis=0)
            gradnorm_std = np.std(np.array(self.gradnorm[alg]), axis=0)
            result[alg]["gradnorm_avg"] = (gradnorm_avg).tolist()
            # NOTE(review): unlike the other "*_std" entries this one is not
            # divided by sqrt(num_rep) -- confirm whether that is intended.
            result[alg]["gradnorm_std"] = (gradnorm_std).tolist()
            acc_avg = np.mean(np.array(self.accuracy[alg]), axis=0)
            acc_std = np.std(np.array(self.accuracy[alg]), axis=0)
            result[alg]["acc_avg"] = (acc_avg).tolist()
            result[alg]["acc_std"] = (acc_std / np.sqrt(self.num_rep)).tolist()
            clocktime_avg = np.mean(np.array(self.wall_clock[alg]), axis=0)
            clocktime_std = np.std(np.array(self.wall_clock[alg]), axis=0)
            result[alg]["clock_time_avg"] = (clocktime_avg).tolist()
            result[alg]["clock_time_std"] = (
                clocktime_std / np.sqrt(self.num_rep)
            ).tolist()

        return result
|
||
|
|
||
|
|
||
|
def main():
    """Parse the command-line options and run the selected experiment.

    --datasetname, --total (float) and --numiter (int) are always required.
    --alg_type selects the algorithm: "double_noise" additionally needs
    --grad_frac, --trace_frac and --trace_coeff, "damped_newton" needs
    --grad_frac, and "dp_gd" needs nothing extra. Missing or malformed
    options are reported through argparse (usage message, exit status 2).

    Raises:
      ValueError: if --alg_type is not one of the supported algorithms.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--datasetname", required=True)
    parser.add_argument("--alg_type")
    parser.add_argument("--total", type=float, required=True)
    parser.add_argument("--numiter", type=int, required=True)
    # double noise and newton
    parser.add_argument("--grad_frac", type=float)
    parser.add_argument("--trace_frac", type=float)
    parser.add_argument("--trace_coeff", type=float)
    args = parser.parse_args()
    datasetname = args.datasetname
    alg_type = args.alg_type

    # Options each algorithm needs on top of --total and --numiter.
    extra_options = {
        "double_noise": ("grad_frac", "trace_frac", "trace_coeff"),
        "dp_gd": (),
        "damped_newton": ("grad_frac",),
    }
    if alg_type not in extra_options:
        raise ValueError("no such optmization algorithm exists")

    hyper_parameters = {"total": args.total, "num_iteration": args.numiter}
    for option in extra_options[alg_type]:
        value = getattr(args, option)
        if value is None:
            # Exit with a usage message rather than crash on float(None).
            parser.error(f"--{option} is required for --alg_type {alg_type}")
        hyper_parameters[option] = value

    print(f"optimization algorithm {alg_type},dataset name: {datasetname}")
    helper_fun(datasetname, alg_type, hyper_parameters)
|
||
|
|
||
|
|
||
|
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()
|