From cfb1b881d84da0ee141a1e8682ee20460521f381 Mon Sep 17 00:00:00 2001 From: Michael Reneer Date: Thu, 27 Jan 2022 10:32:47 -0800 Subject: [PATCH] Normalize `scipy` imports in TensorFlow Privacy to be more friendly with strict dependencies and lint. PiperOrigin-RevId: 424649853 --- .../analysis/compute_noise_from_budget_lib.py | 4 ++-- .../privacy/analysis/gdp_accountant.py | 10 +++++----- .../membership_inference_attack/seq2seq_mia.py | 4 ++-- .../membership_inference_attack/utils.py | 4 ++-- .../privacy_tests/secret_sharer/exposures.py | 12 +++++++----- .../privacy_tests/secret_sharer/exposures_test.py | 14 ++++++++------ tutorials/movielens_tutorial.py | 4 ++-- 7 files changed, 28 insertions(+), 24 deletions(-) diff --git a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py index ccf1f1c..81fb06a 100644 --- a/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py +++ b/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py @@ -21,7 +21,7 @@ from __future__ import print_function import math from absl import app -from scipy.optimize import bisect +from scipy import optimize from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent @@ -69,7 +69,7 @@ def compute_noise(n, batch_size, target_epsilon, epochs, delta, noise_lbd): guess_epsilon = apply_dp_sgd_analysis(q, noise, steps, orders, delta)[0] return guess_epsilon - target_epsilon - target_noise = bisect(epsilon_fn, min_noise, max_noise) + target_noise = optimize.bisect(epsilon_fn, min_noise, max_noise) print( 'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' ' over {} steps satisfies'.format(100 * q, target_noise, steps), diff --git a/tensorflow_privacy/privacy/analysis/gdp_accountant.py b/tensorflow_privacy/privacy/analysis/gdp_accountant.py index b5dc193..a3674dc 100644 --- a/tensorflow_privacy/privacy/analysis/gdp_accountant.py +++ b/tensorflow_privacy/privacy/analysis/gdp_accountant.py @@ -21,7 +21,7 @@ subsampling). import numpy as np from scipy import optimize -from scipy.stats import norm +from scipy import stats def compute_mu_uniform(epoch, noise_multi, n, batch_size): @@ -30,8 +30,8 @@ def compute_mu_uniform(epoch, noise_multi, n, batch_size): t = epoch * n / batch_size c = batch_size * np.sqrt(t) / n return np.sqrt(2) * c * np.sqrt( - np.exp(noise_multi**(-2)) * norm.cdf(1.5 / noise_multi) + - 3 * norm.cdf(-0.5 / noise_multi) - 2) + np.exp(noise_multi**(-2)) * stats.norm.cdf(1.5 / noise_multi) + + 3 * stats.norm.cdf(-0.5 / noise_multi) - 2) def compute_mu_poisson(epoch, noise_multi, n, batch_size): @@ -43,8 +43,8 @@ def compute_mu_poisson(epoch, noise_multi, n, batch_size): def delta_eps_mu(eps, mu): """Compute dual between mu-GDP and (epsilon, delta)-DP.""" - return norm.cdf(-eps / mu + - mu / 2) - np.exp(eps) * norm.cdf(-eps / mu - mu / 2) + return stats.norm.cdf(-eps / mu + mu / + 2) - np.exp(eps) * stats.norm.cdf(-eps / mu - mu / 2) def eps_from_mu(mu, delta): diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py index e4c97dd..1eef53c 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/seq2seq_mia.py @@ -22,7 +22,7 @@ import dataclasses from typing import Iterator, List import numpy as np -from scipy.stats import rankdata +from scipy import stats from sklearn import metrics from sklearn import model_selection import tensorflow as tf @@ -205,7 +205,7 @@ def _get_ranks_for_sequence(logits: np.ndarray, """ sequence_ranks = [] for logit, label in zip(logits, labels.astype(int)): - rank = rankdata(-logit, method='min')[label] - 1.0 + rank = stats.rankdata(-logit, method='min')[label] - 1.0 sequence_ranks.append(rank) return sequence_ranks diff --git a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py index a29081e..ceaefe4 100644 --- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py +++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/utils.py @@ -15,7 +15,7 @@ """Utility functions for membership inference attacks.""" import numpy as np -import scipy.special +from scipy import special def log_loss(labels: np.ndarray, pred: np.ndarray, small_value=1e-8): @@ -37,4 +37,4 @@ def log_loss(labels: np.ndarray, pred: np.ndarray, small_value=1e-8): def log_loss_from_logits(labels: np.ndarray, logits: np.ndarray): """Compute the cross entropy loss from logits.""" - return log_loss(labels, scipy.special.softmax(logits, axis=-1)) + return log_loss(labels, special.softmax(logits, axis=-1)) diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py index 0ad9867..8ec45e5 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Measuring exposure for secret sharer attack.""" from typing import Dict, List + import numpy as np -from scipy.stats import skewnorm +from scipy import stats def compute_exposure_interpolation( @@ -72,9 +72,11 @@ def compute_exposure_extrapolation( The exposure of every secret measured using extrapolation """ # Fit a skew normal distribution using the perplexities of the references - snormal_param = skewnorm.fit(perplexities_reference) + snormal_param = stats.skewnorm.fit(perplexities_reference) # Estimate exposure using the fitted distribution - exposures = {r: -np.log2(skewnorm.cdf(perplexities[r], *snormal_param)) - for r in perplexities.keys()} + exposures = { + r: -np.log2(stats.skewnorm.cdf(perplexities[r], *snormal_param)) + for r in perplexities.keys() + } return exposures diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py index 689be7f..bc44a8a 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py @@ -14,7 +14,7 @@ from absl.testing import absltest import numpy as np -from scipy.stats import skewnorm +from scipy import stats from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation @@ -49,18 +49,20 @@ class UtilsTest(absltest.TestCase): def test_exposure_extrapolation(self): parameters = (4, 0, 1) - perplexities = {1: skewnorm.rvs(*parameters, size=(2,)), - 10: skewnorm.rvs(*parameters, size=(5,))} - perplexities_reference = skewnorm.rvs(*parameters, size=(10000,)) + perplexities = { + 1: stats.skewnorm.rvs(*parameters, size=(2,)), + 10: stats.skewnorm.rvs(*parameters, size=(5,)) + } + perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,)) exposures = compute_exposure_extrapolation(perplexities, perplexities_reference) - fitted_parameters = skewnorm.fit(perplexities_reference) + fitted_parameters = stats.skewnorm.fit(perplexities_reference) self.assertEqual(exposures.keys(), perplexities.keys()) for r in exposures.keys(): np.testing.assert_almost_equal( exposures[r], - -np.log2(skewnorm.cdf(perplexities[r], *fitted_parameters))) + -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters))) if __name__ == '__main__': diff --git a/tutorials/movielens_tutorial.py b/tutorials/movielens_tutorial.py index 192db76..5cd9cbb 100644 --- a/tutorials/movielens_tutorial.py +++ b/tutorials/movielens_tutorial.py @@ -23,7 +23,7 @@ from absl import flags import numpy as np import pandas as pd -from scipy.stats import rankdata +from scipy import stats from sklearn.model_selection import train_test_split import tensorflow as tf @@ -148,7 +148,7 @@ def load_movielens(): print('number of user: ', n_users) # give unique dense movie index to movieId - data['movieIndex'] = rankdata(data['movieId'], method='dense') + data['movieIndex'] = stats.rankdata(data['movieId'], method='dense') # minus one to reduce the minimum value to 0, which is the start of col index print('number of ratings:', data.shape[0])