Normalize scipy imports in TensorFlow Privacy to be friendlier to strict dependency checking and lint.

PiperOrigin-RevId: 424649853
commit cfb1b881d8 (parent 47b439e376)
Author: Michael Reneer
Date: 2022-01-27 10:32:47 -08:00
Committed by: A. Unique TensorFlower

7 changed files with 28 additions and 24 deletions
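The change is the same across all seven files: symbol imports from scipy submodules become module imports, per the Google Python style rule of importing modules rather than individual names. A minimal sketch of the pattern (toy usage, not code from this commit):

    # Before: binds a single symbol; strict-deps and lint tooling must
    # resolve the submodule-internal name scipy.stats.norm directly.
    from scipy.stats import norm
    print(norm.cdf(0.0))  # 0.5

    # After: import the submodule once, qualify each use at the call site.
    from scipy import stats
    print(stats.norm.cdf(0.0))  # 0.5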


@@ -21,7 +21,7 @@ from __future__ import print_function
 import math
 from absl import app
-from scipy.optimize import bisect
+from scipy import optimize
 from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp  # pylint: disable=g-import-not-at-top
 from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
@@ -69,7 +69,7 @@ def compute_noise(n, batch_size, target_epsilon, epochs, delta, noise_lbd):
     guess_epsilon = apply_dp_sgd_analysis(q, noise, steps, orders, delta)[0]
     return guess_epsilon - target_epsilon

-  target_noise = bisect(epsilon_fn, min_noise, max_noise)
+  target_noise = optimize.bisect(epsilon_fn, min_noise, max_noise)
   print(
       'DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
       ' over {} steps satisfies'.format(100 * q, target_noise, steps),
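For context on the renamed call: optimize.bisect is plain bisection root-finding on a scalar function whose values at the two bracket endpoints differ in sign, which is how compute_noise inverts the epsilon-vs-noise curve. A self-contained toy sketch (f is illustrative, not the DP analysis above):

    from scipy import optimize

    def f(noise):
      # Toy stand-in for epsilon_fn: monotone in noise, root at noise = 2.0.
      return 1.0 / noise - 0.5

    target = optimize.bisect(f, 0.1, 10.0)  # f(0.1) > 0 and f(10.0) < 0
    print(target)  # ~2.0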


@@ -21,7 +21,7 @@ subsampling).
 import numpy as np
 from scipy import optimize
-from scipy.stats import norm
+from scipy import stats


 def compute_mu_uniform(epoch, noise_multi, n, batch_size):
@@ -30,8 +30,8 @@ def compute_mu_uniform(epoch, noise_multi, n, batch_size):
   t = epoch * n / batch_size
   c = batch_size * np.sqrt(t) / n
   return np.sqrt(2) * c * np.sqrt(
-      np.exp(noise_multi**(-2)) * norm.cdf(1.5 / noise_multi) +
-      3 * norm.cdf(-0.5 / noise_multi) - 2)
+      np.exp(noise_multi**(-2)) * stats.norm.cdf(1.5 / noise_multi) +
+      3 * stats.norm.cdf(-0.5 / noise_multi) - 2)


 def compute_mu_poisson(epoch, noise_multi, n, batch_size):
@@ -43,8 +43,8 @@ def compute_mu_poisson(epoch, noise_multi, n, batch_size):
 def delta_eps_mu(eps, mu):
   """Compute dual between mu-GDP and (epsilon, delta)-DP."""
-  return norm.cdf(-eps / mu +
-                  mu / 2) - np.exp(eps) * norm.cdf(-eps / mu - mu / 2)
+  return stats.norm.cdf(-eps / mu + mu /
+                        2) - np.exp(eps) * stats.norm.cdf(-eps / mu - mu / 2)


 def eps_from_mu(mu, delta):
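For reference, delta_eps_mu evaluates the standard duality between mu-GDP and (epsilon, delta)-DP (Dong, Roth, and Su), with \Phi the standard normal CDF; the reflowed return statement computes

    \delta(\varepsilon) = \Phi\left(-\frac{\varepsilon}{\mu} + \frac{\mu}{2}\right) - e^{\varepsilon}\,\Phi\left(-\frac{\varepsilon}{\mu} - \frac{\mu}{2}\right)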


@@ -22,7 +22,7 @@ import dataclasses
 from typing import Iterator, List

 import numpy as np
-from scipy.stats import rankdata
+from scipy import stats
 from sklearn import metrics
 from sklearn import model_selection
 import tensorflow as tf
@@ -205,7 +205,7 @@ def _get_ranks_for_sequence(logits: np.ndarray,
   """
   sequence_ranks = []
   for logit, label in zip(logits, labels.astype(int)):
-    rank = rankdata(-logit, method='min')[label] - 1.0
+    rank = stats.rankdata(-logit, method='min')[label] - 1.0
     sequence_ranks.append(rank)

   return sequence_ranks
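What the touched line computes: the zero-based rank of the true label when logits are sorted descending, with ties sharing the lowest rank (method='min'). A toy check using only scipy:

    import numpy as np
    from scipy import stats

    logit = np.array([0.1, 2.0, 0.7])
    label = 2
    ranks = stats.rankdata(-logit, method='min')  # [3., 1., 2.]
    print(ranks[label] - 1.0)  # 1.0: true class has the second-highest logit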


@@ -15,7 +15,7 @@
 """Utility functions for membership inference attacks."""

 import numpy as np
-import scipy.special
+from scipy import special


 def log_loss(labels: np.ndarray, pred: np.ndarray, small_value=1e-8):
@@ -37,4 +37,4 @@ def log_loss(labels: np.ndarray, pred: np.ndarray, small_value=1e-8):

 def log_loss_from_logits(labels: np.ndarray, logits: np.ndarray):
   """Compute the cross entropy loss from logits."""
-  return log_loss(labels, scipy.special.softmax(logits, axis=-1))
+  return log_loss(labels, special.softmax(logits, axis=-1))
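The rename is behavior-preserving: special.softmax and scipy.special.softmax name the same function (added in scipy 1.2), reached through two import spellings. A quick equivalence check with toy values:

    import numpy as np
    import scipy.special
    from scipy import special

    logits = np.array([[2.0, 0.5, -1.0]])
    a = scipy.special.softmax(logits, axis=-1)
    b = special.softmax(logits, axis=-1)
    np.testing.assert_allclose(a, b)  # same function under both spellings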


@@ -11,12 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """Measuring exposure for secret sharer attack."""

 from typing import Dict, List

 import numpy as np
-from scipy.stats import skewnorm
+from scipy import stats


 def compute_exposure_interpolation(
@@ -72,9 +72,11 @@ def compute_exposure_extrapolation(
     The exposure of every secret measured using extrapolation
   """
   # Fit a skew normal distribution using the perplexities of the references
-  snormal_param = skewnorm.fit(perplexities_reference)
+  snormal_param = stats.skewnorm.fit(perplexities_reference)

   # Estimate exposure using the fitted distribution
-  exposures = {r: -np.log2(skewnorm.cdf(perplexities[r], *snormal_param))
-               for r in perplexities.keys()}
+  exposures = {
+      r: -np.log2(stats.skewnorm.cdf(perplexities[r], *snormal_param))
+      for r in perplexities.keys()
+  }
   return exposures
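In the notation of this hunk, the quantity being computed is the Secret Sharer extrapolated exposure: the negative log-probability that a reference perplexity falls at or below the secret's, under the fitted skew-normal with CDF F:

    \mathrm{exposure}(r) = -\log_2 F_{\hat{\theta}}\big(\mathrm{perplexity}(r)\big)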


@@ -14,7 +14,7 @@

 from absl.testing import absltest
 import numpy as np
-from scipy.stats import skewnorm
+from scipy import stats

 from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
 from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
@@ -49,18 +49,20 @@ class UtilsTest(absltest.TestCase):

   def test_exposure_extrapolation(self):
     parameters = (4, 0, 1)
-    perplexities = {1: skewnorm.rvs(*parameters, size=(2,)),
-                    10: skewnorm.rvs(*parameters, size=(5,))}
-    perplexities_reference = skewnorm.rvs(*parameters, size=(10000,))
+    perplexities = {
+        1: stats.skewnorm.rvs(*parameters, size=(2,)),
+        10: stats.skewnorm.rvs(*parameters, size=(5,))
+    }
+    perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
     exposures = compute_exposure_extrapolation(perplexities,
                                                perplexities_reference)
-    fitted_parameters = skewnorm.fit(perplexities_reference)
+    fitted_parameters = stats.skewnorm.fit(perplexities_reference)

     self.assertEqual(exposures.keys(), perplexities.keys())
     for r in exposures.keys():
       np.testing.assert_almost_equal(
           exposures[r],
-          -np.log2(skewnorm.cdf(perplexities[r], *fitted_parameters)))
+          -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))


 if __name__ == '__main__':


@@ -23,7 +23,7 @@ from absl import flags
 import numpy as np
 import pandas as pd
-from scipy.stats import rankdata
+from scipy import stats
 from sklearn.model_selection import train_test_split
 import tensorflow as tf
@@ -148,7 +148,7 @@ def load_movielens():
   print('number of user: ', n_users)

   # give unique dense movie index to movieId
-  data['movieIndex'] = rankdata(data['movieId'], method='dense')
+  data['movieIndex'] = stats.rankdata(data['movieId'], method='dense')
   # minus one to reduce the minimum value to 0, which is the start of col index
   print('number of ratings:', data.shape[0])
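rankdata with method='dense' assigns consecutive integer ranks starting at 1 and gives equal ids the same rank, which is what turns sparse movieId values into a dense column index; the "minus one" in the hunk's trailing comment shifts the range to start at 0. A toy illustration:

    import numpy as np
    from scipy import stats

    movie_ids = np.array([3, 7, 3, 101, 7])
    index = stats.rankdata(movie_ids, method='dense').astype(int) - 1
    print(index)  # [0 1 0 2 1]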