From 438da5a09bf4b4393351fbd86ac0643982d5d1e3 Mon Sep 17 00:00:00 2001 From: Shuang Song Date: Mon, 31 Jan 2022 13:26:57 -0800 Subject: [PATCH] For secret sharer exposures, allow more dictionary key types, and break ties for the same perplexities. Fix a bug in the test. PiperOrigin-RevId: 425446829 --- .../privacy_tests/secret_sharer/exposures.py | 87 ++++++++++--------- .../secret_sharer/exposures_test.py | 46 +++++----- 2 files changed, 72 insertions(+), 61 deletions(-) diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py index 82ec230..1ad2097 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures.py @@ -13,74 +13,81 @@ # limitations under the License. """Measuring exposure for secret sharer attack.""" -from typing import Dict, List - +from typing import Iterable, TypeVar, Mapping import numpy as np from scipy import stats +_KT = TypeVar('_KT') + + def compute_exposure_interpolation( - perplexities: Dict[int, List[float]], - perplexities_reference: List[float]) -> Dict[int, List[float]]: - """Get exposure using interpolation. + perplexities: Mapping[_KT, Iterable[float]], + perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]: + """Gets exposure using interpolation. Args: - perplexities: a dictionary, key is number of secret repetitions, value is a - list of perplexities - perplexities_reference: a list, perplexities of the random sequences that - did not appear in the training data + perplexities: a `Mapping` where the key is an identifier for the secrets + set, e.g. number of secret repetitions, and the value is an iterable of + perplexities. + perplexities_reference: perplexities of the random sequences that did not + appear in the training data. 
Returns: The exposure of every secret measured using interpolation (not necessarily - in the same order as the input) + in the same order as the input), keyed in the same way as perplexities. """ - repetitions = list(perplexities.keys()) - # Concatenate all perplexities, including those for references - perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] + - [perplexities_reference]) - # Concatenate the number of repetitions for each secret - repetitions_concat = np.concatenate([[r] * len(perplexities[r]) - for r in repetitions] + - [[0] * len(perplexities_reference)]) + # Get the keys in some fixed order which will be used internally only + # further down. + keys = list(perplexities) + # Concatenate all perplexities, including those from `perplexities_reference`. + # Add another dimension indicating which set the perplexity is from: -1 for + # reference, {0, ..., len(perplexities) - 1} for secrets + perplexities_concat = [(p, -1) for p in perplexities_reference] + for i, k in enumerate(keys): + perplexities_concat.extend((p, i) for p in perplexities[k]) - # Sort the repetition list according to the corresponding perplexity - idx = np.argsort(perplexities_concat) - repetitions_concat = repetitions_concat[idx] + # Get the indices list sorted according to the corresponding perplexity, + # in case of tie, keep the reference before the secret + indices_concat = np.fromiter((i for _, i in sorted(perplexities_concat)), + dtype=int) - # In the sorted repetition list, if there are m examples with repetition 0 - # (does not appear in training) in front of an example, then its rank is - # (m + 1). To get the number of examples with repetition 0 in front of - # any example, we use the cummulative sum of the indicator vecotr - # (repetitions_concat == 0). 
- cum_sum = np.cumsum(repetitions_concat == 0) - ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions} + # In the sorted indices list, if there are m examples with index -1 + # (from the reference set) in front of an example, then its rank is + # (m + 1). To get the number of examples with index -1 in front of + # any example, we use the cumulative sum of the indicator vector + # (indices_concat == -1). + cum_sum = np.cumsum(indices_concat == -1) + ranks = {k: cum_sum[indices_concat == i] + 1 for i, k in enumerate(keys)} exposures = { - r: np.log2(len(perplexities_reference)) - np.log2(ranks[r]) - for r in repetitions + k: np.log2(len(list(perplexities_reference))) - np.log2(ranks[k]) + for k in ranks } return exposures def compute_exposure_extrapolation( - perplexities: Dict[int, List[float]], - perplexities_reference: List[float]) -> Dict[int, List[float]]: - """Get exposure using extrapolation. + perplexities: Mapping[_KT, Iterable[float]], + perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]: + """Gets exposure using extrapolation. Args: - perplexities: a dictionary, key is number of secret repetitions, value is a - list of perplexities - perplexities_reference: a list, perplexities of the random sequences that - did not appear in the training data + perplexities: a `Mapping` where the key is an identifier for the secrets + set, e.g. number of secret repetitions, and the value is an iterable of + perplexities. + perplexities_reference: perplexities of the random sequences that did not + appear in the training data. Returns: - The exposure of every secret measured using extrapolation + The exposure of every secret measured using extrapolation, keyed in the same + way as perplexities. 
""" # Fit a skew normal distribution using the perplexities of the references snormal_param = stats.skewnorm.fit(perplexities_reference) # Estimate exposure using the fitted distribution exposures = { - r: -np.log2(stats.skewnorm.cdf(perplexities[r], *snormal_param)) - for r in perplexities.keys() + r: -np.log2(stats.skewnorm.cdf(p, *snormal_param)) + for r, p in perplexities.items() } return exposures diff --git a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py index bc95e51..dc83c6d 100644 --- a/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py +++ b/tensorflow_privacy/privacy/privacy_tests/secret_sharer/exposures_test.py @@ -15,8 +15,8 @@ from absl.testing import absltest import numpy as np from scipy import stats -from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation -from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation + +from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures class UtilsTest(absltest.TestCase): @@ -28,42 +28,46 @@ class UtilsTest(absltest.TestCase): def test_exposure_interpolation(self): """Test exposure by interpolation.""" perplexities = { - 1: [0, 0.1], # smallest perplexities - 2: [20.0], # largest perplexities - 5: [3.5] - } # rank = 4 + '1': [0, 0.1], # smallest perplexities + '2': [20.0], # largest perplexities + '5': [3.5], # rank = 4 + '8': [3.5], # rank = 4 + } perplexities_reference = [float(x) for x in range(1, 17)] - exposures = compute_exposure_interpolation(perplexities, - perplexities_reference) + resulted_exposures = exposures.compute_exposure_interpolation( + perplexities, perplexities_reference) num_perplexities_reference = len(perplexities_reference) exposure_largest = np.log2(num_perplexities_reference) exposure_smallest = np.log2(num_perplexities_reference) - np.log2( 
num_perplexities_reference + 1) + exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4) expected_exposures = { - 1: np.array([exposure_largest] * 2), - 2: np.array([exposure_smallest]), - 5: np.array([np.log2(num_perplexities_reference) - np.log2(4)]) + '1': np.array([exposure_largest] * 2), + '2': np.array([exposure_smallest]), + '5': np.array([exposure_rank4]), + '8': np.array([exposure_rank4]) } - self.assertEqual(exposures.keys(), expected_exposures.keys()) - for r in exposures.keys(): - np.testing.assert_almost_equal(exposures[r], exposures[r]) + self.assertEqual(resulted_exposures.keys(), expected_exposures.keys()) + for r in resulted_exposures.keys(): + np.testing.assert_almost_equal(expected_exposures[r], + resulted_exposures[r]) def test_exposure_extrapolation(self): parameters = (4, 0, 1) perplexities = { - 1: stats.skewnorm.rvs(*parameters, size=(2,)), - 10: stats.skewnorm.rvs(*parameters, size=(5,)) + '1': stats.skewnorm.rvs(*parameters, size=(2,)), + '10': stats.skewnorm.rvs(*parameters, size=(5,)) } perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,)) - exposures = compute_exposure_extrapolation(perplexities, - perplexities_reference) + resulted_exposures = exposures.compute_exposure_extrapolation( + perplexities, perplexities_reference) fitted_parameters = stats.skewnorm.fit(perplexities_reference) - self.assertEqual(exposures.keys(), perplexities.keys()) - for r in exposures.keys(): + self.assertEqual(resulted_exposures.keys(), perplexities.keys()) + for r in resulted_exposures.keys(): np.testing.assert_almost_equal( - exposures[r], + resulted_exposures[r], -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))