For secret sharer exposures, allow more dictionary key types, and break ties for the same perplexities. Fix a bug in the test.

PiperOrigin-RevId: 425446829
This commit is contained in:
Shuang Song 2022-01-31 13:26:57 -08:00 committed by A. Unique TensorFlower
parent fd242e76b9
commit 438da5a09b
2 changed files with 72 additions and 61 deletions

View file

@ -13,74 +13,81 @@
# limitations under the License. # limitations under the License.
"""Measuring exposure for secret sharer attack.""" """Measuring exposure for secret sharer attack."""
from typing import Dict, List from typing import Iterable, TypeVar, Mapping
import numpy as np import numpy as np
from scipy import stats from scipy import stats
_KT = TypeVar('_KT')


def compute_exposure_interpolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using interpolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using interpolation (not necessarily
    in the same order as the input), keyed in the same way as perplexities.
  """
  # Materialize the reference perplexities exactly once: the parameter is
  # only promised to be an Iterable, so iterating it a second time (for its
  # length) would silently see an exhausted generator and yield log2(0).
  reference = list(perplexities_reference)
  # Fix an ordering of the keys; it is used internally only, further down.
  keys = list(perplexities)
  # Tag every perplexity with the set it came from: -1 for the reference
  # set, i in {0, ..., len(keys) - 1} for the i-th secrets set. Sorting the
  # (perplexity, tag) pairs breaks ties by keeping a reference example
  # before a secret with the same perplexity (-1 sorts first).
  tagged = [(p, -1) for p in reference]
  for i, key in enumerate(keys):
    tagged.extend((p, i) for p in perplexities[key])
  indices_concat = np.fromiter((i for _, i in sorted(tagged)), dtype=int)
  # In the sorted list, if m reference examples (tag -1) are in front of an
  # example, its rank is (m + 1). The cumulative sum of the reference
  # indicator vector gives that m at every position.
  cum_sum = np.cumsum(indices_concat == -1)
  ranks = {key: cum_sum[indices_concat == i] + 1 for i, key in enumerate(keys)}
  # log2(|reference|) is constant; hoist it out of the per-key computation.
  log_num_reference = np.log2(len(reference))
  return {key: log_num_reference - np.log2(rank) for key, rank in ranks.items()}
def compute_exposure_extrapolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using extrapolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using extrapolation, keyed in the
    same way as perplexities.
  """
  # The parameters are only promised to be Iterables: convert to concrete
  # float arrays once so scipy receives array_like inputs (a generator would
  # not survive the implicit np.asarray inside skewnorm.fit / cdf).
  reference = np.asarray(list(perplexities_reference), dtype=float)
  # Fit a skew normal distribution using the perplexities of the references.
  snormal_param = stats.skewnorm.fit(reference)
  # Estimate exposure of each secrets set using the fitted distribution.
  return {
      r: -np.log2(stats.skewnorm.cdf(np.fromiter(p, dtype=float),
                                     *snormal_param))
      for r, p in perplexities.items()
  }

View file

@ -15,8 +15,8 @@
from absl.testing import absltest from absl.testing import absltest
import numpy as np import numpy as np
from scipy import stats from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
class UtilsTest(absltest.TestCase):

  def test_exposure_interpolation(self):
    """Checks interpolation-based exposure on a hand-crafted example."""
    # Reference perplexities are simply 1.0, 2.0, ..., 16.0.
    reference = [float(x) for x in range(1, 17)]
    num_reference = len(reference)
    secret_perplexities = {
        '1': [0, 0.1],  # smallest perplexities
        '2': [20.0],  # largest perplexities
        '5': [3.5],  # rank = 4
        '8': [3.5],  # rank = 4
    }

    got = exposures.compute_exposure_interpolation(secret_perplexities,
                                                   reference)

    largest = np.log2(num_reference)
    smallest = np.log2(num_reference) - np.log2(num_reference + 1)
    rank4 = np.log2(num_reference) - np.log2(4)
    want = {
        '1': np.array([largest, largest]),
        '2': np.array([smallest]),
        '5': np.array([rank4]),
        '8': np.array([rank4]),
    }
    self.assertEqual(got.keys(), want.keys())
    for key, expected in want.items():
      np.testing.assert_almost_equal(expected, got[key])

  def test_exposure_extrapolation(self):
    """Checks extrapolation-based exposure against a direct computation."""
    parameters = (4, 0, 1)
    secret_perplexities = {
        '1': stats.skewnorm.rvs(*parameters, size=(2,)),
        '10': stats.skewnorm.rvs(*parameters, size=(5,))
    }
    reference = stats.skewnorm.rvs(*parameters, size=(10000,))

    got = exposures.compute_exposure_extrapolation(secret_perplexities,
                                                   reference)

    # Re-fit the reference distribution and recompute the exposure directly.
    fitted = stats.skewnorm.fit(reference)
    self.assertEqual(got.keys(), secret_perplexities.keys())
    for key, value in got.items():
      np.testing.assert_almost_equal(
          value,
          -np.log2(stats.skewnorm.cdf(secret_perplexities[key], *fitted)))