For secret sharer exposures, allow more dictionary key types, and break ties for the same perplexities. Fix a bug in the test.

PiperOrigin-RevId: 425446829
This commit is contained in:
Shuang Song 2022-01-31 13:26:57 -08:00 committed by A. Unique TensorFlower
parent fd242e76b9
commit 438da5a09b
2 changed files with 72 additions and 61 deletions

View file

@@ -13,74 +13,81 @@
# limitations under the License.
"""Measuring exposure for secret sharer attack."""
from typing import Dict, List
from typing import Iterable, TypeVar, Mapping
import numpy as np
from scipy import stats
_KT = TypeVar('_KT')
def compute_exposure_interpolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using interpolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using interpolation (not necessarily
    in the same order as the input), keyed in the same way as perplexities.
  """
  # Materialize the reference perplexities exactly once: the parameter is an
  # Iterable, and a generator would otherwise be exhausted by the first pass
  # and report length 0 in the log2 below.
  reference = list(perplexities_reference)
  # Get the keys in some fixed order which will be used internally only
  # further down.
  keys = list(perplexities)
  # Concatenate all perplexities, including those from the reference set.
  # Tag each perplexity with the set it came from: -1 for reference,
  # {0, ..., len(keys) - 1} for the secret sets.
  perplexities_concat = [(p, -1) for p in reference]
  for i, k in enumerate(keys):
    perplexities_concat.extend((p, i) for p in perplexities[k])
  # Sort by perplexity; on a tie the tuple ordering keeps the reference (-1)
  # before any secret, which is the desired tie-break.
  indices_concat = np.fromiter((i for _, i in sorted(perplexities_concat)),
                               dtype=int)
  # In the sorted list, if m reference examples (index -1) precede an example,
  # its rank is (m + 1). The cumulative sum of the indicator vector
  # (indices_concat == -1) gives that m at every position.
  cum_sum = np.cumsum(indices_concat == -1)
  ranks = {k: cum_sum[indices_concat == i] + 1 for i, k in enumerate(keys)}
  exposures = {
      k: np.log2(len(reference)) - np.log2(ranks[k]) for k in ranks
  }
  return exposures
def compute_exposure_extrapolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using extrapolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using extrapolation, keyed in the same
    way as perplexities.
  """
  # Materialize the iterables into float arrays: the parameters are typed as
  # Iterable, and scipy's fit/cdf cannot consume a generator directly.
  reference = np.fromiter(perplexities_reference, dtype=float)
  # Fit a skew normal distribution using the perplexities of the references.
  snormal_param = stats.skewnorm.fit(reference)
  # Estimate exposure using the fitted distribution.
  exposures = {
      r: -np.log2(stats.skewnorm.cdf(np.fromiter(p, dtype=float),
                                     *snormal_param))
      for r, p in perplexities.items()
  }
  return exposures

View file

@@ -15,8 +15,8 @@
from absl.testing import absltest
import numpy as np
from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
class UtilsTest(absltest.TestCase):
  """Tests for the secret sharer exposure computations."""

  def test_exposure_interpolation(self):
    """Tests exposure by interpolation, including a tie in perplexity."""
    perplexities = {
        '1': [0, 0.1],  # smallest perplexities
        '2': [20.0],  # largest perplexities
        '5': [3.5],  # rank = 4
        '8': [3.5],  # rank = 4 (ties with '5' and must get the same exposure)
    }
    perplexities_reference = [float(x) for x in range(1, 17)]
    resulted_exposures = exposures.compute_exposure_interpolation(
        perplexities, perplexities_reference)
    num_perplexities_reference = len(perplexities_reference)
    exposure_largest = np.log2(num_perplexities_reference)
    exposure_smallest = np.log2(num_perplexities_reference) - np.log2(
        num_perplexities_reference + 1)
    exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4)
    expected_exposures = {
        '1': np.array([exposure_largest] * 2),
        '2': np.array([exposure_smallest]),
        '5': np.array([exposure_rank4]),
        '8': np.array([exposure_rank4])
    }

    self.assertEqual(resulted_exposures.keys(), expected_exposures.keys())
    for r in resulted_exposures.keys():
      # Compare expected against computed values. (An earlier version of this
      # test compared the computed values to themselves and could never fail.)
      np.testing.assert_almost_equal(expected_exposures[r],
                                     resulted_exposures[r])

  def test_exposure_extrapolation(self):
    """Tests exposure by extrapolation against a direct scipy computation."""
    parameters = (4, 0, 1)
    perplexities = {
        '1': stats.skewnorm.rvs(*parameters, size=(2,)),
        '10': stats.skewnorm.rvs(*parameters, size=(5,))
    }
    perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
    resulted_exposures = exposures.compute_exposure_extrapolation(
        perplexities, perplexities_reference)
    fitted_parameters = stats.skewnorm.fit(perplexities_reference)

    self.assertEqual(resulted_exposures.keys(), perplexities.keys())
    for r in resulted_exposures.keys():
      np.testing.assert_almost_equal(
          resulted_exposures[r],
          -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))