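For secret sharer exposures, allow more dictionary key types, and break ties between equal perplexities (the reference is ranked before the secret). Fix a bug in the test, which compared the computed exposures with themselves instead of with the expected values.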
PiperOrigin-RevId: 425446829
This commit is contained in:
parent
fd242e76b9
commit
438da5a09b
2 changed files with 72 additions and 61 deletions
|
@ -13,74 +13,81 @@
|
|||
# limitations under the License.
|
||||
"""Measuring exposure for secret sharer attack."""
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from typing import Iterable, TypeVar, Mapping
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
|
||||
|
||||
_KT = TypeVar('_KT')
|
||||
|
||||
|
||||
def compute_exposure_interpolation(
|
||||
perplexities: Dict[int, List[float]],
|
||||
perplexities_reference: List[float]) -> Dict[int, List[float]]:
|
||||
"""Get exposure using interpolation.
|
||||
perplexities: Mapping[_KT, Iterable[float]],
|
||||
perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
|
||||
"""Gets exposure using interpolation.
|
||||
|
||||
Args:
|
||||
perplexities: a dictionary, key is number of secret repetitions, value is a
|
||||
list of perplexities
|
||||
perplexities_reference: a list, perplexities of the random sequences that
|
||||
did not appear in the training data
|
||||
perplexities: a `Mapping` where the key is an identifier for the secrets
|
||||
set, e.g. number of secret repetitions, and the value is an iterable of
|
||||
perplexities.
|
||||
perplexities_reference: perplexities of the random sequences that did not
|
||||
appear in the training data.
|
||||
|
||||
Returns:
|
||||
The exposure of every secret measured using interpolation (not necessarily
|
||||
in the same order as the input)
|
||||
in the same order as the input), keyed in the same way as perplexities.
|
||||
"""
|
||||
repetitions = list(perplexities.keys())
|
||||
# Concatenate all perplexities, including those for references
|
||||
perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] +
|
||||
[perplexities_reference])
|
||||
# Concatenate the number of repetitions for each secret
|
||||
repetitions_concat = np.concatenate([[r] * len(perplexities[r])
|
||||
for r in repetitions] +
|
||||
[[0] * len(perplexities_reference)])
|
||||
# Get the keys in some fixed order which will be used internally only
|
||||
# further down.
|
||||
keys = list(perplexities)
|
||||
# Concatenate all perplexities, including those from `perplexities_reference`.
|
||||
# Add another dimension indicating which set the perplexity is from: -1 for
|
||||
# reference, {0, ..., len(perplexities) - 1} for secrets.
|
||||
perplexities_concat = [(p, -1) for p in perplexities_reference]
|
||||
for i, k in enumerate(keys):
|
||||
perplexities_concat.extend((p, i) for p in perplexities[k])
|
||||
|
||||
# Sort the repetition list according to the corresponding perplexity
|
||||
idx = np.argsort(perplexities_concat)
|
||||
repetitions_concat = repetitions_concat[idx]
|
||||
# Get the indices list sorted according to the corresponding perplexity,
|
||||
# in case of tie, keep the reference before the secret
|
||||
indices_concat = np.fromiter((i for _, i in sorted(perplexities_concat)),
|
||||
dtype=int)
|
||||
|
||||
# In the sorted repetition list, if there are m examples with repetition 0
|
||||
# (does not appear in training) in front of an example, then its rank is
|
||||
# (m + 1). To get the number of examples with repetition 0 in front of
|
||||
# any example, we use the cummulative sum of the indicator vecotr
|
||||
# (repetitions_concat == 0).
|
||||
cum_sum = np.cumsum(repetitions_concat == 0)
|
||||
ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions}
|
||||
# In the sorted indices list, if there are m examples with index -1
|
||||
# (from the reference set) in front of an example, then its rank is
|
||||
# (m + 1). To get the number of examples with index -1 in front of
|
||||
# any example, we use the cumulative sum of the indicator vector
|
||||
# (indices_concat == -1).
|
||||
cum_sum = np.cumsum(indices_concat == -1)
|
||||
ranks = {k: cum_sum[indices_concat == i] + 1 for i, k in enumerate(keys)}
|
||||
exposures = {
|
||||
r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
|
||||
for r in repetitions
|
||||
k: np.log2(len(list(perplexities_reference))) - np.log2(ranks[k])
|
||||
for k in ranks
|
||||
}
|
||||
return exposures
|
||||
|
||||
|
||||
def compute_exposure_extrapolation(
|
||||
perplexities: Dict[int, List[float]],
|
||||
perplexities_reference: List[float]) -> Dict[int, List[float]]:
|
||||
"""Get exposure using extrapolation.
|
||||
perplexities: Mapping[_KT, Iterable[float]],
|
||||
perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
|
||||
"""Gets exposure using extrapolation.
|
||||
|
||||
Args:
|
||||
perplexities: a dictionary, key is number of secret repetitions, value is a
|
||||
list of perplexities
|
||||
perplexities_reference: a list, perplexities of the random sequences that
|
||||
did not appear in the training data
|
||||
perplexities: a `Mapping` where the key is an identifier for the secrets
|
||||
set, e.g. number of secret repetitions, and the value is an iterable of
|
||||
perplexities.
|
||||
perplexities_reference: perplexities of the random sequences that did not
|
||||
appear in the training data.
|
||||
|
||||
Returns:
|
||||
The exposure of every secret measured using extrapolation
|
||||
The exposure of every secret measured using extrapolation, keyed in the same
|
||||
way as perplexities.
|
||||
"""
|
||||
# Fit a skew normal distribution using the perplexities of the references
|
||||
snormal_param = stats.skewnorm.fit(perplexities_reference)
|
||||
|
||||
# Estimate exposure using the fitted distribution
|
||||
exposures = {
|
||||
r: -np.log2(stats.skewnorm.cdf(perplexities[r], *snormal_param))
|
||||
for r in perplexities.keys()
|
||||
r: -np.log2(stats.skewnorm.cdf(p, *snormal_param))
|
||||
for r, p in perplexities.items()
|
||||
}
|
||||
return exposures
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
from absl.testing import absltest
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
|
||||
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
|
||||
|
||||
from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
|
||||
|
||||
|
||||
class UtilsTest(absltest.TestCase):
|
||||
|
@ -28,42 +28,46 @@ class UtilsTest(absltest.TestCase):
|
|||
def test_exposure_interpolation(self):
|
||||
"""Test exposure by interpolation."""
|
||||
perplexities = {
|
||||
1: [0, 0.1], # smallest perplexities
|
||||
2: [20.0], # largest perplexities
|
||||
5: [3.5]
|
||||
} # rank = 4
|
||||
'1': [0, 0.1], # smallest perplexities
|
||||
'2': [20.0], # largest perplexities
|
||||
'5': [3.5], # rank = 4
|
||||
'8': [3.5], # rank = 4
|
||||
}
|
||||
perplexities_reference = [float(x) for x in range(1, 17)]
|
||||
exposures = compute_exposure_interpolation(perplexities,
|
||||
perplexities_reference)
|
||||
resulted_exposures = exposures.compute_exposure_interpolation(
|
||||
perplexities, perplexities_reference)
|
||||
num_perplexities_reference = len(perplexities_reference)
|
||||
exposure_largest = np.log2(num_perplexities_reference)
|
||||
exposure_smallest = np.log2(num_perplexities_reference) - np.log2(
|
||||
num_perplexities_reference + 1)
|
||||
exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4)
|
||||
expected_exposures = {
|
||||
1: np.array([exposure_largest] * 2),
|
||||
2: np.array([exposure_smallest]),
|
||||
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])
|
||||
'1': np.array([exposure_largest] * 2),
|
||||
'2': np.array([exposure_smallest]),
|
||||
'5': np.array([exposure_rank4]),
|
||||
'8': np.array([exposure_rank4])
|
||||
}
|
||||
|
||||
self.assertEqual(exposures.keys(), expected_exposures.keys())
|
||||
for r in exposures.keys():
|
||||
np.testing.assert_almost_equal(exposures[r], exposures[r])
|
||||
self.assertEqual(resulted_exposures.keys(), expected_exposures.keys())
|
||||
for r in resulted_exposures.keys():
|
||||
np.testing.assert_almost_equal(expected_exposures[r],
|
||||
resulted_exposures[r])
|
||||
|
||||
def test_exposure_extrapolation(self):
|
||||
parameters = (4, 0, 1)
|
||||
perplexities = {
|
||||
1: stats.skewnorm.rvs(*parameters, size=(2,)),
|
||||
10: stats.skewnorm.rvs(*parameters, size=(5,))
|
||||
'1': stats.skewnorm.rvs(*parameters, size=(2,)),
|
||||
'10': stats.skewnorm.rvs(*parameters, size=(5,))
|
||||
}
|
||||
perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
|
||||
exposures = compute_exposure_extrapolation(perplexities,
|
||||
perplexities_reference)
|
||||
resulted_exposures = exposures.compute_exposure_extrapolation(
|
||||
perplexities, perplexities_reference)
|
||||
fitted_parameters = stats.skewnorm.fit(perplexities_reference)
|
||||
|
||||
self.assertEqual(exposures.keys(), perplexities.keys())
|
||||
for r in exposures.keys():
|
||||
self.assertEqual(resulted_exposures.keys(), perplexities.keys())
|
||||
for r in resulted_exposures.keys():
|
||||
np.testing.assert_almost_equal(
|
||||
exposures[r],
|
||||
resulted_exposures[r],
|
||||
-np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue