For secret sharer exposures, allow more dictionary key types, and break ties for the same perplexities. Fix a bug in the test.
PiperOrigin-RevId: 425446829
This commit is contained in:
parent
fd242e76b9
commit
438da5a09b
2 changed files with 72 additions and 61 deletions
|
@ -13,74 +13,81 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
"""Measuring exposure for secret sharer attack."""
|
"""Measuring exposure for secret sharer attack."""
|
||||||
|
|
||||||
from typing import Dict, List
|
from typing import Iterable, TypeVar, Mapping
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
|
|
||||||
|
|
||||||
|
_KT = TypeVar('_KT')
|
||||||
|
|
||||||
|
|
||||||
def compute_exposure_interpolation(
|
def compute_exposure_interpolation(
|
||||||
perplexities: Dict[int, List[float]],
|
perplexities: Mapping[_KT, Iterable[float]],
|
||||||
perplexities_reference: List[float]) -> Dict[int, List[float]]:
|
perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
|
||||||
"""Get exposure using interpolation.
|
"""Gets exposure using interpolation.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
perplexities: a dictionary, key is number of secret repetitions, value is a
|
perplexities: a `Mapping` where the key is an identifier for the secrets
|
||||||
list of perplexities
|
set, e.g. number of secret repetitions, and the value is an iterable of
|
||||||
perplexities_reference: a list, perplexities of the random sequences that
|
perplexities.
|
||||||
did not appear in the training data
|
perplexities_reference: perplexities of the random sequences that did not
|
||||||
|
appear in the training data.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The exposure of every secret measured using interpolation (not necessarily
|
The exposure of every secret measured using interpolation (not necessarily
|
||||||
in the same order as the input)
|
in the same order as the input), keyed in the same way as perplexities.
|
||||||
"""
|
"""
|
||||||
repetitions = list(perplexities.keys())
|
# Get the keys in some fixed order which will be used internally only
|
||||||
# Concatenate all perplexities, including those for references
|
# further down.
|
||||||
perplexities_concat = np.concatenate([perplexities[r] for r in repetitions] +
|
keys = list(perplexities)
|
||||||
[perplexities_reference])
|
# Concatenate all perplexities, including those from `perplexities_reference`.
|
||||||
# Concatenate the number of repetitions for each secret
|
# Add another dimension indicating which set the perplexity is from: -1 for
|
||||||
repetitions_concat = np.concatenate([[r] * len(perplexities[r])
|
# reference, {0, ..., len(perplexities) - 1} for secrets
|
||||||
for r in repetitions] +
|
perplexities_concat = [(p, -1) for p in perplexities_reference]
|
||||||
[[0] * len(perplexities_reference)])
|
for i, k in enumerate(keys):
|
||||||
|
perplexities_concat.extend((p, i) for p in perplexities[k])
|
||||||
|
|
||||||
# Sort the repetition list according to the corresponding perplexity
|
# Get the indices list sorted according to the corresponding perplexity,
|
||||||
idx = np.argsort(perplexities_concat)
|
# in case of tie, keep the reference before the secret
|
||||||
repetitions_concat = repetitions_concat[idx]
|
indices_concat = np.fromiter((i for _, i in sorted(perplexities_concat)),
|
||||||
|
dtype=int)
|
||||||
|
|
||||||
# In the sorted repetition list, if there are m examples with repetition 0
|
# In the sorted indices list, if there are m examples with index -1
|
||||||
# (does not appear in training) in front of an example, then its rank is
|
# (from the reference set) in front of an example, then its rank is
|
||||||
# (m + 1). To get the number of examples with repetition 0 in front of
|
# (m + 1). To get the number of examples with index -1 in front of
|
||||||
# any example, we use the cummulative sum of the indicator vecotr
|
# any example, we use the cumulative sum of the indicator vector
|
||||||
# (repetitions_concat == 0).
|
# (indices_concat == -1).
|
||||||
cum_sum = np.cumsum(repetitions_concat == 0)
|
cum_sum = np.cumsum(indices_concat == -1)
|
||||||
ranks = {r: cum_sum[repetitions_concat == r] + 1 for r in repetitions}
|
ranks = {k: cum_sum[indices_concat == i] + 1 for i, k in enumerate(keys)}
|
||||||
exposures = {
|
exposures = {
|
||||||
r: np.log2(len(perplexities_reference)) - np.log2(ranks[r])
|
k: np.log2(len(list(perplexities_reference))) - np.log2(ranks[k])
|
||||||
for r in repetitions
|
for k in ranks
|
||||||
}
|
}
|
||||||
return exposures
|
return exposures
|
||||||
|
|
||||||
|
|
||||||
def compute_exposure_extrapolation(
|
def compute_exposure_extrapolation(
|
||||||
perplexities: Dict[int, List[float]],
|
perplexities: Mapping[_KT, Iterable[float]],
|
||||||
perplexities_reference: List[float]) -> Dict[int, List[float]]:
|
perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
|
||||||
"""Get exposure using extrapolation.
|
"""Gets exposure using extrapolation.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
perplexities: a dictionary, key is number of secret repetitions, value is a
|
perplexities: a `Mapping` where the key is an identifier for the secrets
|
||||||
list of perplexities
|
set, e.g. number of secret repetitions, and the value is an iterable of
|
||||||
perplexities_reference: a list, perplexities of the random sequences that
|
perplexities.
|
||||||
did not appear in the training data
|
perplexities_reference: perplexities of the random sequences that did not
|
||||||
|
appear in the training data.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The exposure of every secret measured using extrapolation
|
The exposure of every secret measured using extrapolation, keyed in the same
|
||||||
|
way as perplexities.
|
||||||
"""
|
"""
|
||||||
# Fit a skew normal distribution using the perplexities of the references
|
# Fit a skew normal distribution using the perplexities of the references
|
||||||
snormal_param = stats.skewnorm.fit(perplexities_reference)
|
snormal_param = stats.skewnorm.fit(perplexities_reference)
|
||||||
|
|
||||||
# Estimate exposure using the fitted distribution
|
# Estimate exposure using the fitted distribution
|
||||||
exposures = {
|
exposures = {
|
||||||
r: -np.log2(stats.skewnorm.cdf(perplexities[r], *snormal_param))
|
r: -np.log2(stats.skewnorm.cdf(p, *snormal_param))
|
||||||
for r in perplexities.keys()
|
for r, p in perplexities.items()
|
||||||
}
|
}
|
||||||
return exposures
|
return exposures
|
||||||
|
|
|
@ -15,8 +15,8 @@
|
||||||
from absl.testing import absltest
|
from absl.testing import absltest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import stats
|
from scipy import stats
|
||||||
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
|
|
||||||
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
|
from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
|
||||||
|
|
||||||
|
|
||||||
class UtilsTest(absltest.TestCase):
|
class UtilsTest(absltest.TestCase):
|
||||||
|
@ -28,42 +28,46 @@ class UtilsTest(absltest.TestCase):
|
||||||
def test_exposure_interpolation(self):
|
def test_exposure_interpolation(self):
|
||||||
"""Test exposure by interpolation."""
|
"""Test exposure by interpolation."""
|
||||||
perplexities = {
|
perplexities = {
|
||||||
1: [0, 0.1], # smallest perplexities
|
'1': [0, 0.1], # smallest perplexities
|
||||||
2: [20.0], # largest perplexities
|
'2': [20.0], # largest perplexities
|
||||||
5: [3.5]
|
'5': [3.5], # rank = 4
|
||||||
} # rank = 4
|
'8': [3.5], # rank = 4
|
||||||
|
}
|
||||||
perplexities_reference = [float(x) for x in range(1, 17)]
|
perplexities_reference = [float(x) for x in range(1, 17)]
|
||||||
exposures = compute_exposure_interpolation(perplexities,
|
resulted_exposures = exposures.compute_exposure_interpolation(
|
||||||
perplexities_reference)
|
perplexities, perplexities_reference)
|
||||||
num_perplexities_reference = len(perplexities_reference)
|
num_perplexities_reference = len(perplexities_reference)
|
||||||
exposure_largest = np.log2(num_perplexities_reference)
|
exposure_largest = np.log2(num_perplexities_reference)
|
||||||
exposure_smallest = np.log2(num_perplexities_reference) - np.log2(
|
exposure_smallest = np.log2(num_perplexities_reference) - np.log2(
|
||||||
num_perplexities_reference + 1)
|
num_perplexities_reference + 1)
|
||||||
|
exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4)
|
||||||
expected_exposures = {
|
expected_exposures = {
|
||||||
1: np.array([exposure_largest] * 2),
|
'1': np.array([exposure_largest] * 2),
|
||||||
2: np.array([exposure_smallest]),
|
'2': np.array([exposure_smallest]),
|
||||||
5: np.array([np.log2(num_perplexities_reference) - np.log2(4)])
|
'5': np.array([exposure_rank4]),
|
||||||
|
'8': np.array([exposure_rank4])
|
||||||
}
|
}
|
||||||
|
|
||||||
self.assertEqual(exposures.keys(), expected_exposures.keys())
|
self.assertEqual(resulted_exposures.keys(), expected_exposures.keys())
|
||||||
for r in exposures.keys():
|
for r in resulted_exposures.keys():
|
||||||
np.testing.assert_almost_equal(exposures[r], exposures[r])
|
np.testing.assert_almost_equal(expected_exposures[r],
|
||||||
|
resulted_exposures[r])
|
||||||
|
|
||||||
def test_exposure_extrapolation(self):
|
def test_exposure_extrapolation(self):
|
||||||
parameters = (4, 0, 1)
|
parameters = (4, 0, 1)
|
||||||
perplexities = {
|
perplexities = {
|
||||||
1: stats.skewnorm.rvs(*parameters, size=(2,)),
|
'1': stats.skewnorm.rvs(*parameters, size=(2,)),
|
||||||
10: stats.skewnorm.rvs(*parameters, size=(5,))
|
'10': stats.skewnorm.rvs(*parameters, size=(5,))
|
||||||
}
|
}
|
||||||
perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
|
perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
|
||||||
exposures = compute_exposure_extrapolation(perplexities,
|
resulted_exposures = exposures.compute_exposure_extrapolation(
|
||||||
perplexities_reference)
|
perplexities, perplexities_reference)
|
||||||
fitted_parameters = stats.skewnorm.fit(perplexities_reference)
|
fitted_parameters = stats.skewnorm.fit(perplexities_reference)
|
||||||
|
|
||||||
self.assertEqual(exposures.keys(), perplexities.keys())
|
self.assertEqual(resulted_exposures.keys(), perplexities.keys())
|
||||||
for r in exposures.keys():
|
for r in resulted_exposures.keys():
|
||||||
np.testing.assert_almost_equal(
|
np.testing.assert_almost_equal(
|
||||||
exposures[r],
|
resulted_exposures[r],
|
||||||
-np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))
|
-np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue