For secret sharer exposures, allow more dictionary key types, and break ties for the same perplexities. Fix a bug in the test.

PiperOrigin-RevId: 425446829
This commit is contained in:
Shuang Song 2022-01-31 13:26:57 -08:00 committed by A. Unique TensorFlower
parent fd242e76b9
commit 438da5a09b
2 changed files with 72 additions and 61 deletions

View file

@ -13,74 +13,81 @@
# limitations under the License. # limitations under the License.
"""Measuring exposure for secret sharer attack.""" """Measuring exposure for secret sharer attack."""
from typing import Dict, List from typing import Iterable, TypeVar, Mapping
import numpy as np import numpy as np
from scipy import stats from scipy import stats
_KT = TypeVar('_KT')


def compute_exposure_interpolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using interpolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using interpolation (not necessarily
    in the same order as the input), keyed in the same way as perplexities.
  """
  # Materialize the reference perplexities exactly once: the parameter is
  # only promised to be an Iterable, so iterating it a second time (for its
  # length) would silently see an exhausted generator and yield log2(0).
  reference = list(perplexities_reference)
  # Fix an ordering of the keys; it is used internally only, further down.
  keys = list(perplexities)
  # Tag every perplexity with the set it came from: -1 for the reference
  # set, i in {0, ..., len(keys) - 1} for the i-th secrets set. Sorting the
  # (perplexity, tag) pairs breaks ties by keeping a reference example
  # before a secret with the same perplexity (-1 sorts first).
  tagged = [(p, -1) for p in reference]
  for i, key in enumerate(keys):
    tagged.extend((p, i) for p in perplexities[key])
  indices_concat = np.fromiter((i for _, i in sorted(tagged)), dtype=int)
  # In the sorted list, if m reference examples (tag -1) are in front of an
  # example, its rank is (m + 1). The cumulative sum of the reference
  # indicator vector gives that m at every position.
  cum_sum = np.cumsum(indices_concat == -1)
  ranks = {key: cum_sum[indices_concat == i] + 1 for i, key in enumerate(keys)}
  # log2(|reference|) is constant; hoist it out of the per-key computation.
  log_num_reference = np.log2(len(reference))
  return {key: log_num_reference - np.log2(rank) for key, rank in ranks.items()}
def compute_exposure_extrapolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using extrapolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using extrapolation, keyed in the
    same way as perplexities.
  """
  # The parameters are only promised to be Iterables: convert to concrete
  # float arrays once so scipy receives array_like inputs (a generator would
  # not survive the implicit np.asarray inside skewnorm.fit / cdf).
  reference = np.asarray(list(perplexities_reference), dtype=float)
  # Fit a skew normal distribution using the perplexities of the references.
  snormal_param = stats.skewnorm.fit(reference)
  # Estimate exposure of each secrets set using the fitted distribution.
  return {
      r: -np.log2(stats.skewnorm.cdf(np.fromiter(p, dtype=float),
                                     *snormal_param))
      for r, p in perplexities.items()
  }

View file

@ -15,8 +15,8 @@
from absl.testing import absltest from absl.testing import absltest
import numpy as np import numpy as np
from scipy import stats from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
class UtilsTest(absltest.TestCase):

  def test_exposure_interpolation(self):
    """Checks interpolation-based exposure on a hand-crafted example."""
    # Reference perplexities are simply 1.0, 2.0, ..., 16.0.
    reference = [float(x) for x in range(1, 17)]
    num_reference = len(reference)
    secret_perplexities = {
        '1': [0, 0.1],  # smallest perplexities
        '2': [20.0],  # largest perplexities
        '5': [3.5],  # rank = 4
        '8': [3.5],  # rank = 4
    }

    got = exposures.compute_exposure_interpolation(secret_perplexities,
                                                   reference)

    largest = np.log2(num_reference)
    smallest = np.log2(num_reference) - np.log2(num_reference + 1)
    rank4 = np.log2(num_reference) - np.log2(4)
    want = {
        '1': np.array([largest, largest]),
        '2': np.array([smallest]),
        '5': np.array([rank4]),
        '8': np.array([rank4]),
    }
    self.assertEqual(got.keys(), want.keys())
    for key, expected in want.items():
      np.testing.assert_almost_equal(expected, got[key])

  def test_exposure_extrapolation(self):
    """Checks extrapolation-based exposure against a direct computation."""
    parameters = (4, 0, 1)
    secret_perplexities = {
        '1': stats.skewnorm.rvs(*parameters, size=(2,)),
        '10': stats.skewnorm.rvs(*parameters, size=(5,))
    }
    reference = stats.skewnorm.rvs(*parameters, size=(10000,))

    got = exposures.compute_exposure_extrapolation(secret_perplexities,
                                                   reference)

    # Re-fit the reference distribution and recompute the exposure directly.
    fitted = stats.skewnorm.fit(reference)
    self.assertEqual(got.keys(), secret_perplexities.keys())
    for key, value in got.items():
      np.testing.assert_almost_equal(
          value,
          -np.log2(stats.skewnorm.cdf(secret_perplexities[key], *fitted)))