For secret sharer exposures, allow more dictionary key types, and break ties for the same perplexities. Fix a bug in the test.

PiperOrigin-RevId: 425446829
This commit is contained in:
Shuang Song 2022-01-31 13:26:57 -08:00 committed by A. Unique TensorFlower
parent fd242e76b9
commit 438da5a09b
2 changed files with 72 additions and 61 deletions

View file

@@ -13,74 +13,81 @@
# limitations under the License.
"""Measuring exposure for secret sharer attack."""
from typing import Dict, List
from typing import Iterable, TypeVar, Mapping
import numpy as np
from scipy import stats
_KT = TypeVar('_KT')
def compute_exposure_interpolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using interpolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using interpolation (not necessarily
    in the same order as the input), keyed in the same way as perplexities.
  """
  # Materialize the reference perplexities exactly once: the parameter is an
  # Iterable, and a generator would otherwise be exhausted by the first pass
  # and report length 0 in the log2 below.
  reference = list(perplexities_reference)
  # Get the keys in some fixed order which will be used internally only
  # further down.
  keys = list(perplexities)
  # Concatenate all perplexities, including those from the reference set.
  # Tag each perplexity with the set it came from: -1 for reference,
  # {0, ..., len(keys) - 1} for the secret sets.
  perplexities_concat = [(p, -1) for p in reference]
  for i, k in enumerate(keys):
    perplexities_concat.extend((p, i) for p in perplexities[k])
  # Sort by perplexity; on a tie the tuple ordering keeps the reference (-1)
  # before any secret, which is the desired tie-break.
  indices_concat = np.fromiter((i for _, i in sorted(perplexities_concat)),
                               dtype=int)
  # In the sorted list, if m reference examples (index -1) precede an example,
  # its rank is (m + 1). The cumulative sum of the indicator vector
  # (indices_concat == -1) gives that m at every position.
  cum_sum = np.cumsum(indices_concat == -1)
  ranks = {k: cum_sum[indices_concat == i] + 1 for i, k in enumerate(keys)}
  exposures = {
      k: np.log2(len(reference)) - np.log2(ranks[k]) for k in ranks
  }
  return exposures
def compute_exposure_extrapolation(
    perplexities: Mapping[_KT, Iterable[float]],
    perplexities_reference: Iterable[float]) -> Mapping[_KT, Iterable[float]]:
  """Gets exposure using extrapolation.

  Args:
    perplexities: a `Mapping` where the key is an identifier for the secrets
      set, e.g. number of secret repetitions, and the value is an iterable of
      perplexities.
    perplexities_reference: perplexities of the random sequences that did not
      appear in the training data.

  Returns:
    The exposure of every secret measured using extrapolation, keyed in the same
    way as perplexities.
  """
  # Materialize the iterables into float arrays: the parameters are typed as
  # Iterable, and scipy's fit/cdf cannot consume a generator directly.
  reference = np.fromiter(perplexities_reference, dtype=float)
  # Fit a skew normal distribution using the perplexities of the references.
  snormal_param = stats.skewnorm.fit(reference)
  # Estimate exposure using the fitted distribution.
  exposures = {
      r: -np.log2(stats.skewnorm.cdf(np.fromiter(p, dtype=float),
                                     *snormal_param))
      for r, p in perplexities.items()
  }
  return exposures

View file

@@ -15,8 +15,8 @@
from absl.testing import absltest
import numpy as np
from scipy import stats
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_extrapolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer.exposures import compute_exposure_interpolation
from tensorflow_privacy.privacy.privacy_tests.secret_sharer import exposures
class UtilsTest(absltest.TestCase):
  """Tests for the secret sharer exposure computations."""

  def test_exposure_interpolation(self):
    """Tests exposure by interpolation, including a tie in perplexity."""
    perplexities = {
        '1': [0, 0.1],  # smallest perplexities
        '2': [20.0],  # largest perplexities
        '5': [3.5],  # rank = 4
        '8': [3.5],  # rank = 4 (ties with '5' and must get the same exposure)
    }
    perplexities_reference = [float(x) for x in range(1, 17)]
    resulted_exposures = exposures.compute_exposure_interpolation(
        perplexities, perplexities_reference)
    num_perplexities_reference = len(perplexities_reference)
    exposure_largest = np.log2(num_perplexities_reference)
    exposure_smallest = np.log2(num_perplexities_reference) - np.log2(
        num_perplexities_reference + 1)
    exposure_rank4 = np.log2(num_perplexities_reference) - np.log2(4)
    expected_exposures = {
        '1': np.array([exposure_largest] * 2),
        '2': np.array([exposure_smallest]),
        '5': np.array([exposure_rank4]),
        '8': np.array([exposure_rank4])
    }

    self.assertEqual(resulted_exposures.keys(), expected_exposures.keys())
    for r in resulted_exposures.keys():
      # Compare expected against computed values. (An earlier version of this
      # test compared the computed values to themselves and could never fail.)
      np.testing.assert_almost_equal(expected_exposures[r],
                                     resulted_exposures[r])

  def test_exposure_extrapolation(self):
    """Tests exposure by extrapolation against a direct scipy computation."""
    parameters = (4, 0, 1)
    perplexities = {
        '1': stats.skewnorm.rvs(*parameters, size=(2,)),
        '10': stats.skewnorm.rvs(*parameters, size=(5,))
    }
    perplexities_reference = stats.skewnorm.rvs(*parameters, size=(10000,))
    resulted_exposures = exposures.compute_exposure_extrapolation(
        perplexities, perplexities_reference)
    fitted_parameters = stats.skewnorm.fit(perplexities_reference)

    self.assertEqual(resulted_exposures.keys(), perplexities.keys())
    for r in resulted_exposures.keys():
      np.testing.assert_almost_equal(
          resulted_exposures[r],
          -np.log2(stats.skewnorm.cdf(perplexities[r], *fitted_parameters)))