Finish implementation of custom indices names.

PiperOrigin-RevId: 545440374
2023-07-04 07:18:38 -07:00 · 2023-07-04 07:18:38 -07:00 · a147a480a5
commit a147a480a5
parent 93f5a5249c
5 changed files with 111 additions and 23 deletions
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/codelabs/example.py
@ -20,6 +20,7 @@ import os
 import tempfile
 from absl import app
 from absl import flags
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@ -31,6 +32,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import plotting
 from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import privacy_report
 _CUSTOM_SLICES = flags.DEFINE_boolean(
    "custom_slices",
    default=False,
    help="If true, custom slices are used.",
 )
 TRAIN_SET_SIZE = TEST_SET_SIZE = 1000
 def generate_random_cluster(center, scale, num_points):
  return np.random.normal(size=(num_points, len(center))) * scale + center
@ -104,9 +114,11 @@ def main(unused_argv):
  # the generated clusters. More noise makes the classification harder.
  noise_scale = 2
  training_features, training_labels = generate_features_and_labels(
-      samples_per_cluster=250, scale=noise_scale)
+      samples_per_cluster=TRAIN_SET_SIZE // 4, scale=noise_scale
  )
  test_features, test_labels = generate_features_and_labels(
-      samples_per_cluster=250, scale=noise_scale)
+      samples_per_cluster=TEST_SET_SIZE // 4, scale=noise_scale
  )
  num_clusters = int(round(np.max(training_labels))) + 1
@ -143,6 +155,21 @@ def main(unused_argv):
          epoch_num=num_epochs_per_round * (i + 1),
          model_variant_label=model_name)
      if _CUSTOM_SLICES.value:
        custom_train_indices = np.array([i % 2 for i in range(TRAIN_SET_SIZE)])
        custom_test_indices = np.array(
            [(i + 1) % 2 for i in range(TEST_SET_SIZE)]
        )
        slicing_spec = data_structures.SlicingSpec(
            all_custom_train_indices=[custom_train_indices],
            all_custom_test_indices=[custom_test_indices],
            custom_slices_names={0: "name0", 1: "name1"},
        )
      else:
        slicing_spec = data_structures.SlicingSpec(
            entire_dataset=True, by_class=True
        )
      attack_results = mia.run_attacks(
          data_structures.AttackInputData(
              labels_train=training_labels,
@ -150,7 +177,7 @@ def main(unused_argv):
              probs_train=training_pred,
              probs_test=test_pred,
          ),
-          data_structures.SlicingSpec(entire_dataset=True, by_class=True),
+          slicing_spec,
          attack_types=(
              data_structures.AttackType.THRESHOLD_ATTACK,
              data_structures.AttackType.LOGISTIC_REGRESSION,
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures.py
@ -67,9 +67,15 @@ class SingleSliceSpec:
      return 'Loss percentiles: %d-%d' % self.value
    if self.feature == SlicingFeature.CUSTOM:
-      custom_train_indices, custom_test_indices, group_value = self.value
+      custom_train_indices, custom_test_indices, slice_value, slice_name = (
-      return (f'Custom indices: train = {custom_train_indices}, '
+          self.value
-              f'test = {custom_test_indices}, group_value = {group_value}')
+      )
      if slice_name is not None:
        return f'Custom indices: slice_name = {slice_name}'
      return (
          f'Custom indices: train = {custom_train_indices}, '
          f'test = {custom_test_indices}, group_value = {slice_value}'
      )
    return '%s=%s' % (self.feature.name, self.value)
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures_test.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/data_structures_test.py
@ -44,8 +44,11 @@ class SingleSliceSpecTest(parameterized.TestCase):
      (SlicingFeature.CLASS, 2, 'CLASS=2'),
      (SlicingFeature.PERCENTILE, (10, 20), 'Loss percentiles: 10-20'),
      (SlicingFeature.CORRECTLY_CLASSIFIED, True, 'CORRECTLY_CLASSIFIED=True'),
-      (SlicingFeature.CUSTOM, (np.array([1]), np.array([2, 1]), 1),
+      (
-       'Custom indices: train = [1], test = [2 1], group_value = 1'),
+          SlicingFeature.CUSTOM,
          (np.array([1]), np.array([2, 1]), 1, None),
          'Custom indices: train = [1], test = [2 1], group_value = 1',
      ),
  )
  def testStr(self, feature, value, expected_str):
    self.assertEqual(str(SingleSliceSpec(feature, value)), expected_str)
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing.py
@ -264,13 +264,18 @@ def get_single_slice_specs(
            f"Too many groups ({groups.size}) for slicing by custom indices. "
            f"Should be no more than {_MAX_NUM_OF_SLICES}.")
      for g in groups:
        group_name = None
        if slicing_spec.custom_slices_names is not None:
          if g not in slicing_spec.custom_slices_names:
            raise ValueError(f"Custom slice={g} is not in custom_slices_names")
-          group_id = slicing_spec.custom_slices_names[g]
+          group_name = slicing_spec.custom_slices_names[g]
-        else:
+
-          group_id = (custom_train_indices, custom_test_indices, g)
+        result.append(
-        result.append(SingleSliceSpec(SlicingFeature.CUSTOM, group_id))
+            SingleSliceSpec(
                SlicingFeature.CUSTOM,
                (custom_train_indices, custom_test_indices, g, group_name),
            )
        )
  return result
@ -294,7 +299,8 @@ def get_slice(
        data, slice_spec.value, return_slice_indices
    )
  elif slice_spec.feature == SlicingFeature.CUSTOM:
-    custom_train_indices, custom_test_indices, group_value = slice_spec.value
+    custom_train_indices, custom_test_indices, group_value, _ = slice_spec.value
    data_slice = _slice_by_custom_indices(
        data,
        custom_train_indices,
--- a/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing_test.py
+++ b/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/dataset_slicing_test.py
@ -109,8 +109,10 @@ class SingleSliceSpecsTest(parameterized.TestCase):
        all_custom_train_indices=[custom_train_indices],
        all_custom_test_indices=[custom_test_indices])
    expected = [self.ENTIRE_DATASET_SLICE] + [
-        SingleSliceSpec(SlicingFeature.CUSTOM,
+        SingleSliceSpec(
-                        (custom_train_indices, custom_test_indices, g))
+            SlicingFeature.CUSTOM,
            (custom_train_indices, custom_test_indices, g, None),
        )
        for g in expected_groups
    ]
    output = get_single_slice_specs(input_data)
@ -138,11 +140,49 @@ class SingleSliceSpecsTest(parameterized.TestCase):
    for custom_train_indices, custom_test_indices, eg in zip(
        all_custom_train_indices, all_custom_test_indices,
        expected_group_values):
-      expected.extend([
+      expected.extend(
-          SingleSliceSpec(SlicingFeature.CUSTOM,
+          [
-                          (custom_train_indices, custom_test_indices, g))
+              SingleSliceSpec(
-          for g in eg
+                  SlicingFeature.CUSTOM,
-      ])
+                  (custom_train_indices, custom_test_indices, g, None),
              )
              for g in eg
          ]
      )
    output = get_single_slice_specs(input_data)
    self.assertTrue(_are_lists_equal(output, expected))
  def test_slicing_by_custom_indices_slice_name(self):
    all_custom_train_indices = [
        np.array([1, 2, 1, 2]),
    ]
    all_custom_test_indices = [
        np.array([2, 2, 1, 2]),
    ]
    custom_slices_names = {1: 'slice1', 2: 'slice2'}
    expected_group_values = [[1, 2]]
    input_data = SlicingSpec(
        all_custom_train_indices=all_custom_train_indices,
        all_custom_test_indices=all_custom_test_indices,
        custom_slices_names=custom_slices_names,
    )
    expected = [self.ENTIRE_DATASET_SLICE]
    for custom_train_indices, custom_test_indices, eg in zip(
        all_custom_train_indices, all_custom_test_indices, expected_group_values
    ):
      for g in eg:
        expected.append(
            SingleSliceSpec(
                SlicingFeature.CUSTOM,
                (
                    custom_train_indices,
                    custom_test_indices,
                    g,
                    custom_slices_names[g],
                ),
            )
        )
    output = get_single_slice_specs(input_data)
    self.assertTrue(_are_lists_equal(output, expected))
@ -298,7 +338,9 @@ class GetSliceTest(parameterized.TestCase):
    custom_train_indices = np.array([2, 2, 100, 4])
    custom_test_indices = np.array([100, 2, 2, 2])
    custom_slice = SingleSliceSpec(
-        SlicingFeature.CUSTOM, (custom_train_indices, custom_test_indices, 2))
+        SlicingFeature.CUSTOM,
        (custom_train_indices, custom_test_indices, 2, None),
    )
    output = get_slice(self.input_data, custom_slice)
    np.testing.assert_array_equal(output.logits_train,
                                  np.array([[0, 1, 0], [2, 0, 3]]))
@ -325,7 +367,9 @@ class GetSliceTest(parameterized.TestCase):
  def test_slice_by_custom_indices_wrong_size(self, custom_train_indices,
                                              custom_test_indices):
    custom_slice = SingleSliceSpec(
-        SlicingFeature.CUSTOM, (custom_train_indices, custom_test_indices, 2))
+        SlicingFeature.CUSTOM,
        (custom_train_indices, custom_test_indices, 2, None),
    )
    self.assertRaises(ValueError, get_slice, self.input_data, custom_slice)
@ -420,7 +464,9 @@ class GetSliceTestForMultilabelData(absltest.TestCase):
    custom_train_indices = np.array([2, 2, 100, 4])
    custom_test_indices = np.array([100, 2, 2, 2])
    custom_slice = SingleSliceSpec(
-        SlicingFeature.CUSTOM, (custom_train_indices, custom_test_indices, 2))
+        SlicingFeature.CUSTOM,
        (custom_train_indices, custom_test_indices, 2, 'slice_name'),
    )
    output = get_slice(self.input_data, custom_slice)
    # Check logits.
    with self.subTest(msg='Check logits'):