Finish implementation of custom indices names.

PiperOrigin-RevId: 545440374
This commit is contained in:
Vadym Doroshenko 2023-07-04 07:18:38 -07:00 committed by A. Unique TensorFlower
parent 93f5a5249c
commit a147a480a5
5 changed files with 111 additions and 23 deletions

View file

@ -20,6 +20,7 @@ import os
import tempfile
from absl import app
from absl import flags
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
@ -31,6 +32,15 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import plotting
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import privacy_report
_CUSTOM_SLICES = flags.DEFINE_boolean(
"custom_slices",
default=False,
help="If true, custom slices are used.",
)
TRAIN_SET_SIZE = TEST_SET_SIZE = 1000
def generate_random_cluster(center, scale, num_points):
return np.random.normal(size=(num_points, len(center))) * scale + center
@ -104,9 +114,11 @@ def main(unused_argv):
# the generated clusters. More noise makes the classification harder.
noise_scale = 2
training_features, training_labels = generate_features_and_labels(
samples_per_cluster=250, scale=noise_scale)
samples_per_cluster=TRAIN_SET_SIZE // 4, scale=noise_scale
)
test_features, test_labels = generate_features_and_labels(
samples_per_cluster=250, scale=noise_scale)
samples_per_cluster=TEST_SET_SIZE // 4, scale=noise_scale
)
num_clusters = int(round(np.max(training_labels))) + 1
@ -143,6 +155,21 @@ def main(unused_argv):
epoch_num=num_epochs_per_round * (i + 1),
model_variant_label=model_name)
if _CUSTOM_SLICES.value:
custom_train_indices = np.array([i % 2 for i in range(TRAIN_SET_SIZE)])
custom_test_indices = np.array(
[(i + 1) % 2 for i in range(TEST_SET_SIZE)]
)
slicing_spec = data_structures.SlicingSpec(
all_custom_train_indices=[custom_train_indices],
all_custom_test_indices=[custom_test_indices],
custom_slices_names={0: "name0", 1: "name1"},
)
else:
slicing_spec = data_structures.SlicingSpec(
entire_dataset=True, by_class=True
)
attack_results = mia.run_attacks(
data_structures.AttackInputData(
labels_train=training_labels,
@ -150,7 +177,7 @@ def main(unused_argv):
probs_train=training_pred,
probs_test=test_pred,
),
data_structures.SlicingSpec(entire_dataset=True, by_class=True),
slicing_spec,
attack_types=(
data_structures.AttackType.THRESHOLD_ATTACK,
data_structures.AttackType.LOGISTIC_REGRESSION,

View file

@ -67,9 +67,15 @@ class SingleSliceSpec:
return 'Loss percentiles: %d-%d' % self.value
if self.feature == SlicingFeature.CUSTOM:
custom_train_indices, custom_test_indices, group_value = self.value
return (f'Custom indices: train = {custom_train_indices}, '
f'test = {custom_test_indices}, group_value = {group_value}')
custom_train_indices, custom_test_indices, slice_value, slice_name = (
self.value
)
if slice_name is not None:
return f'Custom indices: slice_name = {slice_name}'
return (
f'Custom indices: train = {custom_train_indices}, '
f'test = {custom_test_indices}, group_value = {slice_value}'
)
return '%s=%s' % (self.feature.name, self.value)

View file

@ -44,8 +44,11 @@ class SingleSliceSpecTest(parameterized.TestCase):
(SlicingFeature.CLASS, 2, 'CLASS=2'),
(SlicingFeature.PERCENTILE, (10, 20), 'Loss percentiles: 10-20'),
(SlicingFeature.CORRECTLY_CLASSIFIED, True, 'CORRECTLY_CLASSIFIED=True'),
(SlicingFeature.CUSTOM, (np.array([1]), np.array([2, 1]), 1),
'Custom indices: train = [1], test = [2 1], group_value = 1'),
(
SlicingFeature.CUSTOM,
(np.array([1]), np.array([2, 1]), 1, None),
'Custom indices: train = [1], test = [2 1], group_value = 1',
),
)
def testStr(self, feature, value, expected_str):
self.assertEqual(str(SingleSliceSpec(feature, value)), expected_str)

View file

@ -264,13 +264,18 @@ def get_single_slice_specs(
f"Too many groups ({groups.size}) for slicing by custom indices. "
f"Should be no more than {_MAX_NUM_OF_SLICES}.")
for g in groups:
group_name = None
if slicing_spec.custom_slices_names is not None:
if g not in slicing_spec.custom_slices_names:
raise ValueError(f"Custom slice={g} is not in custom_slices_names")
group_id = slicing_spec.custom_slices_names[g]
else:
group_id = (custom_train_indices, custom_test_indices, g)
result.append(SingleSliceSpec(SlicingFeature.CUSTOM, group_id))
group_name = slicing_spec.custom_slices_names[g]
result.append(
SingleSliceSpec(
SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, g, group_name),
)
)
return result
@ -294,7 +299,8 @@ def get_slice(
data, slice_spec.value, return_slice_indices
)
elif slice_spec.feature == SlicingFeature.CUSTOM:
custom_train_indices, custom_test_indices, group_value = slice_spec.value
custom_train_indices, custom_test_indices, group_value, _ = slice_spec.value
data_slice = _slice_by_custom_indices(
data,
custom_train_indices,

View file

@ -109,8 +109,10 @@ class SingleSliceSpecsTest(parameterized.TestCase):
all_custom_train_indices=[custom_train_indices],
all_custom_test_indices=[custom_test_indices])
expected = [self.ENTIRE_DATASET_SLICE] + [
SingleSliceSpec(SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, g))
SingleSliceSpec(
SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, g, None),
)
for g in expected_groups
]
output = get_single_slice_specs(input_data)
@ -138,11 +140,49 @@ class SingleSliceSpecsTest(parameterized.TestCase):
for custom_train_indices, custom_test_indices, eg in zip(
all_custom_train_indices, all_custom_test_indices,
expected_group_values):
expected.extend([
SingleSliceSpec(SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, g))
expected.extend(
[
SingleSliceSpec(
SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, g, None),
)
for g in eg
])
]
)
output = get_single_slice_specs(input_data)
self.assertTrue(_are_lists_equal(output, expected))
def test_slicing_by_custom_indices_slice_name(self):
all_custom_train_indices = [
np.array([1, 2, 1, 2]),
]
all_custom_test_indices = [
np.array([2, 2, 1, 2]),
]
custom_slices_names = {1: 'slice1', 2: 'slice2'}
expected_group_values = [[1, 2]]
input_data = SlicingSpec(
all_custom_train_indices=all_custom_train_indices,
all_custom_test_indices=all_custom_test_indices,
custom_slices_names=custom_slices_names,
)
expected = [self.ENTIRE_DATASET_SLICE]
for custom_train_indices, custom_test_indices, eg in zip(
all_custom_train_indices, all_custom_test_indices, expected_group_values
):
for g in eg:
expected.append(
SingleSliceSpec(
SlicingFeature.CUSTOM,
(
custom_train_indices,
custom_test_indices,
g,
custom_slices_names[g],
),
)
)
output = get_single_slice_specs(input_data)
self.assertTrue(_are_lists_equal(output, expected))
@ -298,7 +338,9 @@ class GetSliceTest(parameterized.TestCase):
custom_train_indices = np.array([2, 2, 100, 4])
custom_test_indices = np.array([100, 2, 2, 2])
custom_slice = SingleSliceSpec(
SlicingFeature.CUSTOM, (custom_train_indices, custom_test_indices, 2))
SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, 2, None),
)
output = get_slice(self.input_data, custom_slice)
np.testing.assert_array_equal(output.logits_train,
np.array([[0, 1, 0], [2, 0, 3]]))
@ -325,7 +367,9 @@ class GetSliceTest(parameterized.TestCase):
def test_slice_by_custom_indices_wrong_size(self, custom_train_indices,
custom_test_indices):
custom_slice = SingleSliceSpec(
SlicingFeature.CUSTOM, (custom_train_indices, custom_test_indices, 2))
SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, 2, None),
)
self.assertRaises(ValueError, get_slice, self.input_data, custom_slice)
@ -420,7 +464,9 @@ class GetSliceTestForMultilabelData(absltest.TestCase):
custom_train_indices = np.array([2, 2, 100, 4])
custom_test_indices = np.array([100, 2, 2, 2])
custom_slice = SingleSliceSpec(
SlicingFeature.CUSTOM, (custom_train_indices, custom_test_indices, 2))
SlicingFeature.CUSTOM,
(custom_train_indices, custom_test_indices, 2, 'slice_name'),
)
output = get_slice(self.input_data, custom_slice)
# Check logits.
with self.subTest(msg='Check logits'):