Add Seq2SeqAttackInputData data structure

2020-11-06 16:42:31 +08:00 · 2020-11-06 16:42:31 +08:00 · 9f07f2a871
commit 9f07f2a871
parent f0daaf085f
1 changed files with 79 additions and 1 deletions
--- a/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
+++ b/tensorflow_privacy/privacy/membership_inference_attack/data_structures.py
@@ -18,7 +18,7 @@ import enum
 import glob
 import os
 import pickle
-from typing import Any, Iterable, Union
+from typing import Any, Iterable, Union, Iterator
 from dataclasses import dataclass
 import numpy as np
@@ -374,6 +374,84 @@ def _append_array_shape(arr: np.array, arr_name: str, result):
    result.append(' %s with shape: %s,' % (arr_name, arr.shape))
 def _is_generator(gen, gen_name):
   """Raises if `gen` is set but cannot be consumed as an iterator.

   Despite the name, the test is an `isinstance` check against `Iterator`, so
   any iterator object is accepted, not only generator objects. `None` is
   treated as "not provided" and passes silently.

   Args:
     gen: The object to check; may be None.
     gen_name: Name of the checked field, used in the error message.

   Raises:
     ValueError: If `gen` is neither None nor an `Iterator` instance.
   """
   if gen is None:
     return
   if isinstance(gen, Iterator):
     return
   raise ValueError('%s should be a generator.' % gen_name)
@dataclass
class Seq2SeqAttackInputData:
  """Input data for running an attack on seq2seq models.

  This includes only the data, and not configuration.

  Logits and labels are supplied as iterators so that they can be loaded
  lazily; the sizes therefore have to be provided explicitly by the user.
  Call `validate()` to check that the fields are consistent.
  """

  # Iterators yielding the logits for the training and test sets.
  logits_train: Iterator[np.ndarray] = None
  logits_test: Iterator[np.ndarray] = None

  # Contains ground-truth token indices for the target sequences.
  labels_train: Iterator[np.ndarray] = None
  labels_test: Iterator[np.ndarray] = None

  # Denotes size of the target sequence vocabulary.
  vocab_size: int = None

  # Train, test size = number of batches in training, test set.
  # These values need to be supplied by the user as logits, labels
  # are lazy loaded for seq2seq models.
  train_size: int = 0
  test_size: int = 0

  def validate(self):
    """Validates the inputs.

    Raises:
      ValueError: If train/test logits or labels are not set together, if any
        of logits, labels, vocab_size, train_size or test_size is missing, if
        a size field is not an integer, or if logits/labels are not
        iterators.
    """
    if (self.logits_train is None) != (self.logits_test is None):
      raise ValueError(
          'logits_train and logits_test should both be either set or unset')

    if (self.labels_train is None) != (self.labels_test is None):
      raise ValueError(
          'labels_train and labels_test should both be either set or unset')

    # After the pairwise checks above, testing the train side is enough to
    # know that all four of logits/labels are present.
    if self.logits_train is None or self.labels_train is None:
      raise ValueError(
          'Labels, logits of training, test sets should all be set')

    if (self.vocab_size is None or self.train_size is None
        or self.test_size is None):
      raise ValueError(
          'vocab_size, train_size, test_size should all be set')

    # The previous `... and not int` test was always False (`int` is a truthy
    # class), so non-integer sizes were never rejected. NumPy integer scalars
    # are accepted alongside built-in ints.
    for field_name in ('vocab_size', 'train_size', 'test_size'):
      if not isinstance(getattr(self, field_name), (int, np.integer)):
        raise ValueError('%s should be of integer type' % field_name)

    _is_generator(self.logits_train, 'logits_train')
    _is_generator(self.logits_test, 'logits_test')
    _is_generator(self.labels_train, 'labels_train')
    _is_generator(self.labels_test, 'labels_test')

  def __str__(self):
    """Returns the nominal shapes of the logits and labels.

    Only the batch count and vocabulary size are known up front (the data is
    lazily loaded), so the remaining dimensions are printed symbolically.
    """
    # Header text is kept as-is so the printed output stays unchanged.
    result = ['AttackInputData(']

    if self.vocab_size is not None and self.train_size is not None:
      result.append(
          'logits_train with shape (%d, num_sequences, num_tokens, %d)' %
          (self.train_size, self.vocab_size))
      result.append(
          'labels_train with shape (%d, num_sequences, num_tokens, 1)' %
          self.train_size)

    if self.vocab_size is not None and self.test_size is not None:
      result.append(
          'logits_test with shape (%d, num_sequences, num_tokens, %d)' %
          (self.test_size, self.vocab_size))
      result.append(
          'labels_test with shape (%d, num_sequences, num_tokens, 1)' %
          self.test_size)

    result.append(')')
    return '\n'.join(result)
@dataclass
 class RocCurve:
  """Represents ROC curve of a membership inference classifier."""