Refactor: move loss computation utilities under privacy_tests.

PiperOrigin-RevId: 463391913
Authored by Shuang Song on 2022-07-26 11:49:14 -07:00; committed by A. Unique TensorFlower
parent 44dc40454b
commit 17cd0c52bc
10 changed files with 119 additions and 88 deletions
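
For code that imported the loss helpers from the membership_inference_attack package, the practical effect of this commit is an import-path change. A minimal before/after sketch (both import lines are taken verbatim from the diffs below):

# Old import path (removed in this commit):
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils import log_loss
# New import path:
from tensorflow_privacy.privacy.privacy_tests.utils import log_loss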

@@ -1,6 +1,6 @@
load("@rules_python//python:defs.bzl", "py_library")
load("@rules_python//python:defs.bzl", "py_library", "py_test")
package(default_visibility = ["//visibility:private"])
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
@@ -8,3 +8,18 @@ py_library(
    name = "privacy_tests",
    srcs = ["__init__.py"],
)

py_test(
    name = "utils_test",
    timeout = "long",
    srcs = ["utils_test.py"],
    python_version = "PY3",
    srcs_version = "PY3",
    deps = [":utils"],
)

py_library(
    name = "utils",
    srcs = ["utils.py"],
    srcs_version = "PY3",
)

@@ -15,21 +15,6 @@ py_library(
    srcs_version = "PY3",
)

py_library(
    name = "utils",
    srcs = ["utils.py"],
    srcs_version = "PY3",
)

py_test(
    name = "utils_test",
    timeout = "long",
    srcs = ["utils_test.py"],
    python_version = "PY3",
    srcs_version = "PY3",
    deps = [":utils"],
)

py_test(
    name = "membership_inference_attack_test",
    timeout = "long",
@@ -45,7 +30,10 @@ py_test(
    srcs = ["data_structures_test.py"],
    python_version = "PY3",
    srcs_version = "PY3",
    deps = [":membership_inference_attack"],
    deps = [
        ":membership_inference_attack",
        "//tensorflow_privacy/privacy/privacy_tests:utils",
    ],
)

py_test(
@@ -95,7 +83,7 @@ py_library(
        "seq2seq_mia.py",
    ],
    srcs_version = "PY3",
    deps = [":utils"],
    deps = ["//tensorflow_privacy/privacy/privacy_tests:utils"],
)

py_library(
@@ -122,8 +110,8 @@ py_library(
    srcs_version = "PY3",
    deps = [
        ":membership_inference_attack",
        ":utils",
        ":utils_tensorboard",
        "//tensorflow_privacy/privacy/privacy_tests:utils",
    ],
)
@@ -144,8 +132,8 @@ py_library(
    srcs_version = "PY3",
    deps = [
        ":membership_inference_attack",
        ":utils",
        ":utils_tensorboard",
        "//tensorflow_privacy/privacy/privacy_tests:utils",
    ],
)
@@ -185,7 +173,7 @@ py_library(
        "advanced_mia.py",
    ],
    srcs_version = "PY3",
    deps = [":utils"],
    deps = ["//tensorflow_privacy/privacy/privacy_tests:utils"],
)

py_test(
@@ -205,6 +193,6 @@ py_binary(
    deps = [
        ":advanced_mia",
        ":membership_inference_attack",
        ":utils",
        "//tensorflow_privacy/privacy/privacy_tests:utils",
    ],
)

@@ -17,7 +17,7 @@ import functools
from typing import Sequence, Union
import numpy as np
import scipy.stats
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils import log_loss
from tensorflow_privacy.privacy.privacy_tests.utils import log_loss
def replace_nan_with_column_mean(a: np.ndarray):

@@ -21,11 +21,10 @@ from absl import flags
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow_privacy.privacy.privacy_tests import utils
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import advanced_mia as amia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import plotting as mia_plotting
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import utils
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
FLAGS = flags.FLAGS

@@ -26,7 +26,7 @@ import numpy as np
import pandas as pd
from scipy import special
from sklearn import metrics
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import utils
from tensorflow_privacy.privacy.privacy_tests import utils
# The minimum TPR or FPR below which they are considered equal.
_ABSOLUTE_TOLERANCE = 1e-3
@@ -183,12 +183,6 @@ def _log_value(probs, small_value=1e-30):
  return -np.log(np.maximum(probs, small_value))


class LossFunction(enum.Enum):
  """An enum that defines loss function to use in `AttackInputData`."""
  CROSS_ENTROPY = 'cross_entropy'
  SQUARED = 'squared'


@dataclasses.dataclass
class AttackInputData:
  """Input data for running an attack.
@@ -225,7 +219,7 @@ class AttackInputData:
  # If a callable is provided, it should take in two argument, the 1st is
  # labels, the 2nd is logits or probs.
  loss_function: Union[Callable[[np.ndarray, np.ndarray], np.ndarray],
                       LossFunction] = LossFunction.CROSS_ENTROPY
                       utils.LossFunction] = utils.LossFunction.CROSS_ENTROPY
  # Whether `loss_function` will be called with logits or probs. If not set
  # (None), will decide by availablity of logits and probs and logits is
  # preferred when both are available.
@@ -298,52 +292,6 @@ class AttackInputData:
                       true_labels]
    return np.sum(np.multiply(modified_probs, modified_log_probs), axis=1)

  @staticmethod
  def _get_loss(loss: Optional[np.ndarray], labels: Optional[np.ndarray],
                logits: Optional[np.ndarray], probs: Optional[np.ndarray],
                loss_function: Union[Callable[[np.ndarray, np.ndarray],
                                              np.ndarray], LossFunction],
                loss_function_using_logits: Optional[bool],
                multilabel_data: Optional[bool]) -> Optional[np.ndarray]:
    """Calculates (if needed) losses.

    Args:
      loss: the loss of each example.
      labels: the scalar label of each example.
      logits: the logits vector of each example.
      probs: the probability vector of each example.
      loss_function: if `loss` is not available, `labels` and one of `logits`
        and `probs` are available, we will use this function to compute loss. It
        is supposed to take in (label, logits / probs) as input.
      loss_function_using_logits: if `loss_function` expects `logits` or
        `probs`.
      multilabel_data: if the data is from a multilabel classification problem.

    Returns:
      Loss (or None if neither the loss nor the labels are present).
    """
    if loss is not None:
      return loss
    if labels is None or (logits is None and probs is None):
      return None
    if loss_function_using_logits and logits is None:
      raise ValueError('We need logits to compute loss, but it is set to None.')
    if not loss_function_using_logits and probs is None:
      raise ValueError('We need probs to compute loss, but it is set to None.')

    predictions = logits if loss_function_using_logits else probs
    if loss_function == LossFunction.CROSS_ENTROPY:
      if multilabel_data:
        loss = utils.multilabel_bce_loss(labels, predictions,
                                         loss_function_using_logits)
      else:
        loss = utils.log_loss(labels, predictions, loss_function_using_logits)
    elif loss_function == LossFunction.SQUARED:
      loss = utils.squared_loss(labels, predictions)
    else:
      loss = loss_function(labels, predictions)
    return loss

  def __post_init__(self):
    """Checks performed after instantiation of the AttackInputData dataclass."""
    # Check if the data is multilabel.
@@ -358,7 +306,7 @@ class AttackInputData:
    """
    if self.loss_function_using_logits is None:
      self.loss_function_using_logits = (self.logits_train is not None)
    return self._get_loss(self.loss_train, self.labels_train, self.logits_train,
    return utils.get_loss(self.loss_train, self.labels_train, self.logits_train,
                          self.probs_train, self.loss_function,
                          self.loss_function_using_logits, self.multilabel_data)
@@ -370,7 +318,7 @@ class AttackInputData:
    """
    if self.loss_function_using_logits is None:
      self.loss_function_using_logits = bool(self.logits_test)
    return self._get_loss(self.loss_test, self.labels_test, self.logits_test,
    return utils.get_loss(self.loss_test, self.labels_test, self.logits_test,
                          self.probs_test, self.loss_function,
                          self.loss_function_using_logits, self.multilabel_data)
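
For context, a hypothetical construction of AttackInputData against the relocated enum; the field names follow the dataclass above, while the numeric values are illustrative only:

import numpy as np
from tensorflow_privacy.privacy.privacy_tests import utils
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData

# Two-class example with logits; loss_function now comes from privacy_tests.utils.
attack_input = AttackInputData(
    logits_train=np.array([[2.0, 0.5], [0.1, 1.5]]),
    logits_test=np.array([[0.2, 0.3], [1.0, 0.1]]),
    labels_train=np.array([0, 1]),
    labels_test=np.array([1, 0]),
    loss_function=utils.LossFunction.CROSS_ENTROPY)
train_loss = attack_input.get_loss_train()  # delegates to utils.get_loss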

@@ -20,13 +20,13 @@ from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
import pandas as pd
from tensorflow_privacy.privacy.privacy_tests import utils
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import _log_value
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackInputData
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResults
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackResultsCollection
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import DataSize
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import LossFunction
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import PrivacyReportMetadata
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import RocCurve
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SingleAttackResult
@@ -123,7 +123,7 @@ class AttackInputDataTest(parameterized.TestCase):
        probs_test=np.array([1, 1.]),
        labels_train=np.array([1, 0.]),
        labels_test=np.array([0, 2.]),
        loss_function=LossFunction.SQUARED,
        loss_function=utils.LossFunction.SQUARED,
        loss_function_using_logits=loss_function_using_logits,
    )
    np.testing.assert_allclose(attack_input.get_loss_train(), expected_train)
@@ -175,7 +175,7 @@ class AttackInputDataTest(parameterized.TestCase):
        probs_test=probs,
        labels_train=np.array([1, 0.]),
        labels_test=np.array([1, 0.]),
        loss_function=LossFunction.SQUARED,
        loss_function=utils.LossFunction.SQUARED,
    )
    np.testing.assert_allclose(attack_input.get_loss_train(), expected)
    np.testing.assert_allclose(attack_input.get_loss_test(), expected)

@@ -24,7 +24,6 @@ from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_s
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import AttackType
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import get_flattened_attack_metrics
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.data_structures import SlicingSpec
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils import log_loss
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack.utils_tensorboard import write_results_to_tensorboard_tf2 as write_results_to_tensorboard

@@ -20,9 +20,9 @@ from absl import logging
import numpy as np
import tensorflow as tf
from tensorflow import estimator as tf_estimator
from tensorflow_privacy.privacy.privacy_tests import utils
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import data_structures
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import membership_inference_attack as mia
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import utils
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import utils_tensorboard

@@ -13,7 +13,10 @@
# limitations under the License.
"""Utility functions for membership inference attacks."""
import enum
import logging
from typing import Callable, Optional, Union
import numpy as np
from scipy import special
@@ -122,3 +125,65 @@ def multilabel_bce_loss(labels: np.ndarray,
  bce = labels * np.log(pred + small_value)
  bce += (1 - labels) * np.log(1 - pred + small_value)
  return -bce


class LossFunction(enum.Enum):
  """An enum that defines loss function."""
  CROSS_ENTROPY = 'cross_entropy'
  SQUARED = 'squared'


def string_to_loss_function(string: str):
  """Convert string to the corresponding LossFunction."""
  if string == LossFunction.CROSS_ENTROPY.value:
    return LossFunction.CROSS_ENTROPY
  if string == LossFunction.SQUARED.value:
    return LossFunction.SQUARED
  raise ValueError(f'{string} is not a valid loss function name.')


def get_loss(loss: Optional[np.ndarray], labels: Optional[np.ndarray],
             logits: Optional[np.ndarray], probs: Optional[np.ndarray],
             loss_function: Union[Callable[[np.ndarray, np.ndarray],
                                           np.ndarray], LossFunction],
             loss_function_using_logits: Optional[bool],
             multilabel_data: Optional[bool]) -> Optional[np.ndarray]:
  """Calculates (if needed) losses.

  Args:
    loss: the loss of each example.
    labels: the scalar label of each example.
    logits: the logits vector of each example.
    probs: the probability vector of each example.
    loss_function: if `loss` is not available, `labels` and one of `logits`
      and `probs` are available, we will use this function to compute loss. It
      is supposed to take in (label, logits / probs) as input.
    loss_function_using_logits: if `loss_function` expects `logits` or
      `probs`.
    multilabel_data: if the data is from a multilabel classification problem.

  Returns:
    Loss (or None if neither the loss nor the labels are present).
  """
  if loss is not None:
    return loss
  if labels is None or (logits is None and probs is None):
    return None
  if loss_function_using_logits and logits is None:
    raise ValueError('We need logits to compute loss, but it is set to None.')
  if not loss_function_using_logits and probs is None:
    raise ValueError('We need probs to compute loss, but it is set to None.')

  predictions = logits if loss_function_using_logits else probs
  if loss_function == LossFunction.CROSS_ENTROPY:
    if multilabel_data:
      loss = multilabel_bce_loss(labels, predictions,
                                 loss_function_using_logits)
    else:
      loss = log_loss(labels, predictions, loss_function_using_logits)
  elif loss_function == LossFunction.SQUARED:
    loss = squared_loss(labels, predictions)
  else:
    loss = loss_function(labels, predictions)
  return loss
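
A minimal usage sketch of the relocated get_loss helper; the argument values are illustrative and not part of the commit:

import numpy as np
from tensorflow_privacy.privacy.privacy_tests import utils

labels = np.array([0, 1])                    # class indices
logits = np.array([[2.0, 0.5], [0.1, 1.5]])  # per-class logits
loss = utils.get_loss(
    loss=None, labels=labels, logits=logits, probs=None,
    loss_function=utils.LossFunction.CROSS_ENTROPY,
    loss_function_using_logits=True, multilabel_data=False)
# Follows the CROSS_ENTROPY branch above, i.e. log_loss(labels, logits, True).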

@@ -16,7 +16,24 @@ from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
from tensorflow_privacy.privacy.privacy_tests.membership_inference_attack import utils
from tensorflow_privacy.privacy.privacy_tests import utils


class LossFunctionFromStringTest(parameterized.TestCase):

  @parameterized.parameters(
      (utils.LossFunction.CROSS_ENTROPY, 'cross_entropy'),
      (utils.LossFunction.SQUARED, 'squared'),
  )
  def test_from_str(self, en, string):
    self.assertEqual(utils.string_to_loss_function(string), en)

  @parameterized.parameters(
      ('random string'),
      (''),
  )
  def test_from_str_wrong_input(self, string):
    self.assertRaises(ValueError, utils.string_to_loss_function, string)


class TestLogLoss(parameterized.TestCase):