From 7436930c64572a9279500cfb47cfe3afed69e538 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 1 Mar 2023 10:55:23 -0800
Subject: [PATCH] Improve documentation and logging of fast gradient clipping
 modules and callers.

PiperOrigin-RevId: 513283486
---
 .../privacy/fast_gradient_clipping/BUILD      |  1 +
 .../fast_gradient_clipping/clip_grads.py      | 28 ++++++++--
 .../fast_gradient_clipping/clip_grads_test.py | 44 ++++++++++-----
 .../gradient_clipping_utils.py                | 43 +++++++++++----
 .../fast_gradient_clipping/layer_registry.py  | 55 +++++++++++++++----
 5 files changed, 129 insertions(+), 42 deletions(-)

diff --git a/tensorflow_privacy/privacy/fast_gradient_clipping/BUILD b/tensorflow_privacy/privacy/fast_gradient_clipping/BUILD
index ffa666f..e50781d 100644
--- a/tensorflow_privacy/privacy/fast_gradient_clipping/BUILD
+++ b/tensorflow_privacy/privacy/fast_gradient_clipping/BUILD
@@ -6,6 +6,7 @@ py_library(
     name = "gradient_clipping_utils",
     srcs = ["gradient_clipping_utils.py"],
     srcs_version = "PY3",
+    deps = [":layer_registry"],
 )
 
 py_library(
diff --git a/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads.py b/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads.py
index 6a37ae3..00e12a2 100644
--- a/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads.py
+++ b/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads.py
@@ -21,11 +21,18 @@ of the approach given in https://arxiv.org/pdf/2009.03106.pdf (see the
 `compute_gradient_norms()` function).
 """
 
+from typing import Union, Iterable, Text
+
 import tensorflow as tf
 from tensorflow_privacy.privacy.fast_gradient_clipping import gradient_clipping_utils
+from tensorflow_privacy.privacy.fast_gradient_clipping import layer_registry as lr
+
+InputTensor = Union[tf.Tensor, Iterable[tf.Tensor], dict[Text, tf.Tensor]]
 
 
-def get_registry_generator_fn(tape, layer_registry):
+def get_registry_generator_fn(
+    tape: tf.GradientTape, layer_registry: lr.LayerRegistry
+):
   """Creates the generator function for `compute_gradient_norms()`."""
   if layer_registry is None:
     # Needed for backwards compatibility.
@@ -53,7 +60,12 @@
   return registry_generator_fn
 
 
-def compute_gradient_norms(input_model, x_batch, y_batch, layer_registry):
+def compute_gradient_norms(
+    input_model: tf.keras.Model,
+    x_batch: InputTensor,
+    y_batch: tf.Tensor,
+    layer_registry: lr.LayerRegistry,
+):
   """Computes the per-example loss gradient norms for given data.
 
   Applies a variant of the approach given in
@@ -62,7 +74,7 @@
   Args:
     input_model: The `tf.keras.Model` from which to obtain the layers. The
       loss of the model *must* be a scalar loss.
-    x_batch: A `tf.Tensor` representing a batch of inputs to the model. The
+    x_batch: An `InputTensor` representing a batch of inputs to the model. The
       first axis must be the batch dimension.
     y_batch: A `tf.Tensor` representing a batch of output labels. The first
       axis must be the batch dimension. The number of examples should match the
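For orientation, a minimal usage sketch of the newly annotated
`compute_gradient_norms()` (illustrative only, not part of the patch; it
assumes a compiled `tf.keras.Sequential` model whose layers are all covered by
the default registry):

import tensorflow as tf
from tensorflow_privacy.privacy.fast_gradient_clipping import clip_grads
from tensorflow_privacy.privacy.fast_gradient_clipping import layer_registry

# Build and compile a small model; the loss object of the compiled model is
# what the per-example norm computation differentiates.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(4, activation='relu', input_shape=(3,)),
    tf.keras.layers.Dense(1),
])
model.compile(loss=tf.keras.losses.MeanSquaredError())

x_batch = tf.random.normal([8, 3])  # A batch of 8 examples, 3 features each.
y_batch = tf.random.normal([8, 1])

# A 1D tensor of shape [8]: one loss-gradient norm per example.
norms = clip_grads.compute_gradient_norms(
    model, x_batch, y_batch, layer_registry.make_default_layer_registry()
)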
@@ -106,7 +118,7 @@
   return tf.sqrt(tf.reduce_sum(sqr_norm_tsr, axis=1))
 
 
-def compute_clip_weights(l2_norm_clip, gradient_norms):
+def compute_clip_weights(l2_norm_clip: float, gradient_norms: tf.Tensor):
   """Computes the per-example loss/clip weights for clipping.
 
   When the sum of the per-example losses is replaced by a weighted sum, where
@@ -132,7 +144,11 @@
 
 
 def compute_pred_and_clipped_gradients(
-    input_model, x_batch, y_batch, l2_norm_clip, layer_registry
+    input_model: tf.keras.Model,
+    x_batch: InputTensor,
+    y_batch: tf.Tensor,
+    l2_norm_clip: float,
+    layer_registry: lr.LayerRegistry,
 ):
   """Computes the per-example predictions and per-example clipped loss gradient.
@@ -147,7 +163,7 @@
   Args:
     input_model: The `tf.keras.Model` from which to obtain the layers.
-    x_batch: A `tf.Tensor` representing a batch of inputs to the model. The
+    x_batch: An `InputTensor` representing a batch of inputs to the model. The
      first axis must be the batch dimension.
     y_batch: A `tf.Tensor` representing a batch of output labels. The first
       axis must be the batch dimension. The number of examples should match the
diff --git a/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads_test.py b/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads_test.py
index 183d890..1933e21 100644
--- a/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads_test.py
+++ b/tensorflow_privacy/privacy/fast_gradient_clipping/clip_grads_test.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 
 import itertools
+from typing import Callable, Any, List, Union
+
 from absl.testing import parameterized
 import tensorflow as tf
 
@@ -20,23 +22,35 @@
 from tensorflow_privacy.privacy.fast_gradient_clipping import clip_grads
 from tensorflow_privacy.privacy.fast_gradient_clipping import layer_registry
 
+# ==============================================================================
+# Type aliases
+# ==============================================================================
+LayerGenerator = Callable[[int, int], tf.keras.layers.Layer]
+
+ModelGenerator = Callable[
+    [LayerGenerator, Union[int, List[int]], int], tf.keras.Model
+]
+
+
 # ==============================================================================
 # Helper functions and classes.
 # ==============================================================================
 
 
 class DoubleDense(tf.keras.layers.Layer):
   """A layer that nests two dense layers."""
 
-  def __init__(self, units):
+  def __init__(self, units: int):
     super().__init__()
     self.dense1 = tf.keras.layers.Dense(units)
     self.dense2 = tf.keras.layers.Dense(1)
 
-  def call(self, inputs):
+  def call(self, inputs: Any):
     x = self.dense1(inputs)
     return self.dense2(x)
 
 
-def double_dense_layer_computation(layer_instance, inputs, tape):
+def double_dense_layer_computation(
+    layer_instance: tf.keras.layers.Layer, inputs: Any, tape: tf.GradientTape
+):
   """Layer registry function for the custom `DoubleDense` layer class."""
   vars1, outputs, sqr_norm_fn1 = layer_registry.dense_layer_computation(
       layer_instance.dense1, inputs, tape
@@ -53,7 +67,9 @@
   return [vars1, vars2], outputs, sqr_norm_fn
 
 
-def compute_true_gradient_norms(input_model, x_batch, y_batch):
+def compute_true_gradient_norms(
+    input_model: tf.keras.Model, x_batch: tf.Tensor, y_batch: tf.Tensor
+):
   """Computes the real gradient norms for an input `(model, x, y)`."""
   loss_config = input_model.loss.get_config()
   loss_config['reduction'] = tf.keras.losses.Reduction.NONE
@@ -73,14 +89,14 @@
 
 
 def get_computed_and_true_norms(
-    model_generator,
-    layer_generator,
-    input_dims,
-    output_dim,
-    is_eager,
-    x_input,
-    rng_seed=777,
-    registry=None,
+    model_generator: ModelGenerator,
+    layer_generator: LayerGenerator,
+    input_dims: Union[int, List[int]],
+    output_dim: int,
+    is_eager: bool,
+    x_input: tf.Tensor,
+    rng_seed: int = 777,
+    registry: layer_registry.LayerRegistry = None,
 ):
   """Obtains the true and computed gradient norms for a model and batch input.
@@ -238,7 +254,7 @@ def make_weighted_bow_model(layer_generator, input_dims, output_dim):
 # ==============================================================================
 # Factory functions.
 # ==============================================================================
-def get_nd_test_tensors(n):
+def get_nd_test_tensors(n: int):
   """Returns a list of candidate test tensors for a given dimension n."""
   return [
       tf.zeros((n,), dtype=tf.float64),
@@ -246,7 +262,7 @@
   ]
 
 
-def get_nd_test_batches(n):
+def get_nd_test_batches(n: int):
   """Returns a list of candidate input batches of dimension n."""
   result = []
   tensors = get_nd_test_tensors(n)
diff --git a/tensorflow_privacy/privacy/fast_gradient_clipping/gradient_clipping_utils.py b/tensorflow_privacy/privacy/fast_gradient_clipping/gradient_clipping_utils.py
index 6dd0d49..f6c22f0 100644
--- a/tensorflow_privacy/privacy/fast_gradient_clipping/gradient_clipping_utils.py
+++ b/tensorflow_privacy/privacy/fast_gradient_clipping/gradient_clipping_utils.py
@@ -13,11 +13,19 @@
 # limitations under the License.
"""Utility functions that help in the computation of per-example gradient norms.""" +from typing import Any, Union, Iterable, Text, Callable, Tuple, Optional + from absl import logging import tensorflow as tf +from tensorflow_privacy.privacy.fast_gradient_clipping import layer_registry as lr -def has_internal_compute_graph(input_object): +InputTensor = Union[tf.Tensor, Iterable[tf.Tensor], dict[Text, tf.Tensor]] + +GeneratorFunction = Optional[Callable[[Any, Tuple, dict], Tuple[Any, Any]]] + + +def has_internal_compute_graph(input_object: Any): """Checks if input is a TF model and has a TF internal compute graph.""" return ( isinstance(input_object, tf.keras.Model) @@ -28,7 +36,9 @@ def has_internal_compute_graph(input_object): ) -def _get_internal_layers(input_layer): +def _get_internal_layers( + input_layer: tf.keras.layers.Layer, +) -> list[tf.keras.layers.Layer]: """Returns a list of layers that are nested within a given layer.""" internal_layers = [] if isinstance(input_layer, tf.keras.Model) and hasattr(input_layer, 'layers'): @@ -39,7 +49,11 @@ def _get_internal_layers(input_layer): return internal_layers -def model_forward_pass(input_model, inputs, generator_fn=None): +def model_forward_pass( + input_model: tf.keras.Model, + inputs: InputTensor, + generator_fn: GeneratorFunction = None, +) -> Tuple[tf.Tensor, list[Any]]: """Does a forward pass of a model and returns useful intermediates. NOTE: the graph traversal algorithm is an adaptation of the logic in the @@ -118,7 +132,9 @@ def model_forward_pass(input_model, inputs, generator_fn=None): return node_layer_outputs, generator_outputs_list -def all_trainable_layers_are_registered(input_model, layer_registry): +def all_trainable_layers_are_registered( + input_model: tf.keras.Model, layer_registry: lr.LayerRegistry +) -> bool: """Check if an input model's trainable layers are all registered. Args: @@ -140,18 +156,21 @@ def all_trainable_layers_are_registered(input_model, layer_registry): def add_aggregate_noise( - input_model, x_batch, clipped_grads, l2_norm_clip, noise_multiplier -): + input_model: tf.keras.Model, + x_batch: InputTensor, + clipped_grads: list[tf.Tensor], + l2_norm_clip: float, + noise_multiplier: float, +) -> list[tf.Tensor]: """Adds noise to a collection of clipped gradients. The magnitude of the noise depends on the aggregation strategy of the input model's loss function. Args: - input_model: The Keras model to obtain the layers from. - x_batch: A collection of Tensors to be fed into the input layer of the - model. - clipped_grads: A list of tensors representing the clipped gradients. + input_model: The `tf.keras.Model` to obtain the layers from. + x_batch: An `InputTensor` to be fed into the input layer of the model. + clipped_grads: A list of `tf.Tensor`s representing the clipped gradients. l2_norm_clip: Clipping norm (max L2 norm of each gradient). noise_multiplier: Ratio of the standard deviation to the clipping norm. 
@@ -187,7 +206,9 @@ def add_aggregate_noise( return tf.nest.map_structure(add_noise, clipped_grads) -def generate_model_outputs_using_core_keras_layers(input_model): +def generate_model_outputs_using_core_keras_layers( + input_model: tf.keras.Model, +) -> tf.Tensor: """Returns the model outputs generated by only core Keras layers.""" cust_obj_dict = dict.copy(tf.keras.utils.get_custom_objects()) cust_hash_set = set([hash(v) for v in cust_obj_dict.values()]) diff --git a/tensorflow_privacy/privacy/fast_gradient_clipping/layer_registry.py b/tensorflow_privacy/privacy/fast_gradient_clipping/layer_registry.py index c8279ba..60b0d6b 100644 --- a/tensorflow_privacy/privacy/fast_gradient_clipping/layer_registry.py +++ b/tensorflow_privacy/privacy/fast_gradient_clipping/layer_registry.py @@ -40,9 +40,24 @@ where `l2_row_norm(y)` computes the L2 norm for each row of an input `y`. Details of this decomposition can be found in https://arxiv.org/abs/1510.01799 """ +from typing import Callable, Type, Any, Union, Iterable, Text import tensorflow as tf +# ============================================================================== +# Type aliases +# ============================================================================== +SquareNormFunction = Callable[[Any], tf.Tensor] + +RegistryFunctionOutput = tuple[Any, tf.Tensor, SquareNormFunction] + +RegistryFunction = Callable[ + [Any, tuple[Any], tf.GradientTape], RegistryFunctionOutput +] + +InputTensor = Union[tf.Tensor, Iterable[tf.Tensor], dict[Text, tf.Tensor]] + + # ============================================================================== # Main class # ============================================================================== @@ -54,15 +69,19 @@ class LayerRegistry: self._layer_class_dict = {} self._registry = {} - def is_elem(self, layer_instance): + def is_elem(self, layer_instance: tf.keras.layers.Layer) -> bool: """Checks if a layer instance's class is in the registry.""" return hash(layer_instance.__class__) in self._registry - def lookup(self, layer_instance): + def lookup(self, layer_instance: tf.keras.layers.Layer) -> RegistryFunction: """Returns the layer registry function for a given layer instance.""" return self._registry[hash(layer_instance.__class__)] - def insert(self, layer_class, layer_registry_function): + def insert( + self, + layer_class: Type[tf.keras.layers.Layer], + layer_registry_function: RegistryFunction, + ): """Inserts a layer registry function into the internal dictionaries.""" layer_key = hash(layer_class) self._layer_class_dict[layer_key] = layer_class @@ -72,7 +91,11 @@ class LayerRegistry: # ============================================================================== # Supported Keras layers # ============================================================================== -def dense_layer_computation(layer_instance, inputs, tape): +def dense_layer_computation( + layer_instance: tf.keras.layers.Dense, + inputs: tuple[InputTensor], + tape: tf.GradientTape, +) -> RegistryFunctionOutput: """Registry function for `tf.keras.layers.Dense`. The logic for this computation is based on the following paper: @@ -83,8 +106,9 @@ def dense_layer_computation(layer_instance, inputs, tape): Args: layer_instance: A `tf.keras.layers.Dense` instance. - inputs: A `tf.Tensor` which can be passed into the layer instance, i.e., - `layer_instance(inputs)` returns a valid output. 
+ inputs: A tuple containing a single `InputTensor` which can be passed into + the layer instance, i.e., `layer_instance(*inputs)` returns a valid + output. tape: A `tf.GradientTape` instance that will be used to watch the output `base_vars`. @@ -100,6 +124,8 @@ def dense_layer_computation(layer_instance, inputs, tape): trainable variables in `layer_instance`. These squared norms should be a 1D `tf.Tensor` of length `batch_size`. """ + if len(inputs) != 1: + raise ValueError("Only layer inputs of length 1 are permitted.") orig_activation = layer_instance.activation layer_instance.activation = None base_vars = layer_instance(*inputs) @@ -125,7 +151,11 @@ def dense_layer_computation(layer_instance, inputs, tape): return base_vars, outputs, sqr_norm_fn -def embedding_layer_computation(layer_instance, inputs, tape): +def embedding_layer_computation( + layer_instance: tf.keras.layers.Embedding, + inputs: tuple[InputTensor], + tape: tf.GradientTape, +) -> RegistryFunctionOutput: """Registry function for `tf.keras.layers.Embedding`. The logic of this computation is based on the `tf.keras.layers.Dense` @@ -136,8 +166,9 @@ def embedding_layer_computation(layer_instance, inputs, tape): Args: layer_instance: A `tf.keras.layers.Embedding` instance. - inputs: A `tf.Tensor` which can be passed into the layer instance, i.e., - `layer_instance(inputs)` returns a valid output. + inputs: A tuple containing a single `InputTensor` which can be passed into + the layer instance, i.e., `layer_instance(*inputs)` returns a valid + output. tape: A `tf.GradientTape` instance that will be used to watch the output `base_vars`. @@ -153,10 +184,12 @@ def embedding_layer_computation(layer_instance, inputs, tape): trainable variables in `layer_instance`. These squared norms should be a 1D `tf.Tensor` of length `batch_size`. """ + if len(inputs) != 1: + raise ValueError("Only layer inputs of length 1 are permitted.") if hasattr(layer_instance, "sparse"): # for backwards compatibility if layer_instance.sparse: raise NotImplementedError("Sparse output tensors are not supported.") - if isinstance(inputs, tf.SparseTensor): + if isinstance(inputs[0], tf.SparseTensor): raise NotImplementedError("Sparse input tensors are not supported.") # Disable experimental features. @@ -225,7 +258,7 @@ def embedding_layer_computation(layer_instance, inputs, tape): # ============================================================================== # Main factory methods # ============================================================================== -def make_default_layer_registry(): +def make_default_layer_registry() -> LayerRegistry: registry = LayerRegistry() registry.insert(tf.keras.layers.Dense, dense_layer_computation) registry.insert(tf.keras.layers.Embedding, embedding_layer_computation)
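To close, a sketch of how a client would extend the registry for a custom
layer (illustrative: `ScalarScale` and `scalar_scale_computation` are
hypothetical names, following the `(base_vars, outputs, sqr_norm_fn)` contract
documented above and assuming rank-2 inputs of shape
`[batch_size, num_features]`):

import tensorflow as tf
from tensorflow_privacy.privacy.fast_gradient_clipping import layer_registry as lr

class ScalarScale(tf.keras.layers.Layer):
  """Multiplies its input by a single trainable scalar."""

  def build(self, input_shape):
    self.scale = self.add_weight('scale', shape=(), initializer='ones')

  def call(self, inputs):
    return self.scale * inputs

def scalar_scale_computation(layer_instance, inputs, tape):
  """Registry function for the hypothetical `ScalarScale` layer."""
  if len(inputs) != 1:
    raise ValueError("Only layer inputs of length 1 are permitted.")
  base_vars = layer_instance(*inputs)
  tape.watch(base_vars)

  def sqr_norm_fn(base_vars_grads):
    # For y = scale * x, the i-th per-example gradient w.r.t. `scale` is
    # sum_j x[i, j] * base_vars_grads[i, j]; square it for the squared norm.
    per_example_grads = tf.reduce_sum(inputs[0] * base_vars_grads, axis=1)
    return tf.square(per_example_grads)

  # No activation to strip here, so the watched variables are the outputs.
  return base_vars, base_vars, sqr_norm_fn

registry = lr.make_default_layer_registry()
registry.insert(ScalarScale, scalar_scale_computation)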