tensorflow_privacy/privacy/optimizers/dp_optimizer.py

# Copyright 2018, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Differentially private optimizers for TensorFlow."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow_privacy.privacy.optimizers import gaussian_query


def make_optimizer_class(cls):
  """Constructs a DP optimizer class from an existing one."""
  if (tf.train.Optimizer.compute_gradients.__code__ is
      not cls.compute_gradients.__code__):
    tf.logging.warning(
        'WARNING: Calling make_optimizer_class() on class %s that overrides '
        'method compute_gradients(). Check to ensure that '
        'make_optimizer_class() does not interfere with overridden version.',
        cls.__name__)

  class DPOptimizerClass(cls):
    """Differentially private subclass of given class cls."""

    def __init__(
        self,
        dp_average_query,
        num_microbatches,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      super(DPOptimizerClass, self).__init__(*args, **kwargs)
      self._dp_average_query = dp_average_query
      self._num_microbatches = num_microbatches
      self._global_state = self._dp_average_query.initial_global_state()
      # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.
      # Beware: When num_microbatches is large (>100), enabling this parameter
      # may cause an OOM error.
      self._unroll_microbatches = unroll_microbatches

    def compute_gradients(self,
                          loss,
                          var_list,
                          gate_gradients=tf.train.Optimizer.GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None):

      # Note: it would be closer to the correct i.i.d. sampling of records if
      # we sampled each microbatch from the appropriate binomial distribution,
      # although that still wouldn't be quite correct because it would be
      # sampling from the dataset without replacement.
      microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1])
      sample_params = (
          self._dp_average_query.derive_sample_params(self._global_state))

      def process_microbatch(i, sample_state):
        """Process one microbatch (record) with privacy helper."""
        grads, _ = zip(*super(cls, self).compute_gradients(
            tf.gather(microbatches_losses, [i]), var_list, gate_gradients,
            aggregation_method, colocate_gradients_with_ops, grad_loss))
        grads_list = list(grads)
        sample_state = self._dp_average_query.accumulate_record(
            sample_params, sample_state, grads_list)
        return sample_state

      if var_list is None:
        var_list = (
            tf.trainable_variables() + tf.get_collection(
                tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
      sample_state = self._dp_average_query.initial_sample_state(
          self._global_state, var_list)

      if self._unroll_microbatches:
        for idx in range(self._num_microbatches):
          sample_state = process_microbatch(idx, sample_state)
      else:
        # Use of while_loop here requires that sample_state be a nested
        # structure of tensors. In general, we would prefer to allow it to be
        # an arbitrary opaque type.
        cond_fn = lambda i, _: tf.less(i, self._num_microbatches)
        body_fn = lambda i, state: [tf.add(i, 1), process_microbatch(i, state)]
        idx = tf.constant(0)
        _, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state])

      final_grads, self._global_state = (
          self._dp_average_query.get_noised_result(
              sample_state, self._global_state))

      return list(zip(final_grads, var_list))

  return DPOptimizerClass


def make_gaussian_optimizer_class(cls):
  """Constructs a DP optimizer with Gaussian averaging of updates."""

  class DPGaussianOptimizerClass(make_optimizer_class(cls)):
    """DP subclass of given class cls using Gaussian averaging."""

    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      dp_average_query = gaussian_query.GaussianAverageQuery(
          l2_norm_clip, l2_norm_clip * noise_multiplier, num_microbatches)
      super(DPGaussianOptimizerClass, self).__init__(
          dp_average_query,
          num_microbatches,
          unroll_microbatches,
          *args,
          **kwargs)

  return DPGaussianOptimizerClass


DPAdagradOptimizer = make_optimizer_class(tf.train.AdagradOptimizer)
DPAdamOptimizer = make_optimizer_class(tf.train.AdamOptimizer)
DPGradientDescentOptimizer = make_optimizer_class(
    tf.train.GradientDescentOptimizer)

DPAdagradGaussianOptimizer = make_gaussian_optimizer_class(
    tf.train.AdagradOptimizer)
DPAdamGaussianOptimizer = make_gaussian_optimizer_class(tf.train.AdamOptimizer)
DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class(
    tf.train.GradientDescentOptimizer)
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00			`# Copyright 2018, The TensorFlow Authors.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`"""Differentially private optimizers for TensorFlow."""`

			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`import tensorflow as tf`

internal change PiperOrigin-RevId: 233093203 2019-02-08 12:16:44 -07:00			`from tensorflow_privacy.privacy.optimizers import gaussian_query`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00

			`def make_optimizer_class(cls):`
			`"""Constructs a DP optimizer class from an existing one."""`
			`if (tf.train.Optimizer.compute_gradients.__code__ is`
			`not cls.compute_gradients.__code__):`
			`tf.logging.warning(`
			`'WARNING: Calling make_optimizer_class() on class %s that overrides '`
			`'method compute_gradients(). Check to ensure that '`
			`'make_optimizer_class() does not interfere with overridden version.',`
			`cls.__name__)`

			`class DPOptimizerClass(cls):`
			`"""Differentially private subclass of given class cls."""`

Created the optional unroll_microbatches parameter for the DpOptimizerClass as a workaround for b/122613513. PiperOrigin-RevId: 229955297 2019-01-18 11:47:04 -07:00			`def __init__(`
			`self,`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`dp_average_query,`
Created the optional unroll_microbatches parameter for the DpOptimizerClass as a workaround for b/122613513. PiperOrigin-RevId: 229955297 2019-01-18 11:47:04 -07:00			`num_microbatches,`
			`unroll_microbatches=False,`
			`*args, # pylint: disable=keyword-arg-before-vararg`
			`**kwargs):`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00			`super(DPOptimizerClass, self).__init__(args, *kwargs)`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`self._dp_average_query = dp_average_query`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00			`self._num_microbatches = num_microbatches`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`self._global_state = self._dp_average_query.initial_global_state()`
Created the optional unroll_microbatches parameter for the DpOptimizerClass as a workaround for b/122613513. PiperOrigin-RevId: 229955297 2019-01-18 11:47:04 -07:00			`# TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.`
			`# Beware: When num_microbatches is large (>100), enabling this parameter`
			`# may cause an OOM error.`
			`self._unroll_microbatches = unroll_microbatches`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00
			`def compute_gradients(self,`
			`loss,`
			`var_list,`
			`gate_gradients=tf.train.Optimizer.GATE_OP,`
			`aggregation_method=None,`
			`colocate_gradients_with_ops=False,`
			`grad_loss=None):`

			`# Note: it would be closer to the correct i.i.d. sampling of records if`
			`# we sampled each microbatch from the appropriate binomial distribution,`
			`# although that still wouldn't be quite correct because it would be`
			`# sampling from the dataset without replacement.`
			`microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1])`
			`sample_params = (`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`self._dp_average_query.derive_sample_params(self._global_state))`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00
			`def process_microbatch(i, sample_state):`
			`"""Process one microbatch (record) with privacy helper."""`
			`grads, _ = zip(*super(cls, self).compute_gradients(`
			`tf.gather(microbatches_losses, [i]), var_list, gate_gradients,`
			`aggregation_method, colocate_gradients_with_ops, grad_loss))`
			`grads_list = list(grads)`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`sample_state = self._dp_average_query.accumulate_record(`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00			`sample_params, sample_state, grads_list)`
Created the optional unroll_microbatches parameter for the DpOptimizerClass as a workaround for b/122613513. PiperOrigin-RevId: 229955297 2019-01-18 11:47:04 -07:00			`return sample_state`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00
			`if var_list is None:`
			`var_list = (`
			`tf.trainable_variables() + tf.get_collection(`
			`tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`sample_state = self._dp_average_query.initial_sample_state(`
Project import generated by Copybara. PiperOrigin-RevId: 226345615 2018-12-20 10:13:33 -07:00			`self._global_state, var_list)`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00
Created the optional unroll_microbatches parameter for the DpOptimizerClass as a workaround for b/122613513. PiperOrigin-RevId: 229955297 2019-01-18 11:47:04 -07:00			`if self._unroll_microbatches:`
			`for idx in range(self._num_microbatches):`
			`sample_state = process_microbatch(idx, sample_state)`
			`else:`
			`# Use of while_loop here requires that sample_state be a nested`
			`# structure of tensors. In general, we would prefer to allow it to be`
			`# an arbitrary opaque type.`
			`cond_fn = lambda i, _: tf.less(i, self._num_microbatches)`
			`body_fn = lambda i, state: [tf.add(i, 1), process_microbatch(i, state)]`
			`idx = tf.constant(0)`
			`_, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state])`

Project import generated by Copybara. PiperOrigin-RevId: 226345615 2018-12-20 10:13:33 -07:00			`final_grads, self._global_state = (`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`self._dp_average_query.get_noised_result(`
			`sample_state, self._global_state))`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00
Closes #4 PiperOrigin-RevId: 229212766 2019-01-14 11:54:49 -07:00			`return list(zip(final_grads, var_list))`
Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00
			`return DPOptimizerClass`


General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`def make_gaussian_optimizer_class(cls):`
			`"""Constructs a DP optimizer with Gaussian averaging of updates."""`

			`class DPGaussianOptimizerClass(make_optimizer_class(cls)):`
			`"""DP subclass of given class cls using Gaussian averaging."""`

			`def __init__(`
			`self,`
			`l2_norm_clip,`
			`noise_multiplier,`
			`num_microbatches,`
			`unroll_microbatches=False,`
			`*args, # pylint: disable=keyword-arg-before-vararg`
			`**kwargs):`
			`dp_average_query = gaussian_query.GaussianAverageQuery(`
			`l2_norm_clip, l2_norm_clip * noise_multiplier, num_microbatches)`
			`super(DPGaussianOptimizerClass, self).__init__(`
			`dp_average_query,`
			`num_microbatches,`
			`unroll_microbatches,`
			`*args,`
			`**kwargs)`

			`return DPGaussianOptimizerClass`


Project import generated by Copybara. PiperOrigin-RevId: 226056146 2018-12-18 15:06:54 -07:00			`DPAdagradOptimizer = make_optimizer_class(tf.train.AdagradOptimizer)`
			`DPAdamOptimizer = make_optimizer_class(tf.train.AdamOptimizer)`
			`DPGradientDescentOptimizer = make_optimizer_class(`
			`tf.train.GradientDescentOptimizer)`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00
			`DPAdagradGaussianOptimizer = make_gaussian_optimizer_class(`
			`tf.train.AdagradOptimizer)`
			`DPAdamGaussianOptimizer = make_gaussian_optimizer_class(tf.train.AdamOptimizer)`
			`DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class(`
			`tf.train.GradientDescentOptimizer)`