tensorflow_privacy/privacy/dp_query/gaussian_query.py

# Copyright 2018, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Implements DPQuery interface for Gaussian sum and average queries.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from distutils.version import LooseVersion
import tensorflow as tf

from privacy.dp_query import dp_query
from privacy.dp_query import normalized_query

# TensorFlow releases older than 2.0.0 expose the structure utilities under
# tf.contrib.framework; from 2.0.0 onwards they are available as tf.nest.
# Only the branch matching the installed version is evaluated.
nest = (
    tf.contrib.framework.nest
    if LooseVersion(tf.__version__) < LooseVersion('2.0.0')
    else tf.nest)


class GaussianSumQuery(dp_query.DPQuery):
  """Implements DPQuery interface for Gaussian sum queries.

  Accumulates clipped vectors, then adds Gaussian noise to the sum.

  The clipping norm and the noise stddev are fixed at construction time; the
  query keeps no global state (`initial_global_state` returns None and
  `get_noised_result` passes the global state through unchanged).
  """

  def __init__(self, l2_norm_clip, stddev, ledger=None):
    """Initializes the GaussianSumQuery.

    Args:
      l2_norm_clip: The clipping norm to apply to the global norm of each
        record.
      stddev: The stddev of the noise added to the sum.
      ledger: The privacy ledger to which queries should be recorded, or None
        to disable recording.
    """
    # tf.cast(..., tf.float32) is the equivalent of tf.to_float, but unlike
    # tf.to_float it is still available in TensorFlow 2.x, which this module
    # explicitly supports (see the version check that selects `nest`).
    self._l2_norm_clip = tf.cast(l2_norm_clip, tf.float32)
    self._stddev = tf.cast(stddev, tf.float32)
    self._ledger = ledger

  def initial_global_state(self):
    """Returns the initial global state for the GaussianSumQuery.

    Returns:
      None. This query does not maintain any global state.
    """
    return None

  def derive_sample_params(self, global_state):
    """Given the global state, derives parameters to use for the next sample.

    Args:
      global_state: The current global state. Unused: the parameters come
        from the values supplied to the constructor.

    Returns:
      Parameters to use to process records in the next sample, i.e. the
      float32 clipping norm.
    """
    return self._l2_norm_clip

  def initial_sample_state(self, global_state, tensors):
    """Returns an initial state to use for the next sample.

    If a ledger was supplied, recording the sum query on the ledger is made a
    control dependency of the returned zero tensors.

    Args:
      global_state: The current global state. Unused.
      tensors: A structure of tensors used as a template to create the initial
        sample state.

    Returns: An initial sample state: a structure matching `tensors` in which
      every tensor is replaced by zeros of the same shape and type.
    """
    # Compare against None explicitly so that a ledger object that happens to
    # evaluate as falsy is still recorded to.
    if self._ledger is not None:
      dependencies = [
          self._ledger.record_sum_query(self._l2_norm_clip, self._stddev)
      ]
    else:
      dependencies = []
    with tf.control_dependencies(dependencies):
      return nest.map_structure(tf.zeros_like, tensors)

  def accumulate_record_impl(self, params, sample_state, record):
    """Accumulates a single record into the sample state.

    The record is flattened, clipped jointly with tf.clip_by_global_norm, and
    packed back into its original structure before being added to the state.

    Args:
      params: The parameters for the sample (the clipping norm, as returned by
        `derive_sample_params`).
      sample_state: The current sample state.
      record: The record to accumulate.

    Returns:
      A tuple containing the updated sample state and the global norm reported
      by tf.clip_by_global_norm for the record.
    """
    l2_norm_clip = params
    record_as_list = nest.flatten(record)
    clipped_as_list, norm = tf.clip_by_global_norm(record_as_list, l2_norm_clip)
    clipped = nest.pack_sequence_as(record, clipped_as_list)
    return nest.map_structure(tf.add, sample_state, clipped), norm

  def accumulate_record(self, params, sample_state, record):
    """Accumulates a single record into the sample state.

    Thin wrapper around `accumulate_record_impl` that discards the norm.

    Args:
      params: The parameters for the sample.
      sample_state: The current sample state.
      record: The record to accumulate.

    Returns:
      The updated sample state.
    """
    new_sample_state, _ = self.accumulate_record_impl(
        params, sample_state, record)
    return new_sample_state

  def get_noised_result(self, sample_state, global_state):
    """Gets noised sum after all records of sample have been accumulated.

    Args:
      sample_state: The sample state after all records have been accumulated.
      global_state: The global state.

    Returns:
      A tuple (estimate, new_global_state) where "estimate" is the estimated
      sum of the records and "new_global_state" is the updated global state
      (returned unchanged by this query).
    """
    def add_noise(v):
      # tf.random.normal replaces tf.random_normal, which is not part of the
      # TensorFlow 2.x top-level namespace. Noise is drawn separately for
      # each tensor in the structure, with the shape of that tensor.
      return v + tf.random.normal(tf.shape(v), stddev=self._stddev)

    return nest.map_structure(add_noise, sample_state), global_state


class GaussianAverageQuery(normalized_query.NormalizedQuery):
  """Implements DPQuery interface for Gaussian average queries.

  Accumulates clipped vectors, adds Gaussian noise, and normalizes.

  Note that we use "fixed-denominator" estimation: the denominator should be
  specified as the expected number of records per sample. Accumulating the
  denominator separately would also be possible but would produce a higher
  variance estimator.
  """

  def __init__(self,
               l2_norm_clip,
               sum_stddev,
               denominator,
               ledger=None):
    """Initializes the GaussianAverageQuery.

    Builds a GaussianSumQuery as the numerator query and hands it, together
    with the float32 denominator, to the NormalizedQuery base class.

    Args:
      l2_norm_clip: The clipping norm to apply to the global norm of each
        record.
      sum_stddev: The stddev of the noise added to the sum (before
        normalization).
      denominator: The normalization constant (applied after noise is added to
        the sum).
      ledger: The privacy ledger to which queries should be recorded.
    """
    # tf.cast(..., tf.float32) is the equivalent of tf.to_float, but unlike
    # tf.to_float it is still available in TensorFlow 2.x, which this module
    # explicitly supports (see the version check that selects `nest`).
    super(GaussianAverageQuery, self).__init__(
        numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger),
        denominator=tf.cast(denominator, tf.float32))
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00			`# Copyright 2018, The TensorFlow Authors.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`"""Implements DPQuery interface for Gaussian average queries.`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00			`"""`

			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

Closes #29 PiperOrigin-RevId: 239030654 2019-03-18 12:51:21 -06:00			`from distutils.version import LooseVersion`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00			`import tensorflow as tf`

Cleanup directory structure, add top-level imports and add normalized_query. Moved query classes from dir optimizers into new dir dp_query. Added NormalizedQuery class for queries that divide the output of another query by a constant like GaussianAverageQuery. PiperOrigin-RevId: 240167115 2019-03-25 11:20:41 -06:00			`from privacy.dp_query import dp_query`
			`from privacy.dp_query import normalized_query`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00
Closes #29 PiperOrigin-RevId: 239030654 2019-03-18 12:51:21 -06:00			`if LooseVersion(tf.__version__) < LooseVersion('2.0.0'):`
			`nest = tf.contrib.framework.nest`
			`else:`
			`nest = tf.nest`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00

General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`class GaussianSumQuery(dp_query.DPQuery):`
			`"""Implements DPQuery interface for Gaussian sum queries.`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00
			`Accumulates clipped vectors, then adds Gaussian noise to the sum.`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00			`"""`

Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`def __init__(self, l2_norm_clip, stddev, ledger=None):`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""Initializes the GaussianSumQuery.`

			`Args:`
			`l2_norm_clip: The clipping norm to apply to the global norm of each`
			`record.`
			`stddev: The stddev of the noise added to the sum.`
Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`ledger: The privacy ledger to which queries should be recorded.`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""`
Simplify GaussianQuery by removing _GlobalState. The global state for DP query is intended for aspects of the query that change across samples under the query's own control. It was therefore unnecessary to wrap "l2_norm_clip" and "sum_stddev" in the namedtuple _GlobalState for the basic GaussianQuery classes. PiperOrigin-RevId: 237528962 2019-03-08 16:17:30 -07:00			`self._l2_norm_clip = tf.to_float(l2_norm_clip)`
			`self._stddev = tf.to_float(stddev)`
Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`self._ledger = ledger`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00
			`def initial_global_state(self):`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""Returns the initial global state for the GaussianSumQuery."""`
Simplify GaussianQuery by removing _GlobalState. The global state for DP query is intended for aspects of the query that change across samples under the query's own control. It was therefore unnecessary to wrap "l2_norm_clip" and "sum_stddev" in the namedtuple _GlobalState for the basic GaussianQuery classes. PiperOrigin-RevId: 237528962 2019-03-08 16:17:30 -07:00			`return None`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00
			`def derive_sample_params(self, global_state):`
			`"""Given the global state, derives parameters to use for the next sample.`

			`Args:`
			`global_state: The current global state.`

			`Returns:`
			`Parameters to use to process records in the next sample.`
			`"""`
Simplify GaussianQuery by removing _GlobalState. The global state for DP query is intended for aspects of the query that change across samples under the query's own control. It was therefore unnecessary to wrap "l2_norm_clip" and "sum_stddev" in the namedtuple _GlobalState for the basic GaussianQuery classes. PiperOrigin-RevId: 237528962 2019-03-08 16:17:30 -07:00			`return self._l2_norm_clip`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00
			`def initial_sample_state(self, global_state, tensors):`
			`"""Returns an initial state to use for the next sample.`

			`Args:`
			`global_state: The current global state.`
			`tensors: A structure of tensors used as a template to create the initial`
			`sample state.`

			`Returns: An initial sample state.`
			`"""`
Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`if self._ledger:`
Simplify GaussianQuery by removing _GlobalState. The global state for DP query is intended for aspects of the query that change across samples under the query's own control. It was therefore unnecessary to wrap "l2_norm_clip" and "sum_stddev" in the namedtuple _GlobalState for the basic GaussianQuery classes. PiperOrigin-RevId: 237528962 2019-03-08 16:17:30 -07:00			`dependencies = [`
			`self._ledger.record_sum_query(self._l2_norm_clip, self._stddev)`
			`]`
Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`else:`
			`dependencies = []`
			`with tf.control_dependencies(dependencies):`
			`return nest.map_structure(tf.zeros_like, tensors)`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00
Adds AdaptiveClipAverageQuery which performs adaptive adjustment of the clipping norm to approximate a specified quantile of clipped updates per round. PiperOrigin-RevId: 238698171 2019-03-15 14:18:58 -06:00			`def accumulate_record_impl(self, params, sample_state, record):`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00			`"""Accumulates a single record into the sample state.`

			`Args:`
			`params: The parameters for the sample.`
			`sample_state: The current sample state.`
			`record: The record to accumulate.`

			`Returns:`
Adds AdaptiveClipAverageQuery which performs adaptive adjustment of the clipping norm to approximate a specified quantile of clipped updates per round. PiperOrigin-RevId: 238698171 2019-03-15 14:18:58 -06:00			`A tuple containing the updated sample state and the global norm.`
PiperOrigin-RevId: 224061027 2018-12-04 16:50:21 -07:00			`"""`
			`l2_norm_clip = params`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`record_as_list = nest.flatten(record)`
Adds AdaptiveClipAverageQuery which performs adaptive adjustment of the clipping norm to approximate a specified quantile of clipped updates per round. PiperOrigin-RevId: 238698171 2019-03-15 14:18:58 -06:00			`clipped_as_list, norm = tf.clip_by_global_norm(record_as_list, l2_norm_clip)`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`clipped = nest.pack_sequence_as(record, clipped_as_list)`
Adds AdaptiveClipAverageQuery which performs adaptive adjustment of the clipping norm to approximate a specified quantile of clipped updates per round. PiperOrigin-RevId: 238698171 2019-03-15 14:18:58 -06:00			`return nest.map_structure(tf.add, sample_state, clipped), norm`

			`def accumulate_record(self, params, sample_state, record):`
			`"""Accumulates a single record into the sample state.`

			`Args:`
			`params: The parameters for the sample.`
			`sample_state: The current sample state.`
			`record: The record to accumulate.`

			`Returns:`
			`The updated sample state.`
			`"""`
			`new_sample_state, _ = self.accumulate_record_impl(`
			`params, sample_state, record)`
			`return new_sample_state`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`def get_noised_result(self, sample_state, global_state):`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""Gets noised sum after all records of sample have been accumulated.`

			`Args:`
			`sample_state: The sample state after all records have been accumulated.`
			`global_state: The global state.`

			`Returns:`
			`A tuple (estimate, new_global_state) where "estimate" is the estimated`
			`sum of the records and "new_global_state" is the updated global state.`
			`"""`
			`def add_noise(v):`
Simplify GaussianQuery by removing _GlobalState. The global state for DP query is intended for aspects of the query that change across samples under the query's own control. It was therefore unnecessary to wrap "l2_norm_clip" and "sum_stddev" in the namedtuple _GlobalState for the basic GaussianQuery classes. PiperOrigin-RevId: 237528962 2019-03-08 16:17:30 -07:00			`return v + tf.random_normal(tf.shape(v), stddev=self._stddev)`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00
			`return nest.map_structure(add_noise, sample_state), global_state`


Cleanup directory structure, add top-level imports and add normalized_query. Moved query classes from dir optimizers into new dir dp_query. Added NormalizedQuery class for queries that divide the output of another query by a constant like GaussianAverageQuery. PiperOrigin-RevId: 240167115 2019-03-25 11:20:41 -06:00			`class GaussianAverageQuery(normalized_query.NormalizedQuery):`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00			`"""Implements DPQuery interface for Gaussian average queries.`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00
			`Accumulates clipped vectors, adds Gaussian noise, and normalizes.`
General cleanup. 1. Rename PrivateQuery to DPQuery. 2. Move construction of DPQuery to outside of optimizer. 3. Remove PrivateAverageQuery and PrivateSumQuery, and rename DPQuery's 'get_query_result' method to 'get_noised_result'. Rename private_queries.py to dp_query.py. 4. Remove thrice-replicated run_query function from the test classes and replace with a single function in new test_utils.py. 5. Add functions gaussian_sum_query_from_noise_multplier and gaussian_average_query_from_noise_multplier. PiperOrigin-RevId: 230595991 2019-01-23 14:51:58 -07:00
			`Note that we use "fixed-denominator" estimation: the denominator should be`
			`specified as the expected number of records per sample. Accumulating the`
			`denominator separately would also be possible but would be produce a higher`
			`variance estimator.`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""`

Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`def __init__(self,`
			`l2_norm_clip,`
			`sum_stddev,`
			`denominator,`
			`ledger=None):`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""Initializes the GaussianAverageQuery.`

			`Args:`
			`l2_norm_clip: The clipping norm to apply to the global norm of each`
			`record.`
			`sum_stddev: The stddev of the noise added to the sum (before`
			`normalization).`
			`denominator: The normalization constant (applied after noise is added to`
			`the sum).`
Add privacy ledger. The privacy ledger keeps a record of all sampling and query events for analysis post hoc by the privacy accountant. PiperOrigin-RevId: 233094012 2019-02-08 12:21:20 -07:00			`ledger: The privacy ledger to which queries should be recorded.`
Add GaussianSumQuery and express GaussianAverageQuery in terms of it. Also: 1. Add unit tests for both types of query. 2. Add function "get_query_result" to PrivateQuery. (The utility of having this function is made clear in the test class, where the function _run_query operates on either GaussianSum- or GaussianAverageQueries.) PiperOrigin-RevId: 225609398 2018-12-14 15:57:07 -07:00			`"""`
Cleanup directory structure, add top-level imports and add normalized_query. Moved query classes from dir optimizers into new dir dp_query. Added NormalizedQuery class for queries that divide the output of another query by a constant like GaussianAverageQuery. PiperOrigin-RevId: 240167115 2019-03-25 11:20:41 -06:00			`super(GaussianAverageQuery, self).__init__(`
			`numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger),`
			`denominator=tf.to_float(denominator))`