Allow an exact denominator for the below-estimate fraction used by the quantile estimator.

Also:
1) Check that records for the quantile estimator query are scalars.
2) Add tests of the quantile estimator with noise.
3) Add privacy ledger support to the no-privacy queries.
PiperOrigin-RevId: 320633937
Authored by Galen Andrew on 2020-07-10 10:55:48 -07:00; committed by A. Unique TensorFlower
parent d1e2cc1930
commit 2f51adac89
5 changed files with 177 additions and 43 deletions
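
At a glance, the change lets callers choose between the existing fixed-denominator, noised estimator and a new exact-denominator, noise-free variant. A minimal sketch of the two construction paths (constructor arguments are taken from the diffs below; the import path is assumed from the package layout):

from tensorflow_privacy.privacy.dp_query import quantile_estimator_query

# Existing behavior: the below-estimate fraction is noised and normalized by
# an *expected* number of records supplied up front.
noised_query = quantile_estimator_query.QuantileEstimatorQuery(
    initial_estimate=10.0,
    target_quantile=0.5,
    learning_rate=1.0,
    below_estimate_stddev=1e-2,
    expected_num_records=100,
    geometric_update=True)

# New in this commit: no noise, and the fraction is normalized by the exact
# number of records actually seen in each sample.
exact_query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
    initial_estimate=10.0,
    target_quantile=0.5,
    learning_rate=1.0,
    geometric_update=True)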

tensorflow_privacy/privacy/dp_query/BUILD

@@ -146,6 +146,7 @@ py_library(
     deps = [
         ":dp_query",
         ":gaussian_query",
+        ":no_privacy_query",
         "//third_party/py/tensorflow",
     ],
 )

tensorflow_privacy/privacy/dp_query/no_privacy_query.py

@@ -1,4 +1,4 @@
-# Copyright 2018, The TensorFlow Authors.
+# Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import warnings
+
 import tensorflow.compat.v1 as tf
 
 from tensorflow_privacy.privacy.dp_query import dp_query
@@ -28,8 +30,26 @@ class NoPrivacySumQuery(dp_query.SumAggregationDPQuery):
 
   Accumulates vectors without clipping or adding noise.
   """
 
+  def __init__(self):
+    self._ledger = None
+
+  def set_ledger(self, ledger):
+    warnings.warn(
+        'Attempt to use NoPrivacySumQuery with privacy ledger. Privacy '
+        'guarantees will be vacuous.')
+    self._ledger = ledger
+
   def get_noised_result(self, sample_state, global_state):
     """See base class."""
-    return sample_state, global_state
+
+    if self._ledger:
+      dependencies = [
+          self._ledger.record_sum_query(float('inf'), 0.0)
+      ]
+    else:
+      dependencies = []
+
+    with tf.control_dependencies(dependencies):
+      return sample_state, global_state
@@ -39,6 +59,15 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
   Accumulates vectors and normalizes by the total number of accumulated vectors.
   """
 
+  def __init__(self):
+    self._ledger = None
+
+  def set_ledger(self, ledger):
+    warnings.warn(
+        'Attempt to use NoPrivacyAverageQuery with privacy ledger. Privacy '
+        'guarantees will be vacuous.')
+    self._ledger = ledger
+
   def initial_sample_state(self, template):
     """See base class."""
     return (super(NoPrivacyAverageQuery, self).initial_sample_state(template),
@@ -59,5 +88,13 @@ class NoPrivacyAverageQuery(dp_query.SumAggregationDPQuery):
     """See base class."""
     sum_state, denominator = sample_state
 
-    return (tf.nest.map_structure(lambda t: t / denominator,
-                                  sum_state), global_state)
+    if self._ledger:
+      dependencies = [
+          self._ledger.record_sum_query(float('inf'), 0.0)
+      ]
+    else:
+      dependencies = []
+
+    with tf.control_dependencies(dependencies):
+      return (tf.nest.map_structure(lambda t: t / denominator,
+                                    sum_state), global_state)
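
The pattern added to both classes is the same: when a ledger is attached, record a sum query with an infinite L2-norm bound and zero noise stddev, then gate the un-noised result on that bookkeeping op. A small sketch of how this behaves in practice; the `_StubLedger` below is a hypothetical stand-in for the real privacy ledger (which lives elsewhere in the library) and is used only to show the call pattern:

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import no_privacy_query


class _StubLedger(object):
  """Hypothetical stand-in for a privacy ledger; not part of this change."""

  def record_sum_query(self, l2_norm_bound, noise_stddev):
    # The no-privacy queries record an unbounded, noiseless sum event.
    print('ledger entry: l2_norm_bound=%s, noise_stddev=%s'
          % (l2_norm_bound, noise_stddev))
    return tf.constant(0.0)  # something for tf.control_dependencies to wait on


query = no_privacy_query.NoPrivacyAverageQuery()
query.set_ledger(_StubLedger())  # warns that privacy guarantees are vacuous

global_state = query.initial_global_state()
params = query.derive_sample_params(global_state)
sample_state = query.initial_sample_state(tf.constant(0.0))
for record in [tf.constant(2.0), tf.constant(4.0)]:
  sample_state = query.accumulate_record(params, sample_state, record)

result, _ = query.get_noised_result(sample_state, global_state)
# result == 3.0: the exact average, with a ledger entry of (inf, 0.0) recorded.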

tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py

@@ -1,4 +1,4 @@
-# Copyright 2019, The TensorFlow Authors.
+# Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -126,7 +126,7 @@ class QuantileAdaptiveClipSumQuery(dp_query.SumAggregationDPQuery):
     """See base class."""
     return self._SampleState(
         self._sum_query.initial_sample_state(template),
-        self._quantile_estimator_query.initial_sample_state(tf.constant(0.0)))
+        self._quantile_estimator_query.initial_sample_state())
 
   def preprocess_record(self, params, record):
     clipped_record, global_norm = (

tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py

@@ -1,4 +1,4 @@
-# Copyright 2019, The TensorFlow Authors.
+# Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -30,10 +30,11 @@ import tensorflow.compat.v1 as tf
 
 from tensorflow_privacy.privacy.dp_query import dp_query
 from tensorflow_privacy.privacy.dp_query import gaussian_query
+from tensorflow_privacy.privacy.dp_query import no_privacy_query
 
 
 class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery):
-  """Defines iterative process to estimate a target quantile of a distribution.
+  """Iterative process to estimate target quantile of a univariate distribution.
   """
 
   # pylint: disable=invalid-name
@@ -82,6 +83,15 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery):
     self._target_quantile = target_quantile
     self._learning_rate = learning_rate
 
+    self._below_estimate_query = self._construct_below_estimate_query(
+        below_estimate_stddev, expected_num_records)
+    assert isinstance(self._below_estimate_query,
+                      dp_query.SumAggregationDPQuery)
+
+    self._geometric_update = geometric_update
+
+  def _construct_below_estimate_query(
+      self, below_estimate_stddev, expected_num_records):
     # A DPQuery used to estimate the fraction of records that are less than the
     # current quantile estimate. It accumulates an indicator 0/1 of whether each
     # record is below the estimate, and normalizes by the expected number of
@@ -91,16 +101,11 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery):
     # affect the count is 0.5. Note that although the l2_norm_clip of the
     # below_estimate query is 0.5, no clipping will ever actually occur
     # because the value of each record is always +/-0.5.
-    self._below_estimate_query = gaussian_query.GaussianAverageQuery(
+    return gaussian_query.GaussianAverageQuery(
         l2_norm_clip=0.5,
         sum_stddev=below_estimate_stddev,
         denominator=expected_num_records)
 
-    self._geometric_update = geometric_update
-
-    assert isinstance(self._below_estimate_query,
-                      dp_query.SumAggregationDPQuery)
-
   def set_ledger(self, ledger):
     """See base class."""
     self._below_estimate_query.set_ledger(ledger)
@@ -120,7 +125,15 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery):
     return self._SampleParams(global_state.current_estimate,
                               below_estimate_params)
 
+  def initial_sample_state(self, template=None):
+    # Template is ignored because records are required to be scalars.
+    del template
+
+    return self._below_estimate_query.initial_sample_state(0.0)
+
   def preprocess_record(self, params, record):
+    tf.debugging.assert_scalar(record)
+
     # We accumulate counts shifted by 0.5 so they are centered at zero.
     # This makes the sensitivity of the count query 0.5 instead of 1.0.
     below = tf.cast(record <= params.current_estimate, tf.float32) - 0.5
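
To make the comment above concrete: each record contributes +0.5 or -0.5, so adding or removing one record moves the sum by at most 0.5, which is why l2_norm_clip=0.5 suffices and clipping never actually fires. A worked example in plain Python; the shift back by +0.5 into a fraction happens in get_noised_result, which is unchanged and not shown in this hunk, so that step is an assumption here:

current_estimate = 10.0
records = [8.5, 7.25, 12.0, 10.0]

# Indicator of "at or below the estimate", shifted to be centered at zero.
shifted = [(0.5 if r <= current_estimate else -0.5) for r in records]
assert shifted == [0.5, 0.5, -0.5, 0.5]

# The Gaussian path divides the (noised) sum by expected_num_records; the new
# NoPrivacyAverageQuery path divides by the exact count of records instead.
denominator = float(len(records))
below_estimate = sum(shifted) / denominator + 0.5
assert below_estimate == 0.75  # 3 of the 4 records are at or below 10.0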
@@ -156,3 +169,42 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery):
         below_estimate_state=new_below_estimate_state)
 
     return new_estimate, new_global_state
+
+
+class NoPrivacyQuantileEstimatorQuery(QuantileEstimatorQuery):
+  """Iterative process to estimate target quantile of a univariate distribution.
+
+  Unlike the base class, this uses a NoPrivacyQuery to estimate the fraction
+  below estimate with an exact denominator.
+  """
+
+  def __init__(
+      self,
+      initial_estimate,
+      target_quantile,
+      learning_rate,
+      geometric_update=False):
+    """Initializes the NoPrivacyQuantileEstimatorQuery.
+
+    Args:
+      initial_estimate: The initial estimate of the quantile.
+      target_quantile: The target quantile. I.e., a value of 0.8 means a value
+        should be found for which approximately 80% of updates are
+        less than the estimate each round.
+      learning_rate: The learning rate. A rate of r means that the estimate
+        will change by a maximum of r at each step (for arithmetic updating) or
+        by a maximum factor of exp(r) (for geometric updating).
+      geometric_update: If True, use geometric updating of estimate. Geometric
+        updating is preferred for non-negative records like vector norms that
+        could potentially be very large or very close to zero.
+    """
+    super(NoPrivacyQuantileEstimatorQuery, self).__init__(
+        initial_estimate, target_quantile, learning_rate,
+        below_estimate_stddev=None, expected_num_records=None,
+        geometric_update=geometric_update)
+
+  def _construct_below_estimate_query(
+      self, below_estimate_stddev, expected_num_records):
+    del below_estimate_stddev
+    del expected_num_records
+    return no_privacy_query.NoPrivacyAverageQuery()
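
A short end-to-end sketch of driving the new class through the generic DPQuery interface; the loop roughly mirrors what the updated tests do, the record values are illustrative, and eager execution is assumed:

import tensorflow.compat.v1 as tf
from tensorflow_privacy.privacy.dp_query import quantile_estimator_query

# Track the median of a small stream of scalars, exactly and without noise.
query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
    initial_estimate=10.0,
    target_quantile=0.5,
    learning_rate=1.0,
    geometric_update=True)

global_state = query.initial_global_state()
records = [tf.constant(x) for x in (2.0, 3.0, 5.0, 7.0, 11.0)]

for _ in range(20):
  params = query.derive_sample_params(global_state)
  sample_state = query.initial_sample_state()  # template no longer needed
  for record in records:
    sample_state = query.accumulate_record(params, sample_state, record)
  estimate, global_state = query.get_noised_result(sample_state, global_state)

# After a few rounds, `estimate` settles near the stream's median (5.0) and
# bounces around it, since each round moves it by at most a factor of e.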

tensorflow_privacy/privacy/dp_query/quantile_estimator_query_test.py

@@ -1,4 +1,4 @@
-# Copyright 2019, The TensorFlow Authors.
+# Copyright 2020, The TensorFlow Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -29,18 +29,42 @@ from tensorflow_privacy.privacy.dp_query import test_utils
 
 tf.enable_eager_execution()
 
 
+def _make_quantile_estimator_query(
+    initial_estimate,
+    target_quantile,
+    learning_rate,
+    below_estimate_stddev,
+    expected_num_records,
+    geometric_update):
+  if expected_num_records is not None:
+    return quantile_estimator_query.QuantileEstimatorQuery(
+        initial_estimate,
+        target_quantile,
+        learning_rate,
+        below_estimate_stddev,
+        expected_num_records,
+        geometric_update)
+  else:
+    return quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
+        initial_estimate,
+        target_quantile,
+        learning_rate,
+        geometric_update)
+
+
 class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
 
-  def test_target_zero(self):
+  @parameterized.named_parameters(('exact', True), ('fixed', False))
+  def test_target_zero(self, exact):
     record1 = tf.constant(8.5)
     record2 = tf.constant(7.25)
 
-    query = quantile_estimator_query.QuantileEstimatorQuery(
+    query = _make_quantile_estimator_query(
         initial_estimate=10.0,
         target_quantile=0.0,
         learning_rate=1.0,
         below_estimate_stddev=0.0,
-        expected_num_records=2.0,
+        expected_num_records=(None if exact else 2.0),
         geometric_update=False)
 
     global_state = query.initial_global_state()
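
The body of the test loop is unchanged and therefore elided from this hunk. For the new 'exact' case, assuming the update rule described in the class docstring (arithmetic updating: move the estimate by learning_rate times the gap between the measured below-estimate fraction and the target), the expected trajectory can be worked out by hand:

# test_target_zero, exact case: target 0.0, learning rate 1.0, records 8.5, 7.25.
estimate, target, lr = 10.0, 0.0, 1.0
records = [8.5, 7.25]
history = []
for _ in range(4):
  below = sum(r <= estimate for r in records) / len(records)  # exact denominator
  estimate -= lr * (below - target)  # assumed arithmetic update rule
  history.append(estimate)
assert history == [9.0, 8.0, 7.5, 7.0]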
@@ -60,16 +84,17 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
 
       self.assertAllClose(actual_estimate.numpy(), expected_estimate)
 
-  def test_target_zero_geometric(self):
+  @parameterized.named_parameters(('exact', True), ('fixed', False))
+  def test_target_zero_geometric(self, exact):
     record1 = tf.constant(5.0)
     record2 = tf.constant(2.5)
 
-    query = quantile_estimator_query.QuantileEstimatorQuery(
+    query = _make_quantile_estimator_query(
        initial_estimate=16.0,
        target_quantile=0.0,
        learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
        below_estimate_stddev=0.0,
-       expected_num_records=2.0,
+       expected_num_records=(None if exact else 2.0),
        geometric_update=True)
 
     global_state = query.initial_global_state()
@@ -91,16 +116,17 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
 
      self.assertAllClose(actual_estimate.numpy(), expected_estimate)
 
-  def test_target_one(self):
+  @parameterized.named_parameters(('exact', True), ('fixed', False))
+  def test_target_one(self, exact):
     record1 = tf.constant(1.5)
     record2 = tf.constant(2.75)
 
-    query = quantile_estimator_query.QuantileEstimatorQuery(
+    query = _make_quantile_estimator_query(
        initial_estimate=0.0,
        target_quantile=1.0,
        learning_rate=1.0,
        below_estimate_stddev=0.0,
-       expected_num_records=2.0,
+       expected_num_records=(None if exact else 2.0),
        geometric_update=False)
 
     global_state = query.initial_global_state()
@@ -120,16 +146,17 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
 
      self.assertAllClose(actual_estimate.numpy(), expected_estimate)
 
-  def test_target_one_geometric(self):
+  @parameterized.named_parameters(('exact', True), ('fixed', False))
+  def test_target_one_geometric(self, exact):
     record1 = tf.constant(1.5)
     record2 = tf.constant(3.0)
 
-    query = quantile_estimator_query.QuantileEstimatorQuery(
+    query = _make_quantile_estimator_query(
        initial_estimate=0.5,
        target_quantile=1.0,
        learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
        below_estimate_stddev=0.0,
-       expected_num_records=2.0,
+       expected_num_records=(None if exact else 2.0),
        geometric_update=True)
 
     global_state = query.initial_global_state()
@@ -152,23 +179,27 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
      self.assertAllClose(actual_estimate.numpy(), expected_estimate)
 
   @parameterized.named_parameters(
-      ('start_low_arithmetic', True, False),
-      ('start_low_geometric', True, True),
-      ('start_high_arithmetic', False, False),
-      ('start_high_geometric', False, True))
-  def test_linspace(self, start_low, geometric):
+      ('start_low_geometric_exact', True, True, True),
+      ('start_low_arithmetic_exact', True, True, False),
+      ('start_high_geometric_exact', True, False, True),
+      ('start_high_arithmetic_exact', True, False, False),
+      ('start_low_geometric_noised', False, True, True),
+      ('start_low_arithmetic_noised', False, True, False),
+      ('start_high_geometric_noised', False, False, True),
+      ('start_high_arithmetic_noised', False, False, False))
+  def test_linspace(self, exact, start_low, geometric):
     # 21 records equally spaced from 0 to 10 in 0.5 increments.
     # Test that we converge to the correct median value and bounce around it.
     num_records = 21
     records = [tf.constant(x) for x in np.linspace(
         0.0, 10.0, num=num_records, dtype=np.float32)]
 
-    query = quantile_estimator_query.QuantileEstimatorQuery(
+    query = _make_quantile_estimator_query(
        initial_estimate=(1.0 if start_low else 10.0),
        target_quantile=0.5,
        learning_rate=1.0,
-       below_estimate_stddev=0.0,
-       expected_num_records=num_records,
+       below_estimate_stddev=(0.0 if exact else 1e-2),
+       expected_num_records=(None if exact else num_records),
        geometric_update=geometric)
 
     global_state = query.initial_global_state()
@@ -182,11 +213,15 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
        self.assertNear(actual_estimate, 5.0, 0.25)
 
   @parameterized.named_parameters(
-      ('start_low_arithmetic', True, False),
-      ('start_low_geometric', True, True),
-      ('start_high_arithmetic', False, False),
-      ('start_high_geometric', False, True))
-  def test_all_equal(self, start_low, geometric):
+      ('start_low_geometric_exact', True, True, True),
+      ('start_low_arithmetic_exact', True, True, False),
+      ('start_high_geometric_exact', True, False, True),
+      ('start_high_arithmetic_exact', True, False, False),
+      ('start_low_geometric_noised', False, True, True),
+      ('start_low_arithmetic_noised', False, True, False),
+      ('start_high_geometric_noised', False, False, True),
+      ('start_high_arithmetic_noised', False, False, False))
+  def test_all_equal(self, exact, start_low, geometric):
     # 20 equal records. Test that we converge to that record and bounce around
     # it. Unlike the linspace test, the quantile-matching objective is very
     # sharp at the optimum so a decaying learning rate is necessary.
@@ -195,12 +230,12 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
 
     learning_rate = tf.Variable(1.0)
 
-    query = quantile_estimator_query.QuantileEstimatorQuery(
+    query = _make_quantile_estimator_query(
        initial_estimate=(1.0 if start_low else 10.0),
        target_quantile=0.5,
        learning_rate=learning_rate,
-       below_estimate_stddev=0.0,
-       expected_num_records=num_records,
+       below_estimate_stddev=(0.0 if exact else 1e-2),
+       expected_num_records=(None if exact else num_records),
        geometric_update=geometric)
 
     global_state = query.initial_global_state()
@@ -214,6 +249,15 @@ class QuantileEstimatorQueryTest(tf.test.TestCase, parameterized.TestCase):
       if t > 40:
         self.assertNear(actual_estimate, 5.0, 0.5)
 
+  def test_raises_with_non_scalar_record(self):
+    query = quantile_estimator_query.NoPrivacyQuantileEstimatorQuery(
+        initial_estimate=1.0,
+        target_quantile=0.5,
+        learning_rate=1.0)
+
+    with self.assertRaisesRegex(ValueError, 'scalar'):
+      query.accumulate_record(None, None, [1.0, 2.0])
+
 
 if __name__ == '__main__':
   tf.test.main()