From e5848656ed7214fa6a1e21a64aec23f0cad00563 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Wed, 19 May 2021 20:19:39 -0700 Subject: [PATCH] Remove GaussianAverageQuery. Users can simply wrap GaussianSumQuery with a NormalizedQuery. PiperOrigin-RevId: 374784618 --- tensorflow_privacy/__init__.py | 1 - .../privacy/analysis/privacy_ledger_test.py | 18 ++-- .../privacy/dp_query/gaussian_query.py | 50 ++--------- .../privacy/dp_query/gaussian_query_test.py | 45 ++-------- .../privacy/dp_query/nested_query_test.py | 79 +++++++---------- .../privacy/dp_query/normalized_query_test.py | 6 +- .../dp_query/quantile_estimator_query.py | 86 +++++++++---------- 7 files changed, 96 insertions(+), 189 deletions(-) diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 71afc2a..fcdfd50 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -39,7 +39,6 @@ else: # DPQuery classes from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery from tensorflow_privacy.privacy.dp_query.dp_query import SumAggregationDPQuery - from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianAverageQuery from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery diff --git a/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py b/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py index 3a6b00f..3d07eb6 100644 --- a/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py +++ b/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Tests for PrivacyLedger.""" from __future__ import absolute_import @@ -57,10 +56,9 @@ class PrivacyLedgerTest(tf.test.TestCase): population_size = tf.Variable(0) selection_probability = tf.Variable(1.0) - query = gaussian_query.GaussianSumQuery( - l2_norm_clip=10.0, stddev=0.0) - query = privacy_ledger.QueryWithLedger( - query, population_size, selection_probability) + query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0) + query = privacy_ledger.QueryWithLedger(query, population_size, + selection_probability) # First sample. tf.assign(population_size, 10) @@ -93,14 +91,12 @@ class PrivacyLedgerTest(tf.test.TestCase): population_size = tf.Variable(0) selection_probability = tf.Variable(1.0) - query1 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0) - query2 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0) + query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=4.0, stddev=2.0) + query2 = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=1.0) query = nested_query.NestedQuery([query1, query2]) - query = privacy_ledger.QueryWithLedger( - query, population_size, selection_probability) + query = privacy_ledger.QueryWithLedger(query, population_size, + selection_probability) record1 = [1.0, [12.0, 9.0]] record2 = [5.0, [1.0, 2.0]] diff --git a/tensorflow_privacy/privacy/dp_query/gaussian_query.py b/tensorflow_privacy/privacy/dp_query/gaussian_query.py index 9095c44..28a8bb5 100644 --- a/tensorflow_privacy/privacy/dp_query/gaussian_query.py +++ b/tensorflow_privacy/privacy/dp_query/gaussian_query.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""Implements DPQuery interface for Gaussian average queries. -""" +"""Implements DPQuery interface for Gaussian sum queries.""" from __future__ import absolute_import from __future__ import division @@ -25,7 +23,6 @@ import distutils import tensorflow.compat.v1 as tf from tensorflow_privacy.privacy.dp_query import dp_query -from tensorflow_privacy.privacy.dp_query import normalized_query class GaussianSumQuery(dp_query.SumAggregationDPQuery): @@ -35,8 +32,8 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): """ # pylint: disable=invalid-name - _GlobalState = collections.namedtuple( - '_GlobalState', ['l2_norm_clip', 'stddev']) + _GlobalState = collections.namedtuple('_GlobalState', + ['l2_norm_clip', 'stddev']) def __init__(self, l2_norm_clip, stddev): """Initializes the GaussianSumQuery. @@ -55,8 +52,8 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): def make_global_state(self, l2_norm_clip, stddev): """Creates a global state from the given parameters.""" - return self._GlobalState(tf.cast(l2_norm_clip, tf.float32), - tf.cast(stddev, tf.float32)) + return self._GlobalState( + tf.cast(l2_norm_clip, tf.float32), tf.cast(stddev, tf.float32)) def initial_global_state(self): return self.make_global_state(self._l2_norm_clip, self._stddev) @@ -94,48 +91,17 @@ class GaussianSumQuery(dp_query.SumAggregationDPQuery): return v + tf.random.normal( tf.shape(input=v), stddev=global_state.stddev, dtype=v.dtype) else: - random_normal = tf.random_normal_initializer( - stddev=global_state.stddev) + random_normal = tf.random_normal_initializer(stddev=global_state.stddev) def add_noise(v): return v + tf.cast(random_normal(tf.shape(input=v)), dtype=v.dtype) if self._ledger: dependencies = [ - self._ledger.record_sum_query( - global_state.l2_norm_clip, global_state.stddev) + self._ledger.record_sum_query(global_state.l2_norm_clip, + global_state.stddev) ] else: dependencies = [] with tf.control_dependencies(dependencies): return tf.nest.map_structure(add_noise, sample_state), global_state - - -class GaussianAverageQuery(normalized_query.NormalizedQuery): - """Implements DPQuery interface for Gaussian average queries. - - Accumulates clipped vectors, adds Gaussian noise, and normalizes. - - Note that we use "fixed-denominator" estimation: the denominator should be - specified as the expected number of records per sample. Accumulating the - denominator separately would also be possible but would be produce a higher - variance estimator. - """ - - def __init__(self, - l2_norm_clip, - sum_stddev, - denominator): - """Initializes the GaussianAverageQuery. - - Args: - l2_norm_clip: The clipping norm to apply to the global norm of each - record. - sum_stddev: The stddev of the noise added to the sum (before - normalization). - denominator: The normalization constant (applied after noise is added to - the sum). - """ - super(GaussianAverageQuery, self).__init__( - numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev), - denominator=denominator) diff --git a/tensorflow_privacy/privacy/dp_query/gaussian_query_test.py b/tensorflow_privacy/privacy/dp_query/gaussian_query_test.py index 0e8e3db..cd9fe61 100644 --- a/tensorflow_privacy/privacy/dp_query/gaussian_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/gaussian_query_test.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""Tests for GaussianAverageQuery.""" +"""Tests for GaussianSumQuery.""" from __future__ import absolute_import from __future__ import division @@ -34,8 +33,7 @@ class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): record1 = tf.constant([2.0, 0.0]) record2 = tf.constant([-1.0, 1.0]) - query = gaussian_query.GaussianSumQuery( - l2_norm_clip=10.0, stddev=0.0) + query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0) query_result, _ = test_utils.run_query(query, [record1, record2]) result = sess.run(query_result) expected = [1.0, 1.0] @@ -46,8 +44,7 @@ class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. record2 = tf.constant([4.0, -3.0]) # Not clipped. - query = gaussian_query.GaussianSumQuery( - l2_norm_clip=5.0, stddev=0.0) + query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0) query_result, _ = test_utils.run_query(query, [record1, record2]) result = sess.run(query_result) expected = [1.0, 1.0] @@ -80,8 +77,7 @@ class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): record1, record2 = 2.71828, 3.14159 stddev = 1.0 - query = gaussian_query.GaussianSumQuery( - l2_norm_clip=5.0, stddev=stddev) + query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=stddev) query_result, _ = test_utils.run_query(query, [record1, record2]) noised_sums = [] @@ -108,8 +104,7 @@ class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): sample_state_2 = get_sample_state(records2) merged = gaussian_query.GaussianSumQuery(10.0, 1.0).merge_sample_states( - sample_state_1, - sample_state_2) + sample_state_1, sample_state_2) with self.cached_session() as sess: result = sess.run(merged) @@ -117,36 +112,6 @@ class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): expected = [3.0, 10.0] self.assertAllClose(result, expected) - def test_gaussian_average_no_noise(self): - with self.cached_session() as sess: - record1 = tf.constant([5.0, 0.0]) # Clipped to [3.0, 0.0]. - record2 = tf.constant([-1.0, 2.0]) # Not clipped. - - query = gaussian_query.GaussianAverageQuery( - l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0) - query_result, _ = test_utils.run_query(query, [record1, record2]) - result = sess.run(query_result) - expected_average = [1.0, 1.0] - self.assertAllClose(result, expected_average) - - def test_gaussian_average_with_noise(self): - with self.cached_session() as sess: - record1, record2 = 2.71828, 3.14159 - sum_stddev = 1.0 - denominator = 2.0 - - query = gaussian_query.GaussianAverageQuery( - l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator) - query_result, _ = test_utils.run_query(query, [record1, record2]) - - noised_averages = [] - for _ in range(1000): - noised_averages.append(sess.run(query_result)) - - result_stddev = np.std(noised_averages) - avg_stddev = sum_stddev / denominator - self.assertNear(result_stddev, avg_stddev, 0.1) - @parameterized.named_parameters( ('type_mismatch', [1.0], (1.0,), TypeError), ('too_few_on_left', [1.0], [1.0, 1.0], ValueError), diff --git a/tensorflow_privacy/privacy/dp_query/nested_query_test.py b/tensorflow_privacy/privacy/dp_query/nested_query_test.py index d486c16..a0f62e1 100644 --- a/tensorflow_privacy/privacy/dp_query/nested_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/nested_query_test.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Tests for NestedQuery.""" from __future__ import absolute_import @@ -27,9 +26,9 @@ import tensorflow.compat.v1 as tf from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import nested_query +from tensorflow_privacy.privacy.dp_query import normalized_query from tensorflow_privacy.privacy.dp_query import test_utils - _basic_query = gaussian_query.GaussianSumQuery(1.0, 0.0) @@ -37,10 +36,8 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): def test_nested_gaussian_sum_no_clip_no_noise(self): with self.cached_session() as sess: - query1 = gaussian_query.GaussianSumQuery( - l2_norm_clip=10.0, stddev=0.0) - query2 = gaussian_query.GaussianSumQuery( - l2_norm_clip=10.0, stddev=0.0) + query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0) + query2 = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0) query = nested_query.NestedSumQuery([query1, query2]) @@ -52,34 +49,19 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): expected = [5.0, [5.0, 5.0]] self.assertAllClose(result, expected) - def test_nested_gaussian_average_no_clip_no_noise(self): - with self.cached_session() as sess: - query1 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0) - query2 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=10.0, sum_stddev=0.0, denominator=5.0) - - query = nested_query.NestedSumQuery([query1, query2]) - - record1 = [1.0, [2.0, 3.0]] - record2 = [4.0, [3.0, 2.0]] - - query_result, _ = test_utils.run_query(query, [record1, record2]) - result = sess.run(query_result) - expected = [1.0, [1.0, 1.0]] - self.assertAllClose(result, expected) - def test_nested_gaussian_average_with_clip_no_noise(self): with self.cached_session() as sess: - query1 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0) - query2 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0) + query1 = normalized_query.NormalizedQuery( + gaussian_query.GaussianSumQuery(l2_norm_clip=4.0, stddev=0.0), + denominator=5.0) + query2 = normalized_query.NormalizedQuery( + gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0), + denominator=5.0) query = nested_query.NestedSumQuery([query1, query2]) record1 = [1.0, [12.0, 9.0]] # Clipped to [1.0, [4.0, 3.0]] - record2 = [5.0, [1.0, 2.0]] # Clipped to [4.0, [1.0, 2.0]] + record2 = [5.0, [1.0, 2.0]] # Clipped to [4.0, [1.0, 2.0]] query_result, _ = test_utils.run_query(query, [record1, record2]) result = sess.run(query_result) @@ -88,15 +70,17 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): def test_complex_nested_query(self): with self.cached_session() as sess: - query_ab = gaussian_query.GaussianSumQuery( - l2_norm_clip=1.0, stddev=0.0) - query_c = gaussian_query.GaussianAverageQuery( - l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0) - query_d = gaussian_query.GaussianSumQuery( - l2_norm_clip=10.0, stddev=0.0) + query_ab = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0) + query_c = normalized_query.NormalizedQuery( + gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0), + denominator=2.0) + query_d = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0) query = nested_query.NestedSumQuery( - [query_ab, {'c': query_c, 'd': [query_d]}]) + [query_ab, { + 'c': query_c, + 'd': [query_d] + }]) record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}] record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}] @@ -108,13 +92,13 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): def test_nested_query_with_noise(self): with self.cached_session() as sess: - sum_stddev = 2.71828 + stddev = 2.71828 denominator = 3.14159 - query1 = gaussian_query.GaussianSumQuery( - l2_norm_clip=1.5, stddev=sum_stddev) - query2 = gaussian_query.GaussianAverageQuery( - l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator) + query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=1.5, stddev=stddev) + query2 = normalized_query.NormalizedQuery( + gaussian_query.GaussianSumQuery(l2_norm_clip=0.5, stddev=stddev), + denominator=denominator) query = nested_query.NestedSumQuery((query1, query2)) record1 = (3.0, [2.0, 1.5]) @@ -127,20 +111,20 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): noised_averages.append(tf.nest.flatten(sess.run(query_result))) result_stddev = np.std(noised_averages, 0) - avg_stddev = sum_stddev / denominator - expected_stddev = [sum_stddev, avg_stddev, avg_stddev] + avg_stddev = stddev / denominator + expected_stddev = [stddev, avg_stddev, avg_stddev] self.assertArrayNear(result_stddev, expected_stddev, 0.1) @parameterized.named_parameters( ('type_mismatch', [_basic_query], (1.0,), TypeError), ('too_many_queries', [_basic_query, _basic_query], [1.0], ValueError), ('query_too_deep', [_basic_query, [_basic_query]], [1.0, 1.0], TypeError)) - def test_record_incompatible_with_query( - self, queries, record, error_type): + def test_record_incompatible_with_query(self, queries, record, error_type): with self.assertRaises(error_type): test_utils.run_query(nested_query.NestedSumQuery(queries), [record]) def test_raises_with_non_sum(self): + class NonSumDPQuery(dp_query.DPQuery): pass @@ -154,6 +138,7 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): nested_query.NestedSumQuery(non_sum_query) def test_metrics(self): + class QueryWithMetric(dp_query.SumAggregationDPQuery): def __init__(self, metric_val): @@ -171,8 +156,10 @@ class NestedQueryTest(tf.test.TestCase, parameterized.TestCase): metric_val = nested_a.derive_metrics(global_state) self.assertEqual(metric_val['metric'], 1) - nested_b = nested_query.NestedSumQuery( - {'key1': query1, 'key2': [query2, query3]}) + nested_b = nested_query.NestedSumQuery({ + 'key1': query1, + 'key2': [query2, query3] + }) global_state = nested_b.initial_global_state() metric_val = nested_b.derive_metrics(global_state) self.assertEqual(metric_val['key1/metric'], 1) diff --git a/tensorflow_privacy/privacy/dp_query/normalized_query_test.py b/tensorflow_privacy/privacy/dp_query/normalized_query_test.py index ec66b35..2dd0157 100644 --- a/tensorflow_privacy/privacy/dp_query/normalized_query_test.py +++ b/tensorflow_privacy/privacy/dp_query/normalized_query_test.py @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""Tests for GaussianAverageQuery.""" +"""Tests for NormalizedQuery.""" from __future__ import absolute_import from __future__ import division @@ -32,8 +31,7 @@ class NormalizedQueryTest(tf.test.TestCase): record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. record2 = tf.constant([4.0, -3.0]) # Not clipped. - sum_query = gaussian_query.GaussianSumQuery( - l2_norm_clip=5.0, stddev=0.0) + sum_query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0) query = normalized_query.NormalizedQuery( numerator_query=sum_query, denominator=2.0) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py index 68b2865..405d83f 100644 --- a/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py +++ b/tensorflow_privacy/privacy/dp_query/quantile_estimator_query.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Implements DPQuery interface for quantile estimator. From a starting estimate of the target quantile, the estimate is updated @@ -27,23 +26,20 @@ from __future__ import print_function import collections import tensorflow.compat.v1 as tf - from tensorflow_privacy.privacy.dp_query import dp_query from tensorflow_privacy.privacy.dp_query import gaussian_query from tensorflow_privacy.privacy.dp_query import no_privacy_query +from tensorflow_privacy.privacy.dp_query import normalized_query class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): - """Iterative process to estimate target quantile of a univariate distribution. - """ + """Iterative process to estimate target quantile of a univariate distribution.""" # pylint: disable=invalid-name - _GlobalState = collections.namedtuple( - '_GlobalState', [ - 'current_estimate', - 'target_quantile', - 'learning_rate', - 'below_estimate_state']) + _GlobalState = collections.namedtuple('_GlobalState', [ + 'current_estimate', 'target_quantile', 'learning_rate', + 'below_estimate_state' + ]) # pylint: disable=invalid-name _SampleParams = collections.namedtuple( @@ -52,24 +48,23 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): # No separate SampleState-- sample state is just below_estimate_query's # SampleState. - def __init__( - self, - initial_estimate, - target_quantile, - learning_rate, - below_estimate_stddev, - expected_num_records, - geometric_update=False): + def __init__(self, + initial_estimate, + target_quantile, + learning_rate, + below_estimate_stddev, + expected_num_records, + geometric_update=False): """Initializes the QuantileAdaptiveClipSumQuery. Args: initial_estimate: The initial estimate of the quantile. target_quantile: The target quantile. I.e., a value of 0.8 means a value - should be found for which approximately 80% of updates are - less than the estimate each round. - learning_rate: The learning rate. A rate of r means that the estimate - will change by a maximum of r at each step (for arithmetic updating) or - by a maximum factor of exp(r) (for geometric updating). + should be found for which approximately 80% of updates are less than the + estimate each round. + learning_rate: The learning rate. A rate of r means that the estimate will + change by a maximum of r at each step (for arithmetic updating) or by a + maximum factor of exp(r) (for geometric updating). below_estimate_stddev: The stddev of the noise added to the count of records currently below the estimate. Since the sensitivity of the count query is 0.5, as a rule of thumb it should be about 0.5 for reasonable @@ -90,8 +85,8 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): self._geometric_update = geometric_update - def _construct_below_estimate_query( - self, below_estimate_stddev, expected_num_records): + def _construct_below_estimate_query(self, below_estimate_stddev, + expected_num_records): # A DPQuery used to estimate the fraction of records that are less than the # current quantile estimate. It accumulates an indicator 0/1 of whether each # record is below the estimate, and normalizes by the expected number of @@ -101,9 +96,9 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): # affect the count is 0.5. Note that although the l2_norm_clip of the # below_estimate query is 0.5, no clipping will ever actually occur # because the value of each record is always +/-0.5. - return gaussian_query.GaussianAverageQuery( - l2_norm_clip=0.5, - sum_stddev=below_estimate_stddev, + return normalized_query.NormalizedQuery( + gaussian_query.GaussianSumQuery( + l2_norm_clip=0.5, stddev=below_estimate_stddev), denominator=expected_num_records) def set_ledger(self, ledger): @@ -140,8 +135,7 @@ class QuantileEstimatorQuery(dp_query.SumAggregationDPQuery): def get_noised_result(self, sample_state, global_state): below_estimate_result, new_below_estimate_state = ( self._below_estimate_query.get_noised_result( - sample_state, - global_state.below_estimate_state)) + sample_state, global_state.below_estimate_state)) # Unshift below_estimate percentile by 0.5. (See comment in initializer.) below_estimate = below_estimate_result + 0.5 @@ -177,33 +171,35 @@ class NoPrivacyQuantileEstimatorQuery(QuantileEstimatorQuery): below estimate with an exact denominator. """ - def __init__( - self, - initial_estimate, - target_quantile, - learning_rate, - geometric_update=False): + def __init__(self, + initial_estimate, + target_quantile, + learning_rate, + geometric_update=False): """Initializes the NoPrivacyQuantileEstimatorQuery. Args: initial_estimate: The initial estimate of the quantile. target_quantile: The target quantile. I.e., a value of 0.8 means a value - should be found for which approximately 80% of updates are - less than the estimate each round. - learning_rate: The learning rate. A rate of r means that the estimate - will change by a maximum of r at each step (for arithmetic updating) or - by a maximum factor of exp(r) (for geometric updating). + should be found for which approximately 80% of updates are less than the + estimate each round. + learning_rate: The learning rate. A rate of r means that the estimate will + change by a maximum of r at each step (for arithmetic updating) or by a + maximum factor of exp(r) (for geometric updating). geometric_update: If True, use geometric updating of estimate. Geometric updating is preferred for non-negative records like vector norms that could potentially be very large or very close to zero. """ super(NoPrivacyQuantileEstimatorQuery, self).__init__( - initial_estimate, target_quantile, learning_rate, - below_estimate_stddev=None, expected_num_records=None, + initial_estimate, + target_quantile, + learning_rate, + below_estimate_stddev=None, + expected_num_records=None, geometric_update=geometric_update) - def _construct_below_estimate_query( - self, below_estimate_stddev, expected_num_records): + def _construct_below_estimate_query(self, below_estimate_stddev, + expected_num_records): del below_estimate_stddev del expected_num_records return no_privacy_query.NoPrivacyAverageQuery()