From 6231d0802db8536ae630a643ffe946c08ecf6eb1 Mon Sep 17 00:00:00 2001 From: Galen Andrew Date: Mon, 25 Mar 2019 10:20:41 -0700 Subject: [PATCH] Cleanup directory structure, add top-level imports and add normalized_query. Moved query classes from dir optimizers into new dir dp_query. Added NormalizedQuery class for queries that divide the output of another query by a constant like GaussianAverageQuery. PiperOrigin-RevId: 240167115 --- privacy/__init__.py | 38 +++++++ privacy/analysis/privacy_ledger.py | 2 +- privacy/analysis/privacy_ledger_test.py | 6 +- privacy/dp_query/BUILD | 96 +++++++++++++++++ privacy/{optimizers => dp_query}/dp_query.py | 0 .../gaussian_query.py | 70 ++---------- .../gaussian_query_test.py | 4 +- .../{optimizers => dp_query}/nested_query.py | 2 +- .../nested_query_test.py | 6 +- .../no_privacy_query.py | 2 +- .../no_privacy_query_test.py | 4 +- privacy/dp_query/normalized_query.py | 102 ++++++++++++++++++ privacy/dp_query/normalized_query_test.py | 47 ++++++++ .../{optimizers => dp_query}/test_utils.py | 0 privacy/optimizers/dp_optimizer.py | 2 +- privacy/optimizers/dp_optimizer_eager_test.py | 2 +- privacy/optimizers/dp_optimizer_test.py | 2 +- 17 files changed, 305 insertions(+), 80 deletions(-) create mode 100644 privacy/dp_query/BUILD rename privacy/{optimizers => dp_query}/dp_query.py (100%) rename privacy/{optimizers => dp_query}/gaussian_query.py (69%) rename privacy/{optimizers => dp_query}/gaussian_query_test.py (98%) rename privacy/{optimizers => dp_query}/nested_query.py (99%) rename privacy/{optimizers => dp_query}/nested_query_test.py (97%) rename privacy/{optimizers => dp_query}/no_privacy_query.py (98%) rename privacy/{optimizers => dp_query}/no_privacy_query_test.py (97%) create mode 100644 privacy/dp_query/normalized_query.py create mode 100644 privacy/dp_query/normalized_query_test.py rename privacy/{optimizers => dp_query}/test_utils.py (100%) diff --git a/privacy/__init__.py b/privacy/__init__.py index e69de29..f8d51a2 
100644 --- a/privacy/__init__.py +++ b/privacy/__init__.py @@ -0,0 +1,38 @@ +# Copyright 2019, The TensorFlow Privacy Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorFlow Privacy library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from privacy.analysis.privacy_ledger import GaussianSumQueryEntry +from privacy.analysis.privacy_ledger import PrivacyLedger +from privacy.analysis.privacy_ledger import QueryWithLedger +from privacy.analysis.privacy_ledger import SampleEntry + +from privacy.dp_query.dp_query import DPQuery +from privacy.dp_query.gaussian_query import GaussianAverageQuery +from privacy.dp_query.gaussian_query import GaussianSumQuery +from privacy.dp_query.nested_query import NestedQuery +from privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery +from privacy.dp_query.no_privacy_query import NoPrivacySumQuery +from privacy.dp_query.normalized_query import NormalizedQuery + +from privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer +from privacy.optimizers.dp_optimizer import DPAdagradOptimizer +from privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer +from privacy.optimizers.dp_optimizer import DPAdamOptimizer +from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer +from privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer diff --git a/privacy/analysis/privacy_ledger.py 
b/privacy/analysis/privacy_ledger.py index 2b53589..eb135ff 100644 --- a/privacy/analysis/privacy_ledger.py +++ b/privacy/analysis/privacy_ledger.py @@ -25,7 +25,7 @@ from distutils.version import LooseVersion import tensorflow as tf from privacy.analysis import tensor_buffer -from privacy.optimizers import dp_query +from privacy.dp_query import dp_query if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): nest = tf.contrib.framework.nest diff --git a/privacy/analysis/privacy_ledger_test.py b/privacy/analysis/privacy_ledger_test.py index c50d29c..8997c26 100644 --- a/privacy/analysis/privacy_ledger_test.py +++ b/privacy/analysis/privacy_ledger_test.py @@ -21,9 +21,9 @@ from __future__ import print_function import tensorflow as tf from privacy.analysis import privacy_ledger -from privacy.optimizers import gaussian_query -from privacy.optimizers import nested_query -from privacy.optimizers import test_utils +from privacy.dp_query import gaussian_query +from privacy.dp_query import nested_query +from privacy.dp_query import test_utils tf.enable_eager_execution() diff --git a/privacy/dp_query/BUILD b/privacy/dp_query/BUILD new file mode 100644 index 0000000..bbaaed3 --- /dev/null +++ b/privacy/dp_query/BUILD @@ -0,0 +1,96 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +py_library( + name = "dp_query", + srcs = ["dp_query.py"], +) + +py_library( + name = "gaussian_query", + srcs = ["gaussian_query.py"], + deps = [ + ":dp_query", + ":normalized_query", + "//third_party/py/tensorflow", + ], +) + +py_test( + name = "gaussian_query_test", + srcs = ["gaussian_query_test.py"], + deps = [ + ":gaussian_query", + ":test_utils", + "//third_party/py/absl/testing:parameterized", + "//third_party/py/tensorflow", + ], +) + +py_library( + name = "no_privacy_query", + srcs = ["no_privacy_query.py"], + deps = [ + ":dp_query", + "//third_party/py/tensorflow", + ], +) + +py_test( + name = "no_privacy_query_test", + srcs = 
["no_privacy_query_test.py"], + deps = [ + ":no_privacy_query", + ":test_utils", + "//third_party/py/absl/testing:parameterized", + "//third_party/py/tensorflow", + ], +) + +py_library( + name = "normalized_query", + srcs = ["normalized_query.py"], + deps = [ + ":dp_query", + "//third_party/py/tensorflow", + ], +) + +py_test( + name = "normalized_query_test", + srcs = ["normalized_query_test.py"], + deps = [ + ":gaussian_query", + ":normalized_query", + ":test_utils", + "//third_party/py/tensorflow", + ], +) + +py_library( + name = "nested_query", + srcs = ["nested_query.py"], + deps = [ + ":dp_query", + "//third_party/py/tensorflow", + ], +) + +py_test( + name = "nested_query_test", + srcs = ["nested_query_test.py"], + deps = [ + ":gaussian_query", + ":nested_query", + ":test_utils", + "//third_party/py/absl/testing:parameterized", + "//third_party/py/tensorflow", + ], +) + +py_library( + name = "test_utils", + srcs = ["test_utils.py"], + deps = [], +) diff --git a/privacy/optimizers/dp_query.py b/privacy/dp_query/dp_query.py similarity index 100% rename from privacy/optimizers/dp_query.py rename to privacy/dp_query/dp_query.py diff --git a/privacy/optimizers/gaussian_query.py b/privacy/dp_query/gaussian_query.py similarity index 69% rename from privacy/optimizers/gaussian_query.py rename to privacy/dp_query/gaussian_query.py index b85b9e5..a29648d 100644 --- a/privacy/optimizers/gaussian_query.py +++ b/privacy/dp_query/gaussian_query.py @@ -22,7 +22,8 @@ from __future__ import print_function from distutils.version import LooseVersion import tensorflow as tf -from privacy.optimizers import dp_query +from privacy.dp_query import dp_query +from privacy.dp_query import normalized_query if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): nest = tf.contrib.framework.nest @@ -132,7 +133,7 @@ class GaussianSumQuery(dp_query.DPQuery): return nest.map_structure(add_noise, sample_state), global_state -class GaussianAverageQuery(dp_query.DPQuery): +class 
GaussianAverageQuery(normalized_query.NormalizedQuery): """Implements DPQuery interface for Gaussian average queries. Accumulates clipped vectors, adds Gaussian noise, and normalizes. @@ -159,65 +160,6 @@ class GaussianAverageQuery(dp_query.DPQuery): the sum). ledger: The privacy ledger to which queries should be recorded. """ - self._numerator = GaussianSumQuery(l2_norm_clip, sum_stddev, ledger) - self._denominator = tf.to_float(denominator) - - def initial_global_state(self): - """Returns the initial global state for the GaussianAverageQuery.""" - # GaussianAverageQuery has no global state beyond the numerator state. - return self._numerator.initial_global_state() - - def derive_sample_params(self, global_state): - """Given the global state, derives parameters to use for the next sample. - - Args: - global_state: The current global state. - - Returns: - Parameters to use to process records in the next sample. - """ - return self._numerator.derive_sample_params(global_state) - - def initial_sample_state(self, global_state, tensors): - """Returns an initial state to use for the next sample. - - Args: - global_state: The current global state. - tensors: A structure of tensors used as a template to create the initial - sample state. - - Returns: An initial sample state. - """ - # GaussianAverageQuery has no sample state beyond the sum state. - return self._numerator.initial_sample_state(global_state, tensors) - - def accumulate_record(self, params, sample_state, record): - """Accumulates a single record into the sample state. - - Args: - params: The parameters for the sample. - sample_state: The current sample state. - record: The record to accumulate. - - Returns: - The updated sample state. - """ - return self._numerator.accumulate_record(params, sample_state, record) - - def get_noised_result(self, sample_state, global_state): - """Gets noised average after all records of sample have been accumulated. 
- - Args: - sample_state: The sample state after all records have been accumulated. - global_state: The global state. - - Returns: - A tuple (estimate, new_global_state) where "estimate" is the estimated - average of the records and "new_global_state" is the updated global state. - """ - noised_sum, new_sum_global_state = self._numerator.get_noised_result( - sample_state, global_state) - def normalize(v): - return tf.truediv(v, self._denominator) - - return nest.map_structure(normalize, noised_sum), new_sum_global_state + super(GaussianAverageQuery, self).__init__( + numerator_query=GaussianSumQuery(l2_norm_clip, sum_stddev, ledger), + denominator=tf.to_float(denominator)) diff --git a/privacy/optimizers/gaussian_query_test.py b/privacy/dp_query/gaussian_query_test.py similarity index 98% rename from privacy/optimizers/gaussian_query_test.py rename to privacy/dp_query/gaussian_query_test.py index 4e0c57d..08107e6 100644 --- a/privacy/optimizers/gaussian_query_test.py +++ b/privacy/dp_query/gaussian_query_test.py @@ -23,8 +23,8 @@ import numpy as np from six.moves import xrange import tensorflow as tf -from privacy.optimizers import gaussian_query -from privacy.optimizers import test_utils +from privacy.dp_query import gaussian_query +from privacy.dp_query import test_utils class GaussianQueryTest(tf.test.TestCase, parameterized.TestCase): diff --git a/privacy/optimizers/nested_query.py b/privacy/dp_query/nested_query.py similarity index 99% rename from privacy/optimizers/nested_query.py rename to privacy/dp_query/nested_query.py index 3b5daa8..8e10c09 100644 --- a/privacy/optimizers/nested_query.py +++ b/privacy/dp_query/nested_query.py @@ -22,7 +22,7 @@ from __future__ import print_function from distutils.version import LooseVersion import tensorflow as tf -from privacy.optimizers import dp_query +from privacy.dp_query import dp_query if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): nest = tf.contrib.framework.nest diff --git 
a/privacy/optimizers/nested_query_test.py b/privacy/dp_query/nested_query_test.py similarity index 97% rename from privacy/optimizers/nested_query_test.py rename to privacy/dp_query/nested_query_test.py index d46af2f..9796584 100644 --- a/privacy/optimizers/nested_query_test.py +++ b/privacy/dp_query/nested_query_test.py @@ -24,9 +24,9 @@ from distutils.version import LooseVersion import numpy as np import tensorflow as tf -from privacy.optimizers import gaussian_query -from privacy.optimizers import nested_query -from privacy.optimizers import test_utils +from privacy.dp_query import gaussian_query +from privacy.dp_query import nested_query +from privacy.dp_query import test_utils if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): nest = tf.contrib.framework.nest diff --git a/privacy/optimizers/no_privacy_query.py b/privacy/dp_query/no_privacy_query.py similarity index 98% rename from privacy/optimizers/no_privacy_query.py rename to privacy/dp_query/no_privacy_query.py index 91aa3ec..a40e2c4 100644 --- a/privacy/optimizers/no_privacy_query.py +++ b/privacy/dp_query/no_privacy_query.py @@ -20,7 +20,7 @@ from __future__ import print_function from distutils.version import LooseVersion import tensorflow as tf -from privacy.optimizers import dp_query +from privacy.dp_query import dp_query if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): nest = tf.contrib.framework.nest diff --git a/privacy/optimizers/no_privacy_query_test.py b/privacy/dp_query/no_privacy_query_test.py similarity index 97% rename from privacy/optimizers/no_privacy_query_test.py rename to privacy/dp_query/no_privacy_query_test.py index fa73b00..f408dc5 100644 --- a/privacy/optimizers/no_privacy_query_test.py +++ b/privacy/dp_query/no_privacy_query_test.py @@ -21,8 +21,8 @@ from __future__ import print_function from absl.testing import parameterized import tensorflow as tf -from privacy.optimizers import no_privacy_query -from privacy.optimizers import test_utils +from privacy.dp_query 
import no_privacy_query +from privacy.dp_query import test_utils class NoPrivacyQueryTest(tf.test.TestCase, parameterized.TestCase): diff --git a/privacy/dp_query/normalized_query.py b/privacy/dp_query/normalized_query.py new file mode 100644 index 0000000..de78fe0 --- /dev/null +++ b/privacy/dp_query/normalized_query.py @@ -0,0 +1,102 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implements DPQuery interface for normalized queries. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from privacy.dp_query import dp_query + +nest = tf.contrib.framework.nest + + +class NormalizedQuery(dp_query.DPQuery): + """DPQuery for queries with a DPQuery numerator and fixed denominator.""" + + def __init__(self, numerator_query, denominator): + """Initializer for NormalizedQuery. + + Args: + numerator_query: A DPQuery for the numerator. + denominator: A value for the denominator. + """ + self._numerator = numerator_query + self._denominator = tf.to_float(denominator) + + def initial_global_state(self): + """Returns the initial global state for the NormalizedQuery.""" + # NormalizedQuery has no global state beyond the numerator state. + return self._numerator.initial_global_state() + + def derive_sample_params(self, global_state): + """Given the global state, derives parameters to use for the next sample. 
+ + Args: + global_state: The current global state. + + Returns: + Parameters to use to process records in the next sample. + """ + return self._numerator.derive_sample_params(global_state) + + def initial_sample_state(self, global_state, tensors): + """Returns an initial state to use for the next sample. + + Args: + global_state: The current global state. + tensors: A structure of tensors used as a template to create the initial + sample state. + + Returns: An initial sample state. + """ + # NormalizedQuery has no sample state beyond the numerator state. + return self._numerator.initial_sample_state(global_state, tensors) + + def accumulate_record(self, params, sample_state, record): + """Accumulates a single record into the sample state. + + Args: + params: The parameters for the sample. + sample_state: The current sample state. + record: The record to accumulate. + + Returns: + The updated sample state. + """ + return self._numerator.accumulate_record(params, sample_state, record) + + def get_noised_result(self, sample_state, global_state): + """Gets the noised, normalized result after all records are accumulated. + + Args: + sample_state: The sample state after all records have been accumulated. + global_state: The global state. + + Returns: + A tuple (estimate, new_global_state): the noised numerator result divided + by the denominator, and the numerator query's updated global state. + """ + noised_sum, new_sum_global_state = self._numerator.get_noised_result( + sample_state, global_state) + def normalize(v): + return tf.truediv(v, self._denominator) + + return nest.map_structure(normalize, noised_sum), new_sum_global_state + + diff --git a/privacy/dp_query/normalized_query_test.py b/privacy/dp_query/normalized_query_test.py new file mode 100644 index 0000000..9f30fba --- /dev/null +++ b/privacy/dp_query/normalized_query_test.py @@ -0,0 +1,47 @@ +# Copyright 2019, The TensorFlow Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for NormalizedQuery.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from privacy.dp_query import gaussian_query +from privacy.dp_query import normalized_query +from privacy.dp_query import test_utils + + +class NormalizedQueryTest(tf.test.TestCase): + + def test_normalization(self): + with self.cached_session() as sess: + record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. + record2 = tf.constant([4.0, -3.0]) # Not clipped. 
+ + sum_query = gaussian_query.GaussianSumQuery( + l2_norm_clip=5.0, stddev=0.0) + query = normalized_query.NormalizedQuery( + numerator_query=sum_query, denominator=2.0) + + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = sess.run(query_result) + expected = [0.5, 0.5] + self.assertAllClose(result, expected) + + +if __name__ == '__main__': + tf.test.main() diff --git a/privacy/optimizers/test_utils.py b/privacy/dp_query/test_utils.py similarity index 100% rename from privacy/optimizers/test_utils.py rename to privacy/dp_query/test_utils.py diff --git a/privacy/optimizers/dp_optimizer.py b/privacy/optimizers/dp_optimizer.py index 12dafc6..1318259 100644 --- a/privacy/optimizers/dp_optimizer.py +++ b/privacy/optimizers/dp_optimizer.py @@ -20,7 +20,7 @@ from __future__ import print_function import tensorflow as tf from privacy.analysis import privacy_ledger -from privacy.optimizers import gaussian_query +from privacy.dp_query import gaussian_query def make_optimizer_class(cls): diff --git a/privacy/optimizers/dp_optimizer_eager_test.py b/privacy/optimizers/dp_optimizer_eager_test.py index 8028fc2..cfb2ccb 100644 --- a/privacy/optimizers/dp_optimizer_eager_test.py +++ b/privacy/optimizers/dp_optimizer_eager_test.py @@ -22,8 +22,8 @@ import numpy as np import tensorflow as tf from privacy.analysis import privacy_ledger +from privacy.dp_query import gaussian_query from privacy.optimizers import dp_optimizer -from privacy.optimizers import gaussian_query class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): diff --git a/privacy/optimizers/dp_optimizer_test.py b/privacy/optimizers/dp_optimizer_test.py index 45eb11e..b048210 100644 --- a/privacy/optimizers/dp_optimizer_test.py +++ b/privacy/optimizers/dp_optimizer_test.py @@ -23,8 +23,8 @@ import numpy as np import tensorflow as tf from privacy.analysis import privacy_ledger +from privacy.dp_query import gaussian_query from privacy.optimizers import dp_optimizer -from 
privacy.optimizers import gaussian_query class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase):