From 313edfc80c10f1ae770f9528f641da7662911f40 Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Tue, 8 Oct 2019 11:37:22 -0700 Subject: [PATCH] Update setup.py to new release. PiperOrigin-RevId: 273569076 --- tensorflow_privacy/CONTRIBUTING.md | 28 + tensorflow_privacy/LICENSE | 202 ++++++ tensorflow_privacy/README.md | 113 ++++ tensorflow_privacy/privacy/BUILD | 21 + tensorflow_privacy/privacy/__init__.py | 56 ++ .../privacy/analysis/__init__.py | 0 .../analysis/compute_dp_sgd_privacy.py | 97 +++ .../privacy/analysis/privacy_ledger.py | 257 ++++++++ .../privacy/analysis/privacy_ledger_test.py | 137 ++++ .../privacy/analysis/rdp_accountant.py | 318 +++++++++ .../privacy/analysis/rdp_accountant_test.py | 177 +++++ .../privacy/analysis/tensor_buffer.py | 134 ++++ .../analysis/tensor_buffer_test_eager.py | 84 +++ .../analysis/tensor_buffer_test_graph.py | 72 +++ tensorflow_privacy/privacy/bolt_on/README.md | 67 ++ .../privacy/bolt_on/__init__.py | 29 + tensorflow_privacy/privacy/bolt_on/losses.py | 304 +++++++++ .../privacy/bolt_on/losses_test.py | 431 +++++++++++++ tensorflow_privacy/privacy/bolt_on/models.py | 303 +++++++++ .../privacy/bolt_on/models_test.py | 548 ++++++++++++++++ .../privacy/bolt_on/optimizers.py | 388 +++++++++++ .../privacy/bolt_on/optimizers_test.py | 579 +++++++++++++++++ tensorflow_privacy/privacy/dp_query/BUILD | 140 ++++ .../privacy/dp_query/__init__.py | 0 .../privacy/dp_query/dp_query.py | 225 +++++++ .../privacy/dp_query/gaussian_query.py | 145 +++++ .../privacy/dp_query/gaussian_query_test.py | 161 +++++ .../privacy/dp_query/nested_query.py | 116 ++++ .../privacy/dp_query/nested_query_test.py | 148 +++++ .../privacy/dp_query/no_privacy_query.py | 70 ++ .../privacy/dp_query/no_privacy_query_test.py | 77 +++ .../privacy/dp_query/normalized_query.py | 97 +++ .../privacy/dp_query/normalized_query_test.py | 47 ++ .../quantile_adaptive_clip_sum_query.py | 288 +++++++++ .../quantile_adaptive_clip_sum_query_test.py | 296 +++++++++ .../privacy/dp_query/test_utils.py | 49 ++ .../privacy/optimizers/__init__.py | 0 .../privacy/optimizers/dp_optimizer.py | 239 +++++++ .../optimizers/dp_optimizer_eager_test.py | 130 ++++ .../privacy/optimizers/dp_optimizer_test.py | 241 +++++++ .../optimizers/dp_optimizer_vectorized.py | 153 +++++ .../dp_optimizer_vectorized_test.py | 204 ++++++ tensorflow_privacy/requirements.txt | 3 + tensorflow_privacy/research/README.md | 9 + .../research/pate_2017/README.md | 123 ++++ .../research/pate_2017/__init__.py | 1 + .../research/pate_2017/aggregation.py | 130 ++++ .../research/pate_2017/analysis.py | 304 +++++++++ .../research/pate_2017/deep_cnn.py | 603 ++++++++++++++++++ .../research/pate_2017/input.py | 396 ++++++++++++ .../research/pate_2017/metrics.py | 49 ++ .../research/pate_2017/train_student.py | 205 ++++++ ...nt_mnist_250_lap_20_count_50_epochs_600.sh | 25 + .../research/pate_2017/train_teachers.py | 101 +++ .../research/pate_2017/utils.py | 35 + .../research/pate_2018/ICLR2018/README.md | 61 ++ .../research/pate_2018/ICLR2018/download.py | 43 ++ .../pate_2018/ICLR2018/generate_figures.sh | 43 ++ .../pate_2018/ICLR2018/generate_table.sh | 93 +++ .../generate_table_data_independent.sh | 99 +++ .../research/pate_2018/ICLR2018/plot_ls_q.py | 105 +++ .../pate_2018/ICLR2018/plot_partition.py | 412 ++++++++++++ .../pate_2018/ICLR2018/plots_for_slides.py | 283 ++++++++ .../pate_2018/ICLR2018/rdp_bucketized.py | 264 ++++++++ .../pate_2018/ICLR2018/rdp_cumulative.py | 378 +++++++++++ .../ICLR2018/smooth_sensitivity_table.py | 358 +++++++++++ 
.../ICLR2018/utility_queries_answered.py | 90 +++ .../research/pate_2018/README.md | 71 +++ tensorflow_privacy/research/pate_2018/core.py | 370 +++++++++++ .../research/pate_2018/core_test.py | 124 ++++ .../research/pate_2018/smooth_sensitivity.py | 419 ++++++++++++ .../pate_2018/smooth_sensitivity_test.py | 126 ++++ tensorflow_privacy/setup.py | 32 + tensorflow_privacy/tutorials/README.md | 129 ++++ .../tutorials/bolton_tutorial.py | 187 ++++++ .../tutorials/lm_dpsgd_tutorial.py | 225 +++++++ .../tutorials/mnist_dpsgd_tutorial.py | 212 ++++++ .../tutorials/mnist_dpsgd_tutorial_eager.py | 153 +++++ .../tutorials/mnist_dpsgd_tutorial_keras.py | 150 +++++ .../mnist_dpsgd_tutorial_vectorized.py | 207 ++++++ .../tutorials/mnist_lr_tutorial.py | 250 ++++++++ .../tutorials/walkthrough/mnist_scratch.py | 134 ++++ .../tutorials/walkthrough/walkthrough.md | 431 +++++++++++++ 83 files changed, 14604 insertions(+) create mode 100644 tensorflow_privacy/CONTRIBUTING.md create mode 100644 tensorflow_privacy/LICENSE create mode 100644 tensorflow_privacy/README.md create mode 100644 tensorflow_privacy/privacy/BUILD create mode 100644 tensorflow_privacy/privacy/__init__.py create mode 100644 tensorflow_privacy/privacy/analysis/__init__.py create mode 100644 tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py create mode 100644 tensorflow_privacy/privacy/analysis/privacy_ledger.py create mode 100644 tensorflow_privacy/privacy/analysis/privacy_ledger_test.py create mode 100644 tensorflow_privacy/privacy/analysis/rdp_accountant.py create mode 100644 tensorflow_privacy/privacy/analysis/rdp_accountant_test.py create mode 100644 tensorflow_privacy/privacy/analysis/tensor_buffer.py create mode 100644 tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py create mode 100644 tensorflow_privacy/privacy/analysis/tensor_buffer_test_graph.py create mode 100644 tensorflow_privacy/privacy/bolt_on/README.md create mode 100644 tensorflow_privacy/privacy/bolt_on/__init__.py create mode 100644 tensorflow_privacy/privacy/bolt_on/losses.py create mode 100644 tensorflow_privacy/privacy/bolt_on/losses_test.py create mode 100644 tensorflow_privacy/privacy/bolt_on/models.py create mode 100644 tensorflow_privacy/privacy/bolt_on/models_test.py create mode 100644 tensorflow_privacy/privacy/bolt_on/optimizers.py create mode 100644 tensorflow_privacy/privacy/bolt_on/optimizers_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/BUILD create mode 100644 tensorflow_privacy/privacy/dp_query/__init__.py create mode 100644 tensorflow_privacy/privacy/dp_query/dp_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/gaussian_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/gaussian_query_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/nested_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/nested_query_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/no_privacy_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/no_privacy_query_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/normalized_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/normalized_query_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query.py create mode 100644 tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py create mode 100644 tensorflow_privacy/privacy/dp_query/test_utils.py create mode 100644 tensorflow_privacy/privacy/optimizers/__init__.py create mode 100644 
tensorflow_privacy/privacy/optimizers/dp_optimizer.py create mode 100644 tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py create mode 100644 tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py create mode 100644 tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py create mode 100644 tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py create mode 100644 tensorflow_privacy/requirements.txt create mode 100644 tensorflow_privacy/research/README.md create mode 100644 tensorflow_privacy/research/pate_2017/README.md create mode 100644 tensorflow_privacy/research/pate_2017/__init__.py create mode 100644 tensorflow_privacy/research/pate_2017/aggregation.py create mode 100644 tensorflow_privacy/research/pate_2017/analysis.py create mode 100644 tensorflow_privacy/research/pate_2017/deep_cnn.py create mode 100644 tensorflow_privacy/research/pate_2017/input.py create mode 100644 tensorflow_privacy/research/pate_2017/metrics.py create mode 100644 tensorflow_privacy/research/pate_2017/train_student.py create mode 100644 tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh create mode 100644 tensorflow_privacy/research/pate_2017/train_teachers.py create mode 100644 tensorflow_privacy/research/pate_2017/utils.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/README.md create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/download.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/plot_partition.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/plots_for_slides.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/rdp_bucketized.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/rdp_cumulative.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/smooth_sensitivity_table.py create mode 100644 tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py create mode 100644 tensorflow_privacy/research/pate_2018/README.md create mode 100644 tensorflow_privacy/research/pate_2018/core.py create mode 100644 tensorflow_privacy/research/pate_2018/core_test.py create mode 100644 tensorflow_privacy/research/pate_2018/smooth_sensitivity.py create mode 100644 tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py create mode 100644 tensorflow_privacy/setup.py create mode 100644 tensorflow_privacy/tutorials/README.md create mode 100644 tensorflow_privacy/tutorials/bolton_tutorial.py create mode 100644 tensorflow_privacy/tutorials/lm_dpsgd_tutorial.py create mode 100644 tensorflow_privacy/tutorials/mnist_dpsgd_tutorial.py create mode 100644 tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py create mode 100644 tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py create mode 100644 tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_vectorized.py create mode 100644 tensorflow_privacy/tutorials/mnist_lr_tutorial.py create mode 100644 tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py create mode 100644 tensorflow_privacy/tutorials/walkthrough/walkthrough.md diff --git a/tensorflow_privacy/CONTRIBUTING.md 
b/tensorflow_privacy/CONTRIBUTING.md new file mode 100644 index 0000000..2dfad80 --- /dev/null +++ b/tensorflow_privacy/CONTRIBUTING.md @@ -0,0 +1,28 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows Google's +[Open Source Community Guidelines](https://opensource.google.com/conduct/). diff --git a/tensorflow_privacy/LICENSE b/tensorflow_privacy/LICENSE new file mode 100644 index 0000000..0a849ed --- /dev/null +++ b/tensorflow_privacy/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018, The TensorFlow Privacy Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/tensorflow_privacy/README.md b/tensorflow_privacy/README.md new file mode 100644 index 0000000..1c8b009 --- /dev/null +++ b/tensorflow_privacy/README.md @@ -0,0 +1,113 @@ +# TensorFlow Privacy + +This repository contains the source code for TensorFlow Privacy, a Python +library that includes implementations of TensorFlow optimizers for training +machine learning models with differential privacy. The library comes with +tutorials and analysis tools for computing the privacy guarantees provided. + +The TensorFlow Privacy library is under continual development, always welcoming +contributions. In particular, we always welcome help towards resolving the +issues currently open. + +## Setting up TensorFlow Privacy + +### Dependencies + +This library uses [TensorFlow](https://www.tensorflow.org/) to define machine +learning models. Therefore, installing TensorFlow (>= 1.14) is a pre-requisite. 
+You can find instructions [here](https://www.tensorflow.org/install/). For +better performance, it is also recommended to install TensorFlow with GPU +support (detailed instructions on how to do this are available in the TensorFlow +installation documentation). + +In addition to TensorFlow and its dependencies, other prerequisites are: + + * `scipy` >= 0.17 + + * `mpmath` (for testing) + + * `tensorflow_datasets` (for the RNN tutorial `lm_dpsgd_tutorial.py` only) + +### Installing TensorFlow Privacy + +First, clone this GitHub repository into a directory of your choice: + +``` +git clone https://github.com/tensorflow/privacy +``` + +You can then install the local package in "editable" mode in order to add it to +your `PYTHONPATH`: + +``` +cd privacy +pip install -e . +``` + +If you'd like to make contributions, we recommend first forking the repository +and then cloning your fork rather than cloning this repository directly. + +## Contributing + +Contributions are welcomed! Bug fixes and new features can be initiated through +GitHub pull requests. To speed the code review process, we ask that: + +* When making code contributions to TensorFlow Privacy, you follow the `PEP8 + with two spaces` coding style (the same as the one used by TensorFlow) in + your pull requests. In most cases this can be done by running `autopep8 -i + --indent-size 2 ` on the files you have edited. + +* You should also check your code with pylint and TensorFlow's pylint + [configuration file](https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/tools/ci_build/pylintrc) + by running `pylint --rcfile=/path/to/the/tf/rcfile `. + +* When making your first pull request, you + [sign the Google CLA](https://cla.developers.google.com/clas) + +* We do not accept pull requests that add git submodules because of + [the problems that arise when maintaining git submodules](https://medium.com/@porteneuve/mastering-git-submodules-34c65e940407) + +## Tutorials directory + +To help you get started with the functionalities provided by this library, we +provide a detailed walkthrough [here](tutorials/walkthrough/walkthrough.md) that +will teach you how to wrap existing optimizers +(e.g., SGD, Adam, ...) into their differentially private counterparts using +TensorFlow (TF) Privacy. You will also learn how to tune the parameters +introduced by differentially private optimization and how to +measure the privacy guarantees provided using analysis tools included in TF +Privacy. + +In addition, the +`tutorials/` folder comes with scripts demonstrating how to use the library +features. The list of tutorials is described in the README included in the +tutorials directory. + +NOTE: the tutorials are maintained carefully. However, they are not considered +part of the API and they can change at any time without warning. You should not +write 3rd party code that imports the tutorials and expect that the interface +will not break. + +## Research directory + +This folder contains code to reproduce results from research papers related to +privacy in machine learning. It is not maintained as carefully as the tutorials +directory, but rather intended as a convenient archive. 
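+
+To give a flavor of what the walkthrough and tutorials above cover, here is a
+minimal sketch of swapping a standard TensorFlow optimizer for the
+differentially private counterpart exported by this library. The
+hyperparameter values are illustrative only, and `vector_loss` stands in for a
+per-example (unreduced) loss tensor defined by your own model:
+
+```
+import tensorflow as tf
+
+from tensorflow_privacy.privacy.optimizers import dp_optimizer
+
+# Per-microbatch gradients are clipped to l2_norm_clip, and Gaussian noise with
+# standard deviation l2_norm_clip * noise_multiplier is added to their sum.
+optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
+    l2_norm_clip=1.0,
+    noise_multiplier=1.1,
+    num_microbatches=256,  # must evenly divide the batch size
+    learning_rate=0.15)
+
+# DP optimizers expect a vector of per-example losses, not a scalar mean.
+train_op = optimizer.minimize(loss=vector_loss,
+                              global_step=tf.train.get_global_step())
+```
+
+The analysis tools mentioned above (for example, the `compute_dp_sgd_privacy`
+script) can then be used to estimate the (epsilon, delta) guarantee obtained
+after training with such an optimizer.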
+ +## Remarks + +The content of this repository supersedes the following existing folder in the +tensorflow/models [repository](https://github.com/tensorflow/models/tree/master/research/differential_privacy) + +## Contacts + +If you have any questions that cannot be addressed by raising an issue, feel +free to contact: + +* Galen Andrew (@galenmandrew) +* Steve Chien (@schien1729) +* Nicolas Papernot (@npapernot) + +## Copyright + +Copyright 2019 - Google LLC diff --git a/tensorflow_privacy/privacy/BUILD b/tensorflow_privacy/privacy/BUILD new file mode 100644 index 0000000..6efaad7 --- /dev/null +++ b/tensorflow_privacy/privacy/BUILD @@ -0,0 +1,21 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +py_library( + name = "privacy", + srcs = ["__init__.py"], + deps = [ + "//third_party/py/tensorflow_privacy/privacy/analysis:privacy_ledger", + "//third_party/py/tensorflow_privacy/privacy/analysis:rdp_accountant", + "//third_party/py/tensorflow_privacy/privacy/dp_query", + "//third_party/py/tensorflow_privacy/privacy/dp_query:gaussian_query", + "//third_party/py/tensorflow_privacy/privacy/dp_query:nested_query", + "//third_party/py/tensorflow_privacy/privacy/dp_query:no_privacy_query", + "//third_party/py/tensorflow_privacy/privacy/dp_query:normalized_query", + "//third_party/py/tensorflow_privacy/privacy/dp_query:quantile_adaptive_clip_sum_query", + "//third_party/py/tensorflow_privacy/privacy/optimizers:dp_optimizer", + ], +) diff --git a/tensorflow_privacy/privacy/__init__.py b/tensorflow_privacy/privacy/__init__.py new file mode 100644 index 0000000..de38736 --- /dev/null +++ b/tensorflow_privacy/privacy/__init__.py @@ -0,0 +1,56 @@ +# Copyright 2019, The TensorFlow Privacy Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorFlow Privacy library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +# pylint: disable=g-import-not-at-top + +if hasattr(sys, 'skip_tf_privacy_import'): # Useful for standalone scripts. 
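+  # For example, compute_dp_sgd_privacy.py sets `sys.skip_tf_privacy_import = True`
+  # before importing this package, so the imports below (and their TensorFlow
+  # dependency) are skipped.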
+ pass +else: + from tensorflow_privacy.privacy.analysis.privacy_ledger import GaussianSumQueryEntry + from tensorflow_privacy.privacy.analysis.privacy_ledger import PrivacyLedger + from tensorflow_privacy.privacy.analysis.privacy_ledger import QueryWithLedger + from tensorflow_privacy.privacy.analysis.privacy_ledger import SampleEntry + + from tensorflow_privacy.privacy.dp_query.dp_query import DPQuery + from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianAverageQuery + from tensorflow_privacy.privacy.dp_query.gaussian_query import GaussianSumQuery + from tensorflow_privacy.privacy.dp_query.nested_query import NestedQuery + from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacyAverageQuery + from tensorflow_privacy.privacy.dp_query.no_privacy_query import NoPrivacySumQuery + from tensorflow_privacy.privacy.dp_query.normalized_query import NormalizedQuery + from tensorflow_privacy.privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipSumQuery + from tensorflow_privacy.privacy.dp_query.quantile_adaptive_clip_sum_query import QuantileAdaptiveClipAverageQuery + + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdagradOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer + from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentOptimizer + + try: + from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel + from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn + from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin + from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy + from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber + except ImportError: + print('module `bolt_on` was not found in this version of TF Privacy') diff --git a/tensorflow_privacy/privacy/analysis/__init__.py b/tensorflow_privacy/privacy/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py new file mode 100644 index 0000000..296618b --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py @@ -0,0 +1,97 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Command-line script for computing privacy of a model trained with DP-SGD. + +The script applies the RDP accountant to estimate privacy budget of an iterated +Sampled Gaussian Mechanism. The mechanism's parameters are controlled by flags. 
+ +Example: + compute_dp_sgd_privacy + --N=60000 \ + --batch_size=256 \ + --noise_multiplier=1.12 \ + --epochs=60 \ + --delta=1e-5 + +The output states that DP-SGD with these parameters satisfies (2.92, 1e-5)-DP. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import sys + +from absl import app +from absl import flags + +# Opting out of loading all sibling packages and their dependencies. +sys.skip_tf_privacy_import = True + +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp # pylint: disable=g-import-not-at-top +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent + +FLAGS = flags.FLAGS + +flags.DEFINE_integer('N', None, 'Total number of examples') +flags.DEFINE_integer('batch_size', None, 'Batch size') +flags.DEFINE_float('noise_multiplier', None, 'Noise multiplier for DP-SGD') +flags.DEFINE_float('epochs', None, 'Number of epochs (may be fractional)') +flags.DEFINE_float('delta', 1e-6, 'Target delta') + +flags.mark_flag_as_required('N') +flags.mark_flag_as_required('batch_size') +flags.mark_flag_as_required('noise_multiplier') +flags.mark_flag_as_required('epochs') + + +def apply_dp_sgd_analysis(q, sigma, steps, orders, delta): + """Compute and print results of DP-SGD analysis.""" + + # compute_rdp requires that sigma be the ratio of the standard deviation of + # the Gaussian noise to the l2-sensitivity of the function to which it is + # added. Hence, sigma here corresponds to the `noise_multiplier` parameter + # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer + rdp = compute_rdp(q, sigma, steps, orders) + + eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta) + + print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated' + ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ') + print('differential privacy with eps = {:.3g} and delta = {}.'.format( + eps, delta)) + print('The optimal RDP order is {}.'.format(opt_order)) + + if opt_order == max(orders) or opt_order == min(orders): + print('The privacy estimate is likely to be improved by expanding ' + 'the set of orders.') + + +def main(argv): + del argv # argv is not used. + + q = FLAGS.batch_size / FLAGS.N # q - the sampling ratio. + if q > 1: + raise app.UsageError('N must be larger than the batch size.') + orders = ([1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5] + + list(range(5, 64)) + [128, 256, 512]) + steps = int(math.ceil(FLAGS.epochs * FLAGS.N / FLAGS.batch_size)) + + apply_dp_sgd_analysis(q, FLAGS.noise_multiplier, steps, orders, FLAGS.delta) + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/privacy/analysis/privacy_ledger.py b/tensorflow_privacy/privacy/analysis/privacy_ledger.py new file mode 100644 index 0000000..22eb1f0 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/privacy_ledger.py @@ -0,0 +1,257 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""PrivacyLedger class for keeping a record of private queries.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from distutils.version import LooseVersion +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import tensor_buffer +from tensorflow_privacy.privacy.dp_query import dp_query + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + nest = tf.contrib.framework.nest +else: + nest = tf.nest + +SampleEntry = collections.namedtuple( # pylint: disable=invalid-name + 'SampleEntry', ['population_size', 'selection_probability', 'queries']) + +GaussianSumQueryEntry = collections.namedtuple( # pylint: disable=invalid-name + 'GaussianSumQueryEntry', ['l2_norm_bound', 'noise_stddev']) + + +def format_ledger(sample_array, query_array): + """Converts array representation into a list of SampleEntries.""" + samples = [] + query_pos = 0 + sample_pos = 0 + for sample in sample_array: + population_size, selection_probability, num_queries = sample + queries = [] + for _ in range(int(num_queries)): + query = query_array[query_pos] + assert int(query[0]) == sample_pos + queries.append(GaussianSumQueryEntry(*query[1:])) + query_pos += 1 + samples.append(SampleEntry(population_size, selection_probability, queries)) + sample_pos += 1 + return samples + + +class PrivacyLedger(object): + """Class for keeping a record of private queries. + + The PrivacyLedger keeps a record of all queries executed over a given dataset + for the purpose of computing privacy guarantees. + """ + + def __init__(self, + population_size, + selection_probability): + """Initialize the PrivacyLedger. + + Args: + population_size: An integer (may be variable) specifying the size of the + population, i.e. size of the training data used in each epoch. + selection_probability: A float (may be variable) specifying the + probability each record is included in a sample. + + Raises: + ValueError: If selection_probability is 0. + """ + self._population_size = population_size + self._selection_probability = selection_probability + + if tf.executing_eagerly(): + if tf.equal(selection_probability, 0): + raise ValueError('Selection probability cannot be 0.') + init_capacity = tf.cast(tf.ceil(1 / selection_probability), tf.int32) + else: + if selection_probability == 0: + raise ValueError('Selection probability cannot be 0.') + init_capacity = np.int(np.ceil(1 / selection_probability)) + + # The query buffer stores rows corresponding to GaussianSumQueryEntries. + self._query_buffer = tensor_buffer.TensorBuffer( + init_capacity, [3], tf.float32, 'query') + self._sample_var = tf.Variable( + initial_value=tf.zeros([3]), trainable=False, name='sample') + + # The sample buffer stores rows corresponding to SampleEntries. + self._sample_buffer = tensor_buffer.TensorBuffer( + init_capacity, [3], tf.float32, 'sample') + self._sample_count = tf.Variable( + initial_value=0.0, trainable=False, name='sample_count') + self._query_count = tf.Variable( + initial_value=0.0, trainable=False, name='query_count') + try: + # Newer versions of TF + self._cs = tf.CriticalSection() + except AttributeError: + # Older versions of TF + self._cs = tf.contrib.framework.CriticalSection() + + def record_sum_query(self, l2_norm_bound, noise_stddev): + """Records that a query was issued. 
+ + Args: + l2_norm_bound: The maximum l2 norm of the tensor group in the query. + noise_stddev: The standard deviation of the noise applied to the sum. + + Returns: + An operation recording the sum query to the ledger. + """ + + def _do_record_query(): + with tf.control_dependencies( + [tf.assign(self._query_count, self._query_count + 1)]): + return self._query_buffer.append( + [self._sample_count, l2_norm_bound, noise_stddev]) + + return self._cs.execute(_do_record_query) + + def finalize_sample(self): + """Finalizes sample and records sample ledger entry.""" + with tf.control_dependencies([ + tf.assign(self._sample_var, [ + self._population_size, self._selection_probability, + self._query_count + ]) + ]): + with tf.control_dependencies([ + tf.assign(self._sample_count, self._sample_count + 1), + tf.assign(self._query_count, 0) + ]): + return self._sample_buffer.append(self._sample_var) + + def get_unformatted_ledger(self): + return self._sample_buffer.values, self._query_buffer.values + + def get_formatted_ledger(self, sess): + """Gets the formatted query ledger. + + Args: + sess: The tensorflow session in which the ledger was created. + + Returns: + The query ledger as a list of SampleEntries. + """ + sample_array = sess.run(self._sample_buffer.values) + query_array = sess.run(self._query_buffer.values) + + return format_ledger(sample_array, query_array) + + def get_formatted_ledger_eager(self): + """Gets the formatted query ledger. + + Returns: + The query ledger as a list of SampleEntries. + """ + sample_array = self._sample_buffer.values.numpy() + query_array = self._query_buffer.values.numpy() + + return format_ledger(sample_array, query_array) + + +class QueryWithLedger(dp_query.DPQuery): + """A class for DP queries that record events to a PrivacyLedger. + + QueryWithLedger should be the top-level query in a structure of queries that + may include sum queries, nested queries, etc. It should simply wrap another + query and contain a reference to the ledger. Any contained queries (including + those contained in the leaves of a nested query) should also contain a + reference to the same ledger object. + + For example usage, see privacy_ledger_test.py. + """ + + def __init__(self, query, + population_size=None, selection_probability=None, + ledger=None): + """Initializes the QueryWithLedger. + + Args: + query: The query whose events should be recorded to the ledger. Any + subqueries (including those in the leaves of a nested query) should also + contain a reference to the same ledger given here. + population_size: An integer (may be variable) specifying the size of the + population, i.e. size of the training data used in each epoch. May be + None if `ledger` is specified. + selection_probability: A float (may be variable) specifying the + probability each record is included in a sample. May be None if `ledger` + is specified. + ledger: A PrivacyLedger to use. Must be specified if either of + `population_size` or `selection_probability` is None. 
+ """ + self._query = query + if population_size is not None and selection_probability is not None: + self.set_ledger(PrivacyLedger(population_size, selection_probability)) + elif ledger is not None: + self.set_ledger(ledger) + else: + raise ValueError('One of (population_size, selection_probability) or ' + 'ledger must be specified.') + + @property + def ledger(self): + return self._ledger + + def set_ledger(self, ledger): + self._ledger = ledger + self._query.set_ledger(ledger) + + def initial_global_state(self): + """See base class.""" + return self._query.initial_global_state() + + def derive_sample_params(self, global_state): + """See base class.""" + return self._query.derive_sample_params(global_state) + + def initial_sample_state(self, template): + """See base class.""" + return self._query.initial_sample_state(template) + + def preprocess_record(self, params, record): + """See base class.""" + return self._query.preprocess_record(params, record) + + def accumulate_preprocessed_record(self, sample_state, preprocessed_record): + """See base class.""" + return self._query.accumulate_preprocessed_record( + sample_state, preprocessed_record) + + def merge_sample_states(self, sample_state_1, sample_state_2): + """See base class.""" + return self._query.merge_sample_states(sample_state_1, sample_state_2) + + def get_noised_result(self, sample_state, global_state): + """Ensures sample is recorded to the ledger and returns noised result.""" + # Ensure sample_state is fully aggregated before calling get_noised_result. + with tf.control_dependencies(nest.flatten(sample_state)): + result, new_global_state = self._query.get_noised_result( + sample_state, global_state) + # Ensure inner queries have recorded before finalizing. + with tf.control_dependencies(nest.flatten(result)): + finalize = self._ledger.finalize_sample() + # Ensure finalizing happens. + with tf.control_dependencies([finalize]): + return nest.map_structure(tf.identity, result), new_global_state diff --git a/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py b/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py new file mode 100644 index 0000000..4407ad2 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/privacy_ledger_test.py @@ -0,0 +1,137 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for PrivacyLedger.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.dp_query import gaussian_query +from tensorflow_privacy.privacy.dp_query import nested_query +from tensorflow_privacy.privacy.dp_query import test_utils + +tf.enable_eager_execution() + + +class PrivacyLedgerTest(tf.test.TestCase): + + def test_fail_on_probability_zero(self): + with self.assertRaisesRegexp(ValueError, + 'Selection probability cannot be 0.'): + privacy_ledger.PrivacyLedger(10, 0) + + def test_basic(self): + ledger = privacy_ledger.PrivacyLedger(10, 0.1) + ledger.record_sum_query(5.0, 1.0) + ledger.record_sum_query(2.0, 0.5) + + ledger.finalize_sample() + + expected_queries = [[5.0, 1.0], [2.0, 0.5]] + formatted = ledger.get_formatted_ledger_eager() + + sample = formatted[0] + self.assertAllClose(sample.population_size, 10.0) + self.assertAllClose(sample.selection_probability, 0.1) + self.assertAllClose(sorted(sample.queries), sorted(expected_queries)) + + def test_sum_query(self): + record1 = tf.constant([2.0, 0.0]) + record2 = tf.constant([-1.0, 1.0]) + + population_size = tf.Variable(0) + selection_probability = tf.Variable(1.0) + + query = gaussian_query.GaussianSumQuery( + l2_norm_clip=10.0, stddev=0.0) + query = privacy_ledger.QueryWithLedger( + query, population_size, selection_probability) + + # First sample. + tf.assign(population_size, 10) + tf.assign(selection_probability, 0.1) + test_utils.run_query(query, [record1, record2]) + + expected_queries = [[10.0, 0.0]] + formatted = query.ledger.get_formatted_ledger_eager() + sample_1 = formatted[0] + self.assertAllClose(sample_1.population_size, 10.0) + self.assertAllClose(sample_1.selection_probability, 0.1) + self.assertAllClose(sample_1.queries, expected_queries) + + # Second sample. + tf.assign(population_size, 20) + tf.assign(selection_probability, 0.2) + test_utils.run_query(query, [record1, record2]) + + formatted = query.ledger.get_formatted_ledger_eager() + sample_1, sample_2 = formatted + self.assertAllClose(sample_1.population_size, 10.0) + self.assertAllClose(sample_1.selection_probability, 0.1) + self.assertAllClose(sample_1.queries, expected_queries) + + self.assertAllClose(sample_2.population_size, 20.0) + self.assertAllClose(sample_2.selection_probability, 0.2) + self.assertAllClose(sample_2.queries, expected_queries) + + def test_nested_query(self): + population_size = tf.Variable(0) + selection_probability = tf.Variable(1.0) + + query1 = gaussian_query.GaussianAverageQuery( + l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0) + query2 = gaussian_query.GaussianAverageQuery( + l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0) + + query = nested_query.NestedQuery([query1, query2]) + query = privacy_ledger.QueryWithLedger( + query, population_size, selection_probability) + + record1 = [1.0, [12.0, 9.0]] + record2 = [5.0, [1.0, 2.0]] + + # First sample. + tf.assign(population_size, 10) + tf.assign(selection_probability, 0.1) + test_utils.run_query(query, [record1, record2]) + + expected_queries = [[4.0, 2.0], [5.0, 1.0]] + formatted = query.ledger.get_formatted_ledger_eager() + sample_1 = formatted[0] + self.assertAllClose(sample_1.population_size, 10.0) + self.assertAllClose(sample_1.selection_probability, 0.1) + self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries)) + + # Second sample. 
+    tf.assign(population_size, 20)
+    tf.assign(selection_probability, 0.2)
+    test_utils.run_query(query, [record1, record2])
+
+    formatted = query.ledger.get_formatted_ledger_eager()
+    sample_1, sample_2 = formatted
+    self.assertAllClose(sample_1.population_size, 10.0)
+    self.assertAllClose(sample_1.selection_probability, 0.1)
+    self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
+
+    self.assertAllClose(sample_2.population_size, 20.0)
+    self.assertAllClose(sample_2.selection_probability, 0.2)
+    self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
new file mode 100644
index 0000000..195b91e
--- /dev/null
+++ b/tensorflow_privacy/privacy/analysis/rdp_accountant.py
@@ -0,0 +1,318 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""RDP analysis of the Sampled Gaussian Mechanism.
+
+Functionality for computing Renyi differential privacy (RDP) of an additive
+Sampled Gaussian Mechanism (SGM). Its public interface consists of two methods:
+  compute_rdp(q, noise_multiplier, T, orders) computes RDP for SGM iterated
+                                   T times.
+  get_privacy_spent(orders, rdp, target_eps, target_delta) computes delta
+                                   (or eps) given RDP at multiple orders and
+                                   a target value for eps (or delta).
+
+Example use:
+
+Suppose that we have run an SGM applied to a function with l2-sensitivity 1.
+Its parameters are given as a list of tuples (q1, sigma1, T1), ...,
+(qk, sigma_k, Tk), and we wish to compute eps for a given delta.
+The example code would be:
+
+  max_order = 32
+  orders = range(2, max_order + 1)
+  rdp = np.zeros_like(orders, dtype=float)
+  for q, sigma, T in parameters:
+    rdp += rdp_accountant.compute_rdp(q, sigma, T, orders)
+  eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=delta)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import sys
+
+import numpy as np
+from scipy import special
+import six
+
+########################
+# LOG-SPACE ARITHMETIC #
+########################
+
+
+def _log_add(logx, logy):
+  """Add two numbers in the log space."""
+  a, b = min(logx, logy), max(logx, logy)
+  if a == -np.inf:  # adding 0
+    return b
+  # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
+  return math.log1p(math.exp(a - b)) + b  # log1p(x) = log(x + 1)
+
+
+def _log_sub(logx, logy):
+  """Subtract two numbers in the log space. Answer must be non-negative."""
+  if logx < logy:
+    raise ValueError("The result of subtraction must be non-negative.")
+  if logy == -np.inf:  # subtracting 0
+    return logx
+  if logx == logy:
+    return -np.inf  # 0 is represented as -np.inf in the log space.
+
+  try:
+    # Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
+ return math.log(math.expm1(logx - logy)) + logy # expm1(x) = exp(x) - 1 + except OverflowError: + return logx + + +def _log_print(logx): + """Pretty print.""" + if logx < math.log(sys.float_info.max): + return "{}".format(math.exp(logx)) + else: + return "exp({})".format(logx) + + +def _compute_log_a_int(q, sigma, alpha): + """Compute log(A_alpha) for integer alpha. 0 < q < 1.""" + assert isinstance(alpha, six.integer_types) + + # Initialize with 0 in the log space. + log_a = -np.inf + + for i in range(alpha + 1): + log_coef_i = ( + math.log(special.binom(alpha, i)) + i * math.log(q) + + (alpha - i) * math.log(1 - q)) + + s = log_coef_i + (i * i - i) / (2 * (sigma**2)) + log_a = _log_add(log_a, s) + + return float(log_a) + + +def _compute_log_a_frac(q, sigma, alpha): + """Compute log(A_alpha) for fractional alpha. 0 < q < 1.""" + # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are + # initialized to 0 in the log space: + log_a0, log_a1 = -np.inf, -np.inf + i = 0 + + z0 = sigma**2 * math.log(1 / q - 1) + .5 + + while True: # do ... until loop + coef = special.binom(alpha, i) + log_coef = math.log(abs(coef)) + j = alpha - i + + log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q) + log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q) + + log_e0 = math.log(.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma)) + log_e1 = math.log(.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma)) + + log_s0 = log_t0 + (i * i - i) / (2 * (sigma**2)) + log_e0 + log_s1 = log_t1 + (j * j - j) / (2 * (sigma**2)) + log_e1 + + if coef > 0: + log_a0 = _log_add(log_a0, log_s0) + log_a1 = _log_add(log_a1, log_s1) + else: + log_a0 = _log_sub(log_a0, log_s0) + log_a1 = _log_sub(log_a1, log_s1) + + i += 1 + if max(log_s0, log_s1) < -30: + break + + return _log_add(log_a0, log_a1) + + +def _compute_log_a(q, sigma, alpha): + """Compute log(A_alpha) for any positive finite alpha.""" + if float(alpha).is_integer(): + return _compute_log_a_int(q, sigma, int(alpha)) + else: + return _compute_log_a_frac(q, sigma, alpha) + + +def _log_erfc(x): + """Compute log(erfc(x)) with high accuracy for large x.""" + try: + return math.log(2) + special.log_ndtr(-x * 2**.5) + except NameError: + # If log_ndtr is not available, approximate as follows: + r = special.erfc(x) + if r == 0.0: + # Using the Laurent series at infinity for the tail of the erfc function: + # erfc(x) ~ exp(-x^2-.5/x^2+.625/x^4)/(x*pi^.5) + # To verify in Mathematica: + # Series[Log[Erfc[x]] + Log[x] + Log[Pi]/2 + x^2, {x, Infinity, 6}] + return (-math.log(math.pi) / 2 - math.log(x) - x**2 - .5 * x**-2 + + .625 * x**-4 - 37. / 24. * x**-6 + 353. / 64. * x**-8) + else: + return math.log(r) + + +def _compute_delta(orders, rdp, eps): + """Compute delta given a list of RDP values and target epsilon. + + Args: + orders: An array (or a scalar) of orders. + rdp: A list (or a scalar) of RDP guarantees. + eps: The target epsilon. + + Returns: + Pair of (delta, optimal_order). + + Raises: + ValueError: If input is malformed. + + """ + orders_vec = np.atleast_1d(orders) + rdp_vec = np.atleast_1d(rdp) + + if len(orders_vec) != len(rdp_vec): + raise ValueError("Input lists must have the same length.") + + deltas = np.exp((rdp_vec - eps) * (orders_vec - 1)) + idx_opt = np.argmin(deltas) + return min(deltas[idx_opt], 1.), orders_vec[idx_opt] + + +def _compute_eps(orders, rdp, delta): + """Compute epsilon given a list of RDP values and target delta. + + Args: + orders: An array (or a scalar) of orders. + rdp: A list (or a scalar) of RDP guarantees. 
+ delta: The target delta. + + Returns: + Pair of (eps, optimal_order). + + Raises: + ValueError: If input is malformed. + + """ + orders_vec = np.atleast_1d(orders) + rdp_vec = np.atleast_1d(rdp) + + if len(orders_vec) != len(rdp_vec): + raise ValueError("Input lists must have the same length.") + + eps = rdp_vec - math.log(delta) / (orders_vec - 1) + + idx_opt = np.nanargmin(eps) # Ignore NaNs + return eps[idx_opt], orders_vec[idx_opt] + + +def _compute_rdp(q, sigma, alpha): + """Compute RDP of the Sampled Gaussian mechanism at order alpha. + + Args: + q: The sampling rate. + sigma: The std of the additive Gaussian noise. + alpha: The order at which RDP is computed. + + Returns: + RDP at alpha, can be np.inf. + """ + if q == 0: + return 0 + + if q == 1.: + return alpha / (2 * sigma**2) + + if np.isinf(alpha): + return np.inf + + return _compute_log_a(q, sigma, alpha) / (alpha - 1) + + +def compute_rdp(q, noise_multiplier, steps, orders): + """Compute RDP of the Sampled Gaussian Mechanism. + + Args: + q: The sampling rate. + noise_multiplier: The ratio of the standard deviation of the Gaussian noise + to the l2-sensitivity of the function to which it is added. + steps: The number of steps. + orders: An array (or a scalar) of RDP orders. + + Returns: + The RDPs at all orders, can be np.inf. + """ + if np.isscalar(orders): + rdp = _compute_rdp(q, noise_multiplier, orders) + else: + rdp = np.array([_compute_rdp(q, noise_multiplier, order) + for order in orders]) + + return rdp * steps + + +def get_privacy_spent(orders, rdp, target_eps=None, target_delta=None): + """Compute delta (or eps) for given eps (or delta) from RDP values. + + Args: + orders: An array (or a scalar) of RDP orders. + rdp: An array of RDP values. Must be of the same length as the orders list. + target_eps: If not None, the epsilon for which we compute the corresponding + delta. + target_delta: If not None, the delta for which we compute the corresponding + epsilon. Exactly one of target_eps and target_delta must be None. + + Returns: + eps, delta, opt_order. + + Raises: + ValueError: If target_eps and target_delta are messed up. + """ + if target_eps is None and target_delta is None: + raise ValueError( + "Exactly one out of eps and delta must be None. (Both are).") + + if target_eps is not None and target_delta is not None: + raise ValueError( + "Exactly one out of eps and delta must be None. (None is).") + + if target_eps is not None: + delta, opt_order = _compute_delta(orders, rdp, target_eps) + return target_eps, delta, opt_order + else: + eps, opt_order = _compute_eps(orders, rdp, target_delta) + return eps, target_delta, opt_order + + +def compute_rdp_from_ledger(ledger, orders): + """Compute RDP of Sampled Gaussian Mechanism from ledger. + + Args: + ledger: A formatted privacy ledger. + orders: An array (or a scalar) of RDP orders. + + Returns: + RDP at all orders, can be np.inf. + """ + total_rdp = np.zeros_like(orders, dtype=float) + for sample in ledger: + # Compute equivalent z from l2_clip_bounds and noise stddevs in sample. + # See https://arxiv.org/pdf/1812.06210.pdf for derivation of this formula. 
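+    # In brief: each query in the sample has noise multiplier
+    # z_i = noise_stddev_i / l2_norm_bound_i, and the queries combine as
+    # effective_z = (sum_i z_i**-2)**-0.5, which reduces to z_1 when the
+    # sample contains a single query.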
+ effective_z = sum([ + (q.noise_stddev / q.l2_norm_bound)**-2 for q in sample.queries])**-0.5 + total_rdp += compute_rdp( + sample.selection_probability, effective_z, 1, orders) + return total_rdp diff --git a/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py new file mode 100644 index 0000000..acc46a8 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/rdp_accountant_test.py @@ -0,0 +1,177 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for rdp_accountant.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +from absl.testing import absltest +from absl.testing import parameterized +from mpmath import exp +from mpmath import inf +from mpmath import log +from mpmath import npdf +from mpmath import quad +import numpy as np + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.analysis import rdp_accountant + + +class TestGaussianMoments(parameterized.TestCase): + ################################# + # HELPER FUNCTIONS: # + # Exact computations using # + # multi-precision arithmetic. # + ################################# + + def _log_float_mp(self, x): + # Convert multi-precision input to float log space. + if x >= sys.float_info.min: + return float(log(x)) + else: + return -np.inf + + def _integral_mp(self, fn, bounds=(-inf, inf)): + integral, _ = quad(fn, bounds, error=True, maxdegree=8) + return integral + + def _distributions_mp(self, sigma, q): + + def _mu0(x): + return npdf(x, mu=0, sigma=sigma) + + def _mu1(x): + return npdf(x, mu=1, sigma=sigma) + + def _mu(x): + return (1 - q) * _mu0(x) + q * _mu1(x) + + return _mu0, _mu # Closure! + + def _mu1_over_mu0(self, x, sigma): + # Closed-form expression for N(1, sigma^2) / N(0, sigma^2) at x. 
+ return exp((2 * x - 1) / (2 * sigma**2)) + + def _mu_over_mu0(self, x, q, sigma): + return (1 - q) + q * self._mu1_over_mu0(x, sigma) + + def _compute_a_mp(self, sigma, q, alpha): + """Compute A_alpha for arbitrary alpha by numerical integration.""" + mu0, _ = self._distributions_mp(sigma, q) + a_alpha_fn = lambda z: mu0(z) * self._mu_over_mu0(z, q, sigma)**alpha + a_alpha = self._integral_mp(a_alpha_fn) + return a_alpha + + # TEST ROUTINES + def test_compute_rdp_no_data(self): + # q = 0 + self.assertEqual(rdp_accountant.compute_rdp(0, 10, 1, 20), 0) + + def test_compute_rdp_no_sampling(self): + # q = 1, RDP = alpha/2 * sigma^2 + self.assertEqual(rdp_accountant.compute_rdp(1, 10, 1, 20), 0.1) + + def test_compute_rdp_scalar(self): + rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5) + self.assertAlmostEqual(rdp_scalar, 0.07737, places=5) + + def test_compute_rdp_sequence(self): + rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50, + [1.5, 2.5, 5, 50, 100, np.inf]) + self.assertSequenceAlmostEqual( + rdp_vec, [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf], + delta=1e-5) + + params = ({'q': 1e-7, 'sigma': .1, 'order': 1.01}, + {'q': 1e-6, 'sigma': .1, 'order': 256}, + {'q': 1e-5, 'sigma': .1, 'order': 256.1}, + {'q': 1e-6, 'sigma': 1, 'order': 27}, + {'q': 1e-4, 'sigma': 1., 'order': 1.5}, + {'q': 1e-3, 'sigma': 1., 'order': 2}, + {'q': .01, 'sigma': 10, 'order': 20}, + {'q': .1, 'sigma': 100, 'order': 20.5}, + {'q': .99, 'sigma': .1, 'order': 256}, + {'q': .999, 'sigma': 100, 'order': 256.1}) + + # pylint:disable=undefined-variable + @parameterized.parameters(p for p in params) + def test_compute_log_a_equals_mp(self, q, sigma, order): + # Compare the cheap computation of log(A) with an expensive, multi-precision + # computation. + log_a = rdp_accountant._compute_log_a(q, sigma, order) + log_a_mp = self._log_float_mp(self._compute_a_mp(sigma, q, order)) + np.testing.assert_allclose(log_a, log_a_mp, rtol=1e-4) + + def test_get_privacy_spent_check_target_delta(self): + orders = range(2, 33) + rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders) + eps, _, opt_order = rdp_accountant.get_privacy_spent( + orders, rdp, target_delta=1e-5) + self.assertAlmostEqual(eps, 1.258575, places=5) + self.assertEqual(opt_order, 20) + + def test_get_privacy_spent_check_target_eps(self): + orders = range(2, 33) + rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders) + _, delta, opt_order = rdp_accountant.get_privacy_spent( + orders, rdp, target_eps=1.258575) + self.assertAlmostEqual(delta, 1e-5) + self.assertEqual(opt_order, 20) + + def test_check_composition(self): + orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12., 14., + 16., 20., 24., 28., 32., 64., 256.) 
+ + rdp = rdp_accountant.compute_rdp(q=1e-4, + noise_multiplier=.4, + steps=40000, + orders=orders) + + eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, + target_delta=1e-6) + + rdp += rdp_accountant.compute_rdp(q=0.1, + noise_multiplier=2, + steps=100, + orders=orders) + eps, _, opt_order = rdp_accountant.get_privacy_spent(orders, rdp, + target_delta=1e-5) + self.assertAlmostEqual(eps, 8.509656, places=5) + self.assertEqual(opt_order, 2.5) + + def test_compute_rdp_from_ledger(self): + orders = range(2, 33) + q = 0.1 + n = 1000 + l2_norm_clip = 3.14159 + noise_stddev = 2.71828 + steps = 3 + + query_entry = privacy_ledger.GaussianSumQueryEntry( + l2_norm_clip, noise_stddev) + ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps + + z = noise_stddev / l2_norm_clip + rdp = rdp_accountant.compute_rdp(q, z, steps, orders) + rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(ledger, orders) + self.assertSequenceAlmostEqual(rdp, rdp_from_ledger) + + +if __name__ == '__main__': + absltest.main() diff --git a/tensorflow_privacy/privacy/analysis/tensor_buffer.py b/tensorflow_privacy/privacy/analysis/tensor_buffer.py new file mode 100644 index 0000000..a0cf665 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/tensor_buffer.py @@ -0,0 +1,134 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A lightweight buffer for maintaining tensors.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + + +class TensorBuffer(object): + """A lightweight buffer for maintaining lists. + + The TensorBuffer accumulates tensors of the given shape into a tensor (whose + rank is one more than that of the given shape) via calls to `append`. The + current value of the accumulated tensor can be extracted via the property + `values`. + """ + + def __init__(self, capacity, shape, dtype=tf.int32, name=None): + """Initializes the TensorBuffer. + + Args: + capacity: Initial capacity. Buffer will double in capacity each time it is + filled to capacity. + shape: The shape (as tuple or list) of the tensors to accumulate. + dtype: The type of the tensors. + name: A string name for the variable_scope used. + + Raises: + ValueError: If the shape is empty (specifies scalar shape). + """ + shape = list(shape) + self._rank = len(shape) + self._name = name + self._dtype = dtype + if not self._rank: + raise ValueError('Shape cannot be scalar.') + shape = [capacity] + shape + + with tf.variable_scope(self._name): + # We need to use a placeholder as the initial value to allow resizing. 
+ self._buffer = tf.Variable( + initial_value=tf.placeholder_with_default( + tf.zeros(shape, dtype), shape=None), + trainable=False, + name='buffer', + use_resource=True) + self._current_size = tf.Variable( + initial_value=0, dtype=tf.int32, trainable=False, name='current_size') + self._capacity = tf.Variable( + initial_value=capacity, + dtype=tf.int32, + trainable=False, + name='capacity') + + def append(self, value): + """Appends a new tensor to the end of the buffer. + + Args: + value: The tensor to append. Must match the shape specified in the + initializer. + + Returns: + An op appending the new tensor to the end of the buffer. + """ + + def _double_capacity(): + """Doubles the capacity of the current tensor buffer.""" + padding = tf.zeros_like(self._buffer, self._buffer.dtype) + new_buffer = tf.concat([self._buffer, padding], axis=0) + if tf.executing_eagerly(): + with tf.variable_scope(self._name, reuse=True): + self._buffer = tf.get_variable( + name='buffer', + dtype=self._dtype, + initializer=new_buffer, + trainable=False) + return self._buffer, tf.assign(self._capacity, + tf.multiply(self._capacity, 2)) + else: + return tf.assign( + self._buffer, new_buffer, + validate_shape=False), tf.assign(self._capacity, + tf.multiply(self._capacity, 2)) + + update_buffer, update_capacity = tf.cond( + tf.equal(self._current_size, self._capacity), + _double_capacity, lambda: (self._buffer, self._capacity)) + + with tf.control_dependencies([update_buffer, update_capacity]): + with tf.control_dependencies([ + tf.assert_less( + self._current_size, + self._capacity, + message='Appending past end of TensorBuffer.'), + tf.assert_equal( + tf.shape(value), + tf.shape(self._buffer)[1:], + message='Appending value of inconsistent shape.') + ]): + with tf.control_dependencies( + [tf.assign(self._buffer[self._current_size, :], value)]): + return tf.assign_add(self._current_size, 1) + + @property + def values(self): + """Returns the accumulated tensor.""" + begin_value = tf.zeros([self._rank + 1], dtype=tf.int32) + value_size = tf.concat([[self._current_size], + tf.constant(-1, tf.int32, [self._rank])], 0) + return tf.slice(self._buffer, begin_value, value_size) + + @property + def current_size(self): + """Returns the current number of tensors in the buffer.""" + return self._current_size + + @property + def capacity(self): + """Returns the current capacity of the buffer.""" + return self._capacity diff --git a/tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py b/tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py new file mode 100644 index 0000000..ef01910 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/tensor_buffer_test_eager.py @@ -0,0 +1,84 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
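For reference, a minimal usage sketch of the `TensorBuffer` defined above. This is an illustration only (not part of the patch); it assumes TF 1.x with eager execution enabled, mirroring the eager-mode tests that follow:

```python
import tensorflow as tf

from tensorflow_privacy.privacy.analysis import tensor_buffer

tf.enable_eager_execution()

# Capacity-2 buffer accumulating int32 tensors of shape [2, 3].
buf = tensor_buffer.TensorBuffer(capacity=2, shape=[2, 3], name='demo_buffer')

buf.append([[1, 2, 3], [4, 5, 6]])
buf.append([[7, 8, 9], [10, 11, 12]])
buf.append([[13, 14, 15], [16, 17, 18]])  # Exceeds capacity; buffer doubles to 4.

print(buf.current_size.numpy())   # 3
print(buf.capacity.numpy())       # 4
print(buf.values.numpy().shape)   # (3, 2, 3)
```

In graph mode the same `append` calls return ops that must be sequenced explicitly (e.g. with control dependencies), as the graph-mode test further below demonstrates.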
+"""Tests for tensor_buffer in eager mode.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import tensor_buffer + +tf.enable_eager_execution() + + +class TensorBufferTest(tf.test.TestCase): + """Tests for TensorBuffer in eager mode.""" + + def test_basic(self): + size, shape = 2, [2, 3] + + my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') + + value1 = [[1, 2, 3], [4, 5, 6]] + my_buffer.append(value1) + self.assertAllEqual(my_buffer.values.numpy(), [value1]) + + value2 = [[4, 5, 6], [7, 8, 9]] + my_buffer.append(value2) + self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) + + def test_fail_on_scalar(self): + with self.assertRaisesRegexp(ValueError, 'Shape cannot be scalar.'): + tensor_buffer.TensorBuffer(1, ()) + + def test_fail_on_inconsistent_shape(self): + size, shape = 1, [2, 3] + + my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') + + with self.assertRaisesRegexp( + tf.errors.InvalidArgumentError, + 'Appending value of inconsistent shape.'): + my_buffer.append(tf.ones(shape=[3, 4], dtype=tf.int32)) + + def test_resize(self): + size, shape = 2, [2, 3] + + my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') + + # Append three buffers. Third one should succeed after resizing. + value1 = [[1, 2, 3], [4, 5, 6]] + my_buffer.append(value1) + self.assertAllEqual(my_buffer.values.numpy(), [value1]) + self.assertAllEqual(my_buffer.current_size.numpy(), 1) + self.assertAllEqual(my_buffer.capacity.numpy(), 2) + + value2 = [[4, 5, 6], [7, 8, 9]] + my_buffer.append(value2) + self.assertAllEqual(my_buffer.values.numpy(), [value1, value2]) + self.assertAllEqual(my_buffer.current_size.numpy(), 2) + self.assertAllEqual(my_buffer.capacity.numpy(), 2) + + value3 = [[7, 8, 9], [10, 11, 12]] + my_buffer.append(value3) + self.assertAllEqual(my_buffer.values.numpy(), [value1, value2, value3]) + self.assertAllEqual(my_buffer.current_size.numpy(), 3) + # Capacity should have doubled. + self.assertAllEqual(my_buffer.capacity.numpy(), 4) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/analysis/tensor_buffer_test_graph.py b/tensorflow_privacy/privacy/analysis/tensor_buffer_test_graph.py new file mode 100644 index 0000000..5a66ec6 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/tensor_buffer_test_graph.py @@ -0,0 +1,72 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for tensor_buffer in graph mode.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import tensor_buffer + + +class TensorBufferTest(tf.test.TestCase): + """Tests for TensorBuffer in graph mode.""" + + def test_noresize(self): + """Test buffer does not resize if capacity is not exceeded.""" + with self.cached_session() as sess: + size, shape = 2, [2, 3] + + my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') + value1 = [[1, 2, 3], [4, 5, 6]] + with tf.control_dependencies([my_buffer.append(value1)]): + value2 = [[7, 8, 9], [10, 11, 12]] + with tf.control_dependencies([my_buffer.append(value2)]): + values = my_buffer.values + current_size = my_buffer.current_size + capacity = my_buffer.capacity + self.evaluate(tf.global_variables_initializer()) + + v, cs, cap = sess.run([values, current_size, capacity]) + self.assertAllEqual(v, [value1, value2]) + self.assertEqual(cs, 2) + self.assertEqual(cap, 2) + + def test_resize(self): + """Test buffer resizes if capacity is exceeded.""" + with self.cached_session() as sess: + size, shape = 2, [2, 3] + + my_buffer = tensor_buffer.TensorBuffer(size, shape, name='my_buffer') + value1 = [[1, 2, 3], [4, 5, 6]] + with tf.control_dependencies([my_buffer.append(value1)]): + value2 = [[7, 8, 9], [10, 11, 12]] + with tf.control_dependencies([my_buffer.append(value2)]): + value3 = [[13, 14, 15], [16, 17, 18]] + with tf.control_dependencies([my_buffer.append(value3)]): + values = my_buffer.values + current_size = my_buffer.current_size + capacity = my_buffer.capacity + self.evaluate(tf.global_variables_initializer()) + + v, cs, cap = sess.run([values, current_size, capacity]) + self.assertAllEqual(v, [value1, value2, value3]) + self.assertEqual(cs, 3) + self.assertEqual(cap, 4) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/bolt_on/README.md b/tensorflow_privacy/privacy/bolt_on/README.md new file mode 100644 index 0000000..1eb9a6a --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/README.md @@ -0,0 +1,67 @@ +# BoltOn Subpackage + +This package contains source code for the BoltOn method, a particular +differential-privacy (DP) technique that uses output perturbations and +leverages additional assumptions to provide a new way of approaching the +privacy guarantees. + +## BoltOn Description + +This method uses 4 key steps to achieve privacy guarantees: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R, the radius of the hypothesis space, + after each batch. This value is configurable by the user. + 3. Limits learning rate + 4. Uses a strongly convex loss function (see compile) + +For more details on the strong convexity requirements, see: +Bolt-on Differential Privacy for Scalable Stochastic Gradient +Descent-based Analytics by Xi Wu et al. at https://arxiv.org/pdf/1606.04722.pdf + +## Why BoltOn? + +The major difference for the BoltOn method is that it injects noise post model +convergence, rather than noising gradients or weights during training. This +approach requires some additional constraints listed in the Description. +Should the use-case and model satisfy these constraints, this is another +approach that can be trained to maximize utility while maintaining the privacy. 
+The paper describes in detail the advantages and disadvantages of this approach +and its results compared to some other methods, namely noising at each iteration +and no noising. + +## Tutorials + +This package has a tutorial that can be found in the root tutorials directory, +under `bolton_tutorial.py`. + +## Contribution + +This package was initially contributed by Georgian Partners with the hope of +growing the tensorflow/privacy library. There are several rich use cases for +delta-epsilon privacy in machine learning, some of which can be explored here: +https://medium.com/apache-mxnet/epsilon-differential-privacy-for-machine-learning-using-mxnet-a4270fe3865e +https://arxiv.org/pdf/1811.04911.pdf + +## Stability + +As we are pegged on tensorflow2.0, this package may encounter stability +issues in the ongoing development of tensorflow2.0. + +This sub-package is currently stable for 2.0.0a0, 2.0.0b0, and 2.0.0.b1 If you +would like to use this subpackage, please do use one of these versions as we +cannot guarantee it will work for all latest releases. If you do find issues, +feel free to raise an issue to the contributors listed below. + +## Contacts + +In addition to the maintainers of tensorflow/privacy listed in the root +README.md, please feel free to contact members of Georgian Partners. In +particular, + +* Georgian Partners(@georgianpartners) +* Ji Chao Zhang(@Jichaogp) +* Christopher Choquette(@cchoquette) + +## Copyright + +Copyright 2019 - Google LLC diff --git a/tensorflow_privacy/privacy/bolt_on/__init__.py b/tensorflow_privacy/privacy/bolt_on/__init__.py new file mode 100644 index 0000000..2f87e3c --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2019, The TensorFlow Privacy Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BoltOn Method for privacy.""" +import sys +from distutils.version import LooseVersion +import tensorflow as tf + +if LooseVersion(tf.__version__) < LooseVersion("2.0.0"): + raise ImportError("Please upgrade your version " + "of tensorflow from: {0} to at least 2.0.0 to " + "use privacy/bolt_on".format(LooseVersion(tf.__version__))) +if hasattr(sys, "skip_tf_privacy_import"): # Useful for standalone scripts. + pass +else: + from tensorflow_privacy.privacy.bolt_on.models import BoltOnModel # pylint: disable=g-import-not-at-top + from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn # pylint: disable=g-import-not-at-top + from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber # pylint: disable=g-import-not-at-top + from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy # pylint: disable=g-import-not-at-top diff --git a/tensorflow_privacy/privacy/bolt_on/losses.py b/tensorflow_privacy/privacy/bolt_on/losses.py new file mode 100644 index 0000000..81bd0c3 --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/losses.py @@ -0,0 +1,304 @@ +# Copyright 2019, The TensorFlow Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Loss functions for BoltOn method.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras import losses +from tensorflow.python.keras.regularizers import L1L2 +from tensorflow.python.keras.utils import losses_utils +from tensorflow.python.platform import tf_logging as logging + + +class StrongConvexMixin: # pylint: disable=old-style-class + """Strong Convex Mixin base class. + + Strong Convex Mixin base class for any loss function that will be used with + BoltOn model. Subclasses must be strongly convex and implement the + associated constants. They must also conform to the requirements of tf losses + (see super class). + + For more details on the strong convexity requirements, see: + Bolt-on Differential Privacy for Scalable Stochastic Gradient + Descent-based Analytics by Xi Wu et. al. + """ + + def radius(self): + """Radius, R, of the hypothesis space W. + + W is a convex set that forms the hypothesis space. + + Returns: + R + """ + raise NotImplementedError("Radius not implemented for StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def gamma(self): + """Returns strongly convex parameter, gamma.""" + raise NotImplementedError("Gamma not implemented for StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def beta(self, class_weight): + """Smoothness, beta. + + Args: + class_weight: the class weights as scalar or 1d tensor, where its + dimensionality is equal to the number of outputs. + + Returns: + Beta + """ + raise NotImplementedError("Beta not implemented for StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def lipchitz_constant(self, class_weight): + """Lipchitz constant, L. + + Args: + class_weight: class weights used + + Returns: L + """ + raise NotImplementedError("lipchitz constant not implemented for " + "StrongConvex Loss" + "function: %s" % str(self.__class__.__name__)) + + def kernel_regularizer(self): + """Returns the kernel_regularizer to be used. + + Any subclass should override this method if they want a kernel_regularizer + (if required for the loss function to be StronglyConvex. + """ + return None + + def max_class_weight(self, class_weight, dtype): + """The maximum weighting in class weights (max value) as a scalar tensor. + + Args: + class_weight: class weights used + dtype: the data type for tensor conversions. + + Returns: + maximum class weighting as tensor scalar + """ + class_weight = _ops.convert_to_tensor_v2(class_weight, dtype) + return tf.math.reduce_max(class_weight) + + +class StrongConvexHuber(losses.Loss, StrongConvexMixin): + """Strong Convex version of Huber loss using l2 weight regularization.""" + + def __init__(self, + reg_lambda, + c_arg, + radius_constant, + delta, + reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + dtype=tf.float32): + """Constructor. 
+ + Args: + reg_lambda: Weight regularization constant + c_arg: Penalty parameter C of the loss term + radius_constant: constant defining the length of the radius + delta: delta value in huber loss. When to switch from quadratic to + absolute deviation. + reduction: reduction type to use. See super class + dtype: tf datatype to use for tensor conversions. + + Returns: + Loss values per sample. + """ + if c_arg <= 0: + raise ValueError("c: {0}, should be >= 0".format(c_arg)) + if reg_lambda <= 0: + raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) + if radius_constant <= 0: + raise ValueError("radius_constant: {0}, should be >= 0".format( + radius_constant + )) + if delta <= 0: + raise ValueError("delta: {0}, should be >= 0".format( + delta + )) + self.C = c_arg # pylint: disable=invalid-name + self.delta = delta + self.radius_constant = radius_constant + self.dtype = dtype + self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) + super(StrongConvexHuber, self).__init__( + name="strongconvexhuber", + reduction=reduction, + ) + + def call(self, y_true, y_pred): + """Computes loss. + + Args: + y_true: Ground truth values. One hot encoded using -1 and 1. + y_pred: The predicted values. + + Returns: + Loss values per sample. + """ + h = self.delta + z = y_pred * y_true + one = tf.constant(1, dtype=self.dtype) + four = tf.constant(4, dtype=self.dtype) + + if z > one + h: # pylint: disable=no-else-return + return _ops.convert_to_tensor_v2(0, dtype=self.dtype) + elif tf.math.abs(one - z) <= h: + return one / (four * h) * tf.math.pow(one + h - z, 2) + return one - z + + def radius(self): + """See super class.""" + return self.radius_constant / self.reg_lambda + + def gamma(self): + """See super class.""" + return self.reg_lambda + + def beta(self, class_weight): + """See super class.""" + max_class_weight = self.max_class_weight(class_weight, self.dtype) + delta = _ops.convert_to_tensor_v2(self.delta, + dtype=self.dtype + ) + return self.C * max_class_weight / (delta * + tf.constant(2, dtype=self.dtype)) + \ + self.reg_lambda + + def lipchitz_constant(self, class_weight): + """See super class.""" + # if class_weight is provided, + # it should be a vector of the same size of number of classes + max_class_weight = self.max_class_weight(class_weight, self.dtype) + lc = self.C * max_class_weight + \ + self.reg_lambda * self.radius() + return lc + + def kernel_regularizer(self): + """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired). + + L2 regularization is required for this loss function to be strongly convex. + + Returns: + The L2 regularizer layer for this loss function, with regularizer constant + set to half the 0.5 * reg_lambda. + """ + return L1L2(l2=self.reg_lambda/2) + + +class StrongConvexBinaryCrossentropy( + losses.BinaryCrossentropy, + StrongConvexMixin +): + """Strongly Convex BinaryCrossentropy loss using l2 weight regularization.""" + + def __init__(self, + reg_lambda, + c_arg, + radius_constant, + from_logits=True, + label_smoothing=0, + reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE, + dtype=tf.float32): + """StrongConvexBinaryCrossentropy class. + + Args: + reg_lambda: Weight regularization constant + c_arg: Penalty parameter C of the loss term + radius_constant: constant defining the length of the radius + from_logits: True if the input are unscaled logits. False if they are + already scaled. + label_smoothing: amount of smoothing to perform on labels + relaxation of trust in labels, e.g. (1 -> 1-x, 0 -> 0+x). 
Note, the + impact of this parameter's effect on privacy is not known and thus the + default should be used. + reduction: reduction type to use. See super class + dtype: tf datatype to use for tensor conversions. + """ + if label_smoothing != 0: + logging.warning("The impact of label smoothing on privacy is unknown. " + "Use label smoothing at your own risk as it may not " + "guarantee privacy.") + + if reg_lambda <= 0: + raise ValueError("reg lambda: {0} must be positive".format(reg_lambda)) + if c_arg <= 0: + raise ValueError("c: {0}, should be >= 0".format(c_arg)) + if radius_constant <= 0: + raise ValueError("radius_constant: {0}, should be >= 0".format( + radius_constant + )) + self.dtype = dtype + self.C = c_arg # pylint: disable=invalid-name + self.reg_lambda = tf.constant(reg_lambda, dtype=self.dtype) + super(StrongConvexBinaryCrossentropy, self).__init__( + reduction=reduction, + name="strongconvexbinarycrossentropy", + from_logits=from_logits, + label_smoothing=label_smoothing, + ) + self.radius_constant = radius_constant + + def call(self, y_true, y_pred): + """Computes loss. + + Args: + y_true: Ground truth values. + y_pred: The predicted values. + + Returns: + Loss values per sample. + """ + loss = super(StrongConvexBinaryCrossentropy, self).call(y_true, y_pred) + loss = loss * self.C + return loss + + def radius(self): + """See super class.""" + return self.radius_constant / self.reg_lambda + + def gamma(self): + """See super class.""" + return self.reg_lambda + + def beta(self, class_weight): + """See super class.""" + max_class_weight = self.max_class_weight(class_weight, self.dtype) + return self.C * max_class_weight + self.reg_lambda + + def lipchitz_constant(self, class_weight): + """See super class.""" + max_class_weight = self.max_class_weight(class_weight, self.dtype) + return self.C * max_class_weight + self.reg_lambda * self.radius() + + def kernel_regularizer(self): + """Return l2 loss using 0.5*reg_lambda as the l2 term (as desired). + + L2 regularization is required for this loss function to be strongly convex. + + Returns: + The L2 regularizer layer for this loss function, with regularizer constant + set to half the 0.5 * reg_lambda. + """ + return L1L2(l2=self.reg_lambda/2) diff --git a/tensorflow_privacy/privacy/bolt_on/losses_test.py b/tensorflow_privacy/privacy/bolt_on/losses_test.py new file mode 100644 index 0000000..67f3d9c --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/losses_test.py @@ -0,0 +1,431 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
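For orientation, a small eager-mode sketch (illustrative values only, TF >= 2.0) of how the losses defined above behave and which constants they expose to the BoltOn optimizer:

```python
import tensorflow as tf

from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy
from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber

bce = StrongConvexBinaryCrossentropy(reg_lambda=1.0, c_arg=1.0, radius_constant=1.0)
huber = StrongConvexHuber(reg_lambda=1.0, c_arg=1.0, radius_constant=1.0, delta=1.0)

# Huber with y_true=1, logits=0.1 and h=delta=1 falls in the quadratic regime:
# loss = (1 + h - z)^2 / (4h) = 1.9^2 / 4 = 0.9025.
print(huber(tf.constant(1.0), tf.constant(0.1)).numpy())

# Constants consumed by the BoltOn optimizer when calibrating noise.
print(bce.radius().numpy())              # radius_constant / reg_lambda = 1.0
print(bce.gamma().numpy())               # reg_lambda = 1.0
print(bce.beta(class_weight=1).numpy())  # C * max_class_weight + reg_lambda = 2.0
print(bce.kernel_regularizer().l2)       # reg_lambda / 2 = 0.5
```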
+"""Unit testing for losses.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from contextlib import contextmanager # pylint: disable=g-importing-member +from io import StringIO # pylint: disable=g-importing-member +import sys +from absl.testing import parameterized +import tensorflow as tf +from tensorflow.python.framework import test_util +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras.regularizers import L1L2 +from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexBinaryCrossentropy +from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexHuber +from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin + + +@contextmanager +def captured_output(): + """Capture std_out and std_err within context.""" + new_out, new_err = StringIO(), StringIO() + old_out, old_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = new_out, new_err + yield sys.stdout, sys.stderr + finally: + sys.stdout, sys.stderr = old_out, old_err + + +class StrongConvexMixinTests(keras_parameterized.TestCase): + """Tests for the StrongConvexMixin.""" + @parameterized.named_parameters([ + {'testcase_name': 'beta not implemented', + 'fn': 'beta', + 'args': [1]}, + {'testcase_name': 'gamma not implemented', + 'fn': 'gamma', + 'args': []}, + {'testcase_name': 'lipchitz not implemented', + 'fn': 'lipchitz_constant', + 'args': [1]}, + {'testcase_name': 'radius not implemented', + 'fn': 'radius', + 'args': []}, + ]) + + def test_not_implemented(self, fn, args): + """Test that the given fn's are not implemented on the mixin. + + Args: + fn: fn on Mixin to test + args: arguments to fn of Mixin + """ + with self.assertRaises(NotImplementedError): + loss = StrongConvexMixin() + getattr(loss, fn, None)(*args) + + @parameterized.named_parameters([ + {'testcase_name': 'radius not implemented', + 'fn': 'kernel_regularizer', + 'args': []}, + ]) + def test_return_none(self, fn, args): + """Test that fn of Mixin returns None. + + Args: + fn: fn of Mixin to test + args: arguments to fn of Mixin + """ + loss = StrongConvexMixin() + ret = getattr(loss, fn, None)(*args) + self.assertEqual(ret, None) + + +class BinaryCrossesntropyTests(keras_parameterized.TestCase): + """tests for BinaryCrossesntropy StrongConvex loss.""" + + @parameterized.named_parameters([ + {'testcase_name': 'normal', + 'reg_lambda': 1, + 'C': 1, + 'radius_constant': 1 + }, # pylint: disable=invalid-name + ]) + def test_init_params(self, reg_lambda, C, radius_constant): + """Test initialization for given arguments. + + Args: + reg_lambda: initialization value for reg_lambda arg + C: initialization value for C arg + radius_constant: initialization value for radius_constant arg + """ + # test valid domains for each variable + loss = StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) + self.assertIsInstance(loss, StrongConvexBinaryCrossentropy) + + @parameterized.named_parameters([ + {'testcase_name': 'negative c', + 'reg_lambda': 1, + 'C': -1, + 'radius_constant': 1 + }, + {'testcase_name': 'negative radius', + 'reg_lambda': 1, + 'C': 1, + 'radius_constant': -1 + }, + {'testcase_name': 'negative lambda', + 'reg_lambda': -1, + 'C': 1, + 'radius_constant': 1 + }, # pylint: disable=invalid-name + ]) + def test_bad_init_params(self, reg_lambda, C, radius_constant): + """Test invalid domain for given params. Should return ValueError. 
+ + Args: + reg_lambda: initialization value for reg_lambda arg + C: initialization value for C arg + radius_constant: initialization value for radius_constant arg + """ + # test valid domains for each variable + with self.assertRaises(ValueError): + StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) + + @test_util.run_all_in_graph_and_eager_modes + @parameterized.named_parameters([ + # [] for compatibility with tensorflow loss calculation + {'testcase_name': 'both positive', + 'logits': [10000], + 'y_true': [1], + 'result': 0, + }, + {'testcase_name': 'positive gradient negative logits', + 'logits': [-10000], + 'y_true': [1], + 'result': 10000, + }, + {'testcase_name': 'positivee gradient positive logits', + 'logits': [10000], + 'y_true': [0], + 'result': 10000, + }, + {'testcase_name': 'both negative', + 'logits': [-10000], + 'y_true': [0], + 'result': 0 + }, + ]) + def test_calculation(self, logits, y_true, result): + """Test the call method to ensure it returns the correct value. + + Args: + logits: unscaled output of model + y_true: label + result: correct loss calculation value + """ + logits = tf.Variable(logits, False, dtype=tf.float32) + y_true = tf.Variable(y_true, False, dtype=tf.float32) + loss = StrongConvexBinaryCrossentropy(0.00001, 1, 1) + loss = loss(y_true, logits) + self.assertEqual(loss.numpy(), result) + + @parameterized.named_parameters([ + {'testcase_name': 'beta', + 'init_args': [1, 1, 1], + 'fn': 'beta', + 'args': [1], + 'result': tf.constant(2, dtype=tf.float32) + }, + {'testcase_name': 'gamma', + 'fn': 'gamma', + 'init_args': [1, 1, 1], + 'args': [], + 'result': tf.constant(1, dtype=tf.float32), + }, + {'testcase_name': 'lipchitz constant', + 'fn': 'lipchitz_constant', + 'init_args': [1, 1, 1], + 'args': [1], + 'result': tf.constant(2, dtype=tf.float32), + }, + {'testcase_name': 'kernel regularizer', + 'fn': 'kernel_regularizer', + 'init_args': [1, 1, 1], + 'args': [], + 'result': L1L2(l2=0.5), + }, + ]) + def test_fns(self, init_args, fn, args, result): + """Test that fn of BinaryCrossentropy loss returns the correct result. + + Args: + init_args: init values for loss instance + fn: the fn to test + args: the arguments to above function + result: the correct result from the fn + """ + loss = StrongConvexBinaryCrossentropy(*init_args) + expected = getattr(loss, fn, lambda: 'fn not found')(*args) + if hasattr(expected, 'numpy') and hasattr(result, 'numpy'): # both tensor + expected = expected.numpy() + result = result.numpy() + if hasattr(expected, 'l2') and hasattr(result, 'l2'): # both l2 regularizer + expected = expected.l2 + result = result.l2 + self.assertEqual(expected, result) + + @parameterized.named_parameters([ + {'testcase_name': 'label_smoothing', + 'init_args': [1, 1, 1, True, 0.1], + 'fn': None, + 'args': None, + 'print_res': 'The impact of label smoothing on privacy is unknown.' + }, + ]) + def test_prints(self, init_args, fn, args, print_res): + """Test logger warning from StrongConvexBinaryCrossentropy. + + Args: + init_args: arguments to init the object with. + fn: function to test + args: arguments to above function + print_res: print result that should have been printed. 
+ """ + with captured_output() as (out, err): # pylint: disable=unused-variable + loss = StrongConvexBinaryCrossentropy(*init_args) + if fn is not None: + getattr(loss, fn, lambda *arguments: print('error'))(*args) + self.assertRegexMatch(err.getvalue().strip(), [print_res]) + + +class HuberTests(keras_parameterized.TestCase): + """tests for BinaryCrossesntropy StrongConvex loss.""" + + @parameterized.named_parameters([ + {'testcase_name': 'normal', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': 1, + 'delta': 1, + }, + ]) + def test_init_params(self, reg_lambda, c, radius_constant, delta): + """Test initialization for given arguments. + + Args: + reg_lambda: initialization value for reg_lambda arg + c: initialization value for C arg + radius_constant: initialization value for radius_constant arg + delta: the delta parameter for the huber loss + """ + # test valid domains for each variable + loss = StrongConvexHuber(reg_lambda, c, radius_constant, delta) + self.assertIsInstance(loss, StrongConvexHuber) + + @parameterized.named_parameters([ + {'testcase_name': 'negative c', + 'reg_lambda': 1, + 'c': -1, + 'radius_constant': 1, + 'delta': 1 + }, + {'testcase_name': 'negative radius', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': -1, + 'delta': 1 + }, + {'testcase_name': 'negative lambda', + 'reg_lambda': -1, + 'c': 1, + 'radius_constant': 1, + 'delta': 1 + }, + {'testcase_name': 'negative delta', + 'reg_lambda': 1, + 'c': 1, + 'radius_constant': 1, + 'delta': -1 + }, + ]) + def test_bad_init_params(self, reg_lambda, c, radius_constant, delta): + """Test invalid domain for given params. Should return ValueError. + + Args: + reg_lambda: initialization value for reg_lambda arg + c: initialization value for C arg + radius_constant: initialization value for radius_constant arg + delta: the delta parameter for the huber loss + """ + # test valid domains for each variable + with self.assertRaises(ValueError): + StrongConvexHuber(reg_lambda, c, radius_constant, delta) + + # test the bounds and test varied delta's + @test_util.run_all_in_graph_and_eager_modes + @parameterized.named_parameters([ + {'testcase_name': 'delta=1,y_true=1 z>1+h decision boundary', + 'logits': 2.1, + 'y_true': 1, + 'delta': 1, + 'result': 0, + }, + {'testcase_name': 'delta=1,y_true=1 z<1+h decision boundary', + 'logits': 1.9, + 'y_true': 1, + 'delta': 1, + 'result': 0.01*0.25, + }, + {'testcase_name': 'delta=1,y_true=1 1-z< h decision boundary', + 'logits': 0.1, + 'y_true': 1, + 'delta': 1, + 'result': 1.9**2 * 0.25, + }, + {'testcase_name': 'delta=1,y_true=1 z < 1-h decision boundary', + 'logits': -0.1, + 'y_true': 1, + 'delta': 1, + 'result': 1.1, + }, + {'testcase_name': 'delta=2,y_true=1 z>1+h decision boundary', + 'logits': 3.1, + 'y_true': 1, + 'delta': 2, + 'result': 0, + }, + {'testcase_name': 'delta=2,y_true=1 z<1+h decision boundary', + 'logits': 2.9, + 'y_true': 1, + 'delta': 2, + 'result': 0.01*0.125, + }, + {'testcase_name': 'delta=2,y_true=1 1-z < h decision boundary', + 'logits': 1.1, + 'y_true': 1, + 'delta': 2, + 'result': 1.9**2 * 0.125, + }, + {'testcase_name': 'delta=2,y_true=1 z < 1-h decision boundary', + 'logits': -1.1, + 'y_true': 1, + 'delta': 2, + 'result': 2.1, + }, + {'testcase_name': 'delta=1,y_true=-1 z>1+h decision boundary', + 'logits': -2.1, + 'y_true': -1, + 'delta': 1, + 'result': 0, + }, + ]) + def test_calculation(self, logits, y_true, delta, result): + """Test the call method to ensure it returns the correct value. 
+ + Args: + logits: unscaled output of model + y_true: label + delta: delta value for StrongConvexHuber loss. + result: correct loss calculation value + """ + logits = tf.Variable(logits, False, dtype=tf.float32) + y_true = tf.Variable(y_true, False, dtype=tf.float32) + loss = StrongConvexHuber(0.00001, 1, 1, delta) + loss = loss(y_true, logits) + self.assertAllClose(loss.numpy(), result) + + @parameterized.named_parameters([ + {'testcase_name': 'beta', + 'init_args': [1, 1, 1, 1], + 'fn': 'beta', + 'args': [1], + 'result': tf.Variable(1.5, dtype=tf.float32) + }, + {'testcase_name': 'gamma', + 'fn': 'gamma', + 'init_args': [1, 1, 1, 1], + 'args': [], + 'result': tf.Variable(1, dtype=tf.float32), + }, + {'testcase_name': 'lipchitz constant', + 'fn': 'lipchitz_constant', + 'init_args': [1, 1, 1, 1], + 'args': [1], + 'result': tf.Variable(2, dtype=tf.float32), + }, + {'testcase_name': 'kernel regularizer', + 'fn': 'kernel_regularizer', + 'init_args': [1, 1, 1, 1], + 'args': [], + 'result': L1L2(l2=0.5), + }, + ]) + def test_fns(self, init_args, fn, args, result): + """Test that fn of BinaryCrossentropy loss returns the correct result. + + Args: + init_args: init values for loss instance + fn: the fn to test + args: the arguments to above function + result: the correct result from the fn + """ + loss = StrongConvexHuber(*init_args) + expected = getattr(loss, fn, lambda: 'fn not found')(*args) + if hasattr(expected, 'numpy') and hasattr(result, 'numpy'): # both tensor + expected = expected.numpy() + result = result.numpy() + if hasattr(expected, 'l2') and hasattr(result, 'l2'): # both l2 regularizer + expected = expected.l2 + result = result.l2 + self.assertEqual(expected, result) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/bolt_on/models.py b/tensorflow_privacy/privacy/bolt_on/models.py new file mode 100644 index 0000000..efea5cd --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/models.py @@ -0,0 +1,303 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BoltOn model for Bolt-on method of differentially private ML.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tensorflow as tf +from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras import optimizers +from tensorflow.python.keras.models import Model +from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin +from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn + + +class BoltOnModel(Model): # pylint: disable=abstract-method + """BoltOn episilon-delta differential privacy model. + + The privacy guarantees are dependent on the noise that is sampled. Please + see the paper linked below for more details. + + Uses 4 key steps to achieve privacy guarantees: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R after each batch + 3. Limits learning rate + 4. 
Use a strongly convex loss function (see compile) + + For more details on the strong convexity requirements, see: + Bolt-on Differential Privacy for Scalable Stochastic Gradient + Descent-based Analytics by Xi Wu et al. + """ + + def __init__(self, + n_outputs, + seed=1, + dtype=tf.float32): + """Private constructor. + + Args: + n_outputs: number of output classes to predict. + seed: random seed to use + dtype: data type to use for tensors + """ + super(BoltOnModel, self).__init__(name='bolton', dynamic=False) + if n_outputs <= 0: + raise ValueError('n_outputs = {0} is not valid. Must be > 0.'.format( + n_outputs + )) + self.n_outputs = n_outputs + self.seed = seed + self._layers_instantiated = False + self._dtype = dtype + + def call(self, inputs): # pylint: disable=arguments-differ + """Forward pass of network. + + Args: + inputs: inputs to neural network + + Returns: + Output logits for the given inputs. + + """ + return self.output_layer(inputs) + + def compile(self, + optimizer, + loss, + kernel_initializer=tf.initializers.GlorotUniform, + **kwargs): # pylint: disable=arguments-differ + """See super class. Default optimizer used in BoltOn method is SGD. + + Args: + optimizer: The optimizer to use. This will be automatically wrapped + with the BoltOn Optimizer. + loss: The loss function to use. Must be a StrongConvex loss (extend the + StrongConvexMixin). + kernel_initializer: The kernel initializer to use for the single layer. + **kwargs: kwargs to keras Model.compile. See super. + """ + if not isinstance(loss, StrongConvexMixin): + raise ValueError('loss function must be a Strongly Convex and therefore ' + 'extend the StrongConvexMixin.') + if not self._layers_instantiated: # compile may be called multiple times + # for instance, if the input/outputs are not defined until fit. + self.output_layer = tf.keras.layers.Dense( + self.n_outputs, + kernel_regularizer=loss.kernel_regularizer(), + kernel_initializer=kernel_initializer(), + ) + self._layers_instantiated = True + if not isinstance(optimizer, BoltOn): + optimizer = optimizers.get(optimizer) + optimizer = BoltOn(optimizer, loss) + + super(BoltOnModel, self).compile(optimizer, loss=loss, **kwargs) + + def fit(self, + x=None, + y=None, + batch_size=None, + class_weight=None, + n_samples=None, + epsilon=2, + noise_distribution='laplace', + steps_per_epoch=None, + **kwargs): # pylint: disable=arguments-differ + """Reroutes to super fit with BoltOn delta-epsilon privacy requirements. + + Note, inputs must be normalized s.t. ||x|| < 1. + Requirements are as follows: + 1. Adds noise to weights after training (output perturbation). + 2. Projects weights to R after each batch + 3. Limits learning rate + 4. Use a strongly convex loss function (see compile) + See super implementation for more details. + + Args: + x: Inputs to fit on, see super. + y: Labels to fit on, see super. + batch_size: The batch size to use for training, see super. + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + n_samples: the number of individual samples in x. + epsilon: privacy parameter, which trades off between utility an privacy. + See the bolt-on paper for more description. + noise_distribution: the distribution to pull noise from. + steps_per_epoch: + **kwargs: kwargs to keras Model.fit. See super. + + Returns: + Output from super fit method. 
+ """ + if class_weight is None: + class_weight_ = self.calculate_class_weights(class_weight) + else: + class_weight_ = class_weight + if n_samples is not None: + data_size = n_samples + elif hasattr(x, 'shape'): + data_size = x.shape[0] + elif hasattr(x, '__len__'): + data_size = len(x) + else: + data_size = None + batch_size_ = self._validate_or_infer_batch_size(batch_size, + steps_per_epoch, + x) + if batch_size_ is None: + batch_size_ = 32 + # inferring batch_size to be passed to optimizer. batch_size must remain its + # initial value when passed to super().fit() + if batch_size_ is None: + raise ValueError('batch_size: {0} is an ' + 'invalid value'.format(batch_size_)) + if data_size is None: + raise ValueError('Could not infer the number of samples. Please pass ' + 'this in using n_samples.') + with self.optimizer(noise_distribution, + epsilon, + self.layers, + class_weight_, + data_size, + batch_size_) as _: + out = super(BoltOnModel, self).fit(x=x, + y=y, + batch_size=batch_size, + class_weight=class_weight, + steps_per_epoch=steps_per_epoch, + **kwargs) + return out + + def fit_generator(self, + generator, + class_weight=None, + noise_distribution='laplace', + epsilon=2, + n_samples=None, + steps_per_epoch=None, + **kwargs): # pylint: disable=arguments-differ + """Fit with a generator. + + This method is the same as fit except for when the passed dataset + is a generator. See super method and fit for more details. + + Args: + generator: Inputs generator following Tensorflow guidelines, see super. + class_weight: the class weights to be used. Can be a scalar or 1D tensor + whose dim == n_classes. + noise_distribution: the distribution to get noise from. + epsilon: privacy parameter, which trades off utility and privacy. See + BoltOn paper for more description. + n_samples: number of individual samples in x + steps_per_epoch: Number of steps per training epoch, see super. + **kwargs: **kwargs + + Returns: + Output from super fit_generator method. + """ + if class_weight is None: + class_weight = self.calculate_class_weights(class_weight) + if n_samples is not None: + data_size = n_samples + elif hasattr(generator, 'shape'): + data_size = generator.shape[0] + elif hasattr(generator, '__len__'): + data_size = len(generator) + else: + raise ValueError('The number of samples could not be determined. ' + 'Please make sure that if you are using a generator' + 'to call this method directly with n_samples kwarg ' + 'passed.') + batch_size = self._validate_or_infer_batch_size(None, steps_per_epoch, + generator) + if batch_size is None: + batch_size = 32 + with self.optimizer(noise_distribution, + epsilon, + self.layers, + class_weight, + data_size, + batch_size) as _: + out = super(BoltOnModel, self).fit_generator( + generator, + class_weight=class_weight, + steps_per_epoch=steps_per_epoch, + **kwargs) + return out + + def calculate_class_weights(self, + class_weights=None, + class_counts=None, + num_classes=None): + """Calculates class weighting to be used in training. + + Args: + class_weights: str specifying type, array giving weights, or None. + class_counts: If class_weights is not None, then an array of + the number of samples for each class + num_classes: If class_weights is not None, then the number of + classes. + Returns: + class_weights as 1D tensor, to be passed to model's fit method. 
+ """ + # Value checking + class_keys = ['balanced'] + is_string = False + if isinstance(class_weights, str): + is_string = True + if class_weights not in class_keys: + raise ValueError('Detected string class_weights with ' + 'value: {0}, which is not one of {1}.' + 'Please select a valid class_weight type' + 'or pass an array'.format(class_weights, + class_keys)) + if class_counts is None: + raise ValueError('Class counts must be provided if using ' + 'class_weights=%s' % class_weights) + class_counts_shape = tf.Variable(class_counts, + trainable=False, + dtype=self._dtype).shape + if len(class_counts_shape) != 1: + raise ValueError('class counts must be a 1D array.' + 'Detected: {0}'.format(class_counts_shape)) + if num_classes is None: + raise ValueError('num_classes must be provided if using ' + 'class_weights=%s' % class_weights) + elif class_weights is not None: + if num_classes is None: + raise ValueError('You must pass a value for num_classes if ' + 'creating an array of class_weights') + # performing class weight calculation + if class_weights is None: + class_weights = 1 + elif is_string and class_weights == 'balanced': + num_samples = sum(class_counts) + weighted_counts = tf.dtypes.cast(tf.math.multiply(num_classes, + class_counts), + self._dtype) + class_weights = tf.Variable(num_samples, dtype=self._dtype) / \ + tf.Variable(weighted_counts, dtype=self._dtype) + else: + class_weights = _ops.convert_to_tensor_v2(class_weights) + if len(class_weights.shape) != 1: + raise ValueError('Detected class_weights shape: {0} instead of ' + '1D array'.format(class_weights.shape)) + if class_weights.shape[0] != num_classes: + raise ValueError( + 'Detected array length: {0} instead of: {1}'.format( + class_weights.shape[0], + num_classes)) + return class_weights diff --git a/tensorflow_privacy/privacy/bolt_on/models_test.py b/tensorflow_privacy/privacy/bolt_on/models_test.py new file mode 100644 index 0000000..a47e8b4 --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/models_test.py @@ -0,0 +1,548 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Unit testing for models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import tensorflow as tf +from tensorflow.python.framework import ops as _ops +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import losses +from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2 +from tensorflow.python.keras.regularizers import L1L2 +from tensorflow_privacy.privacy.bolt_on import models +from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin +from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn + + +class TestLoss(losses.Loss, StrongConvexMixin): + """Test loss function for testing BoltOn model.""" + + def __init__(self, reg_lambda, c_arg, radius_constant, name='test'): + super(TestLoss, self).__init__(name=name) + self.reg_lambda = reg_lambda + self.C = c_arg # pylint: disable=invalid-name + self.radius_constant = radius_constant + + def radius(self): + """Radius, R, of the hypothesis space W. + + W is a convex set that forms the hypothesis space. + + Returns: + radius + """ + return _ops.convert_to_tensor_v2(1, dtype=tf.float32) + + def gamma(self): + """Returns strongly convex parameter, gamma.""" + return _ops.convert_to_tensor_v2(1, dtype=tf.float32) + + def beta(self, class_weight): # pylint: disable=unused-argument + """Smoothness, beta. + + Args: + class_weight: the class weights as scalar or 1d tensor, where its + dimensionality is equal to the number of outputs. + + Returns: + Beta + """ + return _ops.convert_to_tensor_v2(1, dtype=tf.float32) + + def lipchitz_constant(self, class_weight): # pylint: disable=unused-argument + """Lipchitz constant, L. + + Args: + class_weight: class weights used + + Returns: + L + """ + return _ops.convert_to_tensor_v2(1, dtype=tf.float32) + + def call(self, y_true, y_pred): + """Loss function that is minimized at the mean of the input points.""" + return 0.5 * tf.reduce_sum( + tf.math.squared_difference(y_true, y_pred), + axis=1 + ) + + def max_class_weight(self, class_weight): + """the maximum weighting in class weights (max value) as a scalar tensor. + + Args: + class_weight: class weights used + + Returns: + maximum class weighting as tensor scalar + """ + if class_weight is None: + return 1 + raise ValueError('') + + def kernel_regularizer(self): + """Returns the kernel_regularizer to be used. + + Any subclass should override this method if they want a kernel_regularizer + (if required for the loss function to be StronglyConvex. + """ + return L1L2(l2=self.reg_lambda) + + +class TestOptimizer(OptimizerV2): + """Test optimizer used for testing BoltOn model.""" + + def __init__(self): + super(TestOptimizer, self).__init__('test') + + def compute_gradients(self): + return 0 + + def get_config(self): + return {} + + def _create_slots(self, var): + pass + + def _resource_apply_dense(self, grad, handle): + return grad + + def _resource_apply_sparse(self, grad, handle, indices): + return grad + + +class InitTests(keras_parameterized.TestCase): + """Tests for keras model initialization.""" + + @parameterized.named_parameters([ + {'testcase_name': 'normal', + 'n_outputs': 1, + }, + {'testcase_name': 'many outputs', + 'n_outputs': 100, + }, + ]) + def test_init_params(self, n_outputs): + """Test initialization of BoltOnModel. 
+ + Args: + n_outputs: number of output neurons + """ + # test valid domains for each variable + clf = models.BoltOnModel(n_outputs) + self.assertIsInstance(clf, models.BoltOnModel) + + @parameterized.named_parameters([ + {'testcase_name': 'invalid n_outputs', + 'n_outputs': -1, + }, + ]) + def test_bad_init_params(self, n_outputs): + """test bad initializations of BoltOnModel that should raise errors. + + Args: + n_outputs: number of output neurons + """ + # test invalid domains for each variable, especially noise + with self.assertRaises(ValueError): + models.BoltOnModel(n_outputs) + + @parameterized.named_parameters([ + {'testcase_name': 'string compile', + 'n_outputs': 1, + 'loss': TestLoss(1, 1, 1), + 'optimizer': 'adam', + }, + {'testcase_name': 'test compile', + 'n_outputs': 100, + 'loss': TestLoss(1, 1, 1), + 'optimizer': TestOptimizer(), + }, + ]) + def test_compile(self, n_outputs, loss, optimizer): + """Test compilation of BoltOnModel. + + Args: + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instantiated TestOptimizer instance + """ + # test compilation of valid tf.optimizer and tf.loss + with self.cached_session(): + clf = models.BoltOnModel(n_outputs) + clf.compile(optimizer, loss) + self.assertEqual(clf.loss, loss) + + @parameterized.named_parameters([ + {'testcase_name': 'Not strong loss', + 'n_outputs': 1, + 'loss': losses.BinaryCrossentropy(), + 'optimizer': 'adam', + }, + {'testcase_name': 'Not valid optimizer', + 'n_outputs': 1, + 'loss': TestLoss(1, 1, 1), + 'optimizer': 'ada', + } + ]) + def test_bad_compile(self, n_outputs, loss, optimizer): + """test bad compilations of BoltOnModel that should raise errors. + + Args: + n_outputs: number of output neurons + loss: instantiated TestLoss instance + optimizer: instantiated TestOptimizer instance + """ + # test compilaton of invalid tf.optimizer and non instantiated loss. + with self.cached_session(): + with self.assertRaises((ValueError, AttributeError)): + clf = models.BoltOnModel(n_outputs) + clf.compile(optimizer, loss) + + +def _cat_dataset(n_samples, input_dim, n_classes, batch_size, generator=False): + """Creates a categorically encoded dataset. + + Creates a categorically encoded dataset (y is categorical). + returns the specified dataset either as a static array or as a generator. + Will have evenly split samples across each output class. + Each output class will be a different point in the input space. + + Args: + n_samples: number of rows + input_dim: input dimensionality + n_classes: output dimensionality + batch_size: The desired batch_size + generator: False for array, True for generator + + Returns: + X as (n_samples, input_dim), Y as (n_samples, n_outputs) + """ + x_stack = [] + y_stack = [] + for i_class in range(n_classes): + x_stack.append( + tf.constant(1*i_class, tf.float32, (n_samples, input_dim)) + ) + y_stack.append( + tf.constant(i_class, tf.float32, (n_samples, n_classes)) + ) + x_set, y_set = tf.stack(x_stack), tf.stack(y_stack) + if generator: + dataset = tf.data.Dataset.from_tensor_slices( + (x_set, y_set) + ) + dataset = dataset.batch(batch_size=batch_size) + return dataset + return x_set, y_set + + +def _do_fit(n_samples, + input_dim, + n_outputs, + epsilon, + generator, + batch_size, + reset_n_samples, + optimizer, + loss, + distribution='laplace'): + """Instantiate necessary components for fitting and perform a model fit. 
+ + Args: + n_samples: number of samples in dataset + input_dim: the sample dimensionality + n_outputs: number of output neurons + epsilon: privacy parameter + generator: True to create a generator, False to use an iterator + batch_size: batch_size to use + reset_n_samples: True to set _samples to None prior to fitting. + False does nothing + optimizer: instance of TestOptimizer + loss: instance of TestLoss + distribution: distribution to get noise from. + + Returns: + BoltOnModel instsance + """ + clf = models.BoltOnModel(n_outputs) + clf.compile(optimizer, loss) + if generator: + x = _cat_dataset( + n_samples, + input_dim, + n_outputs, + batch_size, + generator=generator + ) + y = None + # x = x.batch(batch_size) + x = x.shuffle(n_samples//2) + batch_size = None + if reset_n_samples: + n_samples = None + clf.fit_generator(x, + n_samples=n_samples, + noise_distribution=distribution, + epsilon=epsilon) + else: + x, y = _cat_dataset( + n_samples, + input_dim, + n_outputs, + batch_size, + generator=generator) + if reset_n_samples: + n_samples = None + clf.fit(x, + y, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=distribution, + epsilon=epsilon) + return clf + + +class FitTests(keras_parameterized.TestCase): + """Test cases for keras model fitting.""" + + # @test_util.run_all_in_graph_and_eager_modes + @parameterized.named_parameters([ + {'testcase_name': 'iterator fit', + 'generator': False, + 'reset_n_samples': True, + }, + {'testcase_name': 'iterator fit no samples', + 'generator': False, + 'reset_n_samples': True, + }, + {'testcase_name': 'generator fit', + 'generator': True, + 'reset_n_samples': False, + }, + {'testcase_name': 'with callbacks', + 'generator': True, + 'reset_n_samples': False, + }, + ]) + def test_fit(self, generator, reset_n_samples): + """Tests fitting of BoltOnModel. + + Args: + generator: True for generator test, False for iterator test. + reset_n_samples: True to reset the n_samples to None, False does nothing + """ + loss = TestLoss(1, 1, 1) + optimizer = BoltOn(TestOptimizer(), loss) + n_classes = 2 + input_dim = 5 + epsilon = 1 + batch_size = 1 + n_samples = 10 + clf = _do_fit( + n_samples, + input_dim, + n_classes, + epsilon, + generator, + batch_size, + reset_n_samples, + optimizer, + loss, + ) + self.assertEqual(hasattr(clf, 'layers'), True) + + @parameterized.named_parameters([ + {'testcase_name': 'generator fit', + 'generator': True, + }, + ]) + def test_fit_gen(self, generator): + """Tests the fit_generator method of BoltOnModel. + + Args: + generator: True to test with a generator dataset + """ + loss = TestLoss(1, 1, 1) + optimizer = TestOptimizer() + n_classes = 2 + input_dim = 5 + batch_size = 1 + n_samples = 10 + clf = models.BoltOnModel(n_classes) + clf.compile(optimizer, loss) + x = _cat_dataset( + n_samples, + input_dim, + n_classes, + batch_size, + generator=generator + ) + x = x.batch(batch_size) + x = x.shuffle(n_samples // 2) + clf.fit_generator(x, n_samples=n_samples) + self.assertEqual(hasattr(clf, 'layers'), True) + + @parameterized.named_parameters([ + {'testcase_name': 'iterator no n_samples', + 'generator': True, + 'reset_n_samples': True, + 'distribution': 'laplace' + }, + {'testcase_name': 'invalid distribution', + 'generator': True, + 'reset_n_samples': True, + 'distribution': 'not_valid' + }, + ]) + def test_bad_fit(self, generator, reset_n_samples, distribution): + """Tests fitting with invalid parameters, which should raise an error. 
+ + Args: + generator: True to test with generator, False is iterator + reset_n_samples: True to reset the n_samples param to None prior to + passing it to fit + distribution: distribution to get noise from. + """ + with self.assertRaises(ValueError): + loss = TestLoss(1, 1, 1) + optimizer = TestOptimizer() + n_classes = 2 + input_dim = 5 + epsilon = 1 + batch_size = 1 + n_samples = 10 + _do_fit( + n_samples, + input_dim, + n_classes, + epsilon, + generator, + batch_size, + reset_n_samples, + optimizer, + loss, + distribution + ) + + @parameterized.named_parameters([ + {'testcase_name': 'None class_weights', + 'class_weights': None, + 'class_counts': None, + 'num_classes': None, + 'result': 1}, + {'testcase_name': 'class weights array', + 'class_weights': [1, 1], + 'class_counts': [1, 1], + 'num_classes': 2, + 'result': [1, 1]}, + {'testcase_name': 'class weights balanced', + 'class_weights': 'balanced', + 'class_counts': [1, 1], + 'num_classes': 2, + 'result': [1, 1]}, + ]) + def test_class_calculate(self, + class_weights, + class_counts, + num_classes, + result): + """Tests the BOltonModel calculate_class_weights method. + + Args: + class_weights: the class_weights to use + class_counts: count of number of samples for each class + num_classes: number of outputs neurons + result: expected result + """ + clf = models.BoltOnModel(1, 1) + expected = clf.calculate_class_weights(class_weights, + class_counts, + num_classes) + + if hasattr(expected, 'numpy'): + expected = expected.numpy() + self.assertAllEqual( + expected, + result + ) + @parameterized.named_parameters([ + {'testcase_name': 'class weight not valid str', + 'class_weights': 'not_valid', + 'class_counts': 1, + 'num_classes': 1, + 'err_msg': 'Detected string class_weights with value: not_valid'}, + {'testcase_name': 'no class counts', + 'class_weights': 'balanced', + 'class_counts': None, + 'num_classes': 1, + 'err_msg': 'Class counts must be provided if ' + 'using class_weights=balanced'}, + {'testcase_name': 'no num classes', + 'class_weights': 'balanced', + 'class_counts': [1], + 'num_classes': None, + 'err_msg': 'num_classes must be provided if ' + 'using class_weights=balanced'}, + {'testcase_name': 'class counts not array', + 'class_weights': 'balanced', + 'class_counts': 1, + 'num_classes': None, + 'err_msg': 'class counts must be a 1D array.'}, + {'testcase_name': 'class counts array, no num classes', + 'class_weights': [1], + 'class_counts': None, + 'num_classes': None, + 'err_msg': 'You must pass a value for num_classes if ' + 'creating an array of class_weights'}, + {'testcase_name': 'class counts array, improper shape', + 'class_weights': [[1], [1]], + 'class_counts': None, + 'num_classes': 2, + 'err_msg': 'Detected class_weights shape'}, + {'testcase_name': 'class counts array, wrong number classes', + 'class_weights': [1, 1, 1], + 'class_counts': None, + 'num_classes': 2, + 'err_msg': 'Detected array length:'}, + ]) + + def test_class_errors(self, + class_weights, + class_counts, + num_classes, + err_msg): + """Tests the BOltonModel calculate_class_weights method. + + This test passes invalid params which should raise the expected errors. + + Args: + class_weights: the class_weights to use. + class_counts: count of number of samples for each class. + num_classes: number of outputs neurons. + err_msg: The expected error message. 
+ """ + clf = models.BoltOnModel(1, 1) + with self.assertRaisesRegexp(ValueError, err_msg): # pylint: disable=deprecated-method + clf.calculate_class_weights(class_weights, + class_counts, + num_classes) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/bolt_on/optimizers.py b/tensorflow_privacy/privacy/bolt_on/optimizers.py new file mode 100644 index 0000000..eac6641 --- /dev/null +++ b/tensorflow_privacy/privacy/bolt_on/optimizers.py @@ -0,0 +1,388 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BoltOn Optimizer for Bolt-on method.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.optimizer_v2 import optimizer_v2 +from tensorflow.python.ops import math_ops +from tensorflow_privacy.privacy.bolt_on.losses import StrongConvexMixin + +_accepted_distributions = ['laplace'] # implemented distributions for noising + + +class GammaBetaDecreasingStep( + optimizer_v2.learning_rate_schedule.LearningRateSchedule): + """Computes LR as minimum of 1/beta and 1/(gamma * step) at each step. + + This is a required step for privacy guarantees. + """ + + def __init__(self): + self.is_init = False + self.beta = None + self.gamma = None + + def __call__(self, step): + """Computes and returns the learning rate. + + Args: + step: the current iteration number + + Returns: + decayed learning rate to minimum of 1/beta and 1/(gamma * step) as per + the BoltOn privacy requirements. + """ + if not self.is_init: + raise AttributeError('Please initialize the {0} Learning Rate Scheduler.' + 'This is performed automatically by using the ' + '{1} as a context manager, ' + 'as desired'.format(self.__class__.__name__, + BoltOn.__class__.__name__ + ) + ) + dtype = self.beta.dtype + one = tf.constant(1, dtype) + return tf.math.minimum(tf.math.reduce_min(one/self.beta), + one/(self.gamma*math_ops.cast(step, dtype)) + ) + + def get_config(self): + """Return config to setup the learning rate scheduler.""" + return {'beta': self.beta, 'gamma': self.gamma} + + def initialize(self, beta, gamma): + """Setups scheduler with beta and gamma values from the loss function. + + Meant to be used with .fit as the loss params may depend on values passed to + fit. + + Args: + beta: Smoothness value. See StrongConvexMixin + gamma: Strong Convexity parameter. See StrongConvexMixin. + """ + self.is_init = True + self.beta = beta + self.gamma = gamma + + def de_initialize(self): + """De initialize post fit, as another fit call may use other parameters.""" + self.is_init = False + self.beta = None + self.gamma = None + + +class BoltOn(optimizer_v2.OptimizerV2): + """Wrap another tf optimizer with BoltOn privacy protocol. + + BoltOn optimizer wraps another tf optimizer to be used + as the visible optimizer to the tf model. 
No matter the optimizer + passed, "BoltOn" enables the bolt-on model to control the learning rate + based on the strongly convex loss. + + To use the BoltOn method, you must: + 1. instantiate it with an instantiated tf optimizer and StrongConvexLoss. + 2. use it as a context manager around your .fit method internals. + + This can be accomplished by the following: + optimizer = tf.optimizers.SGD() + loss = privacy.bolt_on.losses.StrongConvexBinaryCrossentropy() + bolton = BoltOn(optimizer, loss) + with bolton(*args) as _: + model.fit() + The args required for the context manager can be found in the __call__ + method. + + For more details on the strong convexity requirements, see: + Bolt-on Differential Privacy for Scalable Stochastic Gradient + Descent-based Analytics by Xi Wu et. al. + """ + + def __init__(self, # pylint: disable=super-init-not-called + optimizer, + loss, + dtype=tf.float32, + ): + """Constructor. + + Args: + optimizer: Optimizer_v2 or subclass to be used as the optimizer + (wrapped). + loss: StrongConvexLoss function that the model is being compiled with. + dtype: dtype + """ + + if not isinstance(loss, StrongConvexMixin): + raise ValueError('loss function must be a Strongly Convex and therefore ' + 'extend the StrongConvexMixin.') + self._private_attributes = [ + '_internal_optimizer', + 'dtype', + 'noise_distribution', + 'epsilon', + 'loss', + 'class_weights', + 'input_dim', + 'n_samples', + 'layers', + 'batch_size', + '_is_init', + ] + self._internal_optimizer = optimizer + self.learning_rate = GammaBetaDecreasingStep() # use the BoltOn Learning + # rate scheduler, as required for privacy guarantees. This will still need + # to get values from the loss function near the time that .fit is called + # on the model (when this optimizer will be called as a context manager) + self.dtype = dtype + self.loss = loss + self._is_init = False + + def get_config(self): + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + return self._internal_optimizer.get_config() + + def project_weights_to_r(self, force=False): + """Normalize the weights to the R-ball. + + Args: + force: True to normalize regardless of previous weight values. + False to check if weights > R-ball and only normalize then. + + Raises: + Exception: If not called from inside this optimizer context. + """ + if not self._is_init: + raise Exception('This method must be called from within the optimizer\'s ' + 'context.') + radius = self.loss.radius() + for layer in self.layers: + weight_norm = tf.norm(layer.kernel, axis=0) + if force: + layer.kernel = layer.kernel / (weight_norm / radius) + else: + layer.kernel = tf.cond( + tf.reduce_sum(tf.cast(weight_norm > radius, dtype=self.dtype)) > 0, + lambda k=layer.kernel, w=weight_norm, r=radius: k / (w / r), # pylint: disable=cell-var-from-loop + lambda k=layer.kernel: k # pylint: disable=cell-var-from-loop + ) + + def get_noise(self, input_dim, output_dim): + """Sample noise to be added to weights for privacy guarantee. + + Args: + input_dim: the input dimensionality for the weights + output_dim: the output dimensionality for the weights + + Returns: + Noise in shape of layer's weights to be added to the weights. + + Raises: + Exception: If not called from inside this optimizer's context. 
+ """ + if not self._is_init: + raise Exception('This method must be called from within the optimizer\'s ' + 'context.') + loss = self.loss + distribution = self.noise_distribution.lower() + if distribution == _accepted_distributions[0]: # laplace + per_class_epsilon = self.epsilon / (output_dim) + l2_sensitivity = (2 * + loss.lipchitz_constant(self.class_weights)) / \ + (loss.gamma() * self.n_samples * self.batch_size) + unit_vector = tf.random.normal(shape=(input_dim, output_dim), + mean=0, + seed=1, + stddev=1.0, + dtype=self.dtype) + unit_vector = unit_vector / tf.math.sqrt( + tf.reduce_sum(tf.math.square(unit_vector), axis=0) + ) + + beta = l2_sensitivity / per_class_epsilon + alpha = input_dim # input_dim + gamma = tf.random.gamma([output_dim], + alpha, + beta=1 / beta, + seed=1, + dtype=self.dtype + ) + return unit_vector * gamma + raise NotImplementedError('Noise distribution: {0} is not ' + 'a valid distribution'.format(distribution)) + + def from_config(self, *args, **kwargs): # pylint: disable=arguments-differ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + return self._internal_optimizer.from_config(*args, **kwargs) + + def __getattr__(self, name): + """Get attr. + + return _internal_optimizer off self instance, and everything else + from the _internal_optimizer instance. + + Args: + name: Name of attribute to get from this or aggregate optimizer. + + Returns: + attribute from BoltOn if specified to come from self, else + from _internal_optimizer. + """ + if name == '_private_attributes' or name in self._private_attributes: + return getattr(self, name) + optim = object.__getattribute__(self, '_internal_optimizer') + try: + return object.__getattribute__(optim, name) + except AttributeError: + raise AttributeError( + "Neither '{0}' nor '{1}' object has attribute '{2}'" + "".format(self.__class__.__name__, + self._internal_optimizer.__class__.__name__, + name) + ) + + def __setattr__(self, key, value): + """Set attribute to self instance if its the internal optimizer. + + Reroute everything else to the _internal_optimizer. + + Args: + key: attribute name + value: attribute value + """ + if key == '_private_attributes': + object.__setattr__(self, key, value) + elif key in self._private_attributes: + object.__setattr__(self, key, value) + else: + setattr(self._internal_optimizer, key, value) + + def _resource_apply_dense(self, *args, **kwargs): # pylint: disable=arguments-differ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + return self._internal_optimizer._resource_apply_dense(*args, **kwargs) # pylint: disable=protected-access + + def _resource_apply_sparse(self, *args, **kwargs): # pylint: disable=arguments-differ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + return self._internal_optimizer._resource_apply_sparse(*args, **kwargs) # pylint: disable=protected-access + + def get_updates(self, loss, params): + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + out = self._internal_optimizer.get_updates(loss, params) + self.project_weights_to_r() + return out + + def apply_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + out = self._internal_optimizer.apply_gradients(*args, **kwargs) + self.project_weights_to_r() + return out + + def minimize(self, *args, **kwargs): # pylint: disable=arguments-differ + """Reroutes to _internal_optimizer. 
See super/_internal_optimizer.""" + out = self._internal_optimizer.minimize(*args, **kwargs) + self.project_weights_to_r() + return out + + def _compute_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ,protected-access + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + return self._internal_optimizer._compute_gradients(*args, **kwargs) # pylint: disable=protected-access + + def get_gradients(self, *args, **kwargs): # pylint: disable=arguments-differ + """Reroutes to _internal_optimizer. See super/_internal_optimizer.""" + return self._internal_optimizer.get_gradients(*args, **kwargs) + + def __enter__(self): + """Context manager call at the beginning of with statement. + + Returns: + self, to be used in context manager + """ + self._is_init = True + return self + + def __call__(self, + noise_distribution, + epsilon, + layers, + class_weights, + n_samples, + batch_size): + """Accepts required values for bolton method from context entry point. + + Stores them on the optimizer for use throughout fitting. + + Args: + noise_distribution: the noise distribution to pick. + see _accepted_distributions and get_noise for possible values. + epsilon: privacy parameter. Lower gives more privacy but less utility. + layers: list of Keras/Tensorflow layers. Can be found as model.layers + class_weights: class_weights used, which may either be a scalar or 1D + tensor with dim == n_classes. + n_samples: number of rows/individual samples in the training set + batch_size: batch size used. + + Returns: + self, to be used by the __enter__ method for context. + """ + if epsilon <= 0: + raise ValueError('Detected epsilon: {0}. ' + 'Valid range is 0 < epsilon = l2_norm_clip, tf.float32) - 0.5 + + preprocessed_clipped_fraction_record = ( + self._clipped_fraction_query.preprocess_record( + params.clipped_fraction_params, was_clipped)) + + return preprocessed_sum_record, preprocessed_clipped_fraction_record + + def accumulate_preprocessed_record( + self, sample_state, preprocessed_record, weight=1): + """See base class.""" + preprocessed_sum_record, preprocessed_clipped_fraction_record = preprocessed_record + sum_state = self._sum_query.accumulate_preprocessed_record( + sample_state.sum_state, preprocessed_sum_record) + + clipped_fraction_state = self._clipped_fraction_query.accumulate_preprocessed_record( + sample_state.clipped_fraction_state, + preprocessed_clipped_fraction_record) + return self._SampleState(sum_state, clipped_fraction_state) + + def merge_sample_states(self, sample_state_1, sample_state_2): + """See base class.""" + return self._SampleState( + self._sum_query.merge_sample_states( + sample_state_1.sum_state, + sample_state_2.sum_state), + self._clipped_fraction_query.merge_sample_states( + sample_state_1.clipped_fraction_state, + sample_state_2.clipped_fraction_state)) + + def get_noised_result(self, sample_state, global_state): + """See base class.""" + gs = global_state + + noised_vectors, sum_state = self._sum_query.get_noised_result( + sample_state.sum_state, gs.sum_state) + del sum_state # Unused. To be set explicitly later. + + clipped_fraction_result, new_clipped_fraction_state = ( + self._clipped_fraction_query.get_noised_result( + sample_state.clipped_fraction_state, + gs.clipped_fraction_state)) + + # Unshift clipped percentile by 0.5. (See comment in accumulate_record.) + clipped_quantile = clipped_fraction_result + 0.5 + unclipped_quantile = 1.0 - clipped_quantile + + # Protect against out-of-range estimates. 
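+    # The clamp below keeps the (possibly noisy) unclipped-fraction estimate
+    # in [0, 1]. Combined with the gradient step further down, this bounds the
+    # per-round change of l2_norm_clip by the learning rate, and the final
+    # tf.maximum keeps the new clipping norm non-negative.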
+ unclipped_quantile = tf.minimum(1.0, tf.maximum(0.0, unclipped_quantile)) + + # Loss function is convex, with derivative in [-1, 1], and minimized when + # the true quantile matches the target. + loss_grad = unclipped_quantile - global_state.target_unclipped_quantile + + new_l2_norm_clip = gs.l2_norm_clip - global_state.learning_rate * loss_grad + new_l2_norm_clip = tf.maximum(0.0, new_l2_norm_clip) + + new_sum_stddev = new_l2_norm_clip * global_state.noise_multiplier + new_sum_query_global_state = self._sum_query.make_global_state( + l2_norm_clip=new_l2_norm_clip, + stddev=new_sum_stddev) + + new_global_state = global_state._replace( + l2_norm_clip=new_l2_norm_clip, + sum_state=new_sum_query_global_state, + clipped_fraction_state=new_clipped_fraction_state) + + return noised_vectors, new_global_state + + +class QuantileAdaptiveClipAverageQuery(normalized_query.NormalizedQuery): + """DPQuery for average queries with adaptive clipping. + + Clipping norm is tuned adaptively to converge to a value such that a specified + quantile of updates are clipped. + + Note that we use "fixed-denominator" estimation: the denominator should be + specified as the expected number of records per sample. Accumulating the + denominator separately would also be possible but would be produce a higher + variance estimator. + """ + + def __init__( + self, + initial_l2_norm_clip, + noise_multiplier, + denominator, + target_unclipped_quantile, + learning_rate, + clipped_count_stddev, + expected_num_records): + """Initializes the AdaptiveClipAverageQuery. + + Args: + initial_l2_norm_clip: The initial value of clipping norm. + noise_multiplier: The multiplier of the l2_norm_clip to make the stddev of + the noise. + denominator: The normalization constant (applied after noise is added to + the sum). + target_unclipped_quantile: The desired quantile of updates which should be + clipped. + learning_rate: The learning rate for the clipping norm adaptation. A + rate of r means that the clipping norm will change by a maximum of r at + each step. The maximum is attained when |clip - target| is 1.0. + clipped_count_stddev: The stddev of the noise added to the clipped_count. + Since the sensitivity of the clipped count is 0.5, as a rule of thumb it + should be about 0.5 for reasonable privacy. + expected_num_records: The expected number of records, used to estimate the + clipped count quantile. + """ + numerator_query = QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip, + noise_multiplier, + target_unclipped_quantile, + learning_rate, + clipped_count_stddev, + expected_num_records) + super(QuantileAdaptiveClipAverageQuery, self).__init__( + numerator_query=numerator_query, + denominator=denominator) diff --git a/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py new file mode 100644 index 0000000..e7521d5 --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/quantile_adaptive_clip_sum_query_test.py @@ -0,0 +1,296 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for QuantileAdaptiveClipSumQuery.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.dp_query import quantile_adaptive_clip_sum_query +from tensorflow_privacy.privacy.dp_query import test_utils + +tf.enable_eager_execution() + + +class QuantileAdaptiveClipSumQueryTest(tf.test.TestCase): + + def test_sum_no_clip_no_noise(self): + record1 = tf.constant([2.0, 0.0]) + record2 = tf.constant([-1.0, 1.0]) + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=10.0, + noise_multiplier=0.0, + target_unclipped_quantile=1.0, + learning_rate=0.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = query_result.numpy() + expected = [1.0, 1.0] + self.assertAllClose(result, expected) + + def test_sum_with_clip_no_noise(self): + record1 = tf.constant([-6.0, 8.0]) # Clipped to [-3.0, 4.0]. + record2 = tf.constant([4.0, -3.0]) # Not clipped. + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=5.0, + noise_multiplier=0.0, + target_unclipped_quantile=1.0, + learning_rate=0.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = query_result.numpy() + expected = [1.0, 1.0] + self.assertAllClose(result, expected) + + def test_sum_with_noise(self): + record1, record2 = 2.71828, 3.14159 + stddev = 1.0 + clip = 5.0 + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=clip, + noise_multiplier=stddev / clip, + target_unclipped_quantile=1.0, + learning_rate=0.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + noised_sums = [] + for _ in range(1000): + query_result, _ = test_utils.run_query(query, [record1, record2]) + noised_sums.append(query_result.numpy()) + + result_stddev = np.std(noised_sums) + self.assertNear(result_stddev, stddev, 0.1) + + def test_average_no_noise(self): + record1 = tf.constant([5.0, 0.0]) # Clipped to [3.0, 0.0]. + record2 = tf.constant([-1.0, 2.0]) # Not clipped.
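+    # record1 has L2 norm 5.0 and is clipped down to [3.0, 0.0]; record2 has
+    # norm sqrt(5) ~ 2.24 and passes through. The clipped sum is [2.0, 2.0],
+    # and dividing by denominator=2.0 gives the expected average [1.0, 1.0].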
+ + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipAverageQuery( + initial_l2_norm_clip=3.0, + noise_multiplier=0.0, + denominator=2.0, + target_unclipped_quantile=1.0, + learning_rate=0.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + query_result, _ = test_utils.run_query(query, [record1, record2]) + result = query_result.numpy() + expected_average = [1.0, 1.0] + self.assertAllClose(result, expected_average) + + def test_average_with_noise(self): + record1, record2 = 2.71828, 3.14159 + sum_stddev = 1.0 + denominator = 2.0 + clip = 3.0 + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipAverageQuery( + initial_l2_norm_clip=clip, + noise_multiplier=sum_stddev / clip, + denominator=denominator, + target_unclipped_quantile=1.0, + learning_rate=0.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + noised_averages = [] + for _ in range(1000): + query_result, _ = test_utils.run_query(query, [record1, record2]) + noised_averages.append(query_result.numpy()) + + result_stddev = np.std(noised_averages) + avg_stddev = sum_stddev / denominator + self.assertNear(result_stddev, avg_stddev, 0.1) + + def test_adaptation_target_zero(self): + record1 = tf.constant([8.5]) + record2 = tf.constant([-7.25]) + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=10.0, + noise_multiplier=0.0, + target_unclipped_quantile=0.0, + learning_rate=1.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + global_state = query.initial_global_state() + + initial_clip = global_state.l2_norm_clip + self.assertAllClose(initial_clip, 10.0) + + # On the first two iterations, nothing is clipped, so the clip goes down + # by 1.0 (the learning rate). When the clip reaches 8.0, one record is + # clipped, so the clip goes down by only 0.5. After two more iterations, + # both records are clipped, and the clip norm stays there (at 7.0). + + expected_sums = [1.25, 1.25, 0.75, 0.25, 0.0] + expected_clips = [9.0, 8.0, 7.5, 7.0, 7.0] + for expected_sum, expected_clip in zip(expected_sums, expected_clips): + actual_sum, global_state = test_utils.run_query( + query, [record1, record2], global_state) + + actual_clip = global_state.l2_norm_clip + + self.assertAllClose(actual_clip.numpy(), expected_clip) + self.assertAllClose(actual_sum.numpy(), (expected_sum,)) + + def test_adaptation_target_one(self): + record1 = tf.constant([-1.5]) + record2 = tf.constant([2.75]) + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=0.0, + noise_multiplier=0.0, + target_unclipped_quantile=1.0, + learning_rate=1.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + global_state = query.initial_global_state() + + initial_clip = global_state.l2_norm_clip + self.assertAllClose(initial_clip, 0.0) + + # On the first two iterations, both are clipped, so the clip goes up + # by 1.0 (the learning rate). When the clip reaches 2.0, only one record is + # clipped, so the clip goes up by only 0.5. After two more iterations, + # both records are clipped, and the clip norm stays there (at 3.0). 
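+    # Working through the update: with learning_rate=1.0 the clip moves by
+    # (target_unclipped_quantile - unclipped_fraction) each round. At clips
+    # 0.0 and 1.0 both records (norms 1.5 and 2.75) exceed the clip, so it
+    # rises by 1.0 each round; at 2.0 and 2.5 only record2 exceeds it, so it
+    # rises by 0.5; at 3.0 the unclipped fraction equals the target of 1.0
+    # and the clip stops moving. Each expected sum uses the clip in effect
+    # before that round's update: 0.0, 0.0, 0.5, 1.0, and finally
+    # -1.5 + 2.75 = 1.25.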
+ + expected_sums = [0.0, 0.0, 0.5, 1.0, 1.25] + expected_clips = [1.0, 2.0, 2.5, 3.0, 3.0] + for expected_sum, expected_clip in zip(expected_sums, expected_clips): + actual_sum, global_state = test_utils.run_query( + query, [record1, record2], global_state) + + actual_clip = global_state.l2_norm_clip + + self.assertAllClose(actual_clip.numpy(), expected_clip) + self.assertAllClose(actual_sum.numpy(), (expected_sum,)) + + def test_adaptation_linspace(self): + # 100 records equally spaced from 0 to 10 in 0.1 increments. + # Test that with a decaying learning rate we converge to the correct + # median with error at most 0.1. + records = [tf.constant(x) for x in np.linspace( + 0.0, 10.0, num=21, dtype=np.float32)] + + learning_rate = tf.Variable(1.0) + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=0.0, + noise_multiplier=0.0, + target_unclipped_quantile=0.5, + learning_rate=learning_rate, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + global_state = query.initial_global_state() + + for t in range(50): + tf.assign(learning_rate, 1.0 / np.sqrt(t+1)) + _, global_state = test_utils.run_query(query, records, global_state) + + actual_clip = global_state.l2_norm_clip + + if t > 40: + self.assertNear(actual_clip, 5.0, 0.25) + + def test_adaptation_all_equal(self): + # 100 equal records. Test that with a decaying learning rate we converge to + # that record and bounce around it. + records = [tf.constant(5.0)] * 20 + + learning_rate = tf.Variable(1.0) + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=0.0, + noise_multiplier=0.0, + target_unclipped_quantile=0.5, + learning_rate=learning_rate, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + global_state = query.initial_global_state() + + for t in range(50): + tf.assign(learning_rate, 1.0 / np.sqrt(t+1)) + _, global_state = test_utils.run_query(query, records, global_state) + + actual_clip = global_state.l2_norm_clip + + if t > 40: + self.assertNear(actual_clip, 5.0, 0.25) + + def test_ledger(self): + record1 = tf.constant([8.5]) + record2 = tf.constant([-7.25]) + + population_size = tf.Variable(0) + selection_probability = tf.Variable(1.0) + + query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery( + initial_l2_norm_clip=10.0, + noise_multiplier=1.0, + target_unclipped_quantile=0.0, + learning_rate=1.0, + clipped_count_stddev=0.0, + expected_num_records=2.0) + + query = privacy_ledger.QueryWithLedger( + query, population_size, selection_probability) + + # First sample. + tf.assign(population_size, 10) + tf.assign(selection_probability, 0.1) + _, global_state = test_utils.run_query(query, [record1, record2]) + + expected_queries = [[10.0, 10.0], [0.5, 0.0]] + formatted = query.ledger.get_formatted_ledger_eager() + sample_1 = formatted[0] + self.assertAllClose(sample_1.population_size, 10.0) + self.assertAllClose(sample_1.selection_probability, 0.1) + self.assertAllClose(sample_1.queries, expected_queries) + + # Second sample. 
+ tf.assign(population_size, 20) + tf.assign(selection_probability, 0.2) + test_utils.run_query(query, [record1, record2], global_state) + + formatted = query.ledger.get_formatted_ledger_eager() + sample_1, sample_2 = formatted + self.assertAllClose(sample_1.population_size, 10.0) + self.assertAllClose(sample_1.selection_probability, 0.1) + self.assertAllClose(sample_1.queries, expected_queries) + + expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]] + self.assertAllClose(sample_2.population_size, 20.0) + self.assertAllClose(sample_2.selection_probability, 0.2) + self.assertAllClose(sample_2.queries, expected_queries_2) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/dp_query/test_utils.py b/tensorflow_privacy/privacy/dp_query/test_utils.py new file mode 100644 index 0000000..18456b3 --- /dev/null +++ b/tensorflow_privacy/privacy/dp_query/test_utils.py @@ -0,0 +1,49 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utility methods for testing private queries. + +Utility methods for testing private queries. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +def run_query(query, records, global_state=None, weights=None): + """Executes query on the given set of records as a single sample. + + Args: + query: A PrivateQuery to run. + records: An iterable containing records to pass to the query. + global_state: The current global state. If None, an initial global state is + generated. + weights: An optional iterable containing the weights of the records. + + Returns: + A tuple (result, new_global_state) where "result" is the result of the + query and "new_global_state" is the updated global state. + """ + if not global_state: + global_state = query.initial_global_state() + params = query.derive_sample_params(global_state) + sample_state = query.initial_sample_state(next(iter(records))) + if weights is None: + for record in records: + sample_state = query.accumulate_record(params, sample_state, record) + else: + for weight, record in zip(weights, records): + sample_state = query.accumulate_record( + params, sample_state, record, weight) + return query.get_noised_result(sample_state, global_state) diff --git a/tensorflow_privacy/privacy/optimizers/__init__.py b/tensorflow_privacy/privacy/optimizers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py new file mode 100644 index 0000000..fecfd5b --- /dev/null +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer.py @@ -0,0 +1,239 @@ +# Copyright 2018, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Differentially private optimizers for TensorFlow.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from distutils.version import LooseVersion +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.dp_query import gaussian_query + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + nest = tf.contrib.framework.nest +else: + nest = tf.nest + + +def make_optimizer_class(cls): + """Constructs a DP optimizer class from an existing one.""" + if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + parent_code = tf.train.Optimizer.compute_gradients.__code__ + child_code = cls.compute_gradients.__code__ + GATE_OP = tf.train.Optimizer.GATE_OP # pylint: disable=invalid-name + else: + parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ # pylint: disable=protected-access + child_code = cls._compute_gradients.__code__ # pylint: disable=protected-access + GATE_OP = None # pylint: disable=invalid-name + if child_code is not parent_code: + tf.logging.warning( + 'WARNING: Calling make_optimizer_class() on class %s that overrides ' + 'method compute_gradients(). Check to ensure that ' + 'make_optimizer_class() does not interfere with overridden version.', + cls.__name__) + + class DPOptimizerClass(cls): + """Differentially private subclass of given class cls.""" + + def __init__( + self, + dp_sum_query, + num_microbatches=None, + unroll_microbatches=False, + *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args + **kwargs): + """Initialize the DPOptimizerClass. + + Args: + dp_sum_query: DPQuery object, specifying differential privacy + mechanism to use. + num_microbatches: How many microbatches into which the minibatch is + split. If None, will default to the size of the minibatch, and + per-example gradients will be computed. + unroll_microbatches: If true, processes microbatches within a Python + loop instead of a tf.while_loop. Can be used if using a tf.while_loop + raises an exception. + """ + super(DPOptimizerClass, self).__init__(*args, **kwargs) + self._dp_sum_query = dp_sum_query + self._num_microbatches = num_microbatches + self._global_state = self._dp_sum_query.initial_global_state() + # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug. + # Beware: When num_microbatches is large (>100), enabling this parameter + # may cause an OOM error. + self._unroll_microbatches = unroll_microbatches + + def compute_gradients(self, + loss, + var_list, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None, + gradient_tape=None): + if callable(loss): + # TF is running in Eager mode, check we received a vanilla tape. 
+ if not gradient_tape: + raise ValueError('When in Eager mode, a tape needs to be passed.') + + vector_loss = loss() + if self._num_microbatches is None: + self._num_microbatches = tf.shape(vector_loss)[0] + sample_state = self._dp_sum_query.initial_sample_state(var_list) + microbatches_losses = tf.reshape(vector_loss, + [self._num_microbatches, -1]) + sample_params = ( + self._dp_sum_query.derive_sample_params(self._global_state)) + + def process_microbatch(i, sample_state): + """Process one microbatch (record) with privacy helper.""" + microbatch_loss = tf.reduce_mean(tf.gather(microbatches_losses, [i])) + grads = gradient_tape.gradient(microbatch_loss, var_list) + sample_state = self._dp_sum_query.accumulate_record( + sample_params, sample_state, grads) + return sample_state + + for idx in range(self._num_microbatches): + sample_state = process_microbatch(idx, sample_state) + + grad_sums, self._global_state = ( + self._dp_sum_query.get_noised_result( + sample_state, self._global_state)) + + def normalize(v): + return v / tf.cast(self._num_microbatches, tf.float32) + + final_grads = nest.map_structure(normalize, grad_sums) + + grads_and_vars = list(zip(final_grads, var_list)) + return grads_and_vars + + else: + # TF is running in graph mode, check we did not receive a gradient tape. + if gradient_tape: + raise ValueError('When in graph mode, a tape should not be passed.') + + # Note: it would be closer to the correct i.i.d. sampling of records if + # we sampled each microbatch from the appropriate binomial distribution, + # although that still wouldn't be quite correct because it would be + # sampling from the dataset without replacement. + if self._num_microbatches is None: + self._num_microbatches = tf.shape(loss)[0] + + microbatches_losses = tf.reshape(loss, [self._num_microbatches, -1]) + sample_params = ( + self._dp_sum_query.derive_sample_params(self._global_state)) + + def process_microbatch(i, sample_state): + """Process one microbatch (record) with privacy helper.""" + grads, _ = zip(*super(cls, self).compute_gradients( + tf.reduce_mean(tf.gather(microbatches_losses, + [i])), var_list, gate_gradients, + aggregation_method, colocate_gradients_with_ops, grad_loss)) + grads_list = [ + g if g is not None else tf.zeros_like(v) + for (g, v) in zip(list(grads), var_list) + ] + sample_state = self._dp_sum_query.accumulate_record( + sample_params, sample_state, grads_list) + return sample_state + + if var_list is None: + var_list = ( + tf.trainable_variables() + tf.get_collection( + tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) + + sample_state = self._dp_sum_query.initial_sample_state(var_list) + + if self._unroll_microbatches: + for idx in range(self._num_microbatches): + sample_state = process_microbatch(idx, sample_state) + else: + # Use of while_loop here requires that sample_state be a nested + # structure of tensors. In general, we would prefer to allow it to be + # an arbitrary opaque type. 
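+        # cond_fn/body_fn below implement a counted tf.while_loop: body_fn
+        # runs process_microbatch for i = 0 .. num_microbatches - 1, threading
+        # the accumulated sample_state through each iteration.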
+ cond_fn = lambda i, _: tf.less(i, self._num_microbatches) + body_fn = lambda i, state: [tf.add(i, 1), process_microbatch(i, state)] # pylint: disable=line-too-long + idx = tf.constant(0) + _, sample_state = tf.while_loop(cond_fn, body_fn, [idx, sample_state]) + + grad_sums, self._global_state = ( + self._dp_sum_query.get_noised_result( + sample_state, self._global_state)) + + def normalize(v): + return tf.truediv(v, tf.cast(self._num_microbatches, tf.float32)) + + final_grads = nest.map_structure(normalize, grad_sums) + + return list(zip(final_grads, var_list)) + + return DPOptimizerClass + + +def make_gaussian_optimizer_class(cls): + """Constructs a DP optimizer with Gaussian averaging of updates.""" + + class DPGaussianOptimizerClass(make_optimizer_class(cls)): + """DP subclass of given class cls using Gaussian averaging.""" + + def __init__( + self, + l2_norm_clip, + noise_multiplier, + num_microbatches=None, + ledger=None, + unroll_microbatches=False, + *args, # pylint: disable=keyword-arg-before-vararg + **kwargs): + dp_sum_query = gaussian_query.GaussianSumQuery( + l2_norm_clip, l2_norm_clip * noise_multiplier) + + if ledger: + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, + ledger=ledger) + + super(DPGaussianOptimizerClass, self).__init__( + dp_sum_query, + num_microbatches, + unroll_microbatches, + *args, + **kwargs) + + @property + def ledger(self): + return self._dp_sum_query.ledger + + return DPGaussianOptimizerClass + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + AdagradOptimizer = tf.train.AdagradOptimizer + AdamOptimizer = tf.train.AdamOptimizer + GradientDescentOptimizer = tf.train.GradientDescentOptimizer +else: + AdagradOptimizer = tf.optimizers.Adagrad + AdamOptimizer = tf.optimizers.Adam + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + +DPAdagradOptimizer = make_optimizer_class(AdagradOptimizer) +DPAdamOptimizer = make_optimizer_class(AdamOptimizer) +DPGradientDescentOptimizer = make_optimizer_class(GradientDescentOptimizer) + +DPAdagradGaussianOptimizer = make_gaussian_optimizer_class(AdagradOptimizer) +DPAdamGaussianOptimizer = make_gaussian_optimizer_class(AdamOptimizer) +DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class( + GradientDescentOptimizer) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py new file mode 100644 index 0000000..b2bf1b8 --- /dev/null +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_eager_test.py @@ -0,0 +1,130 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
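Before the tests, a rough usage sketch of the Gaussian wrapper defined above. The hyperparameter values below are arbitrary, and the minimize call is left commented out because it needs a model and a vector of per-example losses; this is an illustration, not an excerpt from the patch:

import tensorflow as tf
from tensorflow_privacy.privacy.optimizers import dp_optimizer

# The wrapper clips each per-microbatch gradient to l2_norm_clip, sums the
# clipped gradients, adds Gaussian noise with stddev
# l2_norm_clip * noise_multiplier, and divides by num_microbatches.
optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,
    noise_multiplier=1.1,
    num_microbatches=32,  # the batch is reshaped to [num_microbatches, -1]
    learning_rate=0.15)   # forwarded to the wrapped SGD optimizer

# The loss handed to minimize() should be the vector of per-example losses
# (no reduction), so that per-microbatch gradients can be formed:
# train_op = optimizer.minimize(loss=vector_loss,
#                               global_step=tf.train.get_global_step())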
+"""Tests for differentially private optimizers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.dp_query import gaussian_query +from tensorflow_privacy.privacy.optimizers import dp_optimizer + + +class DPOptimizerEagerTest(tf.test.TestCase, parameterized.TestCase): + + def setUp(self): + tf.enable_eager_execution() + super(DPOptimizerEagerTest, self).setUp() + + def _loss_fn(self, val0, val1): + return 0.5 * tf.reduce_sum(tf.squared_difference(val0, val1), axis=1) + + @parameterized.named_parameters( + ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1, + [-2.5, -2.5]), + ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2, + [-2.5, -2.5]), + ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4, + [-2.5, -2.5]), + ('DPAdagrad 1', dp_optimizer.DPAdagradOptimizer, 1, [-2.5, -2.5]), + ('DPAdagrad 2', dp_optimizer.DPAdagradOptimizer, 2, [-2.5, -2.5]), + ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]), + ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]), + ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]), + ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5])) + def testBaseline(self, cls, num_microbatches, expected_answer): + with tf.GradientTape(persistent=True) as gradient_tape: + var0 = tf.Variable([1.0, 2.0]) + data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) + + dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger( + dp_sum_query, 1e6, num_microbatches / 1e6) + + opt = cls( + dp_sum_query, + num_microbatches=num_microbatches, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + + # Expected gradient is sum of differences divided by number of + # microbatches. + grads_and_vars = opt.compute_gradients( + lambda: self._loss_fn(var0, data0), [var0], + gradient_tape=gradient_tape) + self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), + ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), + ('DPAdam', dp_optimizer.DPAdamOptimizer)) + def testClippingNorm(self, cls): + with tf.GradientTape(persistent=True) as gradient_tape: + var0 = tf.Variable([0.0, 0.0]) + data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) + + dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) + + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0, 0.0], self.evaluate(var0)) + + # Expected gradient is sum of differences. 
+ grads_and_vars = opt.compute_gradients( + lambda: self._loss_fn(var0, data0), [var0], + gradient_tape=gradient_tape) + self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), + ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), + ('DPAdam', dp_optimizer.DPAdamOptimizer)) + def testNoiseMultiplier(self, cls): + with tf.GradientTape(persistent=True) as gradient_tape: + var0 = tf.Variable([0.0]) + data0 = tf.Variable([[0.0]]) + + dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) + + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0], self.evaluate(var0)) + + grads = [] + for _ in range(1000): + grads_and_vars = opt.compute_gradients( + lambda: self._loss_fn(var0, data0), [var0], + gradient_tape=gradient_tape) + grads.append(grads_and_vars[0][0]) + + # Test standard deviation is close to l2_norm_clip * noise_multiplier. + self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py new file mode 100644 index 0000000..5237b61 --- /dev/null +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_test.py @@ -0,0 +1,241 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for differentially private optimizers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import mock +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.dp_query import gaussian_query +from tensorflow_privacy.privacy.optimizers import dp_optimizer + + +class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): + + def _loss(self, val0, val1): + """Loss function that is minimized at the mean of the input points.""" + return 0.5 * tf.reduce_sum(tf.squared_difference(val0, val1), axis=1) + + # Parameters for testing: optimizer, num_microbatches, expected answer. 
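+  # The expected answer is the same for every microbatch count: the
+  # per-example gradients (var0 - data0) are [-2, -2], [-4, -4], [-6, -6] and
+  # [2, 2]. With a huge clipping norm and zero noise, summing the microbatch
+  # means and dividing by num_microbatches reduces to the overall mean,
+  # [-10, -10] / 4 = [-2.5, -2.5].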
+ @parameterized.named_parameters( + ('DPGradientDescent 1', dp_optimizer.DPGradientDescentOptimizer, 1, + [-2.5, -2.5]), + ('DPGradientDescent 2', dp_optimizer.DPGradientDescentOptimizer, 2, + [-2.5, -2.5]), + ('DPGradientDescent 4', dp_optimizer.DPGradientDescentOptimizer, 4, + [-2.5, -2.5]), + ('DPAdagrad 1', dp_optimizer.DPAdagradOptimizer, 1, [-2.5, -2.5]), + ('DPAdagrad 2', dp_optimizer.DPAdagradOptimizer, 2, [-2.5, -2.5]), + ('DPAdagrad 4', dp_optimizer.DPAdagradOptimizer, 4, [-2.5, -2.5]), + ('DPAdam 1', dp_optimizer.DPAdamOptimizer, 1, [-2.5, -2.5]), + ('DPAdam 2', dp_optimizer.DPAdamOptimizer, 2, [-2.5, -2.5]), + ('DPAdam 4', dp_optimizer.DPAdamOptimizer, 4, [-2.5, -2.5])) + def testBaseline(self, cls, num_microbatches, expected_answer): + with self.cached_session() as sess: + var0 = tf.Variable([1.0, 2.0]) + data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) + + dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger( + dp_sum_query, 1e6, num_microbatches / 1e6) + + opt = cls( + dp_sum_query, + num_microbatches=num_microbatches, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + + # Expected gradient is sum of differences divided by number of + # microbatches. + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads_and_vars = sess.run(gradient_op) + self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), + ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), + ('DPAdam', dp_optimizer.DPAdamOptimizer)) + def testClippingNorm(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([0.0, 0.0]) + data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) + + dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) + + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0, 0.0], self.evaluate(var0)) + + # Expected gradient is sum of differences. + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads_and_vars = sess.run(gradient_op) + self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), + ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), + ('DPAdam', dp_optimizer.DPAdamOptimizer)) + def testNoiseMultiplier(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([0.0]) + data0 = tf.Variable([[0.0]]) + + dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) + + opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0], self.evaluate(var0)) + + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads = [] + for _ in range(1000): + grads_and_vars = sess.run(gradient_op) + grads.append(grads_and_vars[0][0]) + + # Test standard deviation is close to l2_norm_clip * noise_multiplier. 
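+      # GaussianSumQuery(4.0, 8.0) sets the clip to 4.0 and the noise stddev
+      # to 8.0, i.e. a noise multiplier of 2.0. The single record is all
+      # zeros, so the returned gradient is pure noise and np.std(grads)
+      # should be close to 8.0.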
+ self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) + + @mock.patch.object(tf, 'logging') + def testComputeGradientsOverrideWarning(self, mock_logging): + + class SimpleOptimizer(tf.train.Optimizer): + + def compute_gradients(self): + return 0 + + dp_optimizer.make_optimizer_class(SimpleOptimizer) + mock_logging.warning.assert_called_once_with( + 'WARNING: Calling make_optimizer_class() on class %s that overrides ' + 'method compute_gradients(). Check to ensure that ' + 'make_optimizer_class() does not interfere with overridden version.', + 'SimpleOptimizer') + + def testEstimator(self): + """Tests that DP optimizers work with tf.estimator.""" + + def linear_model_fn(features, labels, mode): + preds = tf.keras.layers.Dense( + 1, activation='linear', name='dense').apply(features['x']) + + vector_loss = tf.squared_difference(labels, preds) + scalar_loss = tf.reduce_mean(vector_loss) + dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6) + optimizer = dp_optimizer.DPGradientDescentOptimizer( + dp_sum_query, + num_microbatches=1, + learning_rate=1.0) + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=vector_loss, global_step=global_step) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, train_op=train_op) + + linear_regressor = tf.estimator.Estimator(model_fn=linear_model_fn) + true_weights = np.array([[-5], [4], [3], [2]]).astype(np.float32) + true_bias = 6.0 + train_data = np.random.normal(scale=3.0, size=(200, 4)).astype(np.float32) + + train_labels = np.matmul(train_data, + true_weights) + true_bias + np.random.normal( + scale=0.1, size=(200, 1)).astype(np.float32) + + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data}, + y=train_labels, + batch_size=20, + num_epochs=10, + shuffle=True) + linear_regressor.train(input_fn=train_input_fn, steps=100) + self.assertAllClose( + linear_regressor.get_variable_value('dense/kernel'), + true_weights, + atol=1.0) + + @parameterized.named_parameters( + ('DPGradientDescent', dp_optimizer.DPGradientDescentOptimizer), + ('DPAdagrad', dp_optimizer.DPAdagradOptimizer), + ('DPAdam', dp_optimizer.DPAdamOptimizer)) + def testUnrollMicrobatches(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([1.0, 2.0]) + data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) + + num_microbatches = 4 + + dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0) + dp_sum_query = privacy_ledger.QueryWithLedger( + dp_sum_query, 1e6, num_microbatches / 1e6) + + opt = cls( + dp_sum_query, + num_microbatches=num_microbatches, + learning_rate=2.0, + unroll_microbatches=True) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + + # Expected gradient is sum of differences divided by number of + # microbatches. 
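+      # As in testBaseline, the mean per-example gradient for this data is +      # [-2.5, -2.5].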
+ gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads_and_vars = sess.run(gradient_op) + self.assertAllCloseAccordingToType([-2.5, -2.5], grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', dp_optimizer.DPGradientDescentGaussianOptimizer), + ('DPAdagrad', dp_optimizer.DPAdagradGaussianOptimizer), + ('DPAdam', dp_optimizer.DPAdamGaussianOptimizer)) + def testDPGaussianOptimizerClass(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([0.0]) + data0 = tf.Variable([[0.0]]) + + opt = cls( + l2_norm_clip=4.0, + noise_multiplier=2.0, + num_microbatches=1, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0], self.evaluate(var0)) + + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads = [] + for _ in range(1000): + grads_and_vars = sess.run(gradient_op) + grads.append(grads_and_vars[0][0]) + + # Test standard deviation is close to l2_norm_clip * noise_multiplier. + self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py new file mode 100644 index 0000000..7295e1d --- /dev/null +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized.py @@ -0,0 +1,153 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Vectorized differentially private optimizers for TensorFlow.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from distutils.version import LooseVersion +import tensorflow as tf + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + nest = tf.contrib.framework.nest + AdagradOptimizer = tf.train.AdagradOptimizer + AdamOptimizer = tf.train.AdamOptimizer + GradientDescentOptimizer = tf.train.GradientDescentOptimizer + parent_code = tf.train.Optimizer.compute_gradients.__code__ + GATE_OP = tf.train.Optimizer.GATE_OP # pylint: disable=invalid-name +else: + nest = tf.nest + AdagradOptimizer = tf.optimizers.Adagrad + AdamOptimizer = tf.optimizers.Adam + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + parent_code = tf.optimizers.Optimizer._compute_gradients.__code__ # pylint: disable=protected-access + GATE_OP = None # pylint: disable=invalid-name + + +def make_vectorized_optimizer_class(cls): + """Constructs a vectorized DP optimizer class from an existing one.""" + if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + child_code = cls.compute_gradients.__code__ + else: + child_code = cls._compute_gradients.__code__ # pylint: disable=protected-access + if child_code is not parent_code: + tf.logging.warning( + 'WARNING: Calling make_optimizer_class() on class %s that overrides ' + 'method compute_gradients(). 
Check to ensure that ' + 'make_optimizer_class() does not interfere with overridden version.', + cls.__name__) + + class DPOptimizerClass(cls): + """Differentially private subclass of given class cls.""" + + def __init__( + self, + l2_norm_clip, + noise_multiplier, + num_microbatches=None, + *args, # pylint: disable=keyword-arg-before-vararg, g-doc-args + **kwargs): + """Initialize the DPOptimizerClass. + + Args: + l2_norm_clip: Clipping norm (max L2 norm of per microbatch gradients) + noise_multiplier: Ratio of the standard deviation to the clipping norm + num_microbatches: How many microbatches into which the minibatch is + split. If None, will default to the size of the minibatch, and + per-example gradients will be computed. + """ + super(DPOptimizerClass, self).__init__(*args, **kwargs) + self._l2_norm_clip = l2_norm_clip + self._noise_multiplier = noise_multiplier + self._num_microbatches = num_microbatches + + def compute_gradients(self, + loss, + var_list, + gate_gradients=GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None, + gradient_tape=None): + if callable(loss): + # TF is running in Eager mode + raise NotImplementedError('Vectorized optimizer unavailable for TF2.') + else: + # TF is running in graph mode, check we did not receive a gradient tape. + if gradient_tape: + raise ValueError('When in graph mode, a tape should not be passed.') + + batch_size = tf.shape(loss)[0] + if self._num_microbatches is None: + self._num_microbatches = batch_size + + # Note: it would be closer to the correct i.i.d. sampling of records if + # we sampled each microbatch from the appropriate binomial distribution, + # although that still wouldn't be quite correct because it would be + # sampling from the dataset without replacement. + microbatch_losses = tf.reshape(loss, [self._num_microbatches, -1]) + + if var_list is None: + var_list = ( + tf.trainable_variables() + tf.get_collection( + tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) + + def process_microbatch(microbatch_loss): + """Compute clipped grads for one microbatch.""" + microbatch_loss = tf.reduce_mean(microbatch_loss) + grads, _ = zip(*super(DPOptimizerClass, self).compute_gradients( + microbatch_loss, + var_list, + gate_gradients, + aggregation_method, + colocate_gradients_with_ops, + grad_loss)) + grads_list = [ + g if g is not None else tf.zeros_like(v) + for (g, v) in zip(list(grads), var_list) + ] + # Clip gradients to have L2 norm of l2_norm_clip. + # Here, we use TF primitives rather than the built-in + # tf.clip_by_global_norm() so that operations can be vectorized + # across microbatches. + grads_flat = nest.flatten(grads_list) + squared_l2_norms = [tf.reduce_sum(tf.square(g)) for g in grads_flat] + global_norm = tf.sqrt(tf.add_n(squared_l2_norms)) + div = tf.maximum(global_norm / self._l2_norm_clip, 1.) 
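+        # div is at least 1, so gradients whose global norm is already below +        # l2_norm_clip are left unchanged, while larger ones are rescaled to +        # have norm exactly l2_norm_clip.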
+ clipped_flat = [g / div for g in grads_flat] + clipped_grads = nest.pack_sequence_as(grads_list, clipped_flat) + return clipped_grads + + clipped_grads = tf.vectorized_map(process_microbatch, microbatch_losses) + + def reduce_noise_normalize_batch(stacked_grads): + summed_grads = tf.reduce_sum(stacked_grads, axis=0) + noise_stddev = self._l2_norm_clip * self._noise_multiplier + noise = tf.random.normal(tf.shape(summed_grads), + stddev=noise_stddev) + noised_grads = summed_grads + noise + return noised_grads / tf.cast(self._num_microbatches, tf.float32) + + final_grads = nest.map_structure(reduce_noise_normalize_batch, + clipped_grads) + + return list(zip(final_grads, var_list)) + + return DPOptimizerClass + + +VectorizedDPAdagrad = make_vectorized_optimizer_class(AdagradOptimizer) +VectorizedDPAdam = make_vectorized_optimizer_class(AdamOptimizer) +VectorizedDPSGD = make_vectorized_optimizer_class(GradientDescentOptimizer) diff --git a/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py new file mode 100644 index 0000000..21f00e8 --- /dev/null +++ b/tensorflow_privacy/privacy/optimizers/dp_optimizer_vectorized_test.py @@ -0,0 +1,204 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for differentially private optimizers.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import mock +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized +from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdagrad +from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPAdam +from tensorflow_privacy.privacy.optimizers.dp_optimizer_vectorized import VectorizedDPSGD + + +class DPOptimizerTest(tf.test.TestCase, parameterized.TestCase): + + def _loss(self, val0, val1): + """Loss function that is minimized at the mean of the input points.""" + return 0.5 * tf.reduce_sum(tf.squared_difference(val0, val1), axis=1) + + # Parameters for testing: optimizer, num_microbatches, expected answer. 
+ @parameterized.named_parameters( + ('DPGradientDescent 1', VectorizedDPSGD, 1, [-2.5, -2.5]), + ('DPGradientDescent 2', VectorizedDPSGD, 2, [-2.5, -2.5]), + ('DPGradientDescent 4', VectorizedDPSGD, 4, [-2.5, -2.5]), + ('DPAdagrad 1', VectorizedDPAdagrad, 1, [-2.5, -2.5]), + ('DPAdagrad 2', VectorizedDPAdagrad, 2, [-2.5, -2.5]), + ('DPAdagrad 4', VectorizedDPAdagrad, 4, [-2.5, -2.5]), + ('DPAdam 1', VectorizedDPAdam, 1, [-2.5, -2.5]), + ('DPAdam 2', VectorizedDPAdam, 2, [-2.5, -2.5]), + ('DPAdam 4', VectorizedDPAdam, 4, [-2.5, -2.5])) + def testBaseline(self, cls, num_microbatches, expected_answer): + with self.cached_session() as sess: + var0 = tf.Variable([1.0, 2.0]) + data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]]) + + opt = cls( + l2_norm_clip=1.0e9, + noise_multiplier=0.0, + num_microbatches=num_microbatches, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + + # Expected gradient is sum of differences divided by number of + # microbatches. + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads_and_vars = sess.run(gradient_op) + self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', VectorizedDPSGD), + ('DPAdagrad', VectorizedDPAdagrad), + ('DPAdam', VectorizedDPAdam)) + def testClippingNorm(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([0.0, 0.0]) + data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]]) + + opt = cls(l2_norm_clip=1.0, + noise_multiplier=0., + num_microbatches=1, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0, 0.0], self.evaluate(var0)) + + # Expected gradient is sum of differences. + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads_and_vars = sess.run(gradient_op) + self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0]) + + @parameterized.named_parameters( + ('DPGradientDescent', VectorizedDPSGD), + ('DPAdagrad', VectorizedDPAdagrad), + ('DPAdam', VectorizedDPAdam)) + def testNoiseMultiplier(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([0.0]) + data0 = tf.Variable([[0.0]]) + + opt = cls(l2_norm_clip=4.0, + noise_multiplier=8.0, + num_microbatches=1, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0], self.evaluate(var0)) + + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads = [] + for _ in range(5000): + grads_and_vars = sess.run(gradient_op) + grads.append(grads_and_vars[0][0]) + + # Test standard deviation is close to l2_norm_clip * noise_multiplier. + self.assertNear(np.std(grads), 4.0 * 8.0, 0.5) + + @mock.patch.object(tf, 'logging') + def testComputeGradientsOverrideWarning(self, mock_logging): + + class SimpleOptimizer(tf.train.Optimizer): + + def compute_gradients(self): + return 0 + + dp_optimizer_vectorized.make_vectorized_optimizer_class(SimpleOptimizer) + mock_logging.warning.assert_called_once_with( + 'WARNING: Calling make_optimizer_class() on class %s that overrides ' + 'method compute_gradients(). 
Check to ensure that ' + 'make_optimizer_class() does not interfere with overridden version.', + 'SimpleOptimizer') + + def testEstimator(self): + """Tests that DP optimizers work with tf.estimator.""" + + def linear_model_fn(features, labels, mode): + preds = tf.keras.layers.Dense( + 1, activation='linear', name='dense').apply(features['x']) + + vector_loss = tf.squared_difference(labels, preds) + scalar_loss = tf.reduce_mean(vector_loss) + optimizer = VectorizedDPSGD( + l2_norm_clip=1.0, + noise_multiplier=0., + num_microbatches=1, + learning_rate=1.0) + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=vector_loss, global_step=global_step) + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, train_op=train_op) + + linear_regressor = tf.estimator.Estimator(model_fn=linear_model_fn) + true_weights = np.array([[-5], [4], [3], [2]]).astype(np.float32) + true_bias = 6.0 + train_data = np.random.normal(scale=3.0, size=(200, 4)).astype(np.float32) + + train_labels = np.matmul(train_data, + true_weights) + true_bias + np.random.normal( + scale=0.1, size=(200, 1)).astype(np.float32) + + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data}, + y=train_labels, + batch_size=20, + num_epochs=10, + shuffle=True) + linear_regressor.train(input_fn=train_input_fn, steps=100) + self.assertAllClose( + linear_regressor.get_variable_value('dense/kernel'), + true_weights, + atol=1.0) + + @parameterized.named_parameters( + ('DPGradientDescent', VectorizedDPSGD), + ('DPAdagrad', VectorizedDPAdagrad), + ('DPAdam', VectorizedDPAdam)) + def testDPGaussianOptimizerClass(self, cls): + with self.cached_session() as sess: + var0 = tf.Variable([0.0]) + data0 = tf.Variable([[0.0]]) + + opt = cls( + l2_norm_clip=4.0, + noise_multiplier=2.0, + num_microbatches=1, + learning_rate=2.0) + + self.evaluate(tf.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([0.0], self.evaluate(var0)) + + gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0]) + grads = [] + for _ in range(1000): + grads_and_vars = sess.run(gradient_op) + grads.append(grads_and_vars[0][0]) + + # Test standard deviation is close to l2_norm_clip * noise_multiplier. + self.assertNear(np.std(grads), 2.0 * 4.0, 0.5) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_privacy/requirements.txt b/tensorflow_privacy/requirements.txt new file mode 100644 index 0000000..cb596eb --- /dev/null +++ b/tensorflow_privacy/requirements.txt @@ -0,0 +1,3 @@ +tensorflow>=1.13 +mpmath +scipy>=0.17 diff --git a/tensorflow_privacy/research/README.md b/tensorflow_privacy/research/README.md new file mode 100644 index 0000000..84ac2a9 --- /dev/null +++ b/tensorflow_privacy/research/README.md @@ -0,0 +1,9 @@ +# Research + +This folder contains code to reproduce results from research papers. 
Currently, +the following papers are included: + +* Semi-supervised Knowledge Transfer for Deep Learning from Private Training + Data (ICLR 2017): `pate_2017` + +* Scalable Private Learning with PATE (ICLR 2018): `pate_2018` diff --git a/tensorflow_privacy/research/pate_2017/README.md b/tensorflow_privacy/research/pate_2017/README.md new file mode 100644 index 0000000..b08d63a --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/README.md @@ -0,0 +1,123 @@ +# Learning private models with multiple teachers + +This repository contains code to set up the training of privacy-preserving +student models by transferring knowledge from an ensemble of teachers trained +on disjoint subsets of the data for which privacy guarantees are to be provided. + +Knowledge acquired by teachers is transferred to the student in a differentially +private manner by noisily aggregating the teacher decisions before feeding them +to the student during training. + +The paper describing the approach is [arXiv:1610.05755](https://arxiv.org/abs/1610.05755). + +## Dependencies + +This model uses `TensorFlow` to perform numerical computations associated with +machine learning models, as well as common Python libraries such as `numpy`, +`scipy`, and `six`. Instructions to install these can be found in their +respective documentation. + +## How to run + +This repository supports the MNIST and SVHN datasets. The following +instructions are given for MNIST but can easily be adapted by replacing the +flag `--dataset=mnist` with `--dataset=svhn`. +There are two steps: teacher training and student training. Data will be +automatically downloaded when you start the teacher training. + +First we train an ensemble of teacher +models, and second we train a student using predictions made by this ensemble. + +**Training the teachers:** first run the `train_teachers.py` file with at least +three flags specifying (1) the number of teachers, (2) the ID of the teacher +you are training among these teachers, and (3) the dataset on which to train. +For instance, to train teacher number 10 among an ensemble of 100 teachers for +MNIST, use the following command: + +``` +python train_teachers.py --nb_teachers=100 --teacher_id=10 --dataset=mnist +``` + +Other flags like `train_dir` and `data_dir` can optionally be set to point, +respectively, to the directories where model checkpoints and temporary data +(like the dataset) should be saved. The flag `max_steps` (default: 3000) +controls the length of training. See `train_teachers.py` and `deep_cnn.py` +to find available flags and their descriptions. + +**Training the student:** once the teachers are all trained, e.g., teachers +with IDs `0` to `99` are trained for `nb_teachers=100`, we are ready to train +the student. The student is trained by labeling some of the test data with +predictions from the teachers. The predictions are aggregated by counting the +votes assigned to each class among the ensemble of teachers, adding Laplacian +noise to these votes, and assigning the label with the maximum noisy vote count +to the sample. This is detailed in function `noisy_max` in the file +`aggregation.py`. To learn the student, use the following command: + +``` +python train_student.py --nb_teachers=100 --dataset=mnist --stdnt_share=5000 +``` + +The flag `--stdnt_share=5000` indicates that the student should be able to +use the first `5000` samples of the dataset's test subset as unlabeled +training points (they will be labeled using the teacher predictions).
The + remaining samples are used for evaluation of the student's accuracy, which + is displayed upon completion of training. + + ## Using semi-supervised GANs to train the student + + In the paper, we describe how to train the student in a semi-supervised + fashion using Generative Adversarial Networks. This can be reproduced for MNIST + by cloning the [improved-gan](https://github.com/openai/improved-gan) + repository and adding its `mnist_svhn_cifar10` directory to your `PATH` + variable before running the shell + script `train_student_mnist_250_lap_20_count_50_epochs_600.sh`. + + ``` + export PATH="/path/to/improved-gan/mnist_svhn_cifar10":$PATH + sh train_student_mnist_250_lap_20_count_50_epochs_600.sh + ``` + + + ## Alternative deeper convolutional architecture + + Note that a deeper convolutional model is available. Both the default and + deeper model graphs are defined in `deep_cnn.py`, by the + functions `inference` and `inference_deeper` respectively. Use the flag `--deeper=true` + to switch to that model when launching `train_teachers.py` and + `train_student.py`. + + ## Privacy analysis + + In the paper, we detail how data-dependent differential privacy bounds can be + computed to estimate the cost of training the student. In order to reproduce + the bounds given in the paper, we include the labels predicted by our two + teacher ensembles: MNIST and SVHN. You can run the privacy analysis for each + dataset with the following commands: + + ``` + python analysis.py --counts_file=mnist_250_teachers_labels.npy --indices_file=mnist_250_teachers_100_indices_used_by_student.npy + + python analysis.py --counts_file=svhn_250_teachers_labels.npy --max_examples=1000 --delta=1e-6 + ``` + + To expedite experimentation with the privacy analysis of student training, + the `analysis.py` file is configured to download the labels produced by 250 + teacher models for MNIST and SVHN when running the two commands included + above. These 250 teacher models were trained using the following command lines, + where `XXX` takes values between `0` and `249`: + + ``` + python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=mnist + python train_teachers.py --nb_teachers=250 --teacher_id=XXX --dataset=svhn + ``` + + Note that these labels may also be used in lieu of the function `ensemble_preds` + in `train_student.py` to compare the performance of alternative student model + architectures and learning techniques. This facilitates future work by + removing the need to train the MNIST and SVHN teacher ensembles when + proposing new student training approaches. + + ## Contact + + To ask questions, please email `nicolas@papernot.fr` or open an issue on + the `tensorflow/models` issues tracker. Please assign issues to + [@npapernot](https://github.com/npapernot). diff --git a/tensorflow_privacy/research/pate_2017/__init__.py b/tensorflow_privacy/research/pate_2017/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/__init__.py @@ -0,0 +1 @@ + diff --git a/tensorflow_privacy/research/pate_2017/aggregation.py b/tensorflow_privacy/research/pate_2017/aggregation.py new file mode 100644 index 0000000..5cad35c --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/aggregation.py @@ -0,0 +1,130 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from six.moves import xrange + + +def labels_from_probs(probs): + """ + Helper function: computes argmax along last dimension of array to obtain + labels (max prob or max logit value) + :param probs: numpy array where probabilities or logits are on last dimension + :return: array with the same shape as the input, except that the last + dimension is removed; it now contains the labels + """ + # Compute last axis index + last_axis = len(np.shape(probs)) - 1 + + # Label is argmax over last dimension + labels = np.argmax(probs, axis=last_axis) + + # Return as np.int32 + return np.asarray(labels, dtype=np.int32) + + +def noisy_max(logits, lap_scale, return_clean_votes=False): + """ + This aggregation mechanism takes the softmax/logit output of several models + resulting from inference on identical inputs and computes the noisy-max of + the votes for candidate classes to select a label for each sample: it + adds Laplacian noise to label counts and returns the most frequent label. + :param logits: logits or probabilities for each sample + :param lap_scale: scale of the Laplacian noise to be added to counts + :param return_clean_votes: if set to True, also returns clean votes (without + Laplacian noise). This can be used to perform the + privacy analysis of this aggregation mechanism. + :return: the aggregated labels and, if return_clean_votes is set to True, + also the clean counts for each class per sample and the original + labels produced by the teachers.
+ """ + + # Compute labels from logits/probs and reshape array properly + labels = labels_from_probs(logits) + labels_shape = np.shape(labels) + labels = labels.reshape((labels_shape[0], labels_shape[1])) + + # Initialize array to hold final labels + result = np.zeros(int(labels_shape[1])) + + if return_clean_votes: + # Initialize array to hold clean votes for each sample + clean_votes = np.zeros((int(labels_shape[1]), 10)) + + # Parse each sample + for i in xrange(int(labels_shape[1])): + # Count number of votes assigned to each class + label_counts = np.bincount(labels[:, i], minlength=10) + + if return_clean_votes: + # Store vote counts for export + clean_votes[i] = label_counts + + # Cast in float32 to prepare before addition of Laplacian noise + label_counts = np.asarray(label_counts, dtype=np.float32) + + # Sample independent Laplacian noise for each class + for item in xrange(10): + label_counts[item] += np.random.laplace(loc=0.0, scale=float(lap_scale)) + + # Result is the most frequent label + result[i] = np.argmax(label_counts) + + # Cast labels to np.int32 for compatibility with deep_cnn.py feed dictionaries + result = np.asarray(result, dtype=np.int32) + + if return_clean_votes: + # Returns several array, which are later saved: + # result: labels obtained from the noisy aggregation + # clean_votes: the number of teacher votes assigned to each sample and class + # labels: the labels assigned by teachers (before the noisy aggregation) + return result, clean_votes, labels + else: + # Only return labels resulting from noisy aggregation + return result + + +def aggregation_most_frequent(logits): + """ + This aggregation mechanism takes the softmax/logit output of several models + resulting from inference on identical inputs and computes the most frequent + label. It is deterministic (no noise injection like noisy_max() above. + :param logits: logits or probabilities for each sample + :return: + """ + # Compute labels from logits/probs and reshape array properly + labels = labels_from_probs(logits) + labels_shape = np.shape(labels) + labels = labels.reshape((labels_shape[0], labels_shape[1])) + + # Initialize array to hold final labels + result = np.zeros(int(labels_shape[1])) + + # Parse each sample + for i in xrange(int(labels_shape[1])): + # Count number of votes assigned to each class + label_counts = np.bincount(labels[:, i], minlength=10) + + label_counts = np.asarray(label_counts, dtype=np.int32) + + # Result is the most frequent label + result[i] = np.argmax(label_counts) + + return np.asarray(result, dtype=np.int32) diff --git a/tensorflow_privacy/research/pate_2017/analysis.py b/tensorflow_privacy/research/pate_2017/analysis.py new file mode 100644 index 0000000..111a68c --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/analysis.py @@ -0,0 +1,304 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +""" +This script computes bounds on the privacy cost of training the +student model from noisy aggregation of labels predicted by teachers. +It should be used only after training the student (and therefore the +teachers as well). However, we include the label files required to +reproduce key results from our paper (https://arxiv.org/abs/1610.05755): +the epsilon bounds for MNIST and SVHN students. + +The command that computes the epsilon bound associated +with the training of the MNIST student model (100 label queries +with a (1/20)*2=0.1 epsilon bound each) is: + +python analysis.py + --counts_file=mnist_250_teachers_labels.npy + --indices_file=mnist_250_teachers_100_indices_used_by_student.npy + +The command that computes the epsilon bound associated +with the training of the SVHN student model (1000 label queries +with a (1/20)*2=0.1 epsilon bound each) is: + +python analysis.py + --counts_file=svhn_250_teachers_labels.npy + --max_examples=1000 + --delta=1e-6 +""" +import os +import math +import numpy as np +from six.moves import xrange +import tensorflow as tf + +from input import maybe_download + +# These parameters can be changed to compute bounds for different failure rates +# or different model predictions. + +tf.flags.DEFINE_integer("moments", 8, "Number of moments") +tf.flags.DEFINE_float("noise_eps", 0.1, "Eps value for each call to noisymax.") +tf.flags.DEFINE_float("delta", 1e-5, "Target value of delta.") +tf.flags.DEFINE_float("beta", 0.09, "Value of beta for smooth sensitivity") +tf.flags.DEFINE_string("counts_file", "", "Numpy matrix with raw counts") +tf.flags.DEFINE_string("indices_file", "", + "File containing a numpy matrix with indices used." + " Optional. Use the first max_examples indices if this is not provided.") +tf.flags.DEFINE_integer("max_examples", 1000, + "Number of examples to use. We will use the first" + " max_examples many examples from the counts_file" + " or indices_file to do the privacy cost estimate") +tf.flags.DEFINE_float("too_small", 1e-10, "Small threshold to avoid log of 0") +tf.flags.DEFINE_bool("input_is_counts", False, "False if labels, True if counts") + +FLAGS = tf.flags.FLAGS + + +def compute_q_noisy_max(counts, noise_eps): + """Returns ~ Pr[outcome != winner]. + + Args: + counts: a list of scores + noise_eps: privacy parameter for noisy_max + Returns: + q: the probability that the outcome is different from the true winner. + """ + # For noisy max, we only get an upper bound. + # Pr[ j beats i*] \leq (2 + gap(j,i*)) / (4 exp(gap(j,i*))) + # proof at http://mathoverflow.net/questions/66763/ + # tight-bounds-on-probability-of-sum-of-laplace-random-variables + + winner = np.argmax(counts) + counts_normalized = noise_eps * (counts - counts[winner]) + counts_rest = np.array( + [counts_normalized[i] for i in xrange(len(counts)) if i != winner]) + q = 0.0 + for c in counts_rest: + gap = -c + q += (gap + 2.0) / (4.0 * math.exp(gap)) + return min(q, 1.0 - (1.0/len(counts))) + + +def compute_q_noisy_max_approx(counts, noise_eps): + """Returns ~ Pr[outcome != winner]. + + Args: + counts: a list of scores + noise_eps: privacy parameter for noisy_max + Returns: + q: the probability that the outcome is different from the true winner. + """ + # For noisy max, we only get an upper bound.
+ # Pr[ j beats i*] \leq (2 + gap(j,i*)) / (4 exp(gap(j,i*))) + # proof at http://mathoverflow.net/questions/66763/ + # tight-bounds-on-probability-of-sum-of-laplace-random-variables + # This code uses an approximation that is faster and easier + # to get a local sensitivity bound on. + + winner = np.argmax(counts) + counts_normalized = noise_eps * (counts - counts[winner]) + counts_rest = np.array( + [counts_normalized[i] for i in xrange(len(counts)) if i != winner]) + gap = -max(counts_rest) + q = (len(counts) - 1) * (gap + 2.0) / (4.0 * math.exp(gap)) + return min(q, 1.0 - (1.0/len(counts))) + + +def logmgf_exact(q, priv_eps, l): + """Computes the logmgf value given q and privacy eps. + + The bound used is the min of three terms. The first term is from + https://arxiv.org/pdf/1605.02065.pdf. + The second term is based on the fact that when an event has probability (1-q) for + q close to zero, q can only change by a factor of exp(eps), which corresponds to a + much smaller multiplicative change in (1-q). + The third term comes directly from the privacy guarantee. + Args: + q: probability of a non-optimal outcome + priv_eps: eps parameter for DP + l: moment to compute. + Returns: + Upper bound on logmgf + """ + if q < 0.5: + t_one = (1-q) * math.pow((1-q) / (1 - math.exp(priv_eps) * q), l) + t_two = q * math.exp(priv_eps * l) + t = t_one + t_two + try: + log_t = math.log(t) + except ValueError: + print("Got ValueError in math.log for values :" + str((q, priv_eps, l, t))) + log_t = priv_eps * l + else: + log_t = priv_eps * l + + return min(0.5 * priv_eps * priv_eps * l * (l + 1), log_t, priv_eps * l) + + +def logmgf_from_counts(counts, noise_eps, l): + """ + The ReportNoisyMax mechanism with parameter noise_eps is 2*noise_eps-DP + in our setting, where one count can go up by one and another + can go down by one. + """ + + q = compute_q_noisy_max(counts, noise_eps) + return logmgf_exact(q, 2.0 * noise_eps, l) + + +def sens_at_k(counts, noise_eps, l, k): + """Return sensitivity at distance k. + + Args: + counts: an array of scores + noise_eps: noise parameter used + l: moment whose sensitivity is being computed + k: distance + Returns: + sensitivity: at distance k + """ + counts_sorted = sorted(counts, reverse=True) + if 0.5 * noise_eps * l > 1: + print("l too large to compute sensitivity") + return 0 + # Now we can assume that at k, gap remains positive + # or we have reached the point where logmgf_exact is + # determined by the first term and independent of q. + if counts[0] < counts[1] + k: + return 0 + counts_sorted[0] -= k + counts_sorted[1] += k + val = logmgf_from_counts(counts_sorted, noise_eps, l) + counts_sorted[0] -= 1 + counts_sorted[1] += 1 + val_changed = logmgf_from_counts(counts_sorted, noise_eps, l) + return val_changed - val + + +def smoothed_sens(counts, noise_eps, l, beta): + """Compute beta-smooth sensitivity.
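+ + The beta-smooth sensitivity is the maximum over all distances k of + exp(-beta * k) times the sensitivity at distance k; it upper-bounds the + local sensitivity while changing slowly between neighboring inputs.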
+ + Args: + counts: array of scors + noise_eps: noise parameter + l: moment of interest + beta: smoothness parameter + Returns: + smooth_sensitivity: a beta smooth upper bound + """ + k = 0 + smoothed_sensitivity = sens_at_k(counts, noise_eps, l, k) + while k < max(counts): + k += 1 + sensitivity_at_k = sens_at_k(counts, noise_eps, l, k) + smoothed_sensitivity = max( + smoothed_sensitivity, + math.exp(-beta * k) * sensitivity_at_k) + if sensitivity_at_k == 0.0: + break + return smoothed_sensitivity + + +def main(unused_argv): + ################################################################## + # If we are reproducing results from paper https://arxiv.org/abs/1610.05755, + # download the required binaries with label information. + ################################################################## + + # Binaries for MNIST results + paper_binaries_mnist = \ + ["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_labels.npy?raw=true", + "https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_teachers_100_indices_used_by_student.npy?raw=true"] + if FLAGS.counts_file == "mnist_250_teachers_labels.npy" \ + or FLAGS.indices_file == "mnist_250_teachers_100_indices_used_by_student.npy": + maybe_download(paper_binaries_mnist, os.getcwd()) + + # Binaries for SVHN results + paper_binaries_svhn = ["https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/svhn_250_teachers_labels.npy?raw=true"] + if FLAGS.counts_file == "svhn_250_teachers_labels.npy": + maybe_download(paper_binaries_svhn, os.getcwd()) + + input_mat = np.load(FLAGS.counts_file) + if FLAGS.input_is_counts: + counts_mat = input_mat + else: + # In this case, the input is the raw predictions. Transform + num_teachers, n = input_mat.shape + counts_mat = np.zeros((n, 10)).astype(np.int32) + for i in range(n): + for j in range(num_teachers): + counts_mat[i, int(input_mat[j, i])] += 1 + n = counts_mat.shape[0] + num_examples = min(n, FLAGS.max_examples) + + if not FLAGS.indices_file: + indices = np.array(range(num_examples)) + else: + index_list = np.load(FLAGS.indices_file) + indices = index_list[:num_examples] + + l_list = 1.0 + np.array(xrange(FLAGS.moments)) + beta = FLAGS.beta + total_log_mgf_nm = np.array([0.0 for _ in l_list]) + total_ss_nm = np.array([0.0 for _ in l_list]) + noise_eps = FLAGS.noise_eps + + for i in indices: + total_log_mgf_nm += np.array( + [logmgf_from_counts(counts_mat[i], noise_eps, l) + for l in l_list]) + total_ss_nm += np.array( + [smoothed_sens(counts_mat[i], noise_eps, l, beta) + for l in l_list]) + delta = FLAGS.delta + + # We want delta = exp(alpha - eps l). + # Solving gives eps = (alpha - ln (delta))/l + eps_list_nm = (total_log_mgf_nm - math.log(delta)) / l_list + + print("Epsilons (Noisy Max): " + str(eps_list_nm)) + print("Smoothed sensitivities (Noisy Max): " + str(total_ss_nm / l_list)) + + # If beta < eps / 2 ln (1/delta), then adding noise Lap(1) * 2 SS/eps + # is eps,delta DP + # Also if beta < eps / 2(gamma +1), then adding noise 2(gamma+1) SS eta / eps + # where eta has density proportional to 1 / (1+|z|^gamma) is eps-DP + # Both from Corolloary 2.4 in + # http://www.cse.psu.edu/~ads22/pubs/NRS07/NRS07-full-draft-v1.pdf + # Print the first one's scale + ss_eps = 2.0 * beta * math.log(1/delta) + ss_scale = 2.0 / ss_eps + print("To get an " + str(ss_eps) + "-DP estimate of epsilon, ") + print("..add noise ~ " + str(ss_scale)) + print("... 
times " + str(total_ss_nm / l_list)) + print("Epsilon = " + str(min(eps_list_nm)) + ".") + if min(eps_list_nm) == eps_list_nm[-1]: + print("Warning: May not have used enough values of l") + + # Data independent bound, as mechanism is + # 2*noise_eps DP. + data_ind_log_mgf = np.array([0.0 for _ in l_list]) + data_ind_log_mgf += num_examples * np.array( + [logmgf_exact(1.0, 2.0 * noise_eps, l) for l in l_list]) + + data_ind_eps_list = (data_ind_log_mgf - math.log(delta)) / l_list + print("Data independent bound = " + str(min(data_ind_eps_list)) + ".") + + return + + +if __name__ == "__main__": + tf.app.run() diff --git a/tensorflow_privacy/research/pate_2017/deep_cnn.py b/tensorflow_privacy/research/pate_2017/deep_cnn.py new file mode 100644 index 0000000..8bd9442 --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/deep_cnn.py @@ -0,0 +1,603 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datetime import datetime +import math +import numpy as np +from six.moves import xrange +import tensorflow as tf +import time + +import utils + +FLAGS = tf.app.flags.FLAGS + +# Basic model parameters. +tf.app.flags.DEFINE_integer('dropout_seed', 123, """seed for dropout.""") +tf.app.flags.DEFINE_integer('batch_size', 128, """Nb of images in a batch.""") +tf.app.flags.DEFINE_integer('epochs_per_decay', 350, """Nb epochs per decay""") +tf.app.flags.DEFINE_integer('learning_rate', 5, """100 * learning rate""") +tf.app.flags.DEFINE_boolean('log_device_placement', False, """see TF doc""") + + +# Constants describing the training process. +MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average. +LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor. + + +def _variable_on_cpu(name, shape, initializer): + """Helper to create a Variable stored on CPU memory. + + Args: + name: name of the variable + shape: list of ints + initializer: initializer for Variable + + Returns: + Variable Tensor + """ + with tf.device('/cpu:0'): + var = tf.get_variable(name, shape, initializer=initializer) + return var + + +def _variable_with_weight_decay(name, shape, stddev, wd): + """Helper to create an initialized Variable with weight decay. + + Note that the Variable is initialized with a truncated normal distribution. + A weight decay is added only if one is specified. + + Args: + name: name of the variable + shape: list of ints + stddev: standard deviation of a truncated Gaussian + wd: add L2Loss weight decay multiplied by this float. If None, weight + decay is not added for this Variable. 
+ + Returns: + Variable Tensor + """ + var = _variable_on_cpu(name, shape, + tf.truncated_normal_initializer(stddev=stddev)) + if wd is not None: + weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') + tf.add_to_collection('losses', weight_decay) + return var + + +def inference(images, dropout=False): + """Build the CNN model. + Args: + images: Images returned from distorted_inputs() or inputs(). + dropout: Boolean controlling whether to use dropout or not + Returns: + Logits + """ + if FLAGS.dataset == 'mnist': + first_conv_shape = [5, 5, 1, 64] + else: + first_conv_shape = [5, 5, 3, 64] + + # conv1 + with tf.variable_scope('conv1') as scope: + kernel = _variable_with_weight_decay('weights', + shape=first_conv_shape, + stddev=1e-4, + wd=0.0) + conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv1 = tf.nn.relu(bias, name=scope.name) + if dropout: + conv1 = tf.nn.dropout(conv1, 0.3, seed=FLAGS.dropout_seed) + + + # pool1 + pool1 = tf.nn.max_pool(conv1, + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding='SAME', + name='pool1') + + # norm1 + norm1 = tf.nn.lrn(pool1, + 4, + bias=1.0, + alpha=0.001 / 9.0, + beta=0.75, + name='norm1') + + # conv2 + with tf.variable_scope('conv2') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[5, 5, 64, 128], + stddev=1e-4, + wd=0.0) + conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [128], tf.constant_initializer(0.1)) + bias = tf.nn.bias_add(conv, biases) + conv2 = tf.nn.relu(bias, name=scope.name) + if dropout: + conv2 = tf.nn.dropout(conv2, 0.3, seed=FLAGS.dropout_seed) + + + # norm2 + norm2 = tf.nn.lrn(conv2, + 4, + bias=1.0, + alpha=0.001 / 9.0, + beta=0.75, + name='norm2') + + # pool2 + pool2 = tf.nn.max_pool(norm2, + ksize=[1, 3, 3, 1], + strides=[1, 2, 2, 1], + padding='SAME', + name='pool2') + + # local3 + with tf.variable_scope('local3') as scope: + # Move everything into depth so we can perform a single matrix multiply. + reshape = tf.reshape(pool2, [FLAGS.batch_size, -1]) + dim = reshape.get_shape()[1].value + weights = _variable_with_weight_decay('weights', + shape=[dim, 384], + stddev=0.04, + wd=0.004) + biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1)) + local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name) + if dropout: + local3 = tf.nn.dropout(local3, 0.5, seed=FLAGS.dropout_seed) + + # local4 + with tf.variable_scope('local4') as scope: + weights = _variable_with_weight_decay('weights', + shape=[384, 192], + stddev=0.04, + wd=0.004) + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1)) + local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name) + if dropout: + local4 = tf.nn.dropout(local4, 0.5, seed=FLAGS.dropout_seed) + + # compute logits + with tf.variable_scope('softmax_linear') as scope: + weights = _variable_with_weight_decay('weights', + [192, FLAGS.nb_labels], + stddev=1/192.0, + wd=0.0) + biases = _variable_on_cpu('biases', + [FLAGS.nb_labels], + tf.constant_initializer(0.0)) + logits = tf.add(tf.matmul(local4, weights), biases, name=scope.name) + + return logits + + +def inference_deeper(images, dropout=False): + """Build a deeper CNN model. + Args: + images: Images returned from distorted_inputs() or inputs(). 
+ dropout: Boolean controlling whether to use dropout or not + Returns: + Logits + """ + if FLAGS.dataset == 'mnist': + first_conv_shape = [3, 3, 1, 96] + else: + first_conv_shape = [3, 3, 3, 96] + + # conv1 + with tf.variable_scope('conv1') as scope: + kernel = _variable_with_weight_decay('weights', + shape=first_conv_shape, + stddev=0.05, + wd=0.0) + conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [96], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv1 = tf.nn.relu(bias, name=scope.name) + + # conv2 + with tf.variable_scope('conv2') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[3, 3, 96, 96], + stddev=0.05, + wd=0.0) + conv = tf.nn.conv2d(conv1, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [96], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv2 = tf.nn.relu(bias, name=scope.name) + + # conv3 + with tf.variable_scope('conv3') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[3, 3, 96, 96], + stddev=0.05, + wd=0.0) + conv = tf.nn.conv2d(conv2, kernel, [1, 2, 2, 1], padding='SAME') + biases = _variable_on_cpu('biases', [96], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv3 = tf.nn.relu(bias, name=scope.name) + if dropout: + conv3 = tf.nn.dropout(conv3, 0.5, seed=FLAGS.dropout_seed) + + # conv4 + with tf.variable_scope('conv4') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[3, 3, 96, 192], + stddev=0.05, + wd=0.0) + conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv4 = tf.nn.relu(bias, name=scope.name) + + # conv5 + with tf.variable_scope('conv5') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[3, 3, 192, 192], + stddev=0.05, + wd=0.0) + conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv5 = tf.nn.relu(bias, name=scope.name) + + # conv6 + with tf.variable_scope('conv6') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[3, 3, 192, 192], + stddev=0.05, + wd=0.0) + conv = tf.nn.conv2d(conv5, kernel, [1, 2, 2, 1], padding='SAME') + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.0)) + bias = tf.nn.bias_add(conv, biases) + conv6 = tf.nn.relu(bias, name=scope.name) + if dropout: + conv6 = tf.nn.dropout(conv6, 0.5, seed=FLAGS.dropout_seed) + + + # conv7 + with tf.variable_scope('conv7') as scope: + kernel = _variable_with_weight_decay('weights', + shape=[5, 5, 192, 192], + stddev=1e-4, + wd=0.0) + conv = tf.nn.conv2d(conv6, kernel, [1, 1, 1, 1], padding='SAME') + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1)) + bias = tf.nn.bias_add(conv, biases) + conv7 = tf.nn.relu(bias, name=scope.name) + + + # local1 + with tf.variable_scope('local1') as scope: + # Move everything into depth so we can perform a single matrix multiply. 
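+    # conv7 has shape [batch_size, height, width, 192]; flattening it gives +    # one vector per example for the fully connected layers below.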
+ reshape = tf.reshape(conv7, [FLAGS.batch_size, -1]) + dim = reshape.get_shape()[1].value + weights = _variable_with_weight_decay('weights', + shape=[dim, 192], + stddev=0.05, + wd=0) + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1)) + local1 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name) + + # local2 + with tf.variable_scope('local2') as scope: + weights = _variable_with_weight_decay('weights', + shape=[192, 192], + stddev=0.05, + wd=0) + biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1)) + local2 = tf.nn.relu(tf.matmul(local1, weights) + biases, name=scope.name) + if dropout: + local2 = tf.nn.dropout(local2, 0.5, seed=FLAGS.dropout_seed) + + # compute logits + with tf.variable_scope('softmax_linear') as scope: + weights = _variable_with_weight_decay('weights', + [192, FLAGS.nb_labels], + stddev=0.05, + wd=0.0) + biases = _variable_on_cpu('biases', + [FLAGS.nb_labels], + tf.constant_initializer(0.0)) + logits = tf.add(tf.matmul(local2, weights), biases, name=scope.name) + + return logits + + +def loss_fun(logits, labels): + """Add L2Loss to all the trainable variables. + + Add summary for "Loss" and "Loss/avg". + Args: + logits: Logits from inference(). + labels: Labels from distorted_inputs or inputs(). 1-D tensor + of shape [batch_size] + distillation: if set to True, use probabilities and not class labels to + compute softmax loss + + Returns: + Loss tensor of type float. + """ + + # Calculate the cross entropy between labels and predictions + labels = tf.cast(labels, tf.int64) + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( + logits=logits, labels=labels, name='cross_entropy_per_example') + + # Calculate the average cross entropy loss across the batch. + cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') + + # Add to TF collection for losses + tf.add_to_collection('losses', cross_entropy_mean) + + # The total loss is defined as the cross entropy loss plus all of the weight + # decay terms (L2 loss). + return tf.add_n(tf.get_collection('losses'), name='total_loss') + + +def moving_av(total_loss): + """ + Generates moving average for all losses + + Args: + total_loss: Total loss from loss(). + Returns: + loss_averages_op: op for generating moving averages of losses. + """ + # Compute the moving average of all individual losses and the total loss. + loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') + losses = tf.get_collection('losses') + loss_averages_op = loss_averages.apply(losses + [total_loss]) + + return loss_averages_op + + +def train_op_fun(total_loss, global_step): + """Train model. + + Create an optimizer and apply to all trainable variables. Add moving + average for all trainable variables. + + Args: + total_loss: Total loss from loss(). + global_step: Integer Variable counting the number of training steps + processed. + Returns: + train_op: op for training. + """ + # Variables that affect learning rate. + nb_ex_per_train_epoch = int(60000 / FLAGS.nb_teachers) + + num_batches_per_epoch = nb_ex_per_train_epoch / FLAGS.batch_size + decay_steps = int(num_batches_per_epoch * FLAGS.epochs_per_decay) + + initial_learning_rate = float(FLAGS.learning_rate) / 100.0 + + # Decay the learning rate exponentially based on the number of steps. 
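+  # With staircase=True, the rate is multiplied by LEARNING_RATE_DECAY_FACTOR +  # once every decay_steps steps rather than decaying continuously.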
+ lr = tf.train.exponential_decay(initial_learning_rate, + global_step, + decay_steps, + LEARNING_RATE_DECAY_FACTOR, + staircase=True) + tf.summary.scalar('learning_rate', lr) + + # Generate moving averages of all losses and associated summaries. + loss_averages_op = moving_av(total_loss) + + # Compute gradients. + with tf.control_dependencies([loss_averages_op]): + opt = tf.train.GradientDescentOptimizer(lr) + grads = opt.compute_gradients(total_loss) + + # Apply gradients. + apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) + + # Add histograms for trainable variables. + for var in tf.trainable_variables(): + tf.summary.histogram(var.op.name, var) + + # Track the moving averages of all trainable variables. + variable_averages = tf.train.ExponentialMovingAverage( + MOVING_AVERAGE_DECAY, global_step) + variables_averages_op = variable_averages.apply(tf.trainable_variables()) + + with tf.control_dependencies([apply_gradient_op, variables_averages_op]): + train_op = tf.no_op(name='train') + + return train_op + + +def _input_placeholder(): + """ + This helper function declares a TF placeholder for the graph input data + :return: TF placeholder for the graph input data + """ + if FLAGS.dataset == 'mnist': + image_size = 28 + num_channels = 1 + else: + image_size = 32 + num_channels = 3 + + # Declare data placeholder + train_node_shape = (FLAGS.batch_size, image_size, image_size, num_channels) + return tf.placeholder(tf.float32, shape=train_node_shape) + + +def train(images, labels, ckpt_path, dropout=False): + """ + This function contains the loop that actually trains the model. + :param images: a numpy array with the input data + :param labels: a numpy array with the output labels + :param ckpt_path: a path (including name) where model checkpoints are saved + :param dropout: Boolean, whether to use dropout or not + :return: True if everything went well + """ + + # Check training data + assert len(images) == len(labels) + assert images.dtype == np.float32 + assert labels.dtype == np.int32 + + # Set default TF graph + with tf.Graph().as_default(): + global_step = tf.Variable(0, trainable=False) + + # Declare data placeholder + train_data_node = _input_placeholder() + + # Create a placeholder to hold labels + train_labels_shape = (FLAGS.batch_size,) + train_labels_node = tf.placeholder(tf.int32, shape=train_labels_shape) + + print("Done Initializing Training Placeholders") + + # Build a Graph that computes the logits predictions from the placeholder + if FLAGS.deeper: + logits = inference_deeper(train_data_node, dropout=dropout) + else: + logits = inference(train_data_node, dropout=dropout) + + # Calculate loss + loss = loss_fun(logits, train_labels_node) + + # Build a Graph that trains the model with one batch of examples and + # updates the model parameters. + train_op = train_op_fun(loss, global_step) + + # Create a saver. + saver = tf.train.Saver(tf.global_variables()) + + print("Graph constructed and saver created") + + # Build an initialization operation to run below. 
+ init = tf.global_variables_initializer() + + # Create and init sessions + sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)) #NOLINT(long-line) + sess.run(init) + + print("Session ready, beginning training loop") + + # Initialize the number of batches + data_length = len(images) + nb_batches = math.ceil(data_length / FLAGS.batch_size) + + for step in xrange(FLAGS.max_steps): + # for debug, save start time + start_time = time.time() + + # Current batch number + batch_nb = step % nb_batches + + # Current batch start and end indices + start, end = utils.batch_indices(batch_nb, data_length, FLAGS.batch_size) + + # Prepare dictionnary to feed the session with + feed_dict = {train_data_node: images[start:end], + train_labels_node: labels[start:end]} + + # Run training step + _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) + + # Compute duration of training step + duration = time.time() - start_time + + # Sanity check + assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + + # Echo loss once in a while + if step % 100 == 0: + num_examples_per_step = FLAGS.batch_size + examples_per_sec = num_examples_per_step / duration + sec_per_batch = float(duration) + + format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' + 'sec/batch)') + print (format_str % (datetime.now(), step, loss_value, + examples_per_sec, sec_per_batch)) + + # Save the model checkpoint periodically. + if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: + saver.save(sess, ckpt_path, global_step=step) + + return True + + +def softmax_preds(images, ckpt_path, return_logits=False): + """ + Compute softmax activations (probabilities) with the model saved in the path + specified as an argument + :param images: a np array of images + :param ckpt_path: a TF model checkpoint + :param logits: if set to True, return logits instead of probabilities + :return: probabilities (or logits if logits is set to True) + """ + # Compute nb samples and deduce nb of batches + data_length = len(images) + nb_batches = math.ceil(len(images) / FLAGS.batch_size) + + # Declare data placeholder + train_data_node = _input_placeholder() + + # Build a Graph that computes the logits predictions from the placeholder + if FLAGS.deeper: + logits = inference_deeper(train_data_node) + else: + logits = inference(train_data_node) + + if return_logits: + # We are returning the logits directly (no need to apply softmax) + output = logits + else: + # Add softmax predictions to graph: will return probabilities + output = tf.nn.softmax(logits) + + # Restore the moving average version of the learned variables for eval. 
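+  # These are the shadow variables maintained by the ExponentialMovingAverage +  # created in train_op_fun() during training.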
+ variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY) + variables_to_restore = variable_averages.variables_to_restore() + saver = tf.train.Saver(variables_to_restore) + + # Will hold the result + preds = np.zeros((data_length, FLAGS.nb_labels), dtype=np.float32) + + # Create TF session + with tf.Session() as sess: + # Restore TF session from checkpoint file + saver.restore(sess, ckpt_path) + + # Parse data by batch + for batch_nb in xrange(0, int(nb_batches+1)): + # Compute batch start and end indices + start, end = utils.batch_indices(batch_nb, data_length, FLAGS.batch_size) + + # Prepare feed dictionary + feed_dict = {train_data_node: images[start:end]} + + # Run session ([0] because run returns a batch with len 1st dim == 1) + preds[start:end, :] = sess.run([output], feed_dict=feed_dict)[0] + + # Reset graph to allow multiple calls + tf.reset_default_graph() + + return preds diff --git a/tensorflow_privacy/research/pate_2017/input.py b/tensorflow_privacy/research/pate_2017/input.py new file mode 100644 index 0000000..4316b62 --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/input.py @@ -0,0 +1,396 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gzip +import math +import os +import sys +import tarfile + +import numpy as np +from scipy.io import loadmat as loadmat +from six.moves import cPickle as pickle +from six.moves import urllib +from six.moves import xrange +import tensorflow as tf + +FLAGS = tf.flags.FLAGS + + +def create_dir_if_needed(dest_directory): + """Create directory if doesn't exist.""" + if not tf.gfile.IsDirectory(dest_directory): + tf.gfile.MakeDirs(dest_directory) + + return True + + +def maybe_download(file_urls, directory): + """Download a set of files in temporary local folder.""" + + # Create directory if doesn't exist + assert create_dir_if_needed(directory) + + # This list will include all URLS of the local copy of downloaded files + result = [] + + # For each file of the dataset + for file_url in file_urls: + # Extract filename + filename = file_url.split('/')[-1] + + # If downloading from GitHub, remove suffix ?raw=True from local filename + if filename.endswith("?raw=true"): + filename = filename[:-9] + + # Deduce local file url + #filepath = os.path.join(directory, filename) + filepath = directory + '/' + filename + + # Add to result list + result.append(filepath) + + # Test if file already exists + if not tf.gfile.Exists(filepath): + def _progress(count, block_size, total_size): + sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, + float(count * block_size) / float(total_size) * 100.0)) + sys.stdout.flush() + filepath, _ = urllib.request.urlretrieve(file_url, filepath, _progress) + print() + statinfo = os.stat(filepath) + print('Successfully downloaded', 
filename, statinfo.st_size, 'bytes.') + + return result + + +def image_whitening(data): + """ + Subtracts mean of image and divides by adjusted standard variance (for + stability). Operations are per image but performed for the entire array. + """ + assert len(np.shape(data)) == 4 + + # Compute number of pixels in image + nb_pixels = np.shape(data)[1] * np.shape(data)[2] * np.shape(data)[3] + + # Subtract mean + mean = np.mean(data, axis=(1, 2, 3)) + + ones = np.ones(np.shape(data)[1:4], dtype=np.float32) + for i in xrange(len(data)): + data[i, :, :, :] -= mean[i] * ones + + # Compute adjusted standard variance + adj_std_var = np.maximum(np.ones(len(data), dtype=np.float32) / math.sqrt(nb_pixels), np.std(data, axis=(1, 2, 3))) # pylint: disable=line-too-long + + # Divide image + for i in xrange(len(data)): + data[i, :, :, :] = data[i, :, :, :] / adj_std_var[i] + + print(np.shape(data)) + + return data + + +def extract_svhn(local_url): + """Extract a MATLAB matrix into two numpy arrays with data and labels.""" + + with tf.gfile.Open(local_url, mode='r') as file_obj: + # Load MATLAB matrix using scipy IO + data_dict = loadmat(file_obj) + + # Extract each dictionary (one for data, one for labels) + data, labels = data_dict['X'], data_dict['y'] + + # Set np type + data = np.asarray(data, dtype=np.float32) + labels = np.asarray(labels, dtype=np.int32) + + # Transpose data to match TF model input format + data = data.transpose(3, 0, 1, 2) + + # Fix the SVHN labels which label 0s as 10s + labels[labels == 10] = 0 + + # Fix label dimensions + labels = labels.reshape(len(labels)) + + return data, labels + + +def unpickle_cifar_dic(file_path): + """Helper function: unpickles a dictionary (used for loading CIFAR).""" + file_obj = open(file_path, 'rb') + data_dict = pickle.load(file_obj) + file_obj.close() + return data_dict['data'], data_dict['labels'] + + +def extract_cifar10(local_url, data_dir): + """Extracts CIFAR-10 and return numpy arrays with the different sets.""" + + # These numpy dumps can be reloaded to avoid performing the pre-processing + # if they exist in the working directory. + # Changing the order of this list will ruin the indices below. 
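+  # Index convention for this list: [0] train data, [1] train labels,
+  # [2] test data, [3] test labels.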
+ preprocessed_files = ['/cifar10_train.npy', + '/cifar10_train_labels.npy', + '/cifar10_test.npy', + '/cifar10_test_labels.npy'] + + all_preprocessed = True + for file_name in preprocessed_files: + if not tf.gfile.Exists(data_dir + file_name): + all_preprocessed = False + break + + if all_preprocessed: + # Reload pre-processed training data from numpy dumps + with tf.gfile.Open(data_dir + preprocessed_files[0], mode='r') as file_obj: + train_data = np.load(file_obj) + with tf.gfile.Open(data_dir + preprocessed_files[1], mode='r') as file_obj: + train_labels = np.load(file_obj) + + # Reload pre-processed testing data from numpy dumps + with tf.gfile.Open(data_dir + preprocessed_files[2], mode='r') as file_obj: + test_data = np.load(file_obj) + with tf.gfile.Open(data_dir + preprocessed_files[3], mode='r') as file_obj: + test_labels = np.load(file_obj) + + else: + # Do everything from scratch + # Define lists of all files we should extract + train_files = ['data_batch_' + str(i) for i in xrange(1, 6)] + test_file = ['test_batch'] + cifar10_files = train_files + test_file + + # Check if all files have already been extracted + need_to_unpack = False + for file_name in cifar10_files: + if not tf.gfile.Exists(file_name): + need_to_unpack = True + break + + # We have to unpack the archive + if need_to_unpack: + tarfile.open(local_url, 'r:gz').extractall(data_dir) + + # Load training images and labels + images = [] + labels = [] + for train_file in train_files: + # Construct filename + filename = data_dir + '/cifar-10-batches-py/' + train_file + + # Unpickle dictionary and extract images and labels + images_tmp, labels_tmp = unpickle_cifar_dic(filename) + + # Append to lists + images.append(images_tmp) + labels.append(labels_tmp) + + # Convert to numpy arrays and reshape in the expected format + train_data = np.asarray(images, dtype=np.float32) + train_data = train_data.reshape((50000, 3, 32, 32)) + train_data = np.swapaxes(train_data, 1, 3) + train_labels = np.asarray(labels, dtype=np.int32).reshape(50000) + + # Save so we don't have to do this again + np.save(data_dir + preprocessed_files[0], train_data) + np.save(data_dir + preprocessed_files[1], train_labels) + + # Construct filename for test file + filename = data_dir + '/cifar-10-batches-py/' + test_file[0] + + # Load test images and labels + test_data, test_images = unpickle_cifar_dic(filename) + + # Convert to numpy arrays and reshape in the expected format + test_data = np.asarray(test_data, dtype=np.float32) + test_data = test_data.reshape((10000, 3, 32, 32)) + test_data = np.swapaxes(test_data, 1, 3) + test_labels = np.asarray(test_images, dtype=np.int32).reshape(10000) + + # Save so we don't have to do this again + np.save(data_dir + preprocessed_files[2], test_data) + np.save(data_dir + preprocessed_files[3], test_labels) + + return train_data, train_labels, test_data, test_labels + + +def extract_mnist_data(filename, num_images, image_size, pixel_depth): + """ + Extract the images into a 4D tensor [image index, y, x, channels]. + + Values are rescaled from [0, 255] down to [-0.5, 0.5]. 
+ """ + if not tf.gfile.Exists(filename+'.npy'): + with gzip.open(filename) as bytestream: + bytestream.read(16) + buf = bytestream.read(image_size * image_size * num_images) + data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32) + data = (data - (pixel_depth / 2.0)) / pixel_depth + data = data.reshape(num_images, image_size, image_size, 1) + np.save(filename, data) + return data + else: + with tf.gfile.Open(filename+'.npy', mode='rb') as file_obj: + return np.load(file_obj) + + +def extract_mnist_labels(filename, num_images): + """ + Extract the labels into a vector of int64 label IDs. + """ + if not tf.gfile.Exists(filename+'.npy'): + with gzip.open(filename) as bytestream: + bytestream.read(8) + buf = bytestream.read(1 * num_images) + labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int32) + np.save(filename, labels) + return labels + else: + with tf.gfile.Open(filename+'.npy', mode='rb') as file_obj: + return np.load(file_obj) + + +def ld_svhn(extended=False, test_only=False): + """ + Load the original SVHN data + + Args: + extended: include extended training data in the returned array + test_only: disables loading of both train and extra -> large speed up + """ + # Define files to be downloaded + # WARNING: changing the order of this list will break indices (cf. below) + file_urls = ['http://ufldl.stanford.edu/housenumbers/train_32x32.mat', + 'http://ufldl.stanford.edu/housenumbers/test_32x32.mat', + 'http://ufldl.stanford.edu/housenumbers/extra_32x32.mat'] + + # Maybe download data and retrieve local storage urls + local_urls = maybe_download(file_urls, FLAGS.data_dir) + + # Extra Train, Test, and Extended Train data + if not test_only: + # Load and applying whitening to train data + train_data, train_labels = extract_svhn(local_urls[0]) + train_data = image_whitening(train_data) + + # Load and applying whitening to extended train data + ext_data, ext_labels = extract_svhn(local_urls[2]) + ext_data = image_whitening(ext_data) + + # Load and applying whitening to test data + test_data, test_labels = extract_svhn(local_urls[1]) + test_data = image_whitening(test_data) + + if test_only: + return test_data, test_labels + else: + if extended: + # Stack train data with the extended training data + train_data = np.vstack((train_data, ext_data)) + train_labels = np.hstack((train_labels, ext_labels)) + + return train_data, train_labels, test_data, test_labels + else: + # Return training and extended training data separately + return train_data, train_labels, test_data, test_labels, ext_data, ext_labels + + +def ld_cifar10(test_only=False): + """Load the original CIFAR10 data.""" + + # Define files to be downloaded + file_urls = ['https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'] + + # Maybe download data and retrieve local storage urls + local_urls = maybe_download(file_urls, FLAGS.data_dir) + + # Extract archives and return different sets + dataset = extract_cifar10(local_urls[0], FLAGS.data_dir) + + # Unpack tuple + train_data, train_labels, test_data, test_labels = dataset + + # Apply whitening to input data + train_data = image_whitening(train_data) + test_data = image_whitening(test_data) + + if test_only: + return test_data, test_labels + else: + return train_data, train_labels, test_data, test_labels + + +def ld_mnist(test_only=False): + """Load the MNIST dataset.""" + # Define files to be downloaded + # WARNING: changing the order of this list will break indices (cf. 
below) + file_urls = ['http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', + 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', + 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', + 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', + ] + + # Maybe download data and retrieve local storage urls + local_urls = maybe_download(file_urls, FLAGS.data_dir) + + # Extract it into np arrays. + train_data = extract_mnist_data(local_urls[0], 60000, 28, 1) + train_labels = extract_mnist_labels(local_urls[1], 60000) + test_data = extract_mnist_data(local_urls[2], 10000, 28, 1) + test_labels = extract_mnist_labels(local_urls[3], 10000) + + if test_only: + return test_data, test_labels + else: + return train_data, train_labels, test_data, test_labels + + +def partition_dataset(data, labels, nb_teachers, teacher_id): + """ + Simple partitioning algorithm that returns the right portion of the data + needed by a given teacher out of a certain nb of teachers + + Args: + data: input data to be partitioned + labels: output data to be partitioned + nb_teachers: number of teachers in the ensemble (affects size of each + partition) + teacher_id: id of partition to retrieve + """ + + # Sanity check + assert len(data) == len(labels) + assert int(teacher_id) < int(nb_teachers) + + # This will floor the possible number of batches + batch_len = int(len(data) / nb_teachers) + + # Compute start, end indices of partition + start = teacher_id * batch_len + end = (teacher_id+1) * batch_len + + # Slice partition off + partition_data = data[start:end] + partition_labels = labels[start:end] + + return partition_data, partition_labels diff --git a/tensorflow_privacy/research/pate_2017/metrics.py b/tensorflow_privacy/research/pate_2017/metrics.py new file mode 100644 index 0000000..d9c7119 --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/metrics.py @@ -0,0 +1,49 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + + +def accuracy(logits, labels): + """ + Return accuracy of the array of logits (or label predictions) wrt the labels + :param logits: this can either be logits, probabilities, or a single label + :param labels: the correct labels to match against + :return: the accuracy as a float + """ + assert len(logits) == len(labels) + + if len(np.shape(logits)) > 1: + # Predicted labels are the argmax over axis 1 + predicted_labels = np.argmax(logits, axis=1) + else: + # Input was already labels + assert len(np.shape(logits)) == 1 + predicted_labels = logits + + # Check against correct labels to compute correct guesses + correct = np.sum(predicted_labels == labels.reshape(len(labels))) + + # Divide by number of labels to obtain accuracy + accuracy = float(correct) / len(labels) + + # Return float value + return accuracy + + diff --git a/tensorflow_privacy/research/pate_2017/train_student.py b/tensorflow_privacy/research/pate_2017/train_student.py new file mode 100644 index 0000000..ab8330d --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/train_student.py @@ -0,0 +1,205 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import aggregation
+import deep_cnn
+import input  # pylint: disable=redefined-builtin
+import metrics
+import numpy as np
+from six.moves import xrange
+import tensorflow as tf
+
+FLAGS = tf.flags.FLAGS
+
+tf.flags.DEFINE_string('dataset', 'svhn', 'The name of the dataset to use')
+tf.flags.DEFINE_integer('nb_labels', 10, 'Number of output classes')
+
+tf.flags.DEFINE_string('data_dir', '/tmp', 'Temporary storage')
+tf.flags.DEFINE_string('train_dir', '/tmp/train_dir',
+                       'Where model checkpoints are saved')
+tf.flags.DEFINE_string('teachers_dir', '/tmp/train_dir',
+                       'Directory where teachers checkpoints are stored.')
+
+tf.flags.DEFINE_integer('teachers_max_steps', 3000,
+                        'Number of steps the teachers were run for.')
+tf.flags.DEFINE_integer('max_steps', 3000, 'Number of steps to run student.')
+tf.flags.DEFINE_integer('nb_teachers', 10, 'Teachers in the ensemble.')
+tf.flags.DEFINE_integer('stdnt_share', 1000,
+                        'Student share (last index) of the test data')
+tf.flags.DEFINE_integer('lap_scale', 10,
+                        'Scale of the Laplacian noise added for privacy')
+tf.flags.DEFINE_boolean('save_labels', False,
+                        'Dump numpy arrays of labels and clean teacher votes')
+tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model')
+
+
+def ensemble_preds(dataset, nb_teachers, stdnt_data):
+  """
+  Given a dataset, a number of teachers, and some input data, this helper
+  function queries each teacher for predictions on the data and returns
+  all predictions in a single array. (These can then be aggregated into a
+  single prediction per input using aggregation.py; cf. prepare_student_data()
+  below.)
+  :param dataset: string corresponding to mnist, cifar10, or svhn
+  :param nb_teachers: number of teachers (in the ensemble) to learn from
+  :param stdnt_data: unlabeled student training data
+  :return: 3d array (teacher id, sample id, probability per class)
+  """
+
+  # Compute shape of array that will hold probabilities produced by each
+  # teacher, for each training point, and each output class
+  result_shape = (nb_teachers, len(stdnt_data), FLAGS.nb_labels)
+
+  # Create array that will hold result
+  result = np.zeros(result_shape, dtype=np.float32)
+
+  # Get predictions from each teacher
+  for teacher_id in xrange(nb_teachers):
+    # Compute path of checkpoint file for teacher model with ID teacher_id
+    if FLAGS.deeper:
+      ckpt_path = FLAGS.teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt-' + str(FLAGS.teachers_max_steps - 1)  #NOLINT(long-line)
+    else:
+      ckpt_path = FLAGS.teachers_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt-' + str(FLAGS.teachers_max_steps - 1)  # NOLINT(long-line)
+
+    # Get predictions on our training data and store in result array
+    result[teacher_id] = deep_cnn.softmax_preds(stdnt_data, ckpt_path)
+
+    # This can take a while when there are a lot of teachers so output status
+    print("Computed Teacher " + str(teacher_id) + " softmax predictions")
+
+  return result
+
+
+def prepare_student_data(dataset, nb_teachers, save=False):
+  """
+  Takes a dataset name and the size of the teacher ensemble and prepares
+  training data for the student model, according to parameters indicated
+  in flags above.
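+  Labels are obtained by noisy-max aggregation (see aggregation.py): Laplacian
+  noise, whose magnitude is controlled by the lap_scale flag, is added to the
+  per-class teacher vote counts before taking the argmax.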
+ :param dataset: string corresponding to mnist, cifar10, or svhn + :param nb_teachers: number of teachers (in the ensemble) to learn from + :param save: if set to True, will dump student training labels predicted by + the ensemble of teachers (with Laplacian noise) as npy files. + It also dumps the clean votes for each class (without noise) and + the labels assigned by teachers + :return: pairs of (data, labels) to be used for student training and testing + """ + assert input.create_dir_if_needed(FLAGS.train_dir) + + # Load the dataset + if dataset == 'svhn': + test_data, test_labels = input.ld_svhn(test_only=True) + elif dataset == 'cifar10': + test_data, test_labels = input.ld_cifar10(test_only=True) + elif dataset == 'mnist': + test_data, test_labels = input.ld_mnist(test_only=True) + else: + print("Check value of dataset flag") + return False + + # Make sure there is data leftover to be used as a test set + assert FLAGS.stdnt_share < len(test_data) + + # Prepare [unlabeled] student training data (subset of test set) + stdnt_data = test_data[:FLAGS.stdnt_share] + + # Compute teacher predictions for student training data + teachers_preds = ensemble_preds(dataset, nb_teachers, stdnt_data) + + # Aggregate teacher predictions to get student training labels + if not save: + stdnt_labels = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale) + else: + # Request clean votes and clean labels as well + stdnt_labels, clean_votes, labels_for_dump = aggregation.noisy_max(teachers_preds, FLAGS.lap_scale, return_clean_votes=True) #NOLINT(long-line) + + # Prepare filepath for numpy dump of clean votes + filepath = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_clean_votes_lap_' + str(FLAGS.lap_scale) + '.npy' # NOLINT(long-line) + + # Prepare filepath for numpy dump of clean labels + filepath_labels = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_teachers_labels_lap_' + str(FLAGS.lap_scale) + '.npy' # NOLINT(long-line) + + # Dump clean_votes array + with tf.gfile.Open(filepath, mode='w') as file_obj: + np.save(file_obj, clean_votes) + + # Dump labels_for_dump array + with tf.gfile.Open(filepath_labels, mode='w') as file_obj: + np.save(file_obj, labels_for_dump) + + # Print accuracy of aggregated labels + ac_ag_labels = metrics.accuracy(stdnt_labels, test_labels[:FLAGS.stdnt_share]) + print("Accuracy of the aggregated labels: " + str(ac_ag_labels)) + + # Store unused part of test set for use as a test set after student training + stdnt_test_data = test_data[FLAGS.stdnt_share:] + stdnt_test_labels = test_labels[FLAGS.stdnt_share:] + + if save: + # Prepare filepath for numpy dump of labels produced by noisy aggregation + filepath = FLAGS.data_dir + "/" + str(dataset) + '_' + str(nb_teachers) + '_student_labels_lap_' + str(FLAGS.lap_scale) + '.npy' #NOLINT(long-line) + + # Dump student noisy labels array + with tf.gfile.Open(filepath, mode='w') as file_obj: + np.save(file_obj, stdnt_labels) + + return stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels + + +def train_student(dataset, nb_teachers): + """ + This function trains a student using predictions made by an ensemble of + teachers. The student and teacher models are trained using the same + neural network architecture. 
+ :param dataset: string corresponding to mnist, cifar10, or svhn + :param nb_teachers: number of teachers (in the ensemble) to learn from + :return: True if student training went well + """ + assert input.create_dir_if_needed(FLAGS.train_dir) + + # Call helper function to prepare student data using teacher predictions + stdnt_dataset = prepare_student_data(dataset, nb_teachers, save=True) + + # Unpack the student dataset + stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = stdnt_dataset + + # Prepare checkpoint filename and path + if FLAGS.deeper: + ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student_deeper.ckpt' #NOLINT(long-line) + else: + ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt' # NOLINT(long-line) + + # Start student training + assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path) + + # Compute final checkpoint name for student (with max number of steps) + ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1) + + # Compute student label predictions on remaining chunk of test set + student_preds = deep_cnn.softmax_preds(stdnt_test_data, ckpt_path_final) + + # Compute teacher accuracy + precision = metrics.accuracy(student_preds, stdnt_test_labels) + print('Precision of student after training: ' + str(precision)) + + return True + +def main(argv=None): # pylint: disable=unused-argument + # Run student training according to values specified in flags + assert train_student(FLAGS.dataset, FLAGS.nb_teachers) + +if __name__ == '__main__': + tf.app.run() diff --git a/tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh b/tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh new file mode 100644 index 0000000..de81e9b --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/train_student_mnist_250_lap_20_count_50_epochs_600.sh @@ -0,0 +1,25 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +# Be sure to clone https://github.com/openai/improved-gan +# and add improved-gan/mnist_svhn_cifar10 to your PATH variable + +# Download labels used to train the student +wget https://github.com/npapernot/multiple-teachers-for-privacy/blob/master/mnist_250_student_labels_lap_20.npy + +# Train the student using improved-gan +THEANO_FLAGS='floatX=float32,device=gpu,lib.cnmem=1' train_mnist_fm_custom_labels.py --labels mnist_250_student_labels_lap_20.npy --count 50 --epochs 600 + diff --git a/tensorflow_privacy/research/pate_2017/train_teachers.py b/tensorflow_privacy/research/pate_2017/train_teachers.py new file mode 100644 index 0000000..c6ca5d2 --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/train_teachers.py @@ -0,0 +1,101 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import deep_cnn +import input # pylint: disable=redefined-builtin +import metrics +import tensorflow as tf + + +tf.flags.DEFINE_string('dataset', 'svhn', 'The name of the dataset to use') +tf.flags.DEFINE_integer('nb_labels', 10, 'Number of output classes') + +tf.flags.DEFINE_string('data_dir','/tmp','Temporary storage') +tf.flags.DEFINE_string('train_dir','/tmp/train_dir', + 'Where model ckpt are saved') + +tf.flags.DEFINE_integer('max_steps', 3000, 'Number of training steps to run.') +tf.flags.DEFINE_integer('nb_teachers', 50, 'Teachers in the ensemble.') +tf.flags.DEFINE_integer('teacher_id', 0, 'ID of teacher being trained.') + +tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model') + +FLAGS = tf.flags.FLAGS + + +def train_teacher(dataset, nb_teachers, teacher_id): + """ + This function trains a teacher (teacher id) among an ensemble of nb_teachers + models for the dataset specified. + :param dataset: string corresponding to dataset (svhn, cifar10) + :param nb_teachers: total number of teachers in the ensemble + :param teacher_id: id of the teacher being trained + :return: True if everything went well + """ + # If working directories do not exist, create them + assert input.create_dir_if_needed(FLAGS.data_dir) + assert input.create_dir_if_needed(FLAGS.train_dir) + + # Load the dataset + if dataset == 'svhn': + train_data,train_labels,test_data,test_labels = input.ld_svhn(extended=True) + elif dataset == 'cifar10': + train_data, train_labels, test_data, test_labels = input.ld_cifar10() + elif dataset == 'mnist': + train_data, train_labels, test_data, test_labels = input.ld_mnist() + else: + print("Check value of dataset flag") + return False + + # Retrieve subset of data for this teacher + data, labels = input.partition_dataset(train_data, + train_labels, + nb_teachers, + teacher_id) + + print("Length of training data: " + str(len(labels))) + + # Define teacher checkpoint filename and full path + if FLAGS.deeper: + filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '_deep.ckpt' + else: + filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt' + ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename + + # Perform teacher training + assert deep_cnn.train(data, labels, ckpt_path) + + # Append final step value to checkpoint for evaluation + ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1) + + # Retrieve teacher probability estimates on the test data + teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final) + + # Compute teacher accuracy + precision = metrics.accuracy(teacher_preds, test_labels) + print('Precision of teacher after training: ' + str(precision)) + + return True + + +def main(argv=None): # pylint: disable=unused-argument + # Make a call to train_teachers with values 
specified in flags + assert train_teacher(FLAGS.dataset, FLAGS.nb_teachers, FLAGS.teacher_id) + +if __name__ == '__main__': + tf.app.run() diff --git a/tensorflow_privacy/research/pate_2017/utils.py b/tensorflow_privacy/research/pate_2017/utils.py new file mode 100644 index 0000000..9e3db83 --- /dev/null +++ b/tensorflow_privacy/research/pate_2017/utils.py @@ -0,0 +1,35 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +def batch_indices(batch_nb, data_length, batch_size): + """ + This helper function computes a batch start and end index + :param batch_nb: the batch number + :param data_length: the total length of the data being parsed by batches + :param batch_size: the number of inputs in each batch + :return: pair of (start, end) indices + """ + # Batch start and end index + start = int(batch_nb * batch_size) + end = int((batch_nb + 1) * batch_size) + + # When there are not enough inputs left, we reuse some to complete the batch + if end > data_length: + shift = end - data_length + start -= shift + end -= shift + + return start, end diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/README.md b/tensorflow_privacy/research/pate_2018/ICLR2018/README.md new file mode 100644 index 0000000..baa1db5 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/README.md @@ -0,0 +1,61 @@ +Scripts in support of the paper "Scalable Private Learning with PATE" by Nicolas +Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar +Erlingsson (ICLR 2018, https://arxiv.org/abs/1802.08908). + + +### Requirements + +* Python, version ≥ 2.7 +* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`) +* matplotlib +* numpy +* scipy +* sympy (for smooth sensitivity analysis) +* write access to the current directory (otherwise, output directories in download.py and *.sh +scripts must be changed) + +## Reproducing Figures 1 and 5, and Table 2 + +Before running any of the analysis scripts, create the data/ directory and download votes files by running\ +`$ python download.py` + +To generate Figures 1 and 5 run\ +`$ sh generate_figures.sh`\ +The output is written to the figures/ directory. + +For Table 2 run (may take several hours)\ +`$ sh generate_table.sh`\ +The output is written to the console. + +For data-independent bounds (for comparison with Table 2), run\ +`$ sh generate_table_data_independent.sh`\ +The output is written to the console. + +## Files in this directory + +* generate_figures.sh — Master script for generating Figures 1 and 5. + +* generate_table.sh — Master script for generating Table 2. + +* generate_table_data_independent.sh — Master script for computing data-independent + bounds. + +* rdp_bucketized.py — Script for producing Figure 1 (right) and Figure 5 (right). + +* rdp_cumulative.py — Script for producing Figure 1 (middle) and Figure 5 (left). 
+ +* smooth_sensitivity_table.py — Script for generating Table 2. + +* utility_queries_answered — Script for producing Figure 1 (left). + +* plot_partition.py — Script for producing partition.pdf, a detailed breakdown of privacy +costs for Confident-GNMax with smooth sensitivity analysis (takes ~50 hours). + +* plots_for_slides.py — Script for producing several plots for the slide deck. + +* download.py — Utility script for populating the data/ directory. + +* plot_ls_q.py is not used. + + +All Python files take flags. Run script_name.py --help for help on flags. diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/download.py b/tensorflow_privacy/research/pate_2018/ICLR2018/download.py new file mode 100644 index 0000000..022df1d --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/download.py @@ -0,0 +1,43 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Script to download votes files to the data/ directory. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import urllib +import os +import tarfile + +FILE_URI = 'https://storage.googleapis.com/pate-votes/votes.gz' +DATA_DIR = 'data/' + + +def download(): + print('Downloading ' + FILE_URI) + tar_filename, _ = urllib.request.urlretrieve(FILE_URI) + print('Unpacking ' + tar_filename) + with tarfile.open(tar_filename, "r:gz") as tar: + tar.extractall(DATA_DIR) + print('Done!') + + +if __name__ == '__main__': + if not os.path.exists(DATA_DIR): + print('Data directory does not exist. Creating ' + DATA_DIR) + os.makedirs(DATA_DIR) + download() diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh b/tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh new file mode 100644 index 0000000..cbcf248 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/generate_figures.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +counts_file="data/glyph_5000_teachers.npy" +output_dir="figures/" + +mkdir -p $output_dir + +if [ ! -d "$output_dir" ]; then + echo "Directory $output_dir does not exist." 
+ exit 1 +fi + +python rdp_bucketized.py \ + --plot=small \ + --counts_file=$counts_file \ + --plot_file=$output_dir"noisy_thresholding_check_perf.pdf" + +python rdp_bucketized.py \ + --plot=large \ + --counts_file=$counts_file \ + --plot_file=$output_dir"noisy_thresholding_check_perf_details.pdf" + +python rdp_cumulative.py \ + --cache=False \ + --counts_file=$counts_file \ + --figures_dir=$output_dir + +python utility_queries_answered.py --plot_file=$output_dir"utility_queries_answered.pdf" \ No newline at end of file diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh b/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh new file mode 100644 index 0000000..7625bd4 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +echo "Reproducing Table 2. Takes a couple of hours." + +executable="python smooth_sensitivity_table.py" +data_dir="data" + +echo +echo "######## MNIST ########" +echo + +$executable \ + --counts_file=$data_dir"/mnist_250_teachers.npy" \ + --threshold=200 \ + --sigma1=150 \ + --sigma2=40 \ + --queries=640 \ + --delta=1e-5 + +echo +echo "######## SVHN ########" +echo + +$executable \ + --counts_file=$data_dir"/svhn_250_teachers.npy" \ + --threshold=300 \ + --sigma1=200 \ + --sigma2=40 \ + --queries=8500 \ + --delta=1e-6 + +echo +echo "######## Adult ########" +echo + +$executable \ + --counts_file=$data_dir"/adult_250_teachers.npy" \ + --threshold=300 \ + --sigma1=200 \ + --sigma2=40 \ + --queries=1500 \ + --delta=1e-5 + +echo +echo "######## Glyph (Confident) ########" +echo + +$executable \ + --counts_file=$data_dir"/glyph_5000_teachers.npy" \ + --threshold=1000 \ + --sigma1=500 \ + --sigma2=100 \ + --queries=12000 \ + --delta=1e-8 + +echo +echo "######## Glyph (Interactive, Round 1) ########" +echo + +$executable \ + --counts_file=$data_dir"/glyph_round1.npy" \ + --threshold=3500 \ + --sigma1=1500 \ + --sigma2=100 \ + --delta=1e-8 + +echo +echo "######## Glyph (Interactive, Round 2) ########" +echo + +$executable \ + --counts_file=$data_dir"/glyph_round2.npy" \ + --baseline_file=$data_dir"/glyph_round2_student.npy" \ + --threshold=3500 \ + --sigma1=2000 \ + --sigma2=200 \ + --teachers=5000 \ + --delta=1e-8 diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh b/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh new file mode 100644 index 0000000..3ac3ef7 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/generate_table_data_independent.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +echo "Table 2 with data-independent analysis." + +executable="python smooth_sensitivity_table.py" +data_dir="data" + +echo +echo "######## MNIST ########" +echo + +$executable \ + --counts_file=$data_dir"/mnist_250_teachers.npy" \ + --threshold=200 \ + --sigma1=150 \ + --sigma2=40 \ + --queries=640 \ + --delta=1e-5 \ + --data_independent +echo +echo "######## SVHN ########" +echo + +$executable \ + --counts_file=$data_dir"/svhn_250_teachers.npy" \ + --threshold=300 \ + --sigma1=200 \ + --sigma2=40 \ + --queries=8500 \ + --delta=1e-6 \ + --data_independent + +echo +echo "######## Adult ########" +echo + +$executable \ + --counts_file=$data_dir"/adult_250_teachers.npy" \ + --threshold=300 \ + --sigma1=200 \ + --sigma2=40 \ + --queries=1500 \ + --delta=1e-5 \ + --data_independent + +echo +echo "######## Glyph (Confident) ########" +echo + +$executable \ + --counts_file=$data_dir"/glyph_5000_teachers.npy" \ + --threshold=1000 \ + --sigma1=500 \ + --sigma2=100 \ + --queries=12000 \ + --delta=1e-8 \ + --data_independent + +echo +echo "######## Glyph (Interactive, Round 1) ########" +echo + +$executable \ + --counts_file=$data_dir"/glyph_round1.npy" \ + --threshold=3500 \ + --sigma1=1500 \ + --sigma2=100 \ + --delta=1e-8 \ + --data_independent + +echo +echo "######## Glyph (Interactive, Round 2) ########" +echo + +$executable \ + --counts_file=$data_dir"/glyph_round2.npy" \ + --baseline_file=$data_dir"/glyph_round2_student.npy" \ + --threshold=3500 \ + --sigma1=2000 \ + --sigma2=200 \ + --teachers=5000 \ + --delta=1e-8 \ + --order=8.5 \ + --data_independent diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py b/tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py new file mode 100644 index 0000000..a1e0a49 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/plot_ls_q.py @@ -0,0 +1,105 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Plots LS(q). + +A script in support of the PATE2 paper. NOT PRESENTLY USED. + +The output is written to a specified directory as a pdf file. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import sys + +sys.path.append('..') # Main modules reside in the parent directory. + + +from absl import app +from absl import flags +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top +import numpy as np +import smooth_sensitivity as pate_ss + +plt.style.use('ggplot') + +FLAGS = flags.FLAGS + +flags.DEFINE_string('figures_dir', '', 'Path where the output is written to.') + + +def compute_ls_q(sigma, order, num_classes): + + def beta(q): + return pate_ss._compute_rdp_gnmax(sigma, math.log(q), order) + + def bu(q): + return pate_ss._compute_bu_gnmax(q, sigma, order) + + def bl(q): + return pate_ss._compute_bl_gnmax(q, sigma, order) + + def delta_beta(q): + if q == 0 or q > .8: + return 0 + beta_q = beta(q) + beta_bu_q = beta(bu(q)) + beta_bl_q = beta(bl(q)) + assert beta_bl_q <= beta_q <= beta_bu_q + return beta_bu_q - beta_q # max(beta_bu_q - beta_q, beta_q - beta_bl_q) + + logq0 = pate_ss.compute_logq0_gnmax(sigma, order) + logq1 = pate_ss._compute_logq1(sigma, order, num_classes) + print(math.exp(logq1), math.exp(logq0)) + xs = np.linspace(0, .1, num=1000, endpoint=True) + ys = [delta_beta(x) for x in xs] + return xs, ys + + +def main(argv): + del argv # Unused. + + sigma = 20 + order = 20. + num_classes = 10 + + # sigma = 20 + # order = 25. + # num_classes = 10 + + x_axis, ys = compute_ls_q(sigma, order, num_classes) + + fig, ax = plt.subplots() + fig.set_figheight(4.5) + fig.set_figwidth(4.7) + + ax.plot(x_axis, ys, alpha=.8, linewidth=5) + plt.xlabel('Number of queries answered', fontsize=16) + plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16) + ax.tick_params(labelsize=14) + fout_name = os.path.join(FLAGS.figures_dir, 'ls_of_q.pdf') + print('Saving the graph to ' + fout_name) + plt.show() + + plt.close('all') + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/plot_partition.py b/tensorflow_privacy/research/pate_2018/ICLR2018/plot_partition.py new file mode 100644 index 0000000..ed07a17 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/plot_partition.py @@ -0,0 +1,412 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Produces two plots. One compares aggregators and their analyses. The other +illustrates sources of privacy loss for Confident-GNMax. + +A script in support of the paper "Scalable Private Learning with PATE" by +Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, +Ulfar Erlingsson (https://arxiv.org/abs/1802.08908). + +The input is a file containing a numpy array of votes, one query per row, one +class per column. Ex: + 43, 1821, ..., 3 + 31, 16, ..., 0 + ... 
+ 0, 86, ..., 438 +The output is written to a specified directory and consists of two files. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import pickle +import sys + +sys.path.append('..') # Main modules reside in the parent directory. + +from absl import app +from absl import flags +from collections import namedtuple +import matplotlib + +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top +import numpy as np +import core as pate +import smooth_sensitivity as pate_ss + +plt.style.use('ggplot') + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('cache', False, + 'Read results of privacy analysis from cache.') +flags.DEFINE_string('counts_file', None, 'Counts file.') +flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.') +flags.DEFINE_float('threshold', None, 'Threshold for step 1 (selection).') +flags.DEFINE_float('sigma1', None, 'Sigma for step 1 (selection).') +flags.DEFINE_float('sigma2', None, 'Sigma for step 2 (argmax).') +flags.DEFINE_integer('queries', None, 'Number of queries made by the student.') +flags.DEFINE_float('delta', 1e-8, 'Target delta.') + +flags.mark_flag_as_required('counts_file') +flags.mark_flag_as_required('threshold') +flags.mark_flag_as_required('sigma1') +flags.mark_flag_as_required('sigma2') + +Partition = namedtuple('Partition', ['step1', 'step2', 'ss', 'delta']) + + +def analyze_gnmax_conf_data_ind(votes, threshold, sigma1, sigma2, delta): + orders = np.logspace(np.log10(1.5), np.log10(500), num=100) + n = votes.shape[0] + + rdp_total = np.zeros(len(orders)) + answered_total = 0 + answered = np.zeros(n) + eps_cum = np.full(n, None, dtype=float) + + for i in range(n): + v = votes[i,] + if threshold is not None and sigma1 is not None: + q_step1 = np.exp(pate.compute_logpr_answered(threshold, sigma1, v)) + rdp_total += pate.rdp_data_independent_gaussian(sigma1, orders) + else: + q_step1 = 1. # always answer + + answered_total += q_step1 + answered[i] = answered_total + + rdp_total += q_step1 * pate.rdp_data_independent_gaussian(sigma2, orders) + + eps_cum[i], order_opt = pate.compute_eps_from_delta(orders, rdp_total, + delta) + + if i > 0 and (i + 1) % 1000 == 0: + print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} ' + 'at order = {:.2f}.'.format( + i + 1, + answered[i], + eps_cum[i], + order_opt)) + sys.stdout.flush() + + return eps_cum, answered + + +def analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, delta): + # Short list of orders. + # orders = np.round(np.logspace(np.log10(20), np.log10(200), num=20)) + + # Long list of orders. 
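+  # The data-dependent RDP bound is evaluated at every order in this grid and
+  # the order yielding the smallest epsilon is kept per query; the grid is
+  # denser at small orders, where the optimum typically lies.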
+ orders = np.concatenate((np.arange(20, 40, .2), + np.arange(40, 75, .5), + np.logspace(np.log10(75), np.log10(200), num=20))) + + n = votes.shape[0] + num_classes = votes.shape[1] + num_teachers = int(sum(votes[0,])) + + if threshold is not None and sigma1 is not None: + is_data_ind_step1 = pate.is_data_independent_always_opt_gaussian( + num_teachers, num_classes, sigma1, orders) + else: + is_data_ind_step1 = [True] * len(orders) + + is_data_ind_step2 = pate.is_data_independent_always_opt_gaussian( + num_teachers, num_classes, sigma2, orders) + + eps_partitioned = np.full(n, None, dtype=Partition) + order_opt = np.full(n, None, dtype=float) + ss_std_opt = np.full(n, None, dtype=float) + answered = np.zeros(n) + + rdp_step1_total = np.zeros(len(orders)) + rdp_step2_total = np.zeros(len(orders)) + + ls_total = np.zeros((len(orders), num_teachers)) + answered_total = 0 + + for i in range(n): + v = votes[i,] + + if threshold is not None and sigma1 is not None: + logq_step1 = pate.compute_logpr_answered(threshold, sigma1, v) + rdp_step1_total += pate.compute_rdp_threshold(logq_step1, sigma1, orders) + else: + logq_step1 = 0. # always answer + + pr_answered = np.exp(logq_step1) + logq_step2 = pate.compute_logq_gaussian(v, sigma2) + rdp_step2_total += pr_answered * pate.rdp_gaussian(logq_step2, sigma2, + orders) + + answered_total += pr_answered + + rdp_ss = np.zeros(len(orders)) + ss_std = np.zeros(len(orders)) + + for j, order in enumerate(orders): + if not is_data_ind_step1[j]: + ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold(v, + num_teachers, threshold, sigma1, order) + else: + ls_step1 = np.full(num_teachers, 0, dtype=float) + + if not is_data_ind_step2[j]: + ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax( + v, num_teachers, sigma2, order) + else: + ls_step2 = np.full(num_teachers, 0, dtype=float) + + ls_total[j,] += ls_step1 + pr_answered * ls_step2 + + beta_ss = .49 / order + + ss = pate_ss.compute_discounted_max(beta_ss, ls_total[j,]) + sigma_ss = ((order * math.exp(2 * beta_ss)) / ss) ** (1 / 3) + rdp_ss[j] = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian( + beta_ss, sigma_ss, order) + ss_std[j] = ss * sigma_ss + + rdp_total = rdp_step1_total + rdp_step2_total + rdp_ss + + answered[i] = answered_total + _, order_opt[i] = pate.compute_eps_from_delta(orders, rdp_total, delta) + order_idx = np.searchsorted(orders, order_opt[i]) + + # Since optimal orders are always non-increasing, shrink orders array + # and all cumulative arrays to speed up computation. + if order_idx < len(orders): + orders = orders[:order_idx + 1] + rdp_step1_total = rdp_step1_total[:order_idx + 1] + rdp_step2_total = rdp_step2_total[:order_idx + 1] + + eps_partitioned[i] = Partition(step1=rdp_step1_total[order_idx], + step2=rdp_step2_total[order_idx], + ss=rdp_ss[order_idx], + delta=-math.log(delta) / (order_opt[i] - 1)) + ss_std_opt[i] = ss_std[order_idx] + if i > 0 and (i + 1) % 1 == 0: + print('queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} +/- {:.3f} ' + 'at order = {:.2f}. Contributions: delta = {:.3f}, step1 = {:.3f}, ' + 'step2 = {:.3f}, ss = {:.3f}'.format( + i + 1, + answered[i], + sum(eps_partitioned[i]), + ss_std_opt[i], + order_opt[i], + eps_partitioned[i].delta, + eps_partitioned[i].step1, + eps_partitioned[i].step2, + eps_partitioned[i].ss)) + sys.stdout.flush() + + return eps_partitioned, answered, ss_std_opt, order_opt + + +def plot_comparison(figures_dir, simple_ind, conf_ind, simple_dep, conf_dep): + """Plots variants of GNMax algorithm and their analyses. 
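+
+  Args:
+    figures_dir: Directory where the comparison plot is written.
+    simple_ind, conf_ind: (eps, answered) pairs from the data-independent
+      analysis of Simple and Confident GNMax, respectively.
+    simple_dep, conf_dep: (eps_partitioned, answered, ss_std, order_opt)
+      tuples from the data-dependent analysis.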
+ """ + + def pivot(x_axis, eps, answered): + y = np.full(len(x_axis), None, dtype=float) # delta + for i, x in enumerate(x_axis): + idx = np.searchsorted(answered, x) + if idx < len(eps): + y[i] = eps[idx] + return y + + def pivot_dep(x_axis, data_dep): + eps_partitioned, answered, _, _ = data_dep + eps = [sum(p) for p in eps_partitioned] # Flatten eps + return pivot(x_axis, eps, answered) + + xlim = 10000 + x_axis = range(0, xlim, 10) + + y_simple_ind = pivot(x_axis, *simple_ind) + y_conf_ind = pivot(x_axis, *conf_ind) + + y_simple_dep = pivot_dep(x_axis, simple_dep) + y_conf_dep = pivot_dep(x_axis, conf_dep) + + # plt.close('all') + fig, ax = plt.subplots() + fig.set_figheight(4.5) + fig.set_figwidth(4.7) + + ax.plot(x_axis, y_simple_ind, ls='--', color='r', lw=3, label=r'Simple GNMax, data-ind analysis') + ax.plot(x_axis, y_conf_ind, ls='--', color='b', lw=3, label=r'Confident GNMax, data-ind analysis') + ax.plot(x_axis, y_simple_dep, ls='-', color='r', lw=3, label=r'Simple GNMax, data-dep analysis') + ax.plot(x_axis, y_conf_dep, ls='-', color='b', lw=3, label=r'Confident GNMax, data-dep analysis') + + plt.xticks(np.arange(0, xlim + 1000, 2000)) + plt.xlim([0, xlim]) + plt.ylim(bottom=0) + plt.legend(fontsize=16) + ax.set_xlabel('Number of queries answered', fontsize=16) + ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16) + + ax.tick_params(labelsize=14) + plot_filename = os.path.join(figures_dir, 'comparison.pdf') + print('Saving the graph to ' + plot_filename) + fig.savefig(plot_filename, bbox_inches='tight') + plt.show() + + +def plot_partition(figures_dir, gnmax_conf, print_order): + """Plots an expert version of the privacy-per-answered-query graph. + + Args: + figures_dir: A name of the directory where to save the plot. + eps: The cumulative privacy cost. + partition: Allocation of the privacy cost. + answered: Cumulative number of queries answered. + order_opt: The list of optimal orders. 
+ """ + eps_partitioned, answered, ss_std_opt, order_opt = gnmax_conf + + xlim = 10000 + x = range(0, int(xlim), 10) + lenx = len(x) + y0 = np.full(lenx, np.nan, dtype=float) # delta + y1 = np.full(lenx, np.nan, dtype=float) # delta + step1 + y2 = np.full(lenx, np.nan, dtype=float) # delta + step1 + step2 + y3 = np.full(lenx, np.nan, dtype=float) # delta + step1 + step2 + ss + noise_std = np.full(lenx, np.nan, dtype=float) + + y_right = np.full(lenx, np.nan, dtype=float) + + for i in range(lenx): + idx = np.searchsorted(answered, x[i]) + if idx < len(eps_partitioned): + y0[i] = eps_partitioned[idx].delta + y1[i] = y0[i] + eps_partitioned[idx].step1 + y2[i] = y1[i] + eps_partitioned[idx].step2 + y3[i] = y2[i] + eps_partitioned[idx].ss + + noise_std[i] = ss_std_opt[idx] + y_right[i] = order_opt[idx] + + # plt.close('all') + fig, ax = plt.subplots() + fig.set_figheight(4.5) + fig.set_figwidth(4.7) + fig.patch.set_alpha(0) + + l1 = ax.plot( + x, y3, color='b', ls='-', label=r'Total privacy cost', linewidth=1).pop() + + for y in (y0, y1, y2): + ax.plot(x, y, color='b', ls='-', label=r'_nolegend_', alpha=.5, linewidth=1) + + ax.fill_between(x, [0] * lenx, y0.tolist(), facecolor='b', alpha=.5) + ax.fill_between(x, y0.tolist(), y1.tolist(), facecolor='b', alpha=.4) + ax.fill_between(x, y1.tolist(), y2.tolist(), facecolor='b', alpha=.3) + ax.fill_between(x, y2.tolist(), y3.tolist(), facecolor='b', alpha=.2) + + ax.fill_between(x, (y3 - noise_std).tolist(), (y3 + noise_std).tolist(), + facecolor='r', alpha=.5) + + + plt.xticks(np.arange(0, xlim + 1000, 2000)) + plt.xlim([0, xlim]) + ax.set_ylim([0, 3.]) + + ax.set_xlabel('Number of queries answered', fontsize=16) + ax.set_ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16) + + # Merging legends. + if print_order: + ax2 = ax.twinx() + l2 = ax2.plot( + x, y_right, 'r', ls='-', label=r'Optimal order', linewidth=5, + alpha=.5).pop() + ax2.grid(False) + # ax2.set_ylabel(r'Optimal Renyi order', fontsize=16) + ax2.set_ylim([0, 200.]) + # ax.legend((l1, l2), (l1.get_label(), l2.get_label()), loc=0, fontsize=13) + + ax.tick_params(labelsize=14) + plot_filename = os.path.join(figures_dir, 'partition.pdf') + print('Saving the graph to ' + plot_filename) + fig.savefig(plot_filename, bbox_inches='tight', dpi=800) + plt.show() + + +def run_all_analyses(votes, threshold, sigma1, sigma2, delta): + simple_ind = analyze_gnmax_conf_data_ind(votes, None, None, sigma2, + delta) + + conf_ind = analyze_gnmax_conf_data_ind(votes, threshold, sigma1, sigma2, + delta) + + simple_dep = analyze_gnmax_conf_data_dep(votes, None, None, sigma2, + delta) + + conf_dep = analyze_gnmax_conf_data_dep(votes, threshold, sigma1, sigma2, + delta) + + return (simple_ind, conf_ind, simple_dep, conf_dep) + + +def run_or_load_all_analyses(): + temp_filename = os.path.expanduser('~/tmp/partition_cached.pkl') + + if FLAGS.cache and os.path.isfile(temp_filename): + print('Reading from cache ' + temp_filename) + with open(temp_filename, 'rb') as f: + all_analyses = pickle.load(f) + else: + fin_name = os.path.expanduser(FLAGS.counts_file) + print('Reading raw votes from ' + fin_name) + sys.stdout.flush() + + votes = np.load(fin_name) + + if FLAGS.queries is not None: + if votes.shape[0] < FLAGS.queries: + raise ValueError('Expect {} rows, got {} in {}'.format( + FLAGS.queries, votes.shape[0], fin_name)) + # Truncate the votes matrix to the number of queries made. 
+ votes = votes[:FLAGS.queries, ] + + all_analyses = run_all_analyses(votes, FLAGS.threshold, FLAGS.sigma1, + FLAGS.sigma2, FLAGS.delta) + + print('Writing to cache ' + temp_filename) + with open(temp_filename, 'wb') as f: + pickle.dump(all_analyses, f) + + return all_analyses + + +def main(argv): + del argv # Unused. + + simple_ind, conf_ind, simple_dep, conf_dep = run_or_load_all_analyses() + + figures_dir = os.path.expanduser(FLAGS.figures_dir) + + plot_comparison(figures_dir, simple_ind, conf_ind, simple_dep, conf_dep) + plot_partition(figures_dir, conf_dep, True) + plt.close('all') + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/plots_for_slides.py b/tensorflow_privacy/research/pate_2018/ICLR2018/plots_for_slides.py new file mode 100644 index 0000000..52c36b7 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/plots_for_slides.py @@ -0,0 +1,283 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Plots graphs for the slide deck. + +A script in support of the PATE2 paper. The input is a file containing a numpy +array of votes, one query per row, one class per column. Ex: + 43, 1821, ..., 3 + 31, 16, ..., 0 + ... + 0, 86, ..., 438 +The output graphs are visualized using the TkAgg backend. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import sys + +sys.path.append('..') # Main modules reside in the parent directory. 
+ +from absl import app +from absl import flags +import matplotlib + +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top +import numpy as np +import core as pate +import random + +plt.style.use('ggplot') + +FLAGS = flags.FLAGS +flags.DEFINE_string('counts_file', None, 'Counts file.') +flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.') +flags.DEFINE_boolean('transparent', False, 'Set background to transparent.') + +flags.mark_flag_as_required('counts_file') + + +def setup_plot(): + fig, ax = plt.subplots() + fig.set_figheight(4.5) + fig.set_figwidth(4.7) + + if FLAGS.transparent: + fig.patch.set_alpha(0) + + return fig, ax + + +def plot_rdp_curve_per_example(votes, sigmas): + orders = np.linspace(1., 100., endpoint=True, num=1000) + orders[0] = 1.001 + fig, ax = setup_plot() + + for i in range(votes.shape[0]): + for sigma in sigmas: + logq = pate.compute_logq_gaussian(votes[i,], sigma) + rdp = pate.rdp_gaussian(logq, sigma, orders) + ax.plot( + orders, + rdp, + alpha=1., + label=r'Data-dependent bound, $\sigma$={}'.format(int(sigma)), + linewidth=5) + + for sigma in sigmas: + ax.plot( + orders, + pate.rdp_data_independent_gaussian(sigma, orders), + alpha=.3, + label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)), + linewidth=10) + + plt.xlim(xmin=1, xmax=100) + plt.ylim(ymin=0) + plt.xticks([1, 20, 40, 60, 80, 100]) + plt.yticks([0, .0025, .005, .0075, .01]) + plt.xlabel(r'Order $\alpha$', fontsize=16) + plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16) + ax.tick_params(labelsize=14) + + plt.legend(loc=0, fontsize=13) + plt.show() + + +def plot_rdp_of_sigma(v, order): + sigmas = np.linspace(1., 1000., endpoint=True, num=1000) + fig, ax = setup_plot() + + y = np.zeros(len(sigmas)) + + for i, sigma in enumerate(sigmas): + logq = pate.compute_logq_gaussian(v, sigma) + y[i] = pate.rdp_gaussian(logq, sigma, order) + + ax.plot(sigmas, y, alpha=.8, linewidth=5) + + plt.xlim(xmin=1, xmax=1000) + plt.ylim(ymin=0) + # plt.yticks([0, .0004, .0008, .0012]) + ax.tick_params(labelleft='off') + plt.xlabel(r'Noise $\sigma$', fontsize=16) + plt.ylabel(r'RDP at order $\alpha={}$'.format(order), fontsize=16) + ax.tick_params(labelsize=14) + + # plt.legend(loc=0, fontsize=13) + plt.show() + + +def compute_rdp_curve(votes, threshold, sigma1, sigma2, orders, + target_answered): + rdp_cum = np.zeros(len(orders)) + answered = 0 + for i, v in enumerate(votes): + v = sorted(v, reverse=True) + q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v)) + logq_step2 = pate.compute_logq_gaussian(v, sigma2) + rdp = pate.rdp_gaussian(logq_step2, sigma2, orders) + rdp_cum += q_step1 * rdp + + answered += q_step1 + if answered >= target_answered: + print('Processed {} queries to answer {}.'.format(i, target_answered)) + return rdp_cum + + assert False, 'Never reached {} answered queries.'.format(target_answered) + + +def plot_rdp_total(votes, sigmas): + orders = np.linspace(1., 100., endpoint=True, num=100) + orders[0] = 1.1 + + fig, ax = setup_plot() + + target_answered = 2000 + + for sigma in sigmas: + rdp = compute_rdp_curve(votes, 5000, 1000, sigma, orders, target_answered) + ax.plot( + orders, + rdp, + alpha=.8, + label=r'Data-dependent bound, $\sigma$={}'.format(int(sigma)), + linewidth=5) + + # for sigma in sigmas: + # ax.plot( + # orders, + # target_answered * pate.rdp_data_independent_gaussian(sigma, orders), + # alpha=.3, + # label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)), + # linewidth=10) + 
+ plt.xlim(xmin=1, xmax=100) + plt.ylim(ymin=0) + plt.xticks([1, 20, 40, 60, 80, 100]) + plt.yticks([0, .0005, .001, .0015, .002]) + + plt.xlabel(r'Order $\alpha$', fontsize=16) + plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16) + ax.tick_params(labelsize=14) + + plt.legend(loc=0, fontsize=13) + plt.show() + + +def plot_data_ind_curve(): + fig, ax = setup_plot() + + orders = np.linspace(1., 10., endpoint=True, num=1000) + orders[0] = 1.01 + + ax.plot( + orders, + pate.rdp_data_independent_gaussian(1., orders), + alpha=.5, + color='gray', + linewidth=10) + + # plt.yticks([]) + plt.xlim(xmin=1, xmax=10) + plt.ylim(ymin=0) + plt.xticks([1, 3, 5, 7, 9]) + ax.tick_params(labelsize=14) + plt.show() + + +def plot_two_data_ind_curves(): + orders = np.linspace(1., 100., endpoint=True, num=1000) + orders[0] = 1.001 + + fig, ax = setup_plot() + + for sigma in [100, 150]: + ax.plot( + orders, + pate.rdp_data_independent_gaussian(sigma, orders), + alpha=.3, + label=r'Data-independent bound, $\sigma$={}'.format(int(sigma)), + linewidth=10) + + plt.xlim(xmin=1, xmax=100) + plt.ylim(ymin=0) + plt.xticks([1, 20, 40, 60, 80, 100]) + plt.yticks([0, .0025, .005, .0075, .01]) + plt.xlabel(r'Order $\alpha$', fontsize=16) + plt.ylabel(r'RDP value $\varepsilon$ at $\alpha$', fontsize=16) + ax.tick_params(labelsize=14) + + plt.legend(loc=0, fontsize=13) + plt.show() + + +def scatter_plot(votes, threshold, sigma1, sigma2, order): + fig, ax = setup_plot() + x = [] + y = [] + for i, v in enumerate(votes): + if threshold is not None and sigma1 is not None: + q_step1 = math.exp(pate.compute_logpr_answered(threshold, sigma1, v)) + else: + q_step1 = 1. + if random.random() < q_step1: + logq_step2 = pate.compute_logq_gaussian(v, sigma2) + x.append(max(v)) + y.append(pate.rdp_gaussian(logq_step2, sigma2, order)) + + print('Selected {} queries.'.format(len(x))) + # Plot the data-independent curve: + # data_ind = pate.rdp_data_independent_gaussian(sigma, order) + # plt.plot([0, 5000], [data_ind, data_ind], color='tab:blue', linestyle='-', linewidth=2) + ax.set_yscale('log') + plt.xlim(xmin=0, xmax=5000) + plt.ylim(ymin=1e-300, ymax=1) + plt.yticks([1, 1e-100, 1e-200, 1e-300]) + plt.scatter(x, y, s=1, alpha=0.5) + plt.ylabel(r'RDP at $\alpha={}$'.format(order), fontsize=16) + plt.xlabel(r'max count', fontsize=16) + ax.tick_params(labelsize=14) + plt.show() + + +def main(argv): + del argv # Unused. + fin_name = os.path.expanduser(FLAGS.counts_file) + print('Reading raw votes from ' + fin_name) + sys.stdout.flush() + + plot_data_ind_curve() + plot_two_data_ind_curves() + + v1 = [2550, 2200, 250] # based on votes[2,] + # v2 = [2600, 2200, 200] # based on votes[381,] + plot_rdp_curve_per_example(np.array([v1]), (100., 150.)) + + plot_rdp_of_sigma(np.array(v1), 20.) + + votes = np.load(fin_name) + + plot_rdp_total(votes[:12000, ], (100., 150.)) + scatter_plot(votes[:6000, ], None, None, 100, 20) # w/o thresholding + scatter_plot(votes[:6000, ], 3500, 1500, 100, 20) # with thresholding + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/rdp_bucketized.py b/tensorflow_privacy/research/pate_2018/ICLR2018/rdp_bucketized.py new file mode 100644 index 0000000..8527b46 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/rdp_bucketized.py @@ -0,0 +1,264 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Illustrates how noisy thresholding check changes distribution of queries. + +A script in support of the paper "Scalable Private Learning with PATE" by +Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, +Ulfar Erlingsson (https://arxiv.org/abs/1802.08908). + +The input is a file containing a numpy array of votes, one query per row, one +class per column. Ex: + 43, 1821, ..., 3 + 31, 16, ..., 0 + ... + 0, 86, ..., 438 +The output is one of two graphs depending on the setting of the plot variable. +The output is written to a pdf file. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import sys + +sys.path.append('..') # Main modules reside in the parent directory. + +from absl import app +from absl import flags +import core as pate +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top +import numpy as np +from six.moves import xrange + +plt.style.use('ggplot') + +FLAGS = flags.FLAGS +flags.DEFINE_enum('plot', 'small', ['small', 'large'], 'Selects which of' + 'the two plots is produced.') +flags.DEFINE_string('counts_file', None, 'Counts file.') +flags.DEFINE_string('plot_file', '', 'Plot file to write.') + +flags.mark_flag_as_required('counts_file') + + +def compute_count_per_bin(bin_num, votes): + """Tabulates number of examples in each bin. + + Args: + bin_num: Number of bins. + votes: A matrix of votes, where each row contains votes in one instance. + + Returns: + Array of counts of length bin_num. + """ + sums = np.sum(votes, axis=1) + # Check that all rows contain the same number of votes. + assert max(sums) == min(sums) + + s = max(sums) + + counts = np.zeros(bin_num) + n = votes.shape[0] + + for i in xrange(n): + v = votes[i,] + bin_idx = int(math.floor(max(v) * bin_num / s)) + assert 0 <= bin_idx < bin_num + counts[bin_idx] += 1 + + return counts + + +def compute_privacy_cost_per_bins(bin_num, votes, sigma2, order): + """Outputs average privacy cost per bin. + + Args: + bin_num: Number of bins. + votes: A matrix of votes, where each row contains votes in one instance. + sigma2: The scale (std) of the Gaussian noise. (Same as sigma_2 in + Algorithms 1 and 2.) + order: The Renyi order for which privacy cost is computed. + + Returns: + Expected eps of RDP (ignoring delta) per example in each bin. 
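+    Bins that contain no examples yield NaN (a 0 / 0 division).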
+ """ + n = votes.shape[0] + + bin_counts = np.zeros(bin_num) + bin_rdp = np.zeros(bin_num) # RDP at order=order + + for i in xrange(n): + v = votes[i,] + logq = pate.compute_logq_gaussian(v, sigma2) + rdp_at_order = pate.rdp_gaussian(logq, sigma2, order) + + bin_idx = int(math.floor(max(v) * bin_num / sum(v))) + assert 0 <= bin_idx < bin_num + bin_counts[bin_idx] += 1 + bin_rdp[bin_idx] += rdp_at_order + if (i + 1) % 1000 == 0: + print('example {}'.format(i + 1)) + sys.stdout.flush() + + return bin_rdp / bin_counts + + +def compute_expected_answered_per_bin(bin_num, votes, threshold, sigma1): + """Computes expected number of answers per bin. + + Args: + bin_num: Number of bins. + votes: A matrix of votes, where each row contains votes in one instance. + threshold: The threshold against which check is performed. + sigma1: The std of the Gaussian noise with which check is performed. (Same + as sigma_1 in Algorithms 1 and 2.) + + Returns: + Expected number of queries answered per bin. + """ + n = votes.shape[0] + + bin_answered = np.zeros(bin_num) + + for i in xrange(n): + v = votes[i,] + p = math.exp(pate.compute_logpr_answered(threshold, sigma1, v)) + bin_idx = int(math.floor(max(v) * bin_num / sum(v))) + assert 0 <= bin_idx < bin_num + bin_answered[bin_idx] += p + if (i + 1) % 1000 == 0: + print('example {}'.format(i + 1)) + sys.stdout.flush() + + return bin_answered + + +def main(argv): + del argv # Unused. + fin_name = os.path.expanduser(FLAGS.counts_file) + print('Reading raw votes from ' + fin_name) + sys.stdout.flush() + + votes = np.load(fin_name) + votes = votes[:4000,] # truncate to 4000 samples + + if FLAGS.plot == 'small': + bin_num = 5 + m_check = compute_expected_answered_per_bin(bin_num, votes, 3500, 1500) + elif FLAGS.plot == 'large': + bin_num = 10 + m_check = compute_expected_answered_per_bin(bin_num, votes, 3500, 1500) + a_check = compute_expected_answered_per_bin(bin_num, votes, 5000, 1500) + eps = compute_privacy_cost_per_bins(bin_num, votes, 100, 50) + else: + raise ValueError('--plot flag must be one of ["small", "large"]') + + counts = compute_count_per_bin(bin_num, votes) + bins = np.linspace(0, 100, num=bin_num, endpoint=False) + + plt.close('all') + fig, ax = plt.subplots() + if FLAGS.plot == 'small': + fig.set_figheight(5) + fig.set_figwidth(5) + ax.bar( + bins, + counts, + 20, + color='orangered', + linestyle='dotted', + linewidth=5, + edgecolor='red', + fill=False, + alpha=.5, + align='edge', + label='LNMax answers') + ax.bar( + bins, + m_check, + 20, + color='g', + alpha=.5, + linewidth=0, + edgecolor='g', + align='edge', + label='Confident-GNMax\nanswers') + elif FLAGS.plot == 'large': + fig.set_figheight(4.7) + fig.set_figwidth(7) + ax.bar( + bins, + counts, + 10, + linestyle='dashed', + linewidth=5, + edgecolor='red', + fill=False, + alpha=.5, + align='edge', + label='LNMax answers') + ax.bar( + bins, + m_check, + 10, + color='g', + alpha=.5, + linewidth=0, + edgecolor='g', + align='edge', + label='Confident-GNMax\nanswers (moderate)') + ax.bar( + bins, + a_check, + 10, + color='b', + alpha=.5, + align='edge', + label='Confident-GNMax\nanswers (aggressive)') + ax2 = ax.twinx() + bin_centers = [x + 5 for x in bins] + ax2.plot(bin_centers, eps, 'ko', alpha=.8) + ax2.set_ylim([1e-200, 1.]) + ax2.set_yscale('log') + ax2.grid(False) + ax2.set_yticks([1e-3, 1e-50, 1e-100, 1e-150, 1e-200]) + plt.tick_params(which='minor', right='off') + ax2.set_ylabel(r'Per query privacy cost $\varepsilon$', fontsize=16) + + plt.xlim([0, 100]) + ax.set_ylim([0, 2500]) + # 
ax.set_yscale('log') + ax.set_xlabel('Percentage of teachers that agree', fontsize=16) + ax.set_ylabel('Number of queries answered', fontsize=16) + vals = ax.get_xticks() + ax.set_xticklabels([str(int(x)) + '%' for x in vals]) + ax.tick_params(labelsize=14, bottom=True, top=True, left=True, right=True) + ax.legend(loc=2, prop={'size': 16}) + + # simple: 'figures/noisy_thresholding_check_perf.pdf') + # detailed: 'figures/noisy_thresholding_check_perf_details.pdf' + + print('Saving the graph to ' + FLAGS.plot_file) + plt.savefig(os.path.expanduser(FLAGS.plot_file), bbox_inches='tight') + plt.show() + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/rdp_cumulative.py b/tensorflow_privacy/research/pate_2018/ICLR2018/rdp_cumulative.py new file mode 100644 index 0000000..d4b1c65 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/rdp_cumulative.py @@ -0,0 +1,378 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Plots three graphs illustrating cost of privacy per answered query. + +A script in support of the paper "Scalable Private Learning with PATE" by +Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, +Ulfar Erlingsson (https://arxiv.org/abs/1802.08908). + +The input is a file containing a numpy array of votes, one query per row, one +class per column. Ex: + 43, 1821, ..., 3 + 31, 16, ..., 0 + ... + 0, 86, ..., 438 +The output is written to a specified directory and consists of three pdf files. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import pickle +import sys + +sys.path.append('..') # Main modules reside in the parent directory. + +from absl import app +from absl import flags +import matplotlib + +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top +import numpy as np +import core as pate + +plt.style.use('ggplot') + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('cache', False, + 'Read results of privacy analysis from cache.') +flags.DEFINE_string('counts_file', None, 'Counts file.') +flags.DEFINE_string('figures_dir', '', 'Path where figures are written to.') + +flags.mark_flag_as_required('counts_file') + +def run_analysis(votes, mechanism, noise_scale, params): + """Computes data-dependent privacy. + + Args: + votes: A matrix of votes, where each row contains votes in one instance. + mechanism: A name of the mechanism ('lnmax', 'gnmax', or 'gnmax_conf') + noise_scale: A mechanism privacy parameter. + params: Other privacy parameters. + + Returns: + Four lists: cumulative privacy cost epsilon, how privacy budget is split, + how many queries were answered, optimal order. 
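+    Each list has one entry per processed query; each entry reflects the
+    cumulative analysis up to and including that query.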
+ """ + + def compute_partition(order_opt, eps): + order_opt_idx = np.searchsorted(orders, order_opt) + if mechanism == 'gnmax_conf': + p = (rdp_select_cum[order_opt_idx], + rdp_cum[order_opt_idx] - rdp_select_cum[order_opt_idx], + -math.log(delta) / (order_opt - 1)) + else: + p = (rdp_cum[order_opt_idx], -math.log(delta) / (order_opt - 1)) + return [x / eps for x in p] # Ensures that sum(x) == 1 + + # Short list of orders. + # orders = np.round(np.concatenate((np.arange(2, 50 + 1, 1), + # np.logspace(np.log10(50), np.log10(1000), num=20)))) + + # Long list of orders. + orders = np.concatenate((np.arange(2, 100 + 1, .5), + np.logspace(np.log10(100), np.log10(500), num=100))) + delta = 1e-8 + + n = votes.shape[0] + eps_total = np.zeros(n) + partition = [None] * n + order_opt = np.full(n, np.nan, dtype=float) + answered = np.zeros(n, dtype=float) + + rdp_cum = np.zeros(len(orders)) + rdp_sqrd_cum = np.zeros(len(orders)) + rdp_select_cum = np.zeros(len(orders)) + answered_sum = 0 + + for i in range(n): + v = votes[i,] + if mechanism == 'lnmax': + logq_lnmax = pate.compute_logq_laplace(v, noise_scale) + rdp_query = pate.rdp_pure_eps(logq_lnmax, 2. / noise_scale, orders) + rdp_sqrd = rdp_query ** 2 + pr_answered = 1 + elif mechanism == 'gnmax': + logq_gmax = pate.compute_logq_gaussian(v, noise_scale) + rdp_query = pate.rdp_gaussian(logq_gmax, noise_scale, orders) + rdp_sqrd = rdp_query ** 2 + pr_answered = 1 + elif mechanism == 'gnmax_conf': + logq_step1 = pate.compute_logpr_answered(params['t'], params['sigma1'], v) + logq_step2 = pate.compute_logq_gaussian(v, noise_scale) + q_step1 = np.exp(logq_step1) + logq_step1_min = min(logq_step1, math.log1p(-q_step1)) + rdp_gnmax_step1 = pate.rdp_gaussian(logq_step1_min, + 2 ** .5 * params['sigma1'], orders) + rdp_gnmax_step2 = pate.rdp_gaussian(logq_step2, noise_scale, orders) + rdp_query = rdp_gnmax_step1 + q_step1 * rdp_gnmax_step2 + # The expression below evaluates + # E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2] + rdp_sqrd = ( + rdp_gnmax_step1 ** 2 + 2 * rdp_gnmax_step1 * q_step1 * rdp_gnmax_step2 + + q_step1 * rdp_gnmax_step2 ** 2) + rdp_select_cum += rdp_gnmax_step1 + pr_answered = q_step1 + else: + raise ValueError( + 'Mechanism must be one of ["lnmax", "gnmax", "gnmax_conf"]') + + rdp_cum += rdp_query + rdp_sqrd_cum += rdp_sqrd + answered_sum += pr_answered + + answered[i] = answered_sum + eps_total[i], order_opt[i] = pate.compute_eps_from_delta( + orders, rdp_cum, delta) + partition[i] = compute_partition(order_opt[i], eps_total[i]) + + if i > 0 and (i + 1) % 1000 == 0: + rdp_var = rdp_sqrd_cum / i - ( + rdp_cum / i) ** 2 # Ignore Bessel's correction. + order_opt_idx = np.searchsorted(orders, order_opt[i]) + eps_std = ((i + 1) * rdp_var[order_opt_idx]) ** .5 # Std of the sum. + print( + 'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) ' + 'at order = {:.2f} (contribution from delta = {:.3f})'.format( + i + 1, answered_sum, eps_total[i], eps_std, order_opt[i], + -math.log(delta) / (order_opt[i] - 1))) + sys.stdout.flush() + + return eps_total, partition, answered, order_opt + + +def print_plot_small(figures_dir, eps_lap, eps_gnmax, answered_gnmax): + """Plots a graph of LNMax vs GNMax. + + Args: + figures_dir: A name of the directory where to save the plot. + eps_lap: The cumulative privacy costs of the Laplace mechanism. + eps_gnmax: The cumulative privacy costs of the Gaussian mechanism + answered_gnmax: The cumulative count of queries answered. 
+ """ + xlim = 6000 + x_axis = range(0, int(xlim), 10) + y_lap = np.zeros(len(x_axis), dtype=float) + y_gnmax = np.full(len(x_axis), np.nan, dtype=float) + + for i in range(len(x_axis)): + x = x_axis[i] + y_lap[i] = eps_lap[x] + idx = np.searchsorted(answered_gnmax, x) + if idx < len(eps_gnmax): + y_gnmax[i] = eps_gnmax[idx] + + fig, ax = plt.subplots() + fig.set_figheight(4.5) + fig.set_figwidth(4.7) + ax.plot( + x_axis, y_lap, color='r', ls='--', label='LNMax', alpha=.5, linewidth=5) + ax.plot( + x_axis, + y_gnmax, + color='g', + ls='-', + label='Confident-GNMax', + alpha=.5, + linewidth=5) + plt.xticks(np.arange(0, 7000, 1000)) + plt.xlim([0, 6000]) + plt.ylim([0, 6.]) + plt.xlabel('Number of queries answered', fontsize=16) + plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16) + plt.legend(loc=2, fontsize=13) # loc=2 -- upper left + ax.tick_params(labelsize=14) + fout_name = os.path.join(figures_dir, 'lnmax_vs_gnmax.pdf') + print('Saving the graph to ' + fout_name) + fig.savefig(fout_name, bbox_inches='tight') + plt.show() + + +def print_plot_large(figures_dir, eps_lap, eps_gnmax1, answered_gnmax1, + eps_gnmax2, partition_gnmax2, answered_gnmax2): + """Plots a graph of LNMax vs GNMax with two parameters. + + Args: + figures_dir: A name of the directory where to save the plot. + eps_lap: The cumulative privacy costs of the Laplace mechanism. + eps_gnmax1: The cumulative privacy costs of the Gaussian mechanism (set 1). + answered_gnmax1: The cumulative count of queries answered (set 1). + eps_gnmax2: The cumulative privacy costs of the Gaussian mechanism (set 2). + partition_gnmax2: Allocation of eps for set 2. + answered_gnmax2: The cumulative count of queries answered (set 2). + """ + xlim = 6000 + x_axis = range(0, int(xlim), 10) + lenx = len(x_axis) + y_lap = np.zeros(lenx) + y_gnmax1 = np.full(lenx, np.nan, dtype=float) + y_gnmax2 = np.full(lenx, np.nan, dtype=float) + y1_gnmax2 = np.full(lenx, np.nan, dtype=float) + + for i in range(lenx): + x = x_axis[i] + y_lap[i] = eps_lap[x] + idx1 = np.searchsorted(answered_gnmax1, x) + if idx1 < len(eps_gnmax1): + y_gnmax1[i] = eps_gnmax1[idx1] + idx2 = np.searchsorted(answered_gnmax2, x) + if idx2 < len(eps_gnmax2): + y_gnmax2[i] = eps_gnmax2[idx2] + fraction_step1, fraction_step2, _ = partition_gnmax2[idx2] + y1_gnmax2[i] = eps_gnmax2[idx2] * fraction_step1 / ( + fraction_step1 + fraction_step2) + + fig, ax = plt.subplots() + fig.set_figheight(4.5) + fig.set_figwidth(4.7) + ax.plot( + x_axis, + y_lap, + color='r', + ls='dashed', + label='LNMax', + alpha=.5, + linewidth=5) + ax.plot( + x_axis, + y_gnmax1, + color='g', + ls='-', + label='Confident-GNMax (moderate)', + alpha=.5, + linewidth=5) + ax.plot( + x_axis, + y_gnmax2, + color='b', + ls='-', + label='Confident-GNMax (aggressive)', + alpha=.5, + linewidth=5) + ax.fill_between( + x_axis, [0] * lenx, + y1_gnmax2.tolist(), + facecolor='b', + alpha=.3, + hatch='\\') + ax.plot( + x_axis, + y1_gnmax2, + color='b', + ls='-', + label='_nolegend_', + alpha=.5, + linewidth=1) + ax.fill_between( + x_axis, y1_gnmax2.tolist(), y_gnmax2.tolist(), facecolor='b', alpha=.3) + plt.xticks(np.arange(0, 7000, 1000)) + plt.xlim([0, xlim]) + plt.ylim([0, 1.]) + plt.xlabel('Number of queries answered', fontsize=16) + plt.ylabel(r'Privacy cost $\varepsilon$ at $\delta=10^{-8}$', fontsize=16) + plt.legend(loc=2, fontsize=13) # loc=2 -- upper left + ax.tick_params(labelsize=14) + fout_name = os.path.join(figures_dir, 'lnmax_vs_2xgnmax_large.pdf') + print('Saving the graph to ' + fout_name) + 
fig.savefig(fout_name, bbox_inches='tight') + plt.show() + + +def run_all_analyses(votes, lambda_laplace, gnmax_parameters, sigma2): + """Sequentially runs all analyses. + + Args: + votes: A matrix of votes, where each row contains votes in one instance. + lambda_laplace: The scale of the Laplace noise (lambda). + gnmax_parameters: A list of parameters for GNMax. + sigma2: Shared parameter for the GNMax mechanisms. + + Returns: + Five lists whose length is the number of queries. + """ + print('=== Laplace Mechanism ===') + eps_lap, _, _, _ = run_analysis(votes, 'lnmax', lambda_laplace, None) + print() + + # Does not go anywhere, for now + # print('=== Gaussian Mechanism (simple) ===') + # eps, _, _, _ = run_analysis(votes[:n,], 'gnmax', sigma1, None) + + eps_gnmax = [[] for p in gnmax_parameters] + partition_gmax = [[] for p in gnmax_parameters] + answered = [[] for p in gnmax_parameters] + order_opt = [[] for p in gnmax_parameters] + for i, p in enumerate(gnmax_parameters): + print('=== Gaussian Mechanism (confident) {}: ==='.format(p)) + eps_gnmax[i], partition_gmax[i], answered[i], order_opt[i] = run_analysis( + votes, 'gnmax_conf', sigma2, p) + print() + + return eps_lap, eps_gnmax, partition_gmax, answered, order_opt + + +def main(argv): + del argv # Unused. + lambda_laplace = 50. # corresponds to eps = 1. / lambda_laplace + + # Paramaters of the GNMax + gnmax_parameters = ({ + 't': 1000, + 'sigma1': 500 + }, { + 't': 3500, + 'sigma1': 1500 + }, { + 't': 5000, + 'sigma1': 1500 + }) + sigma2 = 100 # GNMax parameters differ only in Step 1 (selection). + ftemp_name = '/tmp/precomputed.pkl' + + figures_dir = os.path.expanduser(FLAGS.figures_dir) + + if FLAGS.cache and os.path.isfile(ftemp_name): + print('Reading from cache ' + ftemp_name) + with open(ftemp_name, 'rb') as f: + (eps_lap, eps_gnmax, partition_gmax, answered_gnmax, + orders_opt_gnmax) = pickle.load(f) + else: + fin_name = os.path.expanduser(FLAGS.counts_file) + print('Reading raw votes from ' + fin_name) + sys.stdout.flush() + + votes = np.load(fin_name) + + (eps_lap, eps_gnmax, partition_gmax, + answered_gnmax, orders_opt_gnmax) = run_all_analyses( + votes, lambda_laplace, gnmax_parameters, sigma2) + + print('Writing to cache ' + ftemp_name) + with open(ftemp_name, 'wb') as f: + pickle.dump((eps_lap, eps_gnmax, partition_gmax, answered_gnmax, + orders_opt_gnmax), f) + + print_plot_small(figures_dir, eps_lap, eps_gnmax[0], answered_gnmax[0]) + print_plot_large(figures_dir, eps_lap, eps_gnmax[1], answered_gnmax[1], + eps_gnmax[2], partition_gmax[2], answered_gnmax[2]) + plt.close('all') + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/smooth_sensitivity_table.py b/tensorflow_privacy/research/pate_2018/ICLR2018/smooth_sensitivity_table.py new file mode 100644 index 0000000..89d4c28 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/smooth_sensitivity_table.py @@ -0,0 +1,358 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Performs privacy analysis of GNMax with smooth sensitivity. + +A script in support of the paper "Scalable Private Learning with PATE" by +Nicolas Papernot, Shuang Song, Ilya Mironov, Ananth Raghunathan, Kunal Talwar, +Ulfar Erlingsson (https://arxiv.org/abs/1802.08908). + +Several flavors of the GNMax algorithm can be analyzed. + - Plain GNMax (argmax w/ Gaussian noise) is assumed when arguments threshold + and sigma2 are missing. + - Confident GNMax (thresholding + argmax w/ Gaussian noise) is used when + threshold, sigma1, and sigma2 are given. + - Interactive GNMax (two- or multi-round) is triggered by specifying + baseline_file, which provides baseline values for votes selection in Step 1. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import os +import sys + +sys.path.append('..') # Main modules reside in the parent directory. + +from absl import app +from absl import flags +import numpy as np +import core as pate +import smooth_sensitivity as pate_ss + +FLAGS = flags.FLAGS + +flags.DEFINE_string('counts_file', None, 'Counts file.') +flags.DEFINE_string('baseline_file', None, 'File with baseline scores.') +flags.DEFINE_boolean('data_independent', False, + 'Force data-independent bounds.') +flags.DEFINE_float('threshold', None, 'Threshold for step 1 (selection).') +flags.DEFINE_float('sigma1', None, 'Sigma for step 1 (selection).') +flags.DEFINE_float('sigma2', None, 'Sigma for step 2 (argmax).') +flags.DEFINE_integer('queries', None, 'Number of queries made by the student.') +flags.DEFINE_float('delta', 1e-8, 'Target delta.') +flags.DEFINE_float( + 'order', None, + 'Fixes a Renyi DP order (if unspecified, finds an optimal order from a ' + 'hardcoded list).') +flags.DEFINE_integer( + 'teachers', None, + 'Number of teachers (if unspecified, derived from the counts file).') + +flags.mark_flag_as_required('counts_file') +flags.mark_flag_as_required('sigma2') + + +def _check_conditions(sigma, num_classes, orders): + """Symbolic-numeric verification of conditions C5 and C6. + + The conditions on the beta function are verified by constructing the beta + function symbolically, and then checking that its derivative (computed + symbolically) is non-negative within the interval of conjectured monotonicity. + The last check is performed numerically. + """ + + print('Checking conditions C5 and C6 for all orders.') + sys.stdout.flush() + conditions_hold = True + + for order in orders: + cond5, cond6 = pate_ss.check_conditions(sigma, num_classes, order) + conditions_hold &= cond5 and cond6 + if not cond5: + print('Condition C5 does not hold for order =', order) + elif not cond6: + print('Condition C6 does not hold for order =', order) + + if conditions_hold: + print('Conditions C5-C6 hold for all orders.') + sys.stdout.flush() + return conditions_hold + + +def _compute_rdp(votes, baseline, threshold, sigma1, sigma2, delta, orders, + data_ind): + """Computes the (data-dependent) RDP curve for Confident GNMax.""" + rdp_cum = np.zeros(len(orders)) + rdp_sqrd_cum = np.zeros(len(orders)) + answered = 0 + + for i, v in enumerate(votes): + if threshold is None: + logq_step1 = 0 # No thresholding, always proceed to step 2. 
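+      # exp(0) = 1, so the query is answered with certainty and the
+      # thresholding step contributes no privacy cost (rdp_step1 stays zero).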
+ rdp_step1 = np.zeros(len(orders)) + else: + logq_step1 = pate.compute_logpr_answered(threshold, sigma1, + v - baseline[i,]) + if data_ind: + rdp_step1 = pate.compute_rdp_data_independent_threshold(sigma1, orders) + else: + rdp_step1 = pate.compute_rdp_threshold(logq_step1, sigma1, orders) + + if data_ind: + rdp_step2 = pate.rdp_data_independent_gaussian(sigma2, orders) + else: + logq_step2 = pate.compute_logq_gaussian(v, sigma2) + rdp_step2 = pate.rdp_gaussian(logq_step2, sigma2, orders) + + q_step1 = np.exp(logq_step1) + rdp = rdp_step1 + rdp_step2 * q_step1 + # The expression below evaluates + # E[(cost_of_step_1 + Bernoulli(pr_of_step_2) * cost_of_step_2)^2] + rdp_sqrd = ( + rdp_step1**2 + 2 * rdp_step1 * q_step1 * rdp_step2 + + q_step1 * rdp_step2**2) + rdp_sqrd_cum += rdp_sqrd + + rdp_cum += rdp + answered += q_step1 + if ((i + 1) % 1000 == 0) or (i == votes.shape[0] - 1): + rdp_var = rdp_sqrd_cum / i - ( + rdp_cum / i)**2 # Ignore Bessel's correction. + eps_total, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta) + order_opt_idx = np.searchsorted(orders, order_opt) + eps_std = ((i + 1) * rdp_var[order_opt_idx])**.5 # Std of the sum. + print( + 'queries = {}, E[answered] = {:.2f}, E[eps] = {:.3f} (std = {:.5f}) ' + 'at order = {:.2f} (contribution from delta = {:.3f})'.format( + i + 1, answered, eps_total, eps_std, order_opt, + -math.log(delta) / (order_opt - 1))) + sys.stdout.flush() + + _, order_opt = pate.compute_eps_from_delta(orders, rdp_cum, delta) + + return order_opt + + +def _find_optimal_smooth_sensitivity_parameters( + votes, baseline, num_teachers, threshold, sigma1, sigma2, delta, ind_step1, + ind_step2, order): + """Optimizes smooth sensitivity parameters by minimizing a cost function. + + The cost function is + exact_eps + cost of GNSS + two stds of noise, + which captures that upper bound of the confidence interval of the sanitized + privacy budget. + + Since optimization is done with full view of sensitive data, the results + cannot be released. + """ + rdp_cum = 0 + answered_cum = 0 + ls_cum = 0 + + # Define a plausible range for the beta values. + betas = np.arange(.3 / order, .495 / order, .01 / order) + cost_delta = math.log(1 / delta) / (order - 1) + + for i, v in enumerate(votes): + if threshold is None: + log_pr_answered = 0 + rdp1 = 0 + ls_step1 = np.zeros(num_teachers) + else: + log_pr_answered = pate.compute_logpr_answered(threshold, sigma1, + v - baseline[i,]) + if ind_step1: # apply data-independent bound for step 1 (thresholding). + rdp1 = pate.compute_rdp_data_independent_threshold(sigma1, order) + ls_step1 = np.zeros(num_teachers) + else: + rdp1 = pate.compute_rdp_threshold(log_pr_answered, sigma1, order) + ls_step1 = pate_ss.compute_local_sensitivity_bounds_threshold( + v - baseline[i,], num_teachers, threshold, sigma1, order) + + pr_answered = math.exp(log_pr_answered) + answered_cum += pr_answered + + if ind_step2: # apply data-independent bound for step 2 (GNMax). + rdp2 = pate.rdp_data_independent_gaussian(sigma2, order) + ls_step2 = np.zeros(num_teachers) + else: + logq_step2 = pate.compute_logq_gaussian(v, sigma2) + rdp2 = pate.rdp_gaussian(logq_step2, sigma2, order) + # Compute smooth sensitivity. + ls_step2 = pate_ss.compute_local_sensitivity_bounds_gnmax( + v, num_teachers, sigma2, order) + + rdp_cum += rdp1 + pr_answered * rdp2 + ls_cum += ls_step1 + pr_answered * ls_step2 # Expected local sensitivity. + + if ind_step1 and ind_step2: + # Data-independent bounds. 
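+      # All local sensitivities are zero in this case, so the smooth
+      # sensitivity is zero and there is nothing to optimize over beta.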
+ cost_opt, beta_opt, ss_opt, sigma_ss_opt = None, 0., 0., np.inf + else: + # Data-dependent bounds. + cost_opt, beta_opt, ss_opt, sigma_ss_opt = np.inf, None, None, None + + for beta in betas: + ss = pate_ss.compute_discounted_max(beta, ls_cum) + + # Solution to the minimization problem: + # min_sigma {order * exp(2 * beta)/ sigma^2 + 2 * ss * sigma} + sigma_ss = ((order * math.exp(2 * beta)) / ss)**(1 / 3) + cost_ss = pate_ss.compute_rdp_of_smooth_sensitivity_gaussian( + beta, sigma_ss, order) + + # Cost captures exact_eps + cost of releasing SS + two stds of noise. + cost = rdp_cum + cost_ss + 2 * ss * sigma_ss + if cost < cost_opt: + cost_opt, beta_opt, ss_opt, sigma_ss_opt = cost, beta, ss, sigma_ss + + if ((i + 1) % 100 == 0) or (i == votes.shape[0] - 1): + eps_before_ss = rdp_cum + cost_delta + eps_with_ss = ( + eps_before_ss + pate_ss.compute_rdp_of_smooth_sensitivity_gaussian( + beta_opt, sigma_ss_opt, order)) + print('{}: E[answered queries] = {:.1f}, RDP at {} goes from {:.3f} to ' + '{:.3f} +/- {:.3f} (ss = {:.4}, beta = {:.4f}, sigma_ss = {:.3f})'. + format(i + 1, answered_cum, order, eps_before_ss, eps_with_ss, + ss_opt * sigma_ss_opt, ss_opt, beta_opt, sigma_ss_opt)) + sys.stdout.flush() + + # Return optimal parameters for the last iteration. + return beta_opt, ss_opt, sigma_ss_opt + + +#################### +# HELPER FUNCTIONS # +#################### + + +def _load_votes(counts_file, baseline_file, queries): + counts_file_expanded = os.path.expanduser(counts_file) + print('Reading raw votes from ' + counts_file_expanded) + sys.stdout.flush() + + votes = np.load(counts_file_expanded) + print('Shape of the votes matrix = {}'.format(votes.shape)) + + if baseline_file is not None: + baseline_file_expanded = os.path.expanduser(baseline_file) + print('Reading baseline values from ' + baseline_file_expanded) + sys.stdout.flush() + baseline = np.load(baseline_file_expanded) + if votes.shape != baseline.shape: + raise ValueError( + 'Counts file and baseline file must have the same shape. Got {} and ' + '{} instead.'.format(votes.shape, baseline.shape)) + else: + baseline = np.zeros_like(votes) + + if queries is not None: + if votes.shape[0] < queries: + raise ValueError('Expect {} rows, got {} in {}'.format( + queries, votes.shape[0], counts_file)) + # Truncate the votes matrix to the number of queries made. + votes = votes[:queries,] + baseline = baseline[:queries,] + else: + print('Process all {} input rows. (Use --queries flag to truncate.)'.format( + votes.shape[0])) + + return votes, baseline + + +def _count_teachers(votes): + s = np.sum(votes, axis=1) + num_teachers = int(max(s)) + if min(s) != num_teachers: + raise ValueError( + 'Matrix of votes is malformed: the number of votes is not the same ' + 'across rows.') + return num_teachers + + +def _is_data_ind_step1(num_teachers, threshold, sigma1, orders): + if threshold is None: + return True + return np.all( + pate.is_data_independent_always_opt_threshold(num_teachers, threshold, + sigma1, orders)) + + +def _is_data_ind_step2(num_teachers, num_classes, sigma, orders): + return np.all( + pate.is_data_independent_always_opt_gaussian(num_teachers, num_classes, + sigma, orders)) + + +def main(argv): + del argv # Unused. + + if (FLAGS.threshold is None) != (FLAGS.sigma1 is None): + raise ValueError( + '--threshold flag and --sigma1 flag must be present or absent ' + 'simultaneously.') + + if FLAGS.order is None: + # Long list of orders. 
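+    # A dense grid of orders up to 100 followed by a log-spaced tail up to 500;
+    # the optimal order is later selected from this grid when converting the
+    # RDP curve to an (eps, delta) guarantee.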
+ orders = np.concatenate((np.arange(2, 100 + 1, .5), + np.logspace(np.log10(100), np.log10(500), + num=100))) + # Short list of orders. + # orders = np.round( + # np.concatenate((np.arange(2, 50 + 1, 1), + # np.logspace(np.log10(50), np.log10(1000), num=20)))) + else: + orders = np.array([FLAGS.order]) + + votes, baseline = _load_votes(FLAGS.counts_file, FLAGS.baseline_file, + FLAGS.queries) + + if FLAGS.teachers is None: + num_teachers = _count_teachers(votes) + else: + num_teachers = FLAGS.teachers + + num_classes = votes.shape[1] + + order = _compute_rdp(votes, baseline, FLAGS.threshold, FLAGS.sigma1, + FLAGS.sigma2, FLAGS.delta, orders, + FLAGS.data_independent) + + ind_step1 = _is_data_ind_step1(num_teachers, FLAGS.threshold, FLAGS.sigma1, + order) + + ind_step2 = _is_data_ind_step2(num_teachers, num_classes, FLAGS.sigma2, order) + + if FLAGS.data_independent or (ind_step1 and ind_step2): + print('Nothing to do here, all analyses are data-independent.') + return + + if not _check_conditions(FLAGS.sigma2, num_classes, [order]): + return # Quit early: sufficient conditions for correctness fail to hold. + + beta_opt, ss_opt, sigma_ss_opt = _find_optimal_smooth_sensitivity_parameters( + votes, baseline, num_teachers, FLAGS.threshold, FLAGS.sigma1, + FLAGS.sigma2, FLAGS.delta, ind_step1, ind_step2, order) + + print('Optimal beta = {:.4f}, E[SS_beta] = {:.4}, sigma_ss = {:.2f}'.format( + beta_opt, ss_opt, sigma_ss_opt)) + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py b/tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py new file mode 100644 index 0000000..d8663ad --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/ICLR2018/utility_queries_answered.py @@ -0,0 +1,90 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+import matplotlib
+import os
+
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt  # pylint: disable=g-import-not-at-top
+
+plt.style.use('ggplot')
+
+FLAGS = flags.FLAGS
+flags.DEFINE_string('plot_file', '', 'Output file name.')
+
+# Number of queries answered. Wrapping range() in list() keeps this valid under
+# Python 3, where range() no longer returns a list.
+qa_lnmax = [500, 750] + list(range(1000, 12500, 500))
+
+acc_lnmax = [43.3, 52.3, 59.8, 66.7, 68.8, 70.5, 71.6, 72.3, 72.6, 72.9, 73.4,
+             73.4, 73.7, 73.9, 74.2, 74.4, 74.5, 74.7, 74.8, 75, 75.1, 75.1,
+             75.4, 75.4, 75.4]
+
+qa_gnmax = [456, 683, 908, 1353, 1818, 2260, 2702, 3153, 3602, 4055, 4511, 4964,
+            5422, 5875, 6332, 6792, 7244, 7696, 8146, 8599, 9041, 9496, 9945,
+            10390, 10842]
+
+acc_gnmax = [39.6, 52.2, 59.6, 66.6, 69.6, 70.5, 71.8, 72, 72.7, 72.9, 73.3,
+             73.4, 73.4, 73.8, 74, 74.2, 74.4, 74.5, 74.5, 74.7, 74.8, 75, 75.1,
+             75.1, 75.4]
+
+qa_gnmax_aggressive = [167, 258, 322, 485, 647, 800, 967, 1133, 1282, 1430,
+                       1573, 1728, 1889, 2028, 2190, 2348, 2510, 2668, 2950,
+                       3098, 3265, 3413, 3581, 3730]
+
+acc_gnmax_aggressive = [17.8, 26.8, 39.3, 48, 55.7, 61, 62.8, 64.8, 65.4, 66.7,
+                        66.2, 68.3, 68.3, 68.7, 69.1, 70, 70.2, 70.5, 70.9,
+                        70.7, 71.3, 71.3, 71.3, 71.8]
+
+
+def main(argv):
+  del argv  # Unused.
+
+  plt.close('all')
+  fig, ax = plt.subplots()
+  fig.set_figheight(4.7)
+  fig.set_figwidth(5)
+  ax.plot(qa_lnmax, acc_lnmax, color='r', ls='--', linewidth=5., marker='o',
+          alpha=.5, label='LNMax')
+  ax.plot(qa_gnmax, acc_gnmax, color='g', ls='-', linewidth=5., marker='o',
+          alpha=.5, label='Confident-GNMax')
+  # ax.plot(qa_gnmax_aggressive, acc_gnmax_aggressive, color='b', ls='-', marker='o', alpha=.5, label='Confident-GNMax (aggressive)')
+  plt.xticks([0, 2000, 4000, 6000])
+  plt.xlim([0, 6000])
+  # ax.set_yscale('log')
+  plt.ylim([65, 76])
+  ax.tick_params(labelsize=14)
+  plt.xlabel('Number of queries answered', fontsize=16)
+  plt.ylabel('Student test accuracy (%)', fontsize=16)
+  plt.legend(loc=2, prop={'size': 16})
+
+  x = [400, 2116, 4600, 4680]
+  y = [69.5, 68.5, 74, 72.5]
+  annotations = [0.76, 2.89, 1.42, 5.76]
+  color_annotations = ['g', 'r', 'g', 'r']
+  for i, txt in enumerate(annotations):
+    ax.annotate(r'${\varepsilon=}$' + str(txt), (x[i], y[i]), fontsize=16,
+                color=color_annotations[i])
+
+  plot_filename = os.path.expanduser(FLAGS.plot_file)
+  plt.savefig(plot_filename, bbox_inches='tight')
+  plt.show()
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/tensorflow_privacy/research/pate_2018/README.md b/tensorflow_privacy/research/pate_2018/README.md
new file mode 100644
index 0000000..decd633
--- /dev/null
+++ b/tensorflow_privacy/research/pate_2018/README.md
@@ -0,0 +1,71 @@
+Implementation of an RDP privacy accountant and smooth sensitivity analysis for
+the PATE framework. The underlying theory and supporting experiments appear in
+"Scalable Private Learning with PATE" by Nicolas Papernot, Shuang Song, Ilya
+Mironov, Ananth Raghunathan, Kunal Talwar, Ulfar Erlingsson (ICLR 2018,
+https://arxiv.org/abs/1802.08908).
+
+## Overview
+
+The PATE ('Private Aggregation of Teacher Ensembles') framework was introduced
+by Papernot et al. in "Semi-supervised Knowledge Transfer for Deep Learning from
+Private Training Data" (ICLR 2017, https://arxiv.org/abs/1610.05755). The
+framework enables model-agnostic training that provably provides [differential
+privacy](https://en.wikipedia.org/wiki/Differential_privacy) for the training
+dataset.
+
+The framework consists of _teachers_, the _student_ model, and the _aggregator_. The
+teachers are models trained on disjoint subsets of the training data. The student
+model has access to an insensitive (e.g., public) unlabelled dataset, which is labelled by
+interacting with the ensemble of teachers via the _aggregator_. The aggregator tallies the
+outputs of the teacher models and either forwards a (noisy) aggregate to the student or
+refuses to answer.
+
+Differential privacy is enforced by the aggregator. The privacy guarantees can be _data-independent_,
+which means that they are solely a function of the aggregator's parameters. Alternatively, the privacy
+analysis can be _data-dependent_, which allows for finer reasoning where, under certain conditions on
+the input distribution, the final privacy guarantees can be improved relative to the data-independent
+analysis. Data-dependent privacy guarantees may themselves be a function of the sensitive data, and
+therefore publishing them requires a sanitization procedure of its own. In our case, sanitization of
+data-dependent privacy guarantees proceeds via _smooth sensitivity_ analysis.
+
+The common machinery used for all privacy analyses in this repository is Rényi
+differential privacy, or RDP (see https://arxiv.org/abs/1702.07476).
+
+This repository contains implementations of privacy accountants and smooth
+sensitivity analysis for the data-independent and data-dependent mechanisms
+that together comprise the PATE framework.
+
+
+### Requirements
+
+* Python, version ≥ 2.7
+* absl (see [here](https://github.com/abseil/abseil-py), or just type `pip install absl-py`)
+* numpy
+* scipy
+* sympy (for smooth sensitivity analysis)
+* unittest (for testing)
+
+
+### Self-testing
+
+To verify the installation, run
+```bash
+$ python core_test.py
+$ python smooth_sensitivity_test.py
+```
+
+
+## Files in this directory
+
+* core.py — RDP privacy accountant for several vote aggregators (GNMax,
+  Threshold, Laplace).
+
+* smooth_sensitivity.py — Smooth sensitivity analysis for GNMax and
+  Threshold mechanisms.
+
+* core_test.py and smooth_sensitivity_test.py — Unit tests for the
+  files above.
+
+## Contact information
+
+You may direct your comments to mironov@google.com and pull requests to
+@ilyamironov.
diff --git a/tensorflow_privacy/research/pate_2018/core.py b/tensorflow_privacy/research/pate_2018/core.py
new file mode 100644
index 0000000..84c79dc
--- /dev/null
+++ b/tensorflow_privacy/research/pate_2018/core.py
@@ -0,0 +1,370 @@
+# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Core functions for RDP analysis in PATE framework.
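+
+A minimal usage sketch (the vote counts, noise scale, and orders below are
+illustrative placeholders, not recommended settings):
+
+  import numpy as np
+  import core as pate
+
+  orders = np.arange(2., 200., .5)             # Renyi orders to track.
+  votes = np.array([480., 10., 10.])           # Teacher votes for one query.
+  logq = pate.compute_logq_gaussian(votes, 100.)
+  rdp = pate.rdp_gaussian(logq, 100., orders)  # Data-dependent RDP curve.
+  eps, order = pate.compute_eps_from_delta(orders, rdp, 1e-8)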
+ +This library comprises the core functions for doing differentially private +analysis of the PATE architecture and its various Noisy Max and other +mechanisms. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from absl import app +import numpy as np +import scipy.stats + + +def _logaddexp(x): + """Addition in the log space. Analogue of numpy.logaddexp for a list.""" + m = max(x) + return m + math.log(sum(np.exp(x - m))) + + +def _log1mexp(x): + """Numerically stable computation of log(1-exp(x)).""" + if x < -1: + return math.log1p(-math.exp(x)) + elif x < 0: + return math.log(-math.expm1(x)) + elif x == 0: + return -np.inf + else: + raise ValueError("Argument must be non-positive.") + + +def compute_eps_from_delta(orders, rdp, delta): + """Translates between RDP and (eps, delta)-DP. + + Args: + orders: A list (or a scalar) of orders. + rdp: A list of RDP guarantees (of the same length as orders). + delta: Target delta. + + Returns: + Pair of (eps, optimal_order). + + Raises: + ValueError: If input is malformed. + """ + if len(orders) != len(rdp): + raise ValueError("Input lists must have the same length.") + eps = np.array(rdp) - math.log(delta) / (np.array(orders) - 1) + idx_opt = np.argmin(eps) + return eps[idx_opt], orders[idx_opt] + + +##################### +# RDP FOR THE GNMAX # +##################### + + +def compute_logq_gaussian(counts, sigma): + """Returns an upper bound on ln Pr[outcome != argmax] for GNMax. + + Implementation of Proposition 7. + + Args: + counts: A numpy array of scores. + sigma: The standard deviation of the Gaussian noise in the GNMax mechanism. + + Returns: + logq: Natural log of the probability that outcome is different from argmax. + """ + n = len(counts) + variance = sigma**2 + idx_max = np.argmax(counts) + counts_normalized = counts[idx_max] - counts + counts_rest = counts_normalized[np.arange(n) != idx_max] # exclude one index + # Upper bound q via a union bound rather than a more precise calculation. + logq = _logaddexp( + scipy.stats.norm.logsf(counts_rest, scale=math.sqrt(2 * variance))) + + # A sketch of a more accurate estimate, which is currently disabled for two + # reasons: + # 1. Numerical instability; + # 2. Not covered by smooth sensitivity analysis. + # covariance = variance * (np.ones((n - 1, n - 1)) + np.identity(n - 1)) + # logq = np.log1p(-statsmodels.sandbox.distributions.extras.mvnormcdf( + # counts_rest, np.zeros(n - 1), covariance, maxpts=1e4)) + + return min(logq, math.log(1 - (1 / n))) + + +def rdp_data_independent_gaussian(sigma, orders): + """Computes a data-independent RDP curve for GNMax. + + Implementation of Proposition 8. + + Args: + sigma: Standard deviation of Gaussian noise. + orders: An array_like list of Renyi orders. + + Returns: + Upper bound on RPD for all orders. A scalar if orders is a scalar. + + Raises: + ValueError: If the input is malformed. + """ + if sigma < 0 or np.any(orders <= 1): # not defined for alpha=1 + raise ValueError("Inputs are malformed.") + + variance = sigma**2 + if np.isscalar(orders): + return orders / variance + else: + return np.atleast_1d(orders) / variance + + +def rdp_gaussian(logq, sigma, orders): + """Bounds RDP from above of GNMax given an upper bound on q (Theorem 6). + + Args: + logq: Natural logarithm of the probability of a non-argmax outcome. + sigma: Standard deviation of Gaussian noise. + orders: An array_like list of Renyi orders. + + Returns: + Upper bound on RPD for all orders. 
A scalar if orders is a scalar. + + Raises: + ValueError: If the input is malformed. + """ + if logq > 0 or sigma < 0 or np.any(orders <= 1): # not defined for alpha=1 + raise ValueError("Inputs are malformed.") + + if np.isneginf(logq): # If the mechanism's output is fixed, it has 0-DP. + if np.isscalar(orders): + return 0. + else: + return np.full_like(orders, 0., dtype=np.float) + + variance = sigma**2 + + # Use two different higher orders: mu_hi1 and mu_hi2 computed according to + # Proposition 10. + mu_hi2 = math.sqrt(variance * -logq) + mu_hi1 = mu_hi2 + 1 + + orders_vec = np.atleast_1d(orders) + + ret = orders_vec / variance # baseline: data-independent bound + + # Filter out entries where data-dependent bound does not apply. + mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1) + + rdp_hi1 = mu_hi1 / variance + rdp_hi2 = mu_hi2 / variance + + log_a2 = (mu_hi2 - 1) * rdp_hi2 + + # Make sure q is in the increasing wrt q range and A is positive. + if (np.any(mask) and logq <= log_a2 - mu_hi2 * + (math.log(1 + 1 / (mu_hi1 - 1)) + math.log(1 + 1 / (mu_hi2 - 1))) and + -logq > rdp_hi2): + # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0. + log1q = _log1mexp(logq) # log1q = log(1-q) + log_a = (orders - 1) * ( + log1q - _log1mexp((logq + rdp_hi2) * (1 - 1 / mu_hi2))) + log_b = (orders - 1) * (rdp_hi1 - logq / (mu_hi1 - 1)) + + # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y. + log_s = np.logaddexp(log1q + log_a, logq + log_b) + ret[mask] = np.minimum(ret, log_s / (orders - 1))[mask] + + assert np.all(ret >= 0) + + if np.isscalar(orders): + return np.asscalar(ret) + else: + return ret + + +def is_data_independent_always_opt_gaussian(num_teachers, num_classes, sigma, + orders): + """Tests whether data-ind bound is always optimal for GNMax. + + Args: + num_teachers: Number of teachers. + num_classes: Number of classes. + sigma: Standard deviation of the Gaussian noise. + orders: An array_like list of Renyi orders. + + Returns: + Boolean array of length |orders| (a scalar if orders is a scalar). True if + the data-independent bound is always the same as the data-dependent bound. + + """ + unanimous = np.array([num_teachers] + [0] * (num_classes - 1)) + logq = compute_logq_gaussian(unanimous, sigma) + + rdp_dep = rdp_gaussian(logq, sigma, orders) + rdp_ind = rdp_data_independent_gaussian(sigma, orders) + return np.isclose(rdp_dep, rdp_ind) + + +################################### +# RDP FOR THE THRESHOLD MECHANISM # +################################### + + +def compute_logpr_answered(t, sigma, counts): + """Computes log of the probability that a noisy threshold is crossed. + + Args: + t: The threshold. + sigma: The stdev of the Gaussian noise added to the threshold. + counts: An array of votes. + + Returns: + Natural log of the probability that max is larger than a noisy threshold. + """ + # Compared to the paper, max(counts) is rounded to the nearest integer. This + # is done to facilitate computation of smooth sensitivity for the case of + # the interactive mechanism, where votes are not necessarily integer. + return scipy.stats.norm.logsf(t - round(max(counts)), scale=sigma) + + +def compute_rdp_data_independent_threshold(sigma, orders): + # The input to the threshold mechanism has stability 1, compared to + # GNMax, which has stability = 2. Hence the sqrt(2) factor below. 
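+  # Since rdp_data_independent_gaussian(s, orders) returns orders / s**2, this
+  # amounts to a bound of order / (2 * sigma**2) at every order.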
+ return rdp_data_independent_gaussian(2**.5 * sigma, orders) + + +def compute_rdp_threshold(log_pr_answered, sigma, orders): + logq = min(log_pr_answered, _log1mexp(log_pr_answered)) + # The input to the threshold mechanism has stability 1, compared to + # GNMax, which has stability = 2. Hence the sqrt(2) factor below. + return rdp_gaussian(logq, 2**.5 * sigma, orders) + + +def is_data_independent_always_opt_threshold(num_teachers, threshold, sigma, + orders): + """Tests whether data-ind bound is always optimal for the threshold mechanism. + + Args: + num_teachers: Number of teachers. + threshold: The cut-off threshold. + sigma: Standard deviation of the Gaussian noise. + orders: An array_like list of Renyi orders. + + Returns: + Boolean array of length |orders| (a scalar if orders is a scalar). True if + the data-independent bound is always the same as the data-dependent bound. + """ + + # Since the data-dependent bound depends only on max(votes), it suffices to + # check whether the data-dependent bounds are better than data-independent + # bounds in the extreme cases when max(votes) is minimal or maximal. + # For both Confident GNMax and Interactive GNMax it holds that + # 0 <= max(votes) <= num_teachers. + # The upper bound is trivial in both cases. + # The lower bound is trivial for Confident GNMax (and a stronger one, based on + # the pigeonhole principle, is possible). + # For Interactive GNMax (Algorithm 2), the lower bound follows from the + # following argument. Since the votes vector is the difference between the + # actual teachers' votes and the student's baseline, we need to argue that + # max(n_j - M * p_j) >= 0. + # The bound holds because sum_j n_j = sum M * p_j = M. Thus, + # sum_j (n_j - M * p_j) = 0, and max_j (n_j - M * p_j) >= 0 as needed. + logq1 = compute_logpr_answered(threshold, sigma, [0]) + logq2 = compute_logpr_answered(threshold, sigma, [num_teachers]) + + rdp_dep1 = compute_rdp_threshold(logq1, sigma, orders) + rdp_dep2 = compute_rdp_threshold(logq2, sigma, orders) + + rdp_ind = compute_rdp_data_independent_threshold(sigma, orders) + return np.isclose(rdp_dep1, rdp_ind) and np.isclose(rdp_dep2, rdp_ind) + + +############################# +# RDP FOR THE LAPLACE NOISE # +############################# + + +def compute_logq_laplace(counts, lmbd): + """Computes an upper bound on log Pr[outcome != argmax] for LNMax. + + Args: + counts: A list of scores. + lmbd: The lambda parameter of the Laplace distribution ~exp(-|x| / lambda). + + Returns: + logq: Natural log of the probability that outcome is different from argmax. + """ + # For noisy max, we only get an upper bound via the union bound. See Lemma 4 + # in https://arxiv.org/abs/1610.05755. + # + # Pr[ j beats i*] = (2+gap(j,i*))/ 4 exp(gap(j,i*) + # proof at http://mathoverflow.net/questions/66763/ + + idx_max = np.argmax(counts) + counts_normalized = (counts - counts[idx_max]) / lmbd + counts_rest = np.array( + [counts_normalized[i] for i in range(len(counts)) if i != idx_max]) + + logq = _logaddexp(np.log(2 - counts_rest) + math.log(.25) + counts_rest) + + return min(logq, math.log(1 - (1 / len(counts)))) + + +def rdp_pure_eps(logq, pure_eps, orders): + """Computes the RDP value given logq and pure privacy eps. + + Implementation of https://arxiv.org/abs/1610.05755, Theorem 3. + + The bound used is the min of three terms. The first term is from + https://arxiv.org/pdf/1605.02065.pdf. 
+ The second term is based on the fact that when event has probability (1-q) for + q close to zero, q can only change by exp(eps), which corresponds to a + much smaller multiplicative change in (1-q) + The third term comes directly from the privacy guarantee. + + Args: + logq: Natural logarithm of the probability of a non-optimal outcome. + pure_eps: eps parameter for DP + orders: array_like list of moments to compute. + + Returns: + Array of upper bounds on rdp (a scalar if orders is a scalar). + """ + orders_vec = np.atleast_1d(orders) + q = math.exp(logq) + log_t = np.full_like(orders_vec, np.inf) + if q <= 1 / (math.exp(pure_eps) + 1): + logt_one = math.log1p(-q) + ( + math.log1p(-q) - _log1mexp(pure_eps + logq)) * ( + orders_vec - 1) + logt_two = logq + pure_eps * (orders_vec - 1) + log_t = np.logaddexp(logt_one, logt_two) + + ret = np.minimum( + np.minimum(0.5 * pure_eps * pure_eps * orders_vec, + log_t / (orders_vec - 1)), pure_eps) + if np.isscalar(orders): + return np.asscalar(ret) + else: + return ret + + +def main(argv): + del argv # Unused. + + +if __name__ == "__main__": + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/core_test.py b/tensorflow_privacy/research/pate_2018/core_test.py new file mode 100644 index 0000000..933f5c2 --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/core_test.py @@ -0,0 +1,124 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for pate.core.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys +import unittest +import numpy as np + +import core as pate + + +class PateTest(unittest.TestCase): + + def _test_rdp_gaussian_value_errors(self): + # Test for ValueErrors. + with self.assertRaises(ValueError): + pate.rdp_gaussian(1.0, 1.0, np.array([2, 3, 4])) + with self.assertRaises(ValueError): + pate.rdp_gaussian(np.log(0.5), -1.0, np.array([2, 3, 4])) + with self.assertRaises(ValueError): + pate.rdp_gaussian(np.log(0.5), 1.0, np.array([1, 3, 4])) + + def _test_rdp_gaussian_as_function_of_q(self): + # Test for data-independent and data-dependent ranges over q. + # The following corresponds to orders 1.1, 2.5, 32, 250 + # sigmas 1.5, 15, 1500, 15000. + # Hand calculated -log(q0)s arranged in a 'sigma major' ordering. + neglogq0s = [ + 2.8, 2.6, 427, None, 4.8, 4.0, 4.7, 275, 9.6, 8.8, 6.0, 4, 12, 11.2, + 8.6, 6.4 + ] + idx_neglogq0s = 0 # To iterate through neglogq0s. + orders = [1.1, 2.5, 32, 250] + sigmas = [1.5, 15, 1500, 15000] + for sigma in sigmas: + for order in orders: + curr_neglogq0 = neglogq0s[idx_neglogq0s] + idx_neglogq0s += 1 + if curr_neglogq0 is None: # sigma == 1.5 and order == 250: + continue + + rdp_at_q0 = pate.rdp_gaussian(-curr_neglogq0, sigma, order) + + # Data-dependent range. (Successively halve the value of q.) 
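+        # Within the data-dependent regime the bound grows with q, so each
+        # halving of q should strictly decrease the computed RDP value, which
+        # is what the asserts below check.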
+ logq_dds = (-curr_neglogq0 - np.array( + [0, np.log(2), np.log(4), np.log(8)])) + # Check that in q_dds, rdp is decreasing. + for idx in range(len(logq_dds) - 1): + self.assertGreater( + pate.rdp_gaussian(logq_dds[idx], sigma, order), + pate.rdp_gaussian(logq_dds[idx + 1], sigma, order)) + + # Data-independent range. + q_dids = np.exp(-curr_neglogq0) + np.array([0.1, 0.2, 0.3, 0.4]) + # Check that in q_dids, rdp is constant. + for q in q_dids: + self.assertEqual(rdp_at_q0, pate.rdp_gaussian( + np.log(q), sigma, order)) + + def _test_compute_eps_from_delta_value_error(self): + # Test for ValueError. + with self.assertRaises(ValueError): + pate.compute_eps_from_delta([1.1, 2, 3, 4], [1, 2, 3], 0.001) + + def _test_compute_eps_from_delta_monotonicity(self): + # Test for monotonicity with respect to delta. + orders = [1.1, 2.5, 250.0] + sigmas = [1e-3, 1.0, 1e5] + deltas = [1e-60, 1e-6, 0.1, 0.999] + for sigma in sigmas: + list_of_eps = [] + rdps_for_gaussian = np.array(orders) / (2 * sigma**2) + for delta in deltas: + list_of_eps.append( + pate.compute_eps_from_delta(orders, rdps_for_gaussian, delta)[0]) + + # Check that in list_of_eps, epsilons are decreasing (as delta increases). + sorted_list_of_eps = list(list_of_eps) + sorted_list_of_eps.sort(reverse=True) + self.assertEqual(list_of_eps, sorted_list_of_eps) + + def _test_compute_q0(self): + # Stub code to search a logq space and figure out logq0 by eyeballing + # results. This code does not run with the tests. Remove underscore to run. + sigma = 15 + order = 250 + logqs = np.arange(-290, -270, 1) + count = 0 + for logq in logqs: + count += 1 + sys.stdout.write("\t%0.5g: %0.10g" % + (logq, pate.rdp_gaussian(logq, sigma, order))) + sys.stdout.flush() + if count % 5 == 0: + print("") + + def test_rdp_gaussian(self): + self._test_rdp_gaussian_value_errors() + self._test_rdp_gaussian_as_function_of_q() + + def test_compute_eps_from_delta(self): + self._test_compute_eps_from_delta_value_error() + self._test_compute_eps_from_delta_monotonicity() + + +if __name__ == "__main__": + unittest.main() diff --git a/tensorflow_privacy/research/pate_2018/smooth_sensitivity.py b/tensorflow_privacy/research/pate_2018/smooth_sensitivity.py new file mode 100644 index 0000000..3525bab --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/smooth_sensitivity.py @@ -0,0 +1,419 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Functions for smooth sensitivity analysis for PATE mechanisms. + +This library implements functionality for doing smooth sensitivity analysis +for Gaussian Noise Max (GNMax), Threshold with Gaussian noise, and Gaussian +Noise with Smooth Sensitivity (GNSS) mechanisms. 
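+
+A typical flow for a single GNMax query (illustrative only; votes,
+num_teachers, sigma, order, and a smoothness parameter beta are assumed to be
+given) is:
+
+  ls = compute_local_sensitivity_bounds_gnmax(votes, num_teachers, sigma, order)
+  smooth_sens = compute_discounted_max(beta, ls)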
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from absl import app +import numpy as np +import scipy +import sympy as sp + +import core as pate + +################################ +# SMOOTH SENSITIVITY FOR GNMAX # +################################ + +# Global dictionary for storing cached q0 values keyed by (sigma, order). +_logq0_cache = {} + + +def _compute_logq0(sigma, order): + key = (sigma, order) + if key in _logq0_cache: + return _logq0_cache[key] + + logq0 = compute_logq0_gnmax(sigma, order) + + _logq0_cache[key] = logq0 # Update the global variable. + return logq0 + + +def _compute_logq1(sigma, order, num_classes): + logq0 = _compute_logq0(sigma, order) # Most likely already cached. + logq1 = math.log(_compute_bl_gnmax(math.exp(logq0), sigma, num_classes)) + assert logq1 <= logq0 + return logq1 + + +def _compute_mu1_mu2_gnmax(sigma, logq): + # Computes mu1, mu2 according to Proposition 10. + mu2 = sigma * math.sqrt(-logq) + mu1 = mu2 + 1 + return mu1, mu2 + + +def _compute_data_dep_bound_gnmax(sigma, logq, order): + # Applies Theorem 6 in Appendix without checking that logq satisfies necessary + # constraints. The pre-conditions must be assured by comparing logq against + # logq0 by the caller. + variance = sigma**2 + mu1, mu2 = _compute_mu1_mu2_gnmax(sigma, logq) + eps1 = mu1 / variance + eps2 = mu2 / variance + + log1q = np.log1p(-math.exp(logq)) # log1q = log(1-q) + log_a = (order - 1) * ( + log1q - (np.log1p(-math.exp((logq + eps2) * (1 - 1 / mu2))))) + log_b = (order - 1) * (eps1 - logq / (mu1 - 1)) + + return np.logaddexp(log1q + log_a, logq + log_b) / (order - 1) + + +def _compute_rdp_gnmax(sigma, logq, order): + logq0 = _compute_logq0(sigma, order) + if logq >= logq0: + return pate.rdp_data_independent_gaussian(sigma, order) + else: + return _compute_data_dep_bound_gnmax(sigma, logq, order) + + +def compute_logq0_gnmax(sigma, order): + """Computes the point where we start using data-independent bounds. + + Args: + sigma: std of the Gaussian noise + order: Renyi order lambda + + Returns: + logq0: the point above which the data-ind bound overtakes data-dependent + bound. + """ + + def _check_validity_conditions(logq): + # Function returns true iff logq is in the range where data-dependent bound + # is valid. (Theorem 6 in Appendix.) + mu1, mu2 = _compute_mu1_mu2_gnmax(sigma, logq) + if mu1 < order: + return False + eps2 = mu2 / sigma**2 + # Do computation in the log space. The condition below comes from Lemma 9 + # from Appendix. + return (logq <= (mu2 - 1) * eps2 - mu2 * math.log(mu1 / (mu1 - 1) * mu2 / + (mu2 - 1))) + + def _compare_dep_vs_ind(logq): + return (_compute_data_dep_bound_gnmax(sigma, logq, order) - + pate.rdp_data_independent_gaussian(sigma, order)) + + # Natural upper bounds on q0. + logub = min(-(1 + 1. / sigma)**2, -((order - .99) / sigma)**2, -1 / sigma**2) + assert _check_validity_conditions(logub) + + # If data-dependent bound is already better, we are done already. + if _compare_dep_vs_ind(logub) < 0: + return logub + + # Identifying a reasonable lower bound to bracket logq0. + loglb = 2 * logub # logub is negative, and thus loglb < logub. + while _compare_dep_vs_ind(loglb) > 0: + assert loglb > -10000, "The lower bound on q0 is way too low." + loglb *= 1.5 + + logq0, r = scipy.optimize.brentq( + _compare_dep_vs_ind, loglb, logub, full_output=True) + assert r.converged, "The root finding procedure failed to converge." 
+ assert _check_validity_conditions(logq0) # just in case. + + return logq0 + + +def _compute_bl_gnmax(q, sigma, num_classes): + return ((num_classes - 1) / 2 * scipy.special.erfc( + 1 / sigma + scipy.special.erfcinv(2 * q / (num_classes - 1)))) + + +def _compute_bu_gnmax(q, sigma, num_classes): + return min(1, (num_classes - 1) / 2 * scipy.special.erfc( + -1 / sigma + scipy.special.erfcinv(2 * q / (num_classes - 1)))) + + +def _compute_local_sens_gnmax(logq, sigma, num_classes, order): + """Implements Algorithm 3 (computes an upper bound on local sensitivity). + + (See Proposition 13 for proof of correctness.) + """ + logq0 = _compute_logq0(sigma, order) + logq1 = _compute_logq1(sigma, order, num_classes) + if logq1 <= logq <= logq0: + logq = logq1 + + beta = _compute_rdp_gnmax(sigma, logq, order) + beta_bu_q = _compute_rdp_gnmax( + sigma, math.log(_compute_bu_gnmax(math.exp(logq), sigma, num_classes)), + order) + beta_bl_q = _compute_rdp_gnmax( + sigma, math.log(_compute_bl_gnmax(math.exp(logq), sigma, num_classes)), + order) + return max(beta_bu_q - beta, beta - beta_bl_q) + + +def compute_local_sensitivity_bounds_gnmax(votes, num_teachers, sigma, order): + """Computes a list of max-LS-at-distance-d for the GNMax mechanism. + + A more efficient implementation of Algorithms 4 and 5 working in time + O(teachers*classes). A naive implementation is O(teachers^2*classes) or worse. + + Args: + votes: A numpy array of votes. + num_teachers: Total number of voting teachers. + sigma: Standard deviation of the Guassian noise. + order: The Renyi order. + + Returns: + A numpy array of local sensitivities at distances d, 0 <= d <= num_teachers. + """ + + num_classes = len(votes) # Called m in the paper. + + logq0 = _compute_logq0(sigma, order) + logq1 = _compute_logq1(sigma, order, num_classes) + logq = pate.compute_logq_gaussian(votes, sigma) + plateau = _compute_local_sens_gnmax(logq1, sigma, num_classes, order) + + res = np.full(num_teachers, plateau) + + if logq1 <= logq <= logq0: + return res + + # Invariant: votes is sorted in the non-increasing order. + votes = sorted(votes, reverse=True) + + res[0] = _compute_local_sens_gnmax(logq, sigma, num_classes, order) + curr_d = 0 + + go_left = logq > logq0 # Otherwise logq < logq1 and we go right. + + # Iterate while the following is true: + # 1. If we are going left, logq is still larger than logq0 and we may still + # increase the gap between votes[0] and votes[1]. + # 2. If we are going right, logq is still smaller than logq1. + while ((go_left and logq > logq0 and votes[1] > 0) or + (not go_left and logq < logq1)): + curr_d += 1 + if go_left: # Try decreasing logq. + votes[0] += 1 + votes[1] -= 1 + idx = 1 + # Restore the invariant. (Can be implemented more efficiently by keeping + # track of the range of indices equal to votes[1]. Does not seem to matter + # for the overall running time.) + while idx < len(votes) - 1 and votes[idx] < votes[idx + 1]: + votes[idx], votes[idx + 1] = votes[idx + 1], votes[idx] + idx += 1 + else: # Go right, i.e., try increasing logq. + votes[0] -= 1 + votes[1] += 1 # The invariant holds since otherwise logq >= logq1. + + logq = pate.compute_logq_gaussian(votes, sigma) + res[curr_d] = _compute_local_sens_gnmax(logq, sigma, num_classes, order) + + return res + + +################################################## +# SMOOTH SENSITIVITY FOR THE THRESHOLD MECHANISM # +################################################## + +# A global dictionary of RDPs for various threshold values. 
Indexed by a 4-tuple +# (num_teachers, threshold, sigma, order). +_rdp_thresholds = {} + + +def _compute_rdp_list_threshold(num_teachers, threshold, sigma, order): + key = (num_teachers, threshold, sigma, order) + if key in _rdp_thresholds: + return _rdp_thresholds[key] + + res = np.zeros(num_teachers + 1) + for v in range(0, num_teachers + 1): + logp = scipy.stats.norm.logsf(threshold - v, scale=sigma) + res[v] = pate.compute_rdp_threshold(logp, sigma, order) + + _rdp_thresholds[key] = res + return res + + +def compute_local_sensitivity_bounds_threshold(counts, num_teachers, threshold, + sigma, order): + """Computes a list of max-LS-at-distance-d for the threshold mechanism.""" + + def _compute_ls(v): + ls_step_up, ls_step_down = float("-inf"), float("-inf") + if v > 0: + ls_step_down = abs(rdp_list[v - 1] - rdp_list[v]) + if v < num_teachers: + ls_step_up = abs(rdp_list[v + 1] - rdp_list[v]) + return max(ls_step_down, ls_step_up) # Rely on max(x, None) = x. + + cur_max = int(round(max(counts))) + rdp_list = _compute_rdp_list_threshold(num_teachers, threshold, sigma, order) + + ls = np.zeros(num_teachers) + for d in range(max(cur_max, num_teachers - cur_max)): + ls_up, ls_down = float("-inf"), float("-inf") + if cur_max + d <= num_teachers: + ls_up = _compute_ls(cur_max + d) + if cur_max - d >= 0: + ls_down = _compute_ls(cur_max - d) + ls[d] = max(ls_up, ls_down) + return ls + + +############################################# +# PROCEDURES FOR SMOOTH SENSITIVITY RELEASE # +############################################# + +# A global dictionary of exponentially decaying arrays. Indexed by beta. +dict_beta_discount = {} + + +def compute_discounted_max(beta, a): + n = len(a) + + if beta not in dict_beta_discount or (len(dict_beta_discount[beta]) < n): + dict_beta_discount[beta] = np.exp(-beta * np.arange(n)) + + return max(a * dict_beta_discount[beta][:n]) + + +def compute_smooth_sensitivity_gnmax(beta, counts, num_teachers, sigma, order): + """Computes smooth sensitivity of a single application of GNMax.""" + + ls = compute_local_sensitivity_bounds_gnmax(counts, sigma, order, + num_teachers) + return compute_discounted_max(beta, ls) + + +def compute_rdp_of_smooth_sensitivity_gaussian(beta, sigma, order): + """Computes the RDP curve for the GNSS mechanism. + + Implements Theorem 23 (https://arxiv.org/pdf/1802.08908.pdf). + """ + if beta > 0 and not 1 < order < 1 / (2 * beta): + raise ValueError("Order outside the (1, 1/(2*beta)) range.") + + return order * math.exp(2 * beta) / sigma**2 + ( + -.5 * math.log(1 - 2 * order * beta) + beta * order) / ( + order - 1) + + +def compute_params_for_ss_release(eps, delta): + """Computes sigma for additive Gaussian noise scaled by smooth sensitivity. + + Presently not used. (We proceed via RDP analysis.) + + Compute beta, sigma for applying Lemma 2.6 (full version of Nissim et al.) via + Lemma 2.10. + """ + # Rather than applying Lemma 2.10 directly, which would give suboptimal alpha, + # (see http://www.cse.psu.edu/~ads22/pubs/NRS07/NRS07-full-draft-v1.pdf), + # we extract a sufficient condition on alpha from its proof. + # + # Let a = rho_(delta/2)(Z_1). Then solve for alpha such that + # 2 alpha a + alpha^2 = eps/2. + a = scipy.special.ndtri(1 - delta / 2) + alpha = math.sqrt(a**2 + eps / 2) - a + + beta = eps / (2 * scipy.special.chdtri(1, delta / 2)) + + return alpha, beta + + +####################################################### +# SYMBOLIC-NUMERIC VERIFICATION OF CONDITIONS C5--C6. 
# +####################################################### + + +def _construct_symbolic_beta(q, sigma, order): + mu2 = sigma * sp.sqrt(sp.log(1 / q)) + mu1 = mu2 + 1 + eps1 = mu1 / sigma**2 + eps2 = mu2 / sigma**2 + a = (1 - q) / (1 - (q * sp.exp(eps2))**(1 - 1 / mu2)) + b = sp.exp(eps1) / q**(1 / (mu1 - 1)) + s = (1 - q) * a**(order - 1) + q * b**(order - 1) + return (1 / (order - 1)) * sp.log(s) + + +def _construct_symbolic_bu(q, sigma, m): + return (m - 1) / 2 * sp.erfc(sp.erfcinv(2 * q / (m - 1)) - 1 / sigma) + + +def _is_non_decreasing(fn, q, bounds): + """Verifies whether the function is non-decreasing within a range. + + Args: + fn: Symbolic function of a single variable. + q: The name of f's variable. + bounds: Pair of (lower_bound, upper_bound) reals. + + Returns: + True iff the function is non-decreasing in the range. + """ + diff_fn = sp.diff(fn, q) # Symbolically compute the derivative. + diff_fn_lambdified = sp.lambdify( + q, + diff_fn, + modules=[ + "numpy", { + "erfc": scipy.special.erfc, + "erfcinv": scipy.special.erfcinv + } + ]) + r = scipy.optimize.minimize_scalar( + diff_fn_lambdified, bounds=bounds, method="bounded") + assert r.success, "Minimizer failed to converge." + return r.fun >= 0 # Check whether the derivative is non-negative. + + +def check_conditions(sigma, m, order): + """Checks conditions C5 and C6 (Section B.4.2 in Appendix).""" + q = sp.symbols("q", positive=True, real=True) + + beta = _construct_symbolic_beta(q, sigma, order) + q0 = math.exp(compute_logq0_gnmax(sigma, order)) + + cond5 = _is_non_decreasing(beta, q, (0, q0)) + + if cond5: + bl_q0 = _compute_bl_gnmax(q0, sigma, m) + + bu = _construct_symbolic_bu(q, sigma, m) + delta_beta = beta.subs(q, bu) - beta + + cond6 = _is_non_decreasing(delta_beta, q, (0, bl_q0)) + else: + cond6 = False # Skip the check, since Condition 5 is false already. + + return (cond5, cond6) + + +def main(argv): + del argv # Unused. + + +if __name__ == "__main__": + app.run(main) diff --git a/tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py b/tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py new file mode 100644 index 0000000..c1f371a --- /dev/null +++ b/tensorflow_privacy/research/pate_2018/smooth_sensitivity_test.py @@ -0,0 +1,126 @@ +# Copyright 2017 The 'Scalable Private Learning with PATE' Authors All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tests for pate.smooth_sensitivity.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +import numpy as np + +import smooth_sensitivity as pate_ss + + +class PateSmoothSensitivityTest(unittest.TestCase): + + def test_check_conditions(self): + self.assertEqual(pate_ss.check_conditions(20, 10, 25.), (True, False)) + self.assertEqual(pate_ss.check_conditions(30, 10, 25.), (True, True)) + + def _assert_all_close(self, x, y): + """Asserts that two numpy arrays are close.""" + self.assertEqual(len(x), len(y)) + self.assertTrue(np.allclose(x, y, rtol=1e-8, atol=0)) + + def test_compute_local_sensitivity_bounds_gnmax(self): + counts1 = np.array([10, 0, 0]) + sigma1 = .5 + order1 = 1.5 + + answer1 = np.array( + [3.13503646e-17, 1.60178280e-08, 5.90681786e-03] + [5.99981308e+00] * 7) + + # Test for "going right" in the smooth sensitivity computation. + out1 = pate_ss.compute_local_sensitivity_bounds_gnmax( + counts1, 10, sigma1, order1) + + self._assert_all_close(out1, answer1) + + counts2 = np.array([1000, 500, 300, 200, 0]) + sigma2 = 250. + order2 = 10. + + # Test for "going left" in the smooth sensitivity computation. + out2 = pate_ss.compute_local_sensitivity_bounds_gnmax( + counts2, 2000, sigma2, order2) + + answer2 = np.array([0.] * 298 + [2.77693450548e-7, 2.10853979548e-6] + + [2.73113623988e-6] * 1700) + self._assert_all_close(out2, answer2) + + def test_compute_local_sensitivity_bounds_threshold(self): + counts1_3 = np.array([20, 10, 0]) + num_teachers = sum(counts1_3) + t1 = 16 # high threshold + sigma = 2 + order = 10 + + out1 = pate_ss.compute_local_sensitivity_bounds_threshold( + counts1_3, num_teachers, t1, sigma, order) + answer1 = np.array([0] * 3 + [ + 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02, + 9.01543247e-02, 1.16054002e-01, 1.42180452e-01, 1.42180452e-01, + 1.48454129e-04, 1.47826870e-02, 3.94153241e-02, 6.45775697e-02, + 9.01543266e-02, 1.16054000e-01, 1.42180452e-01, 1.68302106e-01, + 1.93127860e-01 + ] + [0] * 10) + self._assert_all_close(out1, answer1) + + t2 = 2 # low threshold + + out2 = pate_ss.compute_local_sensitivity_bounds_threshold( + counts1_3, num_teachers, t2, sigma, order) + answer2 = np.array([ + 1.60212079e-01, 2.07021132e-01, 2.07021132e-01, 1.93127860e-01, + 1.68302106e-01, 1.42180452e-01, 1.16054002e-01, 9.01543247e-02, + 6.45775697e-02, 3.94153241e-02, 1.47826870e-02, 1.48454129e-04 + ] + [0] * 18) + self._assert_all_close(out2, answer2) + + t3 = 50 # very high threshold (larger than the number of teachers). + + out3 = pate_ss.compute_local_sensitivity_bounds_threshold( + counts1_3, num_teachers, t3, sigma, order) + + answer3 = np.array([ + 1.35750725752e-19, 1.88990500499e-17, 2.05403154065e-15, + 1.74298153642e-13, 1.15489723995e-11, 5.97584949325e-10, + 2.41486826748e-08, 7.62150641922e-07, 1.87846248741e-05, + 0.000360973025976, 0.000360973025976, 2.76377015215e-50, + 1.00904975276e-53, 2.87254164748e-57, 6.37583360761e-61, + 1.10331620211e-64, 1.48844393335e-68, 1.56535552444e-72, + 1.28328011060e-76, 8.20047697109e-81 + ] + [0] * 10) + + self._assert_all_close(out3, answer3) + + # Fractional values. 
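+    # Non-integer (and negative) vote counts arise for the interactive
+    # mechanism, where raw teacher votes are offset by the student's baseline;
+    # the threshold computation rounds max(counts) to the nearest integer to
+    # handle such inputs.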
+ counts4 = np.array([19.5, -5.1, 0]) + t4 = 10.1 + out4 = pate_ss.compute_local_sensitivity_bounds_threshold( + counts4, num_teachers, t4, sigma, order) + + answer4 = np.array([ + 0.0620410301, 0.0875807131, 0.113451958, 0.139561671, 0.1657074530, + 0.1908244840, 0.2070270720, 0.207027072, 0.169718100, 0.0575152142, + 0.00678695871 + ] + [0] * 6 + [0.000536304908, 0.0172181073, 0.041909870] + [0] * 10) + self._assert_all_close(out4, answer4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tensorflow_privacy/setup.py b/tensorflow_privacy/setup.py new file mode 100644 index 0000000..be172db --- /dev/null +++ b/tensorflow_privacy/setup.py @@ -0,0 +1,32 @@ +# Copyright 2018, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""TensorFlow Privacy library setup file for pip.""" +from setuptools import find_packages +from setuptools import setup + +setup(name='tensorflow_privacy', + version='0.1.0', + url='https://github.com/tensorflow/privacy', + license='Apache-2.0', + install_requires=[ + 'scipy>=0.17', + 'mpmath', # used in tests only + ], + # Explicit dependence on TensorFlow is not supported. + # See https://github.com/tensorflow/tensorflow/issues/7166 + extras_require={ + 'tf': ['tensorflow>=1.0.0'], + 'tf_gpu': ['tensorflow-gpu>=1.0.0'], + }, + packages=find_packages()) diff --git a/tensorflow_privacy/tutorials/README.md b/tensorflow_privacy/tutorials/README.md new file mode 100644 index 0000000..8214a9b --- /dev/null +++ b/tensorflow_privacy/tutorials/README.md @@ -0,0 +1,129 @@ +# Tutorials + +This folder contains a set of tutorials that demonstrate the features of this +library. +As demonstrated on MNIST in `mnist_dpsgd_tutorial.py`, the easiest way to use +a differentially private optimizer is to modify an existing TF training loop +to replace an existing vanilla optimizer with its differentially private +counterpart implemented in the library. + +Here is a list of all the tutorials included: + +* `lm_dpsgd_tutorial.py`: learn a language model with differential privacy. + +* `mnist_dpsgd_tutorial.py`: learn a convolutional neural network on MNIST with + differential privacy. + +* `mnist_dpsgd_tutorial_eager.py`: learn a convolutional neural network on MNIST + with differential privacy using Eager mode. + +* `mnist_dpsgd_tutorial_keras.py`: learn a convolutional neural network on MNIST + with differential privacy using tf.Keras. + +* `mnist_lr_tutorial.py`: learn a differentially private logistic regression + model on MNIST. The model illustrates application of the + "amplification-by-iteration" analysis (https://arxiv.org/abs/1808.06651). + +The rest of this README describes the different parameters used to configure +DP-SGD as well as expected outputs for the `mnist_dpsgd_tutorial.py` tutorial. + +## Parameters + +All of the optimizers share some privacy-specific parameters that need to +be tuned in addition to any existing hyperparameter. There are currently four: + +* `learning_rate` (float): The learning rate of the SGD training algorithm. 
The + higher the learning rate, the more each update matters. If the updates are noisy + (such as when the additive noise is large compared to the clipping + threshold), the learning rate must be kept low for the training procedure to converge. +* `num_microbatches` (int): The input data for each step (i.e., batch) of your + original training algorithm is split into this many microbatches. Generally, + increasing this will improve your utility but slow down your training in terms + of wall-clock time. The total number of examples consumed in one global step + remains the same. This number should evenly divide your input batch size. +* `l2_norm_clip` (float): The cumulative gradient across all network parameters + from each microbatch will be clipped so that its L2 norm is at most this + value. You should set this to something close to some percentile of what + you expect the gradient from each microbatch to be. In previous experiments, + we've found numbers from 0.5 to 1.0 to work reasonably well. +* `noise_multiplier` (float): This governs the amount of noise added during + training. Generally, more noise results in better privacy and lower utility. + This generally has to be at least 0.3 to obtain rigorous privacy guarantees, + but smaller values may still be acceptable for practical purposes. + +## Measuring Privacy + +Differential privacy can be expressed using two values, epsilon and delta. +Roughly speaking, they mean the following: + +* epsilon gives a ceiling on how much the probability of a particular output + can increase by including (or removing) a single training example. We usually + want it to be a small constant (less than 10, or, for more stringent privacy + guarantees, less than 1). However, this is only an upper bound, and a large + value of epsilon may still mean good practical privacy. +* delta bounds the probability of an arbitrary change in model behavior. + We can usually set this to a very small number (1e-7 or so) without + compromising utility. A rule of thumb is to set it to be less than the inverse + of the training data size. + +To find out the epsilon given a fixed delta value for your model, follow the +approach demonstrated in the `compute_epsilon` of the `mnist_dpsgd_tutorial.py` +where the arguments used to call the RDP accountant (i.e., the tool used to +compute the privacy guarantee) are: + +* `q` : The sampling ratio, defined as (number of examples consumed in one + step) / (total training examples). +* `noise_multiplier` : The noise_multiplier from your parameters above. +* `steps` : The number of global steps taken. + +A detailed writeup of the theory behind the computation of epsilon and delta +is available at https://arxiv.org/abs/1908.10530. + +## Expected Output + +When the `mnist_dpsgd_tutorial.py` script is run with the default parameters, +the output will contain the following lines (leaving out a lot of diagnostic +info): +``` +... +Test accuracy after 1 epochs is: 0.774 +For delta=1e-5, the current epsilon is: 1.03 +... +Test accuracy after 2 epochs is: 0.877 +For delta=1e-5, the current epsilon is: 1.11 +... +Test accuracy after 60 epochs is: 0.966 +For delta=1e-5, the current epsilon is: 3.01 +``` + +## Using Command-Line Interface for Privacy Budgeting + +Before launching a (possibly quite lengthy) training procedure, it is possible +to compute, quickly and accurately, privacy loss at any point of the training. +To do so, run the script `privacy/analysis/compute_dp_sgd_privacy.py`, which +does not have any TensorFlow dependencies. 
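+
+The same numbers can also be obtained directly from Python with the RDP
+accountant. The sketch below is illustrative and mirrors the `compute_epsilon`
+helpers used in the tutorials; the parameter values are simply the tutorial
+defaults:
+
+```
+from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp
+from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent
+
+# Candidate Renyi orders; the reported epsilon is optimized over them.
+orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
+sampling_probability = 256 / 60000.  # batch_size / training set size
+steps = 60 * (60000 // 256)  # epochs * steps per epoch
+rdp = compute_rdp(q=sampling_probability,
+                  noise_multiplier=1.1,
+                  steps=steps,
+                  orders=orders)
+eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
+print('For delta=1e-5, the current epsilon is: %.2f' % eps)  # ~3.0
+```
+
+For a quick check without writing any code, the standalone script can be used
+instead.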
For example, executing +``` +compute_dp_sgd_privacy.py --N=60000 --batch_size=256 --noise_multiplier=1.1 --epochs=60 --delta=1e-5 +``` +allows us to conclude, in a matter of seconds, that DP-SGD run with default +parameters satisfies differential privacy with eps = 3.01 and delta = 1e-05. +Note that the flags provided in the command above correspond to the tutorial in +`mnist_dpsgd_tutorial.py`. The command is applicable to other datasets but the +values passed must be adapted (e.g., N the number of training points). + + +## Select Parameters + +The table below has a few sample parameters illustrating various +accuracy/privacy tradeoffs achieved by the MNIST tutorial in +`mnist_dpsgd_tutorial.py` (default parameters are in __bold__; privacy epsilon +is reported at delta=1e-5; accuracy is averaged over 10 runs, its standard +deviation is less than .3% in all cases). + +| Learning rate | Noise multiplier | Clipping threshold | Number of microbatches | Number of epochs | Privacy eps | Accuracy | +| ------------- | ---------------- | ----------------- | ---------------------- | ---------------- | ----------- | -------- | +| 0.1 | | | __256__ | 20 | no privacy | 99.0% | +| 0.25 | 1.3 | 1.5 | __256__ | 15 | 1.19 | 95.0% | +| __0.15__ | __1.1__ | __1.0__ | __256__ |__60__ | 3.01 | 96.6% | +| 0.25 | 0.7 | 1.5 | __256__ | 45 | 7.10 | 97.0% | + diff --git a/tensorflow_privacy/tutorials/bolton_tutorial.py b/tensorflow_privacy/tutorials/bolton_tutorial.py new file mode 100644 index 0000000..55c8682 --- /dev/null +++ b/tensorflow_privacy/tutorials/bolton_tutorial.py @@ -0,0 +1,187 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tutorial for bolt_on module, the model and the optimizer.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tensorflow as tf # pylint: disable=wrong-import-position +from tensorflow_privacy.privacy.bolt_on import losses # pylint: disable=wrong-import-position +from tensorflow_privacy.privacy.bolt_on import models # pylint: disable=wrong-import-position +from tensorflow_privacy.privacy.bolt_on.optimizers import BoltOn # pylint: disable=wrong-import-position +# ------- +# First, we will create a binary classification dataset with a single output +# dimension. The samples for each label are repeated data points at different +# points in space. 
+# -------
+# Parameters for dataset
+n_samples = 10
+input_dim = 2
+n_outputs = 1
+# Create binary classification dataset:
+x_stack = [tf.constant(-1, tf.float32, (n_samples, input_dim)),
+           tf.constant(1, tf.float32, (n_samples, input_dim))]
+y_stack = [tf.constant(0, tf.float32, (n_samples, 1)),
+           tf.constant(1, tf.float32, (n_samples, 1))]
+x, y = tf.concat(x_stack, 0), tf.concat(y_stack, 0)
+print(x.shape, y.shape)
+generator = tf.data.Dataset.from_tensor_slices((x, y))
+generator = generator.batch(10)
+generator = generator.shuffle(10)
+# -------
+# First, we will explore using the pre-built BoltOnModel, which is a thin
+# wrapper around a Keras Model using a single-layer neural network.
+# It automatically uses the BoltOn Optimizer which encompasses all the logic
+# required for the BoltOn Differential Privacy method.
+# -------
+bolt = models.BoltOnModel(n_outputs)  # tell the model how many outputs we have.
+# -------
+# Now, we will pick our optimizer and Strongly Convex Loss function. The loss
+# must extend from StrongConvexMixin and implement the associated methods. Some
+# existing loss functions are pre-implemented in bolt_on.losses.
+# -------
+optimizer = tf.optimizers.SGD()
+reg_lambda = 1
+C = 1
+radius_constant = 1
+loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant)
+# -------
+# For simplicity, we pick all parameters of the StrongConvexBinaryCrossentropy
+# to be 1; these are all tunable and their impact can be read in
+# losses.StrongConvexBinaryCrossentropy. We then compile the model with the
+# chosen optimizer and loss, which automatically wraps the chosen optimizer
+# with the BoltOn Optimizer, ensuring the required components function as
+# required for privacy guarantees.
+# -------
+bolt.compile(optimizer, loss)
+# -------
+# To fit the model, the optimizer will require additional information about
+# the dataset and model. These parameters are:
+# 1. the class_weights used
+# 2. the number of samples in the dataset
+# 3. the batch size, which the model will try to infer, if possible. If not,
+# you will be required to pass these explicitly to the fit method.
+#
+# As well, there are two privacy parameters that can be altered:
+# 1. epsilon, a float
+# 2. noise_distribution, a valid string indicating the distribution to use
+# (must be implemented)
+#
+# The BoltOnModel offers a helper method, .calculate_class_weight, to aid in
+# class_weight calculation.
+# required parameters
+# -------
+class_weight = None  # default, use .calculate_class_weight for other values
+batch_size = None  # default, if it cannot be inferred, specify this
+n_samples = None  # default, if it cannot be inferred, specify this
+# privacy parameters
+epsilon = 2
+noise_distribution = 'laplace'
+
+bolt.fit(x,
+         y,
+         epsilon=epsilon,
+         class_weight=class_weight,
+         batch_size=batch_size,
+         n_samples=n_samples,
+         noise_distribution=noise_distribution,
+         epochs=2)
+# -------
+# We may also train a generator object, or try different optimizers and loss
+# functions. Below, we will see that we must pass the number of samples as the
+# fit method is unable to infer it for a generator.
+# ------- +optimizer2 = tf.optimizers.Adam() +bolt.compile(optimizer2, loss) +# required parameters +class_weight = None # default, use .calculate_class_weight for other values +batch_size = None # default, if it cannot be inferred, specify this +n_samples = None # default, if it cannot be iferred, specify this +# privacy parameters +epsilon = 2 +noise_distribution = 'laplace' +try: + bolt.fit(generator, + epsilon=epsilon, + class_weight=class_weight, + batch_size=batch_size, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0) +except ValueError as e: + print(e) +# ------- +# And now, re running with the parameter set. +# ------- +n_samples = 20 +bolt.fit_generator(generator, + epsilon=epsilon, + class_weight=class_weight, + n_samples=n_samples, + noise_distribution=noise_distribution, + verbose=0) +# ------- +# You don't have to use the BoltOn model to use the BoltOn method. +# There are only a few requirements: +# 1. make sure any requirements from the loss are implemented in the model. +# 2. instantiate the optimizer and use it as a context around the fit operation. +# ------- +# -------------------- Part 2, using the Optimizer + +# ------- +# Here, we create our own model and setup the BoltOn optimizer. +# ------- + + +class TestModel(tf.keras.Model): # pylint: disable=abstract-method + + def __init__(self, reg_layer, number_of_outputs=1): + super(TestModel, self).__init__(name='test') + self.output_layer = tf.keras.layers.Dense(number_of_outputs, + kernel_regularizer=reg_layer) + + def call(self, inputs): # pylint: disable=arguments-differ + return self.output_layer(inputs) + + +optimizer = tf.optimizers.SGD() +loss = losses.StrongConvexBinaryCrossentropy(reg_lambda, C, radius_constant) +optimizer = BoltOn(optimizer, loss) +# ------- +# Now, we instantiate our model and check for 1. Since our loss requires L2 +# regularization over the kernel, we will pass it to the model. +# ------- +n_outputs = 1 # parameter for model and optimizer context. +test_model = TestModel(loss.kernel_regularizer(), n_outputs) +test_model.compile(optimizer, loss) +# ------- +# We comply with 2., and use the BoltOn Optimizer as a context around the fit +# method. +# ------- +# parameters for context +noise_distribution = 'laplace' +epsilon = 2 +class_weights = 1 # Previously, the fit method auto-detected the class_weights. +# Here, we need to pass the class_weights explicitly. 1 is the same as None. +n_samples = 20 +batch_size = 5 + +with optimizer( + noise_distribution=noise_distribution, + epsilon=epsilon, + layers=test_model.layers, + class_weights=class_weights, + n_samples=n_samples, + batch_size=batch_size +) as _: + test_model.fit(x, y, batch_size=batch_size, epochs=2) diff --git a/tensorflow_privacy/tutorials/lm_dpsgd_tutorial.py b/tensorflow_privacy/tutorials/lm_dpsgd_tutorial.py new file mode 100644 index 0000000..d41dda3 --- /dev/null +++ b/tensorflow_privacy/tutorials/lm_dpsgd_tutorial.py @@ -0,0 +1,225 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Training a language model (recurrent neural network) with DP-SGD optimizer. + +This tutorial uses a corpus of text from TensorFlow datasets unless a +FLAGS.data_dir is specified with the path to a directory containing two files +train.txt and test.txt corresponding to a training and test corpus. + +Even though we haven't done any hyperparameter tuning, and the analytical +epsilon upper bound can't offer any strong guarantees, the benefits of training +with differential privacy can be clearly seen by examining the trained model. +In particular, such inspection can confirm that the set of training-data +examples that the model fails to learn (i.e., has high perplexity for) comprises +outliers and rare sentences outside the distribution to be learned (see examples +and a discussion in this blog post). This can be further confirmed by +testing the differentially-private model's propensity for memorization, e.g., +using the exposure metric of https://arxiv.org/abs/1802.08232. + +This example is decribed in more details in this post: https://goo.gl/UKr7vH +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from absl import app +from absl import flags + +import numpy as np +import tensorflow as tf +import tensorflow_datasets as tfds + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +from tensorflow_privacy.privacy.optimizers import dp_optimizer + +flags.DEFINE_boolean( + 'dpsgd', True, 'If True, train with DP-SGD. If False, ' + 'train with vanilla SGD.') +flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training') +flags.DEFINE_float('noise_multiplier', 0.001, + 'Ratio of the standard deviation to the clipping norm') +flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') +flags.DEFINE_integer('batch_size', 256, 'Batch size') +flags.DEFINE_integer('epochs', 60, 'Number of epochs') +flags.DEFINE_integer( + 'microbatches', 256, 'Number of microbatches ' + '(must evenly divide batch_size)') +flags.DEFINE_string('model_dir', None, 'Model directory') +flags.DEFINE_string('data_dir', None, 'Directory containing the PTB data.') + +FLAGS = flags.FLAGS + +SEQ_LEN = 80 +NB_TRAIN = 45000 + + +def rnn_model_fn(features, labels, mode): # pylint: disable=unused-argument + """Model function for a RNN.""" + + # Define RNN architecture using tf.keras.layers. + x = features['x'] + x = tf.reshape(x, [-1, SEQ_LEN]) + input_layer = x[:, :-1] + input_one_hot = tf.one_hot(input_layer, 256) + lstm = tf.keras.layers.LSTM(256, return_sequences=True).apply(input_one_hot) + logits = tf.keras.layers.Dense(256).apply(lstm) + + # Calculate loss as a vector (to support microbatches in DP-SGD). + vector_loss = tf.nn.softmax_cross_entropy_with_logits( + labels=tf.cast(tf.one_hot(x[:, 1:], 256), dtype=tf.float32), + logits=logits) + # Define mean of loss across minibatch (for reporting through tf.Estimator). + scalar_loss = tf.reduce_mean(vector_loss) + + # Configure the training op (for TRAIN mode). 
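+  # When FLAGS.dpsgd is set, the DP optimizer is given the per-microbatch
+  # (vector) loss so it can clip and noise gradients microbatch by microbatch;
+  # the non-private branch minimizes the usual scalar mean loss instead.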
+ if mode == tf.estimator.ModeKeys.TRAIN: + if FLAGS.dpsgd: + + ledger = privacy_ledger.PrivacyLedger( + population_size=NB_TRAIN, + selection_probability=(FLAGS.batch_size / NB_TRAIN)) + + optimizer = dp_optimizer.DPAdamGaussianOptimizer( + l2_norm_clip=FLAGS.l2_norm_clip, + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=FLAGS.microbatches, + ledger=ledger, + learning_rate=FLAGS.learning_rate, + unroll_microbatches=True) + opt_loss = vector_loss + else: + optimizer = tf.train.AdamOptimizer( + learning_rate=FLAGS.learning_rate) + opt_loss = scalar_loss + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + train_op=train_op) + + # Add evaluation metrics (for EVAL mode). + elif mode == tf.estimator.ModeKeys.EVAL: + eval_metric_ops = { + 'accuracy': + tf.metrics.accuracy( + labels=tf.cast(x[:, 1:], dtype=tf.int32), + predictions=tf.argmax(input=logits, axis=2)) + } + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + eval_metric_ops=eval_metric_ops) + + +def load_data(): + """Load training and validation data.""" + if not FLAGS.data_dir: + print('FLAGS.data_dir containing train.txt and test.txt was not specified, ' + 'using a substitute dataset from the tensorflow_datasets module.') + train_dataset = tfds.load(name='lm1b/subwords8k', + split=tfds.Split.TRAIN, + batch_size=NB_TRAIN, + shuffle_files=True) + test_dataset = tfds.load(name='lm1b/subwords8k', + split=tfds.Split.TEST, + batch_size=10000) + train_data = next(tfds.as_numpy(train_dataset)) + test_data = next(tfds.as_numpy(test_dataset)) + train_data = train_data['text'].flatten() + test_data = test_data['text'].flatten() + else: + train_fpath = os.path.join(FLAGS.data_dir, 'train.txt') + test_fpath = os.path.join(FLAGS.data_dir, 'test.txt') + train_txt = open(train_fpath).read().split() + test_txt = open(test_fpath).read().split() + keys = sorted(set(train_txt)) + remap = {k: i for i, k in enumerate(keys)} + train_data = np.array([remap[x] for x in train_txt], dtype=np.uint8) + test_data = np.array([remap[x] for x in test_txt], dtype=np.uint8) + + return train_data, test_data + + +def compute_epsilon(steps): + """Computes epsilon value for given hyperparameters.""" + if FLAGS.noise_multiplier == 0.0: + return float('inf') + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + sampling_probability = FLAGS.batch_size / NB_TRAIN + rdp = compute_rdp(q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) + # Delta is set to 1e-5 because Penn TreeBank has 60000 training points. + return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.batch_size % FLAGS.microbatches != 0: + raise ValueError('Number of microbatches should divide evenly batch_size') + + # Load training and test data. + train_data, test_data = load_data() + + # Instantiate the tf.Estimator. + conf = tf.estimator.RunConfig(save_summary_steps=1000) + lm_classifier = tf.estimator.Estimator(model_fn=rnn_model_fn, + model_dir=FLAGS.model_dir, + config=conf) + + # Create tf.Estimator input functions for the training and test data. 
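+  # The corpus is a flat array of token ids; each training step consumes
+  # batch_size * SEQ_LEN of them, which the model function reshapes into
+  # batch_size sequences of length SEQ_LEN. The data is truncated below so
+  # that it splits into whole batches.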
+ batch_len = FLAGS.batch_size * SEQ_LEN + train_data_end = len(train_data) - len(train_data) % batch_len + test_data_end = len(test_data) - len(test_data) % batch_len + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data[:train_data_end]}, + batch_size=batch_len, + num_epochs=FLAGS.epochs, + shuffle=False) + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': test_data[:test_data_end]}, + batch_size=batch_len, + num_epochs=1, + shuffle=False) + + # Training loop. + steps_per_epoch = len(train_data) // batch_len + for epoch in range(1, FLAGS.epochs + 1): + print('epoch', epoch) + # Train the model for one epoch. + lm_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) + + if epoch % 5 == 0: + name_input_fn = [('Train', train_input_fn), ('Eval', eval_input_fn)] + for name, input_fn in name_input_fn: + # Evaluate the model and print results + eval_results = lm_classifier.evaluate(input_fn=input_fn) + result_tuple = (epoch, eval_results['accuracy'], eval_results['loss']) + print(name, 'accuracy after %d epochs is: %.3f (%.4f)' % result_tuple) + + # Compute the privacy budget expended so far. + if FLAGS.dpsgd: + eps = compute_epsilon(epoch * steps_per_epoch) + print('For delta=1e-5, the current epsilon is: %.2f' % eps) + else: + print('Trained with vanilla non-private SGD optimizer') + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial.py b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial.py new file mode 100644 index 0000000..64f03c3 --- /dev/null +++ b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial.py @@ -0,0 +1,212 @@ +# Copyright 2018, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Training a CNN on MNIST with differentially private SGD optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags + +from distutils.version import LooseVersion + +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis import privacy_ledger +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp_from_ledger +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +from tensorflow_privacy.privacy.optimizers import dp_optimizer + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + GradientDescentOptimizer = tf.train.GradientDescentOptimizer +else: + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + +FLAGS = flags.FLAGS + +flags.DEFINE_boolean( + 'dpsgd', True, 'If True, train with DP-SGD. 
If False, ' + 'train with vanilla SGD.') +flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') +flags.DEFINE_float('noise_multiplier', 1.1, + 'Ratio of the standard deviation to the clipping norm') +flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') +flags.DEFINE_integer('batch_size', 256, 'Batch size') +flags.DEFINE_integer('epochs', 60, 'Number of epochs') +flags.DEFINE_integer( + 'microbatches', 256, 'Number of microbatches ' + '(must evenly divide batch_size)') +flags.DEFINE_string('model_dir', None, 'Model directory') + + +class EpsilonPrintingTrainingHook(tf.estimator.SessionRunHook): + """Training hook to print current value of epsilon after an epoch.""" + + def __init__(self, ledger): + """Initalizes the EpsilonPrintingTrainingHook. + + Args: + ledger: The privacy ledger. + """ + self._samples, self._queries = ledger.get_unformatted_ledger() + + def end(self, session): + orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)) + samples = session.run(self._samples) + queries = session.run(self._queries) + formatted_ledger = privacy_ledger.format_ledger(samples, queries) + rdp = compute_rdp_from_ledger(formatted_ledger, orders) + eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0] + print('For delta=1e-5, the current epsilon is: %.2f' % eps) + + +def cnn_model_fn(features, labels, mode): + """Model function for a CNN.""" + + # Define CNN architecture using tf.keras.layers. + input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) + y = tf.keras.layers.Conv2D(16, 8, + strides=2, + padding='same', + activation='relu').apply(input_layer) + y = tf.keras.layers.MaxPool2D(2, 1).apply(y) + y = tf.keras.layers.Conv2D(32, 4, + strides=2, + padding='valid', + activation='relu').apply(y) + y = tf.keras.layers.MaxPool2D(2, 1).apply(y) + y = tf.keras.layers.Flatten().apply(y) + y = tf.keras.layers.Dense(32, activation='relu').apply(y) + logits = tf.keras.layers.Dense(10).apply(y) + + # Calculate loss as a vector (to support microbatches in DP-SGD). + vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + # Define mean of loss across minibatch (for reporting through tf.Estimator). + scalar_loss = tf.reduce_mean(vector_loss) + + # Configure the training op (for TRAIN mode). + if mode == tf.estimator.ModeKeys.TRAIN: + + if FLAGS.dpsgd: + ledger = privacy_ledger.PrivacyLedger( + population_size=60000, + selection_probability=(FLAGS.batch_size / 60000)) + + # Use DP version of GradientDescentOptimizer. Other optimizers are + # available in dp_optimizer. Most optimizers inheriting from + # tf.train.Optimizer should be wrappable in differentially private + # counterparts by calling dp_optimizer.optimizer_from_args(). + optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer( + l2_norm_clip=FLAGS.l2_norm_clip, + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=FLAGS.microbatches, + ledger=ledger, + learning_rate=FLAGS.learning_rate) + training_hooks = [ + EpsilonPrintingTrainingHook(ledger) + ] + opt_loss = vector_loss + else: + optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + training_hooks = [] + opt_loss = scalar_loss + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) + # In the following, we pass the mean of the loss (scalar_loss) rather than + # the vector_loss because tf.estimator requires a scalar loss. This is only + # used for evaluation and debugging by tf.estimator. 
The actual loss being + # minimized is opt_loss defined above and passed to optimizer.minimize(). + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + train_op=train_op, + training_hooks=training_hooks) + + # Add evaluation metrics (for EVAL mode). + elif mode == tf.estimator.ModeKeys.EVAL: + eval_metric_ops = { + 'accuracy': + tf.metrics.accuracy( + labels=labels, + predictions=tf.argmax(input=logits, axis=1)) + } + + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + eval_metric_ops=eval_metric_ops) + + +def load_mnist(): + """Loads MNIST and preprocesses to combine training and validation data.""" + train, test = tf.keras.datasets.mnist.load_data() + train_data, train_labels = train + test_data, test_labels = test + + train_data = np.array(train_data, dtype=np.float32) / 255 + test_data = np.array(test_data, dtype=np.float32) / 255 + + train_labels = np.array(train_labels, dtype=np.int32) + test_labels = np.array(test_labels, dtype=np.int32) + + assert train_data.min() == 0. + assert train_data.max() == 1. + assert test_data.min() == 0. + assert test_data.max() == 1. + assert train_labels.ndim == 1 + assert test_labels.ndim == 1 + + return train_data, train_labels, test_data, test_labels + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: + raise ValueError('Number of microbatches should divide evenly batch_size') + + # Load training and test data. + train_data, train_labels, test_data, test_labels = load_mnist() + + # Instantiate the tf.Estimator. + mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, + model_dir=FLAGS.model_dir) + + # Create tf.Estimator input functions for the training and test data. + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data}, + y=train_labels, + batch_size=FLAGS.batch_size, + num_epochs=FLAGS.epochs, + shuffle=True) + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': test_data}, + y=test_labels, + num_epochs=1, + shuffle=False) + + # Training loop. + steps_per_epoch = 60000 // FLAGS.batch_size + for epoch in range(1, FLAGS.epochs + 1): + # Train the model for one epoch. + mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) + + # Evaluate the model and print results + eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) + test_accuracy = eval_results['accuracy'] + print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py new file mode 100644 index 0000000..07af602 --- /dev/null +++ b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_eager.py @@ -0,0 +1,153 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Training a CNN on MNIST in TF Eager mode with DP-SGD optimizer.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags + +from distutils.version import LooseVersion + +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + GradientDescentOptimizer = tf.train.GradientDescentOptimizer + tf.enable_eager_execution() +else: + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + +flags.DEFINE_boolean('dpsgd', True, 'If True, train with DP-SGD. If False, ' + 'train with vanilla SGD.') +flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') +flags.DEFINE_float('noise_multiplier', 1.1, + 'Ratio of the standard deviation to the clipping norm') +flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') +flags.DEFINE_integer('batch_size', 250, 'Batch size') +flags.DEFINE_integer('epochs', 60, 'Number of epochs') +flags.DEFINE_integer('microbatches', 250, 'Number of microbatches ' + '(must evenly divide batch_size)') + +FLAGS = flags.FLAGS + + +def compute_epsilon(steps): + """Computes epsilon value for given hyperparameters.""" + if FLAGS.noise_multiplier == 0.0: + return float('inf') + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + sampling_probability = FLAGS.batch_size / 60000 + rdp = compute_rdp(q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) + # Delta is set to 1e-5 because MNIST has 60000 training points. + return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] + + +def main(_): + if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: + raise ValueError('Number of microbatches should divide evenly batch_size') + + # Fetch the mnist data + train, test = tf.keras.datasets.mnist.load_data() + train_images, train_labels = train + test_images, test_labels = test + + # Create a dataset object and batch for the training data + dataset = tf.data.Dataset.from_tensor_slices( + (tf.cast(train_images[..., tf.newaxis]/255, tf.float32), + tf.cast(train_labels, tf.int64))) + dataset = dataset.shuffle(1000).batch(FLAGS.batch_size) + + # Create a dataset object and batch for the test data + eval_dataset = tf.data.Dataset.from_tensor_slices( + (tf.cast(test_images[..., tf.newaxis]/255, tf.float32), + tf.cast(test_labels, tf.int64))) + eval_dataset = eval_dataset.batch(10000) + + # Define the model using tf.keras.layers + mnist_model = tf.keras.Sequential([ + tf.keras.layers.Conv2D(16, 8, + strides=2, + padding='same', + activation='relu'), + tf.keras.layers.MaxPool2D(2, 1), + tf.keras.layers.Conv2D(32, 4, strides=2, activation='relu'), + tf.keras.layers.MaxPool2D(2, 1), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(32, activation='relu'), + tf.keras.layers.Dense(10) + ]) + + # Instantiate the optimizer + if FLAGS.dpsgd: + opt = DPGradientDescentGaussianOptimizer( + l2_norm_clip=FLAGS.l2_norm_clip, + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=FLAGS.microbatches, + learning_rate=FLAGS.learning_rate) + else: + opt = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + + # Training loop. 
+ steps_per_epoch = 60000 // FLAGS.batch_size + for epoch in range(FLAGS.epochs): + # Train the model for one epoch. + for (_, (images, labels)) in enumerate(dataset.take(-1)): + with tf.GradientTape(persistent=True) as gradient_tape: + # This dummy call is needed to obtain the var list. + logits = mnist_model(images, training=True) + var_list = mnist_model.trainable_variables + + # In Eager mode, the optimizer takes a function that returns the loss. + def loss_fn(): + logits = mnist_model(images, training=True) # pylint: disable=undefined-loop-variable,cell-var-from-loop + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) # pylint: disable=undefined-loop-variable,cell-var-from-loop + # If training without privacy, the loss is a scalar not a vector. + if not FLAGS.dpsgd: + loss = tf.reduce_mean(loss) + return loss + + if FLAGS.dpsgd: + grads_and_vars = opt.compute_gradients(loss_fn, var_list, + gradient_tape=gradient_tape) + else: + grads_and_vars = opt.compute_gradients(loss_fn, var_list) + + opt.apply_gradients(grads_and_vars) + + # Evaluate the model and print results + for (_, (images, labels)) in enumerate(eval_dataset.take(-1)): + logits = mnist_model(images, training=False) + correct_preds = tf.equal(tf.argmax(logits, axis=1), labels) + test_accuracy = np.mean(correct_preds.numpy()) + print('Test accuracy after epoch %d is: %.3f' % (epoch, test_accuracy)) + + # Compute the privacy budget expended so far. + if FLAGS.dpsgd: + eps = compute_epsilon((epoch + 1) * steps_per_epoch) + print('For delta=1e-5, the current epsilon is: %.2f' % eps) + else: + print('Trained with vanilla non-private SGD optimizer') + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py new file mode 100644 index 0000000..89ce1dc --- /dev/null +++ b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_keras.py @@ -0,0 +1,150 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Training a CNN on MNIST with Keras and the DP SGD optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags + +from distutils.version import LooseVersion + +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + GradientDescentOptimizer = tf.train.GradientDescentOptimizer +else: + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + +flags.DEFINE_boolean( + 'dpsgd', True, 'If True, train with DP-SGD. 
If False, ' + 'train with vanilla SGD.') +flags.DEFINE_float('learning_rate', 0.15, 'Learning rate for training') +flags.DEFINE_float('noise_multiplier', 1.1, + 'Ratio of the standard deviation to the clipping norm') +flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') +flags.DEFINE_integer('batch_size', 250, 'Batch size') +flags.DEFINE_integer('epochs', 60, 'Number of epochs') +flags.DEFINE_integer( + 'microbatches', 250, 'Number of microbatches ' + '(must evenly divide batch_size)') +flags.DEFINE_string('model_dir', None, 'Model directory') + +FLAGS = flags.FLAGS + + +def compute_epsilon(steps): + """Computes epsilon value for given hyperparameters.""" + if FLAGS.noise_multiplier == 0.0: + return float('inf') + orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) + sampling_probability = FLAGS.batch_size / 60000 + rdp = compute_rdp(q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) + # Delta is set to 1e-5 because MNIST has 60000 training points. + return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] + + +def load_mnist(): + """Loads MNIST and preprocesses to combine training and validation data.""" + train, test = tf.keras.datasets.mnist.load_data() + train_data, train_labels = train + test_data, test_labels = test + + train_data = np.array(train_data, dtype=np.float32) / 255 + test_data = np.array(test_data, dtype=np.float32) / 255 + + train_data = train_data.reshape(train_data.shape[0], 28, 28, 1) + test_data = test_data.reshape(test_data.shape[0], 28, 28, 1) + + train_labels = np.array(train_labels, dtype=np.int32) + test_labels = np.array(test_labels, dtype=np.int32) + + train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10) + test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10) + + assert train_data.min() == 0. + assert train_data.max() == 1. + assert test_data.min() == 0. + assert test_data.max() == 1. + + return train_data, train_labels, test_data, test_labels + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: + raise ValueError('Number of microbatches should divide evenly batch_size') + + # Load training and test data. + train_data, train_labels, test_data, test_labels = load_mnist() + + # Define a sequential Keras model + model = tf.keras.Sequential([ + tf.keras.layers.Conv2D(16, 8, + strides=2, + padding='same', + activation='relu', + input_shape=(28, 28, 1)), + tf.keras.layers.MaxPool2D(2, 1), + tf.keras.layers.Conv2D(32, 4, + strides=2, + padding='valid', + activation='relu'), + tf.keras.layers.MaxPool2D(2, 1), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(32, activation='relu'), + tf.keras.layers.Dense(10) + ]) + + if FLAGS.dpsgd: + optimizer = DPGradientDescentGaussianOptimizer( + l2_norm_clip=FLAGS.l2_norm_clip, + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=FLAGS.microbatches, + learning_rate=FLAGS.learning_rate) + # Compute vector of per-example loss rather than its mean over a minibatch. 
+ loss = tf.keras.losses.CategoricalCrossentropy( + from_logits=True, reduction=tf.losses.Reduction.NONE) + else: + optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) + + # Compile model with Keras + model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy']) + + # Train model with Keras + model.fit(train_data, train_labels, + epochs=FLAGS.epochs, + validation_data=(test_data, test_labels), + batch_size=FLAGS.batch_size) + + # Compute the privacy budget expended. + if FLAGS.dpsgd: + eps = compute_epsilon(FLAGS.epochs * 60000 // FLAGS.batch_size) + print('For delta=1e-5, the current epsilon is: %.2f' % eps) + else: + print('Trained with vanilla non-private SGD optimizer') + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_vectorized.py b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_vectorized.py new file mode 100644 index 0000000..a075cd4 --- /dev/null +++ b/tensorflow_privacy/tutorials/mnist_dpsgd_tutorial_vectorized.py @@ -0,0 +1,207 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Training a CNN on MNIST with vectorized DP-SGD optimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags + +from distutils.version import LooseVersion + +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +from tensorflow_privacy.privacy.optimizers import dp_optimizer_vectorized + + +flags.DEFINE_boolean( + 'dpsgd', True, 'If True, train with DP-SGD. If False, ' + 'train with vanilla SGD.') +flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') +flags.DEFINE_float('noise_multiplier', 1.1, + 'Ratio of the standard deviation to the clipping norm') +flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm') +flags.DEFINE_integer('batch_size', 200, 'Batch size') +flags.DEFINE_integer('epochs', 60, 'Number of epochs') +flags.DEFINE_integer( + 'microbatches', 200, 'Number of microbatches ' + '(must evenly divide batch_size)') +flags.DEFINE_string('model_dir', None, 'Model directory') + + +FLAGS = flags.FLAGS + + +NUM_TRAIN_EXAMPLES = 60000 + + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + GradientDescentOptimizer = tf.train.GradientDescentOptimizer +else: + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + + +def compute_epsilon(steps): + """Computes epsilon value for given hyperparameters.""" + if FLAGS.noise_multiplier == 0.0: + return float('inf') + orders = [1 + x / 10. 
for x in range(1, 100)] + list(range(12, 64)) + sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES + rdp = compute_rdp(q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) + # Delta is set to approximate 1 / (number of training points). + return get_privacy_spent(orders, rdp, target_delta=1e-5)[0] + + +def cnn_model_fn(features, labels, mode): + """Model function for a CNN.""" + + # Define CNN architecture using tf.keras.layers. + input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) + y = tf.keras.layers.Conv2D(16, 8, + strides=2, + padding='same', + activation='relu').apply(input_layer) + y = tf.keras.layers.MaxPool2D(2, 1).apply(y) + y = tf.keras.layers.Conv2D(32, 4, + strides=2, + padding='valid', + activation='relu').apply(y) + y = tf.keras.layers.MaxPool2D(2, 1).apply(y) + y = tf.keras.layers.Flatten().apply(y) + y = tf.keras.layers.Dense(32, activation='relu').apply(y) + logits = tf.keras.layers.Dense(10).apply(y) + + # Calculate loss as a vector (to support microbatches in DP-SGD). + vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + # Define mean of loss across minibatch (for reporting through tf.Estimator). + scalar_loss = tf.reduce_mean(vector_loss) + + # Configure the training op (for TRAIN mode). + if mode == tf.estimator.ModeKeys.TRAIN: + + if FLAGS.dpsgd: + # Use DP version of GradientDescentOptimizer. Other optimizers are + # available in dp_optimizer. Most optimizers inheriting from + # tf.train.Optimizer should be wrappable in differentially private + # counterparts by calling dp_optimizer.optimizer_from_args(). + optimizer = dp_optimizer_vectorized.VectorizedDPSGD( + l2_norm_clip=FLAGS.l2_norm_clip, + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=FLAGS.microbatches, + learning_rate=FLAGS.learning_rate) + opt_loss = vector_loss + else: + optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + opt_loss = scalar_loss + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) + # In the following, we pass the mean of the loss (scalar_loss) rather than + # the vector_loss because tf.estimator requires a scalar loss. This is only + # used for evaluation and debugging by tf.estimator. The actual loss being + # minimized is opt_loss defined above and passed to optimizer.minimize(). + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + train_op=train_op) + + # Add evaluation metrics (for EVAL mode). + elif mode == tf.estimator.ModeKeys.EVAL: + eval_metric_ops = { + 'accuracy': + tf.metrics.accuracy( + labels=labels, + predictions=tf.argmax(input=logits, axis=1)) + } + + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + eval_metric_ops=eval_metric_ops) + + +def load_mnist(): + """Loads MNIST and preprocesses to combine training and validation data.""" + train, test = tf.keras.datasets.mnist.load_data() + train_data, train_labels = train + test_data, test_labels = test + + train_data = np.array(train_data, dtype=np.float32) / 255 + test_data = np.array(test_data, dtype=np.float32) / 255 + + train_labels = np.array(train_labels, dtype=np.int32) + test_labels = np.array(test_labels, dtype=np.int32) + + assert train_data.min() == 0. + assert train_data.max() == 1. + assert test_data.min() == 0. + assert test_data.max() == 1. 
+ assert train_labels.ndim == 1 + assert test_labels.ndim == 1 + + return train_data, train_labels, test_data, test_labels + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.dpsgd and FLAGS.batch_size % FLAGS.microbatches != 0: + raise ValueError('Number of microbatches should divide evenly batch_size') + + # Load training and test data. + train_data, train_labels, test_data, test_labels = load_mnist() + + # Instantiate the tf.Estimator. + mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, + model_dir=FLAGS.model_dir) + + # Create tf.Estimator input functions for the training and test data. + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data}, + y=train_labels, + batch_size=FLAGS.batch_size, + num_epochs=FLAGS.epochs, + shuffle=True) + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': test_data}, + y=test_labels, + num_epochs=1, + shuffle=False) + + # Training loop. + steps_per_epoch = NUM_TRAIN_EXAMPLES // FLAGS.batch_size + for epoch in range(1, FLAGS.epochs + 1): + # Train the model for one epoch. + mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) + + # Evaluate the model and print results + eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) + test_accuracy = eval_results['accuracy'] + print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) + + # Compute the privacy budget expended. + if FLAGS.dpsgd: + eps = compute_epsilon(epoch * NUM_TRAIN_EXAMPLES // FLAGS.batch_size) + print('For delta=1e-5, the current epsilon is: %.2f' % eps) + else: + print('Trained with vanilla non-private SGD optimizer') + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/tutorials/mnist_lr_tutorial.py b/tensorflow_privacy/tutorials/mnist_lr_tutorial.py new file mode 100644 index 0000000..c8bbf04 --- /dev/null +++ b/tensorflow_privacy/tutorials/mnist_lr_tutorial.py @@ -0,0 +1,250 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""DP Logistic Regression on MNIST. + +DP Logistic Regression on MNIST with support for privacy-by-iteration analysis. +Vitaly Feldman, Ilya Mironov, Kunal Talwar, and Abhradeep Thakurta. +"Privacy amplification by iteration." +In 2018 IEEE 59th Annual Symposium on Foundations of Computer Science (FOCS), +pp. 521-532. IEEE, 2018. +https://arxiv.org/abs/1808.06651. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from absl import app +from absl import flags + +from distutils.version import LooseVersion + +import numpy as np +import tensorflow as tf + +from tensorflow_privacy.privacy.analysis.rdp_accountant import compute_rdp +from tensorflow_privacy.privacy.analysis.rdp_accountant import get_privacy_spent +from tensorflow_privacy.privacy.optimizers import dp_optimizer + +if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + GradientDescentOptimizer = tf.train.GradientDescentOptimizer +else: + GradientDescentOptimizer = tf.optimizers.SGD # pylint: disable=invalid-name + +FLAGS = flags.FLAGS + +flags.DEFINE_boolean( + 'dpsgd', True, 'If True, train with DP-SGD. If False, ' + 'train with vanilla SGD.') +flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training') +flags.DEFINE_float('noise_multiplier', 0.05, + 'Ratio of the standard deviation to the clipping norm') +flags.DEFINE_integer('batch_size', 5, 'Batch size') +flags.DEFINE_integer('epochs', 5, 'Number of epochs') +flags.DEFINE_float('regularizer', 0, 'L2 regularizer coefficient') +flags.DEFINE_string('model_dir', None, 'Model directory') +flags.DEFINE_float('data_l2_norm', 8, 'Bound on the L2 norm of normalized data') + + +def lr_model_fn(features, labels, mode, nclasses, dim): + """Model function for logistic regression.""" + input_layer = tf.reshape(features['x'], tuple([-1]) + dim) + + logits = tf.layers.dense( + inputs=input_layer, + units=nclasses, + kernel_regularizer=tf.contrib.layers.l2_regularizer( + scale=FLAGS.regularizer), + bias_regularizer=tf.contrib.layers.l2_regularizer( + scale=FLAGS.regularizer)) + + # Calculate loss as a vector (to support microbatches in DP-SGD). + vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + tf.losses.get_regularization_loss() + # Define mean of loss across minibatch (for reporting through tf.Estimator). + scalar_loss = tf.reduce_mean(vector_loss) + + # Configure the training op (for TRAIN mode). + if mode == tf.estimator.ModeKeys.TRAIN: + if FLAGS.dpsgd: + # The loss function is L-Lipschitz with L = sqrt(2*(||x||^2 + 1)) where + # ||x|| is the norm of the data. + # We don't use microbatches (thus speeding up computation), since no + # clipping is necessary due to data normalization. + optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer( + l2_norm_clip=math.sqrt(2 * (FLAGS.data_l2_norm**2 + 1)), + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=1, + learning_rate=FLAGS.learning_rate) + opt_loss = vector_loss + else: + optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate) + opt_loss = scalar_loss + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) + # In the following, we pass the mean of the loss (scalar_loss) rather than + # the vector_loss because tf.estimator requires a scalar loss. This is only + # used for evaluation and debugging by tf.estimator. The actual loss being + # minimized is opt_loss defined above and passed to optimizer.minimize(). + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, train_op=train_op) + + # Add evaluation metrics (for EVAL mode). 
+ elif mode == tf.estimator.ModeKeys.EVAL: + eval_metric_ops = { + 'accuracy': + tf.metrics.accuracy( + labels=labels, predictions=tf.argmax(input=logits, axis=1)) + } + return tf.estimator.EstimatorSpec( + mode=mode, loss=scalar_loss, eval_metric_ops=eval_metric_ops) + + +def normalize_data(data, data_l2_norm): + """Normalizes data such that each sample has a bounded L2 norm. + + Args: + data: the dataset. Each row represents one sample. + data_l2_norm: the target upper bound on the L2 norm. + """ + + for i in range(data.shape[0]): + norm = np.linalg.norm(data[i]) + if norm > data_l2_norm: + data[i] = data[i] / norm * data_l2_norm + + +def load_mnist(data_l2_norm=float('inf')): + """Loads MNIST and preprocesses to combine training and validation data.""" + train, test = tf.keras.datasets.mnist.load_data() + train_data, train_labels = train + test_data, test_labels = test + + train_data = np.array(train_data, dtype=np.float32) / 255 + test_data = np.array(test_data, dtype=np.float32) / 255 + + train_data = train_data.reshape(train_data.shape[0], -1) + test_data = test_data.reshape(test_data.shape[0], -1) + + idx = np.random.permutation(len(train_data)) # shuffle data once + train_data = train_data[idx] + train_labels = train_labels[idx] + + normalize_data(train_data, data_l2_norm) + normalize_data(test_data, data_l2_norm) + + train_labels = np.array(train_labels, dtype=np.int32) + test_labels = np.array(test_labels, dtype=np.int32) + + return train_data, train_labels, test_data, test_labels + + +def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier): + """Tabulating position-dependent privacy guarantees.""" + if noise_multiplier == 0: + print('No differential privacy (additive noise is 0).') + return + + print('In the conditions of Theorem 34 (https://arxiv.org/abs/1808.06651) ' + 'the training procedure results in the following privacy guarantees.') + + print('Out of the total of {} samples:'.format(samples)) + + steps_per_epoch = samples // batch_size + orders = np.concatenate( + [np.linspace(2, 20, num=181), + np.linspace(20, 100, num=81)]) + delta = 1e-5 + for p in (.5, .9, .99): + steps = math.ceil(steps_per_epoch * p) # Steps in the last epoch. + coef = 2 * (noise_multiplier * batch_size)**-2 * ( + # Accounting for privacy loss + (epochs - 1) / steps_per_epoch + # ... from all-but-last epochs + 1 / (steps_per_epoch - steps + 1)) # ... due to the last epoch + # Using RDP accountant to compute eps. Doing computation analytically is + # an option. + rdp = [order * coef for order in orders] + eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta) + print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format( + p * 100, eps, delta)) + + # Compute privacy guarantees for the Sampled Gaussian Mechanism.
+ rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier, + epochs * steps_per_epoch, orders) + eps_sgm, _, _ = get_privacy_spent(orders, rdp_sgm, target_delta=delta) + print('By comparison, DP-SGD analysis for training done with the same ' + 'parameters and random shuffling in each epoch guarantees ' + '({:.2f}, {})-DP for all samples.'.format(eps_sgm, delta)) + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + if FLAGS.data_l2_norm <= 0: + raise ValueError('data_l2_norm must be positive.') + if FLAGS.dpsgd and FLAGS.learning_rate > 8 / FLAGS.data_l2_norm**2: + raise ValueError('The amplification-by-iteration analysis requires ' + 'learning_rate <= 2 / beta, where beta is the smoothness ' + 'of the loss function and is upper bounded by ||x||^2 / 4 ' + 'with ||x|| being the largest L2 norm of the samples.') + + # Load training and test data. + # Smoothness = ||x||^2 / 4 where ||x|| is the largest L2 norm of the samples. + # To get bounded smoothness, we normalize the data such that each sample has a + # bounded L2 norm. + train_data, train_labels, test_data, test_labels = load_mnist( + data_l2_norm=FLAGS.data_l2_norm) + + # Instantiate tf.Estimator. + # pylint: disable=g-long-lambda + model_fn = lambda features, labels, mode: lr_model_fn( + features, labels, mode, nclasses=10, dim=train_data.shape[1:]) + mnist_classifier = tf.estimator.Estimator( + model_fn=model_fn, model_dir=FLAGS.model_dir) + + # Create tf.Estimator input functions for the training and test data. + # To analyze the per-user privacy loss, we keep the same orders of samples in + # each epoch by setting shuffle=False. + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data}, + y=train_labels, + batch_size=FLAGS.batch_size, + num_epochs=FLAGS.epochs, + shuffle=False) + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': test_data}, y=test_labels, num_epochs=1, shuffle=False) + + # Train the model. + num_samples = train_data.shape[0] + steps_per_epoch = num_samples // FLAGS.batch_size + + mnist_classifier.train( + input_fn=train_input_fn, steps=steps_per_epoch * FLAGS.epochs) + + # Evaluate the model and print results. + eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) + print('Test accuracy after {} epochs is: {:.2f}'.format( + FLAGS.epochs, eval_results['accuracy'])) + + if FLAGS.dpsgd: + print_privacy_guarantees( + epochs=FLAGS.epochs, + batch_size=FLAGS.batch_size, + samples=num_samples, + noise_multiplier=FLAGS.noise_multiplier, + ) + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py b/tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py new file mode 100644 index 0000000..aa74ea6 --- /dev/null +++ b/tensorflow_privacy/tutorials/walkthrough/mnist_scratch.py @@ -0,0 +1,134 @@ +# Copyright 2019, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ + +"""Scratchpad for training a CNN on MNIST with DPSGD.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf + +tf.flags.DEFINE_float('learning_rate', .15, 'Learning rate for training') +tf.flags.DEFINE_integer('batch_size', 256, 'Batch size') +tf.flags.DEFINE_integer('epochs', 15, 'Number of epochs') + +FLAGS = tf.flags.FLAGS + + +def cnn_model_fn(features, labels, mode): + """Model function for a CNN.""" + + # Define CNN architecture using tf.keras.layers. + input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) + y = tf.keras.layers.Conv2D(16, 8, + strides=2, + padding='same', + activation='relu').apply(input_layer) + y = tf.keras.layers.MaxPool2D(2, 1).apply(y) + y = tf.keras.layers.Conv2D(32, 4, + strides=2, + padding='valid', + activation='relu').apply(y) + y = tf.keras.layers.MaxPool2D(2, 1).apply(y) + y = tf.keras.layers.Flatten().apply(y) + y = tf.keras.layers.Dense(32, activation='relu').apply(y) + logits = tf.keras.layers.Dense(10).apply(y) + + # Calculate loss as a vector and as its average across minibatch. + vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, + logits=logits) + scalar_loss = tf.reduce_mean(vector_loss) + + # Configure the training op (for TRAIN mode). + if mode == tf.estimator.ModeKeys.TRAIN: + optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) + opt_loss = scalar_loss + global_step = tf.train.get_global_step() + train_op = optimizer.minimize(loss=opt_loss, global_step=global_step) + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + train_op=train_op) + + # Add evaluation metrics (for EVAL mode). + elif mode == tf.estimator.ModeKeys.EVAL: + eval_metric_ops = { + 'accuracy': + tf.metrics.accuracy( + labels=labels, + predictions=tf.argmax(input=logits, axis=1)) + } + return tf.estimator.EstimatorSpec(mode=mode, + loss=scalar_loss, + eval_metric_ops=eval_metric_ops) + + +def load_mnist(): + """Loads MNIST and preprocesses to combine training and validation data.""" + train, test = tf.keras.datasets.mnist.load_data() + train_data, train_labels = train + test_data, test_labels = test + + train_data = np.array(train_data, dtype=np.float32) / 255 + test_data = np.array(test_data, dtype=np.float32) / 255 + + train_labels = np.array(train_labels, dtype=np.int32) + test_labels = np.array(test_labels, dtype=np.int32) + + assert train_data.min() == 0. + assert train_data.max() == 1. + assert test_data.min() == 0. + assert test_data.max() == 1. + assert train_labels.ndim == 1 + assert test_labels.ndim == 1 + + return train_data, train_labels, test_data, test_labels + + +def main(unused_argv): + tf.logging.set_verbosity(tf.logging.INFO) + + # Load training and test data. + train_data, train_labels, test_data, test_labels = load_mnist() + + # Instantiate the tf.Estimator. + mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn) + + # Create tf.Estimator input functions for the training and test data. + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': train_data}, + y=train_labels, + batch_size=FLAGS.batch_size, + num_epochs=FLAGS.epochs, + shuffle=True) + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={'x': test_data}, + y=test_labels, + num_epochs=1, + shuffle=False) + + # Training loop. + steps_per_epoch = 60000 // FLAGS.batch_size + for epoch in range(1, FLAGS.epochs + 1): + # Train the model for one epoch. 
+ mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) + + # Evaluate the model and print results + eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) + test_accuracy = eval_results['accuracy'] + print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) + +if __name__ == '__main__': + tf.app.run() diff --git a/tensorflow_privacy/tutorials/walkthrough/walkthrough.md b/tensorflow_privacy/tutorials/walkthrough/walkthrough.md new file mode 100644 index 0000000..20f3f8a --- /dev/null +++ b/tensorflow_privacy/tutorials/walkthrough/walkthrough.md @@ -0,0 +1,431 @@ +# Machine Learning with Differential Privacy in TensorFlow + +*Cross-posted from [cleverhans.io](http://www.cleverhans.io/privacy/2019/03/26/machine-learning-with-differential-privacy-in-tensorflow.html)* + +Differential privacy is a framework for measuring the privacy guarantees +provided by an algorithm. Through the lens of differential privacy, we can +design machine learning algorithms that responsibly train models on private +data. Learning with differential privacy provides provable guarantees of +privacy, mitigating the risk of exposing sensitive training data in machine +learning. Intuitively, a model trained with differential privacy should not be +affected by any single training example, or small set of training examples, in its data set. + +You may recall our [previous blog post on PATE](http://www.cleverhans.io/privacy/2018/04/29/privacy-and-machine-learning.html), +an approach that achieves private learning by carefully +coordinating the activity of several different ML +models [[Papernot et al.]](https://arxiv.org/abs/1610.05755). +In this post, you will learn how to train a differentially private model with +another approach that relies on Differentially +Private Stochastic Gradient Descent (DP-SGD) [[Abadi et al.]](https://arxiv.org/abs/1607.00133). +DP-SGD and PATE are two different ways to achieve the same goal of privacy-preserving +machine learning. DP-SGD makes fewer assumptions about the ML task than PATE, +but this comes at the expense of making modifications to the training algorithm. + +Indeed, DP-SGD is +a modification of the stochastic gradient descent algorithm, +which is the basis for many optimizers that are popular in machine learning. +Models trained with DP-SGD have provable privacy guarantees expressed in terms +of differential privacy (we will explain what this means at the end of this +post). We will be using the [TensorFlow Privacy](https://github.com/tensorflow/privacy) library, +which provides an implementation of DP-SGD, to illustrate our presentation of DP-SGD +and provide a hands-on tutorial. + +The only prerequisite for following this tutorial is to be able to train a +simple neural network with TensorFlow. If you are not familiar with +convolutional neural networks or how to train them, we recommend reading +[this tutorial first](https://www.tensorflow.org/tutorials/keras/basic_classification) +to get started with TensorFlow and machine learning. + +Upon completing the tutorial presented in this post, +you will be able to wrap existing optimizers +(e.g., SGD, Adam, ...) into their differentially private counterparts using +TensorFlow (TF) Privacy. You will also learn how to tune the parameters +introduced by differentially private optimization. Finally, you will learn how to +measure the privacy guarantees provided using analysis tools included in TF +Privacy.
+ +## Getting started + +Before we get started with DP-SGD and TF Privacy, we need to put together a +script that trains a simple neural network with TensorFlow. + +In the interest of keeping this tutorial focused on the privacy aspects of +training, we've included +such a script as companion code for this blog post in the `walkthrough` [subdirectory](https://github.com/tensorflow/privacy/tree/master/tutorials/walkthrough) of the +`tutorials` found in the [TensorFlow Privacy](https://github.com/tensorflow/privacy) repository. The code found in the file `mnist_scratch.py` +trains a small +convolutional neural network on the MNIST dataset for handwriting recognition. +This script will be used as the basis for our exercise below. + +Next, we highlight some important code snippets from the `mnist_scratch.py` +script. + +The first snippet includes the definition of a convolutional neural network +using `tf.keras.layers`. The model contains two convolutional layers coupled +with max pooling layers, a fully-connected layer, and a softmax. The model's +output is a vector where each component indicates how likely the input is to be +in one of the 10 classes of the handwriting recognition problem we considered. +If any of this sounds unfamiliar, we recommend reading +[this tutorial first](https://www.tensorflow.org/tutorials/keras/basic_classification) +to get started with TensorFlow and machine learning. + +```python +input_layer = tf.reshape(features['x'], [-1, 28, 28, 1]) +y = tf.keras.layers.Conv2D(16, 8, + strides=2, + padding='same', + activation='relu').apply(input_layer) +y = tf.keras.layers.MaxPool2D(2, 1).apply(y) +y = tf.keras.layers.Conv2D(32, 4, + strides=2, + padding='valid', + activation='relu').apply(y) +y = tf.keras.layers.MaxPool2D(2, 1).apply(y) +y = tf.keras.layers.Flatten().apply(y) +y = tf.keras.layers.Dense(32, activation='relu').apply(y) +logits = tf.keras.layers.Dense(10).apply(y) +predicted_labels = tf.argmax(input=logits, axis=1) +``` + +The second snippet shows how the model is trained using the `tf.Estimator` API, +which takes care of all the boilerplate code required to form minibatches used +to train and evaluate the model. To prepare ourselves for the modifications we +will be making to provide differential privacy, we still expose the loop over +different epochs of learning: an epoch is defined as one pass over all of the +training points included in the training set. + +```python +steps_per_epoch = 60000 // FLAGS.batch_size +for epoch in range(1, FLAGS.epochs + 1): + # Train the model for one epoch. + mnist_classifier.train(input_fn=train_input_fn, steps=steps_per_epoch) + + # Evaluate the model and print results + eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn) + test_accuracy = eval_results['accuracy'] + print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy)) +``` + +We are now ready to train our MNIST model without privacy. The model should +achieve above 99% test accuracy after 15 epochs at a learning rate of 0.15 on +minibatches of 256 training points. + +```shell +python mnist_scratch.py +``` + +### Stochastic Gradient Descent + +Before we dive into how DP-SGD and TF Privacy can be used to provide differential privacy +during machine learning, we first provide a brief overview of the stochastic +gradient descent algorithm, which is one of the most popular optimizers for +neural networks. + +Stochastic gradient descent is an iterative procedure. 
At each iteration, a +batch of data is randomly sampled from the training set (this is where the +*stochasticity* comes from). The error between the model's prediction and the +training labels is then computed. This error, also called the loss, is then +differentiated with respect to the model's parameters. These derivatives (or +*gradients*) tell us how we should update each parameter to bring the model +closer to predicting the correct label. Iteratively recomputing gradients and +applying them to update the model's parameters is what is referred to as the +*descent*. To summarize, the following steps are repeated until the model's +performance is satisfactory: + +1. Sample a minibatch of training points `(x, y)` where `x` is an input and `y` + a label. + +2. Compute loss (i.e., error) `L(theta, x, y)` between the model's prediction + `f_theta(x)` and label `y` where `theta` represents the model parameters. + +3. Compute gradient of the loss `L(theta, x, y)` with respect to the model + parameters `theta`. + +4. Multiply these gradients by the learning rate and apply the product to + update model parameters `theta`. + +### Modifications needed to make stochastic gradient descent a differentially private algorithm + +Two modifications are needed to ensure that stochastic gradient descent is a +differentially private algorithm. + +First, the sensitivity of each gradient needs to be bounded. In other words, we +need to limit how much each individual training point sampled in a minibatch can +influence the resulting gradient computation. This can be done by clipping each +gradient computed on each training point between steps 3 and 4 above. +Intuitively, this allows us to bound how much each training point can possibly +impact model parameters. + +Second, we need to randomize the algorithm's behavior to make it statistically +impossible to know whether or not a particular point was included in the +training set by comparing the updates stochastic gradient descent applies when +it operates with or without this particular point in the training set. This is +achieved by sampling random noise and adding it to the clipped gradients. + +Thus, here is the stochastic gradient descent algorithm adapted from above to be +differentially private: + +1. Sample a minibatch of training points `(x, y)` where `x` is an input and `y` + a label. + +2. Compute loss (i.e., error) `L(theta, x, y)` between the model's prediction + `f_theta(x)` and label `y` where `theta` represents the model parameters. + +3. Compute gradient of the loss `L(theta, x, y)` with respect to the model + parameters `theta`. + +4. Clip gradients, per training example included in the minibatch, to ensure + each gradient has a known maximum Euclidean norm. + +5. Add random noise to the clipped gradients. + +6. Multiply these clipped and noised gradients by the learning rate and apply + the product to update model parameters `theta`. + +### Implementing DP-SGD with TF Privacy + +It's now time to make changes to the code we started with to take into account +the two modifications outlined in the previous paragraph: gradient clipping and +noising. This is where TF Privacy kicks in: it provides code that wraps an +existing TF optimizer to create a variant that performs both of these steps +needed to obtain differential privacy. + +As mentioned above, step 1 of the algorithm, that is forming minibatches of +training data and labels, is implemented by the `tf.Estimator` API in our +tutorial. 
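+
+For reference, step 1 requires no new code: minibatch sampling is handled by the
+training input function already defined in `mnist_scratch.py`, reproduced here
+unchanged:
+
+```python
+train_input_fn = tf.estimator.inputs.numpy_input_fn(
+    x={'x': train_data},
+    y=train_labels,
+    batch_size=FLAGS.batch_size,
+    num_epochs=FLAGS.epochs,
+    shuffle=True)
+```
+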
We can thus go straight to step 2 of the algorithm outlined above and +compute the loss (i.e., model error) between the model's predictions and labels. + +```python +vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) +``` + +TensorFlow provides implementations of common losses, here we use the +cross-entropy, which is well-suited for our classification problem. Note how we +computed the loss as a vector, where each component of the vector corresponds to +an individual training point and label. This is required to support per example +gradient manipulation later at step 4. + +We are now ready to create an optimizer. In TensorFlow, an optimizer object can +be instantiated by passing it a learning rate value, which is used in step 6 +outlined above. +This is what the code would look like *without* differential privacy: + +```python +optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate) +train_op = optimizer.minimize(loss=scalar_loss) +``` + +Note that our code snippet assumes that a TensorFlow flag was +defined for the learning rate value. + +Now, we use the `optimizers.dp_optimizer` module of TF Privacy to implement the +optimizer with differential privacy. Under the hood, this code implements steps +3-6 of the algorithm above: + +```python +optimizer = optimizers.dp_optimizer.DPGradientDescentGaussianOptimizer( + l2_norm_clip=FLAGS.l2_norm_clip, + noise_multiplier=FLAGS.noise_multiplier, + num_microbatches=FLAGS.microbatches, + learning_rate=FLAGS.learning_rate, + population_size=60000) +train_op = optimizer.minimize(loss=vector_loss) +``` + +In these two code snippets, we used the stochastic gradient descent +optimizer but it could be replaced by another optimizer implemented in +TensorFlow. For instance, the `AdamOptimizer` can be replaced by `DPAdamGaussianOptimizer`. In addition to the standard optimizers already +included in TF Privacy, most optimizers which are objects from a child class +of `tf.train.Optimizer` +can be made differentially private by calling `optimizers.dp_optimizer.make_gaussian_optimizer_class()`. + +As you can see, only one line needs to change but there are a few things going +on that are best to unwrap before we continue. In addition to the learning rate, we +passed the size of the training set as the `population_size` parameter. This is +used to measure the strength of privacy achieved; we will come back to this +accounting aspect later. + +More importantly, TF Privacy introduces three new hyperparameters to the +optimizer object: `l2_norm_clip`, `noise_multiplier`, and `num_microbatches`. +You may have deduced what `l2_norm_clip` and `noise_multiplier` are from the two +changes outlined above. + +Parameter `l2_norm_clip` is the maximum Euclidean norm of each individual +gradient that is computed on an individual training example from a minibatch. This +parameter is used to bound the optimizer's sensitivity to individual training +points. Note how in order for the optimizer to be able to compute these per +example gradients, we must pass it a *vector* loss as defined previously, rather +than the loss averaged over the entire minibatch. + +Next, the `noise_multiplier` parameter is used to control how much noise is +sampled and added to gradients before they are applied by the optimizer. +Generally, more noise results in better privacy (often, but not necessarily, at +the expense of lower utility). + +The third parameter relates to an aspect of DP-SGD that was not discussed +previously. 
In practice, clipping gradients on a per example basis can be +detrimental to the performance of our approach because computations can no +longer be batched and parallelized at the granularity of minibatches. Hence, we +introduce a new granularity by splitting each minibatch into multiple +microbatches [[McMahan et al.]](https://arxiv.org/abs/1812.06210). Rather than +clipping gradients on a per example basis, we clip them on a microbatch basis. +For instance, if we have a minibatch of 256 training examples, rather than +clipping each of the 256 gradients individually, we would clip 32 gradients +averaged over microbatches of 8 training examples when `num_microbatches=32`. +This allows for some degree of parallelism. Hence, one can think of +`num_microbatches` as a parameter that allows us to trade off performance (when +the parameter is set to a small value) with utility (when the parameter is set +to a value close to the minibatch size). + +Once you've implemented all these changes, try training your model again with +the differentially private stochastic gradient optimizer. You can use the +following hyperparameter values to obtain a reasonable model (95% test +accuracy): + +```python +learning_rate=0.25 +noise_multiplier=1.3 +l2_norm_clip=1.5 +batch_size=256 +epochs=15 +num_microbatches=256 +``` + +### Measuring the privacy guarantee achieved + +At this point, we made all the changes needed to train our model with +differential privacy. Congratulations! Yet, we are still missing one crucial +piece of the puzzle: we have not computed the privacy guarantee achieved. Recall +the two modifications we made to the original stochastic gradient descent +algorithm: clip and randomize gradients. + +It is intuitive to machine learning practitioners how clipping gradients limits +the ability of the model to overfit to any of its training points. In fact, +gradient clipping is commonly employed in machine learning even when privacy is +not a concern. The intuition for introducing randomness to a learning algorithm +that is already randomized is a little more subtle but this additional +randomization is required to make it hard to tell which behavioral aspects of +the model defined by the learned parameters came from randomness and which came +from the training data. Without randomness, we would be able to ask questions +like: “What parameters does the learning algorithm choose when we train it on +this specific dataset?” With randomness in the learning algorithm, we instead +ask questions like: “What is the probability that the learning algorithm will +choose parameters in this set of possible parameters, when we train it on this +specific dataset?” + +We use a version of differential privacy which requires that the probability of +learning any particular set of parameters stays roughly the same if we change a +single training example in the training set. This could mean to add a training +example, remove a training example, or change the values within one training +example. The intuition is that if a single training point does not affect the +outcome of learning, the information contained in that training point cannot be +memorized and the privacy of the individual who contributed this data point to our +dataset is respected. We often refer to this probability as the privacy budget: +smaller privacy budgets correspond to stronger privacy guarantees. + +Accounting required to compute the privacy budget spent to train our machine +learning model is another feature provided by TF Privacy. 
Knowing what level of +differential privacy was achieved allows us to put into perspective the drop in +utility that is often observed when switching to differentially private +optimization. It also allows us to compare two models objectively to determine +which of the two is more privacy-preserving than the other. + +Before we derive a bound on the privacy guarantee achieved by our optimizer, we +first need to identify all the parameters that are relevant to measuring the +potential privacy loss induced by training. These are the `noise_multiplier`, +the sampling ratio `q` (the probability of an individual training point being +included in a minibatch), and the number of `steps` the optimizer takes over the +training data. We simply report the `noise_multiplier` value provided to the +optimizer and compute the sampling ratio and number of steps as follows: + +```python +noise_multiplier = FLAGS.noise_multiplier +sampling_probability = FLAGS.batch_size / 60000 +steps = FLAGS.epochs * 60000 // FLAGS.batch_size +``` + +At a high level, the privacy analysis measures how including or excluding any +particular point in the training data is likely to change the probability that +we learn any particular set of parameters. In other words, the analysis measures +the difference between the distributions of model parameters on neighboring training +sets (pairs of any training sets with a Hamming distance of 1). In TF Privacy, +we use the Rényi divergence to measure this distance between distributions. +Indeed, our analysis is performed in the framework of Rényi Differential Privacy +(RDP), which is a generalization of pure differential privacy +[[Mironov]](https://arxiv.org/abs/1702.07476). RDP is a useful tool here because +it is particularly well suited to analyze the differential privacy guarantees +provided by sampling followed by Gaussian noise addition, which is how gradients +are randomized in the TF Privacy implementation of the DP-SGD optimizer. + +We will express our differential privacy guarantee using two parameters: +`epsilon` and `delta`. + +* Delta bounds the probability of our privacy guarantee not holding. A rule of + thumb is to set it to be less than the inverse of the training data size + (i.e., the population size). Here, we set it to `10^-5` because MNIST has + 60000 training points. + +* Epsilon measures the strength of our privacy guarantee. In the case of + differentially private machine learning, it gives a bound on how much the + probability of a particular model output can vary by including (or removing) + a single training example. We usually want it to be a small constant. + However, this is only an upper bound, and a large value of epsilon could + still mean good practical privacy. + +The TF Privacy library provides two methods relevant to derive privacy +guarantees achieved from the three parameters outlined in the last code snippet: `compute_rdp` +and `get_privacy_spent`. +These methods are found in its `analysis.rdp_accountant` module. Here is how to use them. + +First, we need to define a list of orders, at which the Rényi divergence will be +computed. While some finer points of how to use the RDP accountant are outside the +scope of this document, it is useful to keep in mind the following. +First, there is very little downside in expanding the list of orders for which RDP +is computed. Second, the computed privacy budget is typically not very sensitive to +the exact value of the order (being close enough will land you in the right neighborhood). 
+Finally, if you are targeting a particular range of epsilons (say, 1 to 10) and your delta is +fixed (say, `10^-5`), then your orders must cover the range between `1+ln(1/delta)/10≈2.15` and +`1+ln(1/delta)/1≈12.5`. This last rule may appear circular (how do you know what privacy +parameters you get without running the privacy accountant?!), but one or two adjustments +of the range of the orders would usually suffice. + +```python +orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64)) +rdp = compute_rdp(q=sampling_probability, + noise_multiplier=FLAGS.noise_multiplier, + steps=steps, + orders=orders) +``` + +Then, the method `get_privacy_spent` computes the best `epsilon` for a given +`target_delta` value of delta by taking the minimum over all orders. + +```python +epsilon = get_privacy_spent(orders, rdp, target_delta=1e-5)[0] +``` + +Running the code snippets above with the hyperparameter values used during +training will estimate the `epsilon` value that was achieved by the +differentially private optimizer, and thus the strength of the privacy guarantee +which comes with the model we trained. Once we have computed the value of `epsilon`, +interpreting this value is at times +difficult. One possibility is to purposely +insert secrets in the model's training set and measure how likely +they are to be leaked by a differentially private model +(compared to a non-private model) at inference time +[[Carlini et al.]](https://arxiv.org/abs/1802.08232). + +### Putting all the pieces together + +We covered a lot in this blog post! If you made all the changes discussed +directly into the `mnist_scratch.py` file, you should have been able to train a +differentially private neural network on MNIST and measure the privacy guarantee +achieved. + +However, in case you ran into an issue or you'd like to see what a complete +implementation looks like, the "solution" to the tutorial presented in this blog +post can be [found](https://github.com/tensorflow/privacy/blob/master/tutorials/mnist_dpsgd_tutorial.py) in the +tutorials directory of TF Privacy. It is the script called `mnist_dpsgd_tutorial.py`.
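+
+As a final aside for curious readers, here is a minimal sketch of what steps 4
+through 6 of the differentially private algorithm look like when written out by
+hand for a single minibatch. This is not the TF Privacy implementation and not
+part of the tutorial scripts; it assumes eager execution and uses a toy one-layer
+model with random data purely to illustrate the clip-and-noise logic that the
+wrapped optimizer performs for you:
+
+```python
+import tensorflow as tf
+
+# Hyperparameter names match the ones used earlier in this post.
+l2_norm_clip = 1.5
+noise_multiplier = 1.3
+learning_rate = 0.25
+
+# A toy model and a toy minibatch stand in for the MNIST setup above.
+model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
+model.build(input_shape=(None, 784))
+images = tf.random.normal([256, 784])
+labels = tf.random.uniform([256], maxval=10, dtype=tf.int64)
+optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
+
+
+def clipped_grads(x, y):
+  """Computes the gradient for one example and clips its Euclidean norm."""
+  with tf.GradientTape() as tape:
+    logits = model(tf.expand_dims(x, 0))
+    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=tf.expand_dims(y, 0), logits=logits)
+  grads = tape.gradient(loss, model.trainable_variables)
+  clipped, _ = tf.clip_by_global_norm(grads, l2_norm_clip)  # Step 4.
+  return clipped
+
+
+# Step 4: sum the clipped per-example gradients over the minibatch.
+summed = [tf.zeros_like(v) for v in model.trainable_variables]
+for x, y in zip(images, labels):
+  summed = [s + g for s, g in zip(summed, clipped_grads(x, y))]
+
+# Step 5: add Gaussian noise scaled by the clipping norm, then average.
+batch_size = int(images.shape[0])
+noised = [(s + tf.random.normal(tf.shape(s),
+                                stddev=noise_multiplier * l2_norm_clip))
+          / batch_size for s in summed]
+
+# Step 6: apply the noised, averaged gradient as an ordinary SGD update.
+optimizer.apply_gradients(zip(noised, model.trainable_variables))
+```
+
+The explicit loop over individual examples is exactly the per-example work that
+the `num_microbatches` parameter lets the library batch more efficiently.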