diff --git a/tensorflow_privacy/privacy/analysis/BUILD b/tensorflow_privacy/privacy/analysis/BUILD index 63abc69..fa64c94 100644 --- a/tensorflow_privacy/privacy/analysis/BUILD +++ b/tensorflow_privacy/privacy/analysis/BUILD @@ -51,6 +51,12 @@ py_test( deps = [":compute_noise_from_budget_lib"], ) +py_library( + name = "dp_event", + srcs = ["dp_event.py"], + srcs_version = "PY3", +) + py_library( name = "gdp_accountant", srcs = ["gdp_accountant.py"], diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py new file mode 100644 index 0000000..5d37b43 --- /dev/null +++ b/tensorflow_privacy/privacy/analysis/dp_event.py @@ -0,0 +1,214 @@ +# Copyright 2021, The TensorFlow Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Standard DpEvent classes. + +A `DpEvent` represents the (hyper)parameters of a differentially +private query, amplification mechanism, or composition, that are necessary +and sufficient for privacy accounting. Various independent implementations of DP +algorithms that are functionally equivalent from an accounting perspective may +correspond to the same `DpEvent`. Similarly, various independent implementations +of accounting algorithms may consume the same `DpEvent`. + +All `DpEvent`s processed together are assumed to take place on a single dataset +of records. 
`DpEvent`s fall into roughly three categories: + - `DpEvent`s that release an output, and incur a privacy cost, + e.g., `GaussianDpEvent`. + - `DpEvent`s that select a subset (or subsets) of the dataset, and run nested + `DpEvent`s on those subsets, e.g., `PoissonSampledDpEvent`. + - `DpEvent`s that represent (possibly sequentially) applying (multiple) + mechanisms to the dataset (or currently active subset). Currently, these are + only `ComposedDpEvent` and `SelfComposedDpEvent`. + +Each `DpEvent` should completely document the mathematical behavior and +assumptions of the mechanism it represents so that the writer of an accountant +class can implement the accounting correctly without knowing any other +implementation details of the algorithm that produced it. + +New mechanism types should be given a corresponding `DpEvent` class, although +not all accountants will be required to support them. In general, +`PrivacyAccountant` implementations are not required to be aware of all +`DpEvent` classes, but they should support the following basic events and handle +them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and +`SelfComposedDpEvent`. They should return `False` from `supports(event)` for +`UnsupportedDpEvent` or any other event type they have not been designed to +handle. + +To ensure that a `PrivacyAccountant` does not accidentally start to return +incorrect results, the following should be enforced: + * `DpEvent` classes and their parameters should never be removed, barring some + extended, onerous deprecation process. + * New parameters cannot be added to existing mechanisms unless they are + optional. That is, old composed `DpEvent` objects that do not include them + must remain valid. + * The meaning of existing mechanisms or parameters must not change. That is, + existing mechanisms should not have their implementations change in ways that + alter their privacy properties; new `DpEvent` classes should be added + instead. 
+ * `PrivacyAccountant` implementations are expected to return `False` from + `supports(event)` when processing unknown mechanisms. +""" + +from typing import List, Union + +import attr + + +class DpEvent(object): + """Represents application of a private mechanism. + + A `DpEvent` describes a differentially private mechanism sufficiently for + computing the associated privacy losses, both in isolation and in combination + with other `DpEvent`s. + """ + + +@attr.s(frozen=True) +class NoOpDpEvent(DpEvent): + """Represents application of an operation with no privacy impact. + + A `NoOpDpEvent` is generally never required, but it can be useful as a + placeholder where a `DpEvent` is expected, such as in tests or some live + accounting pipelines. + """ + + +@attr.s(frozen=True) +class NonPrivateDpEvent(DpEvent): + """Represents application of a non-private operation. + + This `DpEvent` should be used when an operation is performed that does not + satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite + epsilon/delta when encountering a `NonPrivateDpEvent`. + """ + + +@attr.s(frozen=True) +class UnsupportedDpEvent(DpEvent): + """Represents application of an as-yet unsupported operation. + + This `DpEvent` should be used when an operation is performed that does not yet + have any associated DP description, or if the description is temporarily + inaccessible, for example, during development. All `PrivacyAccountant`s should + return `supports(event) == False` for `UnsupportedDpEvent`. + """ + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class GaussianDpEvent(DpEvent): + """Represents an application of the Gaussian mechanism. + + For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z. + If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is + defined as s / C. 
+ """ + noise_multiplier: float + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class LaplaceDpEvent(DpEvent): + """Represents an application of the Laplace mechanism. + + For values v_i and noise z sampled coordinate-wise from the Laplace + distribution L(0, s), this mechanism returns sum_i v_i + z. + The probability density function of the Laplace distribution L(0, s) with + parameter s is given as exp(-|x|/s) * (0.5/s) at x for any real value x. + If the L_1 norms of the values are bounded ||v_i||_1 <= C, the noise_multiplier + is defined as s / C. + """ + noise_multiplier: float + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SelfComposedDpEvent(DpEvent): + """Represents repeated application of a mechanism. + + The repeated applications may be adaptive, where the query producing each + event depends on the results of prior queries. + + This is equivalent to a `ComposedDpEvent` that contains a list of length `count` + of identical copies of `event`. + """ + event: DpEvent + count: int + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class ComposedDpEvent(DpEvent): + """Represents application of a series of composed mechanisms. + + The composition may be adaptive, where the query producing each event depends + on the results of prior queries. + """ + events: List[DpEvent] + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class PoissonSampledDpEvent(DpEvent): + """Represents an application of Poisson subsampling. + + Each record in the dataset is included in the sample independently with + probability `sampling_probability`. Then the `DpEvent` `event` is applied + to the sample of records. + """ + sampling_probability: float + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SampledWithReplacementDpEvent(DpEvent): + """Represents sampling a fixed-size batch of records with replacement. 
+ + A sample of `sample_size` (possibly repeated) records is drawn uniformly at + random from the set of possible samples of a source dataset of size + `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of + records. + """ + source_dataset_size: int + sample_size: int + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SampledWithoutReplacementDpEvent(DpEvent): + """Represents sampling a fixed-size batch of records without replacement. + + A sample of `sample_size` unique records is drawn uniformly at random from the + set of possible samples of a source dataset of size `source_dataset_size`. + Then the `DpEvent` `event` is applied to the sample of records. + """ + source_dataset_size: int + sample_size: int + event: DpEvent + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class SingleEpochTreeAggregationDpEvent(DpEvent): + """Represents aggregation for a single epoch using one or more trees. + + Multiple tree-aggregation steps can occur, but it is required that each + record occurs at most once *across all trees*. See Appendix D of + "Practical and Private (Deep) Learning without Sampling or Shuffling" + https://arxiv.org/abs/2103.00039. + + To represent the common case where the same record can occur in multiple + trees (but still at most once per tree), wrap this with `SelfComposedDpEvent` + or `ComposedDpEvent` and use a scalar for `step_counts`. + + Attributes: + noise_multiplier: The ratio of the noise per node to the sensitivity. + step_counts: The number of steps in each tree. May be a scalar for a single + tree. + """ + noise_multiplier: float + step_counts: Union[int, List[int]]