diff --git a/tensorflow_privacy/privacy/analysis/BUILD b/tensorflow_privacy/privacy/analysis/BUILD index fa64c94..63abc69 100644 --- a/tensorflow_privacy/privacy/analysis/BUILD +++ b/tensorflow_privacy/privacy/analysis/BUILD @@ -51,12 +51,6 @@ py_test( deps = [":compute_noise_from_budget_lib"], ) -py_library( - name = "dp_event", - srcs = ["dp_event.py"], - srcs_version = "PY3", -) - py_library( name = "gdp_accountant", srcs = ["gdp_accountant.py"], diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py deleted file mode 100644 index 5d37b43..0000000 --- a/tensorflow_privacy/privacy/analysis/dp_event.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright 2021, The TensorFlow Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Standard DpEvent classes. - -A `DpEvent` represents the (hyper)parameters of a differentially -private query, amplification mechanism, or composition, that are necessary -and sufficient for privacy accounting. Various independent implementations of DP -algorithms that are functionally equivalent from an accounting perspective may -correspond to the same `DpEvent`. Similarly, various independent implementations -of accounting algorithms may consume the same `DpEvent`. - -All `DpEvents` processed together are assumed to take place on a single dataset -of records. `DpEvents` fall into roughly three categories: - - `DpEvents` that release an output, and incur a privacy cost, - e.g., `GaussianDpEvent`. - - `DpEvents` that select a subset (or subsets) of the dataset, and run nested - `DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`. - - `DpEvents` that represent (possibly sequentially) applying (multiple) - mechanisms to the dataset (or currently active subset). Currently, this is - only `ComposedDpEvent` and `SelfComposedDpEvent`. - -Each `DpEvent` should completely document the mathematical behavior and -assumptions of the mechanism it represents so that the writer of an accountant -class can implement the accounting correctly without knowing any other -implementation details of the algorithm that produced it. - -New mechanism types should be given a corresponding `DpEvent` class, although -not all accountants will be required to support them. In general, -`PrivacyAccountant` implementations are not required to be aware of all -`DpEvent` classes, but they should support the following basic events and handle -them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and -`SelfComposedDpEvent`. They should return `supports(event)` is False for -`UnsupportedDpEvent` or any other event type they have not been designed to -handle. - -To ensure that a `PrivacyAccountant` does not accidentally start to return -incorrect results, the following should be enforced: - * `DpEvent` classes and their parameters should never be removed, barring some - extended, onerous deprecation process. - * New parameters cannot be added to existing mechanisms unless they are - optional. That is, old composed `DpEvent` objects that do not include them - must remain valid. - * The meaning of existing mechanisms or parameters must not change. That is, - existing mechanisms should not have their implementations change in ways that - alter their privacy properties; new `DpEvent` classes should be added - instead. - * `PrivacyAccountant` implementations are expected to return `supports(event)` - is `False` when processing unknown mechanisms. -""" - -from typing import List, Union - -import attr - - -class DpEvent(object): - """Represents application of a private mechanism. - - A `DpEvent` describes a differentially private mechanism sufficiently for - computing the associated privacy losses, both in isolation and in combination - with other `DpEvent`s. - """ - - -@attr.s(frozen=True) -class NoOpDpEvent(DpEvent): - """Represents appplication of an operation with no privacy impact. - - A `NoOpDpEvent` is generally never required, but it can be useful as a - placeholder where a `DpEvent` is expected, such as in tests or some live - accounting pipelines. - """ - - -@attr.s(frozen=True) -class NonPrivateDpEvent(DpEvent): - """Represents application of a non-private operation. - - This `DpEvent` should be used when an operation is performed that does not - satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite - epsilon/delta when encountering a `NonPrivateDpEvent`. - """ - - -@attr.s(frozen=True) -class UnsupportedDpEvent(DpEvent): - """Represents application of an as-yet unsupported operation. - - This `DpEvent` should be used when an operation is performed that does not yet - have any associated DP description, or if the description is temporarily - inaccessible, for example, during development. All `PrivacyAccountant`s should - return `supports(event) == False` for `UnsupportedDpEvent`. - """ - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class GaussianDpEvent(DpEvent): - """Represents an application of the Gaussian mechanism. - - For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z. - If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is - defined as s / C. - """ - noise_multiplier: float - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class LaplaceDpEvent(DpEvent): - """Represents an application of the Laplace mechanism. - - For values v_i and noise z sampled coordinate-wise from the Laplace - distribution L(0, s), this mechanism returns sum_i v_i + z. - The probability density function of the Laplace distribution L(0, s) with - parameter s is given as exp(-|x|/s) * (0.5/s) at x for any real value x. - If the L_1 norm of the values are bounded ||v_i||_1 <= C, the noise_multiplier - is defined as s / C. - """ - noise_multiplier: float - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class SelfComposedDpEvent(DpEvent): - """Represents repeated application of a mechanism. - - The repeated applications may be adaptive, where the query producing each - event depends on the results of prior queries. - - This is equivalent to `ComposedDpEvent` that contains a list of length `count` - of identical copies of `event`. - """ - event: DpEvent - count: int - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class ComposedDpEvent(DpEvent): - """Represents application of a series of composed mechanisms. - - The composition may be adaptive, where the query producing each event depends - on the results of prior queries. - """ - events: List[DpEvent] - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class PoissonSampledDpEvent(DpEvent): - """Represents an application of Poisson subsampling. - - Each record in the dataset is included in the sample independently with - probability `sampling_probability`. Then the `DpEvent` `event` is applied - to the sample of records. - """ - sampling_probability: float - event: DpEvent - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class SampledWithReplacementDpEvent(DpEvent): - """Represents sampling a fixed sized batch of records with replacement. - - A sample of `sample_size` (possibly repeated) records is drawn uniformly at - random from the set of possible samples of a source dataset of size - `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of - records. - """ - source_dataset_size: int - sample_size: int - event: DpEvent - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class SampledWithoutReplacementDpEvent(DpEvent): - """Represents sampling a fixed sized batch of records without replacement. - - A sample of `sample_size` unique records is drawn uniformly at random from the - set of possible samples of a source dataset of size `source_dataset_size`. - Then the `DpEvent` `event` is applied to the sample of records. - """ - source_dataset_size: int - sample_size: int - event: DpEvent - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class SingleEpochTreeAggregationDpEvent(DpEvent): - """Represents aggregation for a single epoch using one or more trees. - - Multiple tree-aggregation steps can occur, but it is required that each - record occurs at most once *across all trees*. See appendix D of - "Practical and Private (Deep) Learning without Sampling or Shuffling" - https://arxiv.org/abs/2103.00039. - - To represent the common case where the same record can occur in multiple - trees (but still at most once per tree), wrap this with `SelfComposedDpEvent` - or `ComposedDpEvent` and use a scalar for `step_counts`. - - Attributes: - noise_multiplier: The ratio of the noise per node to the sensitivity. - step_counts: The number of steps in each tree. May be a scalar for a single - tree. - """ - noise_multiplier: float - step_counts: Union[int, List[int]]