diff --git a/tensorflow_privacy/__init__.py b/tensorflow_privacy/__init__.py index 29b64ff..d3503f3 100644 --- a/tensorflow_privacy/__init__.py +++ b/tensorflow_privacy/__init__.py @@ -38,10 +38,8 @@ else: from tensorflow_privacy.privacy.analysis.dp_event import SelfComposedDpEvent from tensorflow_privacy.privacy.analysis.dp_event import ComposedDpEvent from tensorflow_privacy.privacy.analysis.dp_event import PoissonSampledDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWrDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWorDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import ShuffledDatasetDpEvent - from tensorflow_privacy.privacy.analysis.dp_event import TreeAggregationDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import SampledWithReplacementDpEvent + from tensorflow_privacy.privacy.analysis.dp_event import SampledWithoutReplacementDpEvent # Analysis from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy diff --git a/tensorflow_privacy/privacy/analysis/dp_event.py b/tensorflow_privacy/privacy/analysis/dp_event.py index b40b823..290970f 100644 --- a/tensorflow_privacy/privacy/analysis/dp_event.py +++ b/tensorflow_privacy/privacy/analysis/dp_event.py @@ -11,7 +11,53 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Standard DpEvent classes.""" +"""Standard DpEvent classes. + +A `DpEvent` represents the (hyper)parameters of a differentially +private query, amplification mechanism, or composition, that are necessary +and sufficient for privacy accounting. Various independent implementations of DP +algorithms that are functionally equivalent from an accounting perspective may +correspond to the same `DpEvent`. 
Similarly, various independent implementations +of accounting algorithms may consume the same `DpEvent`. + +All `DpEvents` processed together are assumed to take place on a single dataset +of records. `DpEvents` fall into roughly three categories: + - `DpEvents` that release an output, and incur a privacy cost, + e.g., `GaussianDpEvent`. + - `DpEvents` that select a subset (or subsets) of the dataset, and run nested + `DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`. + - `DpEvents` that represent (possibly sequentially) applying (multiple) + mechanisms to the dataset (or currently active subset). Currently, this is + only `ComposedDpEvent` and `SelfComposedDpEvent`. + +Each `DpEvent` should completely document the mathematical behavior and +assumptions of the mechanism it represents so that the writer of an accountant +class can implement the accounting correctly without knowing any other +implementation details of the algorithm that produced it. + +New mechanism types should be given a corresponding `DpEvent` class, although +not all accountants will be required to support them. In general, +`PrivacyAccountant` implementations are not required to be aware of all +`DpEvent` classes, but they should support the following basic events and handle +them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and +`SelfComposedDpEvent`. They should return `supports(event)` is False for +`UnsupportedDpEvent` or any other event type they have not been designed to +handle. + +To ensure that a `PrivacyAccountant` does not accidentally start to return +incorrect results, the following should be enforced: + * `DpEvent` classes and their parameters should never be removed, barring some + extended, onerous deprecation process. + * New parameters cannot be added to existing mechanisms unless they are + optional. That is, old composed `DpEvent` objects that do not include them + must remain valid. 
+ * The meaning of existing mechanisms or parameters must not change. That is, + existing mechanisms should not have their implementations change in ways that + alter their privacy properties; new `DpEvent` classes should be added + instead. + * `PrivacyAccountant` implementations are expected to return `supports(event)` + is `False` when processing unknown mechanisms. +""" from typing import List @@ -19,7 +65,7 @@ import attr class DpEvent(object): - """Base class for `DpEvent`s. + """Represents application of a private mechanism. A `DpEvent` describes a differentially private mechanism sufficiently for computing the associated privacy losses, both in isolation and in combination @@ -29,7 +75,7 @@ class DpEvent(object): @attr.s(frozen=True) class NoOpDpEvent(DpEvent): - """A `DpEvent` to represent operations with no privacy impact. + """Represents application of an operation with no privacy impact. A `NoOpDpEvent` is generally never required, but it can be useful as a placeholder where a `DpEvent` is expected, such as in tests or some live @@ -39,7 +85,7 @@ class NoOpDpEvent(DpEvent): @attr.s(frozen=True) class NonPrivateDpEvent(DpEvent): - """A `DpEvent` to represent non-private operations. + """Represents application of a non-private operation. This `DpEvent` should be used when an operation is performed that does not satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite @@ -49,65 +95,85 @@ class NonPrivateDpEvent(DpEvent): @attr.s(frozen=True) class UnsupportedDpEvent(DpEvent): - """A `DpEvent` to represent as-yet unsupported operations. + """Represents application of an as-yet unsupported operation. This `DpEvent` should be used when an operation is performed that does not yet have any associated DP description, or if the description is temporarily inaccessible, for example, during development. All `PrivacyAccountant`s should - return `is_supported(event)` is `False` for `UnsupportedDpEvent`. 
+ return `supports(event) == False` for `UnsupportedDpEvent`. """ @attr.s(frozen=True, slots=True, auto_attribs=True) class GaussianDpEvent(DpEvent): - """The Gaussian mechanism.""" + """Represents an application of the Gaussian mechanism. + + For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z. + If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is + defined as s / C. + """ noise_multiplier: float @attr.s(frozen=True, slots=True, auto_attribs=True) class SelfComposedDpEvent(DpEvent): - """A mechanism composed with itself multiple times.""" + """Represents repeated application of a mechanism. + + The repeated applications may be adaptive, where the query producing each + event depends on the results of prior queries. + + This is equivalent to `ComposedDpEvent` that contains a list of length `count` + of identical copies of `event`. + """ event: DpEvent count: int @attr.s(frozen=True, slots=True, auto_attribs=True) class ComposedDpEvent(DpEvent): - """A series of composed mechanisms.""" + """Represents application of a series of composed mechanisms. + + The composition may be adaptive, where the query producing each event depends + on the results of prior queries. + """ events: List[DpEvent] @attr.s(frozen=True, slots=True, auto_attribs=True) class PoissonSampledDpEvent(DpEvent): - """An application of Poisson subsampling.""" + """Represents an application of Poisson subsampling. + + Each record in the dataset is included in the sample independently with + probability `sampling_probability`. Then the `DpEvent` `event` is applied + to the sample of records. + """ sampling_probability: float event: DpEvent @attr.s(frozen=True, slots=True, auto_attribs=True) -class FixedBatchSampledWrDpEvent(DpEvent): - """Sampling exactly `batch_size` records with replacement.""" - dataset_size: int - batch_size: int +class SampledWithReplacementDpEvent(DpEvent): + """Represents sampling a fixed sized batch of records with replacement. 
+ + A sample of `sample_size` (possibly repeated) records is drawn uniformly at + random from the set of possible samples of a source dataset of size + `source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of + records. + """ + source_dataset_size: int + sample_size: int event: DpEvent @attr.s(frozen=True, slots=True, auto_attribs=True) -class FixedBatchSampledWorDpEvent(DpEvent): - """Sampling exactly `batch_size` records without replacement.""" - dataset_size: int - batch_size: int +class SampledWithoutReplacementDpEvent(DpEvent): + """Represents sampling a fixed sized batch of records without replacement. + + A sample of `sample_size` unique records is drawn uniformly at random from the + set of possible samples of a source dataset of size `source_dataset_size`. + Then the `DpEvent` `event` is applied to the sample of records. + """ + source_dataset_size: int + sample_size: int event: DpEvent - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class ShuffledDatasetDpEvent(DpEvent): - """Shuffling a dataset and applying a mechanism to each partition.""" - partition_events: ComposedDpEvent - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class TreeAggregationDpEvent(DpEvent): - """Applying a series of mechanisms with tree aggregation.""" - round_events: ComposedDpEvent - max_record_occurences_across_all_rounds: int diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py index 2bbc327..24463b8 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py @@ -551,13 +551,13 @@ class RdpAccountant(privacy_accountant.PrivacyAccountant): noise_multiplier=event.event.noise_multiplier, orders=self._orders) return True - elif isinstance(event, dp_event.FixedBatchSampledWorDpEvent): + elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent): if 
(self._neighboring_relation is not NeighborRel.REPLACE_ONE or not isinstance(event.event, dp_event.GaussianDpEvent)): return False if do_compose: self._rdp += count * _compute_rdp_sample_wor_gaussian( - q=event.batch_size / event.dataset_size, + q=event.sample_size / event.source_dataset_size, noise_multiplier=event.event.noise_multiplier, orders=self._orders) return True diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py index 817d41c..e030fcd 100644 --- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py +++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py @@ -94,13 +94,13 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, self.assertTrue(aor_accountant.supports(event)) self.assertFalse(ro_accountant.supports(event)) - event = dp_event.FixedBatchSampledWorDpEvent(1000, 10, - dp_event.GaussianDpEvent(1.0)) + event = dp_event.SampledWithoutReplacementDpEvent( + 1000, 10, dp_event.GaussianDpEvent(1.0)) self.assertFalse(aor_accountant.supports(event)) self.assertTrue(ro_accountant.supports(event)) - event = dp_event.FixedBatchSampledWrDpEvent(1000, 10, - dp_event.GaussianDpEvent(1.0)) + event = dp_event.SampledWithReplacementDpEvent( + 1000, 10, dp_event.GaussianDpEvent(1.0)) self.assertFalse(aor_accountant.supports(event)) self.assertFalse(ro_accountant.supports(event)) @@ -148,8 +148,8 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest, accountant = rdp_privacy_accountant.RdpAccountant( [3.14159], privacy_accountant.NeighboringRelation.REPLACE_ONE) accountant.compose( - dp_event.FixedBatchSampledWorDpEvent(1000, 0, - dp_event.GaussianDpEvent(1.0))) + dp_event.SampledWithoutReplacementDpEvent( + 1000, 0, dp_event.GaussianDpEvent(1.0))) self.assertEqual(accountant.get_epsilon(1e-10), 0) self.assertEqual(accountant.get_delta(1e-10), 0)