Much more detailed documentation for DpEvent.

The as-yet-unused `TreeAggregationDpEvent` is removed. It will be added as a custom `DpEvent` alongside the DpQueries in tree_aggregation_query.py in the near future.

PiperOrigin-RevId: 398808647
This commit is contained in:
Galen Andrew 2021-09-24 13:59:16 -07:00 committed by A. Unique TensorFlower
parent 39c75f62af
commit b8b4c4b264
4 changed files with 106 additions and 42 deletions

View file

@ -38,10 +38,8 @@ else:
from tensorflow_privacy.privacy.analysis.dp_event import SelfComposedDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import ComposedDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import PoissonSampledDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWrDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import FixedBatchSampledWorDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import ShuffledDatasetDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import TreeAggregationDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import SampledWithReplacementDpEvent
from tensorflow_privacy.privacy.analysis.dp_event import SampledWithoutReplacementDpEvent
# Analysis
from tensorflow_privacy.privacy.analysis.compute_dp_sgd_privacy_lib import compute_dp_sgd_privacy

View file

@ -11,7 +11,53 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Standard DpEvent classes."""
"""Standard DpEvent classes.
A `DpEvent` represents the (hyper)parameters of a differentially
private query, amplification mechanism, or composition, that are necessary
and sufficient for privacy accounting. Various independent implementations of DP
algorithms that are functionally equivalent from an accounting perspective may
correspond to the same `DpEvent`. Similarly, various independent implementations
of accounting algorithms may consume the same `DpEvent`.
All `DpEvents` processed together are assumed to take place on a single dataset
of records. `DpEvents` fall into roughly three categories:
- `DpEvents` that release an output, and incur a privacy cost,
e.g., `GaussianDpEvent`.
- `DpEvents` that select a subset (or subsets) of the dataset, and run nested
`DpEvents` on those subsets, e.g., `PoissonSampledDpEvent`.
- `DpEvents` that represent (possibly sequentially) applying (multiple)
mechanisms to the dataset (or currently active subset). Currently, this is
only `ComposedDpEvent` and `SelfComposedDpEvent`.
Each `DpEvent` should completely document the mathematical behavior and
assumptions of the mechanism it represents so that the writer of an accountant
class can implement the accounting correctly without knowing any other
implementation details of the algorithm that produced it.
New mechanism types should be given a corresponding `DpEvent` class, although
not all accountants will be required to support them. In general,
`PrivacyAccountant` implementations are not required to be aware of all
`DpEvent` classes, but they should support the following basic events and handle
them appropriately: `NoOpDpEvent`, `NonPrivateDpEvent`, `ComposedDpEvent`, and
`SelfComposedDpEvent`. Their `supports(event)` method should return `False` for
`UnsupportedDpEvent` or any other event type they have not been designed to
handle.
To ensure that a `PrivacyAccountant` does not accidentally start to return
incorrect results, the following should be enforced:
* `DpEvent` classes and their parameters should never be removed, barring some
extended, onerous deprecation process.
* New parameters cannot be added to existing mechanisms unless they are
optional. That is, old composed `DpEvent` objects that do not include them
must remain valid.
* The meaning of existing mechanisms or parameters must not change. That is,
existing mechanisms should not have their implementations change in ways that
alter their privacy properties; new `DpEvent` classes should be added
instead.
* `PrivacyAccountant` implementations are expected to return `False` from
`supports(event)` when processing unknown mechanisms.
"""
from typing import List
@ -19,7 +65,7 @@ import attr
class DpEvent(object):
"""Base class for `DpEvent`s.
"""Represents application of a private mechanism.
A `DpEvent` describes a differentially private mechanism sufficiently for
computing the associated privacy losses, both in isolation and in combination
@ -29,7 +75,7 @@ class DpEvent(object):
@attr.s(frozen=True)
class NoOpDpEvent(DpEvent):
"""A `DpEvent` to represent operations with no privacy impact.
"""Represents application of an operation with no privacy impact.
A `NoOpDpEvent` is generally never required, but it can be useful as a
placeholder where a `DpEvent` is expected, such as in tests or some live
@ -39,7 +85,7 @@ class NoOpDpEvent(DpEvent):
@attr.s(frozen=True)
class NonPrivateDpEvent(DpEvent):
"""A `DpEvent` to represent non-private operations.
"""Represents application of a non-private operation.
This `DpEvent` should be used when an operation is performed that does not
satisfy (epsilon, delta)-DP. All `PrivacyAccountant`s should return infinite
@ -49,65 +95,85 @@ class NonPrivateDpEvent(DpEvent):
@attr.s(frozen=True)
class UnsupportedDpEvent(DpEvent):
"""A `DpEvent` to represent as-yet unsupported operations.
"""Represents application of an as-yet unsupported operation.
This `DpEvent` should be used when an operation is performed that does not yet
have any associated DP description, or if the description is temporarily
inaccessible, for example, during development. All `PrivacyAccountant`s should
return `is_supported(event)` is `False` for `UnsupportedDpEvent`.
return `supports(event) == False` for `UnsupportedDpEvent`.
"""
@attr.s(frozen=True, slots=True, auto_attribs=True)
class GaussianDpEvent(DpEvent):
"""The Gaussian mechanism."""
"""Represents an application of the Gaussian mechanism.
For values v_i and noise z ~ N(0, s^2I), this mechanism returns sum_i v_i + z.
If the norms of the values are bounded ||v_i|| <= C, the noise_multiplier is
defined as s / C.
"""
noise_multiplier: float
@attr.s(frozen=True, slots=True, auto_attribs=True)
class SelfComposedDpEvent(DpEvent):
"""A mechanism composed with itself multiple times."""
"""Represents repeated application of a mechanism.
The repeated applications may be adaptive, where the query producing each
event depends on the results of prior queries.
This is equivalent to `ComposedDpEvent` that contains a list of length `count`
of identical copies of `event`.
"""
event: DpEvent
count: int
@attr.s(frozen=True, slots=True, auto_attribs=True)
class ComposedDpEvent(DpEvent):
"""A series of composed mechanisms."""
"""Represents application of a series of composed mechanisms.
The composition may be adaptive, where the query producing each event depends
on the results of prior queries.
"""
events: List[DpEvent]
@attr.s(frozen=True, slots=True, auto_attribs=True)
class PoissonSampledDpEvent(DpEvent):
"""An application of Poisson subsampling."""
"""Represents an application of Poisson subsampling.
Each record in the dataset is included in the sample independently with
probability `sampling_probability`. Then the `DpEvent` `event` is applied
to the sample of records.
"""
sampling_probability: float
event: DpEvent
@attr.s(frozen=True, slots=True, auto_attribs=True)
class FixedBatchSampledWrDpEvent(DpEvent):
"""Sampling exactly `batch_size` records with replacement."""
dataset_size: int
batch_size: int
class SampledWithReplacementDpEvent(DpEvent):
"""Represents sampling a fixed-size batch of records with replacement.
A sample of `sample_size` (possibly repeated) records is drawn uniformly at
random from the set of possible samples of a source dataset of size
`source_dataset_size`. Then the `DpEvent` `event` is applied to the sample of
records.
"""
source_dataset_size: int
sample_size: int
event: DpEvent
@attr.s(frozen=True, slots=True, auto_attribs=True)
class FixedBatchSampledWorDpEvent(DpEvent):
"""Sampling exactly `batch_size` records without replacement."""
dataset_size: int
batch_size: int
class SampledWithoutReplacementDpEvent(DpEvent):
"""Represents sampling a fixed-size batch of records without replacement.
A sample of `sample_size` unique records is drawn uniformly at random from the
set of possible samples of a source dataset of size `source_dataset_size`.
Then the `DpEvent` `event` is applied to the sample of records.
"""
source_dataset_size: int
sample_size: int
event: DpEvent
@attr.s(frozen=True, slots=True, auto_attribs=True)
class ShuffledDatasetDpEvent(DpEvent):
"""Shuffling a dataset and applying a mechanism to each partition."""
partition_events: ComposedDpEvent
@attr.s(frozen=True, slots=True, auto_attribs=True)
class TreeAggregationDpEvent(DpEvent):
"""Applying a series of mechanisms with tree aggregation."""
round_events: ComposedDpEvent
max_record_occurences_across_all_rounds: int

View file

@ -551,13 +551,13 @@ class RdpAccountant(privacy_accountant.PrivacyAccountant):
noise_multiplier=event.event.noise_multiplier,
orders=self._orders)
return True
elif isinstance(event, dp_event.FixedBatchSampledWorDpEvent):
elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent):
if (self._neighboring_relation is not NeighborRel.REPLACE_ONE or
not isinstance(event.event, dp_event.GaussianDpEvent)):
return False
if do_compose:
self._rdp += count * _compute_rdp_sample_wor_gaussian(
q=event.batch_size / event.dataset_size,
q=event.sample_size / event.source_dataset_size,
noise_multiplier=event.event.noise_multiplier,
orders=self._orders)
return True

View file

@ -94,13 +94,13 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
self.assertTrue(aor_accountant.supports(event))
self.assertFalse(ro_accountant.supports(event))
event = dp_event.FixedBatchSampledWorDpEvent(1000, 10,
dp_event.GaussianDpEvent(1.0))
event = dp_event.SampledWithoutReplacementDpEvent(
1000, 10, dp_event.GaussianDpEvent(1.0))
self.assertFalse(aor_accountant.supports(event))
self.assertTrue(ro_accountant.supports(event))
event = dp_event.FixedBatchSampledWrDpEvent(1000, 10,
dp_event.GaussianDpEvent(1.0))
event = dp_event.SampledWithReplacementDpEvent(
1000, 10, dp_event.GaussianDpEvent(1.0))
self.assertFalse(aor_accountant.supports(event))
self.assertFalse(ro_accountant.supports(event))
@ -148,8 +148,8 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
accountant = rdp_privacy_accountant.RdpAccountant(
[3.14159], privacy_accountant.NeighboringRelation.REPLACE_ONE)
accountant.compose(
dp_event.FixedBatchSampledWorDpEvent(1000, 0,
dp_event.GaussianDpEvent(1.0)))
dp_event.SampledWithoutReplacementDpEvent(
1000, 0, dp_event.GaussianDpEvent(1.0)))
self.assertEqual(accountant.get_epsilon(1e-10), 0)
self.assertEqual(accountant.get_delta(1e-10), 0)