From 977647a3bf3ff11643804169b52be5bdddb8f666 Mon Sep 17 00:00:00 2001
From: Galen Andrew
Date: Tue, 12 Oct 2021 17:13:03 -0700
Subject: [PATCH] Add support for subsampled multi-Gaussian queries
 (composition of several Gaussian queries that may have different noise
 multipliers). This is used, for example, by QuantileAdaptiveClipSumQuery.

Degenerate compositions (a ComposedDpEvent with no sub-events, or a
SelfComposedDpEvent with a count of zero) contain no Gaussian at all, so they
are reported as unsupported instead of raising ZeroDivisionError while
computing the effective noise multiplier.

PiperOrigin-RevId: 402693872
---
Note: the post-image blob id on the first `index` line below predates the
degenerate-composition guard and has not been recomputed.

 .../analysis/rdp_privacy_accountant.py        | 64 ++++++++++++++++---
 .../analysis/rdp_privacy_accountant_test.py   | 55 ++++++++++++++++
 2 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
index 24463b8..5c012a3 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant.py
@@ -483,6 +483,52 @@ def _compute_rdp_sample_wor_gaussian_int(q, sigma, alpha):
   return log_a
 
 
+def _effective_gaussian_noise_multiplier(event: dp_event.DpEvent):
+  """Determines the effective noise multiplier of nested structure of Gaussians.
+
+  A series of Gaussian queries on the same data can be reexpressed as a single
+  query with pre- and post- processing. For details, see section 3 of
+  https://arxiv.org/pdf/1812.06210.pdf.
+
+  Args:
+    event: A `dp_event.DpEvent`. In order for conversion to be successful it
+      must consist of a single `dp_event.GaussianDpEvent`, or a nested structure
+      of `dp_event.ComposedDpEvent` and/or `dp_event.SelfComposedDpEvent`
+      bottoming out in `dp_event.GaussianDpEvent`s.
+
+  Returns:
+    The noise multiplier of the equivalent `dp_event.GaussianDpEvent`, or None
+    if the input event was not a `dp_event.GaussianDpEvent` or a nested
+    structure of `dp_event.ComposedDpEvent` and/or
+    `dp_event.SelfComposedDpEvent` bottoming out in `dp_event.GaussianDpEvent`s.
+    None is also returned for a degenerate composition that contains no
+    Gaussian at all (no sub-events, or a repetition count of zero), since no
+    finite noise multiplier describes it.
+  """
+  if isinstance(event, dp_event.GaussianDpEvent):
+    return event.noise_multiplier
+  elif isinstance(event, dp_event.ComposedDpEvent):
+    if not event.events:
+      # Nothing to combine; `0**-0.5` below would raise ZeroDivisionError.
+      return None
+    sum_sigma_inv_sq = 0
+    for e in event.events:
+      sigma = _effective_gaussian_noise_multiplier(e)
+      if sigma is None:
+        return None
+      # Inverse squared noise multipliers add up across composed queries.
+      sum_sigma_inv_sq += sigma**-2
+    return sum_sigma_inv_sq**-0.5
+  elif isinstance(event, dp_event.SelfComposedDpEvent):
+    if event.count < 1:
+      # Zero repetitions contain no Gaussian; avoids dividing by zero below.
+      return None
+    sigma = _effective_gaussian_noise_multiplier(event.event)
+    return None if sigma is None else (event.count * sigma**-2)**-0.5
+  else:
+    return None
+
+
 class RdpAccountant(privacy_accountant.PrivacyAccountant):
   """Privacy accountant that uses Renyi differential privacy."""
 
@@ -542,23 +588,29 @@ class RdpAccountant(privacy_accountant.PrivacyAccountant):
             q=1.0, noise_multiplier=event.noise_multiplier, orders=self._orders)
       return True
     elif isinstance(event, dp_event.PoissonSampledDpEvent):
-      if (self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE or
-          not isinstance(event.event, dp_event.GaussianDpEvent)):
+      if self._neighboring_relation is not NeighborRel.ADD_OR_REMOVE_ONE:
+        return False
+      gaussian_noise_multiplier = _effective_gaussian_noise_multiplier(
+          event.event)
+      if gaussian_noise_multiplier is None:
         return False
       if do_compose:
         self._rdp += count * _compute_rdp_poisson_subsampled_gaussian(
             q=event.sampling_probability,
-            noise_multiplier=event.event.noise_multiplier,
+            noise_multiplier=gaussian_noise_multiplier,
             orders=self._orders)
       return True
     elif isinstance(event, dp_event.SampledWithoutReplacementDpEvent):
-      if (self._neighboring_relation is not NeighborRel.REPLACE_ONE or
-          not isinstance(event.event, dp_event.GaussianDpEvent)):
+      if self._neighboring_relation is not NeighborRel.REPLACE_ONE:
+        return False
+      gaussian_noise_multiplier = _effective_gaussian_noise_multiplier(
+          event.event)
+      if gaussian_noise_multiplier is None:
         return False
       if do_compose:
         self._rdp += count * _compute_rdp_sample_wor_gaussian(
             q=event.sample_size / event.source_dataset_size,
-            noise_multiplier=event.event.noise_multiplier,
+            noise_multiplier=gaussian_noise_multiplier,
             orders=self._orders)
       return True
     else:
diff --git a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py
index e030fcd..fc4b8dd 100644
--- a/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py
+++ b/tensorflow_privacy/privacy/analysis/rdp_privacy_accountant_test.py
@@ -94,11 +94,23 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
     self.assertTrue(aor_accountant.supports(event))
     self.assertFalse(ro_accountant.supports(event))
 
+    composed_gaussian = dp_event.ComposedDpEvent(
+        [dp_event.GaussianDpEvent(1.0),
+         dp_event.GaussianDpEvent(2.0)])
+    event = dp_event.PoissonSampledDpEvent(0.1, composed_gaussian)
+    self.assertTrue(aor_accountant.supports(event))
+    self.assertFalse(ro_accountant.supports(event))
+
     event = dp_event.SampledWithoutReplacementDpEvent(
         1000, 10, dp_event.GaussianDpEvent(1.0))
     self.assertFalse(aor_accountant.supports(event))
     self.assertTrue(ro_accountant.supports(event))
 
+    event = dp_event.SampledWithoutReplacementDpEvent(1000, 10,
+                                                      composed_gaussian)
+    self.assertFalse(aor_accountant.supports(event))
+    self.assertTrue(ro_accountant.supports(event))
+
     event = dp_event.SampledWithReplacementDpEvent(
         1000, 10, dp_event.GaussianDpEvent(1.0))
     self.assertFalse(aor_accountant.supports(event))
@@ -166,6 +178,49 @@ class RdpPrivacyAccountantTest(privacy_accountant_test.PrivacyAccountantTest,
     accountant.compose(event)
     self.assertAlmostEqual(accountant._rdp[0], alpha / (2 * sigma**2))
 
+  def test_compute_rdp_multi_gaussian(self):
+    alpha = 3.14159
+    sigma1, sigma2 = 2.71828, 6.28319
+
+    rdp1 = alpha / (2 * sigma1**2)
+    rdp2 = alpha / (2 * sigma2**2)
+    rdp = rdp1 + rdp2
+
+    accountant = rdp_privacy_accountant.RdpAccountant(orders=[alpha])
+    accountant.compose(
+        dp_event.PoissonSampledDpEvent(
+            1.0,
+            dp_event.ComposedDpEvent([
+                dp_event.GaussianDpEvent(sigma1),
+                dp_event.GaussianDpEvent(sigma2)
+            ])))
+    self.assertAlmostEqual(accountant._rdp[0], rdp)
+
+  def test_effective_gaussian_noise_multiplier(self):
+    np.random.seed(0xBAD5EED)
+    sigmas = np.random.uniform(size=(4,))
+
+    event = dp_event.ComposedDpEvent([
+        dp_event.GaussianDpEvent(sigmas[0]),
+        dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(sigmas[1]), 3),
+        dp_event.ComposedDpEvent([
+            dp_event.GaussianDpEvent(sigmas[2]),
+            dp_event.GaussianDpEvent(sigmas[3])
+        ])
+    ])
+
+    sigma = rdp_privacy_accountant._effective_gaussian_noise_multiplier(event)
+    multi_sigmas = list(sigmas) + [sigmas[1]] * 2
+    expected = sum(s**-2 for s in multi_sigmas)**-0.5
+    self.assertAlmostEqual(sigma, expected)
+
+  def test_effective_gaussian_noise_multiplier_degenerate(self):
+    empty = dp_event.ComposedDpEvent([])
+    zero_count = dp_event.SelfComposedDpEvent(dp_event.GaussianDpEvent(1.0), 0)
+    for event in (empty, zero_count):
+      self.assertIsNone(
+          rdp_privacy_accountant._effective_gaussian_noise_multiplier(event))
+
   def test_compute_rdp_poisson_sampled_gaussian(self):
     orders = [1.5, 2.5, 5, 50, 100, np.inf]
     noise_multiplier = 2.5