Add functions to derive epsilon lower bounds.

PiperOrigin-RevId: 484021227
This commit is contained in:
Shuang Song 2022-10-26 11:15:08 -07:00 committed by A. Unique TensorFlower
parent 3f16540bfc
commit 7d7b670f5d
3 changed files with 651 additions and 0 deletions

View file

@ -18,8 +18,23 @@ py_test(
deps = [":utils"],
)
# Unit tests for the epsilon lower bound library.
py_test(
    name = "epsilon_lower_bound_test",
    srcs = ["epsilon_lower_bound_test.py"],
    # NOTE(review): the test also imports absl.testing and numpy; presumably
    # those are provided implicitly by the py_test macro in this repo —
    # confirm against the repo's BUILD conventions.
    deps = [":epsilon_lower_bound"],
)

py_library(
    name = "utils",
    srcs = ["utils.py"],
    srcs_version = "PY3",
)

# Functions to derive differential-privacy epsilon lower bounds from attack
# true/false positive counts.
py_library(
    name = "epsilon_lower_bound",
    srcs = ["epsilon_lower_bound.py"],
    # NOTE(review): epsilon_lower_bound.py also imports numpy, scipy and
    # sklearn; confirm whether those require explicit deps entries here.
    deps = [
        "//third_party/py/immutabledict",
        "//third_party/py/statsmodels",
    ],
)

View file

@ -0,0 +1,360 @@
# Copyright 2022, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Various functions to convert MIA or secret sharer to epsilon lower bounds."""
import enum
import numbers
from typing import Dict, Iterable, Optional, Sequence, Union
import immutabledict
import numpy as np
import numpy.typing as npt
import scipy.integrate
import scipy.optimize
import scipy.stats
import sklearn.metrics
from statsmodels.stats import proportion
def _get_tp_fp_for_thresholds(pos_scores: np.ndarray,
neg_scores: np.ndarray,
thresholds: Optional[np.ndarray] = None):
"""Gets all the tp and fp for a given array of thresholds.
Args:
pos_scores: per-example scores for the positive class.
neg_scores: per-example scores for the negative class.
thresholds: an array of thresholds to consider. Will consider elements
**above** as positive. If not provided, will enumerate through all
possible thresholds.
Returns:
A tuple as the true positives and false positives.
"""
if thresholds is None:
# pylint:disable=protected-access
fp, tp, _ = sklearn.metrics._ranking._binary_clf_curve(
y_true=np.concatenate([
np.ones_like(pos_scores, dtype=int),
np.zeros_like(neg_scores, dtype=int)
]),
y_score=np.concatenate([pos_scores, neg_scores]))
return tp, fp
def get_cum_sum(scores, thresholds):
values = np.concatenate([scores, thresholds])
indicators = np.concatenate(
[np.ones_like(scores, dtype=int),
np.zeros_like(thresholds, dtype=int)])
sort_idx = np.argsort(values)[::-1] # Descending
indicators = indicators[sort_idx]
return np.cumsum(indicators)[indicators == 0]
tp = get_cum_sum(pos_scores, thresholds)
fp = get_cum_sum(neg_scores, thresholds)
return tp, fp
class BoundMethod(enum.Enum):
  """Methods to use for bound of ratio of binomial proportions.

  Each value selects one of the confidence-interval methods implemented in
  `RatioBound`.
  """
  # Log (Katz) interval; see RatioBound._bound_katz_log.
  KATZ_LOG = 'katz-log'
  # Adjusted (Walters) log interval; see RatioBound._bound_adjusted_log.
  ADJUSTED_LOG = 'adjusted-log'
  # Bailey's method; see RatioBound._bound_bailey.
  BAILEY = 'bailey'
  # Inverse hyperbolic sine interval; see RatioBound._bound_inv_hyperbolic_sine.
  INV_SINH = 'inv-sinh'
  # Clopper-Pearson per-proportion bounds; see RatioBound._bound_clopper_pearson.
  CLOPPER_PEARSON = 'clopper-pearson'
class EpsilonLowerBound:
  """Differential privacy (DP) epsilon lower bound.

  This class computes a statistical epsilon lower bound by looking at the log
  ratio of tpr and fpr. The tpr / fpr ratio bound is from `RatioBound` class.
  For example, in membership inference attack, the attacker sets a threshold and
  predicts samples with top probability larger than the thresholds as member.
  If the model is trained with a DP guarantee, then we should expect
  log(tpr / fpr) <= epsilon, where tpr and fpr are the true positive and false
  positive rates of the attacker. Therefore, we can use log(tpr / fpr) to derive
  an epsilon lower bound.

  The idea of using Clopper Pearson for estimating epsilon lower bound is from
  https://arxiv.org/pdf/2006.07709.pdf.
  The idea of using log Katz is from https://arxiv.org/pdf/2210.08643.pdf.

  Examples:
  >>> lb = elb.EpsilonLowerBound(train_top_probs, test_top_probs, alpha=0.05)
  >>> methods = [BoundMethod.BAILEY, BoundMethod.KATZ_LOG]
  >>> lb.compute_epsilon_lower_bounds(methods, k=5)
  """

  def __init__(self,
               pos_scores: np.ndarray,
               neg_scores: np.ndarray,
               alpha: float,
               two_sided_threshold: bool = True,
               thresholds: Optional[np.ndarray] = None):
    """Initializes the epsilon lower bound class.

    Args:
      pos_scores: per-example scores for the positive class.
      neg_scores: per-example scores for the negative class.
      alpha: the confidence level, must be < 0.5.
      two_sided_threshold: if False, will consider thresholds such that elements
        **above** are predicted as positive, i.e., tpr / fpr and tnr / fnr. If
        True, will also consider fpr / tpr and fnr / tnr.
      thresholds: an array of thresholds to consider. If not provided, will
        enumerate through all possible thresholds.

    Raises:
      ValueError: if `pos_scores` or `neg_scores` is not 1-dimensional, or if
        `alpha` >= 0.5.
    """
    if pos_scores.ndim != 1:
      raise ValueError('pos_scores should be a 1-dimensional array, '
                       f'but got {pos_scores.ndim}.')
    if neg_scores.ndim != 1:
      # Bug fix: this message previously said 'pos_score' (copy-paste error),
      # which misreported which argument failed validation.
      raise ValueError('neg_scores should be a 1-dimensional array, '
                       f'but got {neg_scores.ndim}.')
    if alpha >= 0.5:
      raise ValueError('alpha should be < 0.5, e.g. alpha=0.05, '
                       f'but got {alpha}.')
    pos_size, neg_size = pos_scores.size, neg_scores.size
    tp, fp = _get_tp_fp_for_thresholds(pos_scores, neg_scores, thresholds)
    fn, tn = pos_size - tp, neg_size - fp
    # We consider both tpr / fpr and tnr / fnr.
    self._rbs = [
        RatioBound(tp, fp, pos_size, neg_size, alpha),
        RatioBound(tn, fn, neg_size, pos_size, alpha)
    ]
    if two_sided_threshold:
      # Also consider the reversed ratios fpr / tpr and fnr / tnr.
      self._rbs.extend([
          # pylint: disable-next=arguments-out-of-order
          RatioBound(fp, tp, neg_size, pos_size, alpha),
          RatioBound(fn, tn, pos_size, neg_size, alpha)
      ])

  def compute_epsilon_lower_bound(self,
                                  method: BoundMethod,
                                  k: Optional[int] = None
                                 ) -> npt.NDArray[float]:
    """Computes lower bound w/ a specified method and returns top-k epsilons.

    Args:
      method: the method to use for ratio bound.
      k: if specified, will return top-k values.

    Returns:
      An array of epsilon bounds, sorted in descending order. If `k` is given
      and smaller than the number of bounds, only the largest `k` are returned.

    Raises:
      ValueError: if `method` is not one of the available methods.
    """
    if method not in self._rbs[0].available_methods:
      raise ValueError(f'Method {method} not recognized.')
    ratio_bound = np.concatenate([rb.compute_bound(method) for rb in self._rbs])
    # Non-positive ratio bounds carry no epsilon information; drop them before
    # taking the logarithm.
    bounds = np.log(ratio_bound[ratio_bound > 0])
    bounds = np.sort(bounds)[::-1]
    if k is None or k >= bounds.size:
      return bounds
    return bounds[:k]

  def compute_epsilon_lower_bounds(
      self,
      methods: Optional[Iterable[BoundMethod]] = None,
      k: Optional[int] = None) -> Dict[BoundMethod, npt.NDArray[float]]:
    """Computes lower bounds with all methods and returns the top-k epsilons.

    Args:
      methods: the methods to use for ratio bound. If not specified, will use
        all available methods.
      k: if specified, will return top-k values for each method.

    Returns:
      A dictionary, mapping method to the corresponding bound array.
    """
    return {
        method: self.compute_epsilon_lower_bound(method, k)
        for method in methods or self._rbs[0].available_methods.keys()
    }
class RatioBound:
  """Lower bound of ratio of binomial proportions.

  This class implements several methods to compute a statistical lower bound of
  the ratio of binomial proportions, e.g. tpr / fpr.
  Most of the methods are based on https://doi.org/10.1111/2041-210X.12304 and
  their code at https://CRAN.R-project.org/package=asbio.
  Clopper pearson is based on https://arxiv.org/pdf/2006.07709.pdf.

  Examples:
  >>> tp, fp = np.array([100, 90]), np.array([10, 5])
  >>> pos_size, neg_size = 110, 80
  >>> rb = elb.RatioBound(tp, fp, pos_size, neg_size, 0.05)
  >>> rb.compute_bound(BoundMethod.BAILEY)
  array([4.61953896, 6.87647915])
  >>> rb.compute_bounds([BoundMethod.BAILEY, BoundMethod.KATZ_LOG])
  {<BoundMethod.BAILEY: 'bailey'>: array([4.61953896, 6.87647915]),
  <BoundMethod.KATZ_LOG: 'katz-log'>: array([4.45958661, 6.39712581])}

  Attributes:
    available_methods: a dictionary mapping BoundMethod to the function.
  """

  def __init__(self, tp: Union[Sequence[int], int],
               fp: Union[Sequence[int], int],
               pos_size: int, neg_size: int, alpha: float):
    """Initializes the ratio bound class.

    Args:
      tp: true positives.
      fp: false positives. Should be of the same length as tp.
      pos_size: number of real positive samples.
      neg_size: number of real negative samples.
      alpha: the confidence level, must be < 0.5.

    Raises:
      ValueError: if alpha >= 0.5, if tp and fp differ in length, or if any
        count falls outside its valid range.
    """
    if alpha >= 0.5:
      raise ValueError('alpha should be < 0.5, e.g. alpha=0.05, '
                       f'but got {alpha}.')
    self._is_scalar = False  # Would return scalar if `tp` is a scalar.
    # Convert tp or fp to list if it is a scalar.
    if isinstance(tp, numbers.Number):
      tp = [tp]
      self._is_scalar = True
    if isinstance(fp, numbers.Number):
      fp = [fp]
    if len(tp) != len(fp):
      raise ValueError('tp and fp should have the same number of elements, '
                       f'but get {len(tp)} and {len(fp)} respectively.')
    # Some methods need the original values.
    self._tp_orig = np.array(tp, dtype=float)
    self._fp_orig = np.array(fp, dtype=float)
    if np.any(self._tp_orig > pos_size) or np.any(self._tp_orig < 0):
      raise ValueError('tp needs to be in [0, pos_size].')
    if np.any(self._fp_orig > neg_size) or np.any(self._fp_orig < 0):
      raise ValueError('fp needs to be in [0, neg_size].')
    # Immutable so callers can inspect but not alter the dispatch table.
    self.available_methods = immutabledict.immutabledict({
        BoundMethod.KATZ_LOG: self._bound_katz_log,
        BoundMethod.ADJUSTED_LOG: self._bound_adjusted_log,
        BoundMethod.BAILEY: self._bound_bailey,
        BoundMethod.INV_SINH: self._bound_inv_hyperbolic_sine,
        BoundMethod.CLOPPER_PEARSON: self._bound_clopper_pearson,
    })
    self._alpha = alpha
    # Lower-tail normal quantile; negative because alpha < 0.5, so the
    # exp(z * ...) factors in the methods below shrink the empirical ratio.
    self._z = scipy.stats.norm.ppf(alpha)
    self._pos_size, self._neg_size = pos_size, neg_size
    # Some methods need to adjust maximum possible values. We record the
    # adjusted arrays: when both counts are at their maximum, subtract 0.5.
    idx_max = np.logical_and(self._tp_orig == self._pos_size,
                             self._fp_orig == self._neg_size)
    self._tp = np.where(idx_max, self._pos_size - 0.5, self._tp_orig)
    self._fp = np.where(idx_max, self._neg_size - 0.5, self._fp_orig)
    # Some methods need to handle 0 specifically. We record the indices.
    self._idx_tp_0, self._idx_fp_0 = (self._tp == 0), (self._fp == 0)

  def _get_statistics(self, tp, fp):
    """Returns tpr, fpr, fnr, tnr for given tp, fp."""
    tpr, fpr = tp / self._pos_size, fp / self._neg_size
    fnr, tnr = 1 - tpr, 1 - fpr
    return tpr, fpr, fnr, tnr

  def compute_bound(self,
                    method: BoundMethod) -> Union[float, npt.NDArray[float]]:
    """Computes ratio bound using a specified method.

    Args:
      method: the method to use for ratio bound.

    Returns:
      An array of bounds or a scalar if the input tp is scalar.

    Raises:
      ValueError: if `method` is not one of the available methods.
    """
    if method not in self.available_methods:
      raise ValueError(f'Method {method} not recognized.')
    bound = self.available_methods[method]()
    if self._is_scalar:
      bound = bound[0]  # Take the element if of size 1
    return bound

  def compute_bounds(
      self,
      methods: Optional[Iterable[BoundMethod]] = None
  ) -> Dict[BoundMethod, Union[float, npt.NDArray[float]]]:
    """Computes ratio bounds for specified methods.

    Args:
      methods: the methods to use for ratio bound. If not specified, will use
        all available methods.

    Returns:
      A dictionary, mapping method to the corresponding bound.
    """
    return {
        method: self.compute_bound(method)
        for method in methods or self.available_methods.keys()
    }

  def _bound_katz_log(self) -> npt.NDArray[float]:
    """Uses the logarithm Katz method to compute lower bound of ratio."""
    # Replace fp == 0 with 0.5 to avoid division by zero.
    tp, fp = self._tp, np.where(self._idx_fp_0, 0.5, self._fp)
    tpr, fpr, fnr, tnr = self._get_statistics(tp, fp)
    empirical_ratio = tpr / fpr
    sqrt_term = np.sqrt(fnr / tp + tnr / fp)
    # tp == 0 gives a trivial bound of 0; otherwise scale the empirical ratio
    # down by exp(z * sqrt_term) (z < 0).
    return np.where(self._idx_tp_0, 0,
                    empirical_ratio * np.exp(self._z * sqrt_term))

  def _bound_adjusted_log(self) -> npt.NDArray[float]:
    """Uses the logarithm Walters method to compute lower bound of ratio."""
    # Walters adjustment: add 0.5 to every count, which also handles zeros.
    log_empirical_ratio = (
        np.log((self._tp + 0.5) / (self._pos_size + 0.5)) - np.log(
            (self._fp + 0.5) / (self._neg_size + 0.5)))
    sqrt_term = np.sqrt(1 / (self._tp + 0.5) - 1 / (self._pos_size + 0.5) + 1 /
                        (self._fp + 0.5) - 1 / (self._neg_size + 0.5))
    # Only the tp == 0 and fp == 0 case is pinned to 0.
    return np.where(
        np.logical_and(self._idx_tp_0, self._idx_fp_0), 0,
        np.exp(log_empirical_ratio) * np.exp(self._z * sqrt_term))

  def _bound_bailey(self) -> npt.NDArray[float]:
    """Uses the Bailey method to compute lower bound of ratio."""
    # Adjust counts at their maximum independently (unlike self._tp/_fp, which
    # are adjusted only when both are at their maximum).
    tp = np.where(self._tp_orig == self._pos_size, self._pos_size - 0.5,
                  self._tp_orig)
    fp = np.where(self._fp_orig == self._neg_size, self._neg_size - 0.5,
                  self._fp_orig)
    # Replace fp == 0 with 0.5 to avoid division by zero.
    fp[self._idx_fp_0] = 0.5
    tpr, fpr, fnr, tnr = self._get_statistics(tp, fp)
    empirical_ratio = tpr / fpr
    power_3_term_numer = 1 + self._z / 3 * np.sqrt(fnr / tp + tnr / fp -
                                                   (self._z**2 * fnr * tnr) /
                                                   (9 * tp * fp))
    power_3_term_denom = 1 - (self._z**2 * tnr) / (9 * fp)
    return np.where(
        self._idx_tp_0, 0,
        empirical_ratio * (power_3_term_numer / power_3_term_denom)**3)

  def _bound_inv_hyperbolic_sine(self) -> npt.NDArray[float]:
    """Uses the inverse sinh method to compute lower bound of ratio."""
    # Replace fp == 0 with z**2 to avoid division by zero.
    tp, fp = self._tp, np.where(self._idx_fp_0, self._z**2, self._fp)
    empirical_ratio = (tp / fp) / (self._pos_size / self._neg_size)
    in_inve_sinh = self._z / 2 * np.sqrt(1 / tp - 1 / self._pos_size + 1 / fp -
                                         1 / self._neg_size)
    return np.where(self._idx_tp_0, 0,
                    empirical_ratio * np.exp(2 * np.arcsinh(in_inve_sinh)))

  def _bound_clopper_pearson(self) -> npt.NDArray[float]:
    """Uses the Clopper-Pearson method to compute lower bound of ratio."""
    # proportion_confint uses alpha / 2 budget on upper and lower, so total
    # budget will be 2 * alpha/2 = alpha.
    # p1 is the lower confidence bound on tpr; p0 the upper bound on fpr, so
    # p1 / p0 lower-bounds the ratio.
    p1, _ = proportion.proportion_confint(
        self._tp_orig, self._pos_size, self._alpha, method='beta')
    _, p0 = proportion.proportion_confint(
        self._fp_orig, self._neg_size, self._alpha, method='beta')
    # Handles divide by zero issues
    return np.where(np.logical_or(p1 <= 0, p0 >= 1), 0, p1 / p0)

View file

@ -0,0 +1,276 @@
# Copyright 2022, The TensorFlow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
from tensorflow_privacy.privacy.privacy_tests import epsilon_lower_bound as elb
class TPFPTest(parameterized.TestCase):
  """Tests for `_get_tp_fp_for_thresholds`."""

  def test_tp_fp_given_thresholds(self):
    """Counts true/false positives for explicitly supplied thresholds."""
    member_scores = np.array([9, 4, 5, 0.])
    nonmember_scores = np.array([3, 8, 6, 1, 2, 7])
    cutoffs = np.array([9.5, 8.5, 5.5, 3.5, 0.5, -1])
    tp, fp = elb._get_tp_fp_for_thresholds(member_scores, nonmember_scores,
                                           cutoffs)
    np.testing.assert_array_equal(tp, np.array([0, 1, 1, 3, 3, 4]))
    np.testing.assert_array_equal(fp, np.array([0, 0, 3, 3, 6, 6]))

  def test_tp_fp_all_thresholds(self):
    """Enumerates every possible threshold when none are given."""
    member_scores = np.array([9, 4, 5, 0])
    nonmember_scores = np.array([3, 8, 6, 1, 2, 7.])
    tp, fp = elb._get_tp_fp_for_thresholds(member_scores, nonmember_scores)
    np.testing.assert_array_equal(tp, np.array([1, 1, 1, 1, 2, 3, 3, 3, 3, 4]))
    np.testing.assert_array_equal(fp, np.array([0, 1, 2, 3, 3, 3, 4, 5, 6, 6]))
class RatioBoundTest(parameterized.TestCase):
  """Tests for `RatioBound`."""
  # For every method except for clopper pearson, expected results are from
  # https://CRAN.R-project.org/package=asbio `ci.prat`.
  # For clopper pearson, test case is from https://arxiv.org/pdf/2006.07709.pdf.

  @parameterized.parameters(
      (473, 511, 755, 950, elb.BoundMethod.KATZ_LOG, 1.01166194),
      (473, 511, 755, 950, elb.BoundMethod.ADJUSTED_LOG, 1.01189079),
      (473, 511, 755, 950, elb.BoundMethod.BAILEY, 1.01148934),
      (473, 511, 755, 950, elb.BoundMethod.INV_SINH, 1.01167669),
      (2, 697, 213, 313, elb.BoundMethod.KATZ_LOG, 0.00220470),
      (2, 697, 213, 313, elb.BoundMethod.ADJUSTED_LOG, 0.00310880),
      (2, 697, 213, 313, elb.BoundMethod.BAILEY, 0.00162322),
      (2, 697, 213, 313, elb.BoundMethod.INV_SINH, 0.00233503),
      (1000, 10, 10, 10000, elb.BoundMethod.KATZ_LOG, 589.27335593),
      (1000, 10, 10, 10000, elb.BoundMethod.ADJUSTED_LOG, 568.32659563),
      (1000, 10, 10, 10000, elb.BoundMethod.BAILEY, 613.58766147),
      (1000, 10, 10, 10000, elb.BoundMethod.INV_SINH, 592.63264572),
  )
  def test_bound_scalar(self, tp, fn, fp, tn, method, expected_value):
    """Checks scalar bounds with no zero / maximum counts involved."""
    rb = elb.RatioBound(tp, fp, tp + fn, fp + tn, 0.05)
    res = rb.compute_bound(method)
    self.assertAlmostEqual(res, expected_value)

  @parameterized.parameters(
      (0, 511, 755, 950, elb.BoundMethod.KATZ_LOG, 0.),
      (0, 511, 755, 950, elb.BoundMethod.ADJUSTED_LOG, 0.00021568),
      (0, 511, 755, 950, elb.BoundMethod.BAILEY, 0.),
      (0, 511, 755, 950, elb.BoundMethod.INV_SINH, 0.),
      (2, 697, 0, 313, elb.BoundMethod.KATZ_LOG, 0.13325535),
      (2, 697, 0, 313, elb.BoundMethod.ADJUSTED_LOG, 0.17571862),
      (2, 697, 0, 313, elb.BoundMethod.BAILEY, 0.18481008),
      (2, 697, 0, 313, elb.BoundMethod.INV_SINH, 0.08081083),
      (0, 10, 0, 10000, elb.BoundMethod.KATZ_LOG, 0.),
      (0, 10, 0, 10000, elb.BoundMethod.ADJUSTED_LOG, 0.),
      (0, 10, 0, 10000, elb.BoundMethod.BAILEY, 0.),
      (0, 10, 0, 10000, elb.BoundMethod.INV_SINH, 0.),
  )
  def test_bound_scalar_with_0(self, tp, fn, fp, tn, method, expected_value):
    """Checks the zero-count special cases (tp == 0 and/or fp == 0)."""
    rb = elb.RatioBound(tp, fp, tp + fn, fp + tn, 0.05)
    res = rb.compute_bound(method)
    self.assertAlmostEqual(res, expected_value)

  @parameterized.parameters(
      (473, 0, 755, 950, elb.BoundMethod.KATZ_LOG, 2.15959024),
      (473, 0, 755, 950, elb.BoundMethod.ADJUSTED_LOG, 2.15883995),
      (473, 0, 755, 950, elb.BoundMethod.BAILEY, 2.15787013),
      (473, 0, 755, 950, elb.BoundMethod.INV_SINH, 2.15959827),
      (2, 697, 213, 0, elb.BoundMethod.KATZ_LOG, 0.00089568),
      (2, 697, 213, 0, elb.BoundMethod.ADJUSTED_LOG, 0.00126522),
      (2, 697, 213, 0, elb.BoundMethod.BAILEY, 0.00066016),
      (2, 697, 213, 0, elb.BoundMethod.INV_SINH, 0.00094821),
      (1000, 0, 10, 0, elb.BoundMethod.KATZ_LOG, 0.93375356),
      (1000, 0, 10, 0, elb.BoundMethod.ADJUSTED_LOG, 0.93686007),
      (1000, 0, 10, 0, elb.BoundMethod.BAILEY, 0.93591485),
      (1000, 0, 10, 0, elb.BoundMethod.INV_SINH, 0.93381958),
  )
  def test_bound_scalar_with_large(self, tp, fn, fp, tn, method,
                                   expected_value):
    """Checks the maximum-count cases (tp == pos_size and/or fp == neg_size)."""
    rb = elb.RatioBound(tp, fp, tp + fn, fp + tn, 0.05)
    res = rb.compute_bound(method)
    self.assertAlmostEqual(res, expected_value)

  @parameterized.parameters(
      (elb.BoundMethod.KATZ_LOG,
       np.array([1.71127264, 0., 2.21549347, 0.99840355])),
      (elb.BoundMethod.ADJUSTED_LOG,
       np.array([1.71070821, 0.00388765, 2.33237436, 0.99840433])),
      (elb.BoundMethod.BAILEY, np.array(
          [1.71182320, 0., 3.86813751, 0.99840348])),
      (elb.BoundMethod.INV_SINH,
       np.array([1.71128185, 0., 1.51711034, 0.99840355])),
  )
  def test_bound_array(self, method, expected_values):
    """Checks vectorized inputs, mixing regular, zero, and maximum counts."""
    pos_size, neg_size = 1000, 1200
    tp, fp = np.array([900, 0, 10, 1000]), np.array([600, 14, 0, 1200])
    rb = elb.RatioBound(tp, fp, pos_size, neg_size, 0.05)
    res = rb.compute_bound(method)
    np.testing.assert_allclose(res, expected_values, atol=1e-7)
    # Also test when the input is 1-element array
    rb = elb.RatioBound(tp[:1], fp[:1], pos_size, neg_size, 0.05)
    res = rb.compute_bound(method)
    self.assertLen(res, 1)
    np.testing.assert_allclose(res, expected_values[:1], atol=1e-7)

  def test_bound_scalar_clopper_pearson(self):
    """Checks Clopper-Pearson against the value from arXiv:2006.07709."""
    tp, fp, tn, fn = 500, 0, 500, 0
    rb = elb.RatioBound(tp, fp, tp + fn, fp + tn, 0.01)
    res = rb.compute_bound(elb.BoundMethod.CLOPPER_PEARSON)
    self.assertAlmostEqual(np.log(res), 4.54, places=2)

  def test_bounds_scalar(self):
    """Checks compute_bounds with default and explicitly selected methods."""
    rb = elb.RatioBound(500, 0, 500, 500, 0.01)
    # Expected result except for clopper pearson.
    expected_res = {
        elb.BoundMethod.KATZ_LOG: 37.31696140,
        elb.BoundMethod.ADJUSTED_LOG: 37.35421695,
        elb.BoundMethod.BAILEY: 108.47420518,
        elb.BoundMethod.INV_SINH: 35.46128691
    }
    res = rb.compute_bounds()
    # For clopper pearson we only have this test. So we handle it separately.
    self.assertAlmostEqual(
        np.log(res[elb.BoundMethod.CLOPPER_PEARSON]), 4.54, places=2)
    del res[elb.BoundMethod.CLOPPER_PEARSON]
    self.assertEqual(res.keys(), expected_res.keys())
    np.testing.assert_almost_equal([res[k] for k in res],
                                   [expected_res[k] for k in res])
    # Specify methods to use
    methods = set([elb.BoundMethod.KATZ_LOG, elb.BoundMethod.INV_SINH])
    res = rb.compute_bounds(methods)
    self.assertEqual(set(res.keys()), methods)
    np.testing.assert_almost_equal([res[k] for k in res],
                                   [expected_res[k] for k in res])
class EpsilonLowerBoundTest(parameterized.TestCase):
  """Tests for `EpsilonLowerBound`."""

  def test_epsilon_bound(self):
    """Checks one-sided vs. two-sided bounds and top-k truncation."""
    pos_scores = np.array([9, 4, 5, 0.])
    neg_scores = np.array([3, 8, 6, 1, 2, 7])
    thresholds = np.array([9.5, 8.5, 5.5, 0.5])
    alpha = 0.05
    method = elb.BoundMethod.ADJUSTED_LOG
    # Therefore,
    # pos_size = 4, neg_size = 6
    # tp = [0, 1, 1, 3]
    # fp = [0, 0, 3, 6]
    # fn = [4, 3, 3, 1]
    # tn = [6, 6, 3, 0]
    # The expected epsilon bounds for the four ratios:
    tpr_fpr = [-1.02310327, -1.72826789, -0.66577858]
    tnr_fnr = [-0.29368152, -0.16314973, -1.09474300, -3.95577741]
    fpr_tpr = [-3.95577741, -0.76912173, -0.16314973]
    fnr_tnr = [-0.36916217, -0.66577858, -0.35929344, -1.02310327]
    expected_one_sided = np.sort(tpr_fpr + tnr_fnr)[::-1]
    expected_two_sided = np.sort(tpr_fpr + tnr_fnr + fpr_tpr + fnr_tnr)[::-1]
    common_kwargs = {
        'pos_scores': pos_scores,
        'neg_scores': neg_scores,
        'alpha': alpha,
        'thresholds': thresholds
    }
    # one-sided
    lb = elb.EpsilonLowerBound(two_sided_threshold=False, **common_kwargs)
    np.testing.assert_almost_equal(
        lb.compute_epsilon_lower_bound(method), expected_one_sided)
    # two-sided
    lb = elb.EpsilonLowerBound(two_sided_threshold=True, **common_kwargs)
    np.testing.assert_almost_equal(
        lb.compute_epsilon_lower_bound(method), expected_two_sided)
    # test for top-k
    k = 5
    np.testing.assert_almost_equal(
        lb.compute_epsilon_lower_bound(method, k), expected_two_sided[:k])
    # k larger than the number of bounds: everything is returned.
    k = 100
    np.testing.assert_almost_equal(
        lb.compute_epsilon_lower_bound(method, k), expected_two_sided)

  def test_epsilon_bounds(self):
    """Checks compute_epsilon_lower_bounds over multiple methods."""
    pos_scores = np.array([9, 4, 5, 0.])
    neg_scores = np.array([3, 8, 6, 1, 2, 7])
    thresholds = np.array([9.5, 8.5, 5.5, 0.5])
    alpha = 0.05
    # Therefore,
    # tp = [0, 1, 1, 3]
    # fp = [0, 0, 3, 6]
    # fn = [4, 3, 3, 1]
    # tn = [6, 6, 3, 0]
    # The expected epsilon bounds for using tpr / fpr, tnr / fnr:
    expected = {
        elb.BoundMethod.ADJUSTED_LOG:
            np.sort([-1.02310327, -1.72826789, -0.66577858] +
                    [-0.29368152, -0.16314973, -1.09474300, -3.95577741])[::-1],
        elb.BoundMethod.INV_SINH:
            np.sort([-2.05961264, -2.13876684, -0.75815925] +
                    [-0.32235626, -0.18279510, -1.20631805])[::-1]
    }
    lb = elb.EpsilonLowerBound(
        pos_scores,
        neg_scores,
        alpha,
        two_sided_threshold=False,
        thresholds=thresholds)
    res = lb.compute_epsilon_lower_bounds(expected.keys())
    self.assertEqual(res.keys(), expected.keys())
    for method in expected:
      np.testing.assert_almost_equal(res[method], expected[method])
    # test for top-k
    k = 5
    res = lb.compute_epsilon_lower_bounds(expected.keys(), k)
    self.assertEqual(res.keys(), expected.keys())
    for method in expected:
      np.testing.assert_almost_equal(res[method], expected[method][:k])

  def test_epsilon_bound_clopper_pearson(self):
    """Checks Clopper-Pearson end-to-end on perfectly separated scores."""
    # Try to create tp, fp, tn, fn = 500, 0, 500, 0
    pos_scores = np.ones(500)
    neg_scores = np.zeros(500)
    thresholds = np.array([0.5])
    alpha = 0.01
    expected_eps = 4.54
    method = elb.BoundMethod.CLOPPER_PEARSON
    # one-sided
    lb = elb.EpsilonLowerBound(
        pos_scores,
        neg_scores,
        alpha,
        thresholds=thresholds,
        two_sided_threshold=False)
    np.testing.assert_almost_equal(
        lb.compute_epsilon_lower_bound(method), [expected_eps, expected_eps],
        decimal=2)
    # two-sided. fpr / tpr = fnr / tnr = 0
    lb = elb.EpsilonLowerBound(
        pos_scores,
        neg_scores,
        alpha,
        thresholds=thresholds,
        two_sided_threshold=True)
    np.testing.assert_almost_equal(
        lb.compute_epsilon_lower_bound(method), [expected_eps, expected_eps],
        decimal=2)
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  absltest.main()