Fix bug in Keras optimizers where the gradient tape was not used when the loss is a tensor (as opposed to a callable).
PiperOrigin-RevId: 327276721
This commit is contained in:
parent
193ac3b1c8
commit
6dccd9b537
2 changed files with 37 additions and 3 deletions
|
@ -83,8 +83,9 @@ def make_keras_optimizer_class(cls):
|
||||||
if callable(var_list):
|
if callable(var_list):
|
||||||
var_list = var_list()
|
var_list = var_list()
|
||||||
else:
|
else:
|
||||||
microbatch_losses = tf.reduce_mean(
|
with tape:
|
||||||
tf.reshape(loss, [self._num_microbatches, -1]), axis=1)
|
microbatch_losses = tf.reduce_mean(
|
||||||
|
tf.reshape(loss, [self._num_microbatches, -1]), axis=1)
|
||||||
|
|
||||||
var_list = tf.nest.flatten(var_list)
|
var_list = tf.nest.flatten(var_list)
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,8 @@ class DPOptimizerComputeGradientsTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
('DPAdagrad 4', dp_optimizer_keras.DPKerasAdagradOptimizer, 4,
|
('DPAdagrad 4', dp_optimizer_keras.DPKerasAdagradOptimizer, 4,
|
||||||
[-2.5, -2.5], [-0.5]),
|
[-2.5, -2.5], [-0.5]),
|
||||||
)
|
)
|
||||||
def testBaseline(self, cls, num_microbatches, expected_grad0, expected_grad1):
|
def testBaselineWithCallableLoss(self, cls, num_microbatches, expected_grad0,
|
||||||
|
expected_grad1):
|
||||||
var0 = tf.Variable([1.0, 2.0])
|
var0 = tf.Variable([1.0, 2.0])
|
||||||
var1 = tf.Variable([3.0])
|
var1 = tf.Variable([3.0])
|
||||||
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
@ -60,6 +61,38 @@ class DPOptimizerComputeGradientsTest(tf.test.TestCase, parameterized.TestCase):
|
||||||
self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0])
|
self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0])
|
||||||
self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0])
|
self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0])
|
||||||
|
|
||||||
|
# Parameters for testing: optimizer, num_microbatches, expected gradient for
|
||||||
|
# var0, expected gradient for var1.
|
||||||
|
@parameterized.named_parameters(
|
||||||
|
('DPGradientDescent 1', dp_optimizer_keras.DPKerasSGDOptimizer, 1,
|
||||||
|
[-2.5, -2.5], [-0.5]),
|
||||||
|
('DPAdam 2', dp_optimizer_keras.DPKerasAdamOptimizer, 2, [-2.5, -2.5
|
||||||
|
], [-0.5]),
|
||||||
|
('DPAdagrad 4', dp_optimizer_keras.DPKerasAdagradOptimizer, 4,
|
||||||
|
[-2.5, -2.5], [-0.5]),
|
||||||
|
)
|
||||||
|
def testBaselineWithTensorLoss(self, cls, num_microbatches, expected_grad0,
|
||||||
|
expected_grad1):
|
||||||
|
var0 = tf.Variable([1.0, 2.0])
|
||||||
|
var1 = tf.Variable([3.0])
|
||||||
|
data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])
|
||||||
|
data1 = tf.Variable([[8.0], [2.0], [3.0], [1.0]])
|
||||||
|
|
||||||
|
opt = cls(
|
||||||
|
l2_norm_clip=100.0,
|
||||||
|
noise_multiplier=0.0,
|
||||||
|
num_microbatches=num_microbatches,
|
||||||
|
learning_rate=2.0)
|
||||||
|
|
||||||
|
tape = tf.GradientTape()
|
||||||
|
with tape:
|
||||||
|
loss = self._loss(data0, var0) + self._loss(data1, var1)
|
||||||
|
|
||||||
|
grads_and_vars = opt._compute_gradients(
|
||||||
|
loss, [var0, var1], tape=tape)
|
||||||
|
self.assertAllCloseAccordingToType(expected_grad0, grads_and_vars[0][0])
|
||||||
|
self.assertAllCloseAccordingToType(expected_grad1, grads_and_vars[1][0])
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
@parameterized.named_parameters(
|
||||||
('DPGradientDescent', dp_optimizer_keras.DPKerasSGDOptimizer),)
|
('DPGradientDescent', dp_optimizer_keras.DPKerasSGDOptimizer),)
|
||||||
def testClippingNorm(self, cls):
|
def testClippingNorm(self, cls):
|
||||||
|
|
Loading…
Reference in a new issue