# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Momentum."""

import numpy as np

from tensorflow.compiler.tests import xla_test
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import momentum as momentum_lib


class MomentumOptimizerTest(xla_test.XLATestCase):

  def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum):
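    """NumPy reference for one Nesterov momentum step.

    The four in-place updates in this helper are algebraically equivalent to
      accum = accum * momentum + g
      var -= lr * (g + momentum * accum)
    which is the update the tests below expect from
    MomentumOptimizer(use_nesterov=True).
    """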
    var += accum * lr * momentum
    accum = accum * momentum + g
    var -= lr * accum
    var -= accum * lr * momentum
    return var, accum

  def testBasic(self):
    for dtype in self.float_types:
      with self.session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
        mom_opt = momentum_lib.MomentumOptimizer(
            learning_rate=2.0, momentum=0.9)
        mom_update = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
        # Check we have slots
        self.assertEqual(["momentum"], mom_opt.get_slot_names())
        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEqual(slot0.get_shape(), var0.get_shape())
        self.assertFalse(slot0 in variables.trainable_variables())
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEqual(slot1.get_shape(), var1.get_shape())
        self.assertFalse(slot1 in variables.trainable_variables())
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Step 1: the momentum accumulators were 0. So we should see a normal
        # update: v -= grad * learning_rate
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([0.1, 0.1]), self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([0.01, 0.01]), self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
            self.evaluate(var1))
        # Step 2: the momentum accumulators contain the previous update.
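        # The expected values follow accum = momentum * accum + grad and
        # var -= learning_rate * accum, i.e. accum = 0.9 * accum + grad and
        # var -= 2.0 * accum here.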
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
            self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
            self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0))
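        # 2.98 and 3.98 are the step-1 values 3.0 - (0.01 * 2.0) and
        # 4.0 - (0.01 * 2.0).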
        self.assertAllCloseAccordingToType(
            np.array([
                2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1))

  def testNesterovMomentum(self):
    for dtype in self.float_types:
      with self.session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([0.1, 0.2], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([0.3, 0.4], dtype=dtype)
        var0_np = np.array([0.1, 0.2], dtype=dtype)
        var1_np = np.array([0.3, 0.4], dtype=dtype)
        accum0_np = np.array([0.0, 0.0], dtype=dtype)
        accum1_np = np.array([0.0, 0.0], dtype=dtype)
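        # The gradients of the cost below are d(cost)/d(var0) = 0.8 * var0 and
        # d(cost)/d(var1) = 0.9, which is what the NumPy reference updates in
        # the loop use as g.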
        cost = 0.4 * var0 * var0 + 0.9 * var1
        global_step = resource_variable_ops.ResourceVariable(
            array_ops.zeros([], dtypes.int32), name="global_step")
        mom_op = momentum_lib.MomentumOptimizer(
            learning_rate=0.1, momentum=0.9, use_nesterov=True)
        opt_op = mom_op.minimize(cost, global_step, [var0, var1])
        self.evaluate(variables.global_variables_initializer())
        for _ in range(1, 5):
          opt_op.run()
          var0_np, accum0_np = self._update_nesterov_momentum_numpy(
              var0_np, accum0_np, var0_np * 0.8, 0.1, 0.9)
          var1_np, accum1_np = self._update_nesterov_momentum_numpy(
              var1_np, accum1_np, 0.9, 0.1, 0.9)
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))

  def testTensorLearningRateAndMomentum(self):
    for dtype in self.float_types:
      with self.session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
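        # Same setup and expected values as testBasic, except the learning
        # rate and momentum are passed as constant tensors rather than Python
        # floats.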
        mom_opt = momentum_lib.MomentumOptimizer(
            learning_rate=constant_op.constant(2.0),
            momentum=constant_op.constant(0.9))
        mom_update = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
        # Check we have slots
        self.assertEqual(["momentum"], mom_opt.get_slot_names())
        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEqual(slot0.get_shape(), var0.get_shape())
        self.assertFalse(slot0 in variables.trainable_variables())
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEqual(slot1.get_shape(), var1.get_shape())
        self.assertFalse(slot1 in variables.trainable_variables())
        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Step 1: the momentum accumulators were 0. So we should see a normal
        # update: v -= grad * learning_rate
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([0.1, 0.1]), self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([0.01, 0.01]), self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
            self.evaluate(var1))
        # Step 2: the momentum accumulators contain the previous update.
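        # Expected values match testBasic: accum = 0.9 * accum + grad and
        # var -= 2.0 * accum.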
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
            self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
            self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1))


if __name__ == "__main__":
  test.main()