| # Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Tests for compute_gradient.""" |
| |
| import numpy as np |
| |
| from tensorflow.python.eager import backprop |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import sparse_tensor |
| from tensorflow.python.framework import test_util |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import custom_gradient |
from tensorflow.python.ops import gradient_checker_v2 as gradient_checker
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import nn_ops |
| from tensorflow.python.ops import sparse_ops |
# Imported to register the gradient for SoftmaxCrossEntropyWithLogits:
| import tensorflow.python.ops.nn_grad # pylint: disable=unused-import |
| from tensorflow.python.platform import test |
| from tensorflow.python.platform import tf_logging |
| |
| |
| def _random_complex(shape, dtype): |
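  """Returns random data of `shape`; fills the imaginary part if complex."""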
| data = np.random.random_sample(shape).astype(dtype.as_numpy_dtype) |
| if dtype.is_complex: |
| data.imag = np.random.random_sample(shape) |
| return data |
| |
| |
| @test_util.run_all_in_graph_and_eager_modes |
| class GradientCheckerTest(test.TestCase): |
| |
| def testSparseTensorReshape(self): |
| x = constant_op.constant(2.0, shape=(2,)) |
| |
| def sparse_tensor_reshape(values): |
| sparse = sparse_tensor.SparseTensor( |
| indices=[[0, 0], [1, 2]], values=values, dense_shape=[3, 4]) |
| sparse = sparse_ops.sparse_reshape(sparse, shape=(12,)) |
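      # sparse_reshape only rewrites the indices; the values pass through
      # unchanged, so the Jacobian with respect to `values` is the identity.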
| return sparse.values |
| |
| error = gradient_checker.max_error( |
| *gradient_checker.compute_gradient(sparse_tensor_reshape, [x])) |
| |
| self.assertLess(error, 1e-4) |
| |
| def testWithStaticShape(self): |
| size = (2, 3) |
| constant = constant_op.constant(2.0, shape=size, name="const") |
| |
| def add_constant_with_static_shape_check(x): |
| self.assertAllEqual(x.shape.as_list(), constant.shape.as_list()) |
| return x + constant |
| |
| x = constant_op.constant(3.0, shape=size, name="x") |
| |
| error = gradient_checker.max_error(*gradient_checker.compute_gradient( |
| add_constant_with_static_shape_check, [x])) |
| |
| self.assertLess(error, 1e-4) |
| |
| def testWithArgumentsAsTuple(self): |
| size = (2, 3) |
| x1 = constant_op.constant(2.0, shape=size, name="x1") |
| x2 = constant_op.constant(3.0, shape=size, name="x2") |
| |
| error = gradient_checker.max_error(*gradient_checker.compute_gradient( |
| lambda x1: math_ops.add(x1, x2), (x1,))) |
| |
| tf_logging.info("x1 error = %f", error) |
| self.assertLess(error, 1e-4) |
| |
| def testAddSimple(self): |
| size = (2, 3) |
| x1 = constant_op.constant(2.0, shape=size, name="x1") |
| x2 = constant_op.constant(3.0, shape=size, name="x2") |
| error = gradient_checker.max_error(*gradient_checker.compute_gradient( |
| lambda x1: math_ops.add(x1, x2), [x1])) |
| tf_logging.info("x1 error = %f", error) |
| self.assertLess(error, 1e-4) |
| |
| def testBfloat16(self): |
| x1 = constant_op.constant(2.0, dtype="bfloat16") |
| x2 = constant_op.constant(3.0, dtype="bfloat16") |
    # bfloat16 is very imprecise, so we use a very large delta and error bound.
| error = gradient_checker.max_error(*gradient_checker.compute_gradient( |
| lambda x1: math_ops.add(x1, x2), [x1], delta=0.1)) |
| tf_logging.info("x1 error = %f", error) |
| self.assertLess(error, 0.07) |
| |
| def testAddCustomized(self): |
| size = (2, 3) |
| x1 = constant_op.constant(2.0, shape=size, dtype=dtypes.float64, name="x1") |
| x2 = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3)) |
    # Check gradients for x2 using a special delta.
| error = gradient_checker.max_error(*gradient_checker.compute_gradient( |
| lambda x2: math_ops.add(x1, x2), [x2], delta=1e-2)) |
| tf_logging.info("x2 error = %f", error) |
| self.assertLess(error, 1e-10) |
| |
| def testGather(self): |
| |
| def f(params): |
| index_values = [1, 3] |
| indices = constant_op.constant(index_values, name="i") |
| return array_ops.gather(params, indices, name="y") |
| |
| p_shape = (4, 2) |
| p_size = 8 |
| params = constant_op.constant( |
| np.arange(p_size).astype(np.float64), shape=p_shape, name="p") |
| error = gradient_checker.max_error( |
| *gradient_checker.compute_gradient(f, [params])) |
| tf_logging.info("gather error = %f", error) |
| self.assertLess(error, 1e-4) |
| |
| def testNestedGather(self): |
| |
| def f(params): |
| index_values = [1, 3, 5, 6] |
| indices = constant_op.constant(index_values, name="i") |
| y = array_ops.gather(params, indices, name="y") |
| index_values2 = [0, 2] |
| indices2 = constant_op.constant(index_values2, name="i2") |
| return array_ops.gather(y, indices2, name="y2") |
| |
| p_shape = (8, 2) |
| p_size = 16 |
| params = constant_op.constant( |
| np.arange(p_size).astype(np.float64), shape=p_shape, name="p") |
| error = gradient_checker.max_error( |
| *gradient_checker.compute_gradient(f, [params])) |
| tf_logging.info("nested gather error = %f", error) |
| self.assertLess(error, 1e-4) |
| |
| def testComplexMul(self): |
| c = constant_op.constant(5 + 7j, dtype=dtypes.complex64) |
| |
| def f(x): |
| return c * x |
| |
| x_shape = c.shape |
| x_dtype = c.dtype |
| x = constant_op.constant(_random_complex(x_shape, x_dtype)) |
| analytical, numerical = gradient_checker.compute_gradient(f, [x]) |
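    # Multiplication by c = 5 + 7j, viewed as a map on (real, imag) pairs,
    # has the real 2x2 Jacobian [[Re(c), -Im(c)], [Im(c), Re(c)]].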
| correct = np.array([[5, -7], [7, 5]]) |
| self.assertAllEqual(correct, analytical[0]) |
| self.assertAllClose(correct, numerical[0], rtol=1e-4) |
| x = constant_op.constant(_random_complex(x_shape, x_dtype)) |
| self.assertLess( |
| gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])), |
| 3e-4) |
| |
| def testComplexConj(self): |
| |
| def f(x): |
| return math_ops.conj(x) |
| |
| x_shape = () |
| x_dtype = dtypes.complex64 |
| x = constant_op.constant(_random_complex(x_shape, x_dtype)) |
| analytical, numerical = gradient_checker.compute_gradient(f, [x]) |
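    # conj maps (real, imag) to (real, -imag), so its real Jacobian is
    # [[1, 0], [0, -1]].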
| correct = np.array([[1, 0], [0, -1]]) |
| self.assertAllEqual(correct, analytical[0]) |
| self.assertAllClose(correct, numerical[0], rtol=2e-5) |
| x = constant_op.constant(_random_complex(x_shape, x_dtype)) |
| self.assertLess( |
| gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])), |
| 2e-5) |
| |
| def testEmptySucceeds(self): |
| |
| def f(x): |
| return array_ops.identity(x) |
| |
| x = constant_op.constant( |
| np.random.random_sample((0, 3)), dtype=dtypes.float32) |
| for grad in gradient_checker.compute_gradient(f, [x]): |
| self.assertEqual(grad[0].shape, (0, 0)) |
| error = gradient_checker.max_error( |
| *gradient_checker.compute_gradient(f, [x])) |
| self.assertEqual(error, 0) |
| |
| def testEmptyMatMul(self): |
| |
| def f(x, y): |
| return math_ops.matmul(x, y) |
| |
| x = constant_op.constant( |
| np.random.random_sample((0, 3)), dtype=dtypes.float32) |
| y = constant_op.constant( |
| np.random.random_sample((3, 4)), dtype=dtypes.float32) |
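    # Each Jacobian has one row per output element and one column per input
    # element; the output here is (0, 4), so both Jacobians have zero rows.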
| for grad in gradient_checker.compute_gradient(f, [x, y]): |
| self.assertEqual(grad[0].shape, (0, 0)) |
| self.assertEqual(grad[1].shape, (0, 12)) |
| error = gradient_checker.max_error( |
| *gradient_checker.compute_gradient(f, [x, y])) |
| self.assertEqual(error, 0) |
| |
| def testEmptyFails(self): |
| |
| @custom_gradient.custom_gradient |
| def id_bad_grad(x): |
| y = array_ops.identity(x) |
| |
| def grad_fn(dy): |
| # dx = constant_op.constant(np.zeros((1, 4)), dtype=dtypes.float32) |
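        # Deliberately return the transposed gradient: its shape (3, 0)
        # does not match the expected shape (0, 3).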
| dx = array_ops.transpose(dy) |
| return dx |
| |
| return y, grad_fn |
| |
| def f(x): |
| return id_bad_grad(x) |
| |
| x = constant_op.constant( |
| np.random.random_sample((0, 3)), dtype=dtypes.float32) |
| bad = r"Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)" |
| with self.assertRaisesRegex(ValueError, bad): |
| gradient_checker.compute_gradient(f, [x]) |
| |
| def testNaNGradFails(self): |
| |
| @custom_gradient.custom_gradient |
| def id_nan_grad(x): |
| y = array_ops.identity(x) |
| |
| def grad_fn(dy): |
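        # Deliberately return an all-NaN gradient so the max error is NaN.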
| dx = np.nan * dy |
| # dx = dy |
| return dx |
| |
| return y, grad_fn |
| |
| def f(x): |
| return id_nan_grad(x) |
| |
| x = constant_op.constant( |
| np.random.random_sample((1, 1)), dtype=dtypes.float32) |
| error = gradient_checker.max_error( |
| *gradient_checker.compute_gradient(f, [x])) |
    # A typical test would assert error < max_err; check that such an
    # assertion raises AssertionError here, since NaN is not < 1.0.
| with self.assertRaisesRegex(AssertionError, "nan not less than 1.0"): |
| self.assertLess(error, 1.0) |
| |
| def testGradGrad(self): |
| |
| def f(x): |
| with backprop.GradientTape() as tape: |
| tape.watch(x) |
| y = math_ops.square(x) |
| z = math_ops.square(y) |
| return tape.gradient(z, x) |
| |
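    # f computes d/dx(x^4) = 4*x^3, whose derivative is 12*x^2 = 48 at x = 2.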
| analytical, numerical = gradient_checker.compute_gradient(f, [2.0]) |
| self.assertAllEqual([[[48.]]], analytical) |
| self.assertAllClose([[[48.]]], numerical, rtol=1e-4) |
| |
| |
| @test_util.run_all_in_graph_and_eager_modes |
| class MiniMNISTTest(test.TestCase): |
| |
  # Builds a mini MNIST network and gradient-checks one of its parameters.
| def _BuildAndTestMiniMNIST(self, param_index, tag): |
| # Fix seed to avoid occasional flakiness |
| np.random.seed(6) |
| |
| # Hyperparameters |
| batch = 3 |
| inputs = 16 |
| features = 32 |
| classes = 10 |
| |
| # Define the parameters |
| inp_data = np.random.random_sample(inputs * batch) |
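    # Scaling the weights by 1 / sqrt(fan_in) keeps the pre-activations
    # roughly unit-scale.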
| hidden_weight_data = np.random.randn(inputs * features) / np.sqrt(inputs) |
| hidden_bias_data = np.random.random_sample(features) |
| sm_weight_data = np.random.randn(features * classes) / np.sqrt(features) |
| sm_bias_data = np.random.random_sample(classes) |
| |
    # Labels need special care: each row must be normalized to sum to one.
| label_data = np.random.random(batch * classes).reshape((batch, classes)) |
| s = label_data.sum(axis=1) |
| label_data /= s[:, None] |
| |
| # We treat the inputs as "parameters" here |
| inp = constant_op.constant( |
| inp_data.tolist(), |
| shape=[batch, inputs], |
| dtype=dtypes.float64, |
| name="inp") |
| hidden_weight = constant_op.constant( |
| hidden_weight_data.tolist(), |
| shape=[inputs, features], |
| dtype=dtypes.float64, |
| name="hidden_weight") |
| hidden_bias = constant_op.constant( |
| hidden_bias_data.tolist(), |
| shape=[features], |
| dtype=dtypes.float64, |
| name="hidden_bias") |
| softmax_weight = constant_op.constant( |
| sm_weight_data.tolist(), |
| shape=[features, classes], |
| dtype=dtypes.float64, |
| name="softmax_weight") |
| softmax_bias = constant_op.constant( |
| sm_bias_data.tolist(), |
| shape=[classes], |
| dtype=dtypes.float64, |
| name="softmax_bias") |
| |
    # List all the parameters so that we can test them one at a time.
| all_params = [inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias] |
| |
    # Build the mini MNIST network.
| def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias): |
| features = nn_ops.relu( |
| nn_ops.xw_plus_b(inp, hidden_weight, hidden_bias), name="features") |
| logits = nn_ops.xw_plus_b( |
| features, softmax_weight, softmax_bias, name="logits") |
| labels = constant_op.constant( |
| label_data.tolist(), |
| shape=[batch, classes], |
| dtype=dtypes.float64, |
| name="labels") |
| cost = nn_ops.softmax_cross_entropy_with_logits( |
| labels=labels, logits=logits, name="cost") |
| return cost |
| |
| def f_restricted(x): |
| xs = all_params |
| i = param_index |
| # use x for the i-th parameter |
| xs = xs[0:i] + [x] + xs[i + 1:] |
| return f(*xs) |
| |
| # Test the gradients. |
| err = gradient_checker.max_error(*gradient_checker.compute_gradient( |
| f_restricted, [all_params[param_index]], delta=1e-5)) |
| |
| tf_logging.info("Mini MNIST: %s gradient error = %g", tag, err) |
| return err |
| |
| def testInputGradient(self): |
| self.assertLess(self._BuildAndTestMiniMNIST(0, "input"), 1e-8) |
| |
| def testHiddenWeightGradient(self): |
| self.assertLess(self._BuildAndTestMiniMNIST(1, "hidden_weight"), 1e-8) |
| |
| def testHiddenBiasGradient(self): |
| self.assertLess(self._BuildAndTestMiniMNIST(2, "hidden_bias"), 1e-8) |
| |
| def testSoftmaxWeightGradient(self): |
| self.assertLess(self._BuildAndTestMiniMNIST(3, "softmax_weight"), 1e-8) |
| |
| def testSoftmaxBiasGradient(self): |
| self.assertLess(self._BuildAndTestMiniMNIST(4, "softmax_bias"), 1e-8) |
| |
| |
| if __name__ == "__main__": |
| test.main() |