| # Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| |
| """Operations for clipping (gradient, weight) tensors to min/max values.""" |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import indexed_slices |
| from tensorflow.python.framework import ops |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import array_ops_stack |
| from tensorflow.python.ops import gen_array_ops |
| from tensorflow.python.ops import gen_nn_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.util import deprecation |
| from tensorflow.python.util import dispatch |
| from tensorflow.python.util.compat import collections_abc |
| from tensorflow.python.util.tf_export import tf_export |
| |
| |
@tf_export("clip_by_value")
@dispatch.register_unary_elementwise_api
@dispatch.add_dispatch_support
def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
  """Clips tensor values to a specified min and max.

  Given a tensor `t`, this operation produces a tensor of the same type and
  shape whose values are bounded below by `clip_value_min` and above by
  `clip_value_max`: entries below the minimum are replaced by the minimum,
  entries above the maximum by the maximum.

  Note: `clip_value_min` needs to be smaller or equal to `clip_value_max` for
  correct results.

  For example:

  Basic usage passes a scalar as the min and max value.

  >>> t = tf.constant([[-10., -1., 0.], [0., 2., 10.]])
  >>> t2 = tf.clip_by_value(t, clip_value_min=-1, clip_value_max=1)
  >>> t2.numpy()
  array([[-1., -1.,  0.],
         [ 0.,  1.,  1.]], dtype=float32)

  The min and max can be the same size as `t`, or broadcastable to that size.

  >>> t = tf.constant([[-1, 0., 10.], [-1, 0, 10]])
  >>> clip_min = [[2],[1]]
  >>> t3 = tf.clip_by_value(t, clip_value_min=clip_min, clip_value_max=100)
  >>> t3.numpy()
  array([[ 2.,  2., 10.],
         [ 1.,  1., 10.]], dtype=float32)

  Broadcasting fails, intentionally, if you would expand the dimensions of `t`

  >>> t = tf.constant([[-1, 0., 10.], [-1, 0, 10]])
  >>> clip_min = [[[2, 1]]] # Has a third axis
  >>> t4 = tf.clip_by_value(t, clip_value_min=clip_min, clip_value_max=100)
  Traceback (most recent call last):
  ...
  InvalidArgumentError: Incompatible shapes: [2,3] vs. [1,1,2]

  It throws a `TypeError` if you try to clip an `int` to a `float` value
  (`tf.cast` the input to `float` first).

  >>> t = tf.constant([[1, 2], [3, 4]], dtype=tf.int32)
  >>> t5 = tf.clip_by_value(t, clip_value_min=-3.1, clip_value_max=3.1)
  Traceback (most recent call last):
  ...
  TypeError: Cannot convert ...


  Args:
    t: A `Tensor` or `IndexedSlices`.
    clip_value_min: The minimum value to clip to. A scalar `Tensor` or one that
      is broadcastable to the shape of `t`.
    clip_value_max: The maximum value to clip to. A scalar `Tensor` or one that
      is broadcastable to the shape of `t`.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor` or `IndexedSlices`.

  Raises:
    `tf.errors.InvalidArgumentError`: If the clip tensors would trigger array
      broadcasting that would make the returned tensor larger than the input.
    TypeError: If dtype of the input is `int32` and dtype of
      the `clip_value_min` or `clip_value_max` is `float32`
  """
  with ops.name_scope(name, "clip_by_value",
                      [t, clip_value_min, clip_value_max]) as name:
    is_sparse = isinstance(t, indexed_slices.IndexedSlices)
    values = ops.convert_to_tensor(
        t.values if is_sparse else t, name="t")

    # Clip from above first, then from below.
    clipped = math_ops.minimum(values, clip_value_max)
    # Assert that the shape is compatible with the initial shape,
    # to prevent unintentional broadcasting.
    values.shape.assert_is_compatible_with(clipped.shape)

    clipped = math_ops.maximum(clipped, clip_value_min, name=name)
    values.shape.assert_is_compatible_with(clipped.shape)

    # Re-wrap the clipped values in an IndexedSlices if that is what came in.
    if is_sparse:
      clipped = indexed_slices.IndexedSlices(clipped, t.indices, t.dense_shape)

    return clipped
| |
| |
@ops.RegisterGradient("ClipByValue")
def _clip_by_value_grad(op, grad):
  """Returns grad of clip_by_value.

  Args:
    op: The `ClipByValue` op; inputs are (t, clip_value_min, clip_value_max).
    grad: Incoming gradient with respect to the op's output.

  Returns:
    A 3-tuple of gradients with respect to (t, clip_value_min,
    clip_value_max).
  """
  x = op.inputs[0]  # The tensor being clipped.
  y = op.inputs[1]  # clip_value_min.
  z = op.inputs[2]  # clip_value_max.
  gdtype = grad.dtype
  sx = array_ops.shape(x)
  sy = array_ops.shape(y)
  sz = array_ops.shape(z)
  gradshape = array_ops.shape(grad)
  zeros = array_ops.zeros(gradshape, gdtype)
  # Masks of the positions where the clip bounds were active.
  xymask = math_ops.less(x, y)      # x was clipped up to the min.
  xzmask = math_ops.greater(x, z)   # x was clipped down to the max.
  # Reduction axes needed to undo broadcasting of the (possibly smaller)
  # clip bounds. The first result (axes for x) is discarded: `grad` is
  # presumably already shaped like `x`, so no reduction is needed for it —
  # NOTE(review): confirm against the ClipByValue op's output-shape contract.
  _, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  _, rz = gen_array_ops.broadcast_gradient_args(sx, sz)
  # Gradient flows to x only where neither bound was active; to y (min) where
  # the min was active; to z (max) where the max was active.
  xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad)
  ygrad = array_ops.where(xymask, grad, zeros)
  zgrad = array_ops.where(xzmask, grad, zeros)
  # Sum over the broadcast axes and reshape back to the bounds' shapes.
  gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy)
  gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz)
  return xgrad, gy, gz
| |
| |
@tf_export("clip_by_norm")
@dispatch.add_dispatch_support
def clip_by_norm(t, clip_norm, axes=None, name=None):
  """Clips tensor values to a maximum L2-norm.

  Given a tensor `t` and a maximum clip value `clip_norm`, this operation
  rescales `t` so that its L2-norm along the dimensions in `axes` does not
  exceed `clip_norm`. In the default case, where all dimensions are used, a
  tensor whose L2-norm is already at most `clip_norm` is returned unchanged;
  otherwise the result is a tensor of the same type and shape with values:

  `t * clip_norm / l2norm(t)`

  In this case, the L2-norm of the output tensor is `clip_norm`.

  As another example, if `t` is a matrix and `axes == [1]`, then each row
  of the output will have L2-norm less than or equal to `clip_norm`. If
  `axes == [0]` instead, each column of the output will be clipped.

  Code example:

  >>> some_nums = tf.constant([[1, 2, 3, 4, 5]], dtype=tf.float32)
  >>> tf.clip_by_norm(some_nums, 2.0).numpy()
  array([[0.26967996, 0.5393599 , 0.80903983, 1.0787199 , 1.3483998 ]],
        dtype=float32)

  This operation is typically used to clip gradients before applying them with
  an optimizer. Most gradient data is a collection of different shaped tensors
  for different parts of the model. Thus, this is a common usage:

  ```
  # Get your gradients after training
  loss_value, grads = grad(model, features, labels)

  # Apply some clipping
  grads = [tf.clip_by_norm(g, norm)
               for g in grads]

  # Continue on with training
  optimizer.apply_gradients(grads)
  ```

  Args:
    t: A `Tensor` or `IndexedSlices`. This must be a floating point type.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value, also
      floating point
    axes: A 1-D (vector) `Tensor` of type int32 containing the dimensions
      to use for computing the L2-norm. If `None` (the default), uses all
      dimensions.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor` or `IndexedSlices`.

  Raises:
    ValueError: If the clip_norm tensor is not a 0-D scalar tensor.
    TypeError: If dtype of the input is not a floating point or
      complex type.
  """
  with ops.name_scope(name, "clip_by_norm", [t, clip_norm]) as name:
    is_sparse = isinstance(t, indexed_slices.IndexedSlices)
    values = ops.convert_to_tensor(
        t.values if is_sparse else t, name="t")

    # Squared L2-norm over `axes`, kept broadcastable against `values`.
    l2sum = math_ops.reduce_sum(values * values, axes, keepdims=True)
    positive = l2sum > 0
    # Two-tap tf.where trick: only take sqrt of strictly positive sums so a
    # zero norm does not produce NaN gradients.
    safe_l2sum = array_ops.where(positive, l2sum, array_ops.ones_like(l2sum))
    l2norm = array_ops.where(positive, math_ops.sqrt(safe_l2sum), l2sum)

    scaled = values * clip_norm
    # Assert that the shape is compatible with the initial shape,
    # to prevent unintentional broadcasting.
    values.shape.assert_is_compatible_with(scaled.shape)
    # Dividing by max(l2norm, clip_norm) leaves small tensors untouched and
    # rescales large ones to exactly clip_norm.
    values_clip = array_ops.identity(
        scaled / math_ops.maximum(l2norm, clip_norm), name=name)

    if is_sparse:
      return indexed_slices.IndexedSlices(values_clip, t.indices,
                                          t.dense_shape)

    return values_clip
| |
| |
@tf_export("linalg.global_norm", v1=["linalg.global_norm", "global_norm"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("global_norm")
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  if (not isinstance(t_list, collections_abc.Sequence) or
      isinstance(t_list, str)):
    raise TypeError("`t_list` should be a sequence of tensors. Received "
                    f"{type(t_list)}.")
  t_list = list(t_list)
  with ops.name_scope(name, "global_norm", t_list) as name:
    # Convert each non-None entry (unwrapping IndexedSlices) to a tensor.
    values = []
    for i, t in enumerate(t_list):
      if t is None:
        values.append(None)
      else:
        dense = t.values if isinstance(t, indexed_slices.IndexedSlices) else t
        values.append(ops.convert_to_tensor(dense, name="t_%d" % i))

    # l2_loss computes sum(v**2) / 2, colocated with each value's device.
    half_squared_norms = []
    for v in values:
      if v is None:
        continue
      with ops.colocate_with(v):
        half_squared_norms.append(gen_nn_ops.l2_loss(v))

    half_squared_norm = math_ops.reduce_sum(
        array_ops_stack.stack(half_squared_norms))

    # Undo the factor of 1/2 from l2_loss before taking the square root.
    norm = math_ops.sqrt(
        half_squared_norm *
        constant_op.constant(2.0, dtype=half_squared_norm.dtype),
        name="global_norm")

  return norm
| |
| |
@tf_export("clip_by_global_norm")
@dispatch.add_dispatch_support
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values `t_list[i]` are set to:

      t_list[i] * clip_norm / max(global_norm, clip_norm)

  where:

      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  If `global_norm == infinity` then the entries in `t_list` are all set to `NaN`
  to signal that an error occurred.

  Any of the entries of `t_list` that are of type `None` are ignored.

  This is the correct way to perform gradient clipping (Pascanu et al., 2012).

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `list_t`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.

  References:
    On the difficulty of training Recurrent Neural Networks:
      [Pascanu et al., 2012](http://proceedings.mlr.press/v28/pascanu13.html)
      ([pdf](http://proceedings.mlr.press/v28/pascanu13.pdf))
  """
  if (not isinstance(t_list, collections_abc.Sequence) or
      isinstance(t_list, str)):
    raise TypeError("`t_list` should be a sequence of tensors. Received "
                    f"{type(t_list)}.")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)

  with ops.name_scope(name, "clip_by_global_norm",
                      t_list + [clip_norm]) as name:
    # Equivalent to clip_norm / max(use_norm, clip_norm) for finite norms.
    scale_for_finite = clip_norm * math_ops.minimum(
        1.0 / use_norm,
        constant_op.constant(1.0, dtype=use_norm.dtype) / clip_norm)
    # If use_norm is any finite number, this is a no-op. For inf/-inf/NaN,
    # this will make scale NaN.
    scale = scale_for_finite + (use_norm - use_norm)

    # Convert each non-None entry (unwrapping IndexedSlices) to a tensor.
    values = []
    for i, t in enumerate(t_list):
      if t is None:
        values.append(None)
      else:
        dense = t.values if isinstance(t, indexed_slices.IndexedSlices) else t
        values.append(ops.convert_to_tensor(dense, name="t_%d" % i))

    # Scale every value on its own device; None entries pass through.
    values_clipped = []
    for i, v in enumerate(values):
      if v is None:
        values_clipped.append(None)
        continue
      with ops.colocate_with(v):
        values_clipped.append(
            array_ops.identity(v * scale, name="%s_%d" % (name, i)))

    # Re-wrap clipped values whose originals were IndexedSlices.
    list_clipped = []
    for clipped_v, original_t in zip(values_clipped, t_list):
      if isinstance(original_t, indexed_slices.IndexedSlices):
        list_clipped.append(
            indexed_slices.IndexedSlices(clipped_v, original_t.indices,
                                         original_t.dense_shape))
      else:
        list_clipped.append(clipped_v)

  return list_clipped, use_norm
| |
| |
@deprecation.deprecated(
    date=None,
    instructions="clip_by_average_norm is deprecated in TensorFlow 2.0. Please "
    "use clip_by_norm(t, clip_norm * tf.cast(tf.size(t), tf.float32), name) "
    "instead.")
@tf_export(v1=["clip_by_average_norm"])
@dispatch.add_dispatch_support
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t` and a maximum clip value `clip_norm`, this operation
  rescales `t` so that its average L2-norm is at most `clip_norm`. If the
  average L2-norm is already less than or equal to `clip_norm`, `t` is
  returned unmodified; otherwise the result is a tensor of the same type and
  shape with values:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.name_scope(name, "clip_by_average_norm", [t, clip_norm]) as name:
    t = ops.convert_to_tensor(t, name="t")

    # Number of elements, used to turn the L2-norm into an average.
    n_element = math_ops.cast(array_ops.size(t), dtypes.float32)
    # 1 / l2norm(t), computed over all dimensions of t.
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(array_ops.rank(t))))
    # min(n / l2norm, 1 / clip_norm) leaves small tensors unchanged and
    # rescales large ones so their average L2-norm equals clip_norm.
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, constant_op.constant(1.0) / clip_norm),
        name=name)

  return tclip