# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
| """Gradients for operators defined in tensor_array_ops.py.""" |
| from tensorflow.python.framework import ops |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import tensor_array_ops |
| |
# TODO(b/31222613): These ops may be differentiable, and there may be
# latent bugs here.
ops.NotDifferentiable("TensorArray")
ops.NotDifferentiable("TensorArrayGrad")
ops.NotDifferentiable("TensorArraySize")
ops.NotDifferentiable("TensorArrayClose")

ops.NotDifferentiable("TensorArrayV2")
ops.NotDifferentiable("TensorArrayGradV2")
ops.NotDifferentiable("TensorArraySizeV2")
ops.NotDifferentiable("TensorArrayCloseV2")

ops.NotDifferentiable("TensorArrayV3")
ops.NotDifferentiable("TensorArrayGradV3")
ops.NotDifferentiable("TensorArrayGradWithShape")
ops.NotDifferentiable("TensorArraySizeV3")
ops.NotDifferentiable("TensorArrayCloseV3")


def _GetGradSource(op_or_tensor):
  """Identify which call to tf.gradients created this gradient op or tensor.

  TensorArray gradient calls use an accumulator TensorArray object.  If
  multiple gradients are calculated and run in the same session, these
  gradient nodes may accidentally flow through the same accumulator
  TensorArray, double counting the gradient and breaking the TensorArray
  gradient flow.

  The solution is to identify which gradient call this particular
  TensorArray*Grad is being called from, by looking at the input gradient
  tensor's name, and to create or look up an accumulator gradient TensorArray
  associated with this specific call.  This ensures that different gradients
  taken over the same forward graph each get their own accumulator.

  This function creates the unique label associated with the tf.gradients
  call that is used to create the gradient TensorArray.

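  For example, a minimal sketch (TF1-style graph mode; the exact names
  depend on the graph, so treat these as illustrative):

  ```python
  import tensorflow.compat.v1 as tf
  tf.disable_eager_execution()

  x = tf.constant([1.0, 2.0])
  y = x * x
  g1 = tf.gradients(y, x)[0]  # ops created under the "gradients" scope
  g2 = tf.gradients(y, x)[0]  # second call: the "gradients_1" scope
  # _GetGradSource(g1) -> "gradients"
  # _GetGradSource(g2) -> "gradients_1"
  ```
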
  Args:
    op_or_tensor: `Tensor` or `Operation` which is an input to a
      TensorArray*Grad call.

  Returns:
    A python string, the unique label associated with this particular
    gradients calculation.

  Raises:
    ValueError: If not called within a gradients calculation.
  """
  name_tokens = op_or_tensor.name.split("/")
  grad_pos = [i for i, x in enumerate(name_tokens) if x.startswith("gradients")]
  if not grad_pos:
    raise ValueError(
        "Expected op/tensor name to start with gradients (excluding scope)"
        f", got: {op_or_tensor.name}. This means that a tf.gradients op with "
        "this op in its dependency path has a custom name that does not start "
        "with 'gradients'. Please make sure all calls to tf.gradients that "
        "have non-empty `name` arguments use names that start with "
        "'gradients'.")
  return "/".join(name_tokens[:grad_pos[-1] + 1])


@ops.RegisterGradient("TensorArrayRead")
@ops.RegisterGradient("TensorArrayReadV2")
@ops.RegisterGradient("TensorArrayReadV3")
def _TensorArrayReadGrad(op, grad):
  """Gradient for TensorArrayRead.

  Args:
    op: Forward TensorArrayRead op.
    grad: Gradient `Tensor` to TensorArrayRead.

  Returns:
    A flow `Tensor`, which can be used in control dependencies to
    force the write of `grad` to the gradient `TensorArray`.
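
  For example, a minimal sketch (TF1-style graph mode; `x` is a
  hypothetical `Tensor`):

  ```python
  ta = tf.TensorArray(tf.float32, size=1).write(0, x)
  y = ta.read(0)
  # In tf.gradients(y, x), the incoming `grad` (dL/dy) is written at
  # index 0 of an accumulator TensorArray; its flow is returned here.
  ```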
| """ |
| # Note: the forward flow dependency in the call to grad() is necessary for |
| # the case of dynamic sized TensorArrays. When creating the gradient |
| # TensorArray, the final size of the forward array must be known. |
| # For this we need to wait until it has been created by depending on |
| # the input flow of the original op. |
  handle = op.inputs[0]
  index = op.inputs[1]
  flow = op.inputs[2]
  dtype = op.get_attr("dtype")
  grad_source = _GetGradSource(grad)
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  w_g = g.write(index, grad)
  return [None, None, w_g.flow]


@ops.RegisterGradient("TensorArrayWrite")
@ops.RegisterGradient("TensorArrayWriteV2")
@ops.RegisterGradient("TensorArrayWriteV3")
def _TensorArrayWriteGrad(op, flow):
  """Gradient for TensorArrayWrite.

  Args:
    op: Forward TensorArrayWrite op.
    flow: Gradient `Tensor` flow to TensorArrayWrite.

  Returns:
    A grad `Tensor`, the gradient created in an upstream ReadGrad or PackGrad.
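
  For example, a minimal sketch (TF1-style graph mode; `x` is a
  hypothetical `Tensor`):

  ```python
  ta = tf.TensorArray(tf.float32, size=1)
  y = ta.write(0, x).read(0)
  # The gradient of the write is a read at the same index from the
  # accumulator TensorArray, yielding dL/dx for the written value.
  ```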
| """ |
| # handle is the output store_handle of TensorArrayReadGrad or |
| # the handle output of TensorArrayWriteGrad. we must use this one. |
  handle = op.inputs[0]
  index = op.inputs[1]
  dtype = op.get_attr("T")
  grad_source = _GetGradSource(flow)
  flow_out = array_ops.identity(op.outputs[0], "flow_out")
  # Avoid a race condition where the TensorArrayGrad op is executed before
  # the final TensorArrayWrite, by adding a control dependency from the
  # write's output flow to the input flow of the TensorArrayGrad.
  with ops.control_dependencies([flow_out]):
    flow = array_ops.identity(flow, "write_barrier")
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  grad = g.read(index)
  return [None, None, grad, flow]


@ops.RegisterGradient("TensorArrayGather")
@ops.RegisterGradient("TensorArrayGatherV2")
@ops.RegisterGradient("TensorArrayGatherV3")
def _TensorArrayGatherGrad(op, grad):
  """Gradient for TensorArrayGather.

  Args:
    op: Forward TensorArrayGather op.
    grad: Gradient `Tensor` to TensorArrayGather.

  Returns:
    A flow `Tensor`, which can be used in control dependencies to
    force the write of `grad` to the gradient `TensorArray`.
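
  For example, a minimal sketch (TF1-style graph mode; `x` is a
  hypothetical `Tensor`):

  ```python
  ta = tf.TensorArray(tf.float32, size=2).write(0, x).write(1, x)
  y = ta.gather([0, 1])  # leading dimension matches the indices
  # The incoming `grad` has the same leading dimension as the indices;
  # it is scattered back to indices 0 and 1 of the accumulator.
  ```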
| """ |
| # Note: the forward flow dependency in the call to grad() is necessary for |
| # the case of dynamic sized TensorArrays. When creating the gradient |
| # TensorArray, the final size of the forward array must be known. |
| # For this we need to wait until it has been created by depending on |
| # the input flow of the original op. |
  handle = op.inputs[0]
  indices = op.inputs[1]
  flow = op.inputs[2]
  dtype = op.get_attr("dtype")
  grad_source = _GetGradSource(grad)
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  u_g = g.scatter(indices, grad)
  return [None, None, u_g.flow]


@ops.RegisterGradient("TensorArrayScatter")
@ops.RegisterGradient("TensorArrayScatterV2")
@ops.RegisterGradient("TensorArrayScatterV3")
def _TensorArrayScatterGrad(op, flow):
  """Gradient for TensorArrayScatter.

  Args:
    op: Forward TensorArrayScatter op.
    flow: Gradient `Tensor` flow to TensorArrayScatter.

  Returns:
    A grad `Tensor`, the gradient created in upstream ReadGrads or PackGrad.
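
  For example, a minimal sketch (TF1-style graph mode; `x` is a
  hypothetical `Tensor` with leading dimension 2):

  ```python
  ta = tf.TensorArray(tf.float32, size=2).scatter([0, 1], x)
  y = ta.read(0) + ta.read(1)
  # The gradient of the scatter is a gather of the same indices from the
  # accumulator, producing dL/dx with x's leading dimension restored.
  ```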
| """ |
| handle = op.inputs[0] |
| indices = op.inputs[1] |
| dtype = op.get_attr("T") |
| grad_source = _GetGradSource(flow) |
| flow_out = array_ops.identity(op.outputs[0], "flow_out") |
| # Avoid a race condition where the TensorArrayGrad op is executed before the |
| # TensorArrayScatter by adding a control dependency on the output flow of |
| # the scatter to the input flow to the TensorArrayGrad. |
| with ops.control_dependencies([flow_out]): |
| flow = array_ops.identity(flow, "write_barrier") |
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  grad = g.gather(indices)
  return [None, None, grad, flow]


@ops.RegisterGradient("TensorArrayConcat")
@ops.RegisterGradient("TensorArrayConcatV2")
@ops.RegisterGradient("TensorArrayConcatV3")
def _TensorArrayConcatGrad(op, grad, unused_lengths_grad):
  """Gradient for TensorArrayConcat.

  Args:
    op: Forward TensorArrayConcat op.
    grad: Gradient `Tensor` to TensorArrayConcat.
    unused_lengths_grad: Gradient `Tensor` to the `lengths` output (unused).

  Returns:
    A flow `Tensor`, which can be used in control dependencies to
    force the write of `grad` to the gradient `TensorArray`.
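
  For example, a minimal sketch (TF1-style graph mode; `x0` and `x1` are
  hypothetical `Tensor`s):

  ```python
  ta = tf.TensorArray(tf.float32, size=2).write(0, x0).write(1, x1)
  y = ta.concat()  # x0 and x1 joined along their leading axis
  # The incoming `grad` is split back into per-element pieces using the
  # `lengths` output of the forward concat op.
  ```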
| """ |
| # Note: the forward flow dependency in the call to grad() is necessary for |
| # the case of dynamic sized TensorArrays. When creating the gradient |
| # TensorArray, the final size of the forward array must be known. |
| # For this we need to wait until it has been created by depending on |
| # the input flow of the original op. |
  handle = op.inputs[0]
  flow = op.inputs[1]
  lengths = op.outputs[1]
  dtype = op.get_attr("dtype")
  grad_source = _GetGradSource(grad)
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  u_g = g.split(grad, lengths=lengths)
  # handle, flow_in
  return [None, u_g.flow]


@ops.RegisterGradient("TensorArraySplit")
@ops.RegisterGradient("TensorArraySplitV2")
@ops.RegisterGradient("TensorArraySplitV3")
def _TensorArraySplitGrad(op, flow):
  """Gradient for TensorArraySplit.

  Args:
    op: Forward TensorArraySplit op.
    flow: Gradient `Tensor` flow to TensorArraySplit.

  Returns:
    A grad `Tensor`, the gradient created in upstream ReadGrads or PackGrad.
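
  For example, a minimal sketch (TF1-style graph mode; `x` is a
  hypothetical `Tensor` whose leading dimension is 5):

  ```python
  ta = tf.TensorArray(tf.float32, size=2).split(x, lengths=[2, 3])
  y = ta.read(0)
  # The gradient of the split is a concat of the accumulator's elements,
  # restoring x's original leading dimension.
  ```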
| """ |
| handle = op.inputs[0] |
| dtype = op.get_attr("T") |
| grad_source = _GetGradSource(flow) |
| flow_out = array_ops.identity(op.outputs[0], "flow_out") |
| # Avoid a race condition where the TensorArrayGrad op is executed before the |
| # TensorArraySplit by adding a control dependency on the output flow of |
| # the split to the input flow to the TensorArrayGrad. |
| with ops.control_dependencies([flow_out]): |
| flow = array_ops.identity(flow, "write_barrier") |
  g = (tensor_array_ops.TensorArray(dtype=dtype, handle=handle, flow=flow,
                                    colocate_with_first_write_call=False)
       .grad(source=grad_source, flow=flow))
  grad = g.concat()
  # handle, value, lengths, flow_in
  return [None, grad, None, flow]