| # Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """A simple LSTM layer with benchmarks. |
| |
| This sets up a simple LSTM (Long Short Term Memory) layer, unrolled to a fixed |
| length sequence. The only deviation from standard LSTM cells is that |
| activations are clipped, inspired by the GNMT machine translation model. |
| The GNMT paper has more details: https://arxiv.org/abs/1609.08144 |
| """ |
| |
| from six.moves import range |
| |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import ops |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import random_ops |
| from tensorflow.python.ops import variable_v1 |
| |
| |
def Clip(x):
  """Clamps the values of x element-wise to the range [-1., 1.]."""
  # min(max(x, -1), 1) is equivalent to max(min(x, 1), -1) for all inputs.
  return math_ops.minimum(math_ops.maximum(x, -1.), 1.)
| |
| |
def LSTMCellWeightsShape(num_inputs, num_nodes):
  """Returns the shape of the weights for a single LSTM cell.

  The first dimension covers the input x concatenated with the previous m
  state; the second covers the in value plus the (in, forget, out) gates.
  """
  combined_input_size = num_inputs + num_nodes
  num_value_and_gates = 4  # in value + in/forget/out gates
  return [combined_input_size, num_value_and_gates * num_nodes]
| |
| |
def LSTMCell(weights, m_prev, c_prev, x, pad):
  """Unrolls a single LSTM cell with clipped activations forward by one step.

  Args:
    weights: Weight matrix with shape LSTMCellWeightsShape.
    m_prev: Previous m states with shape [batch_size, num_nodes].
    c_prev: Previous c states with shape [batch_size, num_nodes].
    x: Input with shape [batch_size, num_inputs].
    pad: Padding with shape [batch_size, 1]. Each padding value is either
      0 or 1, where 1 indicates padding; i.e. the input is shorter than the
      sequence length, and the (m, c) states should simply be passed through
      from the previous states.
  Returns:
    The next (m, c) states, each with shape [batch_size, num_nodes].
  """
  # One fused matmul over [x, m_prev] computes all four pre-activations at
  # once; this is the dominant cost of the cell.
  combined = array_ops.concat([x, m_prev], 1)
  preact = math_ops.matmul(combined, weights)

  # Split into the candidate value and the three gates, then apply the
  # standard LSTM nonlinearities with clipping. XLA can fuse these
  # element-wise ops into a single loop.
  value, gate_i, gate_f, gate_o = array_ops.split(
      value=preact, num_or_size_splits=4, axis=1)
  value = math_ops.tanh(value)
  gate_i = math_ops.sigmoid(gate_i)
  gate_f = math_ops.sigmoid(gate_f)
  gate_o = math_ops.sigmoid(gate_o)
  c_next = Clip(Clip(gate_f * c_prev) + Clip(gate_i * value))
  m_next = Clip(gate_o * c_next)

  # Where pad == 1, the previous states are carried through unchanged.
  keep = 1.0 - pad
  c_next = c_prev * pad + c_next * keep
  m_next = m_prev * pad + m_next * keep

  return m_next, c_next
| |
| |
def LSTMLayer(cell_name, weights, m, c, x_seq, pad_seq):
  """Unrolls a layer of LSTM cells forward by the sequence length.

  The sequence length is determined by the length of x_seq and pad_seq, which
  must be the same.

  Args:
    cell_name: Base name of each cell.
    weights: Weight matrix with shape LSTMCellWeightsShape.
    m: Initial m states with shape [batch_size, num_nodes].
    c: Initial c states with shape [batch_size, num_nodes].
    x_seq: List of inputs, each with shape [batch_size, num_inputs].
      The length of the list is the sequence length.
    pad_seq: List of paddings, each with shape [batch_size, 1].
      The length of the list is the sequence length.
      Each padding value is either 0 or 1, where 1 indicates padding;
      i.e. the input is shorter than the sequence length.
  Returns:
    List of per-sequence-step outputs, each with shape [batch_size, num_nodes].
  Raises:
    ValueError: If len(x_seq) != len(pad_seq).
  """
  if len(x_seq) != len(pad_seq):
    raise ValueError('length of x_seq(%d) != pad_seq(%d)' %
                     (len(x_seq), len(pad_seq)))
  out_seq = []
  # Iterate the input and padding sequences in lockstep rather than indexing
  # by position; seq is still needed to give each unrolled cell a unique name.
  for seq, (x, pad) in enumerate(zip(x_seq, pad_seq)):
    with ops.name_scope('%s_%d' % (cell_name, seq)):
      m, c = LSTMCell(weights, m, c, x, pad)
      # Identity gives the per-step output a stable, greppable name.
      out_seq.append(array_ops.identity(m, name='out'))
  return out_seq
| |
| |
def RandomVar(shape, name=None):
  """Creates a float32 variable of the given shape with uniform random values."""
  initial_value = random_ops.random_uniform(shape)
  return variable_v1.VariableV1(
      initial_value, dtype=dtypes.float32, name=name)
| |
| |
def RandomInputs(batch_size, seq_length, num_inputs):
  """Builds randomly initialized (x_seq, pad_seq) sequences of length seq_length."""
  x_seq = []
  pad_seq = []
  with ops.name_scope('inputs'):
    for step in range(seq_length):
      x_seq.append(
          RandomVar([batch_size, num_inputs], name='x_seq_%d' % step))
      # Real padding values are always a run of 0s followed by a run of 1s,
      # but random values suffice for benchmarking.
      pad_seq.append(RandomVar([batch_size, 1], name='pad_seq_%d' % step))
  return x_seq, pad_seq
| |
| |
def BuildLSTMLayer(batch_size, seq_length, num_inputs, num_nodes):
  """Builds a single LSTM layer with random weights and inputs.

  Args:
    batch_size: Inputs are fed in batches of this size.
    seq_length: The sequence length to unroll the LSTM layer.
    num_inputs: Dimension of inputs that are fed into each LSTM cell.
    num_nodes: The number of nodes in each LSTM cell.

  Returns:
    (out_seq, weights) pair. The out_seq is a list of per-sequence-step
    outputs, each with shape [batch_size, num_nodes]. The weights are a list of
    weight variables that may be trained.
  """
  weight_shape = LSTMCellWeightsShape(num_inputs, num_nodes)
  weights = RandomVar(weight_shape, name='weights')
  # Both the m and c states start at zero.
  init_m = array_ops.zeros([batch_size, num_nodes], name='init_m')
  init_c = array_ops.zeros([batch_size, num_nodes], name='init_c')
  x_seq, pad_seq = RandomInputs(batch_size, seq_length, num_inputs)
  out_seq = LSTMLayer('lstm', weights, init_m, init_c, x_seq, pad_seq)
  return out_seq, [weights]