# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark for split and grad of split."""
import numpy as np
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import benchmark
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging as logging


def build_graph(device, input_shape, output_sizes, axis):
  """Build a graph containing a sequence of split operations.

  Args:
    device: string, the device to run on.
    input_shape: shape of the input tensor.
    output_sizes: size of each output along axis.
    axis: the axis to split along.

  Returns:
    A grouped operation that runs all of the split outputs.
  """
  with ops.device("/%s:0" % device):
    inp = array_ops.zeros(input_shape)

    outputs = []
    for _ in range(100):
      outputs.extend(array_ops.split(inp, output_sizes, axis))

    return control_flow_ops.group(*outputs)


class SplitBenchmark(test.Benchmark):
  """Benchmark split!"""

  def _run_graph(self, device, output_shape, variable, num_outputs, axis):
"""Run the graph and print its execution time.
Args:
device: string, the device to run on.
output_shape: shape of each output tensors.
variable: whether or not the output shape should be fixed
num_outputs: the number of outputs to split the input into
axis: axis to be split
Returns:
The duration of the run in seconds.
"""
    graph = ops.Graph()
    with graph.as_default():
      if not variable:
        if axis == 0:
          input_shape = [output_shape[0] * num_outputs, output_shape[1]]
          sizes = [output_shape[0] for _ in range(num_outputs)]
        else:
          input_shape = [output_shape[0], output_shape[1] * num_outputs]
          sizes = [output_shape[1] for _ in range(num_outputs)]
      else:
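        # Draw each output size within 2 of the nominal size along `axis`
        # (clamped to at least 1), so the split is ragged but the total data
        # volume stays comparable to the fixed-size case.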
        sizes = np.random.randint(
            low=max(1, output_shape[axis] - 2),
            high=output_shape[axis] + 2,
            size=num_outputs)
        total_size = np.sum(sizes)

        if axis == 0:
          input_shape = [total_size, output_shape[1]]
        else:
          input_shape = [output_shape[0], total_size]

      outputs = build_graph(device, input_shape, sizes, axis)
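      # Run at optimizer level L0 so graph rewrites (e.g. constant folding)
      # don't eliminate the zero-filled input or the repeated splits.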
      config = config_pb2.ConfigProto(graph_options=config_pb2.GraphOptions(
          optimizer_options=config_pb2.OptimizerOptions(
              opt_level=config_pb2.OptimizerOptions.L0)))
    with session_lib.Session(graph=graph, config=config) as session:
      logging.set_verbosity("info")
      variables.global_variables_initializer().run()
      bench = benchmark.TensorFlowBenchmark()
      bench.run_op_benchmark(
          session,
          outputs,
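          # mbs: estimated MB moved per run; 4 bytes per float32 element,
          # x2 for one read plus one write, x100 for the repeated splits.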
          mbs=input_shape[0] * input_shape[1] * 4 * 2 * 100 / 1e6,
          extras={
              "input_shape": input_shape,
              "variable": variable,
              "axis": axis
          })

  def benchmark_split(self):
    print("Forward vs backward split")
    shapes = [[2000, 8], [8, 2000], [100, 18], [1000, 18], [10000, 18],
              [100, 97], [1000, 97], [10000, 1], [1, 10000]]
    axis_ = [1]  # 0 is very fast because it doesn't actually do any copying
    num_outputs = 100
    variable = [False, True]  # fixed input size or not

    for shape in shapes:
      for axis in axis_:
        for v in variable:
          self._run_graph("gpu", shape, v, num_outputs, axis)
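

# Note: TensorFlow benchmark methods are normally selected at run time with
# the --benchmarks regex flag (e.g. `python split_benchmark.py
# --benchmarks=SplitBenchmark.benchmark_split`); without the flag, test.main()
# runs only the regular test path. Exact invocation depends on the TF version.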
if __name__ == "__main__":
  test.main()