blob: d0ca2712d8a838e2bfb4a06a39006d6f624724ff [file] [log] [blame]
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for running compilation and collect training data."""
import subprocess
from typing import Tuple, Optional
import tensorflow as tf
def _postprocessing_sequence_example(
    sequence_example: tf.train.SequenceExample, default_reward: float,
    policy_reward: float) -> tf.train.SequenceExample:
  """Overwrite per-step rewards with a whole-trajectory reward.

  The trajectory reward is the relative improvement of the TF policy over
  the default policy, `1 - policy_reward / default_reward`; one copy of it
  is appended to the 'reward' feature list for every step in the trace.

  Args:
    sequence_example: A tf.SequenceExample proto describing compilation trace.
    default_reward: The reward under default policy. Must be non-zero
      (callers guard against the zero case).
    policy_reward: The reward under the TF policy.

  Returns:
    The same tf.SequenceExample proto, mutated in place.
  """
  trajectory_reward = 1 - policy_reward / default_reward
  feature_lists = sequence_example.feature_lists.feature_list
  # Every feature list in the trace has one entry per step; use an arbitrary
  # one to determine the trajectory length.
  num_steps = len(next(iter(feature_lists.values())).feature)
  rewards = feature_lists['reward']
  for _ in range(num_steps):
    rewards.feature.add().float_list.value.append(trajectory_reward)
  return sequence_example
class CompilationRunner:
  """Base class for collecting compilation data.

  Subclasses implement `_compile_fn`; this class drives it to collect one
  training example per module and maintains a moving-average reward baseline.
  """

  def __init__(self,
               clang_path: str,
               llvm_size_path: str,
               launcher_path: Optional[str] = None,
               moving_average_decay_rate: float = 1):
    """Initialization of CompilationRunner class.

    Args:
      clang_path: path to the clang binary.
      llvm_size_path: path to the llvm-size binary.
      launcher_path: path to the launcher binary.
      moving_average_decay_rate: moving average decay rate during training.
        NOTE: with the default of 1, the moving average never incorporates
        new policy rewards (see the update formula in `collect_data`).
    """
    self._clang_path = clang_path
    self._llvm_size_path = llvm_size_path
    self._launcher_path = launcher_path
    self._moving_average_decay_rate = moving_average_decay_rate

  def collect_data(
      self, file_paths: Tuple[str, ...], tf_policy_path: str,
      default_reward: Optional[float], moving_average_reward: Optional[float]
  ) -> Tuple[str, float, float, float]:
    """Collect data for the given IR file and policy.

    Args:
      file_paths: path to files needed for inlining, Tuple of (.bc, .cmd).
      tf_policy_path: path to the tensorflow policy.
      default_reward: reward under default policy, None if unknown.
      moving_average_reward: moving average reward during training, None if
        unknown.

    Returns:
      A tuple containing:
        sequence_example: A serialized tf.SequenceExample proto.
        default_reward: reward under default policy.
        moving_average_reward: updated moving average reward.
        policy_reward: reward under the ml policy.

    Raises:
      subprocess.CalledProcessError if process fails.
    """
    # NOTE: the original code wrapped the _compile_fn calls in
    # `try/except subprocess.CalledProcessError as e: raise e`, which is a
    # no-op; the exception now simply propagates, as documented above.
    if default_reward is None:
      # Compute the baseline with the default policy. When we are also going
      # to compile under a TF policy, only the reward is needed here.
      default_sequence_example, default_reward = self._compile_fn(
          file_paths, tf_policy_path='', reward_only=bool(tf_policy_path))
      moving_average_reward = default_reward

    # Return empty example if the default policy size is 0 since it is a data
    # only module and we can do nothing about it.
    if default_reward == 0:
      return '', 0, 0, 0

    if tf_policy_path:
      sequence_example, policy_reward = self._compile_fn(
          file_paths, tf_policy_path, reward_only=False)
    else:
      # No TF policy: train on the default-policy trace itself.
      sequence_example, policy_reward = (default_sequence_example,
                                         default_reward)

    if not sequence_example.HasField('feature_lists'):
      return '', 0, 0, 0

    # The reward baseline used for post-processing is the moving average,
    # not the one-shot default reward.
    sequence_example = _postprocessing_sequence_example(sequence_example,
                                                        moving_average_reward,
                                                        policy_reward)
    moving_average_reward = (
        moving_average_reward * self._moving_average_decay_rate +
        policy_reward * (1 - self._moving_average_decay_rate))
    return (sequence_example.SerializeToString(), default_reward,
            moving_average_reward, policy_reward)

  def _compile_fn(self, file_paths: Tuple[str, ...], tf_policy_path: str,
                  reward_only: bool) -> Tuple[tf.train.SequenceExample, float]:
    """Compiles for the given IR file under the given policy.

    Must be overridden by subclasses.

    Args:
      file_paths: path to files needed for compilation.
      tf_policy_path: path to TF policy directory on local disk.
      reward_only: whether only return reward.

    Returns:
      A tuple containing:
        sequence_example: A tf.SequenceExample proto describing compilation
          trace.
        reward: reward under the policy.

    Raises:
      subprocess.CalledProcessError if process fails.
    """
    raise NotImplementedError('Not implemented compile fn.')