compiler_opt/rl/compilation_runner.py - third_party/github.com/google/ml-compiler-opt - Git at Google

 # coding=utf-8
 # Copyright 2020 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """Module for running compilation and collect training data."""

 import subprocess
 from typing import Tuple, Optional

 import tensorflow as tf


 def _postprocessing_sequence_example(
     sequence_example: tf.train.SequenceExample, default_reward: float,
     policy_reward: float) -> tf.train.SequenceExample:
   """Stamps one whole-trajectory reward onto every step of the trace.

   The reward is the relative change of the TF policy against the baseline,
   `1 - policy_reward / default_reward`. That single value is written once per
   step into the 'reward' feature list, replacing whatever the list held
   before, so the list always ends up as long as the trajectory.

   Args:
     sequence_example: A tf.SequenceExample proto describing compilation trace.
       It is modified in place.
     default_reward: The baseline reward the policy is compared against.
     policy_reward: The reward under the TF policy.

   Returns:
     The same tf.SequenceExample proto after post-processing. It is returned
     untouched when it holds no feature lists at all.

   Raises:
     ZeroDivisionError: if default_reward is 0.
   """
   reward = 1 - policy_reward / default_reward

   feature_list = sequence_example.feature_lists.feature_list
   # Measure the trajectory before looking up 'reward': the lookup below is
   # relied on to create the entry when it is missing, which would otherwise
   # be counted as a (zero-length) feature list.
   first_list = next(iter(feature_list.values()), None)
   if first_list is None:
     # No recorded steps, hence nothing to annotate.
     return sequence_example
   sequence_length = len(first_list.feature)

   reward_list = feature_list['reward']
   # Drop stale entries so the reward is overwritten rather than extended.
   reward_list.ClearField('feature')
   for _ in range(sequence_length):
     reward_list.feature.add().float_list.value.append(reward)

   return sequence_example


 class CompilationRunner:
   """Base class for collecting compilation data.

   `collect_data` drives the compilation and packages the result as a training
   example. The compilation itself is delegated to `_compile_fn`, which this
   base class leaves unimplemented for subclasses to override.
   """

   def __init__(self,
                clang_path: str,
                llvm_size_path: str,
                launcher_path: Optional[str] = None,
                moving_average_decay_rate: float = 1):
     """Initialization of CompilationRunner class.

     Args:
       clang_path: path to the clang binary.
       llvm_size_path: path to the llvm-size binary.
       launcher_path: path to the launcher binary.
       moving_average_decay_rate: weight kept on the previous moving average
         reward when it is updated; the default of 1 leaves it unchanged.
     """
     self._clang_path = clang_path
     self._llvm_size_path = llvm_size_path
     self._launcher_path = launcher_path
     self._moving_average_decay_rate = moving_average_decay_rate

   def collect_data(
       self, file_paths: Tuple[str, ...], tf_policy_path: str,
       default_reward: Optional[float], moving_average_reward: Optional[float]
   ) -> Tuple[str, float, float, float]:
     """Collect data for the given IR file and policy.

     Args:
       file_paths: path to files needed for inlining, Tuple of (.bc, .cmd).
       tf_policy_path: path to the tensorflow policy. When empty, the trace of
         the default policy is collected instead.
       default_reward: reward under default policy, None if unknown.
       moving_average_reward: moving average reward during training, None if
         unknown. It is replaced by the default reward whenever default_reward
         is None, and seeded from default_reward when only it is None.

     Returns:
       A tuple containing:
         sequence_example: A serialized tf.SequenceExample proto.
         default_reward: reward under default policy.
         moving_average_reward: updated moving average reward.
         policy_reward: reward under the ml policy.
       ('', 0, 0, 0) is returned when the default reward is 0 or when the
       compilation produced no feature lists.

     Raises:
       subprocess.CalledProcessError if process fails (propagated unchanged
       from _compile_fn).
     """
     default_sequence_example = None
     if default_reward is None:
       # The default trace is only worth keeping when no TF policy will
       # produce the trace that is actually returned.
       default_sequence_example, default_reward = self._compile_fn(
           file_paths, tf_policy_path='', reward_only=bool(tf_policy_path))
       moving_average_reward = default_reward
     elif moving_average_reward is None:
       # Baseline known but no running average yet: start from the baseline,
       # exactly as a first run does.
       moving_average_reward = default_reward

     # Return empty example if the default policy size is 0 since it is a data
     # only module and we can do nothing about it.
     if default_reward == 0:
       return '', 0, 0, 0

     if tf_policy_path:
       sequence_example, policy_reward = self._compile_fn(
           file_paths, tf_policy_path, reward_only=False)
     elif default_sequence_example is not None:
       sequence_example, policy_reward = (default_sequence_example,
                                          default_reward)
     else:
       # The caller already knew the default reward, so the default policy
       # has not been run in this call; run it now to obtain its trace and
       # report the reward measured for that trace.
       sequence_example, policy_reward = self._compile_fn(
           file_paths, tf_policy_path='', reward_only=False)

     if not sequence_example.HasField('feature_lists'):
       return '', 0, 0, 0

     # The trace is scored against the moving average as it stood before this
     # sample; the average is only updated afterwards.
     sequence_example = _postprocessing_sequence_example(
         sequence_example, moving_average_reward, policy_reward)
     decay = self._moving_average_decay_rate
     moving_average_reward = (
         moving_average_reward * decay + policy_reward * (1 - decay))

     return (sequence_example.SerializeToString(), default_reward,
             moving_average_reward, policy_reward)

   def _compile_fn(self, file_paths: Tuple[str, ...], tf_policy_path: str,
                   reward_only: bool) -> Tuple[tf.train.SequenceExample, float]:
     """Compiles for the given IR file under the given policy.

     This base implementation always raises; subclasses must override it.

     Args:
       file_paths: path to files needed for compilation.
       tf_policy_path: path to TF policy directory on local disk.
       reward_only: whether only return reward.

     Returns:
       A tuple containing:
         sequence_example: A tf.SequenceExample proto describing compilation
         trace.
         reward: reward under the policy.

     Raises:
       subprocess.CalledProcessError if process fails.
       NotImplementedError: always, in this base class.
     """
     raise NotImplementedError('Not implemented compile fn.')
	# coding=utf-8
	# Copyright 2020 Google LLC
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Module for running compilation and collect training data."""

	import subprocess
	from typing import Tuple, Optional

	import tensorflow as tf


	def _postprocessing_sequence_example(
	    sequence_example: tf.train.SequenceExample, default_reward: float,
	    policy_reward: float) -> tf.train.SequenceExample:
	  """Stamps one whole-trajectory reward onto every step of the trace.

	  The reward is the relative change of the TF policy against the baseline,
	  `1 - policy_reward / default_reward`. That single value is written once per
	  step into the 'reward' feature list, replacing whatever the list held
	  before, so the list always ends up as long as the trajectory.

	  Args:
	    sequence_example: A tf.SequenceExample proto describing compilation trace.
	      It is modified in place.
	    default_reward: The baseline reward the policy is compared against.
	    policy_reward: The reward under the TF policy.

	  Returns:
	    The same tf.SequenceExample proto after post-processing. It is returned
	    untouched when it holds no feature lists at all.

	  Raises:
	    ZeroDivisionError: if default_reward is 0.
	  """
	  reward = 1 - policy_reward / default_reward

	  feature_list = sequence_example.feature_lists.feature_list
	  # Measure the trajectory before looking up 'reward': the lookup below is
	  # relied on to create the entry when it is missing, which would otherwise
	  # be counted as a (zero-length) feature list.
	  first_list = next(iter(feature_list.values()), None)
	  if first_list is None:
	    # No recorded steps, hence nothing to annotate.
	    return sequence_example
	  sequence_length = len(first_list.feature)

	  reward_list = feature_list['reward']
	  # Drop stale entries so the reward is overwritten rather than extended.
	  reward_list.ClearField('feature')
	  for _ in range(sequence_length):
	    reward_list.feature.add().float_list.value.append(reward)

	  return sequence_example


	class CompilationRunner:
	  """Base class for collecting compilation data.

	  `collect_data` drives the compilation and packages the result as a training
	  example. The compilation itself is delegated to `_compile_fn`, which this
	  base class leaves unimplemented for subclasses to override.
	  """

	  def __init__(self,
	               clang_path: str,
	               llvm_size_path: str,
	               launcher_path: Optional[str] = None,
	               moving_average_decay_rate: float = 1):
	    """Initialization of CompilationRunner class.

	    Args:
	      clang_path: path to the clang binary.
	      llvm_size_path: path to the llvm-size binary.
	      launcher_path: path to the launcher binary.
	      moving_average_decay_rate: weight kept on the previous moving average
	        reward when it is updated; the default of 1 leaves it unchanged.
	    """
	    self._clang_path = clang_path
	    self._llvm_size_path = llvm_size_path
	    self._launcher_path = launcher_path
	    self._moving_average_decay_rate = moving_average_decay_rate

	  def collect_data(
	      self, file_paths: Tuple[str, ...], tf_policy_path: str,
	      default_reward: Optional[float], moving_average_reward: Optional[float]
	  ) -> Tuple[str, float, float, float]:
	    """Collect data for the given IR file and policy.

	    Args:
	      file_paths: path to files needed for inlining, Tuple of (.bc, .cmd).
	      tf_policy_path: path to the tensorflow policy. When empty, the trace of
	        the default policy is collected instead.
	      default_reward: reward under default policy, None if unknown.
	      moving_average_reward: moving average reward during training, None if
	        unknown. It is replaced by the default reward whenever default_reward
	        is None, and seeded from default_reward when only it is None.

	    Returns:
	      A tuple containing:
	        sequence_example: A serialized tf.SequenceExample proto.
	        default_reward: reward under default policy.
	        moving_average_reward: updated moving average reward.
	        policy_reward: reward under the ml policy.
	      ('', 0, 0, 0) is returned when the default reward is 0 or when the
	      compilation produced no feature lists.

	    Raises:
	      subprocess.CalledProcessError if process fails (propagated unchanged
	      from _compile_fn).
	    """
	    default_sequence_example = None
	    if default_reward is None:
	      # The default trace is only worth keeping when no TF policy will
	      # produce the trace that is actually returned.
	      default_sequence_example, default_reward = self._compile_fn(
	          file_paths, tf_policy_path='', reward_only=bool(tf_policy_path))
	      moving_average_reward = default_reward
	    elif moving_average_reward is None:
	      # Baseline known but no running average yet: start from the baseline,
	      # exactly as a first run does.
	      moving_average_reward = default_reward

	    # Return empty example if the default policy size is 0 since it is a data
	    # only module and we can do nothing about it.
	    if default_reward == 0:
	      return '', 0, 0, 0

	    if tf_policy_path:
	      sequence_example, policy_reward = self._compile_fn(
	          file_paths, tf_policy_path, reward_only=False)
	    elif default_sequence_example is not None:
	      sequence_example, policy_reward = (default_sequence_example,
	                                         default_reward)
	    else:
	      # The caller already knew the default reward, so the default policy
	      # has not been run in this call; run it now to obtain its trace and
	      # report the reward measured for that trace.
	      sequence_example, policy_reward = self._compile_fn(
	          file_paths, tf_policy_path='', reward_only=False)

	    if not sequence_example.HasField('feature_lists'):
	      return '', 0, 0, 0

	    # The trace is scored against the moving average as it stood before this
	    # sample; the average is only updated afterwards.
	    sequence_example = _postprocessing_sequence_example(
	        sequence_example, moving_average_reward, policy_reward)
	    decay = self._moving_average_decay_rate
	    moving_average_reward = (
	        moving_average_reward * decay + policy_reward * (1 - decay))

	    return (sequence_example.SerializeToString(), default_reward,
	            moving_average_reward, policy_reward)

	  def _compile_fn(self, file_paths: Tuple[str, ...], tf_policy_path: str,
	                  reward_only: bool) -> Tuple[tf.train.SequenceExample, float]:
	    """Compiles for the given IR file under the given policy.

	    This base implementation always raises; subclasses must override it.

	    Args:
	      file_paths: path to files needed for compilation.
	      tf_policy_path: path to TF policy directory on local disk.
	      reward_only: whether only return reward.

	    Returns:
	      A tuple containing:
	        sequence_example: A tf.SequenceExample proto describing compilation
	        trace.
	        reward: reward under the policy.

	    Raises:
	      subprocess.CalledProcessError if process fails.
	      NotImplementedError: always, in this base class.
	    """
	    raise NotImplementedError('Not implemented compile fn.')