compiler_opt/rl/agent_config.py - third_party/github.com/google/ml-compiler-opt - Git at Google

 # coding=utf-8
 # Copyright 2020 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """util function to create a tf_agent."""

 from typing import Any, Callable, Dict

 import abc
 import gin
 import tensorflow as tf

 from tf_agents.agents import tf_agent
 from tf_agents.agents.behavioral_cloning import behavioral_cloning_agent
 from tf_agents.agents.dqn import dqn_agent
 from tf_agents.agents.ppo import ppo_agent
 from tf_agents.specs import tensor_spec
 from tf_agents.typing import types

 from compiler_opt.rl import constant_value_network
 from compiler_opt.rl.distributed import agent as distributed_ppo_agent


 class AgentConfig(metaclass=abc.ABCMeta):
   """Agent creation and data processing hook-ups."""

   def __init__(self, *, time_step_spec: types.NestedTensorSpec,
                action_spec: types.NestedTensorSpec):
     self._time_step_spec = time_step_spec
     self._action_spec = action_spec

   @property
   def time_step_spec(self):
     return self._time_step_spec

   @property
   def action_spec(self):
     return self._action_spec

   @abc.abstractmethod
   def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
                    policy_network: types.Network) -> tf_agent.TFAgent:
     """Specific agent configs must implement this."""
     raise NotImplementedError()

   def get_policy_info_parsing_dict(
       self) -> Dict[str, tf.io.FixedLenSequenceFeature]:
     """Return the parsing dict for the policy info."""
     return {}

   # pylint: disable=unused-argument
   def process_parsed_sequence_and_get_policy_info(
       self, parsed_sequence: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
     """Function to process parsed_sequence and to return policy_info.

     Args:
       parsed_sequence: A dict from feature_name to feature_value parsed from TF
         SequenceExample.

     Returns:
       A nested policy_info for given agent.
     """
     return {}


 @gin.configurable
 def create_agent(agent_config: AgentConfig,
                  preprocessing_layer_creator: Callable[[types.TensorSpec],
                                                        tf.keras.layers.Layer],
                  policy_network: types.Network):
   """Gin configurable wrapper of AgentConfig.create_agent.
   Works around the fact that class members aren't gin-configurable."""
   preprocessing_layers = tf.nest.map_structure(
       preprocessing_layer_creator, agent_config.time_step_spec.observation)
   return agent_config.create_agent(preprocessing_layers, policy_network)


 @gin.configurable(module='agents')
 class BCAgentConfig(AgentConfig):
   """Behavioral Cloning agent configuration."""

   def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
                    policy_network: types.Network) -> tf_agent.TFAgent:
     """Creates a behavioral_cloning_agent."""

     network = policy_network(
         self.time_step_spec.observation,
         self.action_spec,
         preprocessing_layers=preprocessing_layers,
         name='QNetwork')

     return behavioral_cloning_agent.BehavioralCloningAgent(
         self.time_step_spec,
         self.action_spec,
         cloning_network=network,
         num_outer_dims=2)


 @gin.configurable(module='agents')
 class DQNAgentConfig(AgentConfig):
   """DQN agent configuration."""

   def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
                    policy_network: types.Network) -> tf_agent.TFAgent:
     """Creates a dqn_agent."""
     network = policy_network(
         self.time_step_spec.observation,
         self.action_spec,
         preprocessing_layers=preprocessing_layers,
         name='QNetwork')

     return dqn_agent.DqnAgent(
         self.time_step_spec, self.action_spec, q_network=network)


 @gin.configurable(module='agents')
 class PPOAgentConfig(AgentConfig):
   """PPO/Reinforce agent configuration."""

   def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
                    policy_network: types.Network) -> tf_agent.TFAgent:
     """Creates a ppo_agent."""

     actor_network = policy_network(
         self.time_step_spec.observation,
         self.action_spec,
         preprocessing_layers=preprocessing_layers,
         name='ActorDistributionNetwork')

     critic_network = constant_value_network.ConstantValueNetwork(
         self.time_step_spec.observation, name='ConstantValueNetwork')

     return ppo_agent.PPOAgent(
         self.time_step_spec,
         self.action_spec,
         actor_net=actor_network,
         value_net=critic_network)

   def get_policy_info_parsing_dict(
       self) -> Dict[str, tf.io.FixedLenSequenceFeature]:
     if tensor_spec.is_discrete(self._action_spec):
       return {
           'CategoricalProjectionNetwork_logits':
               tf.io.FixedLenSequenceFeature(
                   shape=(self._action_spec.maximum - self._action_spec.minimum +
                          1),
                   dtype=tf.float32)
       }
     else:
       return {
           'NormalProjectionNetwork_scale':
               tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32),
           'NormalProjectionNetwork_loc':
               tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32)
       }

   def process_parsed_sequence_and_get_policy_info(
       self, parsed_sequence: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
     if tensor_spec.is_discrete(self._action_spec):
       policy_info = {
           'dist_params': {
               'logits': parsed_sequence['CategoricalProjectionNetwork_logits']
           }
       }
       del parsed_sequence['CategoricalProjectionNetwork_logits']
     else:
       policy_info = {
           'dist_params': {
               'scale': parsed_sequence['NormalProjectionNetwork_scale'],
               'loc': parsed_sequence['NormalProjectionNetwork_loc']
           }
       }
       del parsed_sequence['NormalProjectionNetwork_scale']
       del parsed_sequence['NormalProjectionNetwork_loc']
     return policy_info


 @gin.configurable(module='agents')
 class DistributedPPOAgentConfig(PPOAgentConfig):
   """Distributed PPO/Reinforce agent configuration."""

   def _create_agent_implt(self, preprocessing_layers: tf.keras.layers.Layer,
                           policy_network: types.Network) -> tf_agent.TFAgent:
     """Creates a ppo_distributed agent."""
     actor_network = policy_network(
         self.time_step_spec.observation,
         self.action_spec,
         preprocessing_layers=preprocessing_layers,
         preprocessing_combiner=tf.keras.layers.Concatenate(),
         name='ActorDistributionNetwork')

     critic_network = constant_value_network.ConstantValueNetwork(
         self.time_step_spec.observation, name='ConstantValueNetwork')

     return distributed_ppo_agent.MLGOPPOAgent(
         self.time_step_spec,
         self.action_spec,
         optimizer=tf.keras.optimizers.Adam(learning_rate=4e-4, epsilon=1e-5),
         actor_net=actor_network,
         value_net=critic_network,
         value_pred_loss_coef=0.0,
         entropy_regularization=0.01,
         importance_ratio_clipping=0.2,
         discount_factor=1.0,
         gradient_clipping=1.0,
         debug_summaries=False,
         value_clipping=None,
         aggregate_losses_across_replicas=True,
         loss_scaling_factor=1.0)
	# coding=utf-8
	# Copyright 2020 Google LLC
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""util function to create a tf_agent."""

	from typing import Any, Callable, Dict

	import abc
	import gin
	import tensorflow as tf

	from tf_agents.agents import tf_agent
	from tf_agents.agents.behavioral_cloning import behavioral_cloning_agent
	from tf_agents.agents.dqn import dqn_agent
	from tf_agents.agents.ppo import ppo_agent
	from tf_agents.specs import tensor_spec
	from tf_agents.typing import types

	from compiler_opt.rl import constant_value_network
	from compiler_opt.rl.distributed import agent as distributed_ppo_agent


	class AgentConfig(metaclass=abc.ABCMeta):
	"""Agent creation and data processing hook-ups."""

	def __init__(self, *, time_step_spec: types.NestedTensorSpec,
	action_spec: types.NestedTensorSpec):
	self._time_step_spec = time_step_spec
	self._action_spec = action_spec

	@property
	def time_step_spec(self):
	return self._time_step_spec

	@property
	def action_spec(self):
	return self._action_spec

	@abc.abstractmethod
	def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
	policy_network: types.Network) -> tf_agent.TFAgent:
	"""Specific agent configs must implement this."""
	raise NotImplementedError()

	def get_policy_info_parsing_dict(
	self) -> Dict[str, tf.io.FixedLenSequenceFeature]:
	"""Return the parsing dict for the policy info."""
	return {}

	# pylint: disable=unused-argument
	def process_parsed_sequence_and_get_policy_info(
	self, parsed_sequence: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
	"""Function to process parsed_sequence and to return policy_info.

	Args:
	parsed_sequence: A dict from feature_name to feature_value parsed from TF
	SequenceExample.

	Returns:
	A nested policy_info for given agent.
	"""
	return {}


	@gin.configurable
	def create_agent(agent_config: AgentConfig,
	preprocessing_layer_creator: Callable[[types.TensorSpec],
	tf.keras.layers.Layer],
	policy_network: types.Network):
	"""Gin configurable wrapper of AgentConfig.create_agent.
	Works around the fact that class members aren't gin-configurable."""
	preprocessing_layers = tf.nest.map_structure(
	preprocessing_layer_creator, agent_config.time_step_spec.observation)
	return agent_config.create_agent(preprocessing_layers, policy_network)


	@gin.configurable(module='agents')
	class BCAgentConfig(AgentConfig):
	"""Behavioral Cloning agent configuration."""

	def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
	policy_network: types.Network) -> tf_agent.TFAgent:
	"""Creates a behavioral_cloning_agent."""

	network = policy_network(
	self.time_step_spec.observation,
	self.action_spec,
	preprocessing_layers=preprocessing_layers,
	name='QNetwork')

	return behavioral_cloning_agent.BehavioralCloningAgent(
	self.time_step_spec,
	self.action_spec,
	cloning_network=network,
	num_outer_dims=2)


	@gin.configurable(module='agents')
	class DQNAgentConfig(AgentConfig):
	"""DQN agent configuration."""

	def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
	policy_network: types.Network) -> tf_agent.TFAgent:
	"""Creates a dqn_agent."""
	network = policy_network(
	self.time_step_spec.observation,
	self.action_spec,
	preprocessing_layers=preprocessing_layers,
	name='QNetwork')

	return dqn_agent.DqnAgent(
	self.time_step_spec, self.action_spec, q_network=network)


	@gin.configurable(module='agents')
	class PPOAgentConfig(AgentConfig):
	"""PPO/Reinforce agent configuration."""

	def create_agent(self, preprocessing_layers: tf.keras.layers.Layer,
	policy_network: types.Network) -> tf_agent.TFAgent:
	"""Creates a ppo_agent."""

	actor_network = policy_network(
	self.time_step_spec.observation,
	self.action_spec,
	preprocessing_layers=preprocessing_layers,
	name='ActorDistributionNetwork')

	critic_network = constant_value_network.ConstantValueNetwork(
	self.time_step_spec.observation, name='ConstantValueNetwork')

	return ppo_agent.PPOAgent(
	self.time_step_spec,
	self.action_spec,
	actor_net=actor_network,
	value_net=critic_network)

	def get_policy_info_parsing_dict(
	self) -> Dict[str, tf.io.FixedLenSequenceFeature]:
	if tensor_spec.is_discrete(self._action_spec):
	return {
	'CategoricalProjectionNetwork_logits':
	tf.io.FixedLenSequenceFeature(
	shape=(self._action_spec.maximum - self._action_spec.minimum +
	1),
	dtype=tf.float32)
	}
	else:
	return {
	'NormalProjectionNetwork_scale':
	tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32),
	'NormalProjectionNetwork_loc':
	tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32)
	}

	def process_parsed_sequence_and_get_policy_info(
	self, parsed_sequence: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
	if tensor_spec.is_discrete(self._action_spec):
	policy_info = {
	'dist_params': {
	'logits': parsed_sequence['CategoricalProjectionNetwork_logits']
	}
	}
	del parsed_sequence['CategoricalProjectionNetwork_logits']
	else:
	policy_info = {
	'dist_params': {
	'scale': parsed_sequence['NormalProjectionNetwork_scale'],
	'loc': parsed_sequence['NormalProjectionNetwork_loc']
	}
	}
	del parsed_sequence['NormalProjectionNetwork_scale']
	del parsed_sequence['NormalProjectionNetwork_loc']
	return policy_info


	@gin.configurable(module='agents')
	class DistributedPPOAgentConfig(PPOAgentConfig):
	"""Distributed PPO/Reinforce agent configuration."""

	def _create_agent_implt(self, preprocessing_layers: tf.keras.layers.Layer,
	policy_network: types.Network) -> tf_agent.TFAgent:
	"""Creates a ppo_distributed agent."""
	actor_network = policy_network(
	self.time_step_spec.observation,
	self.action_spec,
	preprocessing_layers=preprocessing_layers,
	preprocessing_combiner=tf.keras.layers.Concatenate(),
	name='ActorDistributionNetwork')

	critic_network = constant_value_network.ConstantValueNetwork(
	self.time_step_spec.observation, name='ConstantValueNetwork')

	return distributed_ppo_agent.MLGOPPOAgent(
	self.time_step_spec,
	self.action_spec,
	optimizer=tf.keras.optimizers.Adam(learning_rate=4e-4, epsilon=1e-5),
	actor_net=actor_network,
	value_net=critic_network,
	value_pred_loss_coef=0.0,
	entropy_regularization=0.01,
	importance_ratio_clipping=0.2,
	discount_factor=1.0,
	gradient_clipping=1.0,
	debug_summaries=False,
	value_clipping=None,
	aggregate_losses_across_replicas=True,
	loss_scaling_factor=1.0)