Update regalloc PPO hyperparameters

Lower num_iterations from 300 to 200 and double batch_size to 256,
disable the sqrt and z-score normalization steps in the observation
processing layers, raise the Adam learning rate from 3e-5 to 3e-4, and
increase entropy regularization to 0.005 and policy L2 regularization
to 1e-5.
diff --git a/compiler_opt/rl/regalloc/gin_configs/ppo_nn_agent.gin b/compiler_opt/rl/regalloc/gin_configs/ppo_nn_agent.gin
index 6c283d1..4551d14 100644
--- a/compiler_opt/rl/regalloc/gin_configs/ppo_nn_agent.gin
+++ b/compiler_opt/rl/regalloc/gin_configs/ppo_nn_agent.gin
@@ -11,8 +11,8 @@
 train_eval.problem_type='regalloc'
 train_eval.warmstart_policy_dir=''
 train_eval.num_policy_iterations=3000
-train_eval.num_iterations=300
-train_eval.batch_size=128
+train_eval.num_iterations=200
+train_eval.batch_size=256
 train_eval.train_sequence_length=16
 train_eval.deploy_policy_name='saved_collect_policy'
 train_eval.moving_average_decay_rate=0.8
@@ -35,8 +35,8 @@
 RandomNetworkDistillation.half_decay_steps = 10000
 
 regalloc.config.get_observation_processing_layer_creator.quantile_file_dir='compiler_opt/rl/regalloc/vocab'
-regalloc.config.get_observation_processing_layer_creator.with_sqrt = True
-regalloc.config.get_observation_processing_layer_creator.with_z_score_normalization = True
+regalloc.config.get_observation_processing_layer_creator.with_sqrt = False
+regalloc.config.get_observation_processing_layer_creator.with_z_score_normalization = False
 
 create_agent.policy_network = @regalloc_network.RegAllocNetwork
 
@@ -47,15 +47,15 @@
 
 ConstantValueNetwork.constant_output_val=0
 
-tf.train.AdamOptimizer.learning_rate = 0.00003
+tf.train.AdamOptimizer.learning_rate = 0.0003
 tf.train.AdamOptimizer.epsilon = 0.0003125
 
 PPOAgent.optimizer = @tf.train.AdamOptimizer()
 PPOAgent.importance_ratio_clipping = 0.2
 PPOAgent.lambda_value = 0.0
 PPOAgent.discount_factor = 0.0
-PPOAgent.entropy_regularization = 0.003
-PPOAgent.policy_l2_reg = 0.000001
+PPOAgent.entropy_regularization = 0.005
+PPOAgent.policy_l2_reg = 0.00001
 PPOAgent.value_function_l2_reg = 0.0
 PPOAgent.shared_vars_l2_reg = 0.0
 PPOAgent.value_pred_loss_coef = 0.0
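
For context (not part of the patch): a minimal sketch of how the new values
can be sanity-checked after parsing this gin file. It assumes the gin-config
package is installed and that the modules registering these configurables
(TF-Agents' PPOAgent, the TF optimizer external configurables, the regalloc
config module) have already been imported, as the training entry point does;
otherwise parse_config_file fails on unknown configurables.

    import gin

    # Sketch only: parse the edited config and read back two of the changed
    # bindings to confirm the new hyperparameters took effect.
    gin.parse_config_file(
        'compiler_opt/rl/regalloc/gin_configs/ppo_nn_agent.gin')

    assert gin.query_parameter('tf.train.AdamOptimizer.learning_rate') == 0.0003
    assert gin.query_parameter('PPOAgent.entropy_regularization') == 0.005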