Convert indirect calls to direct when possible.

Please see patch discussion:
https://www.sourceware.org/ml/binutils/2016-05/msg00322.html

2016-06-28  Sriraman Tallam  <tmsriram@google.com>

	* x86_64.cc (Lazy_view): New class.
	(can_convert_mov_to_lea): Templatize function.  Make the function
	check for appropriate relocation types and use the view parameter
	to get section contents.
	(can_convert_callq_to_direct): New function.
	(Target_x86_64<size>::Scan::global): Refactor.
	(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
	call via GOT that can be converted.
	* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
	* testsuite/Makefile.in: Regenerate.
	* testsuite/x86_64_indirect_call_to_direct1.s: New file.
	* testsuite/x86_64_indirect_jump_to_direct1.s: New file.
diff --git a/gold/ChangeLog b/gold/ChangeLog
index d6bae2b..8b293cd 100644
--- a/gold/ChangeLog
+++ b/gold/ChangeLog
@@ -1,3 +1,18 @@
+2016-06-28  Sriraman Tallam  <tmsriram@google.com>
+
+	* x86_64.cc (Lazy_view): New class.
+	(can_convert_mov_to_lea): Templatize function.  Make the function
+	check for appropriate relocation types and use the view parameter
+	to get section contents.
+	(can_convert_callq_to_direct): New function.
+	(Target_x86_64<size>::Scan::global): Refactor.
+	(Target_x86_64<size>::Relocate::relocate): Refactor. Change any indirect
+	call via GOT that can be converted.
+	* testsuite/Makefile.am (x86_64_indirect_call_to_direct.sh): New test.
+	* testsuite/Makefile.in: Regenerate.
+	* testsuite/x86_64_indirect_call_to_direct1.s: New file.
+	* testsuite/x86_64_indirect_jump_to_direct1.s: New file.
+
 2016-06-28  Igor Kudrin  <ikudrin@accesssoftek.com>
 
 	* aarch64.cc (Target_aarch64::Scan::local): Move the call to got_section
diff --git a/gold/testsuite/Makefile.am b/gold/testsuite/Makefile.am
index 9e232e9..9d4326b 100644
--- a/gold/testsuite/Makefile.am
+++ b/gold/testsuite/Makefile.am
@@ -1096,6 +1096,25 @@
 x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
 	$(TEST_OBJDUMP) -dw $< > $@
 
+check_SCRIPTS += x86_64_indirect_call_to_direct.sh
+check_DATA += x86_64_indirect_call_to_direct1.stdout \
+	x86_64_indirect_jump_to_direct1.stdout
+MOSTLYCLEANFILES += x86_64_indirect_call_to_direct1 \
+	x86_64_indirect_jump_to_direct1
+
+x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
+	$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
+	gcctestdir/ld -o $@ $<
+x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
+	$(TEST_OBJDUMP) -dw $< > $@
+x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
+	$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
+	gcctestdir/ld -o $@ $<
+x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
+	$(TEST_OBJDUMP) -dw $< > $@
+
 check_SCRIPTS += x86_64_overflow_pc32.sh
 check_DATA += x86_64_overflow_pc32.err
 MOSTLYCLEANFILES += x86_64_overflow_pc32.err
diff --git a/gold/testsuite/Makefile.in b/gold/testsuite/Makefile.in
index 5b66c9c..894e6dd 100644
--- a/gold/testsuite/Makefile.in
+++ b/gold/testsuite/Makefile.in
@@ -223,6 +223,7 @@
 @GCC_TRUE@@HAVE_STATIC_TRUE@@NATIVE_LINKER_TRUE@@STATIC_TLS_TRUE@@TLS_TRUE@	tls_static_pic_test
 @FN_PTRS_IN_SO_WITHOUT_PIC_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@@TLS_TRUE@am__append_26 = tls_shared_nonpic_test
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_27 = x86_64_mov_to_lea.sh \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_indirect_call_to_direct.sh \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_overflow_pc32.sh \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x32_overflow_pc32.sh
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_28 = x86_64_mov_to_lea1.stdout \
@@ -239,6 +240,8 @@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_mov_to_lea12.stdout \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_mov_to_lea13.stdout \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_mov_to_lea14.stdout \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_indirect_call_to_direct1.stdout \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_indirect_jump_to_direct1.stdout \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_overflow_pc32.err \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x32_overflow_pc32.err
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_29 = x86_64_mov_to_lea1 \
@@ -255,6 +258,8 @@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_mov_to_lea12 \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_mov_to_lea13 \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_mov_to_lea14 \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_indirect_call_to_direct1 \
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_indirect_jump_to_direct1 \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x86_64_overflow_pc32.err \
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	x32_overflow_pc32.err
 @DEFAULT_TARGET_I386_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@am__append_30 = i386_mov_to_lea.sh
@@ -4564,6 +4569,8 @@
 	@p='tls_pie_test.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
 x86_64_mov_to_lea.sh.log: x86_64_mov_to_lea.sh
 	@p='x86_64_mov_to_lea.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
+x86_64_indirect_call_to_direct.sh.log: x86_64_indirect_call_to_direct.sh
+	@p='x86_64_indirect_call_to_direct.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
 x86_64_overflow_pc32.sh.log: x86_64_overflow_pc32.sh
 	@p='x86_64_overflow_pc32.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
 x32_overflow_pc32.sh.log: x32_overflow_pc32.sh
@@ -5641,6 +5648,19 @@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_OBJDUMP) -dw $< > $@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_mov_to_lea14.stdout: x86_64_mov_to_lea14
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_OBJDUMP) -dw $< > $@
+
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.o: x86_64_indirect_call_to_direct1.s
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1: x86_64_indirect_call_to_direct1.o gcctestdir/ld
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	gcctestdir/ld -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_call_to_direct1.stdout: x86_64_indirect_call_to_direct1
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_OBJDUMP) -dw $< > $@
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.o: x86_64_indirect_jump_to_direct1.s
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_AS) --64 -mrelax-relocations=yes -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1: x86_64_indirect_jump_to_direct1.o gcctestdir/ld
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	gcctestdir/ld -o $@ $<
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_indirect_jump_to_direct1.stdout: x86_64_indirect_jump_to_direct1
+@DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_OBJDUMP) -dw $< > $@
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.o: x86_64_overflow_pc32.s
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@	$(TEST_AS) -o $@ $<
 @DEFAULT_TARGET_X86_64_TRUE@@GCC_TRUE@@NATIVE_LINKER_TRUE@x86_64_overflow_pc32.err: x86_64_overflow_pc32.o gcctestdir/ld
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct.sh b/gold/testsuite/x86_64_indirect_call_to_direct.sh
new file mode 100755
index 0000000..916e1a3
--- /dev/null
+++ b/gold/testsuite/x86_64_indirect_call_to_direct.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+# x86_64_indirect_call_to_direct.sh -- a test for indirect call(jump) to direct
+# conversion.
+
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# Written by Sriraman Tallam <tmsriram@google.com>
+
+# This file is part of gold.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
+# MA 02110-1301, USA.
+
+set -e
+
+grep -q "callq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_call_to_direct1.stdout
+grep -q "jmpq[ ]\+[a-f0-9]\+ <foo>" x86_64_indirect_jump_to_direct1.stdout
diff --git a/gold/testsuite/x86_64_indirect_call_to_direct1.s b/gold/testsuite/x86_64_indirect_call_to_direct1.s
new file mode 100644
index 0000000..5ca2e38
--- /dev/null
+++ b/gold/testsuite/x86_64_indirect_call_to_direct1.s
@@ -0,0 +1,12 @@
+	.text
+	.globl	foo
+	.type	foo, @function
+foo:
+	ret
+	.size	foo, .-foo
+	.globl	main
+	.type	main, @function
+main:
+	call	*foo@GOTPCREL(%rip)
+	ret
+	.size	main, .-main
diff --git a/gold/testsuite/x86_64_indirect_jump_to_direct1.s b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
new file mode 100644
index 0000000..b817e34
--- /dev/null
+++ b/gold/testsuite/x86_64_indirect_jump_to_direct1.s
@@ -0,0 +1,11 @@
+	.text
+	.globl	foo
+	.type	foo, @function
+foo:
+	ret
+	.size	foo, .-foo
+	.globl	main
+	.type	main, @function
+main:
+	jmp	*foo@GOTPCREL(%rip)
+	.size	main, .-main
diff --git a/gold/x86_64.cc b/gold/x86_64.cc
index d069957..6c511e2 100644
--- a/gold/x86_64.cc
+++ b/gold/x86_64.cc
@@ -403,6 +403,33 @@
   static const unsigned char plt_eh_frame_fde[plt_eh_frame_fde_size];
 };
 
+template<int size>
+class Lazy_view
+{
+ public:
+  Lazy_view(Sized_relobj_file<size, false>* object, unsigned int data_shndx)
+    : object_(object), data_shndx_(data_shndx), view_(NULL), view_size_(0)
+  { }
+
+  inline unsigned char
+  operator[](size_t offset)
+  {
+    if (this->view_ == NULL)
+      this->view_ = this->object_->section_contents(this->data_shndx_,
+                                                    &this->view_size_,
+                                                    true);
+    if (offset >= this->view_size_)
+      return 0;
+    return this->view_[offset];
+  }
+
+ private:
+  Sized_relobj_file<size, false>* object_;
+  unsigned int data_shndx_;
+  const unsigned char* view_;
+  section_size_type view_size_;
+};
+
 // The x86_64 target class.
 // See the ABI at
 //   http://www.x86-64.org/documentation/abi.pdf
@@ -876,19 +903,62 @@
   // conversion from
   // mov foo@GOTPCREL(%rip), %reg
   // to lea foo(%rip), %reg.
-  static bool
-  can_convert_mov_to_lea(const Symbol* gsym)
+  template<class View_type>
+  static inline bool
+  can_convert_mov_to_lea(const Symbol* gsym, unsigned int r_type,
+                         size_t r_offset, View_type* view)
   {
     gold_assert(gsym != NULL);
-    return (gsym->type() != elfcpp::STT_GNU_IFUNC
-	    && !gsym->is_undefined ()
-	    && !gsym->is_from_dynobj()
-	    && !gsym->is_preemptible()
-	    && (!parameters->options().shared()
-		|| (gsym->visibility() != elfcpp::STV_DEFAULT
-		    && gsym->visibility() != elfcpp::STV_PROTECTED)
-		|| parameters->options().Bsymbolic())
-	    && strcmp(gsym->name(), "_DYNAMIC") != 0);
+    // We cannot do the conversion unless it's one of these relocations.
+    if (r_type != elfcpp::R_X86_64_GOTPCREL
+        && r_type != elfcpp::R_X86_64_GOTPCRELX
+        && r_type != elfcpp::R_X86_64_REX_GOTPCRELX)
+      return false;
+    // We cannot convert references to IFUNC symbols, or to symbols that
+    // are not local to the current module.
+    if (gsym->type() == elfcpp::STT_GNU_IFUNC
+        || gsym->is_undefined ()
+        || gsym->is_from_dynobj()
+        || gsym->is_preemptible())
+      return false;
+    // If we are building a shared object and the symbol is protected, we may
+    // need to go through the GOT.
+    if (parameters->options().shared()
+        && gsym->visibility() == elfcpp::STV_PROTECTED)
+      return false;
+    // We cannot convert references to the _DYNAMIC symbol.
+    if (strcmp(gsym->name(), "_DYNAMIC") == 0)
+      return false;
+    // Check for a MOV opcode.
+    return (*view)[r_offset - 2] == 0x8b;
+  }
+
+  // Convert
+  // callq *foo@GOTPCRELX(%rip) to
+  // addr32 callq foo
+  // and jmpq *foo@GOTPCRELX(%rip) to
+  // jmpq foo
+  // nop
+  template<class View_type>
+  static inline bool
+  can_convert_callq_to_direct(const Symbol* gsym, unsigned int r_type,
+			      size_t r_offset, View_type* view)
+  {
+    gold_assert(gsym != NULL);
+    // We cannot do the conversion unless it's a GOTPCRELX relocation.
+    if (r_type != elfcpp::R_X86_64_GOTPCRELX)
+      return false;
+    // We cannot convert references to IFUNC symbols, or to symbols that
+    // are not local to the current module.
+    if (gsym->type() == elfcpp::STT_GNU_IFUNC
+        || gsym->is_undefined ()
+        || gsym->is_from_dynobj()
+        || gsym->is_preemptible())
+      return false;
+    // Check for a CALLQ or JMPQ opcode.
+    return ((*view)[r_offset - 2] == 0xff
+            && ((*view)[r_offset - 1] == 0x15
+                || (*view)[r_offset - 1] == 0x25));
   }
 
   // Adjust TLS relocation type based on the options and whether this
@@ -2935,19 +3005,24 @@
 	// If we convert this from
 	// mov foo@GOTPCREL(%rip), %reg
 	// to lea foo(%rip), %reg.
+	// OR
+	// if we convert
+	// (callq|jmpq) *foo@GOTPCRELX(%rip) to
+	// (callq|jmpq) foo
 	// in Relocate::relocate, then there is nothing to do here.
-	if ((r_type == elfcpp::R_X86_64_GOTPCREL
-	     || r_type == elfcpp::R_X86_64_GOTPCRELX
-	     || r_type == elfcpp::R_X86_64_REX_GOTPCRELX)
-	    && reloc.get_r_offset() >= 2
-	    && Target_x86_64<size>::can_convert_mov_to_lea(gsym))
-	  {
-	    section_size_type stype;
-	    const unsigned char* view = object->section_contents(data_shndx,
-								 &stype, true);
-	    if (view[reloc.get_r_offset() - 2] == 0x8b)
-	      break;
-	  }
+
+        Lazy_view<size> view(object, data_shndx);
+        size_t r_offset = reloc.get_r_offset();
+        if (r_offset >= 2
+            && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
+                                                           r_offset, &view))
+          break;
+
+	if (r_offset >= 2
+	    && Target_x86_64<size>::can_convert_callq_to_direct(gsym, r_type,
+								r_offset,
+								&view))
+          break;
 
 	if (gsym->final_value_is_known())
 	  {
@@ -3629,15 +3704,56 @@
       // mov foo@GOTPCREL(%rip), %reg
       // to lea foo(%rip), %reg.
       // if possible.
-      if (rela.get_r_offset() >= 2
-	  && view[-2] == 0x8b
-	  && ((gsym == NULL && !psymval->is_ifunc_symbol())
-	      || (gsym != NULL
-		  && Target_x86_64<size>::can_convert_mov_to_lea(gsym))))
+       if ((gsym == NULL
+             && rela.get_r_offset() >= 2
+             && view[-2] == 0x8b
+             && !psymval->is_ifunc_symbol())
+            || (gsym != NULL
+                && rela.get_r_offset() >= 2
+                && Target_x86_64<size>::can_convert_mov_to_lea(gsym, r_type,
+                                                               0, &view)))
 	{
 	  view[-2] = 0x8d;
 	  Reloc_funcs::pcrela32(view, object, psymval, addend, address);
 	}
+      // Convert
+      // callq *foo@GOTPCRELX(%rip) to
+      // addr32 callq foo
+      // and jmpq *foo@GOTPCRELX(%rip) to
+      // jmpq foo
+      // nop
+      else if (gsym != NULL
+	       && rela.get_r_offset() >= 2
+	       && Target_x86_64<size>::can_convert_callq_to_direct(gsym,
+								   r_type,
+								   0, &view))
+	{
+	  if (view[-1] == 0x15)
+	    {
+	      // Convert callq *foo@GOTPCRELX(%rip) to addr32 callq.
+	      // Opcode of addr32 is 0x67 and opcode of direct callq is 0xe8.
+	      view[-2] = 0x67;
+	      view[-1] = 0xe8;
+	      // Convert GOTPCRELX to 32-bit pc relative reloc.
+	      Reloc_funcs::pcrela32(view, object, psymval, addend, address);
+	    }
+	  else
+	    {
+	      // Convert jmpq *foo@GOTPCRELX(%rip) to
+	      // jmpq foo
+	      // nop
+	      // The opcode of direct jmpq is 0xe9.
+	      view[-2] = 0xe9;
+	      // The opcode of nop is 0x90.
+	      view[3] = 0x90;
+	      // Convert GOTPCRELX to 32-bit pc relative reloc.  jmpq is rip
+	      // relative and since the instruction following the jmpq is now
+	      // the nop, offset the address by 1 byte.  The start of the
+              // relocation also moves ahead by 1 byte.
+	      Reloc_funcs::pcrela32(&view[-1], object, psymval, addend,
+				    address - 1);
+	    }
+	}
       else
 	{
 	  if (gsym != NULL)