//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86  --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// Currently supported thunks include:
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
///   before making an indirect call/jump
///
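/// For example, an indirect call such as
///   callq *%r11
/// is instead lowered to a direct call to the matching thunk, leaving the
/// original target address in %r11:
///   callq __llvm_retpoline_r11   # or __llvm_lvi_thunk_r11 under LVI
///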
/// Note that the reason that this is implemented as a MachineFunctionPass and
/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
/// serialize all transformations, which can consume lots of memory.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

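// Note: DEBUG_TYPE retains this pass's original retpoline-only name, even
// though the pass now also emits LVI thunks.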
#define DEBUG_TYPE "x86-retpoline-thunks"

static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";

static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";

namespace {
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
  const char *getThunkPrefix() { return RetpolineNamePrefix; }
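  // Thunks only need to be emitted once per module; InsertedThunks reports
  // whether an earlier function already triggered insertion. Functions built
  // against an external (user-provided) thunk never need the LLVM-emitted one.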
  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
    if (InsertedThunks)
      return false;
    const auto &STI = MF.getSubtarget<X86Subtarget>();
    return (STI.useRetpolineIndirectCalls() ||
            STI.useRetpolineIndirectBranches()) &&
           !STI.useRetpolineExternalThunk();
  }
  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
  void populateThunk(MachineFunction &MF);
};

struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
  const char *getThunkPrefix() { return LVIThunkNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
    if (InsertedThunks)
      return false;
    return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
  }
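  // The LVI lowering funnels every indirect call/jump through %r11, so a
  // single thunk suffices here.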
  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
    createThunkFunction(MMI, R11LVIThunkName);
    return true;
  }
  void populateThunk(MachineFunction &MF) {
    assert(MF.size() == 1);
    MachineBasicBlock *Entry = &MF.front();
    Entry->clear();

    // This code mitigates LVI by replacing each indirect call/jump with a
    // direct call/jump to a thunk that looks like:
    // ```
    // lfence
    // jmpq *%r11
    // ```
    // This ensures that if the value in register %r11 was loaded from memory,
    // then the value in %r11 is (architecturally) correct prior to the jump.
    const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
    MF.front().addLiveIn(X86::R11);
  }
};

class X86IndirectThunks : public MachineFunctionPass {
public:
  static char ID;

  X86IndirectThunks() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 Indirect Thunks"; }

  bool doInitialization(Module &M) override;
  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;

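  // Call init() on every inserter in the tuple, in order, via a C++17 comma
  // fold expression.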
  template <typename... ThunkInserterT>
  static void initTIs(Module &M,
                      std::tuple<ThunkInserterT...> &ThunkInserters) {
    (..., std::get<ThunkInserterT>(ThunkInserters).init(M));
  }
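  // Run every inserter and OR together their "made a change" results, so the
  // pass reports a change if any inserter fired.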
  template <typename... ThunkInserterT>
  static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
                     std::tuple<ThunkInserterT...> &ThunkInserters) {
    return (0 | ... | std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF));
  }
};

} // end anonymous namespace

bool RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI,
                                          MachineFunction &MF) {
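  // 64-bit code only ever needs the r11 thunk; 32-bit has no single reserved
  // scratch register, so create a thunk for each candidate register up front
  // (see populateThunk for how each one is selected).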
  if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
    createThunkFunction(MMI, R11RetpolineName);
  else
    for (StringRef Name : {EAXRetpolineName, ECXRetpolineName,
                           EDXRetpolineName, EDIRetpolineName})
      createThunkFunction(MMI, Name);
  return true;
}

void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
  bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
  Register ThunkReg;
  if (Is64Bit) {
    assert(MF.getName() == "__llvm_retpoline_r11" &&
           "Should only have an r11 thunk on 64-bit targets");

    // __llvm_retpoline_r11:
    //   callq .Lr11_call_target
    // .Lr11_capture_spec:
    //   pause
    //   lfence
    //   jmp .Lr11_capture_spec
    // .align 16
    // .Lr11_call_target:
    //   movq %r11, (%rsp)
    //   retq
    ThunkReg = X86::R11;
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers as well as a fallback that uses EDI, which is
    // normally callee saved.
    //   __llvm_retpoline_eax:
    //         calll .Leax_call_target
    //   .Leax_capture_spec:
    //         pause
    //         lfence
    //         jmp .Leax_capture_spec
    //   .align 16
    //   .Leax_call_target:
    //         movl %eax, (%esp)  # Clobber return addr
    //         retl
    //
    //   __llvm_retpoline_ecx:
    //   ... # Same setup
    //         movl %ecx, (%esp)
    //         retl
    //
    //   __llvm_retpoline_edx:
    //   ... # Same setup
    //         movl %edx, (%esp)
    //         retl
    //
    //   __llvm_retpoline_edi:
    //   ... # Same setup
    //         movl %edi, (%esp)
    //         retl
    if (MF.getName() == EAXRetpolineName)
      ThunkReg = X86::EAX;
    else if (MF.getName() == ECXRetpolineName)
      ThunkReg = X86::ECX;
    else if (MF.getName() == EDXRetpolineName)
      ThunkReg = X86::EDX;
    else if (MF.getName() == EDIRetpolineName)
      ThunkReg = X86::EDI;
    else
      llvm_unreachable("Invalid thunk name on x86-32!");
  }

  const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  assert(MF.size() == 1);
  MachineBasicBlock *Entry = &MF.front();
  Entry->clear();

  MachineBasicBlock *CaptureSpec =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MachineBasicBlock *CallTarget =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
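  // TargetSym labels the call target; it is attached to CallTarget's first
  // instruction below so the entry block's CALL can reference it.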
  MCSymbol *TargetSym = MF.getContext().createTempSymbol();
  MF.push_back(CaptureSpec);
  MF.push_back(CallTarget);

  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
  const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;

  Entry->addLiveIn(ThunkReg);
  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);

  // The MIR verifier thinks that the CALL in the entry block will fall through
  // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
  // the successor, but the MIR verifier doesn't know how to cope with that.
  Entry->addSuccessor(CaptureSpec);

  // In the capture loop for speculation, we want to stop the processor from
  // speculating as fast as possible. On Intel processors, the PAUSE instruction
  // will block speculation without consuming any execution resources. On AMD
  // processors, the PAUSE instruction is (essentially) a nop, so we also use an
  // LFENCE instruction which they have advised will stop speculation as well
  // with minimal resource utilization. We still end the capture with a jump to
  // form an infinite loop, to fully guarantee that, no matter the
  // implementation of the x86 ISA, speculation down this code path never
  // escapes.
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
  CaptureSpec->setMachineBlockAddressTaken();
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->addLiveIn(ThunkReg);
  CallTarget->setMachineBlockAddressTaken();
  CallTarget->setAlignment(Align(16));

  // Clobber the return address pushed by the entry block's CALL: store
  // ThunkReg over it so that the RET below (architecturally) transfers control
  // to the original indirect call target rather than back into the thunk.
  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
  const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
  addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
               0)
      .addReg(ThunkReg);

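  // Attach the call-target label to the clobbering MOV (the block's first
  // instruction) so that the entry block's CALL lands exactly here.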
  CallTarget->back().setPreInstrSymbol(MF, TargetSym);
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

FunctionPass *llvm::createX86IndirectThunksPass() {
  return new X86IndirectThunks();
}

char X86IndirectThunks::ID = 0;

bool X86IndirectThunks::doInitialization(Module &M) {
  initTIs(M, TIs);
  return false;
}

bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << getPassName() << '\n');
  auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return runTIs(MMI, MF, TIs);
}