// blob: 060061c27eebd4d604fd0f50fa4cc3019f7272b1 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <lib/llvm-profdata/llvm-profdata.h>
#include <lib/stdcompat/span.h>
#include <zircon/assert.h>
#ifndef HAVE_PROFDATA
#error "build system regression"
#endif
#if !HAVE_PROFDATA
// If not compiled with instrumentation at all, then all the link-time
// references in the real implementation below won't work. So provide stubs.
// Stub: there is no profile data to describe, so the build ID is ignored.
void LlvmProfdata::Init(cpp20::span<const std::byte> build_id) {
}

// Stub: nothing is written or checked; the counters span is always empty.
cpp20::span<std::byte> LlvmProfdata::DoFixedData(cpp20::span<std::byte> data, bool match) {
  return cpp20::span<std::byte>{};
}

// Stub: there are no counters to copy.
void LlvmProfdata::CopyCounters(cpp20::span<std::byte> data) {
}

// Stub: there are no counters to merge.
void LlvmProfdata::MergeCounters(cpp20::span<std::byte> data) {
}

// Stub: there are no counters to redirect.
void LlvmProfdata::UseCounters(cpp20::span<std::byte> data) {
}
#else // HAVE_PROFDATA
#include <array>
#include <atomic>
#include <cstdint>
#include <cstring>
#include <profile/InstrProfData.inc>
namespace {
// Value stored in (and expected from) the Magic field of a raw profile
// header: GetHeader() supplies it and BuildIdFromRawProfile() checks it.
constexpr uint64_t kMagic = INSTR_PROF_RAW_MAGIC_64;
// NOTE(review): presumably the pointer-sized integer type name that the
// entries expanded from InstrProfData.inc refer to -- confirm in that header.
using IntPtrT = intptr_t;
// One enumerator per VALUE_PROF_KIND(Enumerator, Value, Descr) entry that
// InstrProfData.inc expands while the macro below is defined.
enum ValueKind {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include <profile/InstrProfData.inc>
};
// Record type of the profiling data section.  Each
// INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) entry expanded from
// InstrProfData.inc becomes one `Type Name;` member, in the header's order.
struct __llvm_profile_data {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
#include <profile/InstrProfData.inc>
};
// Definitions of the C-linkage variables whose names come from the
// INSTR_PROF_*_VAR macros in InstrProfData.inc.
extern "C" {
// This is sometimes emitted by the compiler with a different value.
// The header is expected to use whichever value this had at link time.
// This supplies the default value when the compiler doesn't supply it.
[[gnu::weak]] extern const uint64_t INSTR_PROF_RAW_VERSION_VAR = INSTR_PROF_RAW_VERSION;
// The compiler emits phantom references to this as a way to ensure
// that the runtime is linked in.
extern const int INSTR_PROF_PROFILE_RUNTIME_VAR = 0;
// In relocating mode, the compiler adds this to the address of a profiling
// counter in .bss for the counter to actually update. At startup, this is
// zero so the .bss counters get updated. When data is being published, the
// live-published counters get copied from the .bss counters and then this is
// set so future updates are redirected to the published copy.
//
// This definition is weak in case the standard profile runtime is also linked
// in with its own definition.
//
// It is written by LlvmProfdata::UseCounters() and reset to zero by
// LlvmProfdata::UseLinkTimeCounters().
[[gnu::weak]] extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR = 0;
} // extern "C"
// Here _WIN32 really means EFI. At link-time, it's Windows/x64 essentially.
// InstrProfData.inc uses #ifdef _WIN32, so match that.
#if defined(_WIN32)
// PE-COFF flavor of the section bounds: DataBegin/DataEnd, NamesBegin/NamesEnd
// and CountersBegin/CountersEnd are zero-length arrays placed in "$A" and "$Z"
// subsections around the compiler-emitted data.
//
// These magic section names don't have macros in InstrProfData.inc,
// though their ".blah$M" counterparts do.
// Merge read-write sections into .data.
#pragma comment(linker, "/MERGE:.lprfc=.data")
#pragma comment(linker, "/MERGE:.lprfd=.data")
// Do not merge .lprfn and .lcovmap into .rdata.
// `llvm-cov` must be able to find them after the fact.
// Allocate read-only section bounds.
#pragma section(".lprfn$A", read)
#pragma section(".lprfn$Z", read)
// Allocate read-write section bounds.
#pragma section(".lprfd$A", read, write)
#pragma section(".lprfd$Z", read, write)
#pragma section(".lprfc$A", read, write)
#pragma section(".lprfc$Z", read, write)
// The ".blah$A" and ".blah$Z" placeholder sections get magically sorted with
// ".blah$M" in between them, so these symbols identify the bounds of the
// compiler-emitted data at link time. The compiler seems to accept emitting
// a zero-length array if it has an explicit initializer.
// This data is morally `const`, i.e. it's a RELRO case in the ELF world. But
// there is no RELRO in PE-COFF (?) so it's just a writable section and the
// compiler wants the declaration's constness to match #pragma section above.
//
// Hence the .lprfd bounds are non-const here (unlike the other object
// formats below), while the .lprfn bounds stay const to match `read`.
[[gnu::section(".lprfd$A")]] static __llvm_profile_data DataBegin[0] = {};
[[gnu::section(".lprfd$Z")]] static __llvm_profile_data DataEnd[0] = {};
[[gnu::section(".lprfn$A")]] static const char NamesBegin[0] = {};
[[gnu::section(".lprfn$Z")]] static const char NamesEnd[0] = {};
[[gnu::section(".lprfc$A")]] static uint64_t CountersBegin[0] = {};
[[gnu::section(".lprfc$Z")]] static uint64_t CountersEnd[0] = {};
#elif defined(__APPLE__)
// Mach-O flavor of the section bounds.  Each declaration is bound by its asm
// label to a "section$start$__DATA$<name>" or "section$end$__DATA$<name>"
// symbol for the corresponding INSTR_PROF_*_SECT_NAME section.
// NOTE(review): presumably these symbols are synthesized by the Mach-O linker
// -- confirm against the linker documentation.
extern "C" {
// Bounds of the __llvm_profile_data records.
[[gnu::visibility("hidden")]] extern const __llvm_profile_data DataBegin[] __asm__(
"section$start$__DATA$" INSTR_PROF_DATA_SECT_NAME);
[[gnu::visibility("hidden")]] extern const __llvm_profile_data DataEnd[] __asm__(
"section$end$__DATA$" INSTR_PROF_DATA_SECT_NAME);
// Bounds of the names data.
[[gnu::visibility("hidden")]] extern const char NamesBegin[] __asm__(
"section$start$__DATA$" INSTR_PROF_NAME_SECT_NAME);
[[gnu::visibility("hidden")]] extern const char NamesEnd[] __asm__(
"section$end$__DATA$" INSTR_PROF_NAME_SECT_NAME);
// Bounds of the counters; non-const because instrumented code updates them.
[[gnu::visibility("hidden")]] extern uint64_t CountersBegin[] __asm__(
"section$start$__DATA$" INSTR_PROF_CNTS_SECT_NAME);
[[gnu::visibility("hidden")]] extern uint64_t CountersEnd[] __asm__(
"section$end$__DATA$" INSTR_PROF_CNTS_SECT_NAME);
} // extern "C"
#else // Not _WIN32 or __APPLE__.
#ifndef __ELF__
#error "unsupported object file format???"
#endif
extern "C" {
// ELF linkers implicitly provide __start_SECNAME and __stop_SECNAME symbols
// when there is a SECNAME output section. If selective instrumentation causes
// no actual metadata sections to be emitted, or even if all instrumentation
// sections in the input are in GC'd groups, then there is no such output
// section and so these symbols aren't defined. In the userland runtime, this
// is handled simply by using weak references to the symbols. However, those
// references require GOT slots for PIC-friendly links even with hidden
// visibility since there is no way for a PC-relative relocation to be resolved
// to absolute zero to indicate a missing value. So instead, we need to ensure
// that there will be a zero-length section of the expected name that induces
// the linker to resolve the __start_SECNAME and __stop_SECNAME symbols.
// Having an explicit empty section with SHF_GNU_RETAIN accomplishes that
// without adding anything to the actual memory image. Since the start and
// stop symbols are equal, the loops across them will just do nothing.
//
// PROFDATA_SECTION(type, begin, end, section, writable):
//  * declares hidden `type begin[]` and `type end[]`, bound by asm label to
//    the INSTR_PROF_SECT_START(section) and INSTR_PROF_SECT_STOP(section)
//    symbol names;
//  * emits an empty `section` with flags "aR" followed by `writable` (either
//    "" or "w") and type %progbits, then returns to the previous section.
// The invocation supplies the trailing semicolon.
#define PROFDATA_SECTION(type, begin, end, section, writable) \
[[gnu::visibility("hidden")]] extern type begin[] __asm__( \
INSTR_PROF_QUOTE(INSTR_PROF_SECT_START(section))); \
[[gnu::visibility("hidden")]] extern type end[] __asm__( \
INSTR_PROF_QUOTE(INSTR_PROF_SECT_STOP(section))); \
__asm__(".pushsection " INSTR_PROF_QUOTE(section) ",\"aR" writable \
"\",%progbits\n" \
".popsection")
// Data and names are read-only; only the counters section is writable.
PROFDATA_SECTION(const __llvm_profile_data, DataBegin, DataEnd, INSTR_PROF_DATA_COMMON, "");
PROFDATA_SECTION(const char, NamesBegin, NamesEnd, INSTR_PROF_NAME_COMMON, "");
PROFDATA_SECTION(uint64_t, CountersBegin, CountersEnd, INSTR_PROF_CNTS_COMMON, "w");
} // extern "C"
#endif // Not _WIN32 or __APPLE__.
// In-memory image of the raw profile header.  One member is generated for
// each INSTR_PROF_RAW_HEADER(Type, Name, Initializer) entry expanded from
// InstrProfData.inc.
struct ProfRawHeader {
#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) Type Name;
#include <profile/InstrProfData.inc>

  // The BinaryIdsSize field as a size_t, or zero when the raw format version
  // this file was compiled against is older than 6.
  size_t binary_ids_size() const {
    if constexpr (INSTR_PROF_RAW_VERSION >= 6) {
      return static_cast<size_t>(BinaryIdsSize);
    } else {
      return 0;
    }
  }
};
// Chunks that need trailing padding are padded out to a multiple of this.
constexpr size_t kAlignAfterBuildId = sizeof(uint64_t);

// Returns how many padding bytes must follow a chunk of `chunk_size_bytes`
// bytes so that its end lands on a kAlignAfterBuildId boundary.
constexpr size_t PaddingSize(size_t chunk_size_bytes) {
  const size_t excess = chunk_size_bytes % kAlignAfterBuildId;
  return excess == 0 ? 0 : kAlignAfterBuildId - excess;
}
// Convenience overload: padding needed after the bytes of `chunk`.
constexpr size_t PaddingSize(cpp20::span<const std::byte> chunk) {
  const size_t chunk_size_bytes = chunk.size_bytes();
  return PaddingSize(chunk_size_bytes);
}
// Returns the number of bytes the binary IDs chunk occupies for `build_id`:
// nothing for an empty ID, otherwise a uint64_t size word, the ID bytes, and
// the alignment padding that follows them.
constexpr size_t BinaryIdsSize(cpp20::span<const std::byte> build_id) {
  const size_t id_bytes = build_id.size_bytes();
  return id_bytes == 0 ? 0 : sizeof(uint64_t) + id_bytes + PaddingSize(build_id);
}
// Returns the __llvm_profile_data records between DataBegin and DataEnd.  The
// record count is the byte distance between the two bounds divided by the
// record size, rounded up.
[[gnu::const]] cpp20::span<const __llvm_profile_data> ProfDataArray() {
  const auto* const first_byte = reinterpret_cast<const std::byte*>(DataBegin);
  const auto* const last_byte = reinterpret_cast<const std::byte*>(DataEnd);
  const size_t record_count =
      (last_byte - first_byte + sizeof(__llvm_profile_data) - 1) / sizeof(__llvm_profile_data);
  return {DataBegin, record_count};
}
// Returns the link-time counters between CountersBegin and CountersEnd.
// These are the .bss counters that instrumented code updates directly while
// the counter bias is zero.
[[gnu::const]] cpp20::span<uint64_t> ProfCountersData() {
  const auto counter_count = CountersEnd - CountersBegin;
  return cpp20::span<uint64_t>(CountersBegin, counter_count);
}
// Builds the raw profile header describing this module's profiling sections
// and the given build ID.
//
// Every header field is initialized from the Initializer expression of its
// INSTR_PROF_RAW_HEADER entry in InstrProfData.inc.  Those expressions refer
// to identifiers by name, so the locals and lambdas below must keep exactly
// these names even though they do not follow this file's naming style.
[[gnu::const]] ProfRawHeader GetHeader(cpp20::span<const std::byte> build_id) {
// These are used by the INSTR_PROF_RAW_HEADER initializers.
const uint64_t DataSize = ProfDataArray().size();
// No padding is inserted on either side of the counters.
const uint64_t PaddingBytesBeforeCounters = 0;
const uint64_t CountersSize = ProfCountersData().size();
const uint64_t PaddingBytesAfterCounters = 0;
const uint64_t NamesSize = NamesEnd - NamesBegin;
// Local stand-ins for the functions the initializers call.
auto __llvm_profile_get_magic = []() -> uint64_t { return kMagic; };
auto __llvm_profile_get_version = []() -> uint64_t { return INSTR_PROF_RAW_VERSION_VAR; };
// Only reports the size of the binary IDs chunk; nothing is written here.
auto __llvm_write_binary_ids = [build_id](void* ignored) -> uint64_t {
ZX_DEBUG_ASSERT(ignored == nullptr);
return BinaryIdsSize(build_id);
};
return {
#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) .Name = Initializer,
#include <profile/InstrProfData.inc>
};
}
// True when there are no link-time counters at all, i.e. no functions were
// actually instrumented; nothing gets published in that case.
[[gnu::const]] bool NoData() {
  return ProfCountersData().size() == 0;
}
} // namespace
// Records the build ID and, unless nothing was instrumented, precomputes the
// layout of the raw profile blob: the offset and byte size of the counters
// within it and the total size of the blob.
void LlvmProfdata::Init(cpp20::span<const std::byte> build_id) {
  build_id_ = build_id;

  // With no counters there is nothing to publish; the layout is left alone.
  if (NoData()) {
    return;
  }

  // The chunks accounted for here, and their order, mirror what
  // DoFixedData() writes.
  const ProfRawHeader header = GetHeader(build_id_);

  // Header, binary IDs, data records and padding all precede the counters.
  const size_t data_bytes = static_cast<size_t>(header.DataSize) * sizeof(__llvm_profile_data);
  const size_t padding_before_counters = static_cast<size_t>(header.PaddingBytesBeforeCounters);
  counters_offset_ =
      sizeof(header) + header.binary_ids_size() + data_bytes + padding_before_counters;

  counters_size_bytes_ = static_cast<size_t>(header.CountersSize) * sizeof(uint64_t);
  ZX_ASSERT(counters_size_bytes_ == ProfCountersData().size_bytes());

  const size_t padding_after_counters = static_cast<size_t>(header.PaddingBytesAfterCounters);
  size_bytes_ = counters_offset_ + counters_size_bytes_ + padding_after_counters;

  // The names and their alignment padding come last.
  const size_t names_padding = PaddingSize(static_cast<size_t>(header.NamesSize));
  size_bytes_ += header.NamesSize + names_padding;
}
// Lays down every fixed (non-counter) chunk of the raw profile at the front
// of `data` and returns the subspan of `data` reserved for the counters.
//
// When `match` is false the chunks are written into `data`.  When `match` is
// true nothing is written; instead each chunk is asserted to be byte-for-byte
// identical to what is already in `data`.
//
// Returns an empty span when Init() found nothing to publish.  Asserts if
// `data` is too small for any chunk.
cpp20::span<std::byte> LlvmProfdata::DoFixedData(cpp20::span<std::byte> data, bool match) {
  if (size_bytes_ == 0) {
    return {};
  }

  // Write bytes at the start of data and then advance data to be the remaining
  // subspan where the next call will write its data.  When merging, this
  // doesn't actually write but instead asserts that the destination already
  // has identical contents.
  auto write_bytes = [&](cpp20::span<const std::byte> bytes, const char* what) {
    ZX_ASSERT_MSG(data.size_bytes() >= bytes.size_bytes(),
                  "%s of %zu bytes with only %zu bytes left!", what, bytes.size_bytes(),
                  data.size_bytes());
    if (match) {
      ZX_ASSERT_MSG(!memcmp(data.data(), bytes.data(), bytes.size()),
                    "mismatch somewhere in %zu bytes of %s", bytes.size(), what);
    } else {
      memcpy(data.data(), bytes.data(), bytes.size());
    }
    data = data.subspan(bytes.size());
  };

  // Source of zero bytes for every alignment-padding chunk.
  constexpr std::array<std::byte, sizeof(uint64_t)> kPaddingBytes{};
  const cpp20::span kPadding(kPaddingBytes);
  constexpr const char* kPaddingDoc = "alignment padding";

  // These are all the chunks to be written.
  // The sequence and sizes here must match the layout computed in Init().

  const ProfRawHeader header = GetHeader(build_id_);
  write_bytes(cpp20::as_bytes(cpp20::span{&header, 1}), "INSTR_PROF_RAW_HEADER");

  // The binary IDs chunk is a size word, the ID itself, and padding.  It is
  // omitted entirely when there is no build ID.
  const uint64_t build_id_size = build_id_.size_bytes();
  if (build_id_size > 0) {
    write_bytes(cpp20::as_bytes(cpp20::span{&build_id_size, 1}), "build ID size");
    write_bytes(cpp20::as_bytes(build_id_), "build ID");
    write_bytes(kPadding.subspan(0, PaddingSize(build_id_)), kPaddingDoc);
  }

  // Use the same rounded-up record count that GetHeader() reports in DataSize
  // and that Init() used for counters_offset_, so the bytes written here
  // always agree with the layout the header advertises.
  const cpp20::span<const __llvm_profile_data> prof_data = ProfDataArray();
  write_bytes(cpp20::as_bytes(prof_data), INSTR_PROF_DATA_SECT_NAME);
  write_bytes(kPadding.subspan(0, static_cast<size_t>(header.PaddingBytesBeforeCounters)),
              kPaddingDoc);

  // Skip over the space in the data blob for the counters.
  ZX_ASSERT(counters_size_bytes_ == ProfCountersData().size_bytes());
  ZX_ASSERT_MSG(data.size_bytes() >= counters_size_bytes_,
                "%zu bytes of counters with only %zu bytes left!", counters_size_bytes_,
                data.size_bytes());
  cpp20::span counters_data = data.subspan(0, counters_size_bytes_);
  data = data.subspan(counters_size_bytes_);
  write_bytes(kPadding.subspan(0, static_cast<size_t>(header.PaddingBytesAfterCounters)),
              kPaddingDoc);

  // The names and their alignment padding finish the blob.
  auto prof_names = cpp20::span(NamesBegin, NamesEnd - NamesBegin);
  const size_t PaddingBytesAfterNames = PaddingSize(static_cast<size_t>(header.NamesSize));
  write_bytes(cpp20::as_bytes(prof_names), INSTR_PROF_NAME_SECT_NAME);
  write_bytes(kPadding.subspan(0, PaddingBytesAfterNames), kPaddingDoc);

  return counters_data;
}
// Copies the current values of the link-time counters into the front of
// `data`.  Asserts if `data` is smaller than the counters.
void LlvmProfdata::CopyCounters(cpp20::span<std::byte> data) {
  auto prof_counters = ProfCountersData();
  // Report the size being written first and the space available second, as
  // the message says.
  ZX_ASSERT_MSG(data.size_bytes() >= prof_counters.size_bytes(),
                "writing %zu bytes of counters with only %zu bytes left!",
                prof_counters.size_bytes(), data.size_bytes());
  memcpy(data.data(), prof_counters.data(), prof_counters.size_bytes());
}
// Rather than overwriting `data`, adds the link-time counter values into the
// counters already stored at the front of `data`.  Asserts if `data` is
// smaller than the counters.
void LlvmProfdata::MergeCounters(cpp20::span<std::byte> data) {
  const cpp20::span<uint64_t> live_counters = ProfCountersData();
  const size_t live_bytes = live_counters.size_bytes();
  ZX_ASSERT_MSG(data.size_bytes() >= live_bytes,
                "merging %zu bytes of counters with only %zu bytes left!", live_bytes,
                data.size_bytes());
  MergeCounters(data.subspan(0, live_bytes), cpp20::as_bytes(live_counters));
}
// Treats both spans as arrays of uint64_t counters and adds each counter in
// `from` to the counter at the same index in `to`.  The spans must have the
// same byte size, and that size must be a whole number of counters.
void LlvmProfdata::MergeCounters(cpp20::span<std::byte> to, cpp20::span<const std::byte> from) {
  ZX_ASSERT(to.size_bytes() == from.size_bytes());
  ZX_ASSERT(to.size_bytes() % sizeof(uint64_t) == 0);

  const size_t counter_count = to.size_bytes() / sizeof(uint64_t);
  uint64_t* const dest = reinterpret_cast<uint64_t*>(to.data());
  const uint64_t* const src = reinterpret_cast<const uint64_t*>(from.data());
  for (size_t index = 0; index != counter_count; ++index) {
    dest[index] += src[index];
  }
}
// Redirects future counter updates to `data` by setting the counter bias to
// the distance from the link-time counters to `data`.  Asserts if `data` is
// smaller than the counters or is not aligned to kAlign.
void LlvmProfdata::UseCounters(cpp20::span<std::byte> data) {
  const cpp20::span<uint64_t> link_time_counters = ProfCountersData();
  ZX_ASSERT_MSG(data.size_bytes() >= link_time_counters.size_bytes(),
                "cannot relocate %zu bytes of counters with only %zu bytes left!",
                link_time_counters.size_bytes(), data.size_bytes());

  const auto link_time_addr = reinterpret_cast<uintptr_t>(link_time_counters.data());
  const auto published_addr = reinterpret_cast<uintptr_t>(data.data());
  ZX_ASSERT(published_addr % kAlign == 0);
  const intptr_t bias = published_addr - link_time_addr;

  // The copy (or merge) into `data` is expected to have happened already.
  // The compiler barriers on either side of the store are meant to keep that
  // work from being moved past the point where instrumented code starts
  // seeing the new bias.
  std::atomic_signal_fence(std::memory_order_seq_cst);
  INSTR_PROF_PROFILE_COUNTER_BIAS_VAR = bias;
  std::atomic_signal_fence(std::memory_order_seq_cst);
}
// Resets the counter bias to zero, the value at which (per the comment on the
// bias variable's definition) the .bss counters get updated.  The store is
// bracketed by the same compiler barriers as in UseCounters().
void LlvmProfdata::UseLinkTimeCounters() {
std::atomic_signal_fence(std::memory_order_seq_cst);
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR = 0;
std::atomic_signal_fence(std::memory_order_seq_cst);
}
// Extracts the build ID stored in the raw profile blob `data`.
//
// Returns the subspan of `data` holding the ID bytes.  Returns an empty span
// if `data` is too short, has the wrong magic number, has a Version field
// below 7, has no binary IDs chunk, or has sizes that do not fit.
cpp20::span<const std::byte> LlvmProfdata::BuildIdFromRawProfile(
    cpp20::span<const std::byte> data) {
  // Copy the header out instead of casting, so no alignment is assumed.
  ProfRawHeader header;
  if (data.size() < sizeof(header)) {
    return {};
  }
  memcpy(&header, data.data(), sizeof(header));

  const bool recognized = header.Magic == kMagic && header.Version >= 7;
  if (!recognized) {
    return {};
  }

  // The binary IDs chunk immediately follows the header.
  const cpp20::span<const std::byte> after_header = data.subspan(sizeof(header));
  const size_t ids_size = header.binary_ids_size();
  if (ids_size == 0 || ids_size > after_header.size()) {
    return {};
  }
  const cpp20::span<const std::byte> binary_ids = after_header.subspan(0, ids_size);

  // The chunk starts with the ID's size, followed by the ID bytes themselves.
  uint64_t build_id_size;
  if (binary_ids.size() < sizeof(build_id_size)) {
    return {};
  }
  memcpy(&build_id_size, binary_ids.data(), sizeof(build_id_size));

  const cpp20::span<const std::byte> id_and_padding = binary_ids.subspan(sizeof(build_id_size));
  if (id_and_padding.size() < build_id_size) {
    return {};
  }
  return id_and_padding.subspan(0, static_cast<size_t>(build_id_size));
}
// Reports whether the raw profile blob `data` carries a non-empty build ID
// identical to the one passed to Init().
bool LlvmProfdata::Match(cpp20::span<const std::byte> data) {
  const cpp20::span<const std::byte> id = BuildIdFromRawProfile(data);
  if (id.empty() || id.size_bytes() != build_id_.size_bytes()) {
    return false;
  }
  return memcmp(id.data(), build_id_.data(), build_id_.size_bytes()) == 0;
}
#endif // HAVE_PROFDATA