blob: bed06871099214dff1d663178dc419f4afdb6d24 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/lib/unwinder/cfi_module.h"
#include <elf.h>
#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include "src/lib/unwinder/cfi_parser.h"
#include "src/lib/unwinder/error.h"
#include "src/lib/unwinder/module.h"
#include "src/lib/unwinder/registers.h"
namespace unwinder {
namespace {
// The CIE ID that distinguishes a CIE from an FDE. They are only used in version 4.
// In version 1, the CIE ID is 0.
const constexpr uint32_t kDwarf32CieId = std::numeric_limits<uint32_t>::max();
const constexpr uint64_t kDwarf64CieId = std::numeric_limits<uint64_t>::max();
// Check and return the size of each entry in the table. It's doubled because each entry contains
// 2 addresses, i.e., the start_pc and the fde_offset.
[[nodiscard]] Error DecodeTableEntrySize(uint8_t table_enc, uint64_t& res) {
if (table_enc == 0xFF) { // DW_EH_PE_omit
return Error("no binary search table");
}
if ((table_enc & 0xF0) != 0x30) {
return Error("invalid table_enc");
}
switch (table_enc & 0x0F) {
case 0x02: // DW_EH_PE_udata2 A 2 bytes unsigned value.
case 0x0A: // DW_EH_PE_sdata2 A 2 bytes signed value.
res = 4;
return Success();
case 0x03: // DW_EH_PE_udata4 A 4 bytes unsigned value.
case 0x0B: // DW_EH_PE_sdata4 A 4 bytes signed value.
res = 8;
return Success();
case 0x04: // DW_EH_PE_udata8 An 8 bytes unsigned value.
case 0x0C: // DW_EH_PE_sdata8 An 8 bytes signed value.
res = 16;
return Success();
default:
return Error("unsupported table_enc: %#x", table_enc);
}
}
// Decode the length and cie_ptr field in CIE/FDE. It's awkward because we want to support both
// .eh_frame format and .debug_frame format.
//
// When returned, |ptr| will point to the beginning of the body, and |end| will point to the end
// of the entry.
[[nodiscard]] Error DecodeCieFdeHdrAndAdvance(Memory* elf, uint8_t version, uint64_t& ptr,
uint64_t& end, uint64_t& cie_id) {
uint32_t short_length;
if (auto err = elf->ReadAndAdvance(ptr, short_length); err.has_err()) {
return err;
}
if (short_length == 0) {
return Error("not a valid CIE/FDE");
}
if (short_length != 0xFFFFFFFF) {
end = ptr + short_length;
} else {
uint64_t length;
if (auto err = elf->ReadAndAdvance(ptr, length); err.has_err()) {
return err;
}
end = ptr + length;
}
// The cie_id is 8-bytes only when the version is 4 and it's a 64-bit DWARF format.
if (version == 4 && short_length == 0xFFFFFFFF) {
if (auto err = elf->ReadAndAdvance(ptr, cie_id); err.has_err()) {
return err;
}
} else {
uint32_t short_cie_id;
if (auto err = elf->ReadAndAdvance(ptr, short_cie_id); err.has_err()) {
return err;
}
// Special handling for cie_id in .debug_frame so that the callers don't need to distinguish
// 32/64-bit DWARF to know whether an entry is CIE or FDE.
if (version == 4 && short_cie_id == kDwarf32CieId) {
cie_id = kDwarf64CieId;
} else {
cie_id = short_cie_id;
}
}
return Success();
}
} // namespace
// Load the .eh_frame from the ELF image in memory.
//
// See the Linux Standard Base Core Specification
// https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
// and a reference implementation in LLVM
// https://github.com/llvm/llvm-project/blob/main/libunwind/src/DwarfParser.hpp
// https://github.com/llvm/llvm-project/blob/main/libunwind/src/EHHeaderParser.hpp
Error CfiModule::Load() {
if (!elf_) {
return Error("no elf memory");
}
Elf64_Ehdr ehdr;
if (auto err = elf_->Read(elf_ptr_, ehdr); err.has_err()) {
return err;
}
// Header magic should be correct.
if (strncmp(reinterpret_cast<const char*>(ehdr.e_ident), ELFMAG, SELFMAG) != 0) {
return Error("not an ELF image");
}
// ==============================================================================================
// Load from the .eh_frame_hdr section.
// ==============================================================================================
eh_frame_hdr_ptr_ = 0;
pc_begin_ = -1;
pc_end_ = 0;
for (uint64_t i = 0; i < ehdr.e_phnum; i++) {
Elf64_Phdr phdr;
if (auto err = elf_->Read(elf_ptr_ + ehdr.e_phoff + ehdr.e_phentsize * i, phdr);
err.has_err()) {
return err;
}
if (phdr.p_type == PT_GNU_EH_FRAME) {
if (address_mode_ == Module::AddressMode::kProcess) {
eh_frame_hdr_ptr_ = elf_ptr_ + phdr.p_vaddr;
} else {
eh_frame_hdr_ptr_ = elf_ptr_ + phdr.p_offset;
}
} else if (phdr.p_type == PT_LOAD && phdr.p_flags & PF_X) {
pc_begin_ = std::min(pc_begin_, elf_ptr_ + phdr.p_vaddr);
pc_end_ = std::max(pc_end_, elf_ptr_ + phdr.p_vaddr + phdr.p_memsz);
}
}
if (!eh_frame_hdr_ptr_) {
return Error("no PT_GNU_EH_FRAME segment");
}
auto p = eh_frame_hdr_ptr_;
uint8_t version;
if (auto err = elf_->ReadAndAdvance(p, version); err.has_err()) {
return err;
}
if (version != 1) {
return Error("unknown eh_frame_hdr version %d", version);
}
uint8_t eh_frame_ptr_enc;
uint8_t fde_count_enc;
uint64_t eh_frame_ptr; // not used
if (auto err = elf_->ReadAndAdvance(p, eh_frame_ptr_enc); err.has_err()) {
return err;
}
if (auto err = elf_->ReadAndAdvance(p, fde_count_enc); err.has_err()) {
return err;
}
if (auto err = elf_->ReadAndAdvance(p, table_enc_); err.has_err()) {
return err;
}
if (auto err = DecodeTableEntrySize(table_enc_, table_entry_size_); err.has_err()) {
return err;
}
if (auto err = elf_->ReadEncodedAndAdvance(p, eh_frame_ptr, eh_frame_ptr_enc, eh_frame_hdr_ptr_);
err.has_err()) {
return err;
}
if (auto err = elf_->ReadEncodedAndAdvance(p, fde_count_, fde_count_enc, eh_frame_hdr_ptr_);
err.has_err()) {
return err;
}
table_ptr_ = p;
if (fde_count_ == 0) {
return Error("empty binary search table");
}
// ==============================================================================================
// Optionally load from the .debug_frame section. Any failure is not an error here.
// ==============================================================================================
debug_frame_ptr_ = 0;
debug_frame_end_ = 0;
// Section headers and .debug_frame section are not loaded.
if (address_mode_ == Module::AddressMode::kProcess) {
return Success();
}
// if ehdr.e_shstrndx is 0, it means there's no section info, i.e., the binary is stripped.
if (!ehdr.e_shstrndx) {
return Success();
}
uint64_t shstr_hdr_ptr =
elf_ptr_ + ehdr.e_shoff + static_cast<uint64_t>(ehdr.e_shentsize) * ehdr.e_shstrndx;
Elf64_Shdr shstr_hdr;
if (elf_->Read(shstr_hdr_ptr, shstr_hdr).has_err()) {
return Success();
}
for (uint64_t i = 0; i < ehdr.e_shnum; i++) {
Elf64_Shdr shdr;
if (elf_->Read(elf_ptr_ + ehdr.e_shoff + ehdr.e_shentsize * i, shdr).has_err()) {
continue;
}
static constexpr char target_section_name[] = ".debug_frame";
char section_name[sizeof(target_section_name)];
if (elf_->Read(elf_ptr_ + shstr_hdr.sh_offset + shdr.sh_name, section_name).has_err()) {
continue;
}
if (strncmp(section_name, target_section_name, sizeof(section_name)) == 0) {
debug_frame_ptr_ = elf_ptr_ + shdr.sh_offset;
debug_frame_end_ = debug_frame_ptr_ + shdr.sh_size;
}
}
return Success();
}
Error CfiModule::Step(Memory* stack, const Registers& current, Registers& next) {
uint64_t pc;
if (auto err = current.GetPC(pc); err.has_err()) {
return err;
}
if (!IsValidPC(pc)) {
return Error("pc %#" PRIx64 " is outside of the executable area", pc);
}
DwarfCie cie;
DwarfFde fde;
// Search for .eh_frame first.
if (auto err = SearchEhFrame(pc, cie, fde); err.has_err()) {
// Cannot find the correct FDE in .eh_frame, try to find in .debug_frame.
if (SearchDebugFrame(pc, cie, fde).has_err()) {
return err; // return the error from .eh_frame.
}
}
CfiParser cfi_parser(current.arch(), cie.code_alignment_factor, cie.data_alignment_factor);
// Parse instructions in CIE first.
if (auto err =
cfi_parser.ParseInstructions(elf_, cie.instructions_begin, cie.instructions_end, -1);
err.has_err()) {
return err;
}
cfi_parser.Snapshot();
// Parse instructions in FDE until pc.
if (auto err = cfi_parser.ParseInstructions(elf_, fde.instructions_begin, fde.instructions_end,
pc - fde.pc_begin);
err.has_err()) {
return err;
}
if (auto err = cfi_parser.Step(stack, cie.return_address_register, current, next);
err.has_err()) {
return err;
}
return Success();
}
Error CfiModule::SearchEhFrame(uint64_t pc, DwarfCie& cie, DwarfFde& fde) {
// Binary search for fde_ptr in the range [low, high).
uint64_t low = 0;
uint64_t high = fde_count_;
while (low + 1 < high) {
uint64_t mid = (low + high) / 2;
uint64_t addr = table_ptr_ + mid * table_entry_size_;
uint64_t mid_pc;
if (auto err = elf_->ReadEncoded(addr, mid_pc, table_enc_, eh_frame_hdr_ptr_); err.has_err()) {
return err;
}
if (pc < mid_pc) {
high = mid;
} else {
low = mid;
}
}
uint64_t addr = table_ptr_ + low * table_entry_size_ + table_entry_size_ / 2;
uint64_t fde_ptr;
if (auto err = elf_->ReadEncoded(addr, fde_ptr, table_enc_, eh_frame_hdr_ptr_); err.has_err()) {
return err;
}
if (auto err = DecodeFde(1, fde_ptr, cie, fde); err.has_err()) {
return err;
}
if (pc < fde.pc_begin || pc >= fde.pc_end) {
return Error("cannot find FDE for pc %#" PRIx64, pc);
}
return Success();
}
Error CfiModule::SearchDebugFrame(uint64_t pc, DwarfCie& cie, DwarfFde& fde) {
if (!debug_frame_ptr_) {
return Error("no .debug_frame section");
}
if (debug_frame_map_.empty()) {
if (auto err = BuildDebugFrameMap(); err.has_err()) {
return err;
}
}
auto debug_frame_map_it = debug_frame_map_.upper_bound(pc);
if (debug_frame_map_it == debug_frame_map_.begin()) {
return Error("cannot find FDE for pc %#" PRIx64 " in .debug_frame", pc);
}
debug_frame_map_it--;
uint64_t fde_ptr = debug_frame_map_it->second;
if (auto err = DecodeFde(4, fde_ptr, cie, fde); err.has_err()) {
return err;
}
if (pc < fde.pc_begin || pc >= fde.pc_end) {
return Error("cannot find FDE for pc %#" PRIx64 " in .debug_frame", pc);
}
return Success();
}
// In order to read less memory, this function assumes the address encoding of all CIEs is the same,
// so that it only needs to decode the first CIE.
Error CfiModule::BuildDebugFrameMap() {
debug_frame_map_.clear();
uint8_t fde_address_encoding = 0;
for (uint64_t p = debug_frame_ptr_, next_p; p < debug_frame_end_; p = next_p) {
uint64_t hdr_p = p;
uint64_t cie_id;
if (auto err = DecodeCieFdeHdrAndAdvance(elf_, 4, p, next_p, cie_id); err.has_err()) {
return err;
}
if (cie_id == kDwarf64CieId) {
if (fde_address_encoding) {
// Assume address encoding is the same for all CIEs. Skip all other CIEs to accelerate.
continue;
}
DwarfCie cie;
// This will only be called once, so it's fine that |DecodeCie| decodes the header again.
if (auto err = DecodeCie(4, hdr_p, cie); err.has_err()) {
return err;
}
fde_address_encoding = cie.fde_address_encoding;
} else { // is FDE
// We only need pc_begin, so don't go through |DecodeFde|.
uint64_t pc_begin;
if (auto err = elf_->ReadEncoded(p, pc_begin, fde_address_encoding, elf_ptr_);
err.has_err()) {
return err;
}
debug_frame_map_.emplace(pc_begin, hdr_p);
}
}
if (debug_frame_map_.empty()) {
return Error("empty .debug_frame");
}
return Success();
}
// When version == 1, check the spec at
// https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
// When version == 4, check the spec at http://www.dwarfstd.org/doc/DWARF5.pdf
Error CfiModule::DecodeCie(uint8_t version, uint64_t cie_ptr, DwarfCie& cie) {
uint64_t cie_id;
if (auto err = DecodeCieFdeHdrAndAdvance(elf_, version, cie_ptr, cie.instructions_end, cie_id);
err.has_err()) {
return err;
}
if ((version == 1 && cie_id != 0) || (version == 4 && cie_id != kDwarf64CieId)) {
return Error("not a valid CIE");
}
// Versions should match.
uint8_t this_version;
if (auto err = elf_->ReadAndAdvance(cie_ptr, this_version); err.has_err()) {
return err;
}
if (this_version != version) {
return Error("unexpected CIE version: %d", this_version);
}
std::string augmentation_string;
while (true) {
char ch;
if (auto err = elf_->ReadAndAdvance(cie_ptr, ch); err.has_err()) {
return err;
}
if (ch) {
augmentation_string.push_back(ch);
} else {
break;
}
}
if (version == 4) {
// Read the address_size.
uint8_t address_size;
if (auto err = elf_->ReadAndAdvance(cie_ptr, address_size); err.has_err()) {
return err;
}
// Set fde_address_encoding to DW_EH_PE_datarel so that we can set the base to elf_ptr_.
switch (address_size) {
case 2:
cie.fde_address_encoding = 0x32;
break;
case 4:
cie.fde_address_encoding = 0x33;
break;
case 8:
cie.fde_address_encoding = 0x34;
break;
default:
return Error("unsupported CIE address_size: %d", address_size);
}
// Skip the segment_selector_size.
cie_ptr++;
}
if (auto err = elf_->ReadULEB128AndAdvance(cie_ptr, cie.code_alignment_factor); err.has_err()) {
return err;
}
if (auto err = elf_->ReadSLEB128AndAdvance(cie_ptr, cie.data_alignment_factor); err.has_err()) {
return err;
}
if (version == 4) {
uint64_t return_address_register;
if (auto err = elf_->ReadULEB128AndAdvance(cie_ptr, return_address_register); err.has_err()) {
return err;
}
cie.return_address_register = static_cast<RegisterID>(return_address_register);
} else {
if (auto err = elf_->ReadAndAdvance(cie_ptr, cie.return_address_register); err.has_err()) {
return err;
}
}
if (augmentation_string.empty()) {
cie.instructions_begin = cie_ptr;
cie.fde_have_augmentation_data = false;
} else {
// DWARF standard doesn't say anything about the possibility of the augmentation string and
// we have never seen a use case of augmentation string in .debug_frame, which is understandable
// because the augmentation string is mainly useful for unwinding during an exception.
// For now, we don't support it.
if (version == 4) {
return Error("unsupported augmentation string in .debug_frame: %s",
augmentation_string.c_str());
}
if (augmentation_string[0] != 'z') {
return Error("invalid augmentation string: %s", augmentation_string.c_str());
}
uint64_t augmentation_length;
if (auto err = elf_->ReadULEB128AndAdvance(cie_ptr, augmentation_length); err.has_err()) {
return err;
}
cie.instructions_begin = cie_ptr + augmentation_length;
cie.fde_have_augmentation_data = true;
for (char ch : augmentation_string) {
switch (ch) {
case 'L':
// LSDA (language-specific data area) is used by some languages such as C++ to ensure
// the correct destruction of objects on stack. We don't need to handle it.
uint8_t lsda_encoding;
if (auto err = elf_->ReadAndAdvance(cie_ptr, lsda_encoding); err.has_err()) {
return err;
}
break;
case 'P':
// The personality routine is used to handle language and vendor-specific tasks to ensure
// the correct unwinding. We don't need to handle it.
uint8_t enc;
if (auto err = elf_->ReadAndAdvance(cie_ptr, enc); err.has_err()) {
return err;
}
uint64_t personality;
if (auto err = elf_->ReadEncodedAndAdvance(cie_ptr, personality, enc, 0); err.has_err()) {
return err;
}
break;
case 'R':
if (auto err = elf_->ReadAndAdvance(cie_ptr, cie.fde_address_encoding); err.has_err()) {
return err;
}
break;
}
}
}
return Success();
}
Error CfiModule::DecodeFde(uint8_t version, uint64_t fde_ptr, DwarfCie& cie, DwarfFde& fde) {
uint64_t cie_offset;
if (auto err =
DecodeCieFdeHdrAndAdvance(elf_, version, fde_ptr, fde.instructions_end, cie_offset);
err.has_err()) {
return err;
}
uint64_t cie_ptr;
if (version == 4) {
cie_ptr = debug_frame_ptr_ + cie_offset;
} else {
cie_ptr = fde_ptr - 4 - cie_offset;
}
if (auto err = DecodeCie(version, cie_ptr, cie); err.has_err()) {
return err;
}
if (auto err =
elf_->ReadEncodedAndAdvance(fde_ptr, fde.pc_begin, cie.fde_address_encoding, elf_ptr_);
err.has_err()) {
return err;
}
if (auto err = elf_->ReadEncodedAndAdvance(fde_ptr, fde.pc_end, cie.fde_address_encoding & 0x0F);
err.has_err()) {
return err;
}
fde.pc_end += fde.pc_begin;
if (cie.fde_have_augmentation_data) {
uint64_t augmentation_length;
if (auto err = elf_->ReadULEB128AndAdvance(fde_ptr, augmentation_length); err.has_err()) {
return err;
}
// We don't really care about the augmentation data.
fde_ptr += augmentation_length;
}
fde.instructions_begin = fde_ptr;
return Success();
}
} // namespace unwinder