| // Copyright 2024 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_ENCODING_H_ |
| #define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_ENCODING_H_ |
| |
| #include <lib/stdcompat/span.h> |
| |
| #include <cstdint> |
| #include <optional> |
| |
| #include "../layout.h" |
| |
| namespace elfldltl::dwarf { |
| |
| // ULEB128 is a byte-granularity "bignum"-style encoding used in DWARF. |
| // Encodings use as few bytes as needed to represent the value, 7 bits of |
| // value in each byte of encoding: up to 5 bytes for up to 32 bits, up to |
| // 10 bytes for up to 64 bits. |
| struct Uleb128 { |
| static constexpr size_t kMaxBytes = 10; |
| |
| // Read one ULEB128 value from the byte buffer. Returns std::nullopt if the |
| // buffer is too short or if the encoding uses more bytes than should be |
| // necessary for a 64-bit value. |
| static std::optional<Uleb128> Read(cpp20::span<const std::byte> bytes); |
| |
| // This is the value, zero-extended to uint64_t. |
| uint64_t value = 0; |
| |
| // This is the number of bytes in the encoding: how many bytes were |
| // consumed by the Read call that created this Uleb128 object. |
| size_t size_bytes = 0; |
| }; |
| |
| // SLEB128 is the same encoding as ULEB128, but the value is understood to |
| // be sign-extended from the highest bit present in the encoded value. |
| struct Sleb128 { |
| static constexpr size_t kMaxBytes = 10; |
| |
| // Read one SLEB128 value from the byte buffer. Returns std::nullopt if the |
| // buffer is too short or if the encoding uses more bytes than should be |
| // necessary for a 64-bit value. |
| static std::optional<Sleb128> Read(cpp20::span<const std::byte> bytes); |
| |
| // This is the value, sign-extended to int64_t. |
| int64_t value = 0; |
| |
| // This is the number of bytes in the encoding: how many bytes were |
| // consumed by the Read call that created this Sleb128 object. |
| size_t size_bytes = 0; |
| }; |
| |
| // This is the encoding byte used in the DW_OP_GNU_encoded_addr extension, |
| // in GNU ..eh_frame_hdr format, and in GNU de facto standard augmentation |
| // for .debug_frame formats. This is a struct with non-scoped enums rather |
| // than using `enum class`, so that the names are scoped to the struct type |
| // but the values are convertible to uint8_t and implicitly usable with |
| // bitwise operations. |
| // |
| // The default-constructed EncodedPtr object represents an omitted value. |
| // This gives an integer value of zero, but takes no space to encode. |
| struct EncodedPtr { |
| // These are the primary values that indicate basic integer encoding. |
| enum PtrType : uint8_t { |
| kOmit = 0xff, // No value present. |
| |
| kPtr = 0x00, // Address size, unsigned. |
| kUleb128 = 0x01, // ULEB128 encoded (unsigned), variable length. |
| kUdata2 = 0x02, // 16 bits unsigned. |
| kUdata4 = 0x03, // 32 bits unsigned. |
| kUdata8 = 0x04, // 64 bits unsigned. |
| |
| // This is actually a flag bit, combined with one of the unsigned |
| // encodings above to yield their signed counterparts below. |
| kSigned = 0x08, |
| |
| kSleb128 = 0x09, // SLEB128 encoded (signed), variable length. |
| kSdata2 = 0x0a, // 16 bits signed. |
| kSdata4 = 0x0b, // 32 bits signed. |
| kSdata8 = 0x0c, // 64 bits signed. |
| }; |
| |
| // One of these can be OR'd in with one of the basic encodings above. Note |
| // that the relative encodings implicitly refer to different base addresses |
| // in different contexts, e.g. kDatarel inside .eh_frame_hdr is relative to |
| // the beginning of .eh_frame_hdr itself. |
| enum PtrModifier : uint8_t { |
| kAbs = 0x00, // Value is absolute. |
| kPcrel = 0x10, // Value is relative to its own location. |
| kTextrel = 0x20, // Value is relative to "text" segment (contextual). |
| kDatarel = 0x30, // Value is relative to "data" segment (contextual). |
| kFuncrel = 0x40, // Value is relative to function. |
| kAligned = 0x50, // Encoded value starts at naturally aligned location. |
| }; |
| |
| // This can be separately OR'd to indicate that the encoded address is |
| // actually the location of the value as for Encoding::kAbsptr. |
| static constexpr uint8_t kIndirect = 0x80; |
| |
| // This yields just the basic encoding, regardless of indirection or |
| // adjustments. This is all that's needed to determine the encoded size. |
| static constexpr PtrType Type(uint8_t encoding) { |
| return encoding == kOmit ? kOmit : static_cast<PtrType>(encoding & 0x0f); |
| } |
| |
| // This yields just the modifier for a relative address. After the basic |
| // value is decoded according to Type(encoding), this is what adjustment must |
| // be done to the value. |
| static constexpr PtrModifier Modifier(uint8_t encoding) { |
| return encoding == kOmit ? kAbs : static_cast<PtrModifier>(encoding & 0x70); |
| } |
| |
| // This indicates that the value is actually stored elsewhere in memory. The |
| // Type(encoding) still indicates the type of that stored pointer, as well as |
| // the basic type of the encoding used to locate it. After applying the |
| // Modifier(encoding) adjustments to the encoded pointer, that pointer must |
| // be dereferenced to fetch the desired value. |
| static constexpr bool Indirect(uint8_t encoding) { |
| return encoding != kOmit && (encoding & kIndirect); |
| } |
| |
| // This indicates if the encoded value is signed, so it should be |
| // sign-extended from narrower encoding to a wider integer type. |
| static constexpr bool Signed(uint8_t encoding) { |
| return encoding != kOmit && (encoding & kSigned); |
| } |
| |
| // EncodedSize returns this value for the LEB128 types, which have a |
| // variable-sized encoding. The exact size can only be determined by |
| // actually decoding the value. |
| static constexpr uint8_t kDynamicSize = -1; |
| |
| // This returns the encoded size, which may depend on the contextual address |
| // size. It returns kDynamicSize for LEB128 types whose exact size cannot be |
| // known without the actual data. |
| static constexpr uint8_t EncodedSize(uint8_t encoding, uint8_t address_size) { |
| switch (Type(encoding)) { |
| case kPtr: |
| case kSigned: |
| return address_size; |
| case kOmit: |
| return 0; |
| case kUdata2: |
| case kSdata2: |
| return 2; |
| case kUdata4: |
| case kSdata4: |
| return 4; |
| case kUdata8: |
| case kSdata8: |
| return 8; |
| case kUleb128: |
| case kSleb128: |
| break; |
| } |
| return kDynamicSize; |
| } |
| |
| // This normalizes the encoding so that it's unambiguous with respect to |
| // address size. After normalization, an encoding can be used directly |
| // without keeping track of the address size that's indicated by, or implicit |
| // in, the context it came from. |
| template <class Elf = Elf<>> |
| static constexpr uint8_t Normalize(uint8_t encoding, |
| uint8_t address_size = sizeof(typename Elf::Addr)) { |
| if ((encoding & 0x7) == 0) { |
| encoding |= 3 + (address_size >> 3); |
| } |
| return encoding; |
| } |
| |
| // Read an encoded value via the Memory object. Both the vaddr argument and |
| // the encoded addresses (in case of indirection) are in whatever address |
| // space the Memory object provides. To support the indirection case |
| // properly, don't adjust the vaddr argument for use with a generic Memory |
| // object. Instead use a Memory object that takes the unadjusted address and |
| // implicitly applies the runtime load bias for the module containing the |
| // DWARF metadata being read; this ensures that a possible second call to the |
| // Memory object will correctly handle an address read from the metadata |
| // rather than the given vaddr argument. When reading variable-sized |
| // (LEB128) data, the single-argument ReadArray method of the Memory object |
| // is expected to return at least as much data as the value encoding requires |
| // in the single call. Returns std::nullopt if the Memory object fails. |
| // Otherwise the value is extended to 64 bits. In the case of a signed |
| // encoding, bit_cast<int64_t> should be used on the value. |
| template <class Elf = Elf<>, class Memory> |
| static constexpr std::optional<uint64_t> FromMemory( // |
| uint8_t encoding, Memory& memory, typename Elf::size_type vaddr, |
| uint8_t address_size = sizeof(typename Elf::Addr)) { |
| uint8_t size = EncodedSize(encoding, address_size); |
| if (size == 0) { |
| return 0; |
| } |
| |
| std::optional<EncodedPtr> encoded; |
| if (auto read = size == kDynamicSize // |
| ? memory.template ReadArray<std::byte>(vaddr, size) |
| : memory.template ReadArray<std::byte>(vaddr)) { |
| uint8_t read_encoding = encoding; |
| if (Indirect(encoding) && Modifier(encoding) == kPcrel) { |
| // Always sign-extend a relative value. |
| read_encoding |= kSigned; |
| } |
| encoded = Read<Elf>(read_encoding, *read, address_size); |
| } |
| if (!encoded) { |
| return std::nullopt; |
| } |
| switch (Modifier(encoding)) { |
| case kAbs: |
| break; |
| case kPcrel: |
| encoded->ptr = vaddr + encoded->sptr; |
| break; |
| default: |
| return std::nullopt; |
| } |
| if (Indirect(encoding)) { |
| if (auto read = memory.template ReadArray<typename Elf::Addr>(encoded->ptr, 1)) { |
| return read->front(); |
| } |
| return std::nullopt; |
| } |
| return encoded->ptr; |
| } |
| |
| // Read an encoded value from the byte buffer. This returns an |
| // EncodedPtr object rather than the resolved value. The caller is |
| // responsible for applying modifiers and indirection to the value. |
| template <class Elf = Elf<>> |
| static constexpr std::optional<EncodedPtr> Read( |
| uint8_t encoding, cpp20::span<const std::byte> bytes, |
| uint8_t address_size = sizeof(typename Elf::Addr)) { |
| if (Type(encoding) == kSleb128) { |
| if (auto leb = Sleb128::Read(bytes)) { |
| return EncodedPtr{ |
| .sptr = leb->value, |
| .encoding = encoding, |
| .encoded_size = static_cast<uint8_t>(leb->size_bytes), |
| }; |
| return std::nullopt; |
| } |
| } |
| if (Type(encoding) == kUleb128) { |
| if (auto leb = Uleb128::Read(bytes)) { |
| return EncodedPtr{ |
| .ptr = leb->value, |
| .encoding = encoding, |
| .encoded_size = static_cast<uint8_t>(leb->size_bytes), |
| }; |
| return std::nullopt; |
| } |
| } |
| |
| const uint8_t encoded_size = EncodedSize(encoding, address_size); |
| if (encoded_size == 0) { |
| return EncodedPtr{}; |
| } |
| |
| assert(encoded_size != kDynamicSize); // LEB128 was caught above. |
| if (encoded_size > bytes.size_bytes()) [[unlikely]] { |
| return std::nullopt; |
| } |
| |
| auto decode = [encoding, bytes](auto unsigned_value) -> EncodedPtr { |
| if (Signed(encoding)) { |
| typename decltype(unsigned_value)::Signed value; |
| memcpy(&value, bytes.data(), sizeof(value)); |
| return { |
| .sptr = static_cast<int64_t>(value), |
| .encoding = encoding, |
| .encoded_size = sizeof(value), |
| }; |
| } |
| memcpy(&unsigned_value, bytes.data(), sizeof(unsigned_value)); |
| return { |
| .ptr = unsigned_value, |
| .encoding = encoding, |
| .encoded_size = sizeof(unsigned_value), |
| }; |
| }; |
| |
| switch (encoded_size) { |
| case 2: |
| return decode(typename Elf::Half{}); |
| case 4: |
| return decode(typename Elf::Word{}); |
| case 8: |
| return decode(typename Elf::Xword{}); |
| } |
| |
| return std::nullopt; |
| } |
| |
| // The value is either signed or unsigned, as indicated by the encoding. |
| // Narrower signed values have been sign-extended to int64_t. This is |
| // only the final value for encodings with no modifiers or indirection. |
| union { |
| uint64_t ptr = 0; |
| int64_t sptr; |
| }; |
| |
| // This records the original encoding, including modifiers and |
| // indirection. The .ptr or .sptr value must be adjusted according to |
| // any relative modifier (usually "PC-relative", meaning relative to its |
| // own encoding location). If indirection is indicated, the resulting |
| // pointer must be used to fetch the actual value (of the same size). |
| uint8_t encoding = kOmit; |
| |
| // This gives the total size of the encoding: how many bytes were |
| // consumed by the Read call that created this EncodedPtr. |
| uint8_t encoded_size = 0; |
| }; |
| |
| } // namespace elfldltl::dwarf |
| |
| #endif // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_ENCODING_H_ |