src/lib/elfldltl/include/lib/elfldltl/dwarf/section-data.h - fuchsia - Git at Google

 // Copyright 2024 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_SECTION_DATA_H_
 #define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_SECTION_DATA_H_

 #include <lib/stdcompat/span.h>

 #include <cstddef>
 #include <cstdint>
 #include <cstring>
 #include <optional>
 #include <utility>

 #include "../layout.h"

 namespace elfldltl::dwarf {

 // DWARF is generally independent of ELF.  However, decoding it requires
 // knowing its byte order; and in a few places, knowing the environment's idea
 // of address size (although in most cases DWARF self-identifies its own idea
 // of the address size).  These are exactly the two things represented by the
 // elfldltl::Elf<...> template types, so some template APIs here use an Elf
 // class template parameter to indicate those aspects.  When DWARF is encoded
 // in ELF files, the Elf parameter matches what DWARF decoding needs to know.

 // Many kinds of DWARF data come in partly self-identifying units.  An object
 // file section holding a certain kind of DWARF data often has an arbitrary
 // sequence of data units of a particular kind (each kind in its own section),
 // collected by the linker.  Across all the kinds of section data, the formats
 // for data units have one thing in common: the initial length field.  This
 // small header (usually 4 bytes, theoretically sometimes 12) indicates both
 // the total length of the data unit and the size of offsets used in the DWARF
 // format within that unit: 32-bit DWARF offsets, or 64-bit DWARF offsets.
 // Note that this 32-bit / 64-bit distinction is unrelated to machine register
 // size, address size, pointer ABI, etc.  The "32-bit DWARF" and "64-bit DWARF"
 // formats refer purely to what size is used for DWARF "offset" values, which
 // are usually offsets from the beginning of a whole section (so a single DWARF
 // section has to exceed 4GiB to warrant using the 64-bit format in units that
 // hold offsets into that section).

 // This value for the initial length of a data unit indicates that it uses
 // the 64-bit DWARF format, and the 64-bit length follows.  It is an inline
 // variable so that every translation unit including this header shares a
 // single definition rather than getting its own internal-linkage copy.
 inline constexpr uint32_t kDwarf64Length = 0xffffffff;

 // Other values starting here are reserved, meanings not yet specified.  If
 // these values are encountered, we don't know how to determine the length of
 // the data unit.  Any 32-bit initial length below this limit is an ordinary
 // 32-bit DWARF length.
 inline constexpr uint32_t kDwarf32Limit = 0xfffffff0;

 // elfldltl::dwarf::SectionData represents a DWARF data unit in memory.  It
 // identifies the DWARF format (offset size) used inside the unit, and
 // holds a std::span<const std::byte> of the unit's contents.  SectionData
 // objects are small and trivially copyable like std::span.
 class SectionData {
  public:
   // This indicates the DWARF format used in a single data unit.  This
   // implies the offset size used in its data, as well as the form and size
   // of its initial length header.
   enum class Format : uint8_t {
     kDwarf32,  // 32-bit DWARF offsets.
     kDwarf64,  // 64-bit DWARF offsets.
   };

   // A default-constructed object uses the 32-bit format and holds a
   // default-constructed (empty) span.
   constexpr SectionData() = default;

   constexpr SectionData(const SectionData&) = default;

   // Wrap the bytes that follow a unit's initial length header, along with
   // the format that header indicated.  The bytes are referenced, not copied.
   constexpr SectionData(cpp20::span<const std::byte> contents, Format format)
       : contents_{contents}, format_{format} {}

   constexpr SectionData& operator=(const SectionData&) = default;

   // The total size of this data unit, including its initial length header.
   // This is the amount of the data that is consumed by Read (see below).
   constexpr size_t size_bytes() const { return initial_length_size() + contents_.size_bytes(); }

   // The contents of this data unit, not including its initial length header.
   constexpr cpp20::span<const std::byte> contents() const { return contents_; }

   // The format of this data unit.
   constexpr Format format() const { return format_; }

   // The size of this data unit's initial length header.
   constexpr uint8_t initial_length_size() const { return InitialLengthSize(format_); }

   // The size of any data unit's initial length header given the format:
   // always one 32-bit word, followed by a 64-bit length in 64-bit DWARF.
   static constexpr uint8_t InitialLengthSize(Format format) {
     // The sum is 4 or 12, so the narrowing to uint8_t is lossless.
     return static_cast<uint8_t>(sizeof(uint32_t) +
                                 (format == Format::kDwarf64 ? sizeof(uint64_t) : 0));
   }

   // The size in bytes of DWARF offsets (4 or 8) used in this data unit.
   constexpr uint8_t offset_size() const { return OffsetSize(format_); }

   // The size of DWARF offsets used in any data unit given the format.
   static constexpr uint8_t OffsetSize(Format format) {
     return static_cast<uint8_t>(format == Format::kDwarf64 ? sizeof(uint64_t) : sizeof(uint32_t));
   }

   // Read an offset in this data unit's format from contents(), starting at
   // byte position pos.  Returns std::nullopt if pos lies past the end of the
   // contents or if fewer bytes than one offset remain at pos.  The Elf
   // template parameter indicates the byte order used in the data unit.
   template <class Elf = Elf<>>
   constexpr std::optional<uint64_t> read_offset(size_t pos = 0) const {
     // subspan() requires pos <= size(), so reject an out-of-range position
     // here instead of violating that precondition.
     if (pos > contents_.size()) [[unlikely]] {
       return std::nullopt;
     }
     return ReadOffset<Elf>(format_, contents_.subspan(pos));
   }

   // Read an offset in the indicated DWARF format from the front of the byte
   // buffer.  A 32-bit offset is zero-extended to uint64_t.  Returns
   // std::nullopt if the buffer is too small to hold an offset.
   template <class Elf = Elf<>>
   static constexpr std::optional<uint64_t> ReadOffset(  //
       Format format, cpp20::span<const std::byte> bytes) {
     switch (format) {
       case Format::kDwarf32:
         return ReadFromBytes<typename Elf::Word>(bytes);
       case Format::kDwarf64:
         return ReadFromBytes<typename Elf::Xword>(bytes);
     }
     // Only reached if format holds a value that is not a Format enumerator.
     __builtin_trap();  // Should be unreachable.
   }

   // This identifies the size and format of a data unit in the byte stream.
   // If there is a fundamental format error, it's reported via the
   // Diagnostics object with the error_args appended after the generic
   // message and std::nullopt is returned.  Otherwise, size_bytes()
   // indicates how much of the byte stream was consumed by this unit, and
   // contents() holds the data unit bytes after the initial length: where
   // the header for the particular kind of data starts.
   template <class Elf = Elf<>, class Diagnostics, typename... ErrorArgs>
   static constexpr std::optional<SectionData> Read(  //
       Diagnostics& diag, cpp20::span<const std::byte> bytes, ErrorArgs&&... error_args);

   // This combines Read with peeling off the size_bytes() consumed from the
   // byte stream, returning the new tail:
   // ```
   // auto [data, rest] = SectionData::Consume(diag, bytes);
   // if (data) {
   //   Decode(data->contents());
   // }
   // ```
   // On failure (when `data == std::nullopt`), `bytes` is returned unchanged.
   template <class Elf = Elf<>, class Diagnostics, typename... ErrorArgs>
   static constexpr std::pair<std::optional<SectionData>, cpp20::span<const std::byte>> Consume(
       Diagnostics& diag, cpp20::span<const std::byte> bytes, ErrorArgs&&... error_args) {
     auto read = Read<Elf>(diag, bytes, std::forward<ErrorArgs>(error_args)...);
     return {read, read ? bytes.subspan(read->size_bytes()) : bytes};
   }

  private:
   // Copy sizeof(T) bytes from the front of the buffer into a T, or return
   // std::nullopt if the buffer holds fewer bytes than that.  memcpy is used
   // so the source bytes need not be aligned for T.
   template <typename T>
   static constexpr std::optional<T> ReadFromBytes(cpp20::span<const std::byte> bytes) {
     if (bytes.size_bytes() < sizeof(T)) [[unlikely]] {
       return std::nullopt;
     }
     T value;
     memcpy(&value, bytes.data(), sizeof(value));
     return value;
   }

   // The unit's bytes after its initial length header.
   cpp20::span<const std::byte> contents_;

   // The DWARF format (offset size) the unit's initial length indicated.
   Format format_ = Format::kDwarf32;
 };

 // Decode the initial length header at the front of `bytes` and return the
 // data unit it describes.  On any format error, one message is reported via
 // diag.FormatError (with error_args appended) and std::nullopt is returned.
 template <class Elf, class Diagnostics, typename... ErrorArgs>
 constexpr std::optional<SectionData> SectionData::Read(  //
     Diagnostics& diag, cpp20::span<const std::byte> bytes, ErrorArgs&&... error_args) {
   using Word = typename Elf::Word;
   using Xword = typename Elf::Xword;

   // Remember the size of the whole input for error messages, since `bytes`
   // shrinks as header fields are consumed from its front.
   const size_t input_size = bytes.size_bytes();

   // Copy one header field out of the front of `bytes` and advance past it.
   // Reports an error and returns false if too few bytes remain.  The
   // error_args are forwarded at most once because every failure path below
   // returns immediately.
   auto consume = [&](auto& value) -> bool {
     if (bytes.size_bytes() < sizeof(value)) [[unlikely]] {
       diag.FormatError("data size ", input_size, " too small for DWARF header",
                        std::forward<ErrorArgs>(error_args)...);
       return false;
     }
     memcpy(&value, bytes.data(), sizeof(value));
     bytes = bytes.subspan(sizeof(value));
     return true;
   };

   Word initial_length;
   if (!consume(initial_length)) [[unlikely]] {
     return std::nullopt;
   }

   Format format = Format::kDwarf32;
   size_t data_size = initial_length;
   if (initial_length >= kDwarf32Limit) {
     // Only the kDwarf64Length escape value is understood in this range.
     if (initial_length != kDwarf64Length) [[unlikely]] {
       diag.FormatError("Reserved initial-length value ", initial_length, " used in DWARF header",
                        std::forward<ErrorArgs>(error_args)...);
       return std::nullopt;
     }
     Xword size64;
     if (!consume(size64)) [[unlikely]] {
       return std::nullopt;
     }
     format = Format::kDwarf64;
     // Detect a 64-bit length that doesn't fit in size_t by checking that the
     // narrowed value round-trips.
     data_size = static_cast<size_t>(size64);
     if (data_size != size64) [[unlikely]] {
       diag.FormatError("64-bit initial length ", size64, " in DWARF header too big",
                        std::forward<ErrorArgs>(error_args)...);
       return std::nullopt;
     }
   }

   if (bytes.size_bytes() < data_size) [[unlikely]] {
     // Saturate instead of wrapping, so that a huge bogus length is not
     // reported as a tiny required size.
     const size_t header_size = InitialLengthSize(format);
     const size_t required_size =
         data_size > SIZE_MAX - header_size ? SIZE_MAX : header_size + data_size;
     diag.FormatError("data size ", input_size, " < ", required_size,
                      " required by DWARF header", std::forward<ErrorArgs>(error_args)...);
     return std::nullopt;
   }

   // `bytes` now starts just past the initial length header.
   return SectionData{bytes.first(data_size), format};
 }

 }  // namespace elfldltl::dwarf

 #endif  // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_SECTION_DATA_H_
	// Copyright 2024 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_SECTION_DATA_H_
	#define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_SECTION_DATA_H_

	#include <lib/stdcompat/span.h>

	#include <cstdint>
	#include <optional>
	#include <utility>

	#include "../layout.h"

	namespace elfldltl::dwarf {

	// DWARF is generally independent of ELF. However, decoding it requires
	// knowing its byte order; and in a few places, knowing the environment's idea
	// of address size (although in most cases DWARF self-identifies its own idea
	// of the address size). These are exactly the two things represented by the
	// elfldltl::Elf<...> template types, so some template APIs here use an Elf
	// class template parameter to indicate those aspects. When DWARF is encoded
	// in ELF files, the Elf parameter matches what DWARF decoding needs to know.

	// Many kinds of DWARF data come in partly self-identifying units. An object
	// file section holding a certain kind of DWARF data often has an arbitrary
	// sequence of data units of a particular kind (each kind in its own section),
	// collected by the linker. Across all the kinds of section data, the formats
	// for data units have one thing in common: the initial length field. This
	// small header (usually 4 bytes, theoretically sometimes 12) indicates both
	// the total length of the data unit and the size of offsets used in the DWARF
	// format within that unit: 32-bit DWARF offsets, or 64-bit DWARF offsets.
	// Note that this 32-bit / 64-bit distinction is unrelated to machine register
	// size, address size, pointer ABI, etc. The "32-bit DWARF" and "64-bit DWARF"
	// formats refer purely to what size is used for DWARF "offset" values, which
	// are usually offsets from the beginning of a whole section (so a single DWARF
	// section has to exceed 4GiB to warrant using the 64-bit format in units that
	// hold offsets into that section).

	// This value for the initial length of a data unit indicates that it uses
	// the 64-bit DWARF format, and the 64-bit length follows. It is an inline
	// variable so that every translation unit including this header shares a
	// single definition rather than getting its own internal-linkage copy.
	inline constexpr uint32_t kDwarf64Length = 0xffffffff;

	// Other values starting here are reserved, meanings not yet specified. If
	// these values are encountered, we don't know how to determine the length of
	// the data unit. Any 32-bit initial length below this limit is an ordinary
	// 32-bit DWARF length.
	inline constexpr uint32_t kDwarf32Limit = 0xfffffff0;

	// elfldltl::dwarf::SectionData represents a DWARF data unit in memory. It
	// identifies the DWARF format (offset size) used inside the unit, and
	// holds a std::span<const std::byte> of the unit's contents. SectionData
	// objects are small and trivially copyable like std::span.
	class SectionData {
	 public:
	  // This indicates the DWARF format used in a single data unit. This
	  // implies the offset size used in its data, as well as the form and size
	  // of its initial length header.
	  enum class Format : uint8_t {
	    kDwarf32,  // 32-bit DWARF offsets.
	    kDwarf64,  // 64-bit DWARF offsets.
	  };

	  // A default-constructed object uses the 32-bit format and holds a
	  // default-constructed (empty) span.
	  constexpr SectionData() = default;

	  constexpr SectionData(const SectionData&) = default;

	  // Wrap the bytes that follow a unit's initial length header, along with
	  // the format that header indicated. The bytes are referenced, not copied.
	  constexpr SectionData(cpp20::span<const std::byte> contents, Format format)
	      : contents_{contents}, format_{format} {}

	  constexpr SectionData& operator=(const SectionData&) = default;

	  // The total size of this data unit, including its initial length header.
	  // This is the amount of the data that is consumed by Read (see below).
	  constexpr size_t size_bytes() const { return initial_length_size() + contents_.size_bytes(); }

	  // The contents of this data unit, not including its initial length header.
	  constexpr cpp20::span<const std::byte> contents() const { return contents_; }

	  // The format of this data unit.
	  constexpr Format format() const { return format_; }

	  // The size of this data unit's initial length header.
	  constexpr uint8_t initial_length_size() const { return InitialLengthSize(format_); }

	  // The size of any data unit's initial length header given the format:
	  // always one 32-bit word, followed by a 64-bit length in 64-bit DWARF.
	  static constexpr uint8_t InitialLengthSize(Format format) {
	    // The sum is 4 or 12, so the narrowing to uint8_t is lossless.
	    return static_cast<uint8_t>(sizeof(uint32_t) +
	                                (format == Format::kDwarf64 ? sizeof(uint64_t) : 0));
	  }

	  // The size in bytes of DWARF offsets (4 or 8) used in this data unit.
	  constexpr uint8_t offset_size() const { return OffsetSize(format_); }

	  // The size of DWARF offsets used in any data unit given the format.
	  static constexpr uint8_t OffsetSize(Format format) {
	    return static_cast<uint8_t>(format == Format::kDwarf64 ? sizeof(uint64_t) : sizeof(uint32_t));
	  }

	  // Read an offset in this data unit's format from contents(), starting at
	  // byte position pos. Returns std::nullopt if pos lies past the end of the
	  // contents or if fewer bytes than one offset remain at pos. The Elf
	  // template parameter indicates the byte order used in the data unit.
	  template <class Elf = Elf<>>
	  constexpr std::optional<uint64_t> read_offset(size_t pos = 0) const {
	    // subspan() requires pos <= size(), so reject an out-of-range position
	    // here instead of violating that precondition.
	    if (pos > contents_.size()) [[unlikely]] {
	      return std::nullopt;
	    }
	    return ReadOffset<Elf>(format_, contents_.subspan(pos));
	  }

	  // Read an offset in the indicated DWARF format from the front of the byte
	  // buffer. A 32-bit offset is zero-extended to uint64_t. Returns
	  // std::nullopt if the buffer is too small to hold an offset.
	  template <class Elf = Elf<>>
	  static constexpr std::optional<uint64_t> ReadOffset(  //
	      Format format, cpp20::span<const std::byte> bytes) {
	    switch (format) {
	      case Format::kDwarf32:
	        return ReadFromBytes<typename Elf::Word>(bytes);
	      case Format::kDwarf64:
	        return ReadFromBytes<typename Elf::Xword>(bytes);
	    }
	    // Only reached if format holds a value that is not a Format enumerator.
	    __builtin_trap();  // Should be unreachable.
	  }

	  // This identifies the size and format of a data unit in the byte stream.
	  // If there is a fundamental format error, it's reported via the
	  // Diagnostics object with the error_args appended after the generic
	  // message and std::nullopt is returned. Otherwise, size_bytes()
	  // indicates how much of the byte stream was consumed by this unit, and
	  // contents() holds the data unit bytes after the initial length: where
	  // the header for the particular kind of data starts.
	  template <class Elf = Elf<>, class Diagnostics, typename... ErrorArgs>
	  static constexpr std::optional<SectionData> Read(  //
	      Diagnostics& diag, cpp20::span<const std::byte> bytes, ErrorArgs&&... error_args);

	  // This combines Read with peeling off the size_bytes() consumed from the
	  // byte stream, returning the new tail:
	  // ```
	  // auto [data, rest] = SectionData::Consume(diag, bytes);
	  // if (data) {
	  //   Decode(data->contents());
	  // }
	  // ```
	  // On failure (when `data == std::nullopt`), `bytes` is returned unchanged.
	  template <class Elf = Elf<>, class Diagnostics, typename... ErrorArgs>
	  static constexpr std::pair<std::optional<SectionData>, cpp20::span<const std::byte>> Consume(
	      Diagnostics& diag, cpp20::span<const std::byte> bytes, ErrorArgs&&... error_args) {
	    auto read = Read<Elf>(diag, bytes, std::forward<ErrorArgs>(error_args)...);
	    return {read, read ? bytes.subspan(read->size_bytes()) : bytes};
	  }

	 private:
	  // Copy sizeof(T) bytes from the front of the buffer into a T, or return
	  // std::nullopt if the buffer holds fewer bytes than that. memcpy is used
	  // so the source bytes need not be aligned for T.
	  template <typename T>
	  static constexpr std::optional<T> ReadFromBytes(cpp20::span<const std::byte> bytes) {
	    if (bytes.size_bytes() < sizeof(T)) [[unlikely]] {
	      return std::nullopt;
	    }
	    T value;
	    memcpy(&value, bytes.data(), sizeof(value));
	    return value;
	  }

	  // The unit's bytes after its initial length header.
	  cpp20::span<const std::byte> contents_;

	  // The DWARF format (offset size) the unit's initial length indicated.
	  Format format_ = Format::kDwarf32;
	};

	// Decode the initial length header at the front of `bytes` and return the
	// data unit it describes. On any format error, one message is reported via
	// diag.FormatError (with error_args appended) and std::nullopt is returned.
	template <class Elf, class Diagnostics, typename... ErrorArgs>
	constexpr std::optional<SectionData> SectionData::Read(  //
	    Diagnostics& diag, cpp20::span<const std::byte> bytes, ErrorArgs&&... error_args) {
	  using Word = typename Elf::Word;
	  using Xword = typename Elf::Xword;

	  // Remember the size of the whole input for error messages, since `bytes`
	  // shrinks as header fields are consumed from its front.
	  const size_t input_size = bytes.size_bytes();

	  // Copy one header field out of the front of `bytes` and advance past it.
	  // Reports an error and returns false if too few bytes remain. The
	  // error_args are forwarded at most once because every failure path below
	  // returns immediately.
	  auto consume = [&](auto& value) -> bool {
	    if (bytes.size_bytes() < sizeof(value)) [[unlikely]] {
	      diag.FormatError("data size ", input_size, " too small for DWARF header",
	                       std::forward<ErrorArgs>(error_args)...);
	      return false;
	    }
	    memcpy(&value, bytes.data(), sizeof(value));
	    bytes = bytes.subspan(sizeof(value));
	    return true;
	  };

	  Word initial_length;
	  if (!consume(initial_length)) [[unlikely]] {
	    return std::nullopt;
	  }

	  Format format = Format::kDwarf32;
	  size_t data_size = initial_length;
	  if (initial_length >= kDwarf32Limit) {
	    // Only the kDwarf64Length escape value is understood in this range.
	    if (initial_length != kDwarf64Length) [[unlikely]] {
	      diag.FormatError("Reserved initial-length value ", initial_length, " used in DWARF header",
	                       std::forward<ErrorArgs>(error_args)...);
	      return std::nullopt;
	    }
	    Xword size64;
	    if (!consume(size64)) [[unlikely]] {
	      return std::nullopt;
	    }
	    format = Format::kDwarf64;
	    // Detect a 64-bit length that doesn't fit in size_t by checking that the
	    // narrowed value round-trips.
	    data_size = static_cast<size_t>(size64);
	    if (data_size != size64) [[unlikely]] {
	      diag.FormatError("64-bit initial length ", size64, " in DWARF header too big",
	                       std::forward<ErrorArgs>(error_args)...);
	      return std::nullopt;
	    }
	  }

	  if (bytes.size_bytes() < data_size) [[unlikely]] {
	    // Saturate instead of wrapping, so that a huge bogus length is not
	    // reported as a tiny required size.
	    const size_t header_size = InitialLengthSize(format);
	    const size_t required_size =
	        data_size > SIZE_MAX - header_size ? SIZE_MAX : header_size + data_size;
	    diag.FormatError("data size ", input_size, " < ", required_size,
	                     " required by DWARF header", std::forward<ErrorArgs>(error_args)...);
	    return std::nullopt;
	  }

	  // `bytes` now starts just past the initial length header.
	  return SectionData{bytes.first(data_size), format};
	}

	} // namespace elfldltl::dwarf

	#endif // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_DWARF_SECTION_DATA_H_