blob: 1ff96906056d2fe93434ce63e3bf698340959bde [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_SYMBOL_H_
#define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_SYMBOL_H_
#include <lib/stdcompat/span.h>
#include <lib/stdcompat/type_traits.h>
#include <cstdint>
#include <optional>
#include <string_view>
#include "abi-span.h"
#include "compat-hash.h"
#include "gnu-hash.h"
#include "layout.h"
namespace elfldltl {
// SymbolName represents an identifier to be looked up in a symbol table. It's
// really just a string_view with a cache of the string's hash value(s). The
// type is constexpr friendly and when used in a constexpr context with a
// string literal, it can precompute at compile time to optimize.
//
// The Lookup calls are just front-ends that take a SymbolInfo object and call
// its Lookup method (see below).
//
// Note that though this is a cheaply-copyable type, it's always best to pass
// it by mutable reference so its cache can be updated as needed (outside
// constexpr context). Lookup has both const and non-const overloads, but the
// const overload has to recompute every time if the hash isn't already cached.
//
// ELF symbol names cannot contain NUL characters, so it's an invariant that
// SymbolName cannot contain embedded NULs (unlike std::string_view, which can).
// Construction and assignment enforce this by turning a std::string_view argument
// that contains embedded NULs into the empty string.
//
class SymbolName : public std::string_view {
public:
using std::string_view::string_view;
constexpr SymbolName() = default;
constexpr SymbolName(const SymbolName&) = default;
// When constructing from a constant, precompute the hashes since it can be
// done entirely in constexpr context.
template <size_t N>
constexpr explicit SymbolName(const char (&name)[N])
: std::string_view(name),
compat_hash_(CompatHashString(*this)),
gnu_hash_(GnuHashString(*this)) {}
// This will precompute the hashes in constexpr context, see below.
constexpr explicit SymbolName(std::string_view name) { *this = name; }
constexpr explicit SymbolName(const char* name) { *this = name; }
// Convenient constructor using a symbol table entry (see below).
template <class SymbolInfo, class Sym>
constexpr SymbolName(const SymbolInfo& si, const Sym& sym) : SymbolName(si.string(sym.name)) {}
constexpr SymbolName& operator=(const SymbolName&) = default;
constexpr SymbolName& operator=(std::string_view name) {
// No valid symbol will have an embedded NUL character, in which case just yield an empty
// symbol.
if (!name.empty() && name.find_first_of('\0') != std::string_view::npos) [[unlikely]] {
name = {};
}
return ChangeName(name);
}
constexpr SymbolName& operator=(const char* name) {
// Implicit const char* -> std::string_view conversion makes it impossible
// for there to be an embedded NUL, so no need to check that invariant.
return ChangeName(name);
}
constexpr uint32_t compat_hash() {
if (compat_hash_ == kCompatNoHash) {
compat_hash_ = CompatHashString(*this);
}
return compat_hash_;
}
constexpr uint32_t compat_hash() const { return SymbolName(*this).compat_hash(); }
constexpr uint32_t gnu_hash() {
if (gnu_hash_ == kGnuNoHash) {
gnu_hash_ = GnuHashString(*this);
}
return gnu_hash_;
}
constexpr uint32_t gnu_hash() const { return SymbolName(*this).gnu_hash(); }
template <class SymbolInfoType, typename Filter>
constexpr const typename SymbolInfoType::Sym* Lookup(const SymbolInfoType& si, Filter&& filter) {
// DT_GNU_HASH format is superior when available. Modern systems should
// default to --hash-style=gnu or --hash-style=both so it's available.
if (auto gnu = si.gnu_hash()) {
return si.Lookup(*gnu, *this, gnu_hash(), std::forward<Filter>(filter));
}
// But it's easy enough to support the old format (--hash-style=sysv) too.
if (auto compat = si.compat_hash()) {
return si.Lookup(*compat, *this, compat_hash(), std::forward<Filter>(filter));
}
return nullptr;
}
// A const object can't update its cache, but constexpr will already have it.
template <class SymbolInfoType, typename Filter>
constexpr const typename SymbolInfoType::Sym* Lookup(const SymbolInfoType& si,
Filter&& filter) const {
// The copy is mutable in case we don't already have cached hash values.
return SymbolName(*this).Lookup(si, std::forward<Filter>(filter));
}
template <class SymbolInfoType>
constexpr auto Lookup(const SymbolInfoType& si) {
return Lookup(si, SymbolInfoType::DefinedSymbol);
}
template <class SymbolInfoType>
constexpr auto Lookup(const SymbolInfoType& si) const {
return Lookup(si, SymbolInfoType::DefinedSymbol);
}
private:
constexpr SymbolName& ChangeName(std::string_view name) {
std::string_view::operator=(name);
compat_hash_ = kCompatNoHash;
gnu_hash_ = kGnuNoHash;
if (cpp20::is_constant_evaluated()) { // Precompute in constexpr.
compat_hash();
gnu_hash();
}
return *this;
}
uint32_t compat_hash_ = // Precompute in constexpr.
cpp20::is_constant_evaluated() ? CompatHashString(*this) : kCompatNoHash;
uint32_t gnu_hash_ = // Precompute in constexpr.
cpp20::is_constant_evaluated() ? GnuHashString(*this) : kGnuNoHash;
};
// This type can be used as a constructor tag to zero-construct an object whose
// default constructor would otherwise not be zero initializable. This can
// allow and object to be placed in bss. See
// `SymbolInfo::SymbolInfo(LinkerZeroInitialized)` for more.
struct LinkerZeroInitialized {};
inline constexpr LinkerZeroInitialized kLinkerZeroInitialized{};
// This represents all the dynamic symbol table information for one ELF file.
// It's primarily used for hash table lookup via SymbolName::Lookup, but can
// also be used to enumerate the symbol table or the hash tables. It holds
// non-owning pointers into target data normally found in the RODATA segment.
//
template <class ElfLayout, class AbiTraits = LocalAbiTraits>
class SymbolInfo {
public:
using Elf = ElfLayout;
using Word = typename Elf::Word;
using Addr = typename Elf::Addr;
using size_type = typename Elf::size_type;
using Sym = typename Elf::Sym;
constexpr SymbolInfo() = default;
// This constructor can be used to zero-initialize a SymbolInfo object.
// This can be useful for performance reasons. Note, a SymbolInfo object in
// this state must never be used until `InitLinkerZeroInitialized` has been
// called.
constexpr explicit SymbolInfo(LinkerZeroInitialized) : strtab_{} {}
constexpr void InitLinkerZeroInitialized() { strtab_ = kEmptyStrtab; }
// Each flavor of hash table has a support class with a compatible API,
// except for the argument to the constructor and Valid, which is a
// span<Word> for DT_HASH and a span<Addr> for DT_GNU_HASH.
//
// * `static bool Valid(span table);` returns true if the table is usable.
// If this returns true, it's safe to pass `table` to the constructor.
//
// * `uint32_t symtab_size() const;` computes the maximum size of the symbol
// table. This is not normally needed for plain lookups; it may be costly.
//
// * `uint32_t Bucket(uint32_t hash) const;` returns the hash bucket for
// symbol names with the given hash value. Bucket number zero is invalid.
// This can be returned if no buckets contain this hash value.
//
// * `BucketIterator` is a forward-iterator type that has a three-argument
// constructor `(const Table&, uint32_t bucket, uint32_t hash)` that yields
// a "begin" iterator for the hash bucket and a two-argument constructor
// that yields an "end" iterator for the hash bucket. The iterator yields
// a nonzero uint32_t symbol table index.
//
// * `<some type> begin(); const` and `<some type> end(); const` return
// iterators over the set of buckets, whose operator*() returns a
// BucketIterator, such that iterating through from begin() to end() with
// an inner iteration through each BucketIterator to its end state from
// there exhaustively visits every symbol in the whole hash table exactly
// once. This is only used for diagnostic purposes.
//
using CompatHash = ::elfldltl::CompatHash<Elf, AbiTraits>; // compat-hash.h
using GnuHash = ::elfldltl::GnuHash<Elf, AbiTraits>; // gnu-hash.h
// This is a forward-iterable container view of a symbol table hash bucket.
// Each uint32_t element is a symbol table index.
template <class HashTable>
class HashBucket {
public:
using iterator = typename HashTable::BucketIterator;
using const_iterator = iterator;
constexpr explicit HashBucket(const HashTable& table, iterator first)
: begin_(first), end_(table) {}
constexpr explicit HashBucket(const HashTable& table, uint32_t bucket, uint32_t hash)
: HashBucket(table, iterator(table, bucket, hash)) {}
constexpr iterator begin() const { return begin_; }
constexpr iterator end() const { return end_; }
private:
iterator begin_, end_;
};
// This is the degenerate (always true) filter predicate for Lookup.
static constexpr bool AnySymbol(const Sym& sym) { return true; }
// This is the default filter predicate for Lookup to match defined symbols.
static constexpr bool DefinedSymbol(const Sym& sym) {
if (sym.shndx != 0) {
switch (sym.type()) {
case ElfSymType::kNoType:
case ElfSymType::kObject:
case ElfSymType::kFunc:
case ElfSymType::kCommon:
case ElfSymType::kTls:
case ElfSymType::kIfunc:
return true;
default:
break;
}
}
return false;
}
// Look up a symbol in one of the hash tables. The filter is a predicate to
// accept or reject symbols before name matching.
// This takes a SymbolName to enforce the invariant that there are no embedded NUL characters.
// It's hash fields are not used.
template <class HashTable, typename Filter>
constexpr const Sym* Lookup(const HashTable& table, const SymbolName& name, uint32_t hash,
Filter&& filter = DefinedSymbol) const {
static_assert(std::is_invocable_r_v<bool, Filter, const Sym&>);
const uint32_t bucket = table.Bucket(hash);
if (bucket != 0 && name.size() && name.size() < strtab().size()) {
for (uint32_t i : HashBucket<HashTable>(table, bucket, hash)) {
if (i >= symtab_.size()) [[unlikely]] {
break;
}
const Sym& sym = symtab_[i];
// TODO(mcgrathr): diag for bad st_name
if (filter(sym) && sym.name < strtab().size() && strtab().size() - name.size() > sym.name &&
strtab()[sym.name + name.size()] == '\0' &&
strtab().substr(sym.name, name.size()) == name) {
return &sym;
}
}
}
return nullptr;
}
// Fetch the raw string table.
constexpr std::string_view strtab() const { return strtab_; }
// Fetch a NUL-terminated string from the string table by offset,
// e.g. as stored in st_name or DT_SONAME.
constexpr const char* string(size_t offset) const {
if (strtab().size() && offset < strtab().size() - 1) [[likely]] {
return strtab().data() + offset;
}
return "";
}
// Fetch the raw symbol table. Note this size may be an upper bound. It's
// all valid memory to read, but there might be garbage data past the last
// actual valid symbol table index.
constexpr cpp20::span<const Sym> symtab() const { return symtab_; }
// Fetch the symbol table and try to reduce its apparent size to its real
// size or at least a better approximation. This provides no guarantee that
// the size will be smaller than the raw symtab() size, but does a bit more
// work to try to ensure it. If using only indices that are presumed to be
// valid, such as those in relocation entries, just use symtab() instead.
// This is better for blind enumeration.
cpp20::span<const Sym> safe_symtab() const { return symtab_.subspan(0, safe_symtab_size()); }
// Return the CompatHash object (see compat-hash.h) if DT_HASH is present.
constexpr std::optional<CompatHash> compat_hash() const {
if (CompatHash::Valid(compat_hash_)) {
return CompatHash(compat_hash_);
}
return {};
}
constexpr std::optional<GnuHash> gnu_hash() const {
if (GnuHash::Valid(gnu_hash_)) {
return GnuHash(gnu_hash_);
}
return {};
}
constexpr std::string_view soname() const {
if (soname_ != 0) {
return string(soname_);
}
return {};
}
// Return the DT_FLAGS bits.
constexpr size_type flags() const { return flags_; }
// Return the DT_FLAGS_1 bits.
constexpr size_type flags1() const { return flags1_; }
// Install data for the various tables. These return *this so they can be
// called in fluent style, e.g. in a constexpr initializer.
constexpr SymbolInfo& set_strtab(std::string_view strtab) {
if (strtab.empty() || strtab.back() != '\0') {
// Invalid string table has no NUL terminator; don't use it at all.
strtab = kEmptyStrtab;
}
strtab_ = strtab;
return *this;
}
constexpr SymbolInfo& set_strtab_as_span(cpp20::span<const char> strtab) {
return set_strtab({strtab.data(), strtab.size()});
}
constexpr SymbolInfo& set_symtab(cpp20::span<const Sym> symtab) {
symtab_ = symtab;
return *this;
}
constexpr SymbolInfo& set_compat_hash(cpp20::span<const Word> table) {
compat_hash_ = table;
return *this;
}
constexpr SymbolInfo& set_gnu_hash(cpp20::span<const Addr> table) {
gnu_hash_ = table;
return *this;
}
constexpr SymbolInfo& set_soname(size_type soname) {
soname_ = soname;
return *this;
}
constexpr SymbolInfo& set_flags(size_type flags) {
flags_ = flags;
return *this;
}
constexpr SymbolInfo& set_flags1(size_type flags1) {
flags1_ = flags1;
return *this;
}
private:
template <typename T>
using Span = AbiSpan<const T, cpp20::dynamic_extent, Elf, AbiTraits>;
// In directly-usable instantiations, ensure that the empty state is
// guaranteed NUL terminated. In remoting instantiations, the values will
// always be reset from known-valid local instantiations so the zero-length
// view will never be used.
static constexpr AbiStringView<Elf, AbiTraits> kEmptyStrtab = []() {
AbiStringView<Elf, AbiTraits> empty;
if constexpr (std::is_constructible_v<AbiStringView<Elf, AbiTraits>, std::string_view>) {
empty = std::string_view{"", 1};
}
return empty;
}();
size_type safe_symtab_size() const {
if (symtab_.empty()) {
return 0;
}
// The old format makes it very cheap to detect, so prefer that.
if (CompatHash::Valid(compat_hash_)) {
size_type hash_max = CompatHash(compat_hash_).symtab_size();
return std::min(symtab_.size(), hash_max);
}
// The DT_GNU_HASH format has to be fully scanned to determine the size.
if (GnuHash::Valid(gnu_hash_)) {
size_type hash_max = GnuHash(gnu_hash_).symtab_size();
return std::min(symtab_.size(), hash_max);
}
// With neither format available, there is no way to know the constraint
// directly. DT_STRTAB is usually right after, so that might be an upper
// bound, but that's only a (likely) heuristic and not guaranteed.
auto base = reinterpret_cast<const char*>(symtab_.data());
auto limit = reinterpret_cast<const char*>(symtab_.data() + symtab_.size());
if (base < strtab_.data() && limit > strtab_.data()) {
return static_cast<size_type>(strtab_.data() - base) / sizeof(Sym);
}
// Worst case, there might still be some garbage entries at the end. We
// could scan through them all looking for invalid data (st_name out of
// bounds, unsupported st_info bits, etc.), but that seems excessive.
return symtab_.size();
}
AbiStringView<Elf, AbiTraits> strtab_ = kEmptyStrtab;
Span<Sym> symtab_;
Span<Word> compat_hash_;
Span<Addr> gnu_hash_;
Addr soname_ = 0;
Addr flags_ = 0; // DT_FLAGS
Addr flags1_ = 0; // DT_FLAGS_1
public:
// <lib/ld/remote-abi-transcriber.h> introspection API. These aliases must
// be public, but can't be defined lexically before the private: section that
// declares the members; so this special public: section is at the end.
using AbiLocal = SymbolInfo<Elf, LocalAbiTraits>;
template <template <class...> class Template>
using AbiBases = Template<>;
template <template <auto...> class Template>
using AbiMembers = Template<&SymbolInfo::strtab_, &SymbolInfo::symtab_, &SymbolInfo::compat_hash_,
&SymbolInfo::gnu_hash_, &SymbolInfo::soname_, &SymbolInfo::flags_,
&SymbolInfo::flags1_>;
};
// This constructs a SymbolInfo that just contains a single undefined symbol.
// It can be used with a resolver function (see link.h and resolve.h).
template <class Elf>
class SymbolInfoForSingleLookup : public SymbolInfo<Elf> {
public:
using typename SymbolInfo<Elf>::Sym;
constexpr SymbolInfoForSingleLookup() = default;
constexpr SymbolInfoForSingleLookup(const SymbolInfoForSingleLookup&) = default;
explicit constexpr SymbolInfoForSingleLookup(const char* name,
ElfSymType type = ElfSymType::kNoType,
ElfSymBind bind = ElfSymBind::kGlobal)
: symbol_{.info = Sym::MakeInfo(bind, type)} {
this->set_strtab({name, std::string_view{name}.size() + 1});
this->set_symtab(cpp20::span{&symbol_, 1});
}
const Sym& symbol() const { return symbol_; }
private:
Sym symbol_;
};
} // namespace elfldltl
#endif // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_SYMBOL_H_