// blob: 064ed7db0c0697d328a695bc1ff0882815e1f799 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_SYMBOL_H_
#define SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_SYMBOL_H_
#include <lib/stdcompat/span.h>
#include <lib/stdcompat/type_traits.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string_view>
#include <type_traits>
#include <utility>

#include "compat-hash.h"
#include "gnu-hash.h"
#include "layout.h"
namespace elfldltl {
// SymbolName represents an identifier to be looked up in a symbol table. It's
// really just a string_view with a cache of the string's hash value(s). The
// type is constexpr-friendly: when constructed in a constexpr context (e.g.
// from a string literal), the hashes are precomputed at compile time.
//
// The Lookup calls are just front-ends that take a SymbolInfo object and call
// its Lookup method (see below).
//
// Note that though this is a cheaply-copyable type, it's always best to pass
// it by mutable reference so its cache can be updated as needed (outside
// constexpr context). Lookup has both const and non-const overloads, but the
// const overload has to recompute every time if the hash isn't already cached.
//
class SymbolName : public std::string_view {
 public:
  using std::string_view::string_view;

  constexpr SymbolName() = default;

  constexpr SymbolName(const SymbolName&) = default;

  // A constant character array gets both hashes filled in directly by the
  // member initializers, so the whole construction can happen in constexpr
  // context.
  template <size_t N>
  constexpr explicit SymbolName(const char (&name)[N])
      : std::string_view(name),
        compat_hash_(CompatHashString(*this)),
        gnu_hash_(GnuHashString(*this)) {}

  // Goes through the string_view assignment operator below, which fills in
  // the hashes when evaluated in constexpr context.
  constexpr explicit SymbolName(std::string_view name) { this->operator=(name); }

  // Builds the name of a symbol table entry by fetching the string at the
  // entry's `name` offset via `si.string(...)`.
  template <class SymbolInfoType, class SymType>
  constexpr SymbolName(const SymbolInfoType& si, const SymType& sym)
      : SymbolName(si.string(sym.name)) {}

  constexpr SymbolName& operator=(const SymbolName&) = default;

  // Replaces the string and resets the cached hashes. During constant
  // evaluation the new hashes are computed immediately; otherwise they are
  // left as "not yet computed" until first requested.
  constexpr SymbolName& operator=(const std::string_view& name) {
    std::string_view::operator=(name);
    if (cpp20::is_constant_evaluated()) {
      compat_hash_ = CompatHashString(*this);
      gnu_hash_ = GnuHashString(*this);
    } else {
      compat_hash_ = kCompatNoHash;
      gnu_hash_ = kGnuNoHash;
    }
    return *this;
  }

  // Returns the DT_HASH-style hash of the name, computing and caching it on
  // first use.
  constexpr uint32_t compat_hash() {
    if (compat_hash_ != kCompatNoHash) {
      return compat_hash_;
    }
    compat_hash_ = CompatHashString(*this);
    return compat_hash_;
  }

  // The const flavor cannot update the cache, so it recomputes the hash
  // whenever no cached value is present.
  constexpr uint32_t compat_hash() const {
    return compat_hash_ == kCompatNoHash ? CompatHashString(*this) : compat_hash_;
  }

  // Returns the DT_GNU_HASH-style hash of the name, computing and caching it
  // on first use.
  constexpr uint32_t gnu_hash() {
    if (gnu_hash_ != kGnuNoHash) {
      return gnu_hash_;
    }
    gnu_hash_ = GnuHashString(*this);
    return gnu_hash_;
  }

  // The const flavor cannot update the cache, so it recomputes the hash
  // whenever no cached value is present.
  constexpr uint32_t gnu_hash() const {
    return gnu_hash_ == kGnuNoHash ? GnuHashString(*this) : gnu_hash_;
  }

  // Looks this name up in `si`, considering only symbols accepted by
  // `filter`. Returns the matching symbol table entry, or nullptr if there is
  // no match or `si` offers neither kind of hash table.
  template <class SymbolInfoType, typename Filter>
  constexpr const typename SymbolInfoType::Sym* Lookup(const SymbolInfoType& si, Filter&& filter) {
    const typename SymbolInfoType::Sym* found = nullptr;
    if (auto gnu_table = si.gnu_hash()) {
      // DT_GNU_HASH is the preferred format whenever it is present; modern
      // systems should link with --hash-style=gnu or --hash-style=both.
      found = si.Lookup(*gnu_table, *this, gnu_hash(), std::forward<Filter>(filter));
    } else if (auto compat_table = si.compat_hash()) {
      // Fall back to the old DT_HASH format (--hash-style=sysv).
      found = si.Lookup(*compat_table, *this, compat_hash(), std::forward<Filter>(filter));
    }
    return found;
  }

  // A const object cannot cache hashes, so the lookup runs on a mutable copy.
  // An object built in constexpr context already carries its hashes.
  template <class SymbolInfoType, typename Filter>
  constexpr const typename SymbolInfoType::Sym* Lookup(const SymbolInfoType& si,
                                                       Filter&& filter) const {
    SymbolName scratch(*this);
    return scratch.Lookup(si, std::forward<Filter>(filter));
  }

  // Single-argument lookups use SymbolInfoType::DefinedSymbol as the filter.
  template <class SymbolInfoType>
  constexpr auto Lookup(const SymbolInfoType& si) {
    return Lookup(si, SymbolInfoType::DefinedSymbol);
  }

  template <class SymbolInfoType>
  constexpr auto Lookup(const SymbolInfoType& si) const {
    return Lookup(si, SymbolInfoType::DefinedSymbol);
  }

 private:
  // Cached hash values. Each is computed up front during constant evaluation
  // and otherwise starts out as the "no hash" marker.
  uint32_t compat_hash_ =
      cpp20::is_constant_evaluated() ? CompatHashString(*this) : kCompatNoHash;
  uint32_t gnu_hash_ =
      cpp20::is_constant_evaluated() ? GnuHashString(*this) : kGnuNoHash;
};
// This represents all the dynamic symbol table information for one ELF file.
// It's primarily used for hash table lookup via SymbolName::Lookup, but can
// also be used to enumerate the symbol table or the hash tables. It holds
// non-owning pointers into target data normally found in the RODATA segment.
//
template <class Elf>
class SymbolInfo {
 public:
  using Word = typename Elf::Word;
  using Addr = typename Elf::Addr;
  using Sym = typename Elf::Sym;

  // Each flavor of hash table has a support class with a compatible API,
  // except for the argument to the constructor and Valid, which is a
  // span<Word> for DT_HASH and a span<Addr> for DT_GNU_HASH.
  //
  //  * `static bool Valid(span table);` returns true if the table is usable.
  //    If this returns true, it's safe to pass `table` to the constructor.
  //
  //  * `uint32_t size() const;` computes the maximum size of the symbol table.
  //    This is not normally needed for plain lookups, and may be costly.
  //
  //  * `uint32_t Bucket(uint32_t hash) const;` returns the hash bucket for
  //    symbol names with the given hash value.  Bucket number zero is invalid.
  //    This can be returned if no buckets contain this hash value.
  //
  //  * `BucketIterator` is a forward-iterator type that has a three-argument
  //    constructor `(const Table&, uint32_t bucket, uint32_t hash)` that yields
  //    a "begin" iterator for the hash bucket and a one-argument constructor
  //    `(const Table&)` that yields an "end" iterator for the hash bucket.
  //    The iterator yields a nonzero uint32_t symbol table index.
  //
  using CompatHash = ::elfldltl::CompatHash<Word>;  // See compat-hash.h.
  using GnuHash = ::elfldltl::GnuHash<Word, Addr>;  // See gnu-hash.h.

  // This is a forward-iterable container view of a symbol table hash bucket.
  // Each uint32_t element is a symbol table index.
  template <class HashTable>
  class HashBucket {
   public:
    using iterator = typename HashTable::BucketIterator;
    using const_iterator = iterator;

    // Captures the begin and end iterators for `bucket` of `table`, as
    // selected for names with the given `hash`.
    constexpr explicit HashBucket(const HashTable& table, uint32_t bucket, uint32_t hash)
        : begin_(table, bucket, hash), end_(table) {}

    constexpr iterator begin() const { return begin_; }
    constexpr iterator end() const { return end_; }

   private:
    iterator begin_, end_;
  };

  // This is the degenerate (always true) filter predicate for Lookup.
  // The parameter is deliberately unnamed since it is never examined.
  static constexpr bool AnySymbol(const Sym&) { return true; }

  // This is the default filter predicate for Lookup to match defined symbols:
  // it accepts a symbol only if its section index is nonzero and its type is
  // one of the listed symbol types.
  static constexpr bool DefinedSymbol(const Sym& sym) {
    if (sym.shndx != 0) {
      switch (sym.type()) {
        case ElfSymType::kNoType:
        case ElfSymType::kObject:
        case ElfSymType::kFunc:
        case ElfSymType::kCommon:
        case ElfSymType::kTls:
        case ElfSymType::kIfunc:
          return true;
        default:
          break;
      }
    }
    return false;
  }

  // Look up a symbol in one of the hash tables.  The filter is a predicate to
  // accept or reject symbols before name matching.  Returns a pointer to the
  // first accepted symbol in the bucket whose name matches exactly, or nullptr
  // if there is none.
  //
  // Filter has a default template argument matching DefinedSymbol so that the
  // default function argument is actually usable: a template parameter cannot
  // be deduced from a default argument, so without it a three-argument call
  // would fail to compile.
  template <class HashTable, typename Filter = bool (&)(const Sym&)>
  constexpr const Sym* Lookup(const HashTable& table, std::string_view name, uint32_t hash,
                              Filter&& filter = DefinedSymbol) const {
    static_assert(std::is_invocable_r_v<bool, Filter, const Sym&>);

    const uint32_t bucket = table.Bucket(hash);

    // Bucket zero is invalid.  A name that doesn't leave room for its NUL
    // terminator inside the string table can't match anything; checking that
    // here also keeps the subtraction below from wrapping around.
    if (bucket == 0 || name.size() >= strtab_.size()) {
      return nullptr;
    }

    for (uint32_t i : HashBucket<HashTable>(table, bucket, hash)) {
      // An index outside the symbol table means the hash table is bad; stop.
      if (i >= symtab_.size()) [[unlikely]] {
        break;
      }
      const Sym& sym = symtab_[i];
      // The bounds checks make sure both the candidate string and the NUL
      // terminator that must follow it lie inside the string table before
      // either is read.
      // TODO(mcgrathr): diag for bad st_name
      if (filter(sym) && sym.name < strtab_.size() && strtab_.size() - name.size() > sym.name &&
          strtab_[sym.name + name.size()] == '\0' &&
          strtab_.substr(sym.name, name.size()) == name) {
        return &sym;
      }
    }
    return nullptr;
  }

  // Fetch the raw string table.
  constexpr auto strtab() const { return strtab_; }

  // Fetch a NUL-terminated string from the string table by offset,
  // e.g. as stored in st_name or DT_SONAME.  Returns an empty string if the
  // offset is out of bounds or no NUL terminator follows it.
  constexpr std::string_view string(size_t offset) const {
    if (offset < strtab_.size()) {
      const size_t pos = strtab_.find('\0', offset);
      if (pos != std::string_view::npos) {
        return strtab_.substr(offset, pos - offset);
      }
    }
    return {};
  }

  // Fetch the raw symbol table.  Note this size may be an upper bound.  It's
  // all valid memory to read, but there might be garbage data past the last
  // actual valid symbol table index.
  constexpr auto symtab() const { return symtab_; }

  // Fetch the symbol table and try to reduce its apparent size to its real
  // size or at least a better approximation.  This provides no guarantee that
  // the size will be smaller than the raw symtab() size, but does a bit more
  // work to try to ensure it.  If using only indices that are presumed to be
  // valid, such as those in relocation entries, just use symtab() instead.
  // This is better for blind enumeration.
  auto safe_symtab() const { return symtab_.subspan(0, safe_symtab_size()); }

  // Return the CompatHash object (see compat-hash.h) if DT_HASH is present.
  constexpr std::optional<CompatHash> compat_hash() const {
    if (CompatHash::Valid(compat_hash_)) {
      return CompatHash(compat_hash_);
    }
    return {};
  }

  // Return the GnuHash object (see gnu-hash.h) if DT_GNU_HASH is present.
  constexpr std::optional<GnuHash> gnu_hash() const {
    if (GnuHash::Valid(gnu_hash_)) {
      return GnuHash(gnu_hash_);
    }
    return {};
  }

  // Return the string at the installed soname offset, or an empty string if
  // no (nonzero) offset has been installed.
  constexpr std::string_view soname() const {
    if (soname_ != 0) {
      return string(soname_);
    }
    return {};
  }

  // Install data for the various tables.  These return *this so they can be
  // called in fluent style, e.g. in a constexpr initializer.

  constexpr SymbolInfo& set_strtab(std::string_view strtab) {
    strtab_ = strtab;
    return *this;
  }

  // Same as set_strtab, but takes the string table as a span of characters.
  constexpr SymbolInfo& set_strtab_as_span(cpp20::span<const char> strtab) {
    return set_strtab({strtab.data(), strtab.size()});
  }

  constexpr SymbolInfo& set_symtab(cpp20::span<const Sym> symtab) {
    symtab_ = symtab;
    return *this;
  }

  constexpr SymbolInfo& set_compat_hash(cpp20::span<const Word> table) {
    compat_hash_ = table;
    return *this;
  }

  constexpr SymbolInfo& set_gnu_hash(cpp20::span<const Addr> table) {
    gnu_hash_ = table;
    return *this;
  }

  // Installs the string table offset of the soname; zero means none.
  constexpr SymbolInfo& set_soname(typename Elf::size_type soname) {
    soname_ = soname;
    return *this;
  }

 private:
  // Computes the element count used by safe_symtab().  The result never
  // exceeds symtab_.size().
  size_t safe_symtab_size() const {
    if (symtab_.empty()) {
      return 0;
    }

    // The old format makes it very cheap to detect, so prefer that.
    if (CompatHash::Valid(compat_hash_)) {
      const size_t hash_max = CompatHash(compat_hash_).size();
      return std::min(symtab_.size(), hash_max);
    }

    // The DT_GNU_HASH format has to be fully scanned to determine the size.
    if (GnuHash::Valid(gnu_hash_)) {
      const size_t hash_max = GnuHash(gnu_hash_).size();
      return std::min(symtab_.size(), hash_max);
    }

    // With neither format available, there is no way to know the constraint
    // directly.  DT_STRTAB is usually right after, so that might be an upper
    // bound, but that's only a (likely) heuristic and not guaranteed.
    const auto* base = reinterpret_cast<const char*>(symtab_.data());
    const auto* limit = reinterpret_cast<const char*>(symtab_.data() + symtab_.size());
    if (base < strtab_.data() && limit > strtab_.data()) {
      // The string table starts inside the claimed symbol table extent, so
      // only count the whole entries that fit before it.
      return (strtab_.data() - base) / sizeof(Sym);
    }

    // Worst case, there might still be some garbage entries at the end.  We
    // could scan through them all looking for invalid data (st_name out of
    // bounds, unsupported st_info bits, etc.), but that seems excessive.
    return symtab_.size();
  }

  // Non-owning views of the tables, plus the soname's string table offset.
  std::string_view strtab_;
  cpp20::span<const Sym> symtab_;
  cpp20::span<const Word> compat_hash_;
  cpp20::span<const Addr> gnu_hash_;
  typename Elf::size_type soname_ = 0;
};
} // namespace elfldltl
#endif // SRC_LIB_ELFLDLTL_INCLUDE_LIB_ELFLDLTL_SYMBOL_H_