| //===- Symbols.h ------------------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLD_MACHO_SYMBOLS_H |
| #define LLD_MACHO_SYMBOLS_H |
| |
| #include "InputSection.h" |
| #include "Target.h" |
| #include "lld/Common/ErrorHandler.h" |
| #include "lld/Common/Strings.h" |
| #include "llvm/Object/Archive.h" |
| #include "llvm/Support/MathExtras.h" |
| |
| namespace lld { |
| namespace macho { |
| |
| class InputSection; |
| class MachHeaderSection; |
| class DylibFile; |
| class ArchiveFile; |
| |
| struct StringRefZ { |
| StringRefZ(const char *s) : data(s), size(-1) {} |
| StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} |
| |
| const char *data; |
| const uint32_t size; |
| }; |
| |
| class Symbol { |
| public: |
| enum Kind { |
| DefinedKind, |
| UndefinedKind, |
| CommonKind, |
| DylibKind, |
| LazyKind, |
| DSOHandleKind, |
| }; |
| |
| virtual ~Symbol() {} |
| |
| Kind kind() const { return static_cast<Kind>(symbolKind); } |
| |
| StringRef getName() const { return {name.data, name.size}; } |
| |
| virtual uint64_t getVA() const { return 0; } |
| |
| virtual uint64_t getFileOffset() const { |
| llvm_unreachable("attempt to get an offset from a non-defined symbol"); |
| } |
| |
| virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); } |
| |
| virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } |
| |
| // Whether this symbol is in the GOT or TLVPointer sections. |
| bool isInGot() const { return gotIndex != UINT32_MAX; } |
| |
| // Whether this symbol is in the StubsSection. |
| bool isInStubs() const { return stubsIndex != UINT32_MAX; } |
| |
| // The index of this symbol in the GOT or the TLVPointer section, depending |
| // on whether it is a thread-local. A given symbol cannot be referenced by |
| // both these sections at once. |
| uint32_t gotIndex = UINT32_MAX; |
| |
| uint32_t stubsIndex = UINT32_MAX; |
| |
| uint32_t symtabIndex = UINT32_MAX; |
| |
| protected: |
| Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {} |
| |
| Kind symbolKind; |
| StringRefZ name; |
| }; |
| |
| class Defined : public Symbol { |
| public: |
| Defined(StringRefZ name, InputSection *isec, uint32_t value, bool isWeakDef, |
| bool isExternal) |
| : Symbol(DefinedKind, name), isec(isec), value(value), |
| overridesWeakDef(false), weakDef(isWeakDef), external(isExternal) {} |
| |
| bool isWeakDef() const override { return weakDef; } |
| bool isTlv() const override { |
| return !isAbsolute() && isThreadLocalVariables(isec->flags); |
| } |
| |
| bool isExternal() const { return external; } |
| bool isAbsolute() const { return isec == nullptr; } |
| |
| uint64_t getVA() const override; |
| uint64_t getFileOffset() const override; |
| |
| static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } |
| |
| InputSection *isec; |
| uint32_t value; |
| |
| bool overridesWeakDef : 1; |
| |
| private: |
| const bool weakDef : 1; |
| const bool external : 1; |
| }; |
| |
| class Undefined : public Symbol { |
| public: |
| Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {} |
| |
| static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } |
| }; |
| |
| // On Unix, it is traditionally allowed to write variable definitions without |
| // initialization expressions (such as "int foo;") to header files. These are |
| // called tentative definitions. |
| // |
| // Using tentative definitions is usually considered a bad practice; you should |
| // write only declarations (such as "extern int foo;") to header files. |
| // Nevertheless, the linker and the compiler have to do something to support |
| // bad code by allowing duplicate definitions for this particular case. |
| // |
| // The compiler creates common symbols when it sees tentative definitions. |
| // (You can suppress this behavior and let the compiler create a regular |
| // defined symbol by passing -fno-common.) When linking the final binary, if |
| // there are remaining common symbols after name resolution is complete, the |
| // linker converts them to regular defined symbols in a __common section. |
| class CommonSymbol : public Symbol { |
| public: |
| CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align) |
| : Symbol(CommonKind, name), file(file), size(size), |
| align(align != 1 ? align : llvm::PowerOf2Ceil(size)) { |
| // TODO: cap maximum alignment |
| } |
| |
| static bool classof(const Symbol *s) { return s->kind() == CommonKind; } |
| |
| InputFile *const file; |
| const uint64_t size; |
| const uint32_t align; |
| }; |
| |
| class DylibSymbol : public Symbol { |
| public: |
| DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv) |
| : Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {} |
| |
| bool isWeakDef() const override { return weakDef; } |
| bool isTlv() const override { return tlv; } |
| bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } |
| |
| static bool classof(const Symbol *s) { return s->kind() == DylibKind; } |
| |
| DylibFile *file; |
| uint32_t stubsHelperIndex = UINT32_MAX; |
| uint32_t lazyBindOffset = UINT32_MAX; |
| |
| private: |
| const bool weakDef; |
| const bool tlv; |
| }; |
| |
| class LazySymbol : public Symbol { |
| public: |
| LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) |
| : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {} |
| |
| static bool classof(const Symbol *s) { return s->kind() == LazyKind; } |
| |
| void fetchArchiveMember(); |
| |
| private: |
| ArchiveFile *file; |
| const llvm::object::Archive::Symbol sym; |
| }; |
| |
| // The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit which |
| // does e.g. cleanup of static global variables. The ABI document says that the |
| // pointer can point to any address in one of the dylib's segments, but in |
| // practice ld64 seems to set it to point to the header, so that's what's |
| // implemented here. |
| // |
| // The ARM C++ ABI uses __dso_handle similarly, but I (int3) have not yet |
| // tested this on an ARM platform. |
| // |
| // DSOHandle effectively functions like a Defined symbol, but it doesn't belong |
| // to an InputSection. |
| class DSOHandle : public Symbol { |
| public: |
| DSOHandle(const MachHeaderSection *header) |
| : Symbol(DSOHandleKind, name), header(header) {} |
| |
| const MachHeaderSection *header; |
| |
| uint64_t getVA() const override; |
| |
| uint64_t getFileOffset() const override; |
| |
| bool isWeakDef() const override { return false; } |
| |
| bool isTlv() const override { return false; } |
| |
| static constexpr StringRef name = "___dso_handle"; |
| |
| static bool classof(const Symbol *s) { return s->kind() == DSOHandleKind; } |
| }; |
| |
| union SymbolUnion { |
| alignas(Defined) char a[sizeof(Defined)]; |
| alignas(Undefined) char b[sizeof(Undefined)]; |
| alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; |
| alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; |
| alignas(LazySymbol) char e[sizeof(LazySymbol)]; |
| alignas(DSOHandle) char f[sizeof(DSOHandle)]; |
| }; |
| |
| template <typename T, typename... ArgT> |
| T *replaceSymbol(Symbol *s, ArgT &&... arg) { |
| static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); |
| static_assert(alignof(T) <= alignof(SymbolUnion), |
| "SymbolUnion not aligned enough"); |
| assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && |
| "Not a Symbol"); |
| |
| return new (s) T(std::forward<ArgT>(arg)...); |
| } |
| |
| } // namespace macho |
| |
| std::string toString(const macho::Symbol &); |
| } // namespace lld |
| |
| #endif |