| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/developer/debug/zxdb/symbols/module_symbols_impl.h" |
| |
| #include <stdio.h> |
| |
| #include <algorithm> |
| #include <memory> |
| |
| #include "llvm/DebugInfo/DIContext.h" |
| #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
| #include "llvm/Object/Binary.h" |
| #include "llvm/Object/ELFObjectFile.h" |
| #include "llvm/Object/ObjectFile.h" |
| #include "src/developer/debug/ipc/protocol.h" |
| #include "src/developer/debug/shared/largest_less_or_equal.h" |
| #include "src/developer/debug/shared/logging/logging.h" |
| #include "src/developer/debug/shared/message_loop.h" |
| #include "src/developer/debug/zxdb/common/file_util.h" |
| #include "src/developer/debug/zxdb/common/string_util.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_binary.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_expr_eval.h" |
| #include "src/developer/debug/zxdb/symbols/dwarf_symbol_factory.h" |
| #include "src/developer/debug/zxdb/symbols/elf_symbol.h" |
| #include "src/developer/debug/zxdb/symbols/find_line.h" |
| #include "src/developer/debug/zxdb/symbols/function.h" |
| #include "src/developer/debug/zxdb/symbols/input_location.h" |
| #include "src/developer/debug/zxdb/symbols/line_details.h" |
| #include "src/developer/debug/zxdb/symbols/line_table_impl.h" |
| #include "src/developer/debug/zxdb/symbols/resolve_options.h" |
| #include "src/developer/debug/zxdb/symbols/symbol_context.h" |
| #include "src/developer/debug/zxdb/symbols/symbol_data_provider.h" |
| #include "src/developer/debug/zxdb/symbols/variable.h" |
| #include "src/lib/elflib/elflib.h" |
| |
| namespace zxdb { |
| |
| namespace { |
| |
// Largest distance allowed between a looked-up address and the preceding ELF symbol for that
// symbol to still be reported as a match.
//
// The limit is kept small because showing the name of a symbol that is unrelated to the address at
// hand confuses users. It is nonzero because, for unsymbolized code, the nearest previous ELF
// symbol name is a valuable hint about where an address lives.
constexpr uint64_t kMaxElfOffsetForMatch = 4 * 1024;
| |
| // Implementation of SymbolDataProvider that returns no memory or registers. This is used when |
| // evaluating global variables' location expressions which normally just declare an address. See |
| // LocationForVariable(). |
| class GlobalSymbolDataProvider : public SymbolDataProvider { |
| public: |
| static Err GetContextError() { |
| return Err( |
| "Global variable requires register or memory data to locate. " |
| "Please file a bug with a repro."); |
| } |
| |
| // SymbolDataProvider implementation. |
| debug_ipc::Arch GetArch() override { return debug_ipc::Arch::kUnknown; } |
| void GetRegisterAsync(debug_ipc::RegisterID, GetRegisterCallback callback) override { |
| debug_ipc::MessageLoop::Current()->PostTask( |
| FROM_HERE, [cb = std::move(callback)]() mutable { cb(GetContextError(), {}); }); |
| } |
| void GetFrameBaseAsync(GetFrameBaseCallback callback) override { |
| debug_ipc::MessageLoop::Current()->PostTask( |
| FROM_HERE, [cb = std::move(callback)]() mutable { cb(GetContextError(), 0); }); |
| } |
| void GetMemoryAsync(uint64_t address, uint32_t size, GetMemoryCallback callback) override { |
| debug_ipc::MessageLoop::Current()->PostTask(FROM_HERE, [cb = std::move(callback)]() mutable { |
| cb(GetContextError(), std::vector<uint8_t>()); |
| }); |
| } |
| void WriteMemory(uint64_t address, std::vector<uint8_t> data, WriteCallback cb) override { |
| debug_ipc::MessageLoop::Current()->PostTask( |
| FROM_HERE, [cb = std::move(cb)]() mutable { cb(GetContextError()); }); |
| } |
| }; |
| |
| // The order of the parameters matters because "line 0" is handled in "greedy" mode only for the |
| // candidate line. If the caller is asking about an address that matches line 0, we don't want to |
| // expand that past line boundaries, but we do want to expand other lines actoss line 0 in greedy |
| // mode. |
| bool SameFileLine(const llvm::DWARFDebugLine::Row& reference, |
| const llvm::DWARFDebugLine::Row& candidate, bool greedy) { |
| // EndSequence entries don't have files or lines associated with them, it's just a marker. The |
| // table will report the previous line's file+line as a side-effect fo the way it's encoded, so |
| // explicitly fail matching for these. |
| if (reference.EndSequence || candidate.EndSequence) |
| return false; |
| |
| if (greedy && candidate.Line == 0) |
| return true; |
| return reference.File == candidate.File && reference.Line == candidate.Line; |
| } |
| |
| // Determines if the given input location references a special identifier of the given type. If it |
| // does, returns the name of that symbol. If it does not, returns a null optional. |
| std::optional<std::string> GetSpecialInputLocation(const InputLocation& loc, SpecialIdentifier si) { |
| if (loc.type != InputLocation::Type::kName || loc.name.components().size() != 1) |
| return std::nullopt; |
| |
| if (loc.name.components()[0].special() == si) |
| return loc.name.components()[0].name(); |
| |
| return std::nullopt; |
| } |
| |
| // Returns true if the given input references the special "main" function annotation. |
| bool ReferencesMainFunction(const InputLocation& loc) { |
| if (loc.type != InputLocation::Type::kName || loc.name.components().size() != 1) |
| return false; |
| return loc.name.components()[0].special() == SpecialIdentifier::kMain; |
| } |
| |
| // Returns true if any component of this identifier isn't supported via lookup in the |
| // ModuleSymbolsImpl. |
| bool HasOnlySupportedSpecialIdentifierTypes(const Identifier& ident) { |
| for (const auto& comp : ident.components()) { |
| switch (comp.special()) { |
| case SpecialIdentifier::kNone: |
| case SpecialIdentifier::kElf: |
| case SpecialIdentifier::kPlt: |
| case SpecialIdentifier::kAnon: |
| break; // Normal boring component. |
| case SpecialIdentifier::kEscaped: |
| FX_NOTREACHED(); // "Escaped" annotations shouldn't appear in identifiers. |
| break; |
| case SpecialIdentifier::kMain: |
| // "$main" is supported only when it's the only component ("foo::$main" is invalid). |
| return ident.components().size() == 1; |
| case SpecialIdentifier::kRegister: |
| return false; // Can't look up registers in the symbols. |
| case SpecialIdentifier::kLast: |
| FX_NOTREACHED(); // Not supposed to be a valid value. |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| } // namespace |
| |
| ModuleSymbolsImpl::ModuleSymbolsImpl(std::unique_ptr<DwarfBinary> binary, |
| const std::string& build_dir, bool create_index) |
| : binary_(std::move(binary)), build_dir_(build_dir), weak_factory_(this) { |
| symbol_factory_ = fxl::MakeRefCounted<DwarfSymbolFactory>(GetWeakPtr()); |
| FillElfSymbols(); |
| |
| if (create_index) { |
| // We could consider creating a new binary/object file just for indexing. The indexing will page |
| // all of the binary in, and most of it won't be needed again (it will be paged back in slowly, |
| // savings may make such a change worth it for large programs as needed). |
| // |
| // Although it will be slightly slower to create, the memory savings may make such a change |
| // worth it for large programs. |
| if (llvm::object::ObjectFile* object_file = binary_->GetLLVMObjectFile()) |
| index_.CreateIndex(object_file); |
| } |
| } |
| |
| ModuleSymbolsImpl::~ModuleSymbolsImpl() = default; |
| |
| fxl::WeakPtr<ModuleSymbolsImpl> ModuleSymbolsImpl::GetWeakPtr() { |
| return weak_factory_.GetWeakPtr(); |
| } |
| |
| ModuleSymbolStatus ModuleSymbolsImpl::GetStatus() const { |
| ModuleSymbolStatus status; |
| status.build_id = binary_->GetBuildID(); |
| status.base = 0; // We don't know this, only ProcessSymbols does. |
| status.symbols_loaded = true; // Since this instance exists at all. |
| status.functions_indexed = index_.CountSymbolsIndexed(); |
| status.files_indexed = index_.files_indexed(); |
| status.symbol_file = binary_->GetName(); |
| return status; |
| } |
| |
| std::time_t ModuleSymbolsImpl::GetModificationTime() const { |
| return binary_->GetModificationTime(); |
| } |
| |
| std::string ModuleSymbolsImpl::GetBuildDir() const { return build_dir_; } |
| |
| uint64_t ModuleSymbolsImpl::GetMappedLength() const { return binary_->GetMappedLength(); } |
| |
| std::vector<Location> ModuleSymbolsImpl::ResolveInputLocation(const SymbolContext& symbol_context, |
| const InputLocation& input_location, |
| const ResolveOptions& options) const { |
| // Thie skip_function_prologue option requires that symbolize be set. |
| FX_DCHECK(!options.skip_function_prologue || options.symbolize); |
| |
| switch (input_location.type) { |
| case InputLocation::Type::kNone: |
| return std::vector<Location>(); |
| case InputLocation::Type::kLine: |
| return ResolveLineInputLocation(symbol_context, input_location, options); |
| case InputLocation::Type::kName: |
| return ResolveSymbolInputLocation(symbol_context, input_location, options); |
| case InputLocation::Type::kAddress: |
| return ResolveAddressInputLocation(symbol_context, input_location, options); |
| } |
| } |
| |
| fxl::RefPtr<DwarfUnit> ModuleSymbolsImpl::GetDwarfUnit(const SymbolContext& symbol_context, |
| uint64_t absolute_address) const { |
| return binary_->UnitForRelativeAddress(symbol_context.AbsoluteToRelative(absolute_address)); |
| } |
| |
| LineDetails ModuleSymbolsImpl::LineDetailsForAddress(const SymbolContext& symbol_context, |
| uint64_t absolute_address, bool greedy) const { |
| uint64_t relative_address = symbol_context.AbsoluteToRelative(absolute_address); |
| auto unit = binary_->UnitForRelativeAddress(relative_address); |
| if (!unit) |
| return LineDetails(); |
| |
| // TODO(brettw) this should use our LineTable wrapper instead of LLVM's so it can be mocked. |
| const llvm::DWARFDebugLine::LineTable* line_table = unit->GetLLVMLineTable(); |
| if (!line_table && line_table->Rows.empty()) |
| return LineDetails(); |
| |
| const auto& rows = line_table->Rows; |
| uint32_t found_row_index = line_table->lookupAddress({relative_address}); |
| |
| // The row could be not found or it could be in a "nop" range indicated by an "end sequence" |
| // marker. For padding between functions, the compiler will insert a row with this marker to |
| // indicate everything until the next address isn't an instruction. With this flag, the other |
| // information on the line will be irrelevant (in practice it will be the same as for the previous |
| // entry). |
| if (found_row_index == line_table->UnknownRowIndex || rows[found_row_index].EndSequence) |
| return LineDetails(); |
| |
| // Adjust the beginning and end ranges to include all matching entries of the same line. |
| // |
| // Note that this code must not try to hide "line 0" entries (corresponding to compiler-generated |
| // code). This function is used by the stepping code which has its own handling for these ranges. |
| // Trying to put "line 0" code in with the previous or next entry (what some other code does that |
| // tries to hide this from the user) will confuse the stepping code which will always step through |
| // these instructions. |
| uint32_t first_row_index = found_row_index; |
| while (first_row_index > 0 && |
| SameFileLine(rows[found_row_index], rows[first_row_index - 1], greedy)) { |
| first_row_index--; |
| } |
| uint32_t last_row_index = found_row_index; // Inclusive. |
| while (last_row_index < rows.size() - 1 && |
| SameFileLine(rows[found_row_index], rows[last_row_index + 1], greedy)) { |
| last_row_index++; |
| } |
| |
| // Resolve the file name. Skip for "line 0" entries which are compiled-generated code not |
| // associated with a line entry, leaving the file name and compilation directory empty. Typically |
| // there will be a file if we ask, but that's leftover from the previous row in the table by the |
| // state machine and is not relevant. |
| std::string file_name; |
| std::string compilation_dir; |
| if (rows[first_row_index].Line) { |
| line_table->getFileNameByIndex(rows[first_row_index].File, "", |
| llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, |
| file_name); |
| compilation_dir = unit->GetCompilationDir(); |
| } |
| |
| if (!build_dir_.empty()) { |
| compilation_dir = build_dir_; |
| } |
| |
| if (rows[first_row_index].Line == 0) { |
| // Line 0 entries get no file name nor compilation dir to avoid a FileLine assert. |
| file_name.clear(); |
| compilation_dir.clear(); |
| } |
| |
| LineDetails result(FileLine(file_name, compilation_dir, rows[first_row_index].Line)); |
| |
| // Add entries for each row. The last row doesn't count because it should be |
| // an end_sequence marker to provide the ending size of the previous entry. |
| // So never include that. |
| for (uint32_t i = first_row_index; i <= last_row_index && i < rows.size() - 1; i++) { |
| // With loop bounds we can always dereference @ i + 1. |
| if (rows[i + 1].Address < rows[i].Address) |
| break; // Going backwards, corrupted so give up. |
| |
| LineDetails::LineEntry entry; |
| entry.column = rows[i].Column; |
| entry.range = AddressRange(symbol_context.RelativeToAbsolute(rows[i].Address.Address), |
| symbol_context.RelativeToAbsolute(rows[i + 1].Address.Address)); |
| result.entries().push_back(entry); |
| } |
| |
| return result; |
| } |
| |
| std::vector<std::string> ModuleSymbolsImpl::FindFileMatches(std::string_view name) const { |
| return index_.FindFileMatches(name); |
| } |
| |
| std::vector<fxl::RefPtr<Function>> ModuleSymbolsImpl::GetMainFunctions() const { |
| std::vector<fxl::RefPtr<Function>> result; |
| for (const auto& ref : index_.main_functions()) { |
| auto symbol_ref = IndexSymbolRefToSymbol(ref); |
| const Function* func = symbol_ref.Get()->AsFunction(); |
| if (func) |
| result.emplace_back(RefPtrTo(func)); |
| } |
| return result; |
| } |
| |
| const Index& ModuleSymbolsImpl::GetIndex() const { return index_; } |
| |
| LazySymbol ModuleSymbolsImpl::IndexSymbolRefToSymbol(const IndexNode::SymbolRef& ref) const { |
| // TODO(bug 53091) in the future we may want to add ELF symbol support here. |
| switch (ref.kind()) { |
| case IndexNode::SymbolRef::kNull: |
| break; |
| case IndexNode::SymbolRef::kDwarf: |
| case IndexNode::SymbolRef::kDwarfDeclaration: |
| // Handled by the DWARF symbol factory. |
| return symbol_factory_->MakeLazy(ref.offset()); |
| } |
| return LazySymbol(); |
| } |
| |
| bool ModuleSymbolsImpl::HasBinary() const { return binary_->HasBinary(); } |
| |
| void ModuleSymbolsImpl::AppendLocationForFunction(const SymbolContext& symbol_context, |
| const ResolveOptions& options, |
| const Function* func, |
| std::vector<Location>* result) const { |
| if (func->code_ranges().empty()) |
| return; // No code associated with this. |
| |
| // Compute the full file/line information if requested. This recomputes function DIE which is |
| // unnecessary but makes the code structure simpler and ensures the results are always the same |
| // with regard to how things like inlined functions are handled (if the location maps to both a |
| // function and an inlined function inside of it). |
| uint64_t abs_addr = symbol_context.RelativeToAbsolute(func->code_ranges()[0].begin()); |
| if (options.symbolize) |
| result->push_back(LocationForAddress(symbol_context, abs_addr, options, func)); |
| else |
| result->emplace_back(Location::State::kAddress, abs_addr); |
| } |
| |
| std::vector<Location> ModuleSymbolsImpl::ResolveLineInputLocation( |
| const SymbolContext& symbol_context, const InputLocation& input_location, |
| const ResolveOptions& options) const { |
| std::vector<Location> result; |
| for (const std::string& file : FindFileMatches(input_location.line.file())) { |
| ResolveLineInputLocationForFile(symbol_context, file, input_location.line.line(), options, |
| &result); |
| } |
| return result; |
| } |
| |
| std::vector<Location> ModuleSymbolsImpl::ResolveSymbolInputLocation( |
| const SymbolContext& symbol_context, const InputLocation& input_location, |
| const ResolveOptions& options) const { |
| FX_DCHECK(input_location.type == InputLocation::Type::kName); |
| if (!HasOnlySupportedSpecialIdentifierTypes(input_location.name)) |
| return {}; // Unsupported symbol type. |
| |
| // Special-case for ELF/PLT functions. |
| // |
| // Note that this only checks the ELF index when explicitly requested. This is because for a given |
| // function, say "pthread_key_create", it will have a .so with the implementation, and each module |
| // that references it will have a PLT thunk (a type of ELF symbol). Matching all the ELF symbols |
| // is not what the user wants when they ask for information or a breakpoint on this function (the |
| // breakpoint will end up meaning 2 breaks per call). |
| // |
| // At the same time, it would be nice to use ELF symbols when debugging non-symbolized binaries. |
| // We can't just ask if the index is empty to detech this case since "unsymbolized" binaries can |
| // sometimes have a few trivial DWARF symbols. |
| // |
| // Just falling back to ELF symbols here when the main lookup matches nothing doesn't work because |
| // the calling modules in the pthread example above will have only the PLT match. To make it work, |
| // every caller of this function that combines results from more than one module (including |
| // FindName and ProcessSymbols) needs to have some filtering or prioritizing and how this should |
| // work is non-obvious. |
| if (auto plt_name = GetSpecialInputLocation(input_location, SpecialIdentifier::kPlt)) |
| return ResolvePltName(symbol_context, *plt_name); |
| if (auto elf_name = GetSpecialInputLocation(input_location, SpecialIdentifier::kElf)) |
| return ResolveElfName(symbol_context, *elf_name); |
| |
| std::vector<Location> result; |
| |
| auto symbol_to_find = input_location.name; |
| |
| // Special-case for main functions. |
| if (ReferencesMainFunction(input_location)) { |
| auto main_functions = GetMainFunctions(); |
| if (!main_functions.empty()) { |
| for (const auto& func : GetMainFunctions()) |
| AppendLocationForFunction(symbol_context, options, func.get(), &result); |
| return result; |
| } else { |
| // Nothing explicitly marked as the main function, fall back on anything in the toplevel |
| // namespace named "main". |
| symbol_to_find = Identifier(IdentifierQualification::kGlobal, IdentifierComponent("main")); |
| |
| // Fall through to symbol finding on the new name. |
| } |
| } |
| |
| // TODO(bug 37654) it would be nice if this could be deleted and all code go through |
| // expr/find_name.h to query the index. As-is this duplicates some of FindName's logic in a less |
| // flexible way. |
| for (const auto& ref : index_.FindExact(symbol_to_find)) { |
| LazySymbol lazy_symbol = IndexSymbolRefToSymbol(ref); |
| const Symbol* symbol = lazy_symbol.Get(); |
| |
| if (const Function* function = symbol->AsFunction()) { |
| // Symbol is a function. |
| AppendLocationForFunction(symbol_context, options, function, &result); |
| } else if (const Variable* variable = symbol->AsVariable()) { |
| // Symbol is a variable. This will be the case for global variables and file- and class-level |
| // statics. This always symbolizes since we already computed the symbol. |
| result.push_back(LocationForVariable(symbol_context, RefPtrTo(variable))); |
| } else { |
| // Unknown type of symbol. |
| continue; |
| } |
| } |
| |
| return result; |
| } |
| |
| std::vector<Location> ModuleSymbolsImpl::ResolveAddressInputLocation( |
| const SymbolContext& symbol_context, const InputLocation& input_location, |
| const ResolveOptions& options) const { |
| std::vector<Location> result; |
| if (options.symbolize) { |
| result.push_back(LocationForAddress(symbol_context, input_location.address, options, nullptr)); |
| } else { |
| result.emplace_back(Location::State::kAddress, input_location.address); |
| } |
| return result; |
| } |
| |
| Location ModuleSymbolsImpl::LocationForAddress(const SymbolContext& symbol_context, |
| uint64_t absolute_address, |
| const ResolveOptions& options, |
| const Function* optional_func) const { |
| if (auto dwarf_loc = |
| DwarfLocationForAddress(symbol_context, absolute_address, options, optional_func)) |
| return std::move(*dwarf_loc); |
| if (auto elf_locs = ElfLocationForAddress(symbol_context, absolute_address, options)) |
| return std::move(*elf_locs); |
| |
| // Not symbolizable, return an "address" with no symbol information. Mark it symbolized to record |
| // that we tried and failed. |
| return Location(Location::State::kSymbolized, absolute_address); |
| } |
| |
| // This function is similar to llvm::DWARFContext::getLineInfoForAddress. |
| std::optional<Location> ModuleSymbolsImpl::DwarfLocationForAddress( |
| const SymbolContext& symbol_context, uint64_t absolute_address, const ResolveOptions& options, |
| const Function* optional_func) const { |
| // TODO(bug 5544) handle addresses that aren't code like global variables. |
| uint64_t relative_address = symbol_context.AbsoluteToRelative(absolute_address); |
| fxl::RefPtr<DwarfUnit> unit = binary_->UnitForRelativeAddress(relative_address); |
| if (!unit) // No DWARF symbol. |
| return std::nullopt; |
| |
| FileLine file_line; |
| int column = 0; |
| |
| // Get the innermost subroutine or inlined function for the address. This may be empty, but still |
| // lookup the line info below in case its present. This computes both a LazySymbol which we |
| // pass to the result, and a possibly-null containing Function* (not an inlined subroutine) to do |
| // later computations on. |
| fxl::RefPtr<Function> function; // For prologue computations. |
| LazySymbol lazy_function; |
| if (optional_func) { |
| // The function was passed in and we want to return that exact one. This will happen if the |
| // caller has asked for the location of a named function. |
| function = RefPtrTo(optional_func); |
| lazy_function = LazySymbol(optional_func); |
| } else { |
| // Resolve the function for this address. |
| if (llvm::DWARFDie subroutine = unit->FunctionForRelativeAddress(relative_address)) { |
| // getSubroutineForAddress() will return the most specific inlined function for the address. |
| lazy_function = symbol_factory_->MakeLazy(subroutine); |
| function = RefPtrTo(lazy_function.Get()->AsFunction()); |
| |
| // The is_inline() check is strictly unnecessary since ambiguous inline computations will |
| // work either way. This check allows us to skip the ambiguous inline computations in the |
| // common case that we're not in an inline. |
| if (function && function->is_inline() && |
| options.ambiguous_inline == ResolveOptions::AmbiguousInline::kOuter) { |
| // Adjust the function to be the outermost frame (should be the non-inlined function) |
| // for ambiguous locations (at the beginning of one or more inlined functions). |
| std::vector<fxl::RefPtr<Function>> inline_chain = |
| function->GetAmbiguousInlineChain(symbol_context, absolute_address); |
| if (inline_chain.size() > 1) { |
| lazy_function = inline_chain.back(); |
| |
| // Since we picked a non-topmost inline subroutine, we know the file/line because |
| // it's the call location of the inline subroutine we skipped. DWARF doesn't encode |
| // column information for this type of call. |
| const auto& calling_func = inline_chain[inline_chain.size() - 2]; |
| file_line = calling_func->call_line(); |
| } |
| } |
| } |
| } |
| |
| // Get the file/line location (may fail). Don't overwrite one computed above if already set above |
| // using the ambigous inline call site. |
| if (!file_line.is_valid()) { |
| const LineTable& line_table = unit->GetLineTable(); |
| |
| // Use the line table to move the address to after the function prologue. Assume if the function |
| // is inline there's no prologue. Inlines themselves will have no prologues, and we assume |
| // inlines won't appear in the prologue of other functions. |
| if (function && !function->is_inline() && options.skip_function_prologue) { |
| if (size_t prologue_size = GetFunctionPrologueSize(line_table, function.get())) { |
| // The function has a prologue. When it does, we know it has code ranges so don't need to |
| // validate it's nonempty before using. |
| uint64_t function_begin = function->code_ranges().front().begin(); |
| if (relative_address >= function_begin && |
| relative_address < function_begin + prologue_size) { |
| // Adjust address to the first real instruction. |
| relative_address = function_begin + prologue_size; |
| absolute_address = symbol_context.RelativeToAbsolute(relative_address); |
| } |
| } |
| } |
| |
| // Look up the line info for this address. |
| // |
| // This re-computes some of what GetFunctionPrologueSize() may have done above. This could be |
| // enhanced in the future by having LineTable::GetRowForAddress that include the prologue |
| // adjustment as part of one computation. |
| LineTable::FoundRow found_row = line_table.GetRowForAddress(symbol_context, absolute_address); |
| if (!found_row.empty()) { |
| // Line info present. Only set the file name if there's a nonzero line number. "Line 0" |
| // entries which are compiled-generated code not associated with a line entry. Typically there |
| // will be a file if we ask, but that's leftover from the previous row in the table by the |
| // state machine and is not relevant. |
| const LineTable::Row& row = found_row.get(); |
| std::optional<std::string> file_name; |
| if (row.Line) |
| file_name = line_table.GetFileNameForRow(row); // Could still return nullopt. |
| if (file_name) { |
| // It's important this only gets called when row.Line > 0. FileLine will assert for line 0 |
| // if a file name or build directory is given to ensure that all "no code" locations compare |
| // identically. This is guaranteed here because file_name will only be set when the line is |
| // nonzero. |
| if (build_dir_.empty()) |
| file_line = FileLine(std::move(*file_name), unit->GetCompilationDir(), row.Line); |
| else |
| file_line = FileLine(std::move(*file_name), build_dir_, row.Line); |
| } |
| column = row.Column; |
| } |
| } |
| |
| return Location(absolute_address, file_line, column, symbol_context, std::move(lazy_function)); |
| } |
| |
| std::optional<Location> ModuleSymbolsImpl::ElfLocationForAddress( |
| const SymbolContext& symbol_context, uint64_t absolute_address, |
| const ResolveOptions& options) const { |
| if (elf_addresses_.empty()) |
| return std::nullopt; |
| |
| uint64_t relative_addr = symbol_context.AbsoluteToRelative(absolute_address); |
| auto found = debug_ipc::LargestLessOrEqual( |
| elf_addresses_.begin(), elf_addresses_.end(), relative_addr, |
| [](const ElfSymbolRecord* r, uint64_t addr) { return r->relative_address < addr; }, |
| [](const ElfSymbolRecord* r, uint64_t addr) { return r->relative_address == addr; }); |
| if (found == elf_addresses_.end()) |
| return std::nullopt; |
| |
| // There could theoretically be multiple matches for this address, but we return only the first. |
| const ElfSymbolRecord* record = *found; |
| if (relative_addr - record->relative_address > kMaxElfOffsetForMatch) |
| return std::nullopt; // Too far away. |
| return Location( |
| absolute_address, FileLine(), 0, symbol_context, |
| fxl::MakeRefCounted<ElfSymbol>(const_cast<ModuleSymbolsImpl*>(this)->GetWeakPtr(), *record)); |
| } |
| |
| Location ModuleSymbolsImpl::LocationForVariable(const SymbolContext& symbol_context, |
| fxl::RefPtr<Variable> variable) const { |
| // Evaluate the DWARF expression for the variable. Global and static variables' locations aren't |
| // based on CPU state. In some cases like TLS the location may require CPU state or may result in |
| // a constant instead of an address. In these cases give up and return an "unlocated variable." |
| // These can easily be evaluated by the expression system so we can still print their values. |
| |
| // Need one unique location. |
| if (variable->location().locations().size() != 1) |
| return Location(symbol_context, std::move(variable)); |
| |
| auto global_data_provider = fxl::MakeRefCounted<GlobalSymbolDataProvider>(); |
| DwarfExprEval eval; |
| eval.Eval(global_data_provider, symbol_context, variable->location().locations()[0].expression, |
| [](DwarfExprEval* eval, const Err& err) {}); |
| |
| // Only evaluate synchronous outputs that result in a pointer. |
| if (!eval.is_complete() || !eval.is_success() || |
| eval.GetResultType() != DwarfExprEval::ResultType::kPointer) |
| return Location(symbol_context, std::move(variable)); |
| |
| // TODO(brettw) in all of the return cases we could in the future fill in the file/line of the |
| // definition of the variable. Currently Variables don't provide that (even though it's usually in |
| // the DWARF symbols). |
| return Location(eval.GetResult(), FileLine(), 0, symbol_context, std::move(variable)); |
| } |
| |
| std::vector<Location> ModuleSymbolsImpl::ResolvePltName(const SymbolContext& symbol_context, |
| const std::string& mangled_name) const { |
| // There can theoretically be multiple symbols with the given name, some might be PLT symbols, |
| // some might not be. Check all name matches for a PLT one. |
| auto cur = mangled_elf_symbols_.lower_bound(mangled_name); |
| while (cur != mangled_elf_symbols_.end() && cur->first == mangled_name) { |
| if (cur->second.type == ElfSymbolType::kPlt) |
| return {MakeElfSymbolLocation(symbol_context, std::nullopt, cur->second)}; |
| ++cur; |
| } |
| |
| // No PLT locations found for this name. |
| return {}; |
| } |
| |
| std::vector<Location> ModuleSymbolsImpl::ResolveElfName(const SymbolContext& symbol_context, |
| const std::string& mangled_name) const { |
| std::vector<Location> result; |
| |
| // There can theoretically be multiple symbols with the given name. |
| auto cur = mangled_elf_symbols_.lower_bound(mangled_name); |
| while (cur != mangled_elf_symbols_.end() && cur->first == mangled_name) { |
| result.push_back(MakeElfSymbolLocation(symbol_context, std::nullopt, cur->second)); |
| ++cur; |
| } |
| |
| return result; |
| } |
| |
| // To a first approximation we just look up the line in the line table for each compilation unit |
| // that references the file. Complications: |
| // |
| // 1. The line might not be an exact match (the user can specify a blank line or something optimized |
| // out). In this case, find the next valid line. |
| // |
| // 2. The above step can find many different locations. Maybe some code from the file in question is |
| // inlined into the compilation unit, but not the function with the line in it. Or different |
| // template instantiations can mean that a line of code is in some instantiations but don't apply |
| // to others. |
| // |
| // To solve this duplication problem, get the resolved line of each of the addresses found above |
| // and find the best one. Keep only those locations matching the best one (there can still be |
| // multiple). |
| // |
| // 3. Inlining and templates can mean there can be multiple matches of the exact same line. Only |
| // keep the first match per function or inlined function to catch the case where a line is spread |
| // across multiple line table entries. |
| void ModuleSymbolsImpl::ResolveLineInputLocationForFile(const SymbolContext& symbol_context, |
| const std::string& canonical_file, |
| int line_number, |
| const ResolveOptions& options, |
| std::vector<Location>* output) const { |
| const std::vector<unsigned>* units = index_.FindFileUnitIndices(canonical_file); |
| if (!units) |
| return; |
| |
| std::vector<LineMatch> matches; |
| for (unsigned index : *units) { |
| fxl::RefPtr<DwarfUnit> unit = binary_->GetUnitAtIndex(index); |
| const LineTable& line_table = unit->GetLineTable(); |
| |
| // Complication 1 above: find all matches for this line in the unit. |
| std::vector<LineMatch> unit_matches = |
| GetAllLineTableMatchesInUnit(line_table, canonical_file, line_number); |
| |
| matches.insert(matches.end(), unit_matches.begin(), unit_matches.end()); |
| } |
| |
| if (matches.empty()) |
| return; |
| |
| // Complications 2 & 3 above: Get all instances of the best match only with a max of one per |
| // function. The best match is the one with the lowest line number (found matches should all be |
| // bigger than the input line, so this will be the closest). |
| for (const LineMatch& match : GetBestLineMatches(matches)) { |
| uint64_t abs_addr = symbol_context.RelativeToAbsolute(match.address); |
| if (options.symbolize) |
| output->push_back(LocationForAddress(symbol_context, abs_addr, options, nullptr)); |
| else |
| output->push_back(Location(Location::State::kAddress, abs_addr)); |
| } |
| } |
| |
| Location ModuleSymbolsImpl::MakeElfSymbolLocation(const SymbolContext& symbol_context, |
| std::optional<uint64_t> relative_address, |
| const ElfSymbolRecord& record) const { |
| uint64_t absolute_address; |
| if (relative_address) { |
| // Caller specified a more specific address (normally inside the ELF symbol). |
| absolute_address = symbol_context.RelativeToAbsolute(*relative_address); |
| } else { |
| // Take address from the ELF symbol. |
| absolute_address = symbol_context.RelativeToAbsolute(record.relative_address); |
| } |
| |
| return Location( |
| absolute_address, FileLine(), 0, symbol_context, |
| fxl::MakeRefCounted<ElfSymbol>(const_cast<ModuleSymbolsImpl*>(this)->GetWeakPtr(), record)); |
| } |
| |
| void ModuleSymbolsImpl::FillElfSymbols() { |
| FX_DCHECK(mangled_elf_symbols_.empty()); |
| FX_DCHECK(elf_addresses_.empty()); |
| |
| const std::map<std::string, llvm::ELF::Elf64_Sym>& elf_syms = binary_->GetELFSymbols(); |
| const std::map<std::string, uint64_t>& plt_syms = binary_->GetPLTSymbols(); |
| |
| // Insert the regular symbols. |
| // |
| // The |st_value| is the relative virtual address we want to index. Potentially we might want to |
| // save more flags and expose them in the ElfSymbol class. |
| for (const auto& [name, sym] : elf_syms) { |
| // The symbol type is the low 4 bits. The higher bits encode the visibility which we don't |
| // care about. We only need to index objects and code, and a couple of special symbols left |
| // specifically for Zxdb's usage. |
| int symbol_type = sym.st_info & 0xf; |
| if (symbol_type != elflib::STT_OBJECT && symbol_type != elflib::STT_FUNC && |
| symbol_type != elflib::STT_TLS && name.substr(0, 5) != "zxdb.") |
| continue; |
| |
| if (sym.st_value == 0) |
| continue; // No address for this symbol. Probably imported. |
| |
| auto inserted = mangled_elf_symbols_.emplace( |
| std::piecewise_construct, std::forward_as_tuple(name), |
| std::forward_as_tuple(ElfSymbolType::kNormal, sym.st_value, sym.st_size, name)); |
| |
| // Append all addresses for now, this will be sorted at the bottom. |
| elf_addresses_.push_back(&inserted->second); |
| } |
| |
| // Insert PLT symbols. |
| for (const auto& [name, addr] : plt_syms) { |
| // TODO(sadmac): Set the symbol size to the size of a PLT entry on this architecture. |
| auto inserted = |
| mangled_elf_symbols_.emplace(std::piecewise_construct, std::forward_as_tuple(name), |
| std::forward_as_tuple(ElfSymbolType::kPlt, addr, 0, name)); |
| |
| // Append all addresses for now, this will be sorted at the bottom. |
| elf_addresses_.push_back(&inserted->second); |
| } |
| |
| std::sort(elf_addresses_.begin(), elf_addresses_.end(), |
| [](const ElfSymbolRecord* left, const ElfSymbolRecord* right) { |
| return left->relative_address < right->relative_address; |
| }); |
| } |
| |
| } // namespace zxdb |