// blob: a41a67c35585c8e7602e6bb915c5d46e952520d3
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file contains APIs for use within Bloaty. None of these APIs have any
// guarantees whatsoever about their stability! The public API for bloaty is
// its command-line interface.
#ifndef BLOATY_H_
#define BLOATY_H_
#include <stdlib.h>
#define __STDC_LIMIT_MACROS
#define __STDC_FORMAT_MACROS
#include <stdint.h>
#include <inttypes.h>

#include <cassert>
#include <iosfwd>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "capstone/capstone.h"
#include "dwarf/debug_info.h"
#include "bloaty.pb.h"
#include "range_map.h"
#include "re.h"
#include "util.h"
namespace bloaty {
// Global verbosity setting; declared here, defined in another translation
// unit.
extern int verbose_level;
// Forward declarations so the classes below can refer to these types by
// pointer or reference before they are defined. NameMunger, DualMap and
// DisassemblyInfo are defined later in this header.
// NOTE(review): Options is presumably the protobuf message from bloaty.pb.h
// -- confirm.
class NameMunger;
class Options;
struct DualMap;
struct DisassemblyInfo;
// Enumerates the data sources a RangeSink can be associated with.
enum class DataSource {
  kArchiveMembers,
  kAccessPattern,
  kCompileUnits,
  kInlines,
  kInputFiles,
  kRawRanges,
  kSections,
  kSegments,

  // Generic "symbols" source. Before a value is set on a sink, this is always
  // replaced by one of the concrete symbol types listed after it.
  kSymbols,
  kRawSymbols,
  kFullSymbols,
  kShortSymbols
};
// Abstract base for one input file: stores the file name and exposes a view
// of the file's contents. Instances are neither copyable nor assignable.
class InputFile {
 public:
  // Records |filename|; the name is copied into the object.
  InputFile(absl::string_view filename) : filename_(filename) {}

  InputFile(const InputFile&) = delete;
  InputFile& operator=(const InputFile&) = delete;

  // Implemented by subclasses.
  // NOTE(review): the exact contract for |file| and the return value is not
  // visible in this header -- confirm against the implementations.
  virtual bool TryOpen(absl::string_view filename,
                       std::unique_ptr<InputFile>& file) = 0;

  virtual ~InputFile() = default;

  // Name this object was constructed with.
  const std::string& filename() const { return filename_; }

  // Current contents view (whatever |data_| holds).
  absl::string_view data() const { return data_; }

 private:
  const std::string filename_;

 protected:
  // Accessible to subclasses, which are presumably responsible for pointing
  // this at the file's bytes -- confirm.
  absl::string_view data_;
};
// Interface for objects that know how to open input files.
class InputFileFactory {
 public:
  virtual ~InputFileFactory() = default;

  // Opens |filename| and returns the resulting InputFile.
  // Throws if the file could not be opened.
  virtual std::unique_ptr<InputFile> OpenFile(
      const std::string& filename) const = 0;
};
// InputFileFactory implementation; OpenFile() is defined out of line.
// NOTE(review): judging by the name this maps files with mmap -- confirm
// against the implementation.
class MmapInputFileFactory : public InputFileFactory {
 public:
  std::unique_ptr<InputFile> OpenFile(
      const std::string& filename) const override;
};
// NOTE: all sizes are uint64, even on 32-bit platforms:
// - 32-bit platforms can have files >4GB in some cases.
// - for object files (not executables/shared libs) we pack both a section
// index and an address into the "vmaddr" value, and we need enough bits to
// safely do this.
// A RangeSink is the object through which data sources assign labels to
// ranges of VM address space and/or file offsets.
class RangeSink {
 public:
  RangeSink(const InputFile *file, const Options &options,
            DataSource data_source, const DualMap *translator,
            google::protobuf::Arena *arena);
  RangeSink(const RangeSink &) = delete;
  RangeSink &operator=(const RangeSink &) = delete;
  ~RangeSink();

  const Options &options() const { return options_; }

  // Registers an output map together with the NameMunger paired with it.
  void AddOutput(DualMap *map, const NameMunger *munger);

  DataSource data_source() const { return data_source_; }
  const InputFile &input_file() const { return *file_; }

  // A sink without a translator is the one that populates the base map.
  bool IsBaseMap() const { return !translator_; }

  // Labels a range that may exist in both domains. A zero |vmsize| or
  // |filesize| means the mapping is presumed not to exist in that domain:
  // e.g. .bss mappings don't exist in the file, and .debug_* mappings don't
  // exist in memory.
  void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr,
                uint64_t vmsize, uint64_t fileoff, uint64_t filesize);

  // Same as above, with the file range given as a view whose offset is
  // computed relative to the start of the input file's data.
  void AddRange(const char *analyzer, absl::string_view name, uint64_t vmaddr,
                uint64_t vmsize, absl::string_view file_range) {
    const char *file_begin = file_->data().data();
    AddRange(analyzer, name, vmaddr, vmsize, file_range.data() - file_begin,
             file_range.size());
  }

  // Labels a file-only range given as offset + size.
  void AddFileRange(const char *analyzer, absl::string_view name,
                    uint64_t fileoff, uint64_t filesize);

  // Like AddFileRange(), except that the label is whatever label was
  // previously assigned to VM address |label_from_vmaddr|. Does nothing if no
  // label has been assigned to that address.
  void AddFileRangeForVMAddr(const char *analyzer, uint64_t label_from_vmaddr,
                             absl::string_view file_range);

  void AddVMRangeForVMAddr(const char *analyzer, uint64_t label_from_vmaddr,
                           uint64_t addr, uint64_t size);

  // Copies the label of |from_file_range| onto |file_range|, but only when
  // all of |from_file_range| carries one single label. Otherwise does
  // nothing.
  void AddFileRangeForFileRange(const char *analyzer,
                                absl::string_view from_file_range,
                                absl::string_view file_range);

  // Labels a file-only range given as a view into the file.
  void AddFileRange(const char *analyzer, absl::string_view name,
                    absl::string_view file_range) {
    // With separate debug files, the DWARF analyzer will try to add sections
    // that live in the debug file. Only the main file is being profiled, so
    // ranges that do not start inside it are dropped here. This is simple,
    // though it means some useless work has already been done by the time we
    // get here; avoiding that work up front may be worthwhile.
    if (!FileContainsPointer(file_range.data())) {
      return;
    }
    AddFileRange(analyzer, name, file_range.data() - file_->data().data(),
                 file_range.size());
  }

  // The VM-only functions below may not be used to populate the base map!

  // Adds a region to the memory map. It should not overlap any region added
  // previously, but it should overlap the base memory map.
  void AddVMRange(const char *analyzer, uint64_t vmaddr, uint64_t vmsize,
                  const std::string &name);

  // Like AddVMRange(), but tolerates this addr/size having already been added
  // under a different name, in which case this name becomes an alias of the
  // previous one.
  //
  // Intended for things like symbol tables, which sometimes map several names
  // to the same physical function.
  void AddVMRangeAllowAlias(const char *analyzer, uint64_t vmaddr,
                            uint64_t size, const std::string &name);

  // Like AddVMRange(), but tolerates this addr/size having already been added
  // under a different name, in which case this add is simply ignored.
  //
  // Intended for cases like source files: a single function sometimes appears
  // to come from several source files, but aliasing one whole source file to
  // another would be wrong because probably only part of it overlaps.
  void AddVMRangeIgnoreDuplicate(const char *analyzer, uint64_t vmaddr,
                                 uint64_t size, const std::string &name);

  // Returns the output map registered at position |index|.
  const DualMap &MapAtIndex(size_t index) const {
    const DualMap *map = outputs_[index].first;
    return *map;
  }

  // Translates |ptr|, which must lie within input_file().data(), to a VM
  // address.
  uint64_t TranslateFileToVM(const char *ptr);
  absl::string_view TranslateVMToFile(uint64_t address);
  const DualMap *Translator() { return translator_; }

  // Decompresses zlib-formatted data and returns the decompressed bytes.
  // Because the result is not actually part of the file, any Add*Range() call
  // on this region is a no-op.
  absl::string_view ZlibDecompress(absl::string_view contents,
                                   uint64_t uncompressed_size);

  static constexpr uint64_t kUnknownSize = RangeMap::kUnknownSize;

 private:
  // True when |ptr| falls inside [begin, end) of the input file's data.
  bool FileContainsPointer(const void *ptr) const {
    const absl::string_view file_data = file_->data();
    const char *begin = file_data.data();
    const char *end = begin + file_data.size();
    return ptr >= begin && ptr < end;
  }

  bool ContainsVerboseVMAddr(uint64_t vmaddr, uint64_t vmsize);
  bool ContainsVerboseFileOffset(uint64_t fileoff, uint64_t filesize);
  bool IsVerboseForVMRange(uint64_t vmaddr, uint64_t vmsize);
  bool IsVerboseForFileRange(uint64_t fileoff, uint64_t filesize);

  const InputFile *file_;
  const Options options_;
  DataSource data_source_;
  const DualMap *translator_;
  std::vector<std::pair<DualMap *, const NameMunger *>> outputs_;
  google::protobuf::Arena *arena_;
};
// NameMunger //////////////////////////////////////////////////////////////////

// Transforms input names according to the user's configuration, for example
// through user-supplied regexes. Not copyable.
class NameMunger {
 public:
  NameMunger() = default;
  NameMunger(const NameMunger&) = delete;
  NameMunger& operator=(const NameMunger&) = delete;

  // Adds a regex that will be applied to all names. All added regexes are
  // applied in sequence.
  void AddRegex(const std::string& regex, const std::string& replacement);

  // Returns the transformed form of |name|.
  std::string Munge(absl::string_view name) const;

  // True while no regex has been added.
  bool IsEmpty() const { return regexes_.empty(); }

 private:
  // Each entry pairs a compiled regex with a replacement string.
  std::vector<std::pair<std::unique_ptr<ReImpl>, std::string>> regexes_;
};
// Ordered map from a name to a pair of 64-bit values.
// NOTE(review): the pair is presumably (address, size) -- confirm with users
// of this type.
using SymbolTable =
    std::map<absl::string_view, std::pair<uint64_t, uint64_t>>;
// Represents an object/executable file in a format like ELF, Mach-O, PE, etc.
// To support a new file type, implement this interface.
class ObjectFile {
 public:
  // Takes ownership of |file_data|. Until set_debug_file() is called, the
  // object serves as its own debug file.
  explicit ObjectFile(std::unique_ptr<InputFile> file_data)
      : file_data_(std::move(file_data)), debug_file_(this) {}
  virtual ~ObjectFile() {}

  // Searches for a binary string representing the identity of this object file.
  // It is typically a hash of the unstripped object during the build.
  // If not found, returns an empty string.
  virtual std::string GetBuildId() const = 0;

  // Process this file, pushing data to |sinks| as appropriate for each data
  // source. Implementations can consult debug_file(), which is |this| unless
  // a separate debug file was installed with set_debug_file().
  virtual void ProcessFile(const std::vector<RangeSink*>& sinks) const = 0;

  virtual bool GetDisassemblyInfo(absl::string_view symbol,
                                  DataSource symbol_source,
                                  DisassemblyInfo* info) const = 0;

  // The default implementation has no map to hand out and returns
  // std::nullopt; subclasses may override.
  virtual std::optional<std::unordered_map<std::string, std::string>>
  TakeSymbolToCrateMap() {
    return std::nullopt;
  }

  const InputFile& file_data() const { return *file_data_; }

  // Sets the debug file for |this|. |file| must be non-null, must outlive this
  // instance, and must carry the same build id as this file.
  void set_debug_file(const ObjectFile* file) {
    // Validate the file being installed. Checking the current debug_file_
    // instead would be a tautology, because it starts out as |this|.
    assert(file != nullptr);
    assert(file->GetBuildId() == GetBuildId());
    debug_file_ = file;
  }

  // Never null: either |this| or the file given to set_debug_file().
  const ObjectFile& debug_file() const { return *debug_file_; }

 private:
  std::unique_ptr<InputFile> file_data_;
  const ObjectFile* debug_file_;  // Not owned.
};
// Format-specific openers, one per supported object file format. Each takes
// the input file by reference and returns an ObjectFile.
// NOTE(review): presumably a null result means "not this format" and leaves
// |file| untouched -- confirm against the implementations.
std::unique_ptr<ObjectFile> TryOpenELFFile(
    std::unique_ptr<InputFile>& file,
    std::optional<std::string> link_map_file);
std::unique_ptr<ObjectFile> TryOpenMachOFile(
    std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile(
    std::unique_ptr<InputFile>& file);
std::unique_ptr<ObjectFile> TryOpenPEFile(
    std::unique_ptr<InputFile>& file);
// Provided by dwarf.cc. A module that wants to use these fills in a
// dwarf::File and then calls them.
void ReadDWARFCompileUnits(const dwarf::File& file, const DualMap& map,
                           const dwarf::CU* skeleton, RangeSink* sink);

// Convenience overload: same as above with a null |skeleton|.
inline void ReadDWARFCompileUnits(const dwarf::File& file, const DualMap& map,
                                  RangeSink* sink) {
  const dwarf::CU* const no_skeleton = nullptr;
  ReadDWARFCompileUnits(file, map, no_skeleton, sink);
}

void ReadDWARFInlines(const dwarf::File& file, RangeSink* sink,
                      bool include_line);

void ReadEhFrame(absl::string_view contents, RangeSink* sink);
void ReadEhFrameHdr(absl::string_view contents, RangeSink* sink);
// Demangles a C++ symbol following the Itanium ABI. |source| selects which
// demangling mode is used.
std::string ItaniumDemangle(absl::string_view symbol,
                            DataSource source);
// Packs |symbol| and |crate| together into one string.
std::string EncodeSymbolWithCrateId(absl::string_view symbol,
                                    absl::string_view crate);
// A (symbol, crate) pair.
struct DecodeCrateIdResult {
  std::string symbol;
  std::string crate;
};

// Two results are equal when both the symbol and the crate match.
inline bool operator==(const DecodeCrateIdResult& lhs,
                       const DecodeCrateIdResult& rhs) {
  if (lhs.symbol != rhs.symbol) {
    return false;
  }
  return lhs.crate == rhs.crate;
}
// Splits a string produced by |EncodeSymbolWithCrateId| back into its
// |symbol| and |crate| parts. For a string that was not encoded, |crate| is
// the empty string.
DecodeCrateIdResult DecodeSymbolWithCrateId(
    absl::string_view symbol);
// DualMap /////////////////////////////////////////////////////////////////////

// Holds two RangeMaps for a given file: one for VM space and one for file
// space.
struct DualMap {
  RangeMap vm_map;    // VM space.
  RangeMap file_map;  // File space.
};
// Inputs consumed by DisassembleFunction() and DisassembleFindReferences().
// NOTE(review): |text| is presumably the function's machine code and
// |start_address| its VM address -- confirm with the code that fills this in.
struct DisassemblyInfo {
  absl::string_view text;
  DualMap symbol_map;
  // Value-initialized so that a default-constructed object never exposes
  // indeterminate values before it has been filled in.
  cs_arch arch{};
  cs_mode mode{};
  uint64_t start_address = 0;
};
// Disassembly entry points operating on a filled-in DisassemblyInfo.
// DisassembleFunction() returns its result as a string;
// DisassembleFindReferences() reports into |sink|.
std::string DisassembleFunction(
    const DisassemblyInfo& info);
void DisassembleFindReferences(
    const DisassemblyInfo& info, RangeSink* sink);
// Top-level API ///////////////////////////////////////////////////////////////
// This should only be used by main.cc and unit tests.
class Rollup;
// A size in each of the two domains: VM and file.
struct DomainSizes {
  int64_t vm;
  int64_t file;
};

// One row of rollup output; rows form a tree through |sorted_children|.
struct RollupRow {
  RollupRow(const std::string& name_) : name(name_) {}

  std::string name;
  DomainSizes size = {0, 0};
  DomainSizes filtered_size = {0, 0};
  int64_t other_count = 0;

  // The next three fields are zero-initialized like every other scalar in
  // this struct, so that a freshly constructed row can be passed to Compare()
  // or printed without reading indeterminate values.
  int64_t sortkey = 0;
  double vmpercent = 0;
  double filepercent = 0;

  // The size of the base in a diff mode. Otherwise stay 0.
  DomainSizes old_size = {0, 0};

  std::vector<RollupRow> sorted_children;

  // Strict-weak ordering for sorting rows: larger |sortkey| first, ties
  // broken by ascending |name|.
  static bool Compare(const RollupRow& a, const RollupRow& b) {
    // Sort value high-to-low.
    if (a.sortkey != b.sortkey) {
      return a.sortkey > b.sortkey;
    }
    // Sort name low to high.
    return a.name < b.name;
  }
};
// Output formats selectable through OutputOptions::output_format.
enum class OutputFormat {
  kPrettyPrint,
  kCSV,
  kTSV,
  kProtobuf
};
// Which size domain(s) to show: file, VM, or both.
enum class ShowDomain {
  kShowFile,
  kShowVM,
  kShowBoth
};
// Settings passed to RollupOutput::Print(). A default-constructed value
// selects pretty-printed output, a maximum label length of 80, and both
// domains shown.
struct OutputOptions {
  OutputFormat output_format = OutputFormat::kPrettyPrint;
  size_t max_label_len = 80;
  ShowDomain show = ShowDomain::kShowBoth;
  // Off by default.
  // NOTE(review): presumably adds extra size columns to CSV/TSV output --
  // confirm where this flag is read.
  bool showAllSizesCSV = false;
};
// Holds the result of a rollup: the data source names, the tree of rows
// rooted at a row named "TOTAL", and optionally a disassembly string.
// Not copyable.
struct RollupOutput {
  RollupOutput() : toplevel_row_("TOTAL") {}
  RollupOutput(const RollupOutput&) = delete;
  RollupOutput& operator=(const RollupOutput&) = delete;

  // Appends a copy of |name| to the list of data source names.
  void AddDataSourceName(absl::string_view name) {
    source_names_.push_back(std::string(name));
  }

  const std::vector<std::string>& source_names() const {
    return source_names_;
  }

  // Writes this output to |out| as directed by |options|.
  void Print(const OutputOptions& options, std::ostream* out);

  // Stores a copy of |disassembly|.
  void SetDisassembly(absl::string_view disassembly) {
    disassembly_ = std::string(disassembly);
  }

  // Returns a view of the stored disassembly; the view refers to storage
  // owned by this object.
  absl::string_view GetDisassembly() { return disassembly_; }

  // For debugging.
  const RollupRow& toplevel_row() const { return toplevel_row_; }

  bool diff_mode() const { return diff_mode_; }

 private:
  // Rollup is a friend and has direct access to the fields below.
  friend class Rollup;

  std::vector<std::string> source_names_;
  RollupRow toplevel_row_;
  std::string disassembly_;

  // When we are in diff mode, rollup sizes are relative to the baseline.
  bool diff_mode_ = false;

  static bool IsSame(const std::string& a, const std::string& b);

  // Per-format printers.
  void PrettyPrint(const OutputOptions& options, std::ostream* out) const;
  void PrintToCSV(std::ostream* out, bool tabs, bool csvDiff) const;
  void PrintToProtobuf(std::ostream* out) const;

  // Helpers that emit a single row or a whole subtree.
  void PrettyPrintRow(const RollupRow& row, size_t indent,
                      const OutputOptions& options, std::ostream* out) const;
  void PrettyPrintTree(const RollupRow& row, size_t indent,
                       const OutputOptions& options, std::ostream* out) const;
  void PrintRowToCSV(const RollupRow& row,
                     std::vector<std::string> parent_labels,
                     std::ostream* out, bool tabs, bool csvDiff) const;
  void PrintTreeToCSV(const RollupRow& row,
                      std::vector<std::string> parent_labels,
                      std::ostream* out, bool tabs, bool csvDiff) const;
};
// Stand-in for `std::filesystem::path(filename).stem()`.
std::string GetPathStem(const std::string& filename);

// Parses the command line given by |argc| / |argv| into |options| and
// |output_options|.
// NOTE(review): presumably returns false and fills |error| on failure --
// confirm against the implementation.
bool ParseOptions(bool skip_unknown, int* argc, char** argv[],
                  Options* options, OutputOptions* output_options,
                  std::string* error);

// Runs Bloaty with |options|, opening files through |file_factory| and
// placing the result in |output|.
// NOTE(review): presumably returns false and fills |error| on failure --
// confirm against the implementation.
bool BloatyMain(const Options& options,
                const InputFileFactory& file_factory, RollupOutput* output,
                std::string* error);
} // namespace bloaty
#endif