Merge pull request #481 from rjmansfield/macho-archs-support
Add -d archs data source for Mach-O universal binaries
diff --git a/src/bloaty.cc b/src/bloaty.cc
index e772988..5232959 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -85,6 +85,7 @@
constexpr DataSourceDefinition data_sources[] = {
{DataSource::kArchiveMembers, "armembers", "the .o files in a .a file"},
+ {DataSource::kArchs, "archs", "architecture slices in universal binaries"},
{DataSource::kCompileUnits, "compileunits",
"source file for the .o file (translation unit). requires debug info."},
{DataSource::kInputFiles, "inputfiles",
diff --git a/src/bloaty.h b/src/bloaty.h
index 85515fc..c9e2d1f 100644
--- a/src/bloaty.h
+++ b/src/bloaty.h
@@ -58,6 +58,7 @@
kRawRanges,
kSections,
kSegments,
+ kArchs,
// We always set this to one of the concrete symbol types below before
// setting it on a sink.
diff --git a/src/elf.cc b/src/elf.cc
index ac0f610..de2df46 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -1401,6 +1401,8 @@
DoReadELFSections(sink, kReportByEscapedSectionName);
break;
}
+ case DataSource::kArchs:
+ THROW("ELF files do not support 'archs' data source");
default:
THROW("unknown data source");
}
diff --git a/src/macho.cc b/src/macho.cc
index 3f849ac..5b8ca82 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -21,7 +21,9 @@
#include <string_view>
#include "absl/strings/str_join.h"
+#include "absl/strings/str_format.h"
#include "absl/strings/substitute.h"
+#include "third_party/darwin_xnu_macho/mach/machine.h"
#include "third_party/darwin_xnu_macho/mach-o/loader.h"
#include "third_party/darwin_xnu_macho/mach-o/fat.h"
#include "third_party/darwin_xnu_macho/mach-o/nlist.h"
@@ -69,6 +71,57 @@
}
}
+// Selects the subtype proper (the low 24 bits) of a Mach-O cpusubtype value,
+// discarding whatever is stored in the high byte.
+static constexpr uint32_t ARM64E_SUBTYPE_MASK = 0x00FFFFFF;
+
+// Returns true when `cpusubtype`, with its high byte masked off, equals
+// CPU_SUBTYPE_ARM64E.
+static bool IsArm64eSubtype(uint32_t cpusubtype) {
+  return (cpusubtype & ARM64E_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64E;
+}
+
+// Returns a short architecture name (e.g. "x86_64", "arm64e", "armv7") for a
+// Mach-O cputype/cpusubtype pair.
+//
+// Only the low 24 bits of `cpusubtype` identify the subtype; the high byte is
+// a capability field (machine.h's CPU_SUBTYPE_MASK region) and is masked off
+// before any comparison, so that e.g. an x86_64h slice with capability bits
+// set is still reported as "x86_64h".  CPU types without a dedicated name are
+// rendered as "cpu_<cputype>".
+std::string CpuTypeToString(uint32_t cputype, uint32_t cpusubtype) {
+  constexpr uint32_t kSubtypeProperMask = 0x00FFFFFF;
+  const uint32_t subtype = cpusubtype & kSubtypeProperMask;
+
+  switch (cputype) {
+    case CPU_TYPE_X86_64:
+      switch (subtype) {
+        case CPU_SUBTYPE_X86_64_H:
+          return "x86_64h";
+        default:
+          return "x86_64";
+      }
+    case CPU_TYPE_ARM64:
+      // arm64e is checked first; the helper applies the same masking itself.
+      if (IsArm64eSubtype(cpusubtype)) {
+        return "arm64e";
+      }
+      switch (subtype) {
+        case CPU_SUBTYPE_ARM64_V8:
+          return "arm64v8";
+        default:
+          return "arm64";
+      }
+    case CPU_TYPE_X86:
+      return "i386";
+    case CPU_TYPE_ARM:
+      switch (subtype) {
+        case CPU_SUBTYPE_ARM_V6:
+          return "armv6";
+        case CPU_SUBTYPE_ARM_V7:
+          return "armv7";
+        case CPU_SUBTYPE_ARM_V7F:
+          return "armv7f";
+        case CPU_SUBTYPE_ARM_V7S:
+          return "armv7s";
+        case CPU_SUBTYPE_ARM_V7K:
+          return "armv7k";
+        case CPU_SUBTYPE_ARM_V8:
+          return "armv8";
+        default:
+          return "arm";
+      }
+    default:
+      // Unknown CPU type: fall back to its numeric value.
+      return absl::StrFormat("cpu_%d", cputype);
+  }
+}
+
struct LoadCommand {
bool is64bit;
uint32_t cmd;
@@ -652,6 +705,10 @@
ReadDWARFInlines(dwarf, sink, true);
break;
}
+ case DataSource::kArchs: {
+ ProcessArchitectures(sink);
+ break;
+ }
case DataSource::kArchiveMembers:
default:
THROW("Mach-O doesn't support this data source");
@@ -660,6 +717,34 @@
}
}
+  // Implements the "archs" data source: labels file ranges with the name of
+  // the architecture they belong to.
+  //
+  // For a universal (fat) file, one range is added per fat_arch entry, covering
+  // `size` bytes starting at `offset`; bytes outside every slice are not
+  // attributed here.  For a thin Mach-O file the whole file becomes a single
+  // range.  Any other magic is rejected with THROW rather than being decoded as
+  // if it were a native-endian thin header, which would yield a bogus label.
+  void ProcessArchitectures(RangeSink* sink) const {
+    const string_view whole_file = file_data().data();
+
+    switch (ReadMagic(whole_file)) {
+      case FAT_CIGAM: {
+        // Every fat header field is passed through ByteSwap before use.
+        string_view cursor = whole_file;
+        auto header = GetStructPointerAndAdvance<fat_header>(&cursor);
+        const uint32_t nfat_arch = ByteSwap(header->nfat_arch);
+
+        for (uint32_t i = 0; i < nfat_arch; ++i) {
+          auto arch = GetStructPointerAndAdvance<fat_arch>(&cursor);
+          const uint32_t cputype = ByteSwap(arch->cputype);
+          const uint32_t cpusubtype = ByteSwap(arch->cpusubtype);
+          const uint32_t offset = ByteSwap(arch->offset);
+          const uint32_t size = ByteSwap(arch->size);
+
+          std::string arch_name = CpuTypeToString(cputype, cpusubtype);
+          // The slice is taken relative to the start of the whole file.
+          string_view slice_data = StrictSubstr(whole_file, offset, size);
+
+          sink->AddFileRange("archs", arch_name, slice_data);
+        }
+        break;
+      }
+      case MH_MAGIC:
+      case MH_MAGIC_64: {
+        // cputype and cpusubtype are read through mach_header for both the
+        // 32-bit and 64-bit magics.
+        auto header = GetStructPointer<mach_header>(whole_file);
+        std::string arch_name =
+            CpuTypeToString(header->cputype, header->cpusubtype);
+
+        sink->AddFileRange("archs", arch_name, whole_file);
+        break;
+      }
+      default:
+        THROW("unsupported Mach-O magic for 'archs' data source");
+    }
+  }
+
+
bool GetDisassemblyInfo(std::string_view /*symbol*/,
DataSource /*symbol_source*/,
DisassemblyInfo* /*info*/) const override {
diff --git a/tests/macho/archs.test b/tests/macho/archs.test
new file mode 100644
index 0000000..a8374b4
--- /dev/null
+++ b/tests/macho/archs.test
@@ -0,0 +1,263 @@
+# Test the -d archs data source for Mach-O binaries.
+#
+# Tests that the 'archs' data source correctly reports architecture slices
+# in universal binaries and single-architecture binaries.
+
+## Test 1: Universal binary with two architectures (x86_64 and arm64)
+# RUN: %yaml2obj --docnum=1 %s -o %t.universal
+# RUN: %bloaty %t.universal -d archs --domain=file | %FileCheck --check-prefix=UNIVERSAL %s
+
+# UNIVERSAL: FILE SIZE
+# UNIVERSAL-DAG: x86_64
+# UNIVERSAL-DAG: arm64
+# UNIVERSAL-DAG: [Unmapped]
+
+## Test 2: Filter to x86_64 architecture only
+# RUN: %bloaty %t.universal -d archs,segments --source-filter=x86_64 --domain=file | %FileCheck --check-prefix=FILTER-X86 %s
+
+# FILTER-X86: FILE SIZE
+# FILTER-X86: x86_64
+# FILTER-X86: __TEXT
+# FILTER-X86: __LINKEDIT
+# FILTER-X86-NOT: arm64
+
+## Test 3: Filter to arm64 architecture only
+# RUN: %bloaty %t.universal -d archs,segments --source-filter=arm64 --domain=file | %FileCheck --check-prefix=FILTER-ARM %s
+
+# FILTER-ARM: FILE SIZE
+# FILTER-ARM: arm64
+# FILTER-ARM: __TEXT
+# FILTER-ARM: __LINKEDIT
+# FILTER-ARM-NOT: x86_64
+
+## Test 4: Single architecture binary
+# RUN: %yaml2obj --docnum=2 %s -o %t.single
+# RUN: %bloaty %t.single -d archs --domain=file | %FileCheck --check-prefix=SINGLE %s
+
+# SINGLE: FILE SIZE
+# SINGLE: x86_64
+# SINGLE-NOT: arm64
+
+## Universal binary with x86_64 and arm64 slices
+--- !fat-mach-o
+FatHeader:
+ magic: 0xCAFEBABE
+ nfat_arch: 2
+FatArchs:
+ - cputype: 0x1000007
+ cpusubtype: 0x3
+ offset: 0x1000
+ size: 4176
+ align: 12
+ - cputype: 0x100000C
+ cpusubtype: 0x0
+ offset: 0x2050
+ size: 8280
+ align: 12
+Slices:
+ - !mach-o
+ FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x1000007
+ cpusubtype: 0x3
+ filetype: 0x2
+ ncmds: 3
+ sizeofcmds: 328
+ flags: 0x200085
+ reserved: 0x0
+ LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __PAGEZERO
+ vmaddr: 0
+ vmsize: 4294967296
+ fileoff: 0
+ filesize: 0
+ maxprot: 0
+ initprot: 0
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __TEXT
+ vmaddr: 4294967296
+ vmsize: 4096
+ fileoff: 0
+ filesize: 4096
+ maxprot: 5
+ initprot: 5
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x100000F80
+ size: 8
+ offset: 0xF80
+ align: 4
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 554889E531C05DC3
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 4294971392
+ vmsize: 4096
+ fileoff: 4096
+ filesize: 80
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+ LinkEditData:
+ NameList:
+ - n_strx: 1
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 4294971264
+ StringTable:
+ - ' '
+ - _main
+ - !mach-o
+ FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x100000C
+ cpusubtype: 0x0
+ filetype: 0x2
+ ncmds: 3
+ sizeofcmds: 328
+ flags: 0x200085
+ reserved: 0x0
+ LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __PAGEZERO
+ vmaddr: 0
+ vmsize: 4294967296
+ fileoff: 0
+ filesize: 0
+ maxprot: 0
+ initprot: 0
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __TEXT
+ vmaddr: 4294967296
+ vmsize: 8192
+ fileoff: 0
+ filesize: 8192
+ maxprot: 5
+ initprot: 5
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x100001F80
+ size: 8
+ offset: 0x1F80
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 00008052C0035FD6
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 4294975488
+ vmsize: 4096
+ fileoff: 8192
+ filesize: 88
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+ LinkEditData:
+ NameList:
+ - n_strx: 1
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 4294975360
+ StringTable:
+ - ' '
+ - _main
+
+## Single x86_64 Mach-O executable
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x1000007
+ cpusubtype: 0x3
+ filetype: 0x2
+ ncmds: 3
+ sizeofcmds: 328
+ flags: 0x200085
+ reserved: 0x0
+LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __PAGEZERO
+ vmaddr: 0
+ vmsize: 4294967296
+ fileoff: 0
+ filesize: 0
+ maxprot: 0
+ initprot: 0
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __TEXT
+ vmaddr: 4294967296
+ vmsize: 4096
+ fileoff: 0
+ filesize: 4096
+ maxprot: 5
+ initprot: 5
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x100000F80
+ size: 8
+ offset: 0xF80
+ align: 4
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 554889E531C05DC3
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 4294971392
+ vmsize: 4096
+ fileoff: 4096
+ filesize: 80
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+LinkEditData:
+ NameList:
+ - n_strx: 1
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 4294971264
+ StringTable:
+ - ' '
+ - _main