Merge pull request #135 from haberman/macho

Added proper support for 32-bit symtabs on Mach-O.
diff --git a/src/macho.cc b/src/macho.cc
index 3058774..e318c90 100644
--- a/src/macho.cc
+++ b/src/macho.cc
@@ -113,6 +113,19 @@
   }
 }
 
+struct LoadCommand {
+  bool is64bit;
+  uint32_t cmd;
+  string_view command_data;
+  string_view file_data;
+};
+
+template <class Struct>
+bool Is64Bit() { return false; }
+
+template <>
+bool Is64Bit<mach_header_64>() { return true; }
+
 template <class Struct, class Func>
 void ParseMachOHeaderImpl(string_view macho_data, RangeSink* overhead_sink,
                           Func&& loadcmd_func) {
@@ -133,9 +146,14 @@
       THROW("Mach-O load command had zero size.");
     }
 
-    string_view command_data = StrictSubstr(header_data, 0, command->cmdsize);
-    std::forward<Func>(loadcmd_func)(command->cmd, command_data, macho_data);
-    MaybeAddOverhead(overhead_sink, "[Mach-O Headers]", command_data);
+    LoadCommand data;
+    data.is64bit = Is64Bit<Struct>();
+    data.cmd = command->cmd;
+    data.command_data = StrictSubstr(header_data, 0, command->cmdsize);
+    data.file_data = macho_data;
+    std::forward<Func>(loadcmd_func)(data);
+
+    MaybeAddOverhead(overhead_sink, "[Mach-O Headers]", data.command_data);
     header_data = header_data.substr(command->cmdsize);
   }
 }
@@ -257,9 +275,8 @@
 }
 
 template <class Segment, class Section>
-void ParseSegment(string_view command_data, string_view file_data,
-                  RangeSink* sink) {
-  auto segment = GetStructPointerAndAdvance<Segment>(&command_data);
+void ParseSegment(LoadCommand cmd, RangeSink* sink) {
+  auto segment = GetStructPointerAndAdvance<Segment>(&cmd.command_data);
 
   if (segment->maxprot == VM_PROT_NONE) {
     return;
@@ -270,11 +287,11 @@
   if (sink->data_source() == DataSource::kSegments) {
     sink->AddRange(
         "macho_segment", segname, segment->vmaddr, segment->vmsize,
-        StrictSubstr(file_data, segment->fileoff, segment->filesize));
+        StrictSubstr(cmd.file_data, segment->fileoff, segment->filesize));
   } else if (sink->data_source() == DataSource::kSections) {
     uint32_t nsects = segment->nsects;
     for (uint32_t j = 0; j < nsects; j++) {
-      auto section = GetStructPointerAndAdvance<Section>(&command_data);
+      auto section = GetStructPointerAndAdvance<Section>(&cmd.command_data);
 
       // filesize equals vmsize unless the section is zerofill
       uint64_t filesize = section->size;
@@ -291,119 +308,114 @@
       std::string label = absl::StrJoin(
           std::make_tuple(segname, ArrayToStr(section->sectname, 16)), ",");
       sink->AddRange("macho_section", label, section->addr, section->size,
-                     StrictSubstr(file_data, section->offset, filesize));
+                     StrictSubstr(cmd.file_data, section->offset, filesize));
     }
   } else {
     BLOATY_UNREACHABLE();
   }
 }
 
-static void ParseDyldInfo(string_view command_data, string_view file_data,
-                          RangeSink* sink) {
-  auto info = GetStructPointer<dyld_info_command>(command_data);
+static void ParseDyldInfo(const LoadCommand& cmd, RangeSink* sink) {
+  auto info = GetStructPointer<dyld_info_command>(cmd.command_data);
 
   sink->AddFileRange(
       "macho_dyld", "Rebase Info",
-      StrictSubstr(file_data, info->rebase_off, info->rebase_size));
-  sink->AddFileRange("macho_dyld", "Binding Info",
-                     StrictSubstr(file_data, info->bind_off, info->bind_size));
+      StrictSubstr(cmd.file_data, info->rebase_off, info->rebase_size));
+  sink->AddFileRange(
+      "macho_dyld", "Binding Info",
+      StrictSubstr(cmd.file_data, info->bind_off, info->bind_size));
   sink->AddFileRange(
       "macho_dyld", "Weak Binding Info",
-      StrictSubstr(file_data, info->weak_bind_off, info->weak_bind_size));
+      StrictSubstr(cmd.file_data, info->weak_bind_off, info->weak_bind_size));
   sink->AddFileRange(
       "macho_dyld", "Lazy Binding Info",
-      StrictSubstr(file_data, info->lazy_bind_off, info->lazy_bind_size));
+      StrictSubstr(cmd.file_data, info->lazy_bind_off, info->lazy_bind_size));
   sink->AddFileRange(
       "macho_dyld", "Export Info",
-      StrictSubstr(file_data, info->export_off, info->export_size));
+      StrictSubstr(cmd.file_data, info->export_off, info->export_size));
 }
 
-static void ParseSymbolTable(string_view command_data, string_view file_data,
-                             RangeSink* sink) {
-  auto symtab = GetStructPointer<symtab_command>(command_data);
+static void ParseSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+  auto symtab = GetStructPointer<symtab_command>(cmd.command_data);
 
-  // TODO(haberman): use 32-bit symbol size where appropriate.
-  sink->AddFileRange("macho_symtab", "Symbol Table",
-                     StrictSubstr(file_data, symtab->symoff,
-                                  symtab->nsyms * sizeof(nlist_64)));
-  sink->AddFileRange("macho_symtab", "String Table",
-                     StrictSubstr(file_data, symtab->stroff, symtab->strsize));
+  size_t size = cmd.is64bit ? sizeof(nlist_64) : sizeof(struct nlist);
+  sink->AddFileRange(
+      "macho_symtab", "Symbol Table",
+      StrictSubstr(cmd.file_data, symtab->symoff, symtab->nsyms * size));
+  sink->AddFileRange(
+      "macho_symtab", "String Table",
+      StrictSubstr(cmd.file_data, symtab->stroff, symtab->strsize));
 }
 
-static void ParseDynamicSymbolTable(string_view command_data,
-                                    string_view file_data, RangeSink* sink) {
-  auto dysymtab = GetStructPointer<dysymtab_command>(command_data);
+static void ParseDynamicSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+  auto dysymtab = GetStructPointer<dysymtab_command>(cmd.command_data);
 
   sink->AddFileRange(
       "macho_dynsymtab", "Table of Contents",
-      StrictSubstr(file_data, dysymtab->tocoff,
+      StrictSubstr(cmd.file_data, dysymtab->tocoff,
                    dysymtab->ntoc * sizeof(dylib_table_of_contents)));
   sink->AddFileRange("macho_dynsymtab", "Module Table",
-                     StrictSubstr(file_data, dysymtab->modtaboff,
+                     StrictSubstr(cmd.file_data, dysymtab->modtaboff,
                                   dysymtab->nmodtab * sizeof(dylib_module_64)));
   sink->AddFileRange(
       "macho_dynsymtab", "Referenced Symbol Table",
-      StrictSubstr(file_data, dysymtab->extrefsymoff,
+      StrictSubstr(cmd.file_data, dysymtab->extrefsymoff,
                    dysymtab->nextrefsyms * sizeof(dylib_reference)));
   sink->AddFileRange("macho_dynsymtab", "Indirect Symbol Table",
-                     StrictSubstr(file_data, dysymtab->indirectsymoff,
+                     StrictSubstr(cmd.file_data, dysymtab->indirectsymoff,
                                   dysymtab->nindirectsyms * sizeof(uint32_t)));
   sink->AddFileRange("macho_dynsymtab", "External Relocation Entries",
-                     StrictSubstr(file_data, dysymtab->extreloff,
+                     StrictSubstr(cmd.file_data, dysymtab->extreloff,
                                   dysymtab->nextrel * sizeof(relocation_info)));
   sink->AddFileRange(
       "macho_dynsymtab", "Local Relocation Entries",
-      StrictSubstr(file_data, dysymtab->locreloff,
+      StrictSubstr(cmd.file_data, dysymtab->locreloff,
                    dysymtab->nlocrel * sizeof(struct relocation_info)));
 }
 
-static void ParseLinkeditCommand(string_view label, string_view command_data,
-                                 string_view file_data, RangeSink* sink) {
-  auto linkedit = GetStructPointer<linkedit_data_command>(command_data);
+static void ParseLinkeditCommand(string_view label, const LoadCommand& cmd,
+                                 RangeSink* sink) {
+  auto linkedit = GetStructPointer<linkedit_data_command>(cmd.command_data);
   sink->AddFileRange(
       "macho_linkedit", label,
-      StrictSubstr(file_data, linkedit->dataoff, linkedit->datasize));
+      StrictSubstr(cmd.file_data, linkedit->dataoff, linkedit->datasize));
 }
 
-void ParseLoadCommand(uint32_t cmd, string_view command_data,
-                      string_view file_data, RangeSink* sink) {
-  switch (cmd) {
+void ParseLoadCommand(const LoadCommand& cmd, RangeSink* sink) {
+  switch (cmd.cmd) {
     case LC_SEGMENT_64:
-      ParseSegment<segment_command_64, section_64>(command_data, file_data,
-                                                   sink);
+      ParseSegment<segment_command_64, section_64>(cmd, sink);
       break;
     case LC_SEGMENT:
-      ParseSegment<segment_command, section>(command_data, file_data, sink);
+      ParseSegment<segment_command, section>(cmd, sink);
       break;
     case LC_DYLD_INFO:
     case LC_DYLD_INFO_ONLY:
-      ParseDyldInfo(command_data, file_data, sink);
+      ParseDyldInfo(cmd, sink);
       break;
     case LC_SYMTAB:
-      ParseSymbolTable(command_data, file_data, sink);
+      ParseSymbolTable(cmd, sink);
       break;
     case LC_DYSYMTAB:
-      ParseDynamicSymbolTable(command_data, file_data, sink);
+      ParseDynamicSymbolTable(cmd, sink);
       break;
     case LC_CODE_SIGNATURE:
-      ParseLinkeditCommand("Code Signature", command_data, file_data, sink);
+      ParseLinkeditCommand("Code Signature", cmd, sink);
       break;
     case LC_SEGMENT_SPLIT_INFO:
-      ParseLinkeditCommand("Segment Split Info", command_data, file_data, sink);
+      ParseLinkeditCommand("Segment Split Info", cmd, sink);
       break;
     case LC_FUNCTION_STARTS:
-      ParseLinkeditCommand("Function Start Addresses", command_data, file_data,
-                           sink);
+      ParseLinkeditCommand("Function Start Addresses", cmd, sink);
       break;
     case LC_DATA_IN_CODE:
-      ParseLinkeditCommand("Table of Non-instructions", command_data, file_data,
-                           sink);
+      ParseLinkeditCommand("Table of Non-instructions", cmd, sink);
       break;
     case LC_DYLIB_CODE_SIGN_DRS:
-      ParseLinkeditCommand("Code Signing DRs", command_data, file_data, sink);
+      ParseLinkeditCommand("Code Signing DRs", cmd, sink);
       break;
     case LC_LINKER_OPTIMIZATION_HINT:
-      ParseLinkeditCommand("Optimization Hints", command_data, file_data, sink);
+      ParseLinkeditCommand("Optimization Hints", cmd, sink);
       break;
   }
 }
@@ -411,24 +423,21 @@
 void ParseLoadCommands(RangeSink* sink) {
   ForEachLoadCommand(
       sink->input_file().data(), sink,
-      [sink](uint32_t cmd, string_view command_data, string_view file_data) {
-        ParseLoadCommand(cmd, command_data, file_data, sink);
-      });
+      [sink](const LoadCommand& cmd) { ParseLoadCommand(cmd, sink); });
 }
 
-void ParseSymbolsFromSymbolTable(string_view command_data,
-                                 string_view file_data, RangeSink* sink) {
-  auto symtab_cmd = GetStructPointer<symtab_command>(command_data);
+template <class NList>
+void ParseSymbolsFromSymbolTable(const LoadCommand& cmd, RangeSink* sink) {
+  auto symtab_cmd = GetStructPointer<symtab_command>(cmd.command_data);
 
-  // TODO(haberman): use 32-bit symbol size where appropriate.
-  string_view symtab = StrictSubstr(file_data, symtab_cmd->symoff,
-                                    symtab_cmd->nsyms * sizeof(nlist_64));
+  string_view symtab = StrictSubstr(cmd.file_data, symtab_cmd->symoff,
+                                    symtab_cmd->nsyms * sizeof(NList));
   string_view strtab =
-      StrictSubstr(file_data, symtab_cmd->stroff, symtab_cmd->strsize);
+      StrictSubstr(cmd.file_data, symtab_cmd->stroff, symtab_cmd->strsize);
 
   uint32_t nsyms = symtab_cmd->nsyms;
   for (uint32_t i = 0; i < nsyms; i++) {
-    auto sym = GetStructPointerAndAdvance<nlist_64>(&symtab);
+    auto sym = GetStructPointerAndAdvance<NList>(&symtab);
 
     if (sym->n_type & N_STAB || sym->n_value == 0) {
       continue;
@@ -443,10 +452,14 @@
 void ParseSymbols(string_view file_data, RangeSink* sink) {
   ForEachLoadCommand(
       file_data, sink,
-      [sink](uint32_t cmd, string_view command_data, string_view file_data) {
-        switch (cmd) {
+      [sink](const LoadCommand& cmd) {
+        switch (cmd.cmd) {
           case LC_SYMTAB:
-            ParseSymbolsFromSymbolTable(command_data, file_data, sink);
+            if (cmd.is64bit) {
+              ParseSymbolsFromSymbolTable<nlist_64>(cmd, sink);
+            } else {
+              ParseSymbolsFromSymbolTable<struct nlist>(cmd, sink);
+            }
             break;
           case LC_DYSYMTAB:
             //ParseSymbolsFromDynamicSymbolTable(command_data, file_data, sink);
@@ -458,15 +471,15 @@
 static void AddMachOFallback(RangeSink* sink) {
   ForEachLoadCommand(
       sink->input_file().data(), sink,
-      [sink](uint32_t cmd, string_view command_data, string_view file_data) {
-        switch (cmd) {
+      [sink](const LoadCommand& cmd) {
+        switch (cmd.cmd) {
           case LC_SEGMENT_64:
             AddSegmentAsFallback<segment_command_64, section_64>(
-                command_data, file_data, sink);
+                cmd.command_data, cmd.file_data, sink);
             break;
           case LC_SEGMENT:
-            AddSegmentAsFallback<segment_command, section>(command_data,
-                                                           file_data, sink);
+            AddSegmentAsFallback<segment_command, section>(cmd.command_data,
+                                                           cmd.file_data, sink);
             break;
         }
       });
@@ -481,20 +494,17 @@
   std::string GetBuildId() const override {
     std::string id;
 
-    ForEachLoadCommand(
-        file_data().data(), nullptr,
-        [&id](uint32_t cmd, string_view command_data,
-              string_view /* file_data */) {
-          if (cmd == LC_UUID) {
-            auto uuid_cmd =
-                GetStructPointerAndAdvance<uuid_command>(&command_data);
-            if (!command_data.empty()) {
-              THROWF("Unexpected excess uuid data: $0", command_data.size());
-            }
-            id.resize(sizeof(uuid_cmd->uuid));
-            memcpy(&id[0], &uuid_cmd->uuid[0], sizeof(uuid_cmd->uuid));
-          }
-        });
+    ForEachLoadCommand(file_data().data(), nullptr, [&id](LoadCommand cmd) {
+      if (cmd.cmd == LC_UUID) {
+        auto uuid_cmd =
+            GetStructPointerAndAdvance<uuid_command>(&cmd.command_data);
+        if (!cmd.command_data.empty()) {
+          THROWF("Unexpected excess uuid data: $0", cmd.command_data.size());
+        }
+        id.resize(sizeof(uuid_cmd->uuid));
+        memcpy(&id[0], &uuid_cmd->uuid[0], sizeof(uuid_cmd->uuid));
+      }
+    });
 
     return id;
   }