[accesspattern] Allow customizing the size of a frame

Defaults to 8192 bytes (8 KiB), which is what blobfs now records.

At the same time, fixes an off-by-one bug in detecting when an out-of-range
frame is specified.

Tested by running manually and with
https://fuchsia-review.googlesource.com/c/fuchsia/+/428019

Change-Id: If7b58e0b6d65457fba25507a8521d0735406bb5d
Reviewed-on: https://fuchsia-review.googlesource.com/c/third_party/bloaty/+/428018
Reviewed-by: Felix Zhu <fcz@google.com>
Reviewed-by: Xyan Bhatnagar <xbhatnag@google.com>
Commit-Queue: Yifei Teng <yifeit@google.com>
diff --git a/src/bloaty.cc b/src/bloaty.cc
index 3dfb2df..fb28cd7 100644
--- a/src/bloaty.cc
+++ b/src/bloaty.cc
@@ -2103,6 +2103,8 @@
       options->add_link_map_filename(std::string(option));
     } else if (args.TryParseOption("--cold-bytes-filter", &option)) {
       options->set_cold_bytes_filter(std::string(option));
+    } else if (args.TryParseUint64Option("--access-pattern-frame-size", &uint64_option)) {
+      options->set_access_pattern_frame_size(uint64_option);
     } else if (args.TryParseUint64Option("--debug-fileoff", &uint64_option)) {
       if (options->has_debug_fileoff()) {
         THROW("currently we only support a single debug fileoff");
diff --git a/src/bloaty.proto b/src/bloaty.proto
index da18671..ec9fcc3 100644
--- a/src/bloaty.proto
+++ b/src/bloaty.proto
@@ -35,17 +35,21 @@
   repeated string link_map_filename = 30;
 
   // If set, reads how frequently a range of bytes is accessed in CSV format.
-  // The filter is in a sparse pattern format: for example, given a 96 KiB file
-  // and a `0:1,2:3` pattern, it means that the first 32 KiB frame is accessed
-  // once, and the third 32 KiB frame is accessed 3 times. The second 32 KiB
-  // frame is left out, meaning it was never accessed. The frame indices in
-  // the pattern is not guaranteed to be sorted.
-  // This option works together with the `accesspattern` data source.
-  // When the `accesspattern` data source is the first data source, the protobuf
-  // output format will only output items whose region overlaps with a
-  // never-accessed frame.
+  // The filter is in a sparse pattern format: assuming the frame size (see
+  // access_pattern_frame_size) is 8 KiB, given a 24 KiB file and a `0:1,2:3`
+  // pattern, it means that the first 8 KiB frame is accessed once, and the
+  // third 8 KiB frame is accessed 3 times. The second 8 KiB frame is left
+  // out, meaning it was never accessed. The frame indices in the pattern are
+  // not guaranteed to be sorted. This option works together with the
+  // `accesspattern` data source. When the `accesspattern` data source is the
+  // first data source, the protobuf output format will only output items whose
+  // region overlaps with a never-accessed frame.
   optional string cold_bytes_filter = 31;
 
+  // If set, customizes how many bytes are in a frame as used by the pattern
+  // in cold_bytes_filter.
+  optional uint64 access_pattern_frame_size = 32 [default = 8192];
+
   // The data sources to scan in each file.  At least one data source must be
   // specified.  If more than one source is specified, the output is
   // hierarchical.
diff --git a/src/elf.cc b/src/elf.cc
index 53674f0..4430a0c 100644
--- a/src/elf.cc
+++ b/src/elf.cc
@@ -1343,9 +1343,9 @@
     if (!sink->options().has_cold_bytes_filter()) {
       THROW("need to specify cold bytes filter");
     }
-    // Each element corresponds to 32 KiB.
+    // Each element corresponds to |kAccessPatternFrameSize| bytes.
     std::vector<bool> access_pattern;
-    constexpr static uint64_t kAccessPatternFrameSize = 32 * 1024;
+    uint64_t kAccessPatternFrameSize = sink->options().access_pattern_frame_size();
     auto frequencies = sink->options().cold_bytes_filter();
     size_t file_size = sink->input_file().data().size();
     size_t num_frames = (file_size + kAccessPatternFrameSize - 1) / kAccessPatternFrameSize;
@@ -1361,7 +1361,7 @@
       }
       size_t frame = std::stoi(frame_and_count[0]);
       size_t count = std::stoi(frame_and_count[1]);
-      if (frame > access_pattern.size()) {
+      if (frame >= access_pattern.size()) {
         THROW("access pattern exceeded end of file");
       }
       if (count > 0) {
diff --git a/tests/bloaty_report_test.cc b/tests/bloaty_report_test.cc
index f03fff6..355991a 100644
--- a/tests/bloaty_report_test.cc
+++ b/tests/bloaty_report_test.cc
@@ -60,14 +60,15 @@
   EXPECT_NEAR(10.3, report.vm_total() / 1024.0, .1);
 }
 
-TEST_F(BloatyTest, ProtobufOutputFilterHotSymbols) {
+TEST_F(BloatyTest, ProtobufOutputFilterHotSymbolsAllHot) {
   std::string file = "05-binary.bin";
   uint64_t size;
   ASSERT_TRUE(GetFileSize(file, &size));
 
   RunBloaty({"bloaty", "-d", "accesspattern,compileunits,symbols", file,
              // The first 32 KiB frame is accessed 100 times.
-             "--cold-bytes-filter", "0:100"});
+             "--cold-bytes-filter", "0:100",
+             "--access-pattern-frame-size", "32768"});
 
   // Select protobuf output.
   std::ostringstream stream;
@@ -89,6 +90,46 @@
   EXPECT_NEAR(10.3, report.vm_total() / 1024.0, .1);
 }
 
+TEST_F(BloatyTest, ProtobufOutputFilterHotSymbols) {
+  std::string file = "05-binary.bin";
+  uint64_t size;
+  ASSERT_TRUE(GetFileSize(file, &size));
+
+  RunBloaty({"bloaty", "-d", "accesspattern,compileunits,symbols", file,
+             // No --access-pattern-frame-size: the 8 KiB default applies.
+             // Only the first 8 KiB frame is listed (accessed 100 times).
+             "--cold-bytes-filter", "0:100"});
+
+  // Render the report in protobuf format into an in-memory stream.
+  std::ostringstream stream;
+  bloaty::OutputOptions options;
+  options.output_format = bloaty::OutputFormat::kProtobuf;
+  output_->Print(options, &stream);
+
+  // Parse the serialized report back so its fields can be checked.
+  bloaty_report::Report report;
+  ASSERT_TRUE(report.ParseFromString(stream.str()));
+
+  // This test is only run on x86_64.
+  // The test binary `05-binary.bin` is around 14 KiB, i.e. larger than the
+  // single 8 KiB frame listed above, so compile units should be reported.
+  ASSERT_EQ(report.compile_units().size(), 13);
+
+  EXPECT_NEAR(14.1, static_cast<double>(size) / 1024.0, .1);
+  EXPECT_NEAR(14.1, report.file_total() / 1024.0, .1);
+  EXPECT_NEAR(10.3, report.vm_total() / 1024.0, .1);
+}
+
+TEST_F(BloatyTest, ProtobufOutputFilterHotSymbolsPatternTooLarge) {
+  std::string file = "05-binary.bin";
+  std::string errmsg = "access pattern exceeded end of file";
+  AssertBloatyFails({"bloaty", "-d", "accesspattern,compileunits,symbols", file,
+                     // Default frame size is 8 KiB, so the ~14 KiB file only
+                     // has frames 0 and 1. Naming a third frame (index 2)
+                     // is out of range and must be rejected.
+                     "--cold-bytes-filter", "0:100,1:100,2:100"}, errmsg);
+}
+
 // Here is a regular bloaty print-out on the same file for reference:
 //
 //    FILE SIZE        VM SIZE