[feedback] Sort logs from previous boot

Logs from the previous boot are sorted when the files they're contained
in are concatenated together.

Fixed: 55087

TESTED=`fx test feedback-data-tests`
TESTED=`crasher` d56e0162550fb216

Change-Id: I1be2c374bb9475a2120d7937b30f506a51cd55a4
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/403566
Commit-Queue: Alex Pankhurst <pankhurst@google.com>
Reviewed-by: Daniel Salazar Villarreal <villarreald@google.com>
Testability-Review: Alex Pankhurst <pankhurst@google.com>
diff --git a/src/developer/forensics/feedback_data/attachments/utils.cc b/src/developer/forensics/feedback_data/attachments/utils.cc
index 8f404fa..dbc9c87 100644
--- a/src/developer/forensics/feedback_data/attachments/utils.cc
+++ b/src/developer/forensics/feedback_data/attachments/utils.cc
@@ -21,7 +21,7 @@
     const Attachments& attachments) {
   std::vector<fuchsia::feedback::Attachment> vec;
   for (const auto& [key, value] : attachments) {
-    if (!value.HasValue()) {
+    if (!value.HasValue() || value.Value().empty()) {
       continue;
     }
 
diff --git a/src/developer/forensics/feedback_data/system_log_recorder/BUILD.gn b/src/developer/forensics/feedback_data/system_log_recorder/BUILD.gn
index 19337d4..9732bc3 100644
--- a/src/developer/forensics/feedback_data/system_log_recorder/BUILD.gn
+++ b/src/developer/forensics/feedback_data/system_log_recorder/BUILD.gn
@@ -64,6 +64,7 @@
 
   deps = [
     "//sdk/lib/syslog/cpp",
+    "//src/lib/files",
     "//src/lib/fsl",
     "//src/lib/fxl",
     "//zircon/system/ulib/trace",
diff --git a/src/developer/forensics/feedback_data/system_log_recorder/reader.cc b/src/developer/forensics/feedback_data/system_log_recorder/reader.cc
index 49e6b6d..d5ad40b 100644
--- a/src/developer/forensics/feedback_data/system_log_recorder/reader.cc
+++ b/src/developer/forensics/feedback_data/system_log_recorder/reader.cc
@@ -6,54 +6,106 @@
 
 #include <assert.h>
 #include <lib/trace/event.h>
-#include <sys/stat.h>
 
 #include <fstream>
+#include <regex>
 #include <sstream>
 
+#include "src/lib/files/file.h"
 #include "src/lib/fsl/vmo/file.h"
+#include "src/lib/fxl/strings/split_string.h"
 
 namespace forensics {
 namespace feedback_data {
 namespace system_log_recorder {
-
 namespace {
 
-size_t GetFileSize(const std::string& file_path) {
-  struct stat file_status;
-  file_status.st_size = 0;
-  stat(file_path.c_str(), &file_status);
-  return file_status.st_size;
+// Check if the start of |line| is formatted like a log message by checking that the timestamp, pid,
+// and tid tags are present. The formatting is defined in
+// //src/developer/forensics/utils/log_format.h.
+//
+// Note: this definition of this function needs to in the same file as SortLog otherwise. We
+// experienced significant performance issue when this was not done and the log being sorted was
+// large.
+bool MatchesLogMessage(std::string_view line) {
+  std::regex line_start("^\\[\\d{5,9}\\.\\d{3}\\]\\[\\d{5,9}\\]\\[\\d{5,9}\\]");
+  return std::regex_search(line.cbegin(), line.cend(), line_start);
+}
+
+std::string SortLog(const std::string& log) {
+  // Sort the log by:
+  //   1) Splitting it into lines.
+  //   2) Merging multiline messages into a single message.
+  //   3) Stable sorting the messages by timestamp.
+  //   4) Combining the messages into a sorted log.
+
+  std::vector<std::string_view> lines = fxl::SplitString(
+      log, "\n", fxl::WhiteSpaceHandling::kKeepWhitespace, fxl::SplitResult::kSplitWantAll);
+
+  std::vector<std::string_view> messages;
+
+  // Update the end pointer of the last message in |messages|.
+  auto GrowTailMessage = [&messages](const char* new_end) mutable {
+    if (messages.empty()) {
+      return;
+    }
+
+    messages.back() = std::string_view(messages.back().data(), new_end - messages.back().data());
+  };
+
+  for (const std::string_view& line : lines) {
+    // If a new log message is found, update the last log message to span up until the new message.
+    if (MatchesLogMessage(line)) {
+      GrowTailMessage(line.data());
+      messages.push_back(line);
+    }
+  }
+  // The last log message needs to span until the end of the log.
+  GrowTailMessage(log.data() + log.size());
+
+  std::stable_sort(
+      messages.begin(), messages.end(), [](const std::string_view lhs, const std::string_view rhs) {
+        const std::string_view lhs_timestamp = lhs.substr(lhs.find('['), lhs.find(']'));
+        const std::string_view rhs_timestamp = rhs.substr(rhs.find('['), rhs.find(']'));
+        return lhs_timestamp < rhs_timestamp;
+      });
+
+  std::string sorted_log;
+  sorted_log.reserve(log.size());
+  for (const auto& message : messages) {
+    sorted_log.append(message);
+  }
+
+  return sorted_log;
 }
 
 }  // namespace
 
 bool Concatenate(const std::vector<const std::string>& input_file_paths, Decoder* decoder,
                  const std::string& output_file_path) {
-  size_t total_bytes = 0;
+  uint64_t log_size{0};
   for (auto path = input_file_paths.crbegin(); path != input_file_paths.crend(); ++path) {
-    total_bytes += GetFileSize(*path);
+    uint64_t size;
+    files::GetFileSize(*path, &size);
+    log_size += size;
   }
 
-  if (total_bytes == 0) {
+  if (log_size == 0) {
     return false;
   }
 
-  std::ofstream out(output_file_path, std::iostream::out | std::iostream::trunc);
-  if (!out.is_open()) {
+  std::string log;
+  for (auto path = input_file_paths.crbegin(); path != input_file_paths.crend(); ++path) {
+    fsl::SizedVmo vmo;
+    fsl::VmoFromFilename(*path, &vmo);
+    log += decoder->Decode(vmo);
+  }
+
+  if (log.empty()) {
     return false;
   }
 
-  for (auto path = input_file_paths.crbegin(); path != input_file_paths.crend(); ++path) {
-    fsl::SizedVmo block;
-    fsl::VmoFromFilename(*path, &block);
-    out << decoder->Decode(block);
-  }
-
-  out.flush();
-  out.close();
-
-  return true;
+  return files::WriteFile(output_file_path, SortLog(log));
 }
 
 }  // namespace system_log_recorder
diff --git a/src/developer/forensics/feedback_data/tests/BUILD.gn b/src/developer/forensics/feedback_data/tests/BUILD.gn
index cadcf58..d292432 100644
--- a/src/developer/forensics/feedback_data/tests/BUILD.gn
+++ b/src/developer/forensics/feedback_data/tests/BUILD.gn
@@ -146,6 +146,7 @@
     "//src/developer/forensics/testing/stubs:last_reboot_info_provider",
     "//src/developer/forensics/testing/stubs:logger",
     "//src/developer/forensics/testing/stubs:product_info_provider",
+    "//src/developer/forensics/utils:logs",
     "//src/developer/forensics/utils:time",
     "//src/lib/files",
     "//src/lib/fxl",
diff --git a/src/developer/forensics/feedback_data/tests/datastore_unittest.cc b/src/developer/forensics/feedback_data/tests/datastore_unittest.cc
index 74e1e3b..608b7fb 100644
--- a/src/developer/forensics/feedback_data/tests/datastore_unittest.cc
+++ b/src/developer/forensics/feedback_data/tests/datastore_unittest.cc
@@ -36,6 +36,7 @@
 #include "src/developer/forensics/testing/stubs/product_info_provider.h"
 #include "src/developer/forensics/testing/unit_test_fixture.h"
 #include "src/developer/forensics/utils/cobalt/logger.h"
+#include "src/developer/forensics/utils/log_format.h"
 #include "src/developer/forensics/utils/time.h"
 #include "src/lib/files/file.h"
 #include "src/lib/files/path.h"
@@ -45,6 +46,7 @@
 namespace feedback_data {
 namespace {
 
+using stubs::BuildLogMessage;
 using testing::Contains;
 using testing::ElementsAreArray;
 using testing::Eq;
@@ -453,7 +455,7 @@
 TEST_F(DatastoreTest, GetAttachments_PreviousSyslog) {
   std::string previous_log_contents = "";
   for (const auto& filepath : kCurrentLogsFilePaths) {
-    const std::string str = "Log for file:" + filepath + "\n";
+    const std::string str = Format(BuildLogMessage(FX_LOG_INFO, "Log for file: " + filepath));
     previous_log_contents = str + previous_log_contents;
     WriteFile(filepath, str);
   }
@@ -461,7 +463,7 @@
 
   ::fit::result<Attachments> attachments = GetAttachments();
   ASSERT_TRUE(attachments.is_ok());
-  EXPECT_THAT(attachments.take_value(),
+  EXPECT_THAT(attachments.value(),
               ElementsAreArray(
                   {Pair(kAttachmentLogSystemPrevious, AttachmentValue(previous_log_contents))}));
 
diff --git a/src/developer/forensics/feedback_data/tests/system_log_recorder_unittest.cc b/src/developer/forensics/feedback_data/tests/system_log_recorder_unittest.cc
index fd68288..cace73b 100644
--- a/src/developer/forensics/feedback_data/tests/system_log_recorder_unittest.cc
+++ b/src/developer/forensics/feedback_data/tests/system_log_recorder_unittest.cc
@@ -466,6 +466,84 @@
 )");
 }
 
+TEST(ReaderTest, SortsMessages) {
+  files::ScopedTempDir temp_dir;
+  const std::vector<const std::string> file_paths = MakeLogFilePaths(temp_dir, /*num_files=*/1);
+
+  LogMessageStore store(8 * 1024, 8 * 1024, MakeIdentityEncoder());
+  SystemLogWriter writer(file_paths, &store);
+
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 0", zx::msec(0))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 3", zx::msec(3))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 2", zx::msec(2))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 1", zx::msec(1))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 1.1", zx::msec(1))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "duplicated line", zx::msec(5))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "duplicated line", zx::msec(6))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "duplicated line", zx::msec(7))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "multi\nline\nmessage", zx::msec(4))));
+  writer.Write();
+
+  const std::string output_path = files::JoinPath(temp_dir.path(), "output.txt");
+  IdentityDecoder decoder;
+
+  ASSERT_TRUE(Concatenate(file_paths, &decoder, output_path));
+
+  std::string contents;
+  ASSERT_TRUE(files::ReadFileToString(output_path, &contents));
+  EXPECT_EQ(contents, R"([15604.000][07559][07687][] INFO: line 0
+[15604.001][07559][07687][] INFO: line 1
+[15604.001][07559][07687][] INFO: line 1.1
+[15604.002][07559][07687][] INFO: line 2
+[15604.003][07559][07687][] INFO: line 3
+[15604.004][07559][07687][] INFO: multi
+line
+message
+[15604.005][07559][07687][] INFO: duplicated line
+!!! MESSAGE REPEATED 2 MORE TIMES !!!
+)");
+}
+
+TEST(ReaderTest, SortsMessagesMultipleFiles) {
+  files::ScopedTempDir temp_dir;
+  const std::vector<const std::string> file_paths = MakeLogFilePaths(temp_dir, /*num_files=*/8);
+
+  // Set the block and buffer to both hold 4 log messages.
+  LogMessageStore store(kMaxLogLineSize * 4, kMaxLogLineSize * 4, MakeIdentityEncoder());
+  SystemLogWriter writer(file_paths, &store);
+
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 0", zx::msec(0))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 3", zx::msec(3))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 2", zx::msec(2))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line 1", zx::msec(1))));
+  writer.Write();
+
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line11", zx::msec(1))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "dup", zx::msec(5))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "dup", zx::msec(6))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "dup", zx::msec(7))));
+  EXPECT_TRUE(store.Add(BuildLogMessage(FX_LOG_INFO, "line\n4", zx::msec(4))));
+  writer.Write();
+
+  const std::string output_path = files::JoinPath(temp_dir.path(), "output.txt");
+  IdentityDecoder decoder;
+
+  ASSERT_TRUE(Concatenate(file_paths, &decoder, output_path));
+
+  std::string contents;
+  ASSERT_TRUE(files::ReadFileToString(output_path, &contents));
+  EXPECT_EQ(contents, R"([15604.000][07559][07687][] INFO: line 0
+[15604.001][07559][07687][] INFO: line 1
+[15604.001][07559][07687][] INFO: line11
+[15604.002][07559][07687][] INFO: line 2
+[15604.003][07559][07687][] INFO: line 3
+[15604.004][07559][07687][] INFO: line
+4
+[15604.005][07559][07687][] INFO: dup
+!!! MESSAGE REPEATED 2 MORE TIMES !!!
+)");
+}
+
 using SystemLogRecorderTest = UnitTestFixture;
 
 TEST_F(SystemLogRecorderTest, SingleThreaded_SmokeTest) {