[local aggregation] Migrate to FF64 in local aggregation

Soft migrate to FF64 in string histogram aggregation and at least once string aggregation.

Note that FF64 is hard migrated in at least once string aggregation for multi-days report observation generation, i.e. the aggregated data using legacy hash are dropped when generating observation for multi-days report.

Bug: 321745113
Tested: ./cobaltb.py test
Change-Id: I2ce81a2633ed4cb64ab470a5166398946f3fc40c
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt/+/1014313
Commit-Queue: Anivia Li <aniviali@google.com>
Reviewed-by: Cameron Dale <camrdale@google.com>
Reviewed-by: Alex Pankhurst <pankhurst@google.com>
diff --git a/src/local_aggregation/aggregation_procedures/aggregation_procedure.cc b/src/local_aggregation/aggregation_procedures/aggregation_procedure.cc
index 72876bc..01c3fca 100644
--- a/src/local_aggregation/aggregation_procedures/aggregation_procedure.cc
+++ b/src/local_aggregation/aggregation_procedures/aggregation_procedure.cc
@@ -329,6 +329,23 @@
   return current_time_info;
 }
 
+namespace {
+
+// Create aggregate data needed to generate a single string histogram observation for an
+// aggregation period bucket. Use legacy hashes if they're present. Use Farmhash Fingerprint 64
+// hashes otherwise.
+//
+// TODO(https://fxbug.dev/322409910): Delete usage of legacy hash after clients no longer
+// store them.
+AggregateDataToGenerate GetAggregateDataToGenerateForFF64Migration(AggregationPeriodBucket *agg) {
+  if (!agg->string_hashes().empty()) {
+    return AggregateDataToGenerate(agg->string_hashes(), /*use_legacy_hash=*/true);
+  }
+  return AggregateDataToGenerate(agg->string_hashes_ff64(), /*use_legacy_hash=*/false);
+}
+
+}  // namespace
+
 std::map<uint64_t, std::vector<AggregateDataToGenerate>>
 AggregationProcedure::GetAggregateDataToGenerate(const util::TimeInfo &time_info,
                                                  ReportAggregate &aggregate) const {
@@ -348,7 +365,7 @@
           continue;
         }
         AggregationPeriodBucket *agg = &(*aggregate.mutable_daily()->mutable_by_day_index())[i];
-        AggregateDataToGenerate agg_to_generate(agg->string_hashes());
+        AggregateDataToGenerate agg_to_generate = GetAggregateDataToGenerateForFF64Migration(agg);
         for (SystemProfileAggregate &system_profile_aggregate :
              *agg->mutable_system_profile_aggregates()) {
           // For SELECT_FIRST and SELECT_LAST there should only be one SystemProfileAggregate, but
@@ -383,7 +400,7 @@
         AggregationPeriodBucket *agg = &(*aggregate.mutable_daily()->mutable_by_day_index())[i];
         for (SystemProfileAggregate &system_profile_aggregate :
              *agg->mutable_system_profile_aggregates()) {
-          AggregateDataToGenerate agg_to_generate(agg->string_hashes());
+          AggregateDataToGenerate agg_to_generate = GetAggregateDataToGenerateForFF64Migration(agg);
           for (EventCodesAggregateData &data : *system_profile_aggregate.mutable_by_event_code()) {
             agg_to_generate.aggregate_data.push_back(data);
           }
@@ -398,7 +415,7 @@
           &(*aggregate.mutable_hourly()->mutable_by_hour_id())[start_time_info.hour_id];
       for (SystemProfileAggregate &system_profile_aggregate :
            *agg->mutable_system_profile_aggregates()) {
-        AggregateDataToGenerate agg_to_generate(agg->string_hashes());
+        AggregateDataToGenerate agg_to_generate = GetAggregateDataToGenerateForFF64Migration(agg);
         for (EventCodesAggregateData &data : *system_profile_aggregate.mutable_by_event_code()) {
           agg_to_generate.aggregate_data.push_back(data);
         }
diff --git a/src/local_aggregation/aggregation_procedures/aggregation_procedure.h b/src/local_aggregation/aggregation_procedures/aggregation_procedure.h
index a094368..23b6c0b 100644
--- a/src/local_aggregation/aggregation_procedures/aggregation_procedure.h
+++ b/src/local_aggregation/aggregation_procedures/aggregation_procedure.h
@@ -33,13 +33,17 @@
 // For multi-day reports, multiple of these objects are needed to generate the observation. When
 // generating observations, all AggregateDataToGenerate and all the aggregate_data they contain must
 // be for the same system profile.
+//
+// TODO(https://fxbug.dev/322409910): Delete usage of |use_legacy_hash| after clients no longer
+// store them.
 struct AggregateDataToGenerate {
   std::vector<std::reference_wrapper<EventCodesAggregateData>> aggregate_data;
   const google::protobuf::RepeatedPtrField<std::string> &string_hashes;
+  bool use_legacy_hash;
 
   explicit AggregateDataToGenerate(
-      const google::protobuf::RepeatedPtrField<std::string> &string_hashes)
-      : string_hashes(string_hashes) {}
+      const google::protobuf::RepeatedPtrField<std::string> &string_hashes, bool use_legacy_hash)
+      : string_hashes(string_hashes), use_legacy_hash(use_legacy_hash) {}
 
   // Make the struct move only
   AggregateDataToGenerate(AggregateDataToGenerate const &) = delete;
diff --git a/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure.cc b/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure.cc
index c7a5aca..8da595f 100644
--- a/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure.cc
+++ b/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure.cc
@@ -30,14 +30,22 @@
   Map<uint32_t, UniqueString> *unique_strings =
       aggregate_data.mutable_unique_strings()->mutable_unique_strings();
 
+  // TODO(https://fxbug.dev/322409910): Delete usage of legacy hash after clients no longer store
+  // them. Continue to use legacy hashes if they're already stored in the current aggregation period
+  // bucket. Use Farmhash Fingerprint 64 hashes otherwise.
+  bool use_legacy_hash = !bucket.string_hashes().empty();
   std::string bytes =
-      util::FarmhashFingerprint(event_record.event()->string_event().string_value());
+      use_legacy_hash
+          ? util::FarmhashFingerprint(event_record.event()->string_event().string_value())
+          : util::FarmhashFingerprint64(event_record.event()->string_event().string_value());
+  const google::protobuf::RepeatedPtrField<std::string> &string_hashes =
+      use_legacy_hash ? bucket.string_hashes() : bucket.string_hashes_ff64();
 
   // Check if the current string event value's byte representation has appeared before in
   // the string hashes of the current period bucket, if so, then initialize a UniqueString message
   // if the index of the string hash doesn't exist in the current UniqueString mapping.
-  for (int i = 0; i < bucket.string_hashes_size(); i++) {
-    if (bucket.string_hashes(i) == bytes) {
+  for (int i = 0; i < string_hashes.size(); i++) {
+    if (string_hashes.at(i) == bytes) {
       if (!unique_strings->contains(i)) {
         (*unique_strings)[i] = UniqueString();
       }
@@ -45,10 +53,14 @@
     }
   }
 
-  if (bucket.string_hashes_size() < string_buffer_max_) {
+  if (string_hashes.size() < string_buffer_max_) {
     // Add new entry
-    (*unique_strings)[bucket.string_hashes_size()] = UniqueString();
-    bucket.add_string_hashes(bytes);
+    (*unique_strings)[string_hashes.size()] = UniqueString();
+    if (use_legacy_hash) {
+      bucket.add_string_hashes(bytes);
+    } else {
+      bucket.add_string_hashes_ff64(bytes);
+    }
   }
 }
 
@@ -93,10 +105,26 @@
   // of string hashes in the hashes vector above.
   std::map<std::string, uint32_t> seen_hashes;
 
-  // TODO(https://fxbug.dev/321745113): Support Farmhash Fingerprint 64 string hashes once they are
-  // supported in local aggregation, drop any legacy hashes if a multi-day report has a mix of
-  // legacy and ff64 across multiple days.
+  // Observation generation should use Farmhash Fingerprint 64 if a multi-day report has a mix of
+  // legacy and FF64 across multiple days. Use legacy hashes only if `buckets` is non-empty and
+  // every bucket stores legacy hashes.
+  //
+  // TODO(https://fxbug.dev/322409910): Delete usage of legacy hash after clients no longer
+  // store them.
+  const bool generate_observation_use_legacy_hash =
+      !buckets.empty() && std::all_of(buckets.begin(), buckets.end(), [](const auto &b) {
+        return static_cast<bool>(b.use_legacy_hash);
+      });
+
   for (const AggregateDataToGenerate &bucket : buckets) {
+    // Drop aggregated data for any bucket that doesn't use the chosen string hash type.
+    // Note: buckets using FF64 string hashes are never expected to be dropped, because FF64
+    // takes precedence over the legacy hash when determining
+    // `generate_observation_use_legacy_hash`.
+    if (generate_observation_use_legacy_hash != bucket.use_legacy_hash) {
+      continue;
+    }
+
     for (const EventCodesAggregateData &aggregate_data : bucket.aggregate_data) {
       std::vector<uint32_t> event_vector(aggregate_data.event_codes().begin(),
                                          aggregate_data.event_codes().end());
@@ -143,7 +171,8 @@
     return {nullptr};
   }
 
-  return logger::encoder::EncodeStringHistogramObservation(hashes, data, /*use_legacy_hash=*/true);
+  return logger::encoder::EncodeStringHistogramObservation(hashes, data,
+                                                           generate_observation_use_legacy_hash);
 }
 
 void AtLeastOnceStringAggregationProcedure::ObservationsCommitted(
diff --git a/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure_test.cc b/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure_test.cc
index d6bd1bb..ae887c5 100644
--- a/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure_test.cc
+++ b/src/local_aggregation/aggregation_procedures/at_least_once_string_aggregation_procedure_test.cc
@@ -49,7 +49,72 @@
   }
 };
 
-TEST_F(AtLeastOnceStringAggregationProcedureTest, UpdateAggregate1DayReport) {
+// Test that the local aggregation continues to use legacy hash if it has legacy hash
+// stored.
+//
+// TODO(https://fxbug.dev/322409910): Delete this test after clients stop storing legacy hash.
+TEST_F(AtLeastOnceStringAggregationProcedureTest, UpdateAggregate1DayReportLegacy) {
+  uint32_t metric_id = kStringMetricMetricId;
+  int report_index = kStringMetricUniqueDeviceStringCountsReport1DayReportIndex;
+  util::PinnedUniquePtr<AggregationProcedure> procedure(GetProcedureFor(metric_id, report_index));
+
+  ReportAggregate report_aggregate;
+  const uint32_t kDayIndex = 10000;
+  const uint64_t system_profile_hash = uint64_t{2222};
+  const std::vector<std::string> kTestStrings = {
+      "Nunc dictum justo ac arcu.",
+      "Suspendisse ullamcorper mi vel pulvinar dictum.",
+  };
+  const std::vector<std::string> kTestStrings1 = {
+      kTestStrings.at(0),
+  };
+  const std::vector<std::string> kTestStrings2 = {
+      kTestStrings.at(0),
+      kTestStrings.at(1),
+  };
+  const std::map<uint32_t, std::vector<std::string>> events_to_strings = {
+      {0, kTestStrings1},
+      {2, kTestStrings2},
+  };
+  ASSERT_GE(GetReportDef(metric_id, report_index).event_vector_buffer_max(),
+            events_to_strings.size());
+
+  // Mock a legacy hash already stored in the current aggregation period bucket. The legacy hash
+  // should be used for local aggregation.
+  AggregationPeriodBucket bucket;
+  bucket.add_string_hashes(util::FarmhashFingerprint(kTestStrings[0]));
+  report_aggregate.mutable_daily()->mutable_by_day_index()->insert({kDayIndex, bucket});
+
+  AddStringEventsForDay(kDayIndex, events_to_strings, system_profile_hash, *procedure,
+                        report_aggregate);
+
+  std::vector<std::string> expected_hashes;
+  expected_hashes.reserve(kTestStrings.size());
+  for (const std::string& string : kTestStrings) {
+    expected_hashes.push_back(util::FarmhashFingerprint(string));
+  }
+
+  ASSERT_TRUE(report_aggregate.daily().by_day_index().contains(kDayIndex));
+  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_size(),
+            kTestStrings.size());
+  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_ff64_size(), 0u);
+  EXPECT_THAT(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes(),
+              UnorderedElementsAreArray(expected_hashes));
+  ASSERT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).system_profile_aggregates_size(),
+            1u);
+  const SystemProfileAggregate& system_profile_agg =
+      report_aggregate.daily().by_day_index().at(kDayIndex).system_profile_aggregates(0);
+  ASSERT_EQ(system_profile_agg.system_profile_hash(), system_profile_hash);
+
+  for (int i = 0; i < events_to_strings.size(); i++) {
+    std::vector<std::string> test_strings =
+        events_to_strings.at(system_profile_agg.by_event_code(i).event_codes(0));
+    ASSERT_EQ(system_profile_agg.by_event_code(i).data().unique_strings().unique_strings().size(),
+              test_strings.size());
+  }
+}
+
+TEST_F(AtLeastOnceStringAggregationProcedureTest, UpdateAggregate1DayReportFF64) {
   uint32_t metric_id = kStringMetricMetricId;
   int report_index = kStringMetricUniqueDeviceStringCountsReport1DayReportIndex;
   util::PinnedUniquePtr<AggregationProcedure> procedure(GetProcedureFor(metric_id, report_index));
@@ -81,13 +146,14 @@
   std::vector<std::string> expected_hashes;
   expected_hashes.reserve(kTestStrings.size());
   for (const std::string& string : kTestStrings) {
-    expected_hashes.push_back(util::FarmhashFingerprint(string));
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
   }
 
   ASSERT_TRUE(report_aggregate.daily().by_day_index().contains(kDayIndex));
-  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_size(),
+  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_size(), 0u);
+  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_ff64_size(),
             kTestStrings.size());
-  EXPECT_THAT(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes(),
+  EXPECT_THAT(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_ff64(),
               UnorderedElementsAreArray(expected_hashes));
   ASSERT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).system_profile_aggregates_size(),
             1u);
@@ -178,7 +244,113 @@
   EXPECT_EQ(merged_data.unique_strings().unique_strings().at(2).last_day_index(), 100);
 }
 
-TEST_F(AtLeastOnceStringAggregationProcedureTest, GenerateObservation1DayReport) {
+TEST_F(AtLeastOnceStringAggregationProcedureTest, GenerateObservation1DayReportFF64) {
+  uint32_t metric_id = kStringMetricMetricId;
+  int report_index = kStringMetricUniqueDeviceStringCountsReport1DayReportIndex;
+  util::PinnedUniquePtr<AggregationProcedure> procedure(GetProcedureFor(metric_id, report_index));
+
+  const uint64_t system_profile_hash = uint64_t{2222};
+  const uint32_t kDayIndex = 10000;
+  util::TimeInfo time_info;
+  time_info.day_index = kDayIndex;
+
+  ReportAggregate report_aggregate;
+  const std::vector<std::string> kTestStrings = {
+      "Nunc dictum justo ac arcu.",
+      "Suspendisse ullamcorper mi vel pulvinar dictum.",
+      "Aenean feugiat consectetur vestibulum.",
+  };
+  const std::vector<std::string> kTestStrings1 = {
+      kTestStrings.at(0),
+  };
+  const std::vector<std::string> kTestStrings2 = {
+      kTestStrings.at(1),
+      kTestStrings.at(2),
+  };
+  const std::vector<std::string> kTestStrings3 = {
+      kTestStrings.at(2),
+  };
+  const std::map<uint32_t, std::vector<std::string>> events_to_strings = {
+      {0, kTestStrings1},
+      {2, kTestStrings2},
+      {5, kTestStrings3},
+  };
+  ASSERT_GE(GetReportDef(metric_id, report_index).event_vector_buffer_max(),
+            events_to_strings.size());
+
+  const std::vector<std::string> kTestHashes1 = {
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+  };
+  const std::vector<std::string> kTestHashes2 = {
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
+  };
+  const std::vector<std::string> kTestHashes3 = {
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
+  };
+  const std::map<std::vector<std::string>, std::vector<std::string>> strings_to_hashes = {
+      {kTestStrings1, kTestHashes1},
+      {kTestStrings2, kTestHashes2},
+      {kTestStrings3, kTestHashes3},
+  };
+  AddStringEventsForDay(kDayIndex, events_to_strings, system_profile_hash, *procedure,
+                        report_aggregate);
+
+  lib::statusor::StatusOr<std::vector<ObservationAndSystemProfile>> observations_or =
+      procedure->GenerateObservations(time_info, report_aggregate);
+  ASSERT_EQ(observations_or.status().error_code(), StatusCode::OK);
+  std::vector<ObservationAndSystemProfile> observations = std::move(observations_or).value();
+
+  ASSERT_EQ(observations.size(), 1u);
+  EXPECT_EQ(observations[0].system_profile_hash, system_profile_hash);
+  ASSERT_TRUE(observations[0].observation->has_string_histogram());
+  const StringHistogramObservation& histogram = observations[0].observation->string_histogram();
+  ASSERT_EQ(histogram.string_histograms_size(), events_to_strings.size());
+
+  std::vector<std::string> expected_hashes;
+  expected_hashes.reserve(kTestStrings.size());
+  for (const std::string& string : kTestStrings) {
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
+  }
+  EXPECT_THAT(histogram.string_hashes_ff64(), UnorderedElementsAreArray(expected_hashes));
+
+  for (const IndexHistogram& value : histogram.string_histograms()) {
+    // These string vectors represent the expected (test) vectors of strings and hashes that the
+    // current event vector histogram should have.
+    const std::vector<std::string>& test_strings = events_to_strings.at(value.event_codes(0));
+    const std::vector<std::string>& test_hashes = strings_to_hashes.at(test_strings);
+
+    // This creates a vector of string hashes by fetching the string hashes that correspond to each
+    // bucket indices found in the current event vector histogram.
+    std::vector<std::string> actualHashes;
+    actualHashes.reserve(test_hashes.size());
+    for (const uint32_t index : value.bucket_indices()) {
+      actualHashes.push_back(histogram.string_hashes_ff64(static_cast<int>(index)));
+    }
+
+    // Assert that the created (actual) string hash vector has all of the same string hashes as the
+    // expected (test) string hash vector.
+    ASSERT_THAT(actualHashes, UnorderedElementsAreArray(test_hashes));
+  }
+
+  // Check that obsolete aggregates get cleaned up.
+  procedure->ObservationsCommitted(report_aggregate, time_info, system_profile_hash);
+  ASSERT_EQ(report_aggregate.daily().by_day_index_size(), 0);
+  EXPECT_FALSE(report_aggregate.daily().by_day_index().contains(kDayIndex));
+
+  // Check that calling observation generation the next day generates no observation.
+  time_info.day_index++;
+  observations_or = procedure->GenerateObservations(time_info, report_aggregate);
+  ASSERT_EQ(observations_or.status().error_code(), StatusCode::OK);
+  observations = std::move(observations_or).value();
+  EXPECT_EQ(observations.size(), 0u);
+}
+
+// Test that legacy hash is used for 1 day report observation generation if legacy hash is stored
+// for the local aggregation period.
+//
+// TODO(https://fxbug.dev/322409910): Delete this test after clients stop storing legacy hash.
+TEST_F(AtLeastOnceStringAggregationProcedureTest, GenerateObservation1DayReportLegacy) {
   uint32_t metric_id = kStringMetricMetricId;
   int report_index = kStringMetricUniqueDeviceStringCountsReport1DayReportIndex;
   util::PinnedUniquePtr<AggregationProcedure> procedure(GetProcedureFor(metric_id, report_index));
@@ -227,6 +399,13 @@
       {kTestStrings2, kTestHashes2},
       {kTestStrings3, kTestHashes3},
   };
+
+  // Mock a legacy hash already stored in the current aggregation period bucket. The legacy hash
+  // should be used for local aggregation.
+  AggregationPeriodBucket bucket;
+  bucket.add_string_hashes(util::FarmhashFingerprint(kTestStrings[0]));
+  report_aggregate.mutable_daily()->mutable_by_day_index()->insert({kDayIndex, bucket});
+
   AddStringEventsForDay(kDayIndex, events_to_strings, system_profile_hash, *procedure,
                         report_aggregate);
 
@@ -310,11 +489,11 @@
             events_to_strings.size());
 
   const std::vector<std::string> kTestHashes1 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
   };
   const std::vector<std::string> kTestHashes2 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
-      util::FarmhashFingerprint(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
   };
   const std::map<std::vector<std::string>, std::vector<std::string>> strings_to_hashes = {
       {kTestStrings1, kTestHashes1},
@@ -337,9 +516,9 @@
   std::vector<std::string> expected_hashes;
   expected_hashes.reserve(kTestStrings.size());
   for (const std::string& string : kTestStrings) {
-    expected_hashes.push_back(util::FarmhashFingerprint(string));
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
   }
-  EXPECT_THAT(histogram.string_hashes(), UnorderedElementsAreArray(expected_hashes));
+  EXPECT_THAT(histogram.string_hashes_ff64(), UnorderedElementsAreArray(expected_hashes));
 
   for (const IndexHistogram& value : histogram.string_histograms()) {
     // These string vectors represent the expected (test) vectors of strings and hashes that the
@@ -352,7 +531,7 @@
     std::vector<std::string> actualHashes;
     actualHashes.reserve(test_hashes.size());
     for (const uint32_t index : value.bucket_indices()) {
-      actualHashes.push_back(histogram.string_hashes(static_cast<int>(index)));
+      actualHashes.push_back(histogram.string_hashes_ff64(static_cast<int>(index)));
     }
 
     // Assert that the created (actual) string hash vector has all of the same string hashes as the
@@ -397,6 +576,123 @@
   EXPECT_FALSE(report_aggregate.daily().by_day_index().contains(kDayIndex));
 }
 
+// Test that the aggregated data with legacy string hashes are dropped when generating observation
+// for 7 days report.
+//
+// TODO(https://fxbug.dev/322409910): Delete this test after clients stop storing legacy hash.
+TEST_F(AtLeastOnceStringAggregationProcedureTest, GenerateObservation7DaysReportDropLegacy) {
+  uint32_t metric_id = kStringMetricMetricId;
+  int report_index = kStringMetricUniqueDeviceStringCountsReport7DaysReportIndex;
+  util::PinnedUniquePtr<AggregationProcedure> procedure(GetProcedureFor(metric_id, report_index));
+
+  const uint64_t system_profile_hash = uint64_t{2222};
+
+  ReportAggregate report_aggregate;
+  const std::vector<std::string> kTestStrings = {
+      "Nunc dictum justo ac arcu.",
+      "Suspendisse ullamcorper mi vel pulvinar dictum.",
+      "Integer a ullamcorper dolor.",
+  };
+  const std::vector<std::string> kTestStrings1 = {
+      kTestStrings.at(0),
+  };
+  const std::vector<std::string> kTestStrings2 = {
+      kTestStrings.at(1),
+      kTestStrings.at(2),
+  };
+  const std::vector<std::string> kTestHashes1 = {
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+  };
+  const std::vector<std::string> kTestHashes2 = {
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
+  };
+  const std::map<std::vector<std::string>, std::vector<std::string>> strings_to_hashes = {
+      {kTestStrings1, kTestHashes1},
+      {kTestStrings2, kTestHashes2},
+  };
+
+  const uint32_t kDayIndexDay1 = 10000;
+
+  const std::map<uint32_t, std::vector<std::string>> events_to_strings_day1 = {
+      {1, kTestStrings1},
+  };
+
+  // Mock a legacy hash already stored in the aggregation period bucket for Day 1. The legacy
+  // hash should be used for Day 1 local aggregation.
+  AggregationPeriodBucket bucket;
+  bucket.add_string_hashes(util::FarmhashFingerprint(kTestStrings[0]));
+  report_aggregate.mutable_daily()->mutable_by_day_index()->insert({kDayIndexDay1, bucket});
+  AddStringEventsForDay(kDayIndexDay1, events_to_strings_day1, system_profile_hash, *procedure,
+                        report_aggregate);
+
+  const uint32_t kDayIndexDay2 = kDayIndexDay1 + 1;
+  const std::map<uint32_t, std::vector<std::string>> events_to_strings_day2 = {
+      {0, kTestStrings1},
+      {2, kTestStrings2},
+  };
+  AddStringEventsForDay(kDayIndexDay2, events_to_strings_day2, system_profile_hash, *procedure,
+                        report_aggregate);
+
+  // The observation should only contain data from Day 2.
+  const std::map<uint32_t, std::vector<std::string>> events_to_hashes = {
+      {0, kTestHashes1},
+      {2, kTestHashes2},
+  };
+
+  std::vector<std::string> expected_hashes;
+  expected_hashes.reserve(kTestStrings.size());
+  for (const std::string& string : kTestStrings) {
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
+  }
+
+  util::TimeInfo time_info;
+  time_info.day_index = kDayIndexDay2;
+  lib::statusor::StatusOr<std::vector<ObservationAndSystemProfile>> observations_or =
+      procedure->GenerateObservations(time_info, report_aggregate);
+  ASSERT_EQ(observations_or.status().error_code(), StatusCode::OK);
+  std::vector<ObservationAndSystemProfile> observations = std::move(observations_or).value();
+
+  ASSERT_EQ(observations.size(), 1u);
+  EXPECT_EQ(observations[0].system_profile_hash, system_profile_hash);
+  ASSERT_TRUE(observations[0].observation->has_string_histogram());
+  const StringHistogramObservation& histogram = observations[0].observation->string_histogram();
+
+  // Check that the number of string histograms for the second day index is the number of unique
+  // events on the second day, because the first day uses legacy hashes and so its data is
+  // dropped.
+  ASSERT_EQ(histogram.string_histograms_size(), events_to_hashes.size());
+  EXPECT_THAT(histogram.string_hashes_ff64(), UnorderedElementsAreArray(expected_hashes));
+
+  for (const IndexHistogram& value : histogram.string_histograms()) {
+    // These string vectors represent the expected (test) vectors of string hashes that the
+    // current event vector histogram should have.
+    const std::vector<std::string>& test_hashes = events_to_hashes.at(value.event_codes(0));
+
+    // This creates a vector of string hashes by fetching the string hashes that correspond to
+    // each bucket indices found in the current event vector histogram.
+    std::vector<std::string> actualHashes;
+    actualHashes.reserve(test_hashes.size());
+    for (const uint32_t index : value.bucket_indices()) {
+      actualHashes.push_back(histogram.string_hashes_ff64(static_cast<int>(index)));
+    }
+
+    // Assert that the created (actual) string hash vector has all of the same string hashes as
+    // the expected (test) string hash vector.
+    ASSERT_THAT(actualHashes, UnorderedElementsAreArray(test_hashes));
+  }
+
+  // Commit observation
+  procedure->ObservationsCommitted(report_aggregate, time_info, system_profile_hash);
+
+  // After 7 days the observation is no longer generated.
+  time_info.day_index = kDayIndexDay2 + 7;
+  observations_or = procedure->GenerateObservations(time_info, report_aggregate);
+  ASSERT_EQ(observations_or.status().error_code(), StatusCode::OK);
+  observations = std::move(observations_or).value();
+  EXPECT_EQ(observations.size(), 0u);
+}
+
 TEST_F(AtLeastOnceStringAggregationProcedureTest, GenerateObservation7DaysReport) {
   uint32_t metric_id = kStringMetricMetricId;
   int report_index = kStringMetricUniqueDeviceStringCountsReport7DaysReportIndex;
@@ -426,11 +722,11 @@
             events_to_strings.size());
 
   const std::vector<std::string> kTestHashes1 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
   };
   const std::vector<std::string> kTestHashes2 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
-      util::FarmhashFingerprint(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
   };
   const std::map<std::vector<std::string>, std::vector<std::string>> strings_to_hashes = {
       {kTestStrings1, kTestHashes1},
@@ -442,7 +738,7 @@
   std::vector<std::string> expected_hashes;
   expected_hashes.reserve(kTestStrings.size());
   for (const std::string& string : kTestStrings) {
-    expected_hashes.push_back(util::FarmhashFingerprint(string));
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
   }
 
   for (int i = 0; i < 7; i++) {
@@ -457,7 +753,7 @@
     ASSERT_TRUE(observations[0].observation->has_string_histogram());
     const StringHistogramObservation& histogram = observations[0].observation->string_histogram();
     ASSERT_EQ(histogram.string_histograms_size(), events_to_strings.size());
-    EXPECT_THAT(histogram.string_hashes(), UnorderedElementsAreArray(expected_hashes));
+    EXPECT_THAT(histogram.string_hashes_ff64(), UnorderedElementsAreArray(expected_hashes));
 
     for (const IndexHistogram& value : histogram.string_histograms()) {
       // These string vectors represent the expected (test) vectors of strings and hashes that the
@@ -470,7 +766,7 @@
       std::vector<std::string> actualHashes;
       actualHashes.reserve(test_hashes.size());
       for (const uint32_t index : value.bucket_indices()) {
-        actualHashes.push_back(histogram.string_hashes(static_cast<int>(index)));
+        actualHashes.push_back(histogram.string_hashes_ff64(static_cast<int>(index)));
       }
 
       // Assert that the created (actual) string hash vector has all of the same string hashes as
@@ -545,21 +841,21 @@
       kTestStrings.at(2),
   };
   const std::vector<std::string> kTestHashes1 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
   };
   const std::vector<std::string> kTestHashes2 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
-      util::FarmhashFingerprint(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
   };
   const std::vector<std::string> kTestHashes3 = {
-      util::FarmhashFingerprint(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
   };
   const std::vector<std::string> kTestHashes4 = {
-      util::FarmhashFingerprint(kTestStrings.at(1)),
-      util::FarmhashFingerprint(kTestStrings.at(2)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
   };
   const std::vector<std::string> kTestHashes5 = {
-      util::FarmhashFingerprint(kTestStrings.at(2)),
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
   };
   const std::map<std::vector<std::string>, std::vector<std::string>> strings_to_hashes = {
       {kTestStrings1, kTestHashes1}, {kTestStrings2, kTestHashes2}, {kTestStrings3, kTestHashes3},
@@ -586,9 +882,9 @@
   // This a vector string hashes that is a combination of hashes across 2 days for a single event
   // vector.
   std::vector<std::string> kTestMultiDayHash = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
-      util::FarmhashFingerprint(kTestStrings.at(1)),
-      util::FarmhashFingerprint(kTestStrings.at(2)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
   };
   const std::map<uint32_t, std::vector<std::string>> events_to_hashes = {
       {0, kTestHashes1},
@@ -600,7 +896,7 @@
   std::vector<std::string> expected_hashes;
   expected_hashes.reserve(kTestStrings.size());
   for (const std::string& string : kTestStrings) {
-    expected_hashes.push_back(util::FarmhashFingerprint(string));
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
   }
 
   util::TimeInfo time_info;
@@ -621,7 +917,7 @@
   ASSERT_EQ(histogram.string_histograms_size(), events_to_hashes.size());
 
   // Check that the sting hashes from the histogram is the same as the expected string hashes.
-  EXPECT_THAT(histogram.string_hashes(), UnorderedElementsAreArray(expected_hashes));
+  EXPECT_THAT(histogram.string_hashes_ff64(), UnorderedElementsAreArray(expected_hashes));
 
   std::set<uint32_t> seen_event_vectors;
   for (const IndexHistogram& value : histogram.string_histograms()) {
@@ -641,7 +937,7 @@
     std::set<std::string> seen_string_hashes;
     for (const uint32_t index : value.bucket_indices()) {
       // Check that each string hash is unique for each event vector.
-      std::string string_hash = histogram.string_hashes(static_cast<int>(index));
+      std::string string_hash = histogram.string_hashes_ff64(static_cast<int>(index));
       ASSERT_FALSE(seen_string_hashes.count(string_hash));
       seen_string_hashes.insert(string_hash);
 
@@ -693,11 +989,11 @@
             events_to_strings.size());
 
   const std::vector<std::string> kTestHashes1 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
   };
   const std::vector<std::string> kTestHashes2 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
-      util::FarmhashFingerprint(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
   };
   const std::map<std::vector<std::string>, std::vector<std::string>> strings_to_hashes = {
       {kTestStrings1, kTestHashes1},
@@ -709,7 +1005,7 @@
   std::vector<std::string> expected_hashes;
   expected_hashes.reserve(kTestStrings.size());
   for (const std::string& string : kTestStrings) {
-    expected_hashes.push_back(util::FarmhashFingerprint(string));
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
   }
 
   for (int i = 0; i < 7; i++) {
@@ -724,7 +1020,7 @@
     ASSERT_TRUE(observations[0].observation->has_string_histogram());
     const StringHistogramObservation& histogram = observations[0].observation->string_histogram();
     ASSERT_EQ(histogram.string_histograms_size(), events_to_strings.size());
-    EXPECT_THAT(histogram.string_hashes(), UnorderedElementsAreArray(expected_hashes));
+    EXPECT_THAT(histogram.string_hashes_ff64(), UnorderedElementsAreArray(expected_hashes));
 
     for (const IndexHistogram& value : histogram.string_histograms()) {
       // These string vectors represent the expected (test) vectors of strings and hashes that the
@@ -737,7 +1033,7 @@
       std::vector<std::string> actualHashes;
       actualHashes.reserve(test_hashes.size());
       for (const uint32_t index : value.bucket_indices()) {
-        actualHashes.push_back(histogram.string_hashes(static_cast<int>(index)));
+        actualHashes.push_back(histogram.string_hashes_ff64(static_cast<int>(index)));
       }
 
       // Assert that the created (actual) string hash vector has all of the same string hashes as
@@ -826,7 +1122,7 @@
                         report_aggregate);
 
   EXPECT_LT(string_buffer_max, kTestStrings.size());
-  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_size(),
+  EXPECT_EQ(report_aggregate.daily().by_day_index().at(kDayIndex).string_hashes_ff64_size(),
             string_buffer_max);
 
   const SystemProfileAggregate& system_profile_agg =
@@ -867,14 +1163,14 @@
       kTestStrings.at(5),
   };
   const std::vector<std::string> kTestHashes1 = {
-      util::FarmhashFingerprint(kTestStrings.at(0)),
-      util::FarmhashFingerprint(kTestStrings.at(1)),
-      util::FarmhashFingerprint(kTestStrings.at(2)),
+      util::FarmhashFingerprint64(kTestStrings.at(0)),
+      util::FarmhashFingerprint64(kTestStrings.at(1)),
+      util::FarmhashFingerprint64(kTestStrings.at(2)),
   };
   const std::vector<std::string> kTestHashes2 = {
-      util::FarmhashFingerprint(kTestStrings.at(3)),
-      util::FarmhashFingerprint(kTestStrings.at(4)),
-      util::FarmhashFingerprint(kTestStrings.at(5)),
+      util::FarmhashFingerprint64(kTestStrings.at(3)),
+      util::FarmhashFingerprint64(kTestStrings.at(4)),
+      util::FarmhashFingerprint64(kTestStrings.at(5)),
   };
 
   const uint32_t kDayIndexDay1 = 10000;
@@ -898,8 +1194,8 @@
 
   // The total number of unique strings should be 6, which should be greater than the string buffer
   // max value of 5.
-  EXPECT_GT(report_aggregate.daily().by_day_index().at(kDayIndexDay1).string_hashes_size() +
-                report_aggregate.daily().by_day_index().at(kDayIndexDay2).string_hashes_size(),
+  EXPECT_GT(report_aggregate.daily().by_day_index().at(kDayIndexDay1).string_hashes_ff64_size() +
+                report_aggregate.daily().by_day_index().at(kDayIndexDay2).string_hashes_ff64_size(),
             string_buffer_max);
 
   // Generate the observation on the second day to generate an observation for the last 7 days,
@@ -920,7 +1216,7 @@
 
   // The observation should only have a max string hash size equal to or less than the string buffer
   // max. So check that the string hash size is equal to the string buffer max.
-  ASSERT_EQ(histogram.string_hashes_size(), string_buffer_max);
+  ASSERT_EQ(histogram.string_hashes_ff64_size(), string_buffer_max);
 
   for (const IndexHistogram& value : histogram.string_histograms()) {
     // This string vector represent the expected (test) vector of string hashes that the
@@ -932,7 +1228,7 @@
     std::vector<std::string> actualHashes;
     actualHashes.reserve(test_hashes.size());
     for (const uint32_t index : value.bucket_indices()) {
-      actualHashes.push_back(histogram.string_hashes(static_cast<int>(index)));
+      actualHashes.push_back(histogram.string_hashes_ff64(static_cast<int>(index)));
     }
 
     // Assert that the created (actual) string hash vector is a subset of the string hashes within
diff --git a/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure.cc b/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure.cc
index 4a53cb8..c40bf32 100644
--- a/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure.cc
+++ b/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure.cc
@@ -20,20 +20,32 @@
     AggregationPeriodBucket &bucket) {
   StringHistogram *histogram = aggregate_data.mutable_string_histogram();
 
+  // TODO(https://fxbug.dev/322409910): Delete usage of legacy hash after clients no longer store
+  // them. Continue to use legacy hashes if they're already stored in the current aggregation period
+  // bucket. Use Farmhash Fingerprint 64 hashes otherwise.
+  bool use_legacy_hash = !bucket.string_hashes().empty();
   std::string bytes =
-      util::FarmhashFingerprint(event_record.event()->string_event().string_value());
+      use_legacy_hash
+          ? util::FarmhashFingerprint(event_record.event()->string_event().string_value())
+          : util::FarmhashFingerprint64(event_record.event()->string_event().string_value());
+  const google::protobuf::RepeatedPtrField<std::string> &string_hashes =
+      use_legacy_hash ? bucket.string_hashes() : bucket.string_hashes_ff64();
 
-  for (int i = 0; i < bucket.string_hashes_size(); i++) {
-    if (bucket.string_hashes(i) == bytes) {
+  for (int i = 0; i < string_hashes.size(); i++) {
+    if (string_hashes.at(i) == bytes) {
       (*histogram->mutable_histogram())[i] += 1;
       return;
     }
   }
 
-  if (bucket.string_hashes_size() < string_buffer_max_) {
+  if (string_hashes.size() < string_buffer_max_) {
     // Add new entry
-    (*histogram->mutable_histogram())[bucket.string_hashes_size()] += 1;
-    bucket.add_string_hashes(bytes);
+    (*histogram->mutable_histogram())[string_hashes.size()] += 1;
+    if (use_legacy_hash) {
+      bucket.add_string_hashes(bytes);
+    } else {
+      bucket.add_string_hashes_ff64(bytes);
+    }
   }
 }
 
@@ -75,9 +87,7 @@
     hashes.push_back(hash);
   }
 
-  // TODO(https://fxbug.dev/321745113): Support Farmhash Fingerprint 64 string hashes once they are
-  // supported in local aggregation.
-  return logger::encoder::EncodeStringHistogramObservation(hashes, data, /*use_legacy_hash=*/true);
+  return logger::encoder::EncodeStringHistogramObservation(hashes, data, bucket.use_legacy_hash);
 }
 
 std::string StringHistogramAggregationProcedure::DebugString() const { return "STRING_HISTOGRAM"; }
diff --git a/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure_test.cc b/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure_test.cc
index 953eb99..8d2097e 100644
--- a/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure_test.cc
+++ b/src/local_aggregation/aggregation_procedures/string_histogram_aggregation_procedure_test.cc
@@ -45,7 +45,7 @@
   }
 };
 
-TEST_F(StringHistogramAggregationProcedureTest, UpdateAggregateWorks) {
+TEST_F(StringHistogramAggregationProcedureTest, UpdateAggregateFF64Works) {
   util::PinnedUniquePtr<AggregationProcedure> procedure(
       GetProcedureFor(kStringMetricMetricId, kStringMetricStringCountsReportIndex));
 
@@ -65,6 +65,46 @@
 
   ASSERT_EQ(aggregate.hourly().by_hour_id_size(), 1);
   ASSERT_EQ(aggregate.hourly().by_hour_id().at(kHourId).system_profile_aggregates_size(), 1u);
+  ASSERT_EQ(aggregate.hourly().by_hour_id().at(kHourId).string_hashes_size(), 0u);
+  ASSERT_EQ(aggregate.hourly().by_hour_id().at(kHourId).string_hashes_ff64_size(), 5u);
+  const SystemProfileAggregate& system_profile_agg =
+      aggregate.hourly().by_hour_id().at(kHourId).system_profile_aggregates(0);
+  EXPECT_EQ(system_profile_agg.system_profile_hash(), system_profile_hash);
+  ASSERT_EQ(system_profile_agg.by_event_code_size(), kNumEventCodes);
+}
+
+// Test that the aggregation period bucket continues to use legacy hash if it has legacy hash
+// stored.
+//
+// TODO(https://fxbug.dev/322409910): Delete this test after clients stop storing legacy hash.
+TEST_F(StringHistogramAggregationProcedureTest, UpdateAggregateLegacyWorks) {
+  util::PinnedUniquePtr<AggregationProcedure> procedure(
+      GetProcedureFor(kStringMetricMetricId, kStringMetricStringCountsReportIndex));
+
+  ReportAggregate aggregate;
+  const uint32_t kNumEventCodes = 100;
+  const uint32_t kHourId = 1;
+  const uint64_t system_profile_hash = uint64_t{111111};
+  const std::vector<std::string> kTestStrings = {
+      "Nunc dictum justo ac arcu.",
+      "Suspendisse ullamcorper mi vel pulvinar dictum.",
+      "Aenean feugiat consectetur vestibulum.",
+      "Integer a ullamcorper dolor.",
+      "Praesent vel nulla quis metus consectetur aliquam sed ut felis.",
+  };
+
+  // Mock that legacy hash is already used in the current aggregation period bucket.
+  AggregationPeriodBucket bucket;
+  bucket.add_string_hashes(util::FarmhashFingerprint(kTestStrings[0]));
+  aggregate.mutable_hourly()->mutable_by_hour_id()->insert({kHourId, bucket});
+
+  LogStringEvents(kHourId, kNumEventCodes, kTestStrings, system_profile_hash, *procedure,
+                  aggregate);
+
+  ASSERT_EQ(aggregate.hourly().by_hour_id_size(), 1);
+  ASSERT_EQ(aggregate.hourly().by_hour_id().at(kHourId).system_profile_aggregates_size(), 1u);
+  ASSERT_EQ(aggregate.hourly().by_hour_id().at(kHourId).string_hashes_size(), 5u);
+  ASSERT_EQ(aggregate.hourly().by_hour_id().at(kHourId).string_hashes_ff64_size(), 0u);
   const SystemProfileAggregate& system_profile_agg =
       aggregate.hourly().by_hour_id().at(kHourId).system_profile_aggregates(0);
   EXPECT_EQ(system_profile_agg.system_profile_hash(), system_profile_hash);
@@ -138,7 +178,7 @@
   EXPECT_EQ(merged_data.string_histogram().histogram().at(2), 40);
 }
 
-TEST_F(StringHistogramAggregationProcedureTest, GenerateObservationWorks) {
+TEST_F(StringHistogramAggregationProcedureTest, GenerateObservationFF64Works) {
   util::PinnedUniquePtr<AggregationProcedure> procedure(
       GetProcedureFor(kStringMetricMetricId, kStringMetricStringCountsReportIndex));
 
@@ -175,6 +215,66 @@
   std::vector<std::string> expected_hashes;
   expected_hashes.reserve(kTestStrings.size());
   for (const std::string& string : kTestStrings) {
+    expected_hashes.push_back(util::FarmhashFingerprint64(string));
+  }
+
+  for (const IndexHistogram& value : histogram.string_histograms()) {
+    for (int i = 0; i < value.bucket_indices_size(); i++) {
+      ASSERT_EQ(value.bucket_counts(i), 1);
+
+      ASSERT_THAT(expected_hashes, Contains(histogram.string_hashes_ff64(value.bucket_indices(i))));
+    }
+  }
+  // Check that obsolete aggregates get cleaned up.
+  procedure->ObservationsCommitted(aggregate, util::TimeInfo::FromHourId(kEndHourId),
+                                   system_profile_hash);
+  ASSERT_EQ(aggregate.hourly().by_hour_id_size(), 0);
+}
+
+// Test that observations are generated using legacy hash.
+TEST_F(StringHistogramAggregationProcedureTest, GenerateObservationLegacyWorks) {
+  util::PinnedUniquePtr<AggregationProcedure> procedure(
+      GetProcedureFor(kStringMetricMetricId, kStringMetricStringCountsReportIndex));
+
+  ReportAggregate aggregate;
+  const uint32_t kNumEventCodes = 10;
+  const uint32_t kEndHourId = 11;
+  const uint64_t system_profile_hash = uint64_t{111111};
+  const std::vector<std::string> kTestStrings = {
+      "Nunc dictum justo ac arcu.",
+      "Suspendisse ullamcorper mi vel pulvinar dictum.",
+      "Aenean feugiat consectetur vestibulum.",
+      "Integer a ullamcorper dolor.",
+      "Praesent vel nulla quis metus consectetur aliquam sed ut felis.",
+  };
+
+  // Mock that legacy hash is already used in the kEndHourId aggregation period bucket.
+  AggregationPeriodBucket bucket;
+  bucket.add_string_hashes(util::FarmhashFingerprint(kTestStrings[0]));
+  aggregate.mutable_hourly()->mutable_by_hour_id()->insert({kEndHourId, bucket});
+
+  for (int hour_id = 1; hour_id <= kEndHourId; hour_id += 2) {
+    LogStringEvents(hour_id, kNumEventCodes, kTestStrings, system_profile_hash, *procedure,
+                    aggregate);
+  }
+
+  lib::statusor::StatusOr<std::vector<ObservationAndSystemProfile>> observations_or =
+      procedure->GenerateObservations(util::TimeInfo::FromHourId(kEndHourId), aggregate);
+  ASSERT_EQ(observations_or.status().error_code(), StatusCode::OK);
+  std::vector<ObservationAndSystemProfile> observations = std::move(observations_or).value();
+
+  // Should only generate for kEndHourId
+  ASSERT_EQ(observations.size(), 1u);
+  EXPECT_EQ(observations[0].system_profile_hash, system_profile_hash);
+
+  ASSERT_EQ(observations[0].observation->string_histogram().string_histograms_size(),
+            kNumEventCodes);
+
+  const StringHistogramObservation& histogram = observations[0].observation->string_histogram();
+
+  std::vector<std::string> expected_hashes;
+  expected_hashes.reserve(kTestStrings.size());
+  for (const std::string& string : kTestStrings) {
     expected_hashes.push_back(util::FarmhashFingerprint(string));
   }