[catapult] Generalize HistogramSet conversion algo

* This change makes it so that tracing tests can also be
  converted to HistogramSets.

IN-330

Change-Id: I3d57ce17c795fd1a6c99a49e6143511cf92e99b2
diff --git a/catapult/histogram.go b/catapult/histogram.go
index b8cfada..56da838 100644
--- a/catapult/histogram.go
+++ b/catapult/histogram.go
@@ -61,41 +61,84 @@
 	h.Diagnostics[name] = guid
 }
 
-// ConvertBenchmarkDataToHistograms converts a collection of Fuchsia benchmark
-// variants into a list of Catapult Histograms.
-func ConvertBenchmarkDataToHistograms(data []schema.BenchmarkData) ([]Histogram, error) {
-	var histograms []Histogram
-	for _, benchmarkData := range data {
-		histogram, err := createHistogram(benchmarkData)
-		if err != nil {
-			return []Histogram{}, err
-		}
-		histograms = append(histograms, histogram)
+// ConvertBenchmarkDataToHistograms converts BenchmarkData to Histograms.
+//
+// A BenchmarkData contains one or more Sample objects. This conversion works
+// differently based on the labels of those samples:
+//
+// * If all sample labels are empty, a single Histogram is created for the
+//   BenchmarkData. It contains all sample values and its name is set to
+//   BenchmarkData.Label. Zircon benchmarks results are an example of samples
+//   without labels.
+// * If all sample labels are non-empty, a Histogram is created for each sample.
+//   It contains only that sample's values and its name is set to
+//   "{BenchmarkData.Label}_{Sample.Label}". Some tracing-based benchmark
+//   results are examples of samples with labels.
+//
+// It does not make sense for some labels to be non-empty while others are empty
+// because there is no way to determine how the benchmark author really wants
+// this information to be organized in the Catapult dashboard. An error is
+// returned in this case.
+//
+// This function assumes that all non-empty sample labels are unique to their
+// parent BenchmarkData. Non-unique names may result in confusing data in the
+// Catapult dashboard.
+//
+// TODO(IN-330): We should have a schema that removes this ambiguity in sample
+// labelling.
+func ConvertBenchmarkDataToHistograms(d schema.BenchmarkData) ([]Histogram, error) {
+	if len(d.Samples) == 0 {
+		return nil, errors.New("BenchmarkData has no samples")
 	}
 
-	return histograms, nil
+	samplesHaveLabels, err := checkSampleLabels(d.Samples)
+	if err != nil {
+		return nil, err
+	}
+
+	if samplesHaveLabels {
+		// Samples are labeled. Create a Histogram for each one.
+		var histograms []Histogram
+		for _, sample := range d.Samples {
+			histogram, err := createHistogram(d.Label+"_"+sample.Label, sample.Values)
+			if err != nil {
+				return nil, err
+			}
+			histograms = append(histograms, histogram)
+		}
+		return histograms, nil
+	} else {
+		// Samples are unlabeled. Concat all data into a single Histogram.
+		var sampleValues []float64
+		for _, sample := range d.Samples {
+			sampleValues = append(sampleValues, sample.Values...)
+		}
+		histogram, err := createHistogram(d.Label, sampleValues)
+		return []Histogram{histogram}, err
+	}
 }
 
-// Creates a histogram from the given variant and benchmark data set.
+// createHistogram creates a Histogram with the given name.
 //
-// TODO(kjharland): Generalize to support non-zircon benchmarks once I have
-// a better idea of how other benchmarks will be converted.
-func createHistogram(d schema.BenchmarkData) (Histogram, error) {
+// The histogram's statistics are computed from the given slice of values, which
+// are assumed to be nanosecond measurements.
+//
+// Returns an error if values is empty.
+func createHistogram(name string, values []float64) (Histogram, error) {
 	var sampleValues []float64
-	for _, sample := range d.Samples {
-		for _, value := range sample.Values {
-			// All zircon benchmarks use nanoseconds. Catapult doesn't support this,
-			// so convert to milliseconds instead.
-			sampleValues = append(sampleValues, value/1e6)
-		}
-	}
 
-	if len(sampleValues) == 0 {
+	if len(values) == 0 {
 		return Histogram{}, errors.New("at least one sample value required")
 	}
 
+	for _, value := range values {
+		// Fuchsia benchmarks use nanoseconds. Catapult doesn't support this,
+		// so convert to milliseconds instead.
+		sampleValues = append(sampleValues, value/1e6)
+	}
+
 	return Histogram{
-		Name:    d.Label,
+		Name:    name,
 		Unit:    "ms_smallerIsBetter",
 		GUID:    uuid.NewV4().String(),
 		NumNans: 0, // All samples are numeric values
@@ -160,3 +203,26 @@
 
 	return
 }
+
+// checkSampleLabels checks whether the given samples have non-empty labels.
+//
+// Returns true iff all samples are labeled. Returns an error if the samples
+// are inconsistently labeled or the samples slice is empty.
+func checkSampleLabels(samples []schema.Sample) (bool, error) {
+	if len(samples) == 0 {
+		return false, errors.New("sample list is empty")
+	}
+
+	samplesShouldHaveLabels := samples[0].Label != ""
+
+	// Verify that all samples are consistently labeled.
+	for _, sample := range samples {
+		// A sample whose labeling differs from the first sample's is inconsistent.
+		sampleHasLabel := sample.Label != ""
+		if samplesShouldHaveLabels != sampleHasLabel {
+			return false, errors.New("some samples are missing labels")
+		}
+	}
+
+	return samplesShouldHaveLabels, nil
+}
diff --git a/catapult/histogram_test.go b/catapult/histogram_test.go
index 4e882cf..6b4ce92 100644
--- a/catapult/histogram_test.go
+++ b/catapult/histogram_test.go
@@ -17,17 +17,17 @@
 // TODO(kjharland): Add tests to handle corner cases for running statistics
 //  computations.
 func TestConvertVariantsToHistograms(t *testing.T) {
-	// Returns a slice of BenchmarkData for testing with the given sample values.
-	createBenchmarkData := func(sampleValues []float64) []schema.BenchmarkData {
-		return []schema.BenchmarkData{
-			{
-				Label: "example_benchmark_data",
-				Unit:  "ns",
-				Samples: []schema.Sample{
-					{
-						Label:  "example_sample",
-						Values: sampleValues,
-					},
+	// Returns BenchmarkData for testing with the given sample values.
+	//
+	// TODO(kjharland): Inline this.
+	createBenchmarkData := func(sampleValues []float64) schema.BenchmarkData {
+		return schema.BenchmarkData{
+			Label: "example_benchmark_data",
+			Unit:  "ns",
+			Samples: []schema.Sample{
+				{
+					Label:  "example_sample",
+					Values: sampleValues,
 				},
 			},
 		}
@@ -35,16 +35,13 @@
 
 	// Expects that the actual Histogram matches the expected Histogram.
 	expectHistogram := func(actual *Histogram, expected *Histogram) {
-		assert.Equal(t, actual.Name, expected.Name)
-		assert.Equal(t, actual.Unit, expected.Unit)
-		assert.Equal(t, actual.MaxNumSampleValues, expected.MaxNumSampleValues)
-		assert.Equal(t, actual.NumNans, expected.NumNans)
-		assert.Equal(t, actual.Description, expected.Description)
-		assert.Equal(t, actual.Diagnostics, expected.Diagnostics)
-		assert.ElementsMatch(t, actual.Running, expected.Running)
-
-		// TODO(kjharland): Mock this so that we don't have to directly assert that the
-		// GUID is 36 chars.
+		assert.Equal(t, expected.Name, actual.Name)
+		assert.Equal(t, expected.Unit, actual.Unit)
+		assert.Equal(t, expected.MaxNumSampleValues, actual.MaxNumSampleValues)
+		assert.Equal(t, expected.NumNans, actual.NumNans)
+		assert.Equal(t, expected.Description, actual.Description)
+		assert.Equal(t, expected.Diagnostics, actual.Diagnostics)
+		assert.ElementsMatch(t, expected.Running, actual.Running)
 		assert.Len(t, actual.GUID, 36)
 	}
 
@@ -59,13 +56,13 @@
 		histograms, err := ConvertBenchmarkDataToHistograms(
 			createBenchmarkData([]float64{5}))
 		if err != nil {
-			t.Fatal("failed to create histogram: ", err)
+			t.Fatal("failed to create histogram", err)
 		}
 
 		assert.Equal(t, len(histograms), 1)
 
 		expectHistogram(&histograms[0], &Histogram{
-			Name:               "example_benchmark_data",
+			Name:               "example_benchmark_data_example_sample",
 			Unit:               "ms_smallerIsBetter",
 			MaxNumSampleValues: 1,
 			NumNans:            0,
@@ -85,13 +82,16 @@
 		histograms, err := ConvertBenchmarkDataToHistograms(
 			createBenchmarkData([]float64{7, 11}))
 		if err != nil {
-			t.Fatal("failed to create histogram: ", err)
+			t.Fatal("failed to create histogram", err)
 		}
 
 		assert.Equal(t, len(histograms), 1)
 
 		expectHistogram(&histograms[0], &Histogram{
-			Name:               "example_benchmark_data",
+			// Name should be "{BenchmarkData.Label}_{Sample.Label}". The example
+			// data has BenchmarkData.Label == "example_benchmark_data" and
+			// Sample.Label == "example_sample".
+			Name:               "example_benchmark_data_example_sample",
 			Unit:               "ms_smallerIsBetter",
 			MaxNumSampleValues: 2,
 			NumNans:            0,
@@ -111,13 +111,13 @@
 		histograms, err := ConvertBenchmarkDataToHistograms(
 			createBenchmarkData([]float64{10, 20, 30, 40, 50}))
 		if err != nil {
-			t.Fatal("failed to create histogram: ", err)
+			t.Fatal("failed to create histogram", err)
 		}
 
 		assert.Equal(t, len(histograms), 1)
 
 		expectHistogram(&histograms[0], &Histogram{
-			Name:               "example_benchmark_data",
+			Name:               "example_benchmark_data_example_sample",
 			Unit:               "ms_smallerIsBetter",
 			MaxNumSampleValues: 5,
 			NumNans:            0,
@@ -132,6 +132,115 @@
 			},
 		})
 	})
+
+	t.Run("when some samples have labels and others don't", func(t *testing.T) {
+		_, err := ConvertBenchmarkDataToHistograms(schema.BenchmarkData{
+			Label: "example_benchmark_data",
+			Unit:  "ns",
+			Samples: []schema.Sample{{
+				Label:  "example_sample",
+				Values: []float64{1, 2, 3},
+			}, {
+				// No label
+				Values: []float64{4, 5, 6},
+			}},
+		})
+
+		if err == nil {
+			t.Error("expected an error but got nil")
+		}
+	})
+
+	t.Run("when all samples are labeled", func(t *testing.T) {
+		histograms, err := ConvertBenchmarkDataToHistograms(schema.BenchmarkData{
+			Label: "example_benchmark_data",
+			Unit:  "ns",
+			Samples: []schema.Sample{{
+				Label:  "example_sample_a",
+				Values: []float64{10, 20, 30, 40, 50},
+			}, {
+				Label:  "example_sample_b",
+				Values: []float64{10, 20, 30, 40, 50},
+			}},
+		})
+
+		if err != nil {
+			t.Fatal("failed to create histogram", err)
+		}
+
+		if len(histograms) != 2 {
+			t.Fatalf("should have created two histograms, instead got %v", histograms)
+		}
+
+		expectHistogram(&histograms[0], &Histogram{
+			Name:               "example_benchmark_data_example_sample_a",
+			Unit:               "ms_smallerIsBetter",
+			MaxNumSampleValues: 5,
+			NumNans:            0,
+			Running: []float64{
+				5,                      // count
+				5e-05,                  // max
+				-4.5841637507904744,    // meanlogs
+				3.0000000000000004e-05, // mean
+				1e-5, // min
+				1.5000000000000001e-4, // sum
+				2.5e-10,               // variance
+			},
+		})
+
+		expectHistogram(&histograms[1], &Histogram{
+			Name:               "example_benchmark_data_example_sample_b",
+			Unit:               "ms_smallerIsBetter",
+			MaxNumSampleValues: 5,
+			NumNans:            0,
+			Running: []float64{
+				5,                      // count
+				5e-05,                  // max
+				-4.5841637507904744,    // meanlogs
+				3.0000000000000004e-05, // mean
+				1e-5, // min
+				1.5000000000000001e-4, // sum
+				2.5e-10,               // variance
+			},
+		})
+	})
+
+	t.Run("when all samples are unlabeled", func(t *testing.T) {
+		histograms, err := ConvertBenchmarkDataToHistograms(schema.BenchmarkData{
+			Label: "example_benchmark_data",
+			Unit:  "ns",
+			Samples: []schema.Sample{{
+				// no labels
+				Values: []float64{5, 10, 15, 20, 25},
+			}, {
+				Values: []float64{5, 10, 15, 20, 25},
+			}},
+		})
+
+		if err != nil {
+			t.Fatal("failed to create histogram", err)
+		}
+
+		if len(histograms) != 1 {
+			t.Fatalf("should have created one histogram, instead got %v", histograms)
+		}
+
+		expectHistogram(&histograms[0], &Histogram{
+			Name:               "example_benchmark_data",
+			Unit:               "ms_smallerIsBetter",
+			MaxNumSampleValues: 10,
+			NumNans:            0,
+			Running: []float64{
+				10,                     // count
+				2.5e-5,                 // max
+				-4.885193746454457,     // meanlogs
+				1.5000000000000002e-05, // mean
+				5e-06, // min
+				0.00015000000000000001, // sum
+				5.555555555555557e-11,  // variance
+			},
+		})
+	})
 }
 
 func TestHistogram_AddDiagnostic(t *testing.T) {
diff --git a/cmd/catapult/make_histogram.go b/cmd/catapult/make_histogram.go
index a27e0ff..3158465 100644
--- a/cmd/catapult/make_histogram.go
+++ b/cmd/catapult/make_histogram.go
@@ -140,18 +140,22 @@
 
 	histogramSet := new(catapult.HistogramSet)
 
-	var benchmarkData []schema.BenchmarkData
+	var benchmarkDatas []schema.BenchmarkData
 
-	if err := json.Unmarshal(blob, &benchmarkData); err != nil {
+	if err := json.Unmarshal(blob, &benchmarkDatas); err != nil {
 		log.Println(err)
 		return subcommands.ExitFailure
 	}
 
 	// Generate Histograms from test results.
-	histograms, err := catapult.ConvertBenchmarkDataToHistograms(benchmarkData)
-	if err != nil {
-		log.Println("failed to create histograms:", err)
-		return subcommands.ExitFailure
+	var histograms []catapult.Histogram
+	for _, benchmarkData := range benchmarkDatas {
+		newHistograms, err := catapult.ConvertBenchmarkDataToHistograms(benchmarkData)
+		if err != nil {
+			log.Println("failed to create histograms:", err)
+			return subcommands.ExitFailure
+		}
+		histograms = append(histograms, newHistograms...)
 	}
 
 	// Track whether any single Histogram addition failed. We do this instead of
@@ -225,7 +229,7 @@
 		ContentType: "application/histogram-set+json",
 		Type:        streamproto.StreamType(logpb.StreamType_TEXT),
 	})
-        defer stream.Close()
+	defer stream.Close()
 
 	if err != nil {
 		return 0, err