[update_test_durations] Garbage-collect old files

Previously we would never delete old test duration files when the
corresponding builder got deleted, so we still have files sitting around
in the CIPD packages for builders that got deleted many years ago.

Also, we'd delete a test from a file as soon as the test hadn't run in 3
days, so if a test got disabled for, say, 10 days, when it got
re-enabled we would no longer have duration data for it.

This solve both of those problems, leaving test entries around for 30
days after they are last seen, and deleting builder files when no test
on that builder has run in the last 30 days. We do this by storing a
`last_seen` field in the JSON files that records the last date on which
the entry was recorded. For backwards compatibility, I'm having the tool
set the `last_seen` field to the current date if it's empty, so the
field will automatically be populated for existing old builder files,
and they will eventually be deleted 30 days after this lands.

Fixed: 42138833
Fixed: 42139134
Change-Id: Ib21439378f8b16690736084c2c2511d0917c3920
Reviewed-on: https://fuchsia-review.googlesource.com/c/infra/infra/+/1052652
Reviewed-by: Danielle Kay <danikay@google.com>
Commit-Queue: Auto-Submit <auto-submit@fuchsia-infra.iam.gserviceaccount.com>
Fuchsia-Auto-Submit: Oliver Newman <olivernewman@google.com>
diff --git a/cmd/update_test_durations/testdata/goldens/default.json b/cmd/update_test_durations/testdata/goldens/default.json
new file mode 100644
index 0000000..6d532ef
--- /dev/null
+++ b/cmd/update_test_durations/testdata/goldens/default.json
@@ -0,0 +1,26 @@
+[
+  {
+    "name": "*",
+    "runs": 0,
+    "median_duration_ms": 64,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "new_test",
+    "runs": 4,
+    "median_duration_ms": 12,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "recently_deleted",
+    "runs": 1,
+    "median_duration_ms": 1,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "still_existing_test",
+    "runs": 15,
+    "median_duration_ms": 82,
+    "last_seen": "0001-01-01T00:00:00Z"
+  }
+]
diff --git a/cmd/update_test_durations/testdata/goldens/foo.json b/cmd/update_test_durations/testdata/goldens/foo.json
new file mode 100644
index 0000000..fd89d8c
--- /dev/null
+++ b/cmd/update_test_durations/testdata/goldens/foo.json
@@ -0,0 +1,26 @@
+[
+  {
+    "name": "*",
+    "runs": 0,
+    "median_duration_ms": 10,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "new_test",
+    "runs": 4,
+    "median_duration_ms": 12,
+    "last_seen": "2023-11-23T00:00:00Z"
+  },
+  {
+    "name": "recently_deleted",
+    "runs": 1,
+    "median_duration_ms": 1,
+    "last_seen": "2023-11-03T00:00:00Z"
+  },
+  {
+    "name": "still_existing_test",
+    "runs": 3,
+    "median_duration_ms": 10,
+    "last_seen": "2023-11-23T00:00:00Z"
+  }
+]
diff --git a/cmd/update_test_durations/testdata/goldens/new-builder.json b/cmd/update_test_durations/testdata/goldens/new-builder.json
new file mode 100644
index 0000000..5e788a7
--- /dev/null
+++ b/cmd/update_test_durations/testdata/goldens/new-builder.json
@@ -0,0 +1,14 @@
+[
+  {
+    "name": "*",
+    "runs": 0,
+    "median_duration_ms": 100,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "still_existing_test",
+    "runs": 12,
+    "median_duration_ms": 100,
+    "last_seen": "2023-11-23T00:00:00Z"
+  }
+]
diff --git a/cmd/update_test_durations/testdata/old_durations/default.json b/cmd/update_test_durations/testdata/old_durations/default.json
new file mode 100644
index 0000000..286c80e
--- /dev/null
+++ b/cmd/update_test_durations/testdata/old_durations/default.json
@@ -0,0 +1,8 @@
+[
+  {
+    "name": "*",
+    "runs": 0,
+    "median_duration_ms": 5092,
+    "last_seen": "0001-01-01T00:00:00Z"
+  }
+]
diff --git a/cmd/update_test_durations/testdata/old_durations/deleted-builder.json b/cmd/update_test_durations/testdata/old_durations/deleted-builder.json
new file mode 100644
index 0000000..b9ce4ef
--- /dev/null
+++ b/cmd/update_test_durations/testdata/old_durations/deleted-builder.json
@@ -0,0 +1,14 @@
+[
+  {
+    "name": "*",
+    "runs": 0,
+    "median_duration_ms": 1,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "test_on_deleted_builder",
+    "runs": 1,
+    "median_duration_ms": 1,
+    "last_seen": "2023-10-01T00:00:00Z"
+  }
+]
diff --git a/cmd/update_test_durations/testdata/old_durations/foo.json b/cmd/update_test_durations/testdata/old_durations/foo.json
new file mode 100644
index 0000000..075fc43
--- /dev/null
+++ b/cmd/update_test_durations/testdata/old_durations/foo.json
@@ -0,0 +1,26 @@
+[
+  {
+    "name": "*",
+    "runs": 0,
+    "median_duration_ms": 1,
+    "last_seen": "0001-01-01T00:00:00Z"
+  },
+  {
+    "name": "deleted_more_than_a_month_ago",
+    "runs": 1,
+    "median_duration_ms": 1,
+    "last_seen": "2023-09-20T00:00:00Z"
+  },
+  {
+    "name": "recently_deleted",
+    "runs": 1,
+    "median_duration_ms": 1,
+    "last_seen": "2023-11-03T00:00:00Z"
+  },
+  {
+    "name": "still_existing_test",
+    "runs": 1,
+    "median_duration_ms": 1,
+    "last_seen": "2023-11-22T00:00:00Z"
+  }
+]
diff --git a/cmd/update_test_durations/update.go b/cmd/update_test_durations/update.go
index 5cba596..838caf6 100644
--- a/cmd/update_test_durations/update.go
+++ b/cmd/update_test_durations/update.go
@@ -19,11 +19,11 @@
 	"path/filepath"
 	"sort"
 	"strings"
+	"time"
 
 	"github.com/maruel/subcommands"
 	"go.chromium.org/luci/auth"
 	"go.fuchsia.dev/infra/functools"
-	"golang.org/x/exp/maps"
 )
 
 var errZeroTotalRuns = errors.New("cannot calculate average duration for tests with zero total runs")
@@ -43,6 +43,12 @@
 	// and clear about its purpose (an empty string would less clearly be a
 	// fallback, and might even suggest a bug in the updater).
 	defaultTestName = "*"
+
+	// How long to keep around old entries in test duration files before
+	// deleting them. Old entries are kept around for some time so we still have
+	// data in case a builder or test gets deleted and then reintroduced within
+	// the time window.
+	garbageCollectionAge = 24 * 30 * time.Hour
 )
 
 func cmdRun(authOpts auth.Options) *subcommands.Command {
@@ -80,10 +86,15 @@
 	// The median duration of the test, in milliseconds, across all included runs.
 	MedianDurationMS int64 `json:"median_duration_ms"`
 
-	// The builder that ran this test. Test durations are separated into files
-	// by builder, so it's not necessary to include the builder name in the
-	// output JSON.
+	// The builder that ran this test.
+	//
+	// Test durations are separated into files by builder, so it's not necessary
+	// to include the builder name in the output JSON.
 	Builder string `json:"-"`
+
+	// The timestamp of the last test duration update that included new data for
+	// this test. Used for garbage-collecting old duration data.
+	LastSeen time.Time `json:"last_seen"`
 }
 
 // testDurationsMap maps from the name of a builder to a list of tests that were
@@ -91,17 +102,6 @@
 // resulting test duration files.
 type testDurationMap map[string][]test
 
-// durationFileOptions contains feature flags for splitTestsByBuilder. This is
-// primarily for testing purposes; turning off some features makes it easier to
-// construct expected data for tests where we're making assertions that aren't
-// related to those features.
-//
-// All features should always be enabled in production.
-type durationFileOptions struct {
-	includeDefaultTests   bool
-	includeDefaultBuilder bool
-}
-
 func (c *runCmd) Init(defaultAuthOpts auth.Options) {
 	c.commonFlags.Init(defaultAuthOpts)
 	c.Flags.StringVar(
@@ -160,69 +160,114 @@
 		return err
 	}
 
-	durations, err := splitTestsByBuilder(rows, durationFileOptions{
-		includeDefaultTests:   true,
-		includeDefaultBuilder: true,
-	})
+	date := time.Now().UTC().Round(24 * time.Hour)
+	return update(rows, c.previousVersionDir, c.outputDir, false, date)
+}
+
+func update(newData []test, previousVersionDir string, outputDir string, indentOutput bool, date time.Time) error {
+	oldDurations, err := unmarshalDurations(previousVersionDir, date)
 	if err != nil {
 		return err
 	}
 
-	newFileContents, err := marshalDurations(durations)
+	newDurations, err := splitTestsByBuilder(newData)
 	if err != nil {
 		return err
 	}
 
-	oldFileContents, err := previousVersionContents(c.previousVersionDir)
-	if err != nil {
-		return err
-	}
-
-	finalContents := overlayFileContents(oldFileContents, newFileContents)
-	for name, contents := range finalContents {
-		path := filepath.Join(c.outputDir, name)
-		if err := os.WriteFile(path, contents, 0o600); err != nil {
-			return fmt.Errorf("failed to write: %w", err)
+	for builder := range newDurations {
+		for i := range newDurations[builder] {
+			newDurations[builder][i].LastSeen = date
 		}
 	}
 
-	return nil
+	for builder, oldTests := range oldDurations {
+		newTestNames := make(map[string]struct{})
+		for _, newTest := range newDurations[builder] {
+			newTestNames[newTest.Name] = struct{}{}
+		}
+
+		for _, t := range oldTests {
+			if t.LastSeen.IsZero() {
+				// For backwards compatibility with files from before the
+				// LastSeen field was added.
+				t.LastSeen = date
+			}
+			if date.Sub(t.LastSeen) > garbageCollectionAge {
+				continue
+			}
+			if _, ok := newTestNames[t.Name]; ok {
+				// Ignore old data if there is new data for the test.
+				continue
+			}
+			newDurations[builder] = append(newDurations[builder], t)
+		}
+	}
+
+	defaultDurations, err := calculateDefaultDurations(newDurations)
+	if err != nil {
+		return err
+	}
+	newDurations[defaultBuilderName] = defaultDurations
+
+	if err := addDefaultEntries(newDurations); err != nil {
+		return err
+	}
+
+	return writeDurations(newDurations, outputDir, indentOutput)
+}
+
+func unmarshalDurations(dir string, date time.Time) (testDurationMap, error) {
+	files, err := os.ReadDir(dir)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read directory: %w", err)
+	}
+	res := make(testDurationMap)
+	for _, f := range files {
+		if f.IsDir() || !strings.HasSuffix(f.Name(), ".json") {
+			continue
+		}
+		builderName := strings.TrimSuffix(f.Name(), ".json")
+		if builderName == defaultBuilderName {
+			// Don't bother deserializing the default builder data, we'll
+			// recompute it from scratch based on up-to-date data.
+			continue
+		}
+		b, err := os.ReadFile(filepath.Join(dir, f.Name()))
+		if err != nil {
+			return nil, fmt.Errorf("failed to read file: %w", err)
+		}
+		var tests []test
+		if err := json.Unmarshal(b, &tests); err != nil {
+			return nil, fmt.Errorf("failed to unmarshal file %s: %w", f.Name(), err)
+		}
+		for _, test := range tests {
+			// Don't bother deserializing the default test data, we'll recompute
+			// it from scratch based on up-to-date data.
+			if test.Name == defaultTestName {
+				continue
+			}
+			if test.LastSeen.IsZero() {
+				test.LastSeen = date
+			}
+			res[builderName] = append(res[builderName], test)
+		}
+	}
+	return res, nil
 }
 
 // splitTestsByBuilder takes a collection of tests and arranges them into a
 // mapping corresponding to the data that should be written to each per-builder
 // test duration file.
-func splitTestsByBuilder(tests []test, opts durationFileOptions) (testDurationMap, error) {
+func splitTestsByBuilder(tests []test) (testDurationMap, error) {
 	// Mapping from builder name to list of tests.
 	durations := make(testDurationMap)
 
-	if len(tests) == 0 {
-		return durations, nil
-	}
-
-	// Mapping from test name to list of tests, across all builders. Only
-	// used as intermediate storage for producing the default test duration file.
-	testsByName := make(map[string][]test)
 	for _, t := range tests {
 		if t.Builder == "" {
 			return nil, fmt.Errorf("test has empty builder: %+v", t)
 		}
 		durations[t.Builder] = append(durations[t.Builder], t)
-		testsByName[t.Name] = append(testsByName[t.Name], t)
-	}
-
-	if opts.includeDefaultBuilder {
-		defaultDurations, err := calculateDefaultDurations(testsByName)
-		if err != nil {
-			return nil, err
-		}
-		durations[defaultBuilderName] = defaultDurations
-	}
-
-	if opts.includeDefaultTests {
-		if err := addDefaultEntries(durations); err != nil {
-			return nil, err
-		}
 	}
 
 	return durations, nil
@@ -240,7 +285,14 @@
 // few shards when executing testsharder with a target per-shard duration.
 //
 // See unit tests for examples.
-func calculateDefaultDurations(testsByName map[string][]test) ([]test, error) {
+func calculateDefaultDurations(durations testDurationMap) ([]test, error) {
+	testsByName := make(map[string][]test)
+	for _, tests := range durations {
+		for _, t := range tests {
+			testsByName[t.Name] = append(testsByName[t.Name], t)
+		}
+	}
+
 	var defaultDurations []test
 	for name, sameNameTests := range testsByName {
 		var totalRuns int64
@@ -316,71 +368,29 @@
 	return int64(math.Round(avg)), nil
 }
 
-func marshalDurations(durations testDurationMap) (map[string][]byte, error) {
-	files := make(map[string][]byte, len(durations))
+func writeDurations(durations testDurationMap, dir string, indentOutput bool) error {
 	for builder, tests := range durations {
 		functools.SortBy(tests, func(t test) string {
 			return t.Name
 		})
-		b, err := json.Marshal(tests)
+		var b []byte
+		var err error
+		if indentOutput {
+			b, err = json.MarshalIndent(tests, "", "  ")
+		} else {
+			b, err = json.Marshal(tests)
+		}
 		if err != nil {
-			return nil, err
+			return err
+		}
+
+		if indentOutput {
+			b = append(b, '\n')
 		}
 		fileName := fmt.Sprintf("%s.json", builder)
-		files[fileName] = b
-	}
-
-	return files, nil
-}
-
-// previousVersionContents downloads the version of `pkg` identified by `ref`
-// and returns a mapping from file basename to file contents for all the non-
-// hidden files in the root of the package.
-func previousVersionContents(previousDurationsDir string) (map[string][]byte, error) {
-	files, err := os.ReadDir(previousDurationsDir)
-	if err != nil {
-		return nil, err
-	}
-
-	contents := make(map[string][]byte)
-	for _, file := range files {
-		name := file.Name()
-		// Ignore hidden files and directories to ensure we only copy duration
-		// files into the updated package, and not any special files installed
-		// by CIPD itself.
-		if strings.HasPrefix(name, ".") || file.IsDir() {
-			continue
+		if err := os.WriteFile(filepath.Join(dir, fileName), b, 0o600); err != nil {
+			return err
 		}
-		b, err := os.ReadFile(filepath.Join(previousDurationsDir, name))
-		if err != nil {
-			return nil, err
-		}
-		contents[name] = b
 	}
-
-	return contents, nil
-}
-
-// overlayFileContents takes an in-memory listing of the current contents of a
-// directory and overlays that listing with a listing of new files. It leaves in
-// place any entries from `oldFiles` that do not have a corresponding entry in
-// `newFiles`.
-//
-// The purpose of this is to keep around duration files from previous package
-// versions even if we don't have any data for their builders from this run of
-// the updater. By keeping the old files around, we'll ensure that future builds
-// still have test duration data to use.
-//
-// This is useful if, for example, a builder is paused or breaks for a few days
-// and we stop getting data from it, but then it gets fixed/restored. We still
-// want the builder to have duration data to use when it comes back online, so
-// we intentionally keep its duration file around.
-//
-// TODO(olivernewman): Set up a garbage collection system so we don't keep old
-// files around forever.
-func overlayFileContents(oldFiles, newFiles map[string][]byte) map[string][]byte {
-	result := make(map[string][]byte)
-	maps.Copy(result, oldFiles)
-	maps.Copy(result, newFiles)
-	return result
+	return nil
 }
diff --git a/cmd/update_test_durations/update_test.go b/cmd/update_test_durations/update_test.go
index 18db16f..d257bb3 100644
--- a/cmd/update_test_durations/update_test.go
+++ b/cmd/update_test_durations/update_test.go
@@ -5,216 +5,123 @@
 package main
 
 import (
-	"encoding/json"
 	"errors"
+	"flag"
 	"fmt"
 	"os"
+	"os/exec"
+	"strings"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 )
 
-func TestSplitTestsByBuilder(t *testing.T) {
+var (
+	updateGoldens = flag.Bool("update-goldens", false, "Whether to update goldens")
+)
+
+func TestUpdate(t *testing.T) {
 	t.Parallel()
 
-	testCases := []struct {
-		name             string
-		options          durationFileOptions
-		inputAndExpected func() (input []test, expected testDurationMap)
-		expectedErr      error
-	}{
+	newData := []test{
 		{
-			name: "groups tests by builder",
-			inputAndExpected: func() ([]test, testDurationMap) {
-				fooTests := testsWithDurations("foo", 1, 1)
-				barTests := testsWithDurations("bar", 1)
-
-				allTests := appendAll(fooTests, barTests)
-
-				expectedDurations := testDurationMap{
-					"foo": fooTests,
-					"bar": barTests,
-				}
-				return allTests, expectedDurations
-			},
+			Name:             "still_existing_test",
+			Builder:          "foo",
+			MedianDurationMS: 10,
+			Runs:             3,
 		},
 		{
-			name: "adds a default entry for each builder",
-			options: durationFileOptions{
-				includeDefaultTests: true,
-			},
-			inputAndExpected: func() ([]test, testDurationMap) {
-				fooTests := testsWithDurations("foo", 1, 3)
-				barTests := testsWithDurations("bar", 8, 10)
-
-				allTests := appendAll(fooTests, barTests)
-
-				expectedDurations := testDurationMap{
-					"foo": append(fooTests, defaultEntryWithDuration(2)),
-					"bar": append(barTests, defaultEntryWithDuration(9)),
-				}
-				return allTests, expectedDurations
-			},
+			Name:             "still_existing_test",
+			Builder:          "new-builder",
+			MedianDurationMS: 100,
+			Runs:             12,
 		},
 		{
-			name: "default entry duration is average of other entry durations",
-			options: durationFileOptions{
-				includeDefaultTests: true,
-			},
-			inputAndExpected: func() ([]test, testDurationMap) {
-				// We want to use the average of medians rather than the median of
-				// medians so that total expected shard durations are still accurate if
-				// many new tests are added (assuming the new tests have a similar
-				// distribution of durations to the existing tests).
-				barTests := testsWithDurations("bar", 3, 30, 300)
-
-				expectedDurations := testDurationMap{
-					"bar": append(barTests, defaultEntryWithDuration(111)),
-				}
-				return barTests, expectedDurations
-			},
-		},
-		{
-			name: "weights default entry duration by run count",
-			options: durationFileOptions{
-				includeDefaultTests: true,
-			},
-			inputAndExpected: func() ([]test, testDurationMap) {
-				allTests := []test{
-					{
-						Name:             "foo1",
-						Builder:          "foo",
-						MedianDurationMS: 150,
-						Runs:             2,
-					},
-					{
-						Name:             "foo1",
-						Builder:          "foo",
-						MedianDurationMS: 3,
-						Runs:             1,
-					},
-				}
-
-				expectedDurations := testDurationMap{
-					// The default entry's duration should be the average of the other
-					// entries' durations, weighted by run count.
-					"foo": append(allTests, defaultEntryWithDuration(101)),
-				}
-				return allTests, expectedDurations
-			},
-		},
-		{
-			name: "adds a default builder",
-			options: durationFileOptions{
-				includeDefaultBuilder: true,
-			},
-			inputAndExpected: func() ([]test, testDurationMap) {
-				sharedTest := test{Name: "shared", Runs: 2, MedianDurationMS: 5}
-				fooSharedTest := test{Name: "shared", Runs: 1, MedianDurationMS: 5, Builder: "foo"}
-				barSharedTest := test{Name: "shared", Runs: 1, MedianDurationMS: 5, Builder: "bar"}
-
-				fooOnlyTests := testsWithDurations("foo", 1)
-				fooTests := append(fooOnlyTests, fooSharedTest)
-				barOnlyTests := testsWithDurations("bar", 3)
-				barTests := append(barOnlyTests, barSharedTest)
-
-				allTests := appendAll(fooTests, barTests)
-
-				expectedDefaultTests := appendAll([]test{sharedTest}, fooOnlyTests, barOnlyTests)
-				expectedDurations := testDurationMap{
-					"foo":              fooTests,
-					"bar":              barTests,
-					defaultBuilderName: expectedDefaultTests,
-				}
-				return allTests, expectedDurations
-			},
-		},
-		{
-			name: "adds a default entry for the default builder",
-			options: durationFileOptions{
-				includeDefaultBuilder: true,
-				includeDefaultTests:   true,
-			},
-			inputAndExpected: func() ([]test, testDurationMap) {
-				fooTests := testsWithDurations("foo", 1)
-				barTests := testsWithDurations("bar", 5)
-
-				allTests := appendAll(fooTests, barTests)
-
-				expectedDurations := testDurationMap{
-					"foo":              append(fooTests, defaultEntryWithDuration(1)),
-					"bar":              append(barTests, defaultEntryWithDuration(5)),
-					defaultBuilderName: append(allTests, defaultEntryWithDuration(3)),
-				}
-				return allTests, expectedDurations
-			},
-		},
-		{
-			name: "returns no builders if input contains no tests",
-			options: durationFileOptions{
-				includeDefaultBuilder: true,
-				includeDefaultTests:   true,
-			},
-			inputAndExpected: func() ([]test, testDurationMap) {
-				var emptyTests []test
-				var emptyDurations testDurationMap
-				return emptyTests, emptyDurations
-			},
-		},
-		{
-			name: "returns error if all tests for a builder have zero runs",
-			options: durationFileOptions{
-				includeDefaultBuilder: true,
-				includeDefaultTests:   true,
-			},
-			expectedErr: errZeroTotalRuns,
-			inputAndExpected: func() ([]test, testDurationMap) {
-				fooTests := testsWithDurations("foo", 1, 2, 3)
-				for i := range fooTests {
-					fooTests[i].Runs = 0
-				}
-				return fooTests, nil
-			},
+			Name:             "new_test",
+			Builder:          "foo",
+			MedianDurationMS: 12,
+			Runs:             4,
 		},
 	}
 
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			input, expected := tc.inputAndExpected()
+	goldensDir := "testdata/goldens"
 
-			for builder, origTests := range expected {
-				// Make a copy to avoid modifying the slices in the original, which may
-				// share the same underlying array with the slice of input tests.
-				tests := make([]test, len(origTests))
-				copy(tests, origTests)
-				expected[builder] = tests
-				// It's very repetitive to specify the builder in each expected
-				// test, so set it here based on the map key.
-				for i := range tests {
-					tests[i].Builder = builder
-				}
-			}
+	var outputDir string
+	if *updateGoldens {
+		// Clear the goldens directory to garbage-collect old files.
+		if err := os.RemoveAll(goldensDir); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.MkdirAll(goldensDir, 0o700); err != nil {
+			t.Fatal(err)
+		}
+		outputDir = goldensDir
+	} else {
+		outputDir = t.TempDir()
+	}
 
-			actual, err := splitTestsByBuilder(input, tc.options)
-			if !errors.Is(err, tc.expectedErr) {
-				t.Fatalf("splitTestsByBuilder() returned wrong error, got: %v, wanted %v", err, tc.expectedErr)
-			}
+	now := time.Date(2023, 11, 23, 0, 0, 0, 0, time.UTC)
+	if err := update(newData, "testdata/old_durations", outputDir, true, now); err != nil {
+		t.Fatalf("Unexpected error: %s", err)
+	}
 
-			opts := []cmp.Option{
-				cmpopts.EquateEmpty(),
-				// In production, duration files should always be sorted by name because
-				// the Dremel query orders results by name. So there's no need for
-				// splitTestsByBuilder to do sorting (except for the default duration
-				// file), so we shouldn't care about ordering in its return value.
-				cmpopts.SortSlices(func(a, b test) bool {
-					return a.Name < b.Name
-				}),
-			}
-			if diff := cmp.Diff(expected, actual, opts...); diff != "" {
-				t.Errorf("splitTestsByBuilder() diff (-want +got):\n%s", diff)
-			}
+	// We directly wrote the files to the goldens directory, so we know they'll
+	// be up-to-date, no point in checking them.
+	if *updateGoldens {
+		return
+	}
+
+	cmd := exec.Command(
+		"git", "diff", "--no-index", "--no-ext-diff", "--no-prefix",
+		goldensDir, outputDir,
+	)
+	var stdout, stderr strings.Builder
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+
+	err := cmd.Run()
+	var errExit *exec.ExitError
+	if errors.As(err, &errExit) && errExit.ExitCode() == 1 {
+		t.Fatalf(
+			"Unexpected diff: %s\n\n"+
+				"To update goldens, run: `go test ./cmd/update_test_durations -update-goldens`",
+			stdout.String())
+	} else if err != nil {
+		t.Fatalf("Error running git diff: %s", err)
+	}
+}
+
+func TestCalculateDefaultDurations(t *testing.T) {
+	t.Parallel()
+
+	durations := testDurationMap{
+		"bar": append(
+			testsWithDurations("bar", 3),
+			test{Name: "shared", Runs: 2, MedianDurationMS: 20}),
+		"foo": append(
+			testsWithDurations("foo", 1),
+			test{Name: "shared", Runs: 1, MedianDurationMS: 5}),
+	}
+
+	got, err := calculateDefaultDurations(durations)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	want := appendAll(
+		testsWithDurations("bar", 3),
+		testsWithDurations("foo", 1),
+		[]test{
+			{
+				Name:             "shared",
+				Runs:             3,
+				MedianDurationMS: 15,
+			},
 		})
+	if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(test{}, "Builder")); diff != "" {
+		t.Errorf("Unexpected diff:\n%s", diff)
 	}
 }
 
@@ -233,13 +140,6 @@
 	return res
 }
 
-func defaultEntryWithDuration(medianDurationMS int64) test {
-	return test{
-		Name:             defaultTestName,
-		MedianDurationMS: medianDurationMS,
-	}
-}
-
 func appendAll(slices ...[]test) []test {
 	var res []test
 	for _, s := range slices {
@@ -247,72 +147,3 @@
 	}
 	return res
 }
-
-func TestMarshalDurations(t *testing.T) {
-	dir, err := os.MkdirTemp("/tmp", "test-durations-tests")
-	if err != nil {
-		t.Fatalf("Failed to create temporary directory: %v", err)
-	}
-	defer os.RemoveAll(dir)
-
-	durations := testDurationMap{
-		"foo": append(testsWithDurations("foo", 0, 1, 2), defaultEntryWithDuration(1)),
-		"bar": append(testsWithDurations("bar", -3, 5), defaultEntryWithDuration(1)),
-	}
-
-	fileContents, err := marshalDurations(durations)
-	if err != nil {
-		t.Fatalf("marshalDurations() returned unexpected error: %v", err)
-	}
-
-	for builder, tests := range durations {
-		// Use t.Run() so we can Fatalf() while still checking all duration files.
-		t.Run(fmt.Sprintf("marshals durations for %s", builder), func(t *testing.T) {
-			basename := fmt.Sprintf("%s.json", builder)
-			b, ok := fileContents[basename]
-			if !ok {
-				t.Fatalf("File %s was not marshaled", basename)
-			}
-			var unmarshaledTests []test
-			if err := json.Unmarshal(b, &unmarshaledTests); err != nil {
-				t.Fatalf("Failed to unmarshal file %s: %v", basename, err)
-			}
-
-			var expectedTests []test
-			for _, test := range tests {
-				// The builder should not be included in the files, since it's in the
-				// file name.
-				test.Builder = ""
-				expectedTests = append(expectedTests, test)
-			}
-
-			if diff := cmp.Diff(expectedTests, unmarshaledTests); diff != "" {
-				t.Errorf("marshalDurations() diff in file %s (-want +got):\n%s", basename, diff)
-			}
-		})
-	}
-}
-
-func TestOverlayFileContents(t *testing.T) {
-	oldFiles := map[string][]byte{
-		"foo": []byte("foo-old"),
-		"bar": []byte("bar-old"),
-	}
-
-	newFiles := map[string][]byte{
-		"bar": []byte("bar-new"),
-		"baz": []byte("baz-new"),
-	}
-
-	expected := map[string][]byte{
-		"foo": []byte("foo-old"),
-		"bar": []byte("bar-new"),
-		"baz": []byte("baz-new"),
-	}
-
-	result := overlayFileContents(oldFiles, newFiles)
-
-	if diff := cmp.Diff(expected, result); diff != "" {
-		t.Errorf("overlayFileContents() diff (-want +got):\n%s", diff)
-	}
-}