[builder_oracle] Simulate adding builders to presubmit With the new cq-add flag, builder_oracle can now estimate the impact of adding a builder to presubmit. Currently this adds the builder to presubmit for all projects; per-project granularity will be a follow-up task (fxbug.dev/95819). Bug: 93986 Change-Id: I7a8681043c62734a934380b0be00a2bd55ee49f7 Reviewed-on: https://fuchsia-review.googlesource.com/c/infra/infra/+/660346 Reviewed-by: Oliver Newman <olivernewman@google.com> Commit-Queue: Catherine Duncan <catduncan@google.com>

commit: 63e26b633f84aab58041cb0c25f61b121775dec6 [log] [tgz]
author: Catherine Duncan <catduncan@google.com> Thu Apr 07 00:43:48 2022 +0000
committer: Commit Bot <commit-bot@chromium.org> Thu Apr 07 00:43:48 2022 +0000
tree: c69581f68ebec75f734245e38bfa70c4cbefec9d
parent: 5d8b74228085f62cd6e7908b313588edae8108c6 [diff]
diff --git a/cmd/builder_oracle/common.go b/cmd/builder_oracle/common.go
index 56987aa..49826a4 100644
--- a/cmd/builder_oracle/common.go
+++ b/cmd/builder_oracle/common.go

@@ -15,9 +15,10 @@
 
 type commonFlags struct {
 	subcommands.CommandRunBase
-	authFlags authcli.Flags
-	verbose   bool
-	quiet     bool
+	authFlags   authcli.Flags
+	verbose     bool
+	veryVerbose bool
+	quiet       bool
 
 	parsedAuthOpts auth.Options
 }
@@ -27,6 +28,7 @@
 	c.authFlags.Register(&c.Flags, authOpts)
 	c.Flags.BoolVar(&c.verbose, "verbose", false, "Enable additional informational output")
 	c.Flags.BoolVar(&c.verbose, "v", false, "Enable additional informational output")
+	c.Flags.BoolVar(&c.veryVerbose, "vv", false, "Enable much more informational output")
 	c.Flags.BoolVar(&c.quiet, "quiet", false, "Disable all CLI output")
 	c.Flags.BoolVar(&c.quiet, "q", false, "Disable all CLI output")
 }

diff --git a/cmd/builder_oracle/queries.go b/cmd/builder_oracle/queries.go
index ad58c46..cd4356f 100644
--- a/cmd/builder_oracle/queries.go
+++ b/cmd/builder_oracle/queries.go

@@ -55,8 +55,8 @@
   * 1000 AS runtime_nanos
 FROM chrome-swarming.swarming.task_results_summary
 WHERE
-  create_time >= TIMESTAMP(@StartTime)
-  AND create_time <= TIMESTAMP(@EndTime)
+  create_time >= @StartTime
+  AND create_time <= @EndTime
   AND create_time IS NOT NULL
   AND bot.pools[OFFSET(0)] LIKE 'fuchsia.tests%'
 
@@ -80,8 +80,8 @@
   COUNT(DISTINCT r.bot.bot_id) AS bot_count
 FROM bot_events_all AS r
 WHERE
-  r.event_time >= TIMESTAMP(@StartTime)
-  AND r.event_time <= TIMESTAMP(@EndTime)
+  r.event_time >= @StartTime
+  AND r.event_time <= @EndTime
   AND ARRAY_LENGTH(r.bot.pools)
     != 0
   AND r.bot.pools[OFFSET(0)]
@@ -112,8 +112,8 @@
     FROM
       cr-buildbucket.fuchsia.builds AS b
     WHERE
-      b.create_time >= TIMESTAMP(@StartTime)
-      AND b.create_time <= TIMESTAMP(@EndTime)
+      b.create_time >= @StartTime
+      AND b.create_time <= @EndTime
       AND EXTRACT(HOUR FROM b.create_time AT TIME ZONE 'America/Los_Angeles')
         IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
       AND EXTRACT(
@@ -135,8 +135,8 @@
       TO_MACHINE(c) AS device_type
     FROM swarming_all AS c
     WHERE
-      c.create_time >= TIMESTAMP(@StartTime)
-      AND c.create_time <= TIMESTAMP(@EndTime)
+      c.create_time >= @StartTime
+      AND c.create_time <= @EndTime
       AND EXTRACT(HOUR FROM c.create_time AT TIME ZONE 'America/Los_Angeles')
         IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
       AND EXTRACT(
@@ -183,8 +183,8 @@
     FROM
       cr-buildbucket.fuchsia.builds AS b
     WHERE
-      b.create_time >= TIMESTAMP(@StartTime)
-      AND b.create_time <= TIMESTAMP(@EndTime)
+      b.create_time >= @StartTime
+      AND b.create_time <= @EndTime
       AND EXTRACT(HOUR FROM b.create_time AT TIME ZONE 'America/Los_Angeles')
         IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
       AND EXTRACT(
@@ -206,8 +206,8 @@
       TO_MACHINE(c) AS device_type
     FROM swarming_all AS c
     WHERE
-      c.create_time >= TIMESTAMP(@StartTime)
-      AND c.create_time <= TIMESTAMP(@EndTime)
+      c.create_time >= @StartTime
+      AND c.create_time <= @EndTime
       AND EXTRACT(HOUR FROM c.create_time AT TIME ZONE 'America/Los_Angeles')
         IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
       AND EXTRACT(
@@ -266,8 +266,8 @@
     FROM cr-buildbucket.fuchsia.builds
     WHERE
       builder.builder NOT LIKE '%-subbuild'
-      AND create_time > TIMESTAMP(@StartTime)
-      AND create_time < TIMESTAMP(@EndTime)
+      AND create_time > @StartTime
+      AND create_time < @EndTime
       AND EXTRACT(HOUR FROM create_time AT TIME ZONE 'America/Los_Angeles')
         IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
       AND EXTRACT(
@@ -283,8 +283,8 @@
       COUNTIF(status = 'SUCCESS') AS passes
     FROM cr-buildbucket.fuchsia.builds
     WHERE
-      create_time > TIMESTAMP(@StartTime)
-      AND create_time < TIMESTAMP(@EndTime)
+      create_time > @StartTime
+      AND create_time < @EndTime
       AND NOT input.gitiles_commit.id IS NULL
     GROUP BY builder, input.gitiles_commit.id
     ORDER BY failures DESC
@@ -333,3 +333,22 @@
 	AverageTasks        int           `json:"avg_tasks"`
 	AverageTaskDuration time.Duration `json:"avg_task_duration"`
 }
+
+// CommitQueueAttemptsQuery is a query that fetches all CQ attempts during the period of interest.
+// The results are used to trigger builders that have been added to CQ.
+var CommitQueueAttemptsQuery = `
+SELECT
+  start_time,
+  config_group
+FROM commit-queue.fuchsia.attempts
+WHERE
+  start_time >= @StartTime
+  AND start_time < @EndTime
+ORDER BY start_time ASC
+`
+
+// CommitQueueAttempt is a record of an attempt to verify a change for a particular project.
+type CommitQueueAttempt struct {
+	StartTime   time.Time `json:"start_time"`
+	ConfigGroup string    `json:"config_group"`
+}

diff --git a/cmd/builder_oracle/simulate.go b/cmd/builder_oracle/simulate.go
index 2abede3..5d41911 100644
--- a/cmd/builder_oracle/simulate.go
+++ b/cmd/builder_oracle/simulate.go

@@ -52,11 +52,17 @@
 }
 
 func (c *simulateCmd) Init(defaultAuthOpts auth.Options) {
+	descs := map[string]string{
+		"ci-add":    "Builder to be added to CI, multiple instances of this flag are allowed",
+		"ci-remove": "Builder to be removed from CI, multiple instances of this flag are allowed",
+		"cq-add":    "Builder to be added to presubmit, multiple instances of this flag are allowed",
+		"cq-remove": "Builder to be removed from presubmit, multiple instances of this flag are allowed",
+	}
 	c.commonFlags.Init(defaultAuthOpts)
-	c.Flags.Var(&c.cqAdd, "cq-add", "Builder to be added to presubmit, multiple instances of this flag are allowed")
-	c.Flags.Var(&c.cqRemove, "cq-remove", "Builder to be removed from presubmit, multiple instances of this flag are allowed")
-	c.Flags.Var(&c.ciAdd, "ci-add", "Builder to be added to CI, multiple instances of this flag are allowed")
-	c.Flags.Var(&c.ciRemove, "ci-remove", "Builder to be removed from CI, multiple instances of this flag are allowed")
+	c.Flags.Var(&c.ciAdd, "ci-add", descs["ci-add"])
+	c.Flags.Var(&c.ciRemove, "ci-remove", descs["ci-remove"])
+	c.Flags.Var(&c.cqAdd, "cq-add", descs["cq-add"])
+	c.Flags.Var(&c.cqRemove, "cq-remove", descs["cq-remove"])
 }
 
 func (c *simulateCmd) parseArgs() error {
@@ -116,20 +122,18 @@
 
 	endTime := time.Now().In(pst).Truncate(24 * time.Hour)
 	startTime := endTime.Add(-periodOfInterestDuration)
-	startTimeString := startTime.Format("2006-01-02 15:04:05+00")
-	endTimeString := endTime.Format("2006-01-02 15:04:05+00")
-	log.Printf("     Period of interest: %s - %s", startTimeString, endTimeString)
+	log.Printf("     Period of interest: %s - %s", startTime, endTime)
 
 	log.Print("Fetching average subtask runtime...")
 	var avgTaskRuntimeResult []AvgTaskDuration
 	timeframeParams := []bigquery.QueryParameter{
 		{
 			Name:  "StartTime",
-			Value: startTimeString,
+			Value: startTime,
 		},
 		{
 			Name:  "EndTime",
-			Value: endTimeString,
+			Value: endTime,
 		},
 	}
 	if err := runQuery(ctx, client, AvgTaskRuntimeQuery, timeframeParams, &avgTaskRuntimeResult); err != nil {
@@ -189,44 +193,44 @@
 	}
 
 	builderProfileMap := map[string]BuilderProfile{}
-	if len(c.cqAdd)+len(c.ciAdd) > 0 {
-		log.Print("Fetching statistics on proposed builder additions...")
-		builderAddList := append(c.ciAdd, c.cqAdd...)
-		params := []bigquery.QueryParameter{
-			{
-				Name:  "StartTime",
-				Value: startTimeString,
-			},
-			{
-				Name:  "EndTime",
-				Value: endTimeString,
-			},
-			{
-				Name:  "AddedBuilders",
-				Value: builderAddList,
-			},
-		}
+	log.Print("Fetching statistics on proposed builder additions...")
 
-		var builderProfileSlice []BuilderProfile
-		if err := runQuery(ctx, client, BuilderTestingTaskProfileQuery, params, &builderProfileSlice); err != nil {
+	var builderProfileSlice []BuilderProfile
+	if err := runQuery(ctx, client, BuilderTestingTaskProfileQuery, timeframeParams, &builderProfileSlice); err != nil {
+		return err
+	}
+	for _, profile := range builderProfileSlice {
+		builderProfileMap[profile.Builder] = profile
+	}
+
+	var cqAttempts []CommitQueueAttempt
+	if len(c.cqAdd) > 0 {
+		log.Print("Fetching historical presubmit events...")
+		if err := runQuery(ctx, client, CommitQueueAttemptsQuery, timeframeParams, &cqAttempts); err != nil {
 			return err
 		}
-		for _, profile := range builderProfileSlice {
-			builderProfileMap[profile.Builder] = profile
-		}
 	}
 
+	sim := Simulator{
+		baseline:           true,
+		startTime:          startTime,
+		endTime:            endTime,
+		originalIdleBots:   availableBots,
+		originalTaskRuns:   testingTaskRuns,
+		originalCqAttempts: cqAttempts,
+		builderProfiles:    builderProfileMap,
+		cqRemove:           c.cqRemove,
+		ciRemove:           c.ciRemove,
+		cqAdd:              c.cqAdd,
+		ciAdd:              c.ciAdd,
+	}
+
 	log.Print("Running Simulation to gather baseline information...")
-	result := c.simulate(startTime, endTime, availableBots, testingTaskRuns, builderProfileMap, true)
-	baselineWaitingTimes := result.waitingTimes
-	baselineRuntimes := result.runtimes
-	stepResultsBaseline := result.stepResults
+	baseline := sim.simulate()
 
 	log.Print("Running Simulation with proposed changes...")
-	result = c.simulate(startTime, endTime, availableBots, testingTaskRuns, builderProfileMap, false)
-	proposedWaitingTimes := result.waitingTimes
-	proposedRuntimes := result.runtimes
-	stepResultsProposed := result.stepResults
+	sim.baseline = false
+	proposed := sim.simulate()
 	log.Print("Simulation Complete!")
 
 	historicalPending := map[string]time.Duration{}
@@ -234,8 +238,8 @@
 		historicalPending[testingTask.DeviceType] += testingTask.PendingDuration
 	}
 
-	if c.verbose {
-		reportSimulationDifferences(availableBots, stepResultsBaseline, stepResultsProposed)
+	if c.veryVerbose {
+		reportSimulationDifferences(availableBots, baseline.stepResults, proposed.stepResults)
 	}
 
 	fmt.Println("\nDifference in hardware pending times (in minutes):")
@@ -259,18 +263,18 @@
 				"%s%s | %f | %f | %f | %f\n",
 				dt,
 				buffer,
-				(proposedWaitingTimes[dt] - baselineWaitingTimes[dt]).Minutes(),
-				baselineWaitingTimes[dt].Minutes(),
-				proposedWaitingTimes[dt].Minutes(),
+				(proposed.waitingTimes[dt] - baseline.waitingTimes[dt]).Minutes(),
+				baseline.waitingTimes[dt].Minutes(),
+				proposed.waitingTimes[dt].Minutes(),
 				historicalPending[dt].Minutes())
 		} else {
 			fmt.Printf(
 				"%s%s | %f | %f | %f\n",
 				dt,
 				buffer,
-				(proposedWaitingTimes[dt] - baselineWaitingTimes[dt]).Minutes(),
-				baselineWaitingTimes[dt].Minutes(),
-				proposedWaitingTimes[dt].Minutes())
+				(proposed.waitingTimes[dt] - baseline.waitingTimes[dt]).Minutes(),
+				baseline.waitingTimes[dt].Minutes(),
+				proposed.waitingTimes[dt].Minutes())
 		}
 	}
 
@@ -283,9 +287,9 @@
 			"%s%s | %f | %f | %f\n",
 			dt,
 			buffer,
-			(proposedRuntimes[dt] - baselineRuntimes[dt]).Hours(),
-			baselineRuntimes[dt].Hours(),
-			proposedRuntimes[dt].Hours())
+			(proposed.runtimes[dt] - baseline.runtimes[dt]).Hours(),
+			baseline.runtimes[dt].Hours(),
+			proposed.runtimes[dt].Hours())
 	}
 
 	return nil
@@ -302,186 +306,243 @@
 	RecoveryTime time.Time
 }
 
-func (c *simulateCmd) simulate(
-	startTime time.Time,
-	endTime time.Time,
-	originalIdleBots map[string]int,
-	taskRuns []TestingTask,
-	builderProfiles map[string]BuilderProfile,
-	baseline bool,
-) SimulateResults {
+type Simulator struct {
+	baseline           bool                      // when true, simulate historical traffic only and skip the proposed builder changes
+	cqAttempts         []CommitQueueAttempt      // working copy of originalCqAttempts
+	originalCqAttempts []CommitQueueAttempt      // incoming presubmit verification requests
+	recoveringBots     []RecoveringBot           // bots that are performing maintenance between tasks
+	ciAdd              []string                  // builders added to ci
+	ciRemove           []string                  // builders removed from ci
+	cqAdd              []string                  // builders added to cq
+	cqRemove           []string                  // builders removed from cq
+	stepResults        []StepStatus              // debug logging for observing the simulation in detail
+	runningTasks       []TestingTask             // tasks that are using a bot and have not completed
+	waitingTasks       []TestingTask             // tasks that are waiting for an available bot
+	originalTaskRuns   []TestingTask             // historical tasks observed over the simulated period
+	taskRuns           []TestingTask             // working copy of originalTaskRuns
+	builderProfiles    map[string]BuilderProfile // builder information and bot footprint
+	idleBots           map[string]int            // working copy of originalIdleBots
+	originalIdleBots   map[string]int            // available bots for tasks
+	runtimes           map[string]time.Duration  // amount of bot time used by builders
+	waitingTimes       map[string]time.Duration  // amount of time spent waiting for available bots
+	ciTriggers         map[string]time.Time      // next timestamp to trigger new ci builders
+	timestepIncrement  time.Duration             // how far in time the simulation should move between steps
+	currTime           time.Time                 // the current time in the simulation
+	endTime            time.Time                 // the end of the simulated period
+	startTime          time.Time                 // the start of the simulated period
+}
 
-	const timestepIncrement = 5 * time.Second
-	var runningTasks, waitingTasks []TestingTask
-	waitingTimes := map[string]time.Duration{}
-	runtimes := map[string]time.Duration{}
-	recoveringBots := []RecoveringBot{}
-	var stepResults []StepStatus
-	idleBots := map[string]int{}
-	for k, v := range originalIdleBots {
-		idleBots[k] = v
-	}
-	var cqRemove, ciRemove, ciAdd []string
-	if !baseline {
-		cqRemove = c.cqRemove
-		ciRemove = c.ciRemove
-		ciAdd = c.ciAdd
-	}
+// recoveryExcludedBots lists the device types (gce, Cavium) that skip the recovery overhead our hardware-backed bots require.
+var recoveryExcludedBots = []string{"gce", "Cavium"}
 
+func (s *Simulator) simulate() SimulateResults {
+	// initialize working variables for the new simulation.
+	s.initVars()
+
+	for s.currTime = s.startTime; s.currTime.Before(s.endTime); s.currTime = s.currTime.Add(s.timestepIncrement) {
+
+		// Check if any bots have finished their tasks and recovery period and
+		// make them available.
+		s.recoverBots()
+
+		// Prune finished tasks from the runningTasks queue.
+		s.processFinishedTasks()
+
+		// Add incoming historical traffic to the waitingTask queue.
+		s.processIncomingTasks()
+
+		// Add traffic from proposed changes to the waitingTask queue.
+		if !s.baseline {
+			s.processCITriggers()
+			s.processCQAttempts()
+		}
+
+		// Start waiting tasks if there are available bots.
+		s.processWaitingTasks()
+
+		// Increment our waiting and runtime counts.
+		for _, task := range s.waitingTasks {
+			s.waitingTimes[task.DeviceType] += s.timestepIncrement
+		}
+
+		for _, task := range s.runningTasks {
+			s.runtimes[task.DeviceType] += s.timestepIncrement
+		}
+
+		// Log this step in the simulation for debugging.
+		idleBotsCopy := map[string]int{}
+		for k, v := range s.idleBots {
+			idleBotsCopy[k] = v
+		}
+		s.stepResults = append(s.stepResults, StepStatus{
+			s.currTime,
+			s.runningTasks,
+			s.waitingTasks,
+			idleBotsCopy,
+			s.taskRuns,
+			s.waitingTimes,
+		})
+	}
+	return SimulateResults{s.waitingTimes, s.runtimes, s.stepResults}
+}
+
+// initVars is a helper that resets the working variables at the start of each simulate() call.
+func (s *Simulator) initVars() {
+	s.timestepIncrement = 5 * time.Second
+	s.runningTasks = []TestingTask{}
+	s.waitingTasks = []TestingTask{}
+	s.waitingTimes = map[string]time.Duration{}
+	s.runtimes = map[string]time.Duration{}
+	s.recoveringBots = []RecoveringBot{}
+	s.stepResults = []StepStatus{}
+	s.idleBots = map[string]int{}
+	for k, v := range s.originalIdleBots {
+		s.idleBots[k] = v
+	}
 	// This is a rough approximation for adding builders to CI. This assumes
 	// that there is a constant influx of CLs to merit triggering the builder
 	// after each run has completed.
 	// TODO(fxbug.dev/95557): Track merge events and use
 	// max_concurrent_invocations and batching rules to simulate with higher
 	// fidelity.
-	ciTriggers := map[string]time.Duration{}
-	for _, builder := range ciAdd {
-		ciTriggers[builder] = 0 * time.Second
+	s.ciTriggers = map[string]time.Time{}
+	for _, builder := range s.ciAdd {
+		s.ciTriggers[builder] = s.startTime
 	}
+	s.taskRuns = s.originalTaskRuns
+	s.cqAttempts = s.originalCqAttempts
+}
 
-	for currTime := startTime; currTime.Before(endTime); currTime = currTime.Add(timestepIncrement) {
-
-		// Check if any bots have finished recovering and make them available.
-		recoveringBotSliceIndex := len(recoveringBots)
-		for idx, bot := range recoveringBots {
-			if currTime.After(bot.RecoveryTime) {
-				idleBots[bot.DeviceType] += 1
-			} else {
-				recoveringBotSliceIndex = idx
-				break
-			}
+// recoverBots is a helper that adds bots to the idleBots pool if they've finished their tasks and recovered.
+func (s *Simulator) recoverBots() {
+	var updatedRecoveringBots []RecoveringBot
+	for _, bot := range s.recoveringBots {
+		if s.currTime.After(bot.RecoveryTime) {
+			s.idleBots[bot.DeviceType] += 1
+		} else {
+			updatedRecoveringBots = append(updatedRecoveringBots, bot)
 		}
-		recoveringBots = recoveringBots[recoveringBotSliceIndex:]
+	}
+	s.recoveringBots = updatedRecoveringBots
+}
 
-		// Release bots that have finished their workloads.
-		// On average physical devices have a ~75s recovery overhead to clean up
-		// before they can take on new tasks.
-		var updatedRunningTasks []TestingTask
-		for _, task := range runningTasks {
-			if currTime.After(task.CreateTime.Add(task.Duration)) {
-				if !contains([]string{"gce", "Cavium"}, task.DeviceType) {
-					recoveringBots = append(recoveringBots, RecoveringBot{task.DeviceType, currTime.Add(75 * time.Second)})
-				} else {
-					idleBots[task.DeviceType] += 1
-				}
+func (s *Simulator) processFinishedTasks() {
+	var updatedRunningTasks []TestingTask
+	for _, task := range s.runningTasks {
+		if s.currTime.After(task.CreateTime.Add(task.Duration)) {
+			if contains(recoveryExcludedBots, task.DeviceType) {
+				s.idleBots[task.DeviceType] += 1
 			} else {
-				updatedRunningTasks = append(updatedRunningTasks, task)
+				s.recoveringBots = append(
+					s.recoveringBots,
+					RecoveringBot{
+						DeviceType:   task.DeviceType,
+						RecoveryTime: s.currTime.Add(75 * time.Second),
+					},
+				)
 			}
+		} else {
+			updatedRunningTasks = append(updatedRunningTasks, task)
 		}
-		runningTasks = updatedRunningTasks
+	}
+	s.runningTasks = updatedRunningTasks
+}
 
-		// Assign bots to tasks from waiting tasks first.
-		var updatedWaitingTasks []TestingTask
-		for _, task := range waitingTasks {
-			if idleBots[task.DeviceType] > 0 {
-				idleBots[task.DeviceType] -= 1
-				runningTasks = append(runningTasks, task)
-			} else {
-				updatedWaitingTasks = append(updatedWaitingTasks, task)
-			}
-		}
-		waitingTasks = updatedWaitingTasks
-
-		// Then assign bots to tasks from incoming historical traffic.
-		for len(taskRuns) > 0 && taskRuns[0].CreateTime.Before(currTime) {
-			task := taskRuns[0]
-			taskRuns = taskRuns[1:]
-
-			if task.IsTry && len(cqRemove) > 0 {
-				if contains(cqRemove, task.Builder) {
+// processWaitingTasks is a helper that assigns waiting tasks to an available bot.
+// If no bot is available, it stays in the waitingTasks queue.
+func (s *Simulator) processWaitingTasks() {
+	var updatedWaitingTasks []TestingTask
+	for _, task := range s.waitingTasks {
+		if !s.baseline {
+			// Drop the task if it is proposed for removal in ci/cqRemove
+			if task.IsTry && len(s.cqRemove) > 0 {
+				if contains(s.cqRemove, task.Builder) {
 					continue
 				}
-			} else if !task.IsTry && len(ciRemove) > 0 {
-				if contains(ciRemove, task.Builder) {
+			} else if !task.IsTry && len(s.ciRemove) > 0 {
+				if contains(s.ciRemove, task.Builder) {
 					continue
 				}
 			}
-
-			if idleBots[task.DeviceType] > 0 {
-				idleBots[task.DeviceType] -= 1
-				runningTasks = append(runningTasks, task)
-			} else {
-				waitingTasks = append(waitingTasks, task)
-			}
 		}
-
-		// Generate added traffic from proposed Builder changes.
-		processCiTriggers(
-			ciTriggers,
-			builderProfiles,
-			&runningTasks,
-			&waitingTasks,
-			idleBots,
-			timestepIncrement,
-			currTime,
-		)
-
-		// Increment our waiting and runtime counts.
-		for _, task := range waitingTasks {
-			waitingTimes[task.DeviceType] += timestepIncrement
-		}
-
-		for _, task := range runningTasks {
-			runtimes[task.DeviceType] += timestepIncrement
-		}
-
-		if c.verbose {
-			stepResults = append(stepResults, StepStatus{
-				currTime,
-				runningTasks,
-				waitingTasks,
-				idleBots,
-				taskRuns,
-				waitingTimes,
-			})
+		if s.idleBots[task.DeviceType] > 0 {
+			s.idleBots[task.DeviceType] -= 1
+			s.runningTasks = append(s.runningTasks, task)
+		} else {
+			updatedWaitingTasks = append(updatedWaitingTasks, task)
 		}
 	}
-	return SimulateResults{waitingTimes, runtimes, stepResults}
+	s.waitingTasks = updatedWaitingTasks
+}
+
+// processIncomingTasks is a helper that moves tasks from the taskRuns queue to the waitingTasks queue.
+// Tasks are only moved if the simulation is past the point they were created.
+func (s *Simulator) processIncomingTasks() {
+	for len(s.taskRuns) > 0 && s.taskRuns[0].CreateTime.Before(s.currTime) {
+		task := s.taskRuns[0]
+		s.taskRuns = s.taskRuns[1:]
+		s.waitingTasks = append(s.waitingTasks, task)
+	}
 }
 
 // processCiTriggers is a helper that enqueues subtasks for the simulate command.
-// Added CI builders have an associated countdown timer that indicates how long
-// until we trigger the next round of subtasks. Then the countdown timer resets
-// to the duration of the builder orchestrator.
-func processCiTriggers(
-	ciTriggers map[string]time.Duration,
-	builderProfiles map[string]BuilderProfile,
-	runningTasks *[]TestingTask,
-	waitingTasks *[]TestingTask,
-	idleBots map[string]int,
-	timestepIncrement time.Duration,
-	currTime time.Time) {
-	for builder, countdown := range ciTriggers {
-		if countdown <= 0*time.Second {
+// Added CI builders have an associated timestamp that indicates when we should
+// trigger the next round of subtasks.
+func (s *Simulator) processCITriggers() {
+	for builder, nextTrigger := range s.ciTriggers {
+		if s.currTime.After(nextTrigger) {
 			// Trigger all subtasks in profile.
-			for _, testingTasks := range builderProfiles[builder].DeviceFootprints {
-				for i := 0; i < testingTasks.AverageTasks; i++ {
+			for _, deviceFootprint := range s.builderProfiles[builder].DeviceFootprints {
+				for i := 0; i < deviceFootprint.AverageTasks; i++ {
 					task := TestingTask{
-						builder,
-						currTime,
-						testingTasks.AverageTaskDuration,
-						0 * time.Second,
-						testingTasks.DeviceType,
-						false,
+						Builder:         builder,
+						CreateTime:      s.currTime,
+						Duration:        deviceFootprint.AverageTaskDuration,
+						PendingDuration: 0,
+						DeviceType:      deviceFootprint.DeviceType,
+						IsTry:           false,
 					}
-					if idleBots[task.DeviceType] > 0 {
-						idleBots[task.DeviceType] -= 1
-						*runningTasks = append(*runningTasks, task)
-					} else {
-						*waitingTasks = append(*waitingTasks, task)
-					}
+					s.waitingTasks = append(s.waitingTasks, task)
 				}
 			}
-			// Reset the countdown.
-			ciTriggers[builder] = builderProfiles[builder].AverageBuildDuration
-		} else {
-			// Decrement the counter and continue.
-			ciTriggers[builder] -= timestepIncrement
+			// Set the next trigger.
+			s.ciTriggers[builder] = s.currTime.Add(s.builderProfiles[builder].AverageBuildDuration)
 		}
 	}
 }
 
-func runQuery(ctx context.Context, client *bigquery.Client, query string, params []bigquery.QueryParameter, ptr interface{}) error {
+// processCQAttempts is a helper that enqueues subtasks for the simulate command.
+// It loops through historical presubmit attempts and enqueues tasks for each
+// added CQ builder.
+// TODO(fxbug.dev/95819): Only trigger CQ builders for the projects they're
+// enabled for.
+func (s *Simulator) processCQAttempts() {
+	for ; len(s.cqAttempts) > 0 && (s.cqAttempts)[0].StartTime.Before(s.currTime); s.cqAttempts = s.cqAttempts[1:] {
+		for _, builder := range s.cqAdd {
+			// Trigger all subtasks in profile.
+			for _, deviceFootprint := range s.builderProfiles[builder].DeviceFootprints {
+				for i := 0; i < deviceFootprint.AverageTasks; i++ {
+					task := TestingTask{
+						Builder:         builder,
+						CreateTime:      s.currTime,
+						Duration:        deviceFootprint.AverageTaskDuration,
+						PendingDuration: 0,
+						DeviceType:      deviceFootprint.DeviceType,
+						IsTry:           true,
+					}
+					s.waitingTasks = append(s.waitingTasks, task)
+				}
+			}
+		}
+	}
+}
+
+func runQuery(
+	ctx context.Context,
+	client *bigquery.Client,
+	query string,
+	params []bigquery.QueryParameter, ptr interface{},
+) error {
 	q := client.Query(query)
 	q.Parameters = params
 	iter, err := q.Read(ctx)
@@ -511,7 +572,10 @@
 	return json.Unmarshal(jsonRows, &ptr)
 }
 
-func reportSimulationDifferences(deviceTypes map[string]int, baselineSimulation, proposedSimulation []StepStatus) {
+func reportSimulationDifferences(
+	deviceTypes map[string]int,
+	baselineSimulation, proposedSimulation []StepStatus,
+) {
 	for idx, baselineStep := range baselineSimulation {
 		proposedStep := proposedSimulation[idx]
 		diffStrings := []string{fmt.Sprintf("Diffs for Execution step %d", idx+1)}
@@ -520,27 +584,49 @@
 			bworkers := baselineStep.idleBots[dt]
 			pworkers := proposedStep.idleBots[dt]
 			if bworkers != pworkers {
-				s := fmt.Sprintf("Diff available workers: %s %d | %d", dt, bworkers, pworkers)
+				s := fmt.Sprintf(
+					"Diff available workers: %s %d | %d",
+					dt,
+					bworkers,
+					pworkers,
+				)
 				diffStrings = append(diffStrings, s)
 			}
 			bpending := baselineStep.waitingTimes[dt]
 			ppending := proposedStep.waitingTimes[dt]
 			if bpending != ppending {
-				s := fmt.Sprintf("Diff in pending minutes: %s %f | %f", dt, bpending.Minutes(), ppending.Minutes())
+				s := fmt.Sprintf(
+					"Diff in pending minutes: %s %f | %f",
+					dt,
+					bpending.Minutes(),
+					ppending.Minutes(),
+				)
 				diffStrings = append(diffStrings, s)
 			}
 		}
 
 		if len(baselineStep.runningTasks) != len(proposedStep.runningTasks) {
-			s := fmt.Sprintf("Diff runningTasks length: %d | %d", len(baselineStep.runningTasks), len(proposedStep.runningTasks))
+			s := fmt.Sprintf(
+				"Diff runningTasks length: %d | %d",
+				len(baselineStep.runningTasks),
+				len(proposedStep.runningTasks),
+			)
 			diffStrings = append(diffStrings, s)
 		}
 		if len(baselineStep.waitingTasks) != len(proposedStep.waitingTasks) {
-			s := fmt.Sprintf("Diff waitingTasks length: %d | %d", len(baselineStep.waitingTasks), len(proposedStep.waitingTasks))
+			s := fmt.Sprintf(
+				"Diff waitingTasks length: %d | %d",
+				len(baselineStep.waitingTasks),
+				len(proposedStep.waitingTasks),
+			)
 			diffStrings = append(diffStrings, s)
 		}
 		if len(baselineStep.taskRuns) != len(proposedStep.taskRuns) {
-			s := fmt.Sprintf("Diff scheduledRuns length: %d | %d", len(baselineStep.taskRuns), len(proposedStep.taskRuns))
+			s := fmt.Sprintf(
+				"Diff scheduledRuns length: %d | %d",
+				len(baselineStep.taskRuns),
+				len(proposedStep.taskRuns),
+			)
 			diffStrings = append(diffStrings, s)
 		}
 

diff --git a/cmd/builder_oracle/simulate_test.go b/cmd/builder_oracle/simulate_test.go
index d2356f6..b3106a5 100644
--- a/cmd/builder_oracle/simulate_test.go
+++ b/cmd/builder_oracle/simulate_test.go

@@ -5,10 +5,11 @@
 package main
 
 import (
-	"reflect"
 	"testing"
 	"time"
 
+	"github.com/google/go-cmp/cmp"
+
 	"go.fuchsia.dev/infra/flagutil"
 )
 
@@ -17,6 +18,12 @@
 	startTime := time.Now()
 	endTime := startTime.Add(30 * time.Minute)
 	idleBots := map[string]int{"a": 3, "b": 1}
+	commitQueueAttempts := []CommitQueueAttempt{
+		{
+			StartTime:   time.Now().Add(30 * time.Second),
+			ConfigGroup: "fuchsia-fuchsia",
+		},
+	}
 	builderProfiles := map[string]BuilderProfile{
 		"builder_a": {
 			Builder:              "builder_a",
@@ -92,11 +99,7 @@
 		{
 			name: "Can remove runs using cqRemove",
 			cmd: &simulateCmd{
-				commonFlags: commonFlags{},
-				cqAdd:       flagutil.RepeatedStringValue{},
-				cqRemove:    flagutil.RepeatedStringValue{"builder_b"},
-				ciAdd:       flagutil.RepeatedStringValue{},
-				ciRemove:    flagutil.RepeatedStringValue{},
+				cqRemove: flagutil.RepeatedStringValue{"builder_b"},
 			},
 			taskRuns: []TestingTask{
 				{
@@ -129,11 +132,7 @@
 		{
 			name: "Can remove runs using ciRemove",
 			cmd: &simulateCmd{
-				commonFlags: commonFlags{},
-				cqAdd:       flagutil.RepeatedStringValue{},
-				cqRemove:    flagutil.RepeatedStringValue{},
-				ciAdd:       flagutil.RepeatedStringValue{},
-				ciRemove:    flagutil.RepeatedStringValue{"builder_b"},
+				ciRemove: flagutil.RepeatedStringValue{"builder_b"},
 			},
 			taskRuns: []TestingTask{
 				{
@@ -166,11 +165,7 @@
 		{
 			name: "Can add runs using ciAdd",
 			cmd: &simulateCmd{
-				commonFlags: commonFlags{},
-				cqAdd:       flagutil.RepeatedStringValue{},
-				cqRemove:    flagutil.RepeatedStringValue{},
-				ciAdd:       flagutil.RepeatedStringValue{"builder_b"},
-				ciRemove:    flagutil.RepeatedStringValue{},
+				ciAdd: flagutil.RepeatedStringValue{"builder_b"},
 			},
 			taskRuns: []TestingTask{
 				{
@@ -184,20 +179,43 @@
 			},
 			expected: map[string]time.Duration{"b": 2*time.Minute + 20*time.Second},
 		},
+		{
+			name: "Can add runs using cqAdd",
+			cmd: &simulateCmd{
+				cqAdd: flagutil.RepeatedStringValue{"builder_b"},
+			},
+			taskRuns: []TestingTask{
+				{
+					Builder:         "builder_b",
+					CreateTime:      startTime,
+					Duration:        1 * time.Minute,
+					PendingDuration: 0,
+					DeviceType:      "b",
+					IsTry:           true,
+				},
+			},
+			expected: map[string]time.Duration{"b": 1*time.Minute + 50*time.Second},
+		},
 	}
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			res := tc.cmd.simulate(
-				startTime,
-				endTime,
-				idleBots,
-				tc.taskRuns,
-				builderProfiles,
-				false,
-			)
-			if !reflect.DeepEqual(res.waitingTimes, tc.expected) {
-				t.Fatalf("unexpected output:  got %v, wanted %v\n", res.waitingTimes, tc.expected)
+			sim := Simulator{
+				baseline:           false,
+				startTime:          startTime,
+				endTime:            endTime,
+				originalIdleBots:   idleBots,
+				originalTaskRuns:   tc.taskRuns,
+				originalCqAttempts: commitQueueAttempts,
+				builderProfiles:    builderProfiles,
+				cqRemove:           tc.cmd.cqRemove,
+				ciRemove:           tc.cmd.ciRemove,
+				cqAdd:              tc.cmd.cqAdd,
+				ciAdd:              tc.cmd.ciAdd,
+			}
+			res := sim.simulate()
+			if diff := cmp.Diff(res.waitingTimes, tc.expected); diff != "" {
+				t.Fatalf("unexpected output: (-got +want):\n%s", diff)
 			}
 		})
 	}
@@ -237,36 +255,33 @@
 	}
 
 	testCases := []struct {
-		name                    string
-		ciTriggers              map[string]time.Duration
-		expectedRunningBuilders []TestingTask
-		expectedWaitingBuilders []TestingTask
-		expectedCiTriggers      map[string]time.Duration
+		name                 string
+		ciTriggers           map[string]time.Time
+		expectedWaitingTasks []TestingTask
+		expectedCiTriggers   map[string]time.Time
 	}{
 		{
 			name: "Enqueues no subtasks if no triggers are ready",
-			ciTriggers: map[string]time.Duration{
-				"builder_a": 20 * time.Second,
-				"builder_b": 15 * time.Second,
-				"builder_c": 10 * time.Second,
+			ciTriggers: map[string]time.Time{
+				"builder_a": currTime.Add(20 * time.Second),
+				"builder_b": currTime.Add(15 * time.Second),
+				"builder_c": currTime.Add(10 * time.Second),
 			},
-			expectedRunningBuilders: []TestingTask{},
-			expectedWaitingBuilders: []TestingTask{},
-			expectedCiTriggers: map[string]time.Duration{
-				"builder_a": 15 * time.Second,
-				"builder_b": 10 * time.Second,
-				"builder_c": 5 * time.Second,
+			expectedWaitingTasks: []TestingTask{},
+			expectedCiTriggers: map[string]time.Time{
+				"builder_a": currTime.Add(20 * time.Second),
+				"builder_b": currTime.Add(15 * time.Second),
+				"builder_c": currTime.Add(10 * time.Second),
 			},
 		},
 		{
-			name: "Enqueues to waitingBuilders if there are no available bots",
-			ciTriggers: map[string]time.Duration{
-				"builder_a": 0 * time.Second,
-				"builder_b": 15 * time.Second,
-				"builder_c": 10 * time.Second,
+			name: "Enqueues to waitingBuilders if there is a trigger ready",
+			ciTriggers: map[string]time.Time{
+				"builder_a": currTime.Add(-1 * time.Second),
+				"builder_b": currTime.Add(15 * time.Second),
+				"builder_c": currTime.Add(10 * time.Second),
 			},
-			expectedRunningBuilders: []TestingTask{},
-			expectedWaitingBuilders: []TestingTask{
+			expectedWaitingTasks: []TestingTask{
 				{
 					Builder:         "builder_a",
 					CreateTime:      currTime,
@@ -276,43 +291,18 @@
 					IsTry:           false,
 				},
 			},
-			expectedCiTriggers: map[string]time.Duration{
-				"builder_a": 5 * time.Minute,
-				"builder_b": 10 * time.Second,
-				"builder_c": 5 * time.Second,
-			},
-		},
-		{
-			name: "Enqueues to runningBuilders if there are available bots",
-			ciTriggers: map[string]time.Duration{
-				"builder_a": 20 * time.Second,
-				"builder_b": 0 * time.Second,
-				"builder_c": 10 * time.Second,
-			},
-			expectedRunningBuilders: []TestingTask{
-				{
-					Builder:         "builder_b",
-					CreateTime:      currTime,
-					Duration:        30 * time.Second,
-					PendingDuration: 0,
-					DeviceType:      "b",
-					IsTry:           false,
-				},
-			},
-			expectedWaitingBuilders: []TestingTask{},
-			expectedCiTriggers: map[string]time.Duration{
-				"builder_a": 15 * time.Second,
-				"builder_b": 4 * time.Minute,
-				"builder_c": 5 * time.Second,
+			expectedCiTriggers: map[string]time.Time{
+				"builder_a": currTime.Add(5 * time.Minute),
+				"builder_b": currTime.Add(15 * time.Second),
+				"builder_c": currTime.Add(10 * time.Second),
 			},
 		},
 	}
 
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			ctcopy := map[string]time.Duration{}
-			runningBuilders := []TestingTask{}
-			waitingBuilders := []TestingTask{}
+			ctcopy := map[string]time.Time{}
+			waitingTasks := []TestingTask{}
 			idleBots := map[string]int{
 				"a": 0,
 				"b": 1,
@@ -320,23 +310,130 @@
 			for k, v := range tc.ciTriggers {
 				ctcopy[k] = v
 			}
-			processCiTriggers(
-				ctcopy,
-				builderProfiles,
-				&runningBuilders,
-				&waitingBuilders,
-				idleBots,
-				timestepIncrement,
-				currTime,
-			)
-			if !reflect.DeepEqual(runningBuilders, tc.expectedRunningBuilders) {
-				t.Fatalf("unexpected output:  got %v, wanted %v\n", runningBuilders, tc.expectedRunningBuilders)
+			sim := Simulator{
+				baseline:          false,
+				builderProfiles:   builderProfiles,
+				currTime:          currTime,
+				ciTriggers:        ctcopy,
+				idleBots:          idleBots,
+				timestepIncrement: timestepIncrement,
+				waitingTasks:      waitingTasks,
 			}
-			if !reflect.DeepEqual(waitingBuilders, tc.expectedWaitingBuilders) {
-				t.Fatalf("unexpected output:  got %v, wanted %v\n", waitingBuilders, tc.expectedWaitingBuilders)
+			sim.processCITriggers()
+			if diff := cmp.Diff(sim.waitingTasks, tc.expectedWaitingTasks); diff != "" {
+				t.Fatalf("unexpected output: (-got +want):\n%s", diff)
 			}
-			if !reflect.DeepEqual(ctcopy, tc.expectedCiTriggers) {
-				t.Fatalf("unexpected output:  got %v, wanted %v\n", ctcopy, tc.expectedCiTriggers)
+			if diff := cmp.Diff(sim.ciTriggers, tc.expectedCiTriggers); diff != "" {
+				t.Fatalf("unexpected output: (-got +want):\n%s", diff)
+			}
+		})
+	}
+}
+
+func TestProcessCqAttempts(t *testing.T) {
+	// default test vars
+	currTime := time.Now()
+	builderProfiles := map[string]BuilderProfile{
+		"builder_a": {
+			Builder:              "builder_a",
+			FlakeRate:            0.02,
+			NumBuilds:            100,
+			AverageBuildDuration: 5 * time.Minute,
+			DeviceFootprints: []BuilderDeviceFootprint{
+				{
+					DeviceType:          "a",
+					AverageTasks:        1,
+					AverageTaskDuration: 1 * time.Minute,
+				},
+			},
+		},
+		"builder_b": {
+			Builder:              "builder_b",
+			FlakeRate:            0.00,
+			NumBuilds:            100,
+			AverageBuildDuration: 4 * time.Minute,
+			DeviceFootprints: []BuilderDeviceFootprint{
+				{
+					DeviceType:          "b",
+					AverageTasks:        1,
+					AverageTaskDuration: 30 * time.Second,
+				},
+			},
+		},
+	}
+	testCases := []struct {
+		name                        string
+		cqAdd                       []string
+		commitQueueAttempts         []CommitQueueAttempt
+		expectedWaitingTasks        []TestingTask
+		expectedCommitQueueAttempts []CommitQueueAttempt
+	}{
+		{
+			name:  "Enqueues no subtasks if there aren't any CommitQueueAttempts with appropriate timestamp",
+			cqAdd: []string{"builder_a", "builder_b"},
+			commitQueueAttempts: []CommitQueueAttempt{
+				{
+					StartTime:   currTime.Add(30 * time.Second),
+					ConfigGroup: "fuchsia-fuchsia",
+				},
+			},
+			expectedWaitingTasks: []TestingTask{},
+			expectedCommitQueueAttempts: []CommitQueueAttempt{
+				{
+					StartTime:   currTime.Add(30 * time.Second),
+					ConfigGroup: "fuchsia-fuchsia",
+				},
+			},
+		},
+		{
+			name:  "Enqueues subtasks to waitingTasks if there is a CommitQueueAttempt with an appropriate timestamp",
+			cqAdd: []string{"builder_b"},
+			commitQueueAttempts: []CommitQueueAttempt{
+				{
+					StartTime:   currTime.Add(-30 * time.Second),
+					ConfigGroup: "fuchsia-fuchsia",
+				},
+			},
+			expectedWaitingTasks: []TestingTask{
+				{
+					Builder:         "builder_b",
+					CreateTime:      currTime,
+					Duration:        30 * time.Second,
+					PendingDuration: 0,
+					DeviceType:      "b",
+					IsTry:           true,
+				},
+			},
+			expectedCommitQueueAttempts: []CommitQueueAttempt{},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			cqaCopy := []CommitQueueAttempt{}
+			for _, attempt := range tc.commitQueueAttempts {
+				cqaCopy = append(cqaCopy, attempt)
+			}
+			waitingTasks := []TestingTask{}
+			idleBots := map[string]int{
+				"a": 0,
+				"b": 1,
+			}
+			sim := Simulator{
+				baseline:        false,
+				builderProfiles: builderProfiles,
+				cqAttempts:      cqaCopy,
+				currTime:        currTime,
+				cqAdd:           tc.cqAdd,
+				idleBots:        idleBots,
+				waitingTasks:    waitingTasks,
+			}
+			sim.processCQAttempts()
+			if diff := cmp.Diff(sim.waitingTasks, tc.expectedWaitingTasks); diff != "" {
+				t.Fatalf("unexpected output: (-got +want):\n%s", diff)
+			}
+			if diff := cmp.Diff(sim.cqAttempts, tc.expectedCommitQueueAttempts); diff != "" {
+				t.Fatalf("unexpected output: (-got +want):\n%s", diff)
 			}
 		})
 	}
commit	63e26b633f84aab58041cb0c25f61b121775dec6	[log] [tgz]
author	Catherine Duncan <catduncan@google.com>	Thu Apr 07 00:43:48 2022 +0000
committer	Commit Bot <commit-bot@chromium.org>	Thu Apr 07 00:43:48 2022 +0000
tree	c69581f68ebec75f734245e38bfa70c4cbefec9d
parent	5d8b74228085f62cd6e7908b313588edae8108c6 [diff]