[builder_oracle] Simulate adding builders to presubmit
With the new cq-add flag, builder_oracle can now estimate the impact of
adding a builder to presubmit. Currently this adds the builder to
presubmit for all projects; per-project granularity will be a follow-up
task (fxbug.dev/95819).
Bug: 93986
Change-Id: I7a8681043c62734a934380b0be00a2bd55ee49f7
Reviewed-on: https://fuchsia-review.googlesource.com/c/infra/infra/+/660346
Reviewed-by: Oliver Newman <olivernewman@google.com>
Commit-Queue: Catherine Duncan <catduncan@google.com>
diff --git a/cmd/builder_oracle/common.go b/cmd/builder_oracle/common.go
index 56987aa..49826a4 100644
--- a/cmd/builder_oracle/common.go
+++ b/cmd/builder_oracle/common.go
@@ -15,9 +15,10 @@
type commonFlags struct {
subcommands.CommandRunBase
- authFlags authcli.Flags
- verbose bool
- quiet bool
+ authFlags authcli.Flags
+ verbose bool
+ veryVerbose bool
+ quiet bool
parsedAuthOpts auth.Options
}
@@ -27,6 +28,7 @@
c.authFlags.Register(&c.Flags, authOpts)
c.Flags.BoolVar(&c.verbose, "verbose", false, "Enable additional informational output")
c.Flags.BoolVar(&c.verbose, "v", false, "Enable additional informational output")
+ c.Flags.BoolVar(&c.veryVerbose, "vv", false, "Enable much more informational output")
c.Flags.BoolVar(&c.quiet, "quiet", false, "Disable all CLI output")
c.Flags.BoolVar(&c.quiet, "q", false, "Disable all CLI output")
}
diff --git a/cmd/builder_oracle/queries.go b/cmd/builder_oracle/queries.go
index ad58c46..cd4356f 100644
--- a/cmd/builder_oracle/queries.go
+++ b/cmd/builder_oracle/queries.go
@@ -55,8 +55,8 @@
* 1000 AS runtime_nanos
FROM chrome-swarming.swarming.task_results_summary
WHERE
- create_time >= TIMESTAMP(@StartTime)
- AND create_time <= TIMESTAMP(@EndTime)
+ create_time >= @StartTime
+ AND create_time <= @EndTime
AND create_time IS NOT NULL
AND bot.pools[OFFSET(0)] LIKE 'fuchsia.tests%'
@@ -80,8 +80,8 @@
COUNT(DISTINCT r.bot.bot_id) AS bot_count
FROM bot_events_all AS r
WHERE
- r.event_time >= TIMESTAMP(@StartTime)
- AND r.event_time <= TIMESTAMP(@EndTime)
+ r.event_time >= @StartTime
+ AND r.event_time <= @EndTime
AND ARRAY_LENGTH(r.bot.pools)
!= 0
AND r.bot.pools[OFFSET(0)]
@@ -112,8 +112,8 @@
FROM
cr-buildbucket.fuchsia.builds AS b
WHERE
- b.create_time >= TIMESTAMP(@StartTime)
- AND b.create_time <= TIMESTAMP(@EndTime)
+ b.create_time >= @StartTime
+ AND b.create_time <= @EndTime
AND EXTRACT(HOUR FROM b.create_time AT TIME ZONE 'America/Los_Angeles')
IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
AND EXTRACT(
@@ -135,8 +135,8 @@
TO_MACHINE(c) AS device_type
FROM swarming_all AS c
WHERE
- c.create_time >= TIMESTAMP(@StartTime)
- AND c.create_time <= TIMESTAMP(@EndTime)
+ c.create_time >= @StartTime
+ AND c.create_time <= @EndTime
AND EXTRACT(HOUR FROM c.create_time AT TIME ZONE 'America/Los_Angeles')
IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
AND EXTRACT(
@@ -183,8 +183,8 @@
FROM
cr-buildbucket.fuchsia.builds AS b
WHERE
- b.create_time >= TIMESTAMP(@StartTime)
- AND b.create_time <= TIMESTAMP(@EndTime)
+ b.create_time >= @StartTime
+ AND b.create_time <= @EndTime
AND EXTRACT(HOUR FROM b.create_time AT TIME ZONE 'America/Los_Angeles')
IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
AND EXTRACT(
@@ -206,8 +206,8 @@
TO_MACHINE(c) AS device_type
FROM swarming_all AS c
WHERE
- c.create_time >= TIMESTAMP(@StartTime)
- AND c.create_time <= TIMESTAMP(@EndTime)
+ c.create_time >= @StartTime
+ AND c.create_time <= @EndTime
AND EXTRACT(HOUR FROM c.create_time AT TIME ZONE 'America/Los_Angeles')
IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
AND EXTRACT(
@@ -266,8 +266,8 @@
FROM cr-buildbucket.fuchsia.builds
WHERE
builder.builder NOT LIKE '%-subbuild'
- AND create_time > TIMESTAMP(@StartTime)
- AND create_time < TIMESTAMP(@EndTime)
+ AND create_time > @StartTime
+ AND create_time < @EndTime
AND EXTRACT(HOUR FROM create_time AT TIME ZONE 'America/Los_Angeles')
IN (9, 10, 11, 12, 13, 14, 15, 16, 17)
AND EXTRACT(
@@ -283,8 +283,8 @@
COUNTIF(status = 'SUCCESS') AS passes
FROM cr-buildbucket.fuchsia.builds
WHERE
- create_time > TIMESTAMP(@StartTime)
- AND create_time < TIMESTAMP(@EndTime)
+ create_time > @StartTime
+ AND create_time < @EndTime
AND NOT input.gitiles_commit.id IS NULL
GROUP BY builder, input.gitiles_commit.id
ORDER BY failures DESC
@@ -333,3 +333,22 @@
AverageTasks int `json:"avg_tasks"`
AverageTaskDuration time.Duration `json:"avg_task_duration"`
}
+
+// CommitQueueAttemptsQuery fetches all CQ attempts that started during the period of interest.
+// The results are used to trigger builders that have been added to CQ.
+var CommitQueueAttemptsQuery = `
+SELECT
+ start_time,
+ config_group
+FROM commit-queue.fuchsia.attempts
+WHERE
+ start_time >= @StartTime
+ AND start_time < @EndTime
+ORDER BY start_time ASC
+`
+
+// CommitQueueAttempt is a record of an attempt to verify a change for a particular project.
+type CommitQueueAttempt struct {
+ StartTime time.Time `json:"start_time"`
+ ConfigGroup string `json:"config_group"`
+}
diff --git a/cmd/builder_oracle/simulate.go b/cmd/builder_oracle/simulate.go
index 2abede3..5d41911 100644
--- a/cmd/builder_oracle/simulate.go
+++ b/cmd/builder_oracle/simulate.go
@@ -52,11 +52,17 @@
}
func (c *simulateCmd) Init(defaultAuthOpts auth.Options) {
+ descs := map[string]string{
+ "ci-add": "Builder to be added to CI, multiple instances of this flag are allowed",
+ "ci-remove": "Builder to be removed from CI, multiple instances of this flag are allowed",
+ "cq-add": "Builder to be added to presubmit, multiple instances of this flag are allowed",
+ "cq-remove": "Builder to be removed from presubmit, multiple instances of this flag are allowed",
+ }
c.commonFlags.Init(defaultAuthOpts)
- c.Flags.Var(&c.cqAdd, "cq-add", "Builder to be added to presubmit, multiple instances of this flag are allowed")
- c.Flags.Var(&c.cqRemove, "cq-remove", "Builder to be removed from presubmit, multiple instances of this flag are allowed")
- c.Flags.Var(&c.ciAdd, "ci-add", "Builder to be added to CI, multiple instances of this flag are allowed")
- c.Flags.Var(&c.ciRemove, "ci-remove", "Builder to be removed from CI, multiple instances of this flag are allowed")
+ c.Flags.Var(&c.ciAdd, "ci-add", descs["ci-add"])
+ c.Flags.Var(&c.ciRemove, "ci-remove", descs["ci-remove"])
+ c.Flags.Var(&c.cqAdd, "cq-add", descs["cq-add"])
+ c.Flags.Var(&c.cqRemove, "cq-remove", descs["cq-remove"])
}
func (c *simulateCmd) parseArgs() error {
@@ -116,20 +122,18 @@
endTime := time.Now().In(pst).Truncate(24 * time.Hour)
startTime := endTime.Add(-periodOfInterestDuration)
- startTimeString := startTime.Format("2006-01-02 15:04:05+00")
- endTimeString := endTime.Format("2006-01-02 15:04:05+00")
- log.Printf(" Period of interest: %s - %s", startTimeString, endTimeString)
+ log.Printf(" Period of interest: %s - %s", startTime, endTime)
log.Print("Fetching average subtask runtime...")
var avgTaskRuntimeResult []AvgTaskDuration
timeframeParams := []bigquery.QueryParameter{
{
Name: "StartTime",
- Value: startTimeString,
+ Value: startTime,
},
{
Name: "EndTime",
- Value: endTimeString,
+ Value: endTime,
},
}
if err := runQuery(ctx, client, AvgTaskRuntimeQuery, timeframeParams, &avgTaskRuntimeResult); err != nil {
@@ -189,44 +193,44 @@
}
builderProfileMap := map[string]BuilderProfile{}
- if len(c.cqAdd)+len(c.ciAdd) > 0 {
- log.Print("Fetching statistics on proposed builder additions...")
- builderAddList := append(c.ciAdd, c.cqAdd...)
- params := []bigquery.QueryParameter{
- {
- Name: "StartTime",
- Value: startTimeString,
- },
- {
- Name: "EndTime",
- Value: endTimeString,
- },
- {
- Name: "AddedBuilders",
- Value: builderAddList,
- },
- }
+ log.Print("Fetching statistics on proposed builder additions...")
- var builderProfileSlice []BuilderProfile
- if err := runQuery(ctx, client, BuilderTestingTaskProfileQuery, params, &builderProfileSlice); err != nil {
+ var builderProfileSlice []BuilderProfile
+ if err := runQuery(ctx, client, BuilderTestingTaskProfileQuery, timeframeParams, &builderProfileSlice); err != nil {
+ return err
+ }
+ for _, profile := range builderProfileSlice {
+ builderProfileMap[profile.Builder] = profile
+ }
+
+ var cqAttempts []CommitQueueAttempt
+ if len(c.cqAdd) > 0 {
+ log.Print("Fetching historical presubmit events...")
+ if err := runQuery(ctx, client, CommitQueueAttemptsQuery, timeframeParams, &cqAttempts); err != nil {
return err
}
- for _, profile := range builderProfileSlice {
- builderProfileMap[profile.Builder] = profile
- }
}
+ sim := Simulator{
+ baseline: true,
+ startTime: startTime,
+ endTime: endTime,
+ originalIdleBots: availableBots,
+ originalTaskRuns: testingTaskRuns,
+ originalCqAttempts: cqAttempts,
+ builderProfiles: builderProfileMap,
+ cqRemove: c.cqRemove,
+ ciRemove: c.ciRemove,
+ cqAdd: c.cqAdd,
+ ciAdd: c.ciAdd,
+ }
+
log.Print("Running Simulation to gather baseline information...")
- result := c.simulate(startTime, endTime, availableBots, testingTaskRuns, builderProfileMap, true)
- baselineWaitingTimes := result.waitingTimes
- baselineRuntimes := result.runtimes
- stepResultsBaseline := result.stepResults
+ baseline := sim.simulate()
log.Print("Running Simulation with proposed changes...")
- result = c.simulate(startTime, endTime, availableBots, testingTaskRuns, builderProfileMap, false)
- proposedWaitingTimes := result.waitingTimes
- proposedRuntimes := result.runtimes
- stepResultsProposed := result.stepResults
+ sim.baseline = false
+ proposed := sim.simulate()
log.Print("Simulation Complete!")
historicalPending := map[string]time.Duration{}
@@ -234,8 +238,8 @@
historicalPending[testingTask.DeviceType] += testingTask.PendingDuration
}
- if c.verbose {
- reportSimulationDifferences(availableBots, stepResultsBaseline, stepResultsProposed)
+ if c.veryVerbose {
+ reportSimulationDifferences(availableBots, baseline.stepResults, proposed.stepResults)
}
fmt.Println("\nDifference in hardware pending times (in minutes):")
@@ -259,18 +263,18 @@
"%s%s | %f | %f | %f | %f\n",
dt,
buffer,
- (proposedWaitingTimes[dt] - baselineWaitingTimes[dt]).Minutes(),
- baselineWaitingTimes[dt].Minutes(),
- proposedWaitingTimes[dt].Minutes(),
+ (proposed.waitingTimes[dt] - baseline.waitingTimes[dt]).Minutes(),
+ baseline.waitingTimes[dt].Minutes(),
+ proposed.waitingTimes[dt].Minutes(),
historicalPending[dt].Minutes())
} else {
fmt.Printf(
"%s%s | %f | %f | %f\n",
dt,
buffer,
- (proposedWaitingTimes[dt] - baselineWaitingTimes[dt]).Minutes(),
- baselineWaitingTimes[dt].Minutes(),
- proposedWaitingTimes[dt].Minutes())
+ (proposed.waitingTimes[dt] - baseline.waitingTimes[dt]).Minutes(),
+ baseline.waitingTimes[dt].Minutes(),
+ proposed.waitingTimes[dt].Minutes())
}
}
@@ -283,9 +287,9 @@
"%s%s | %f | %f | %f\n",
dt,
buffer,
- (proposedRuntimes[dt] - baselineRuntimes[dt]).Hours(),
- baselineRuntimes[dt].Hours(),
- proposedRuntimes[dt].Hours())
+ (proposed.runtimes[dt] - baseline.runtimes[dt]).Hours(),
+ baseline.runtimes[dt].Hours(),
+ proposed.runtimes[dt].Hours())
}
return nil
@@ -302,186 +306,243 @@
RecoveryTime time.Time
}
-func (c *simulateCmd) simulate(
- startTime time.Time,
- endTime time.Time,
- originalIdleBots map[string]int,
- taskRuns []TestingTask,
- builderProfiles map[string]BuilderProfile,
- baseline bool,
-) SimulateResults {
+type Simulator struct {
+ baseline bool // whether the simulation should factor in builder changes
+ cqAttempts []CommitQueueAttempt // working copy of originalCqAttempts
+ originalCqAttempts []CommitQueueAttempt // incoming presubmit verification requests
+ recoveringBots []RecoveringBot // bots that are performing maintenance between tasks
+ ciAdd []string // builders added to ci
+ ciRemove []string // builders removed from ci
+ cqAdd []string // builders added to cq
+ cqRemove []string // builders removed from cq
+ stepResults []StepStatus // debug logging for observing the simulation in detail
+ runningTasks []TestingTask // tasks that are using a bot and have not completed
+ waitingTasks []TestingTask // tasks that are waiting for an available bot
+ originalTaskRuns []TestingTask // historical tasks observed over the simulated period
+ taskRuns []TestingTask // working copy of originalTaskRuns
+ builderProfiles map[string]BuilderProfile // builder information and bot footprint
+ idleBots map[string]int // working copy of originalIdleBots
+ originalIdleBots map[string]int // available bots for tasks
+ runtimes map[string]time.Duration // amount of bot time used by builders
+ waitingTimes map[string]time.Duration // amount of time spent waiting for available bots
+ ciTriggers map[string]time.Time // next timestamp to trigger new ci builders
+ timestepIncrement time.Duration // how far in time the simulation should move between steps
+ currTime time.Time // the current time in the simulation
+ endTime time.Time // the end of the simulated period
+ startTime time.Time // the start of the simulated period
+}
- const timestepIncrement = 5 * time.Second
- var runningTasks, waitingTasks []TestingTask
- waitingTimes := map[string]time.Duration{}
- runtimes := map[string]time.Duration{}
- recoveringBots := []RecoveringBot{}
- var stepResults []StepStatus
- idleBots := map[string]int{}
- for k, v := range originalIdleBots {
- idleBots[k] = v
- }
- var cqRemove, ciRemove, ciAdd []string
- if !baseline {
- cqRemove = c.cqRemove
- ciRemove = c.ciRemove
- ciAdd = c.ciAdd
- }
+// Hardware-backed bots need a recovery period after each task; gce and Cavium bots do not.
+var recoveryExcludedBots = []string{"gce", "Cavium"}
+func (s *Simulator) simulate() SimulateResults {
+ // initialize working variables for the new simulation.
+ s.initVars()
+
+ for s.currTime = s.startTime; s.currTime.Before(s.endTime); s.currTime = s.currTime.Add(s.timestepIncrement) {
+
+ // Check if any bots have finished their tasks and recovery period and
+ // make them available.
+ s.recoverBots()
+
+ // Prune finished tasks from the runningTask queue.
+ s.processFinishedTasks()
+
+ // Add incoming historical traffic to the waitingTask queue.
+ s.processIncomingTasks()
+
+ // Add traffic from proposed changes to the waitingTask queue.
+ if !s.baseline {
+ s.processCITriggers()
+ s.processCQAttempts()
+ }
+
+ // Start waiting tasks if there are available bots.
+ s.processWaitingTasks()
+
+ // Increment our waiting and runtime counts.
+ for _, task := range s.waitingTasks {
+ s.waitingTimes[task.DeviceType] += s.timestepIncrement
+ }
+
+ for _, task := range s.runningTasks {
+ s.runtimes[task.DeviceType] += s.timestepIncrement
+ }
+
+ // Log this step in the simulation for debugging.
+ idleBotsCopy := map[string]int{}
+ for k, v := range s.idleBots {
+ idleBotsCopy[k] = v
+ }
+ s.stepResults = append(s.stepResults, StepStatus{
+ s.currTime,
+ s.runningTasks,
+ s.waitingTasks,
+ idleBotsCopy,
+ s.taskRuns,
+ s.waitingTimes,
+ })
+ }
+ return SimulateResults{s.waitingTimes, s.runtimes, s.stepResults}
+}
+
+// initVars is a helper that initializes working variables before simulate() calls.
+func (s *Simulator) initVars() {
+ s.timestepIncrement = 5 * time.Second
+ s.runningTasks = []TestingTask{}
+ s.waitingTasks = []TestingTask{}
+ s.waitingTimes = map[string]time.Duration{}
+ s.runtimes = map[string]time.Duration{}
+ s.recoveringBots = []RecoveringBot{}
+ s.stepResults = []StepStatus{}
+ s.idleBots = map[string]int{}
+ for k, v := range s.originalIdleBots {
+ s.idleBots[k] = v
+ }
// This is a rough approximation for adding builders to CI. This assumes
// that there is a constant influx of CLs to merit triggering the builder
// after each run has completed.
// TODO(fxbug.dev/95557): Track merge events and use
// max_concurrent_invocations and batching rules to simulate with higher
// fidelity.
- ciTriggers := map[string]time.Duration{}
- for _, builder := range ciAdd {
- ciTriggers[builder] = 0 * time.Second
+ s.ciTriggers = map[string]time.Time{}
+ for _, builder := range s.ciAdd {
+ s.ciTriggers[builder] = s.startTime
}
+ s.taskRuns = s.originalTaskRuns
+ s.cqAttempts = s.originalCqAttempts
+}
- for currTime := startTime; currTime.Before(endTime); currTime = currTime.Add(timestepIncrement) {
-
- // Check if any bots have finished recovering and make them available.
- recoveringBotSliceIndex := len(recoveringBots)
- for idx, bot := range recoveringBots {
- if currTime.After(bot.RecoveryTime) {
- idleBots[bot.DeviceType] += 1
- } else {
- recoveringBotSliceIndex = idx
- break
- }
+// recoverBots is a helper that adds bots to the idleBots pool if they've finished their tasks and recovered.
+func (s *Simulator) recoverBots() {
+ var updatedRecoveringBots []RecoveringBot
+ for _, bot := range s.recoveringBots {
+ if s.currTime.After(bot.RecoveryTime) {
+ s.idleBots[bot.DeviceType] += 1
+ } else {
+ updatedRecoveringBots = append(updatedRecoveringBots, bot)
}
- recoveringBots = recoveringBots[recoveringBotSliceIndex:]
+ }
+ s.recoveringBots = updatedRecoveringBots
+}
- // Release bots that have finished their workloads.
- // On average physical devices have a ~75s recovery overhead to clean up
- // before they can take on new tasks.
- var updatedRunningTasks []TestingTask
- for _, task := range runningTasks {
- if currTime.After(task.CreateTime.Add(task.Duration)) {
- if !contains([]string{"gce", "Cavium"}, task.DeviceType) {
- recoveringBots = append(recoveringBots, RecoveringBot{task.DeviceType, currTime.Add(75 * time.Second)})
- } else {
- idleBots[task.DeviceType] += 1
- }
+func (s *Simulator) processFinishedTasks() {
+ var updatedRunningTasks []TestingTask
+ for _, task := range s.runningTasks {
+ if s.currTime.After(task.CreateTime.Add(task.Duration)) {
+ if contains(recoveryExcludedBots, task.DeviceType) {
+ s.idleBots[task.DeviceType] += 1
} else {
- updatedRunningTasks = append(updatedRunningTasks, task)
+ s.recoveringBots = append(
+ s.recoveringBots,
+ RecoveringBot{
+ DeviceType: task.DeviceType,
+ RecoveryTime: s.currTime.Add(75 * time.Second),
+ },
+ )
}
+ } else {
+ updatedRunningTasks = append(updatedRunningTasks, task)
}
- runningTasks = updatedRunningTasks
+ }
+ s.runningTasks = updatedRunningTasks
+}
- // Assign bots to tasks from waiting tasks first.
- var updatedWaitingTasks []TestingTask
- for _, task := range waitingTasks {
- if idleBots[task.DeviceType] > 0 {
- idleBots[task.DeviceType] -= 1
- runningTasks = append(runningTasks, task)
- } else {
- updatedWaitingTasks = append(updatedWaitingTasks, task)
- }
- }
- waitingTasks = updatedWaitingTasks
-
- // Then assign bots to tasks from incoming historical traffic.
- for len(taskRuns) > 0 && taskRuns[0].CreateTime.Before(currTime) {
- task := taskRuns[0]
- taskRuns = taskRuns[1:]
-
- if task.IsTry && len(cqRemove) > 0 {
- if contains(cqRemove, task.Builder) {
+// processWaitingTasks is a helper that assigns waiting tasks to an available bot.
+// If no bot is available, it stays in the waitingTasks queue.
+func (s *Simulator) processWaitingTasks() {
+ var updatedWaitingTasks []TestingTask
+ for _, task := range s.waitingTasks {
+ if !s.baseline {
+ // Drop the task if it is proposed for removal in ci/cqRemove
+ if task.IsTry && len(s.cqRemove) > 0 {
+ if contains(s.cqRemove, task.Builder) {
continue
}
- } else if !task.IsTry && len(ciRemove) > 0 {
- if contains(ciRemove, task.Builder) {
+ } else if !task.IsTry && len(s.ciRemove) > 0 {
+ if contains(s.ciRemove, task.Builder) {
continue
}
}
-
- if idleBots[task.DeviceType] > 0 {
- idleBots[task.DeviceType] -= 1
- runningTasks = append(runningTasks, task)
- } else {
- waitingTasks = append(waitingTasks, task)
- }
}
-
- // Generate added traffic from proposed Builder changes.
- processCiTriggers(
- ciTriggers,
- builderProfiles,
- &runningTasks,
- &waitingTasks,
- idleBots,
- timestepIncrement,
- currTime,
- )
-
- // Increment our waiting and runtime counts.
- for _, task := range waitingTasks {
- waitingTimes[task.DeviceType] += timestepIncrement
- }
-
- for _, task := range runningTasks {
- runtimes[task.DeviceType] += timestepIncrement
- }
-
- if c.verbose {
- stepResults = append(stepResults, StepStatus{
- currTime,
- runningTasks,
- waitingTasks,
- idleBots,
- taskRuns,
- waitingTimes,
- })
+ if s.idleBots[task.DeviceType] > 0 {
+ s.idleBots[task.DeviceType] -= 1
+ s.runningTasks = append(s.runningTasks, task)
+ } else {
+ updatedWaitingTasks = append(updatedWaitingTasks, task)
}
}
- return SimulateResults{waitingTimes, runtimes, stepResults}
+ s.waitingTasks = updatedWaitingTasks
+}
+
+// processIncomingTasks is a helper that moves tasks from the taskRuns queue to the waitingTasks queue.
+// Tasks are only moved if the simulation is past the point they were created.
+func (s *Simulator) processIncomingTasks() {
+ for len(s.taskRuns) > 0 && s.taskRuns[0].CreateTime.Before(s.currTime) {
+ task := s.taskRuns[0]
+ s.taskRuns = s.taskRuns[1:]
+ s.waitingTasks = append(s.waitingTasks, task)
+ }
}
// processCiTriggers is a helper that enqueues subtasks for the simulate command.
-// Added CI builders have an associated countdown timer that indicates how long
-// until we trigger the next round of subtasks. Then the countdown timer resets
-// to the duration of the builder orchestrator.
-func processCiTriggers(
- ciTriggers map[string]time.Duration,
- builderProfiles map[string]BuilderProfile,
- runningTasks *[]TestingTask,
- waitingTasks *[]TestingTask,
- idleBots map[string]int,
- timestepIncrement time.Duration,
- currTime time.Time) {
- for builder, countdown := range ciTriggers {
- if countdown <= 0*time.Second {
+// Added CI builders have an associated timestamp that indicates when we should
+// trigger the next round of subtasks.
+func (s *Simulator) processCITriggers() {
+ for builder, nextTrigger := range s.ciTriggers {
+ if s.currTime.After(nextTrigger) {
// Trigger all subtasks in profile.
- for _, testingTasks := range builderProfiles[builder].DeviceFootprints {
- for i := 0; i < testingTasks.AverageTasks; i++ {
+ for _, deviceFootprint := range s.builderProfiles[builder].DeviceFootprints {
+ for i := 0; i < deviceFootprint.AverageTasks; i++ {
task := TestingTask{
- builder,
- currTime,
- testingTasks.AverageTaskDuration,
- 0 * time.Second,
- testingTasks.DeviceType,
- false,
+ Builder: builder,
+ CreateTime: s.currTime,
+ Duration: deviceFootprint.AverageTaskDuration,
+ PendingDuration: 0,
+ DeviceType: deviceFootprint.DeviceType,
+ IsTry: false,
}
- if idleBots[task.DeviceType] > 0 {
- idleBots[task.DeviceType] -= 1
- *runningTasks = append(*runningTasks, task)
- } else {
- *waitingTasks = append(*waitingTasks, task)
- }
+ s.waitingTasks = append(s.waitingTasks, task)
}
}
- // Reset the countdown.
- ciTriggers[builder] = builderProfiles[builder].AverageBuildDuration
- } else {
- // Decrement the counter and continue.
- ciTriggers[builder] -= timestepIncrement
+ // Set the next trigger.
+ s.ciTriggers[builder] = s.currTime.Add(s.builderProfiles[builder].AverageBuildDuration)
}
}
}
-func runQuery(ctx context.Context, client *bigquery.Client, query string, params []bigquery.QueryParameter, ptr interface{}) error {
+// processCQAttempts is a helper that enqueues subtasks for the simulate command.
+// It loops through historical presubmit attempts and enqueues tasks for each
+// added CQ builder.
+// TODO(fxbug.dev/95819): Only trigger cq builders for the projects they're
+// enabled for.
+func (s *Simulator) processCQAttempts() {
+ for ; len(s.cqAttempts) > 0 && (s.cqAttempts)[0].StartTime.Before(s.currTime); s.cqAttempts = s.cqAttempts[1:] {
+ for _, builder := range s.cqAdd {
+ // Trigger all subtasks in profile.
+ for _, deviceFootprint := range s.builderProfiles[builder].DeviceFootprints {
+ for i := 0; i < deviceFootprint.AverageTasks; i++ {
+ task := TestingTask{
+ Builder: builder,
+ CreateTime: s.currTime,
+ Duration: deviceFootprint.AverageTaskDuration,
+ PendingDuration: 0,
+ DeviceType: deviceFootprint.DeviceType,
+ IsTry: true,
+ }
+ s.waitingTasks = append(s.waitingTasks, task)
+ }
+ }
+ }
+ }
+}
+
+func runQuery(
+ ctx context.Context,
+ client *bigquery.Client,
+ query string,
+ params []bigquery.QueryParameter, ptr interface{},
+) error {
q := client.Query(query)
q.Parameters = params
iter, err := q.Read(ctx)
@@ -511,7 +572,10 @@
return json.Unmarshal(jsonRows, &ptr)
}
-func reportSimulationDifferences(deviceTypes map[string]int, baselineSimulation, proposedSimulation []StepStatus) {
+func reportSimulationDifferences(
+ deviceTypes map[string]int,
+ baselineSimulation, proposedSimulation []StepStatus,
+) {
for idx, baselineStep := range baselineSimulation {
proposedStep := proposedSimulation[idx]
diffStrings := []string{fmt.Sprintf("Diffs for Execution step %d", idx+1)}
@@ -520,27 +584,49 @@
bworkers := baselineStep.idleBots[dt]
pworkers := proposedStep.idleBots[dt]
if bworkers != pworkers {
- s := fmt.Sprintf("Diff available workers: %s %d | %d", dt, bworkers, pworkers)
+ s := fmt.Sprintf(
+ "Diff available workers: %s %d | %d",
+ dt,
+ bworkers,
+ pworkers,
+ )
diffStrings = append(diffStrings, s)
}
bpending := baselineStep.waitingTimes[dt]
ppending := proposedStep.waitingTimes[dt]
if bpending != ppending {
- s := fmt.Sprintf("Diff in pending minutes: %s %f | %f", dt, bpending.Minutes(), ppending.Minutes())
+ s := fmt.Sprintf(
+ "Diff in pending minutes: %s %f | %f",
+ dt,
+ bpending.Minutes(),
+ ppending.Minutes(),
+ )
diffStrings = append(diffStrings, s)
}
}
if len(baselineStep.runningTasks) != len(proposedStep.runningTasks) {
- s := fmt.Sprintf("Diff runningTasks length: %d | %d", len(baselineStep.runningTasks), len(proposedStep.runningTasks))
+ s := fmt.Sprintf(
+ "Diff runningTasks length: %d | %d",
+ len(baselineStep.runningTasks),
+ len(proposedStep.runningTasks),
+ )
diffStrings = append(diffStrings, s)
}
if len(baselineStep.waitingTasks) != len(proposedStep.waitingTasks) {
- s := fmt.Sprintf("Diff waitingTasks length: %d | %d", len(baselineStep.waitingTasks), len(proposedStep.waitingTasks))
+ s := fmt.Sprintf(
+ "Diff waitingTasks length: %d | %d",
+ len(baselineStep.waitingTasks),
+ len(proposedStep.waitingTasks),
+ )
diffStrings = append(diffStrings, s)
}
if len(baselineStep.taskRuns) != len(proposedStep.taskRuns) {
- s := fmt.Sprintf("Diff scheduledRuns length: %d | %d", len(baselineStep.taskRuns), len(proposedStep.taskRuns))
+ s := fmt.Sprintf(
+ "Diff scheduledRuns length: %d | %d",
+ len(baselineStep.taskRuns),
+ len(proposedStep.taskRuns),
+ )
diffStrings = append(diffStrings, s)
}
diff --git a/cmd/builder_oracle/simulate_test.go b/cmd/builder_oracle/simulate_test.go
index d2356f6..b3106a5 100644
--- a/cmd/builder_oracle/simulate_test.go
+++ b/cmd/builder_oracle/simulate_test.go
@@ -5,10 +5,11 @@
package main
import (
- "reflect"
"testing"
"time"
+ "github.com/google/go-cmp/cmp"
+
"go.fuchsia.dev/infra/flagutil"
)
@@ -17,6 +18,12 @@
startTime := time.Now()
endTime := startTime.Add(30 * time.Minute)
idleBots := map[string]int{"a": 3, "b": 1}
+ commitQueueAttempts := []CommitQueueAttempt{
+ {
+ StartTime: time.Now().Add(30 * time.Second),
+ ConfigGroup: "fuchsia-fuchsia",
+ },
+ }
builderProfiles := map[string]BuilderProfile{
"builder_a": {
Builder: "builder_a",
@@ -92,11 +99,7 @@
{
name: "Can remove runs using cqRemove",
cmd: &simulateCmd{
- commonFlags: commonFlags{},
- cqAdd: flagutil.RepeatedStringValue{},
- cqRemove: flagutil.RepeatedStringValue{"builder_b"},
- ciAdd: flagutil.RepeatedStringValue{},
- ciRemove: flagutil.RepeatedStringValue{},
+ cqRemove: flagutil.RepeatedStringValue{"builder_b"},
},
taskRuns: []TestingTask{
{
@@ -129,11 +132,7 @@
{
name: "Can remove runs using ciRemove",
cmd: &simulateCmd{
- commonFlags: commonFlags{},
- cqAdd: flagutil.RepeatedStringValue{},
- cqRemove: flagutil.RepeatedStringValue{},
- ciAdd: flagutil.RepeatedStringValue{},
- ciRemove: flagutil.RepeatedStringValue{"builder_b"},
+ ciRemove: flagutil.RepeatedStringValue{"builder_b"},
},
taskRuns: []TestingTask{
{
@@ -166,11 +165,7 @@
{
name: "Can add runs using ciAdd",
cmd: &simulateCmd{
- commonFlags: commonFlags{},
- cqAdd: flagutil.RepeatedStringValue{},
- cqRemove: flagutil.RepeatedStringValue{},
- ciAdd: flagutil.RepeatedStringValue{"builder_b"},
- ciRemove: flagutil.RepeatedStringValue{},
+ ciAdd: flagutil.RepeatedStringValue{"builder_b"},
},
taskRuns: []TestingTask{
{
@@ -184,20 +179,43 @@
},
expected: map[string]time.Duration{"b": 2*time.Minute + 20*time.Second},
},
+ {
+ name: "Can add runs using cqAdd",
+ cmd: &simulateCmd{
+ cqAdd: flagutil.RepeatedStringValue{"builder_b"},
+ },
+ taskRuns: []TestingTask{
+ {
+ Builder: "builder_b",
+ CreateTime: startTime,
+ Duration: 1 * time.Minute,
+ PendingDuration: 0,
+ DeviceType: "b",
+ IsTry: true,
+ },
+ },
+ expected: map[string]time.Duration{"b": 1*time.Minute + 50*time.Second},
+ },
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- res := tc.cmd.simulate(
- startTime,
- endTime,
- idleBots,
- tc.taskRuns,
- builderProfiles,
- false,
- )
- if !reflect.DeepEqual(res.waitingTimes, tc.expected) {
- t.Fatalf("unexpected output: got %v, wanted %v\n", res.waitingTimes, tc.expected)
+ sim := Simulator{
+ baseline: false,
+ startTime: startTime,
+ endTime: endTime,
+ originalIdleBots: idleBots,
+ originalTaskRuns: tc.taskRuns,
+ originalCqAttempts: commitQueueAttempts,
+ builderProfiles: builderProfiles,
+ cqRemove: tc.cmd.cqRemove,
+ ciRemove: tc.cmd.ciRemove,
+ cqAdd: tc.cmd.cqAdd,
+ ciAdd: tc.cmd.ciAdd,
+ }
+ res := sim.simulate()
+ if diff := cmp.Diff(res.waitingTimes, tc.expected); diff != "" {
+ t.Fatalf("unexpected output: (-got +want):\n%s", diff)
}
})
}
@@ -237,36 +255,33 @@
}
testCases := []struct {
- name string
- ciTriggers map[string]time.Duration
- expectedRunningBuilders []TestingTask
- expectedWaitingBuilders []TestingTask
- expectedCiTriggers map[string]time.Duration
+ name string
+ ciTriggers map[string]time.Time
+ expectedWaitingTasks []TestingTask
+ expectedCiTriggers map[string]time.Time
}{
{
name: "Enqueues no subtasks if no triggers are ready",
- ciTriggers: map[string]time.Duration{
- "builder_a": 20 * time.Second,
- "builder_b": 15 * time.Second,
- "builder_c": 10 * time.Second,
+ ciTriggers: map[string]time.Time{
+ "builder_a": currTime.Add(20 * time.Second),
+ "builder_b": currTime.Add(15 * time.Second),
+ "builder_c": currTime.Add(10 * time.Second),
},
- expectedRunningBuilders: []TestingTask{},
- expectedWaitingBuilders: []TestingTask{},
- expectedCiTriggers: map[string]time.Duration{
- "builder_a": 15 * time.Second,
- "builder_b": 10 * time.Second,
- "builder_c": 5 * time.Second,
+ expectedWaitingTasks: []TestingTask{},
+ expectedCiTriggers: map[string]time.Time{
+ "builder_a": currTime.Add(20 * time.Second),
+ "builder_b": currTime.Add(15 * time.Second),
+ "builder_c": currTime.Add(10 * time.Second),
},
},
{
- name: "Enqueues to waitingBuilders if there are no available bots",
- ciTriggers: map[string]time.Duration{
- "builder_a": 0 * time.Second,
- "builder_b": 15 * time.Second,
- "builder_c": 10 * time.Second,
+ name: "Enqueues to waitingBuilders if there is a trigger ready",
+ ciTriggers: map[string]time.Time{
+ "builder_a": currTime.Add(-1 * time.Second),
+ "builder_b": currTime.Add(15 * time.Second),
+ "builder_c": currTime.Add(10 * time.Second),
},
- expectedRunningBuilders: []TestingTask{},
- expectedWaitingBuilders: []TestingTask{
+ expectedWaitingTasks: []TestingTask{
{
Builder: "builder_a",
CreateTime: currTime,
@@ -276,43 +291,18 @@
IsTry: false,
},
},
- expectedCiTriggers: map[string]time.Duration{
- "builder_a": 5 * time.Minute,
- "builder_b": 10 * time.Second,
- "builder_c": 5 * time.Second,
- },
- },
- {
- name: "Enqueues to runningBuilders if there are available bots",
- ciTriggers: map[string]time.Duration{
- "builder_a": 20 * time.Second,
- "builder_b": 0 * time.Second,
- "builder_c": 10 * time.Second,
- },
- expectedRunningBuilders: []TestingTask{
- {
- Builder: "builder_b",
- CreateTime: currTime,
- Duration: 30 * time.Second,
- PendingDuration: 0,
- DeviceType: "b",
- IsTry: false,
- },
- },
- expectedWaitingBuilders: []TestingTask{},
- expectedCiTriggers: map[string]time.Duration{
- "builder_a": 15 * time.Second,
- "builder_b": 4 * time.Minute,
- "builder_c": 5 * time.Second,
+ expectedCiTriggers: map[string]time.Time{
+ "builder_a": currTime.Add(5 * time.Minute),
+ "builder_b": currTime.Add(15 * time.Second),
+ "builder_c": currTime.Add(10 * time.Second),
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- ctcopy := map[string]time.Duration{}
- runningBuilders := []TestingTask{}
- waitingBuilders := []TestingTask{}
+ ctcopy := map[string]time.Time{}
+ waitingTasks := []TestingTask{}
idleBots := map[string]int{
"a": 0,
"b": 1,
@@ -320,23 +310,130 @@
for k, v := range tc.ciTriggers {
ctcopy[k] = v
}
- processCiTriggers(
- ctcopy,
- builderProfiles,
- &runningBuilders,
- &waitingBuilders,
- idleBots,
- timestepIncrement,
- currTime,
- )
- if !reflect.DeepEqual(runningBuilders, tc.expectedRunningBuilders) {
- t.Fatalf("unexpected output: got %v, wanted %v\n", runningBuilders, tc.expectedRunningBuilders)
+ sim := Simulator{
+ baseline: false,
+ builderProfiles: builderProfiles,
+ currTime: currTime,
+ ciTriggers: ctcopy,
+ idleBots: idleBots,
+ timestepIncrement: timestepIncrement,
+ waitingTasks: waitingTasks,
}
- if !reflect.DeepEqual(waitingBuilders, tc.expectedWaitingBuilders) {
- t.Fatalf("unexpected output: got %v, wanted %v\n", waitingBuilders, tc.expectedWaitingBuilders)
+ sim.processCITriggers()
+ if diff := cmp.Diff(sim.waitingTasks, tc.expectedWaitingTasks); diff != "" {
+ t.Fatalf("unexpected output: (-got +want):\n%s", diff)
}
- if !reflect.DeepEqual(ctcopy, tc.expectedCiTriggers) {
- t.Fatalf("unexpected output: got %v, wanted %v\n", ctcopy, tc.expectedCiTriggers)
+ if diff := cmp.Diff(sim.ciTriggers, tc.expectedCiTriggers); diff != "" {
+ t.Fatalf("unexpected output: (-got +want):\n%s", diff)
+ }
+ })
+ }
+}
+
+func TestProcessCqAttempts(t *testing.T) {
+ // default test vars
+ currTime := time.Now()
+ builderProfiles := map[string]BuilderProfile{
+ "builder_a": {
+ Builder: "builder_a",
+ FlakeRate: 0.02,
+ NumBuilds: 100,
+ AverageBuildDuration: 5 * time.Minute,
+ DeviceFootprints: []BuilderDeviceFootprint{
+ {
+ DeviceType: "a",
+ AverageTasks: 1,
+ AverageTaskDuration: 1 * time.Minute,
+ },
+ },
+ },
+ "builder_b": {
+ Builder: "builder_b",
+ FlakeRate: 0.00,
+ NumBuilds: 100,
+ AverageBuildDuration: 4 * time.Minute,
+ DeviceFootprints: []BuilderDeviceFootprint{
+ {
+ DeviceType: "b",
+ AverageTasks: 1,
+ AverageTaskDuration: 30 * time.Second,
+ },
+ },
+ },
+ }
+ testCases := []struct {
+ name string
+ cqAdd []string
+ commitQueueAttempts []CommitQueueAttempt
+ expectedWaitingTasks []TestingTask
+ expectedCommitQueueAttempts []CommitQueueAttempt
+ }{
+ {
+ name: "Enqueues no subtasks if there aren't any CommitQueueAttempts with appropriate timestamp",
+ cqAdd: []string{"builder_a", "builder_b"},
+ commitQueueAttempts: []CommitQueueAttempt{
+ {
+ StartTime: currTime.Add(30 * time.Second),
+ ConfigGroup: "fuchsia-fuchsia",
+ },
+ },
+ expectedWaitingTasks: []TestingTask{},
+ expectedCommitQueueAttempts: []CommitQueueAttempt{
+ {
+ StartTime: currTime.Add(30 * time.Second),
+ ConfigGroup: "fuchsia-fuchsia",
+ },
+ },
+ },
+ {
+ name: "Enqueues subtasks to waitingTasks if there is a CommitQueueAttempt with an appropriate timestamp",
+ cqAdd: []string{"builder_b"},
+ commitQueueAttempts: []CommitQueueAttempt{
+ {
+ StartTime: currTime.Add(-30 * time.Second),
+ ConfigGroup: "fuchsia-fuchsia",
+ },
+ },
+ expectedWaitingTasks: []TestingTask{
+ {
+ Builder: "builder_b",
+ CreateTime: currTime,
+ Duration: 30 * time.Second,
+ PendingDuration: 0,
+ DeviceType: "b",
+ IsTry: true,
+ },
+ },
+ expectedCommitQueueAttempts: []CommitQueueAttempt{},
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ cqaCopy := []CommitQueueAttempt{}
+ for _, attempt := range tc.commitQueueAttempts {
+ cqaCopy = append(cqaCopy, attempt)
+ }
+ waitingTasks := []TestingTask{}
+ idleBots := map[string]int{
+ "a": 0,
+ "b": 1,
+ }
+ sim := Simulator{
+ baseline: false,
+ builderProfiles: builderProfiles,
+ cqAttempts: cqaCopy,
+ currTime: currTime,
+ cqAdd: tc.cqAdd,
+ idleBots: idleBots,
+ waitingTasks: waitingTasks,
+ }
+ sim.processCQAttempts()
+ if diff := cmp.Diff(sim.waitingTasks, tc.expectedWaitingTasks); diff != "" {
+ t.Fatalf("unexpected output: (-got +want):\n%s", diff)
+ }
+ if diff := cmp.Diff(sim.cqAttempts, tc.expectedCommitQueueAttempts); diff != "" {
+ t.Fatalf("unexpected output: (-got +want):\n%s", diff)
}
})
}