blob: 997047f81f36a2ed7c2f073e7f941e8512d042c3 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package tracking
import (
"context"
"flag"
"fmt"
"log"
"os"
"sync"
"testing"
"time"
"fuchsia.googlesource.com/host_target_testing/device"
"fuchsia.googlesource.com/host_target_testing/packages"
"fuchsia.googlesource.com/host_target_testing/sl4f"
"golang.org/x/crypto/ssh"
)
// c is the package-level test configuration. TestMain assigns it from the
// command-line flags and validates it before any test runs.
var c *Config
// TestMain is the test binary's entry point. It configures the logger, builds
// the package-level Config against the command-line flag set, parses the
// flags, validates the resulting config, and finally exits with the status
// returned by m.Run.
func TestMain(m *testing.M) {
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lshortfile)
	log.SetPrefix("tracking-test: ")

	// The config is created from flag.CommandLine before flag.Parse runs.
	cfg, err := NewConfig(flag.CommandLine)
	if err != nil {
		log.Fatalf("failed to create config: %s", err)
	}
	c = cfg

	flag.Parse()

	if validateErr := c.Validate(); validateErr != nil {
		log.Fatalf("config is invalid: %s", validateErr)
	}

	os.Exit(m.Run())
}
// TestOTA connects to the target device, repaves it when the config asks for
// that, and then hands control to testTrackingOTAs.
func TestOTA(t *testing.T) {
	ctx := context.Background()

	dev, err := c.deviceConfig.NewDeviceClient(ctx)
	if err != nil {
		t.Fatalf("failed to create ota test client: %s", err)
	}
	defer dev.Close()

	// An sl4f.Client can only be created once the build running on the
	// device is known, and not every step knows that up front. The single
	// shared client therefore lives here and is handed down by pointer so
	// that callees can use either it or the device client to talk to the
	// target. Every OTA attempt has to Close and nil out the current
	// client and install a fresh one after the reboot. Whatever client is
	// left at the end is closed by the deferred function below.
	var rpcClient *sl4f.Client
	defer func() {
		if rpcClient == nil {
			return
		}
		rpcClient.Close()
	}()

	if c.ShouldRepaveDevice() {
		if rpcClient, err = paveDevice(ctx, dev); err != nil {
			t.Fatalf("failed to pave device: %s", err)
		}
	}

	testTrackingOTAs(t, ctx, dev, &rpcClient)
}
// testTrackingOTAs repeatedly asks the upgrade builder for its latest build
// and OTAs the device to it. The loop has no normal exit: it ends only when a
// lookup or an OTA attempt fails the test. When the latest build is the one
// the device was last updated to, it sleeps for a minute and polls again.
func testTrackingOTAs(t *testing.T, ctx context.Context, device *device.Client, rpcClient **sl4f.Client) {
	builder, err := c.GetUpgradeBuilder()
	if err != nil {
		t.Fatal(err)
	}

	var (
		// ABR is only checked once the first update has gone through: the
		// initial Fuchsia version may be too old to support ABR, but the
		// device should support it after the first OTA.
		checkABR bool

		// previousBuildID is the build installed by the last successful
		// attempt; it is empty until the first attempt completes.
		previousBuildID string
	)

	attempt := 1
	for {
		log.Printf("Look up latest build for builder %s", builder)

		latestBuildID, err := builder.GetLatestBuildID(ctx)
		if err != nil {
			t.Fatalf("error getting latest build for builder %s: %s", builder, err)
		}

		if latestBuildID == previousBuildID {
			log.Printf("already updated to %s, sleeping", latestBuildID)
			time.Sleep(time.Minute)
			continue
		}

		log.Printf("Tracking Test Attempt %d upgrading from build %s to build %s", attempt, previousBuildID, latestBuildID)

		err = testTrackingOTAAttempt(ctx, device, rpcClient, latestBuildID, checkABR)
		if err != nil {
			t.Fatalf("Tracking Test Attempt %d failed: %s", attempt, err)
		}

		log.Printf("Tracking Test Attempt %d successful", attempt)
		log.Printf("------------------------------------------------------------------------------")

		previousBuildID = latestBuildID
		checkABR = true
		attempt++
	}
}
// testTrackingOTAAttempt performs one OTA cycle to the build identified by
// buildID, bounded by the configured cycle timeout. It fetches the build and
// its package repository, and returns nil without updating when the device
// already runs that build's system image; otherwise it returns the result of
// systemOTA.
func testTrackingOTAAttempt(
	ctx context.Context,
	device *device.Client,
	rpcClient **sl4f.Client,
	buildID string,
	checkABR bool,
) error {
	attemptCtx, cancel := context.WithTimeout(ctx, c.cycleTimeout)
	defer cancel()

	outputDir, cleanup, err := c.archiveConfig.OutputDir()
	if err != nil {
		return fmt.Errorf("failed to get output directory: %s", err)
	}
	defer cleanup()

	archive := c.archiveConfig.BuildArchive()
	build, err := archive.GetBuildByID(attemptCtx, buildID, outputDir)
	if err != nil {
		return fmt.Errorf("failed to find build %s: %s", buildID, err)
	}

	repo, err := build.GetPackageRepository(attemptCtx)
	if err != nil {
		return fmt.Errorf("failed to get repo for build: %s", err)
	}

	wantMerkle, err := repo.LookupUpdateSystemImageMerkle()
	if err != nil {
		return fmt.Errorf("failed to get repo system image merkle: %s", err)
	}

	// Nothing to do when the device is already on the target system image.
	switch current, err := isDeviceUpToDate(attemptCtx, device, *rpcClient, wantMerkle); {
	case err != nil:
		return fmt.Errorf("failed to check if device is up to date: %s", err)
	case current:
		log.Printf("device already up to date")
		return nil
	}

	log.Printf("\n\n")
	log.Printf("OTAing to %s", build)

	return systemOTA(attemptCtx, device, rpcClient, repo, checkABR)
}
// paveDevice reboots the device into recovery, paves it with the downgrade
// build, waits for it to reconnect and validates the result. The whole
// operation is bounded by the configured pave timeout.
//
// On success it returns the sl4f client started against the paved build. The
// returned client is nil when sl4f could not be started; that case is logged
// and tolerated rather than treated as a failure. On error the returned
// client is always nil, and any client that was started is closed first.
func paveDevice(ctx context.Context, device *device.Client) (*sl4f.Client, error) {
	ctx, cancel := context.WithTimeout(ctx, c.paveTimeout)
	defer cancel()

	outputDir, cleanup, err := c.archiveConfig.OutputDir()
	if err != nil {
		return nil, fmt.Errorf("failed to get output directory: %s", err)
	}
	defer cleanup()

	downgradePaver, err := c.GetDowngradePaver(ctx, outputDir)
	if err != nil {
		return nil, fmt.Errorf("error getting downgrade paver: %s", err)
	}

	downgradeRepo, err := c.GetDowngradeRepository(ctx, outputDir)
	if err != nil {
		return nil, fmt.Errorf("error getting downgrade repository: %s", err)
	}

	log.Printf("starting pave")

	expectedSystemImageMerkle, err := downgradeRepo.LookupUpdateSystemImageMerkle()
	if err != nil {
		return nil, fmt.Errorf("error extracting expected system image merkle: %s", err)
	}

	// Reboot the device into recovery and pave it.
	if err = device.RebootToRecovery(ctx); err != nil {
		return nil, fmt.Errorf("failed to reboot to recovery: %s", err)
	}

	if err = downgradePaver.Pave(ctx, c.deviceConfig.DeviceName); err != nil {
		return nil, fmt.Errorf("device failed to pave: %s", err)
	}

	// Wait for the device to come online.
	if err = device.WaitForDeviceToBeConnected(ctx); err != nil {
		return nil, fmt.Errorf("device failed to connect: %s", err)
	}

	rpcClient, err := device.StartRpcSession(ctx, downgradeRepo)
	if err != nil {
		// FIXME(40913): every downgrade builder should at least build
		// sl4f as a universe package.
		log.Printf("unable to connect to sl4f after pave: %s", err)

		// Continue without sl4f. Make sure no partially constructed
		// client is used or handed back to the caller.
		rpcClient = nil
	}

	// We always boot into the A partition after a pave.
	expectedConfig := sl4f.ConfigurationA
	if err := validateDevice(ctx, device, rpcClient, downgradeRepo, expectedSystemImageMerkle, &expectedConfig, true); err != nil {
		// The caller only receives a nil client on failure, so the
		// session started above must be released here.
		if rpcClient != nil {
			rpcClient.Close()
		}
		return nil, err
	}

	log.Printf("paving successful")

	return rpcClient, nil
}
// systemOTA looks up the system image merkle published in repo and OTAs the
// device to the repository's update package via otaToPackage.
func systemOTA(ctx context.Context, device *device.Client, rpcClient **sl4f.Client, repo *packages.Repository, checkABR bool) error {
	const updatePackageURL = "fuchsia-pkg://fuchsia.com/update"

	merkle, err := repo.LookupUpdateSystemImageMerkle()
	if err != nil {
		return fmt.Errorf("error extracting expected system image merkle: %s", err)
	}

	return otaToPackage(ctx, device, rpcClient, repo, merkle, updatePackageURL, checkABR)
}
// otaToPackage updates the device to updatePackageUrl, served out of repo,
// and validates the result.
//
// The steps are:
//  1. Work out which boot slot the device is expected to be in afterwards
//     (nil when that cannot be determined).
//  2. Refuse to continue if the device already runs
//     expectedSystemImageMerkle.
//  3. Serve repo to the device and resolve the `run` package.
//  4. Launch the system updater and wait for the device to disconnect and
//     then reconnect.
//  5. Replace *rpcClient with a fresh sl4f session and validate the device.
//
// Any client in *rpcClient is closed and replaced once the device has
// reconnected; if the new session cannot be started, *rpcClient holds
// whatever StartRpcSession returned alongside its error.
func otaToPackage(
	ctx context.Context,
	device *device.Client,
	rpcClient **sl4f.Client,
	repo *packages.Repository,
	expectedSystemImageMerkle string,
	updatePackageUrl string,
	checkABR bool,
) error {
	expectedConfig, err := determineTargetConfig(ctx, *rpcClient)
	if err != nil {
		return fmt.Errorf("error determining target config: %s", err)
	}

	upToDate, err := isDeviceUpToDate(ctx, device, *rpcClient, expectedSystemImageMerkle)
	if err != nil {
		return fmt.Errorf("failed to check if device is up to date: %s", err)
	}
	if upToDate {
		return fmt.Errorf("device already updated to the expected version %q", expectedSystemImageMerkle)
	}

	server, err := device.ServePackageRepository(ctx, repo, "upgrade_test")
	if err != nil {
		return fmt.Errorf("error setting up server: %s", err)
	}
	defer server.Shutdown(ctx)

	// In order to manually trigger the system updater, we need the `run`
	// package. Since builds can be configured to not automatically install
	// packages, we need to explicitly resolve it.
	err = device.Run(ctx, "pkgctl resolve fuchsia-pkg://fuchsia.com/run/0", os.Stdout, os.Stderr)
	if err != nil {
		return fmt.Errorf("error resolving the run package: %v", err)
	}

	// NOTE(review): RegisterDisconnectListener presumably adds to wg and
	// marks it done once the connection drops -- confirm in the device
	// package.
	var wg sync.WaitGroup
	device.RegisterDisconnectListener(&wg)

	log.Printf("starting system OTA")

	cmd := fmt.Sprintf("run \"fuchsia-pkg://fuchsia.com/amber#meta/system_updater.cmx\" --update \"%s\" && sleep 60", updatePackageUrl)
	err = device.Run(ctx, cmd, os.Stdout, os.Stderr)
	if err != nil {
		// A missing exit status is tolerated: presumably the session is
		// torn down when the device reboots into the new system.
		if _, ok := err.(*ssh.ExitMissingError); !ok {
			return fmt.Errorf("failed to run system_updater.cmx: %s", err)
		}
	}

	// Wait until we get a signal that we have disconnected. The channel is
	// closed rather than sent on, so the waiting goroutine can always
	// finish once wg is released, even if we already gave up because ctx
	// expired and nobody is receiving any more.
	disconnected := make(chan struct{})
	go func() {
		defer close(disconnected)
		wg.Wait()
	}()

	select {
	case <-disconnected:
	case <-ctx.Done():
		return fmt.Errorf("device did not disconnect: %s", ctx.Err())
	}

	if err = device.WaitForDeviceToBeConnected(ctx); err != nil {
		return fmt.Errorf("device failed to connect: %s", err)
	}

	log.Printf("OTA complete, validating device")

	// FIXME: See comment in device.TriggerSystemOTA()
	// The old session belongs to the pre-reboot system; drop it before
	// starting a new one against the updated build.
	if *rpcClient != nil {
		(*rpcClient).Close()
		*rpcClient = nil
	}

	*rpcClient, err = device.StartRpcSession(ctx, repo)
	if err != nil {
		return fmt.Errorf("unable to connect to sl4f after OTA: %s", err)
	}

	if err := validateDevice(ctx, device, *rpcClient, repo, expectedSystemImageMerkle, expectedConfig, checkABR); err != nil {
		return fmt.Errorf("failed to validate after OTA: %s", err)
	}

	return nil
}
// isDeviceUpToDate reports whether the system image merkle currently running
// on the device equals expectedSystemImageMerkle. The merkle is read through
// rpcClient when one is available and through the device client otherwise.
// Both values are logged before the comparison result is returned.
func isDeviceUpToDate(ctx context.Context, device *device.Client, rpcClient *sl4f.Client, expectedSystemImageMerkle string) (bool, error) {
	var (
		currentMerkle string
		err           error
	)

	// Fetch the device's current /system/meta.
	if rpcClient != nil {
		currentMerkle, err = rpcClient.GetSystemImageMerkle(ctx)
	} else {
		currentMerkle, err = device.GetSystemImageMerkle(ctx)
	}
	if err != nil {
		return false, err
	}

	log.Printf("current system image merkle: %q", currentMerkle)
	log.Printf("upgrading to system image merkle: %q", expectedSystemImageMerkle)

	matches := currentMerkle == expectedSystemImageMerkle
	return matches, nil
}
// determineTargetConfig returns the boot slot an OTA is expected to switch
// the device to: slot B when the device is currently on slot A, and slot A
// otherwise. It returns (nil, nil) when there is no sl4f client to query or
// when the device reports that the query is not supported.
func determineTargetConfig(ctx context.Context, rpcClient *sl4f.Client) (*sl4f.Configuration, error) {
	if rpcClient == nil {
		log.Printf("sl4f not running, cannot determine current active partition")
		return nil, nil
	}

	active, err := rpcClient.PaverQueryActiveConfiguration(ctx)
	switch {
	case err == sl4f.ErrNotSupported:
		log.Printf("device does not support ABR")
		return nil, nil
	case err != nil:
		return nil, err
	}

	log.Printf("device booted to slot %s", active)

	// The update should land in whichever slot is not currently active.
	target := sl4f.ConfigurationA
	if active == sl4f.ConfigurationA {
		target = sl4f.ConfigurationB
	}

	return &target, nil
}
// validateDevice checks that the device ended up in the expected state after
// a pave or an OTA:
//
//   - the running system image merkle must equal expectedSystemImageMerkle;
//   - the static packages must validate (through sl4f when rpcClient is
//     non-nil, through the device client otherwise);
//   - when rpcClient and expectedConfig are both non-nil, the active boot
//     slot is compared against *expectedConfig.
//
// checkABR controls how a boot slot mismatch is treated: when true it is an
// error, when false it is only logged. Callers pass false while the build
// being upgraded from may not yet write to the inactive partition.
func validateDevice(
	ctx context.Context,
	device *device.Client,
	rpcClient *sl4f.Client,
	repo *packages.Repository,
	expectedSystemImageMerkle string,
	expectedConfig *sl4f.Configuration,
	checkABR bool,
) error {
	// At this point the system should have been updated to the target
	// system version. Confirm the update by fetching the device's current
	// /system/meta, and making sure it is the correct version.
	upToDate, err := isDeviceUpToDate(ctx, device, rpcClient, expectedSystemImageMerkle)
	if err != nil {
		return fmt.Errorf("failed to check if device is up to date: %s", err)
	}
	if !upToDate {
		return fmt.Errorf("system version failed to update to %q", expectedSystemImageMerkle)
	}

	// Make sure the device doesn't have any broken static packages.
	// FIXME(40913): every builder should at least build sl4f as a universe package.
	if rpcClient == nil {
		if err := device.ValidateStaticPackages(ctx); err != nil {
			return fmt.Errorf("failed to validate static packages: %s", err)
		}
		// Without sl4f there is no way to query the boot slot.
		return nil
	}

	if err := rpcClient.ValidateStaticPackages(ctx); err != nil {
		return fmt.Errorf("failed to validate static packages: %s", err)
	}

	// Ensure the device is booting from the expected boot slot.
	// NOTE(review): when the query is not supported, execution still falls
	// through to the comparison below with whatever value was returned
	// alongside the error -- confirm that this is intended.
	activeConfig, err := rpcClient.PaverQueryActiveConfiguration(ctx)
	if err == sl4f.ErrNotSupported {
		log.Printf("device does not support querying the active configuration")
	} else if err != nil {
		return fmt.Errorf("unable to determine active boot configuration: %s", err)
	}

	log.Printf("device booted to slot %s", activeConfig)

	if expectedConfig != nil && activeConfig != *expectedConfig {
		// FIXME(43336): during the rollout of ABR, the N-1 build might
		// not be writing to the inactive partition, so don't
		// err out during that phase. This will be removed once
		// ABR has rolled through GI.
		if checkABR {
			return fmt.Errorf("expected device to boot from slot %s, got %s", *expectedConfig, activeConfig)
		}
		log.Printf("expected device to boot from slot %s, got %s (ignoring during ABR rollout)", *expectedConfig, activeConfig)
	}

	return nil
}