tools/testing/testrunner/cmd/tester.go - fuchsia - Git at Google

 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package main

 import (
 	"bytes"
 	"context"
 	"encoding/hex"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net"
 	"os"
 	"path/filepath"
 	"strings"
 	"time"

 	"go.fuchsia.dev/fuchsia/tools/debug/elflib"
 	"go.fuchsia.dev/fuchsia/tools/integration/testsharder"
 	"go.fuchsia.dev/fuchsia/tools/lib/iomisc"
 	"go.fuchsia.dev/fuchsia/tools/lib/logger"
 	"go.fuchsia.dev/fuchsia/tools/lib/osmisc"
 	"go.fuchsia.dev/fuchsia/tools/lib/retry"
 	"go.fuchsia.dev/fuchsia/tools/lib/subprocess"
 	"go.fuchsia.dev/fuchsia/tools/net/sshutil"
 	"go.fuchsia.dev/fuchsia/tools/serial"
 	"go.fuchsia.dev/fuchsia/tools/testing/runtests"
 	"go.fuchsia.dev/fuchsia/tools/testing/testrunner/constants"
 	"golang.org/x/crypto/ssh"
 )

 const (
 	// A test output directory within persistent storage.
 	dataOutputDir = "/data/infra/testrunner"

 	// TODO(fxb/73171): Fix this path.
 	// The output data directory for component v2 tests.
 	dataOutputDirV2 = "/tmp/test_manager:0/children/debug_data:0/data"

 	// Various tools for running tests.
 	runtestsName         = "runtests"
 	runTestComponentName = "run-test-component"
 	runTestSuiteName     = "run-test-suite"

 	componentV2Suffix = ".cm"

 	// Returned by both run-test-component and run-test-suite to indicate the
 	// test timed out.
 	timeoutExitCode = 21

 	// Printed to the serial console when ready to accept user input.
 	serialConsoleCursor = "\n$"

 	llvmProfileEnvKey    = "LLVM_PROFILE_FILE"
 	llvmProfileExtension = ".profraw"
 	llvmProfileSinkType  = "llvm-profile"
 )

 type timeoutError struct {
 	timeout time.Duration
 }

 func (e *timeoutError) Error() string {
 	return fmt.Sprintf("test killed because timeout reached (%v)", e.timeout)
 }

 // For testability
 type cmdRunner interface {
 	Run(ctx context.Context, command []string, stdout, stderr io.Writer) error
 }

 // For testability
 var newRunner = func(dir string, env []string) cmdRunner {
 	return &subprocess.Runner{Dir: dir, Env: env}
 }

 // For testability
 type sshClient interface {
 	Close()
 	Reconnect(ctx context.Context) error
 	Run(ctx context.Context, command []string, stdout, stderr io.Writer) error
 }

 // For testability
 type dataSinkCopier interface {
 	GetReferences(remoteDir string) (map[string]runtests.DataSinkReference, error)
 	Copy(sinks []runtests.DataSinkReference, localDir string) (runtests.DataSinkMap, error)
 	Reconnect() error
 	Close() error
 }

 // For testability
 type serialClient interface {
 	runDiagnostics(ctx context.Context) error
 }

 // subprocessTester executes tests in local subprocesses.
 type subprocessTester struct {
 	env               []string
 	dir               string
 	perTestTimeout    time.Duration
 	localOutputDir    string
 	getModuleBuildIDs func(string) ([]string, error)
 }

 func getModuleBuildIDs(test string) ([]string, error) {
 	f, err := os.Open(test)
 	if err != nil {
 		return nil, err
 	}
 	buildIDs, err := elflib.GetBuildIDs(filepath.Base(test), f)
 	if err != nil {
 		return nil, err
 	}
 	var asStrings []string
 	for _, id := range buildIDs {
 		asStrings = append(asStrings, hex.EncodeToString(id))
 	}
 	return asStrings, nil
 }

 // NewSubprocessTester returns a SubprocessTester that can execute tests
 // locally with a given working directory and environment.
 func newSubprocessTester(dir string, env []string, localOutputDir string, perTestTimeout time.Duration) *subprocessTester {
 	return &subprocessTester{
 		dir:               dir,
 		env:               env,
 		perTestTimeout:    perTestTimeout,
 		localOutputDir:    localOutputDir,
 		getModuleBuildIDs: getModuleBuildIDs,
 	}
 }

 func (t *subprocessTester) Test(ctx context.Context, test testsharder.Test, stdout io.Writer, stderr io.Writer, outDir string) (runtests.DataSinkReference, error) {
 	sinkRef := runtests.DataSinkReference{}
 	if test.Path == "" {
 		return sinkRef, fmt.Errorf("test %q has no `path` set", test.Name)
 	}
 	// Some tests read testOutDirEnvKey so ensure they get their own output dir.
 	if err := os.MkdirAll(outDir, 0770); err != nil {
 		return sinkRef, err
 	}

 	// Might as well emit any profiles directly to the output directory.
 	// TODO(fxbug.dev/61208): until this is resolved, we make the assumption
 	// that the binaries are statically linked and will only produce one
 	// profile on execution. Once build IDs are embedded in profiles
 	// automatically, we can switch to a more flexible scheme where, say,
 	// we set
 	// LLVM_PROFILE_FILE=<output dir>/<test-specific namsepace>/%p.profraw
 	// and then record any .profraw file written to that directory as an
 	// emitted profile.
 	profileRel := filepath.Join(llvmProfileSinkType, test.Path+llvmProfileExtension)
 	profileAbs := filepath.Join(t.localOutputDir, profileRel)
 	os.MkdirAll(filepath.Dir(profileAbs), os.ModePerm)

 	r := newRunner(t.dir, append(
 		t.env,
 		fmt.Sprintf("%s=%s", testOutDirEnvKey, outDir),
 		// When host-side tests are instrumented for profiling, executing
 		// them will write a profile to the location under this environment variable.
 		fmt.Sprintf("%s=%s", llvmProfileEnvKey, profileAbs),
 	))
 	if t.perTestTimeout > 0 {
 		var cancel context.CancelFunc
 		ctx, cancel = context.WithTimeout(ctx, t.perTestTimeout)
 		defer cancel()
 	}
 	err := r.Run(ctx, []string{test.Path}, stdout, stderr)
 	if err == context.DeadlineExceeded {
 		err = &timeoutError{t.perTestTimeout}
 	}

 	if exists, profileErr := osmisc.FileExists(profileAbs); profileErr != nil {
 		logger.Errorf(ctx, "unable to determine whether a profile was emitted: %v", profileErr)
 	} else if exists {
 		// TODO(fxbug.dev/61208): delete determination of build IDs once
 		// profiles embed this information.
 		var buildIDs []string
 		buildIDs, profileErr = t.getModuleBuildIDs(test.Path)
 		if profileErr == nil {
 			sinkRef.Sinks = runtests.DataSinkMap{
 				llvmProfileSinkType: []runtests.DataSink{
 					{
 						Name:     filepath.Base(profileRel),
 						File:     profileRel,
 						BuildIDs: buildIDs,
 					},
 				},
 			}
 		} else {
 			logger.Warningf(ctx, "failed to read module build IDs from %q", test.Path)
 		}
 	}
 	return sinkRef, err
 }

 func (t *subprocessTester) EnsureSinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, _ *testOutputs) error {
 	// Nothing to actually copy; if any profiles were emitted, they would have
 	// been written directly to the output directory. We verify here that all
 	// recorded data sinks are actually present.
 	numSinks := 0
 	for _, ref := range sinkRefs {
 		for _, sinks := range ref.Sinks {
 			for _, sink := range sinks {
 				abs := filepath.Join(t.localOutputDir, sink.File)
 				exists, err := osmisc.FileExists(abs)
 				if err != nil {
 					return fmt.Errorf("unable to determine if local data sink %q exists: %v", sink.File, err)
 				} else if !exists {
 					return fmt.Errorf("expected a local data sink %q, but no such file exists", sink.File)
 				}
 				numSinks++
 			}
 		}
 	}
 	if numSinks > 0 {
 		logger.Debugf(ctx, "local data sinks present: %d", numSinks)
 	}
 	return nil
 }

 func (t *subprocessTester) RunSnapshot(_ context.Context, _ string) error {
 	return nil
 }

 func (t *subprocessTester) Close() error {
 	return nil
 }

 type serialSocket struct {
 	socketPath string
 }

 func (s *serialSocket) runDiagnostics(ctx context.Context) error {
 	if s.socketPath == "" {
 		return fmt.Errorf("serialSocketPath not set")
 	}
 	socket, err := serial.NewSocket(ctx, s.socketPath)
 	if err != nil {
 		return fmt.Errorf("newSerialSocket failed: %v", err)
 	}
 	defer socket.Close()
 	return serial.RunDiagnostics(ctx, socket)
 }

 // fuchsiaSSHTester executes fuchsia tests over an SSH connection.
 type fuchsiaSSHTester struct {
 	client                      sshClient
 	copier                      dataSinkCopier
 	useRuntests                 bool
 	localOutputDir              string
 	perTestTimeout              time.Duration
 	connectionErrorRetryBackoff retry.Backoff
 	serialSocket                serialClient
 }

 // newFuchsiaSSHTester returns a fuchsiaSSHTester associated to a fuchsia
 // instance of given nodename, the private key paired with an authorized one
 // and the directive of whether `runtests` should be used to execute the test.
 func newFuchsiaSSHTester(ctx context.Context, addr net.IPAddr, sshKeyFile, localOutputDir, serialSocketPath string, useRuntests bool, perTestTimeout time.Duration) (*fuchsiaSSHTester, error) {
 	key, err := ioutil.ReadFile(sshKeyFile)
 	if err != nil {
 		return nil, fmt.Errorf("failed to read SSH key file: %w", err)
 	}
 	config, err := sshutil.DefaultSSHConfig(key)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create an SSH client config: %w", err)
 	}

 	client, err := sshutil.NewClient(
 		ctx,
 		sshutil.ConstantAddrResolver{
 			Addr: &net.TCPAddr{
 				IP:   addr.IP,
 				Port: sshutil.SSHPort,
 				Zone: addr.Zone,
 			},
 		},
 		config,
 		sshutil.DefaultConnectBackoff(),
 	)
 	if err != nil {
 		return nil, fmt.Errorf("failed to establish an SSH connection: %w", err)
 	}
 	copier, err := runtests.NewDataSinkCopier(client)
 	if err != nil {
 		return nil, err
 	}
 	return &fuchsiaSSHTester{
 		client:                      client,
 		copier:                      copier,
 		useRuntests:                 useRuntests,
 		localOutputDir:              localOutputDir,
 		perTestTimeout:              perTestTimeout,
 		connectionErrorRetryBackoff: retry.NewConstantBackoff(time.Second),
 		serialSocket:                &serialSocket{serialSocketPath},
 	}, nil
 }

 func (t *fuchsiaSSHTester) reconnect(ctx context.Context) error {
 	if err := t.client.Reconnect(ctx); err != nil {
 		return fmt.Errorf("failed to reestablish SSH connection: %w", err)
 	}
 	if err := t.copier.Reconnect(); err != nil {
 		return fmt.Errorf("failed to reconnect data sink copier: %w", err)
 	}
 	return nil
 }

 func (t *fuchsiaSSHTester) isTimeoutError(test testsharder.Test, err error) bool {
 	if t.perTestTimeout <= 0 {
 		return false
 	}
 	if exitErr, ok := err.(*ssh.ExitError); ok {
 		return exitErr.Waitmsg.ExitStatus() == timeoutExitCode
 	}
 	return false
 }

 func (t *fuchsiaSSHTester) runSSHCommandWithRetry(ctx context.Context, command []string, stdout, stderr io.Writer) error {
 	var cmdErr error
 	const maxReconnectAttempts = 3
 	retry.Retry(ctx, retry.WithMaxAttempts(t.connectionErrorRetryBackoff, maxReconnectAttempts), func() error {
 		cmdErr = t.client.Run(ctx, command, stdout, stderr)
 		if sshutil.IsConnectionError(cmdErr) {
 			logger.Errorf(ctx, "attempting to reconnect over SSH after error: %v", cmdErr)
 			if err := t.reconnect(ctx); err != nil {
 				logger.Errorf(ctx, "%s: %v", constants.FailedToReconnectMsg, err)
 				// If we fail to reconnect, continuing is likely hopeless.
 				return nil
 			}
 			// Return non-ConnectionError because code in main.go will exit early if
 			// it sees that. Since reconnection succeeded, we don't want that.
 			// TODO(garymm): Clean this up; have main.go do its own connection recovery between tests.
 			cmdErr = fmt.Errorf("%v", cmdErr)
 			return cmdErr
 		}
 		// Not a connection error -> command passed or failed -> break retry loop.
 		return nil
 	}, nil)
 	return cmdErr
 }

 // Return this error when the test is skipped.
 type TestSkippedError struct{}

 func (e *TestSkippedError) Error() string {
 	return "test skipped"
 }

 func isTestSkippedErr(err error) bool {
 	_, ok := err.(*TestSkippedError)
 	return ok
 }

 // Test runs a test over SSH.
 func (t *fuchsiaSSHTester) Test(ctx context.Context, test testsharder.Test, stdout io.Writer, stderr io.Writer, _ string) (runtests.DataSinkReference, error) {
 	sinks := runtests.DataSinkReference{}
 	command, err := commandForTest(&test, t.useRuntests, dataOutputDir, t.perTestTimeout)
 	if err != nil {
 		return sinks, err
 	}
 	testErr := t.runSSHCommandWithRetry(ctx, command, stdout, stderr)

 	if sshutil.IsConnectionError(testErr) {
 		if err := t.serialSocket.runDiagnostics(ctx); err != nil {
 			logger.Warningf(ctx, "failed to run serial diagnostics: %v", err)
 		}
 		return sinks, testErr
 	}

 	if t.isTimeoutError(test, testErr) {
 		testErr = &timeoutError{t.perTestTimeout}
 	}

 	var sinkErr error
 	if t.useRuntests && !strings.HasSuffix(test.PackageURL, componentV2Suffix) {
 		startTime := time.Now()
 		var sinksPerTest map[string]runtests.DataSinkReference
 		if sinksPerTest, sinkErr = t.copier.GetReferences(dataOutputDir); sinkErr != nil {
 			logger.Errorf(ctx, "failed to determine data sinks for test %q: %v", test.Name, sinkErr)
 		} else {
 			sinks = sinksPerTest[test.Name]
 		}
 		duration := time.Now().Sub(startTime)
 		if sinks.Size() > 0 {
 			logger.Debugf(ctx, "%d data sinks found in %v", sinks.Size(), duration)
 		}
 	}

 	if testErr == nil {
 		return sinks, sinkErr
 	}
 	return sinks, testErr
 }

 func (t *fuchsiaSSHTester) EnsureSinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, outputs *testOutputs) error {
 	// Collect v2 references.
 	v2Sinks, err := t.copier.GetReferences(dataOutputDirV2)
 	if err != nil {
 		// If we fail to get v2 sinks, just log the error but continue to copy v1 sinks.
 		logger.Errorf(ctx, "failed to determine data sinks for v2 tests: %v", err)
 	}
 	var v2SinkRefs []runtests.DataSinkReference
 	for _, ref := range v2Sinks {
 		v2SinkRefs = append(v2SinkRefs, ref)
 	}
 	if len(v2SinkRefs) > 0 {
 		if err := t.copySinks(ctx, v2SinkRefs, filepath.Join(t.localOutputDir, "v2")); err != nil {
 			return err
 		}
 		outputs.updateDataSinks(v2Sinks, "v2")
 	}
 	return t.copySinks(ctx, sinkRefs, t.localOutputDir)
 }

 func (t *fuchsiaSSHTester) copySinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, localOutputDir string) error {
 	startTime := time.Now()
 	sinkMap, err := t.copier.Copy(sinkRefs, localOutputDir)
 	if err != nil {
 		return fmt.Errorf("failed to copy data sinks off target: %v", err)
 	}
 	copyDuration := time.Now().Sub(startTime)
 	sinkRef := runtests.DataSinkReference{Sinks: sinkMap}
 	numSinks := sinkRef.Size()
 	if numSinks > 0 {
 		logger.Debugf(ctx, "copied %d data sinks in %v", numSinks, copyDuration)
 	}
 	return nil
 }

 // RunSnapshot runs `snapshot` on the device.
 func (t *fuchsiaSSHTester) RunSnapshot(ctx context.Context, snapshotFile string) error {
 	if snapshotFile == "" {
 		return nil
 	}
 	snapshotOutFile, err := osmisc.CreateFile(filepath.Join(t.localOutputDir, snapshotFile))
 	if err != nil {
 		return fmt.Errorf("failed to create snapshot output file: %w", err)
 	}
 	defer snapshotOutFile.Close()
 	startTime := time.Now()
 	err = t.runSSHCommandWithRetry(ctx, []string{"/bin/snapshot"}, snapshotOutFile, os.Stderr)
 	if err != nil {
 		logger.Errorf(ctx, "%s: %v", constants.FailedToRunSnapshotMsg, err)
 	}
 	logger.Debugf(ctx, "ran snapshot in %v", time.Now().Sub(startTime))
 	return err
 }

 // Close terminates the underlying SSH connection. The object is no longer
 // usable after calling this method.
 func (t *fuchsiaSSHTester) Close() error {
 	defer t.client.Close()
 	return t.copier.Close()
 }

 // FuchsiaSerialTester executes fuchsia tests over serial.
 type fuchsiaSerialTester struct {
 	socket         io.ReadWriteCloser
 	perTestTimeout time.Duration
 	localOutputDir string
 }

 func newFuchsiaSerialTester(ctx context.Context, serialSocketPath string, perTestTimeout time.Duration) (*fuchsiaSerialTester, error) {
 	socket, err := serial.NewSocket(ctx, serialSocketPath)
 	if err != nil {
 		return nil, err
 	}

 	return &fuchsiaSerialTester{
 		socket:         socket,
 		perTestTimeout: perTestTimeout,
 	}, nil
 }

 // Exposed for testability.
 var newTestStartedContext = func(ctx context.Context) (context.Context, context.CancelFunc) {
 	return context.WithTimeout(ctx, 5*time.Second)
 }

 // lastWriteSaver is an io.Writer that saves the bytes written in the last Write().
 type lastWriteSaver struct {
 	buf []byte
 }

 func (w *lastWriteSaver) Write(p []byte) (int, error) {
 	w.buf = make([]byte, len(p))
 	copy(w.buf, p)
 	return len(p), nil
 }

 func (t *fuchsiaSerialTester) Test(ctx context.Context, test testsharder.Test, stdout, _ io.Writer, _ string) (runtests.DataSinkReference, error) {
 	// We don't collect data sinks for serial tests. Just return an empty DataSinkReference.
 	sinks := runtests.DataSinkReference{}
 	command, err := commandForTest(&test, true, "", t.perTestTimeout)
 	if err != nil {
 		return sinks, err
 	}
 	logger.Debugf(ctx, "starting: %v", command)

 	// If a single read from the socket includes both the bytes that indicate the test started and the bytes
 	// that indicate the test completed, then the startedReader will consume the bytes needed for detecting
 	// completion. Thus we save the last read from the socket and replay it when searching for completion.
 	lastWrite := &lastWriteSaver{}
 	startedReader := iomisc.NewMatchingReader(io.TeeReader(t.socket, lastWrite), [][]byte{[]byte(runtests.StartedSignature + test.Name)})
 	for ctx.Err() == nil {
 		if err := serial.RunCommands(ctx, t.socket, []serial.Command{{command, 0}}); err != nil {
 			return sinks, fmt.Errorf("failed to write to serial socket: %v", err)
 		}
 		startedCtx, cancel := newTestStartedContext(ctx)
 		_, err := iomisc.ReadUntilMatch(startedCtx, startedReader)
 		cancel()
 		if err == nil {
 			break
 		}
 		logger.Warningf(ctx, "test not started after timeout, retrying")
 	}

 	if ctx.Err() != nil {
 		return sinks, ctx.Err()
 	}

 	success, err := runtests.TestPassed(ctx, io.TeeReader(
 		// See comment above lastWrite declaration.
 		io.MultiReader(bytes.NewReader(lastWrite.buf), t.socket),
 		// To capture stdout.
 		stdout),
 		test.Name)

 	if err != nil {
 		return sinks, err
 	} else if !success {
 		return sinks, fmt.Errorf("test failed")
 	}
 	return sinks, nil
 }

 func (t *fuchsiaSerialTester) EnsureSinks(_ context.Context, _ []runtests.DataSinkReference, _ *testOutputs) error {
 	return nil
 }

 func (t *fuchsiaSerialTester) RunSnapshot(_ context.Context, _ string) error {
 	return nil
 }

 // Close terminates the underlying Serial socket connection. The object is no
 // longer usable after calling this method.
 func (t *fuchsiaSerialTester) Close() error {
 	return t.socket.Close()
 }

 func commandForTest(test *testsharder.Test, useRuntests bool, remoteOutputDir string, timeout time.Duration) ([]string, error) {
 	command := []string{}
 	// For v2 coverage data, use run-test-suite instead of runtests and collect the data from the designated dataOutputDirV2 directory.
 	if useRuntests && !strings.HasSuffix(test.PackageURL, componentV2Suffix) {
 		command = []string{runtestsName}
 		if remoteOutputDir != "" {
 			command = append(command, "--output", remoteOutputDir)
 		}
 		if timeout > 0 {
 			command = append(command, "-i", fmt.Sprintf("%d", int64(timeout.Seconds())))
 		}
 		if test.RealmLabel != "" {
 			command = append(command, "--realm-label", test.RealmLabel)
 		}
 		if test.PackageURL != "" {
 			command = append(command, test.PackageURL)
 		} else {
 			command = append(command, test.Path)
 		}
 	} else if test.PackageURL != "" {
 		if strings.HasSuffix(test.PackageURL, componentV2Suffix) {
 			command = []string{runTestSuiteName, "--filter-ansi"}
 			if test.LogSettings.MaxSeverity != "" {
 				command = append(command, "--max-severity-logs", fmt.Sprintf("%s", test.LogSettings.MaxSeverity))
 			}
 			if test.Parallel != 0 {
 				command = append(command, "--parallel", fmt.Sprintf("%d", test.Parallel))
 			}
 			// TODO(fxbug.dev/49262): Once fixed, combine timeout flag setting for v1 and v2.
 			if timeout > 0 {
 				command = append(command, "--timeout", fmt.Sprintf("%d", int64(timeout.Seconds())))
 			}
 		} else {
 			command = []string{runTestComponentName}
 			if test.LogSettings.MaxSeverity != "" {
 				command = append(command, fmt.Sprintf("--max-log-severity=%s", test.LogSettings.MaxSeverity))
 			}

 			if timeout > 0 {
 				command = append(command, fmt.Sprintf("--timeout=%d", int64(timeout.Seconds())))
 			}

 			// run-test-component supports realm-label but run-test-suite does not
 			if test.RealmLabel != "" {
 				command = append(command, "--realm-label", test.RealmLabel)
 			}
 		}
 		command = append(command, test.PackageURL)
 	} else {
 		return nil, fmt.Errorf("PackageURL is not set and useRuntests is false for %q", test.Name)
 	}
 	return command, nil
 }
	// Copyright 2019 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package main

	import (
	"bytes"
	"context"
	"encoding/hex"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"strings"
	"time"

	"go.fuchsia.dev/fuchsia/tools/debug/elflib"
	"go.fuchsia.dev/fuchsia/tools/integration/testsharder"
	"go.fuchsia.dev/fuchsia/tools/lib/iomisc"
	"go.fuchsia.dev/fuchsia/tools/lib/logger"
	"go.fuchsia.dev/fuchsia/tools/lib/osmisc"
	"go.fuchsia.dev/fuchsia/tools/lib/retry"
	"go.fuchsia.dev/fuchsia/tools/lib/subprocess"
	"go.fuchsia.dev/fuchsia/tools/net/sshutil"
	"go.fuchsia.dev/fuchsia/tools/serial"
	"go.fuchsia.dev/fuchsia/tools/testing/runtests"
	"go.fuchsia.dev/fuchsia/tools/testing/testrunner/constants"
	"golang.org/x/crypto/ssh"
	)

	const (
	// A test output directory within persistent storage.
	dataOutputDir = "/data/infra/testrunner"

	// TODO(fxb/73171): Fix this path.
	// The output data directory for component v2 tests.
	dataOutputDirV2 = "/tmp/test_manager:0/children/debug_data:0/data"

	// Various tools for running tests.
	runtestsName = "runtests"
	runTestComponentName = "run-test-component"
	runTestSuiteName = "run-test-suite"

	componentV2Suffix = ".cm"

	// Returned by both run-test-component and run-test-suite to indicate the
	// test timed out.
	timeoutExitCode = 21

	// Printed to the serial console when ready to accept user input.
	serialConsoleCursor = "\n$"

	llvmProfileEnvKey = "LLVM_PROFILE_FILE"
	llvmProfileExtension = ".profraw"
	llvmProfileSinkType = "llvm-profile"
	)

	type timeoutError struct {
	timeout time.Duration
	}

	func (e *timeoutError) Error() string {
	return fmt.Sprintf("test killed because timeout reached (%v)", e.timeout)
	}

	// For testability
	type cmdRunner interface {
	Run(ctx context.Context, command []string, stdout, stderr io.Writer) error
	}

	// For testability
	var newRunner = func(dir string, env []string) cmdRunner {
	return &subprocess.Runner{Dir: dir, Env: env}
	}

	// For testability
	type sshClient interface {
	Close()
	Reconnect(ctx context.Context) error
	Run(ctx context.Context, command []string, stdout, stderr io.Writer) error
	}

	// For testability
	type dataSinkCopier interface {
	GetReferences(remoteDir string) (map[string]runtests.DataSinkReference, error)
	Copy(sinks []runtests.DataSinkReference, localDir string) (runtests.DataSinkMap, error)
	Reconnect() error
	Close() error
	}

	// For testability
	type serialClient interface {
	runDiagnostics(ctx context.Context) error
	}

	// subprocessTester executes tests in local subprocesses.
	type subprocessTester struct {
	env []string
	dir string
	perTestTimeout time.Duration
	localOutputDir string
	getModuleBuildIDs func(string) ([]string, error)
	}

	func getModuleBuildIDs(test string) ([]string, error) {
	f, err := os.Open(test)
	if err != nil {
	return nil, err
	}
	buildIDs, err := elflib.GetBuildIDs(filepath.Base(test), f)
	if err != nil {
	return nil, err
	}
	var asStrings []string
	for _, id := range buildIDs {
	asStrings = append(asStrings, hex.EncodeToString(id))
	}
	return asStrings, nil
	}

	// NewSubprocessTester returns a SubprocessTester that can execute tests
	// locally with a given working directory and environment.
	func newSubprocessTester(dir string, env []string, localOutputDir string, perTestTimeout time.Duration) *subprocessTester {
	return &subprocessTester{
	dir: dir,
	env: env,
	perTestTimeout: perTestTimeout,
	localOutputDir: localOutputDir,
	getModuleBuildIDs: getModuleBuildIDs,
	}
	}

	func (t *subprocessTester) Test(ctx context.Context, test testsharder.Test, stdout io.Writer, stderr io.Writer, outDir string) (runtests.DataSinkReference, error) {
	sinkRef := runtests.DataSinkReference{}
	if test.Path == "" {
	return sinkRef, fmt.Errorf("test %q has no `path` set", test.Name)
	}
	// Some tests read testOutDirEnvKey so ensure they get their own output dir.
	if err := os.MkdirAll(outDir, 0770); err != nil {
	return sinkRef, err
	}

	// Might as well emit any profiles directly to the output directory.
	// TODO(fxbug.dev/61208): until this is resolved, we make the assumption
	// that the binaries are statically linked and will only produce one
	// profile on execution. Once build IDs are embedded in profiles
	// automatically, we can switch to a more flexible scheme where, say,
	// we set
	// LLVM_PROFILE_FILE=<output dir>/<test-specific namsepace>/%p.profraw
	// and then record any .profraw file written to that directory as an
	// emitted profile.
	profileRel := filepath.Join(llvmProfileSinkType, test.Path+llvmProfileExtension)
	profileAbs := filepath.Join(t.localOutputDir, profileRel)
	os.MkdirAll(filepath.Dir(profileAbs), os.ModePerm)

	r := newRunner(t.dir, append(
	t.env,
	fmt.Sprintf("%s=%s", testOutDirEnvKey, outDir),
	// When host-side tests are instrumented for profiling, executing
	// them will write a profile to the location under this environment variable.
	fmt.Sprintf("%s=%s", llvmProfileEnvKey, profileAbs),
	))
	if t.perTestTimeout > 0 {
	var cancel context.CancelFunc
	ctx, cancel = context.WithTimeout(ctx, t.perTestTimeout)
	defer cancel()
	}
	err := r.Run(ctx, []string{test.Path}, stdout, stderr)
	if err == context.DeadlineExceeded {
	err = &timeoutError{t.perTestTimeout}
	}

	if exists, profileErr := osmisc.FileExists(profileAbs); profileErr != nil {
	logger.Errorf(ctx, "unable to determine whether a profile was emitted: %v", profileErr)
	} else if exists {
	// TODO(fxbug.dev/61208): delete determination of build IDs once
	// profiles embed this information.
	var buildIDs []string
	buildIDs, profileErr = t.getModuleBuildIDs(test.Path)
	if profileErr == nil {
	sinkRef.Sinks = runtests.DataSinkMap{
	llvmProfileSinkType: []runtests.DataSink{
	{
	Name: filepath.Base(profileRel),
	File: profileRel,
	BuildIDs: buildIDs,
	},
	},
	}
	} else {
	logger.Warningf(ctx, "failed to read module build IDs from %q", test.Path)
	}
	}
	return sinkRef, err
	}

	func (t subprocessTester) EnsureSinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, _ testOutputs) error {
	// Nothing to actually copy; if any profiles were emitted, they would have
	// been written directly to the output directory. We verify here that all
	// recorded data sinks are actually present.
	numSinks := 0
	for _, ref := range sinkRefs {
	for _, sinks := range ref.Sinks {
	for _, sink := range sinks {
	abs := filepath.Join(t.localOutputDir, sink.File)
	exists, err := osmisc.FileExists(abs)
	if err != nil {
	return fmt.Errorf("unable to determine if local data sink %q exists: %v", sink.File, err)
	} else if !exists {
	return fmt.Errorf("expected a local data sink %q, but no such file exists", sink.File)
	}
	numSinks++
	}
	}
	}
	if numSinks > 0 {
	logger.Debugf(ctx, "local data sinks present: %d", numSinks)
	}
	return nil
	}

	func (t *subprocessTester) RunSnapshot(_ context.Context, _ string) error {
	return nil
	}

	func (t *subprocessTester) Close() error {
	return nil
	}

	type serialSocket struct {
	socketPath string
	}

	func (s *serialSocket) runDiagnostics(ctx context.Context) error {
	if s.socketPath == "" {
	return fmt.Errorf("serialSocketPath not set")
	}
	socket, err := serial.NewSocket(ctx, s.socketPath)
	if err != nil {
	return fmt.Errorf("newSerialSocket failed: %v", err)
	}
	defer socket.Close()
	return serial.RunDiagnostics(ctx, socket)
	}

	// fuchsiaSSHTester executes fuchsia tests over an SSH connection.
	type fuchsiaSSHTester struct {
	client sshClient
	copier dataSinkCopier
	useRuntests bool
	localOutputDir string
	perTestTimeout time.Duration
	connectionErrorRetryBackoff retry.Backoff
	serialSocket serialClient
	}

	// newFuchsiaSSHTester returns a fuchsiaSSHTester associated to a fuchsia
	// instance of given nodename, the private key paired with an authorized one
	// and the directive of whether `runtests` should be used to execute the test.
	func newFuchsiaSSHTester(ctx context.Context, addr net.IPAddr, sshKeyFile, localOutputDir, serialSocketPath string, useRuntests bool, perTestTimeout time.Duration) (*fuchsiaSSHTester, error) {
	key, err := ioutil.ReadFile(sshKeyFile)
	if err != nil {
	return nil, fmt.Errorf("failed to read SSH key file: %w", err)
	}
	config, err := sshutil.DefaultSSHConfig(key)
	if err != nil {
	return nil, fmt.Errorf("failed to create an SSH client config: %w", err)
	}

	client, err := sshutil.NewClient(
	ctx,
	sshutil.ConstantAddrResolver{
	Addr: &net.TCPAddr{
	IP: addr.IP,
	Port: sshutil.SSHPort,
	Zone: addr.Zone,
	},
	},
	config,
	sshutil.DefaultConnectBackoff(),
	)
	if err != nil {
	return nil, fmt.Errorf("failed to establish an SSH connection: %w", err)
	}
	copier, err := runtests.NewDataSinkCopier(client)
	if err != nil {
	return nil, err
	}
	return &fuchsiaSSHTester{
	client: client,
	copier: copier,
	useRuntests: useRuntests,
	localOutputDir: localOutputDir,
	perTestTimeout: perTestTimeout,
	connectionErrorRetryBackoff: retry.NewConstantBackoff(time.Second),
	serialSocket: &serialSocket{serialSocketPath},
	}, nil
	}

	func (t *fuchsiaSSHTester) reconnect(ctx context.Context) error {
	if err := t.client.Reconnect(ctx); err != nil {
	return fmt.Errorf("failed to reestablish SSH connection: %w", err)
	}
	if err := t.copier.Reconnect(); err != nil {
	return fmt.Errorf("failed to reconnect data sink copier: %w", err)
	}
	return nil
	}

	func (t *fuchsiaSSHTester) isTimeoutError(test testsharder.Test, err error) bool {
	if t.perTestTimeout <= 0 {
	return false
	}
	if exitErr, ok := err.(*ssh.ExitError); ok {
	return exitErr.Waitmsg.ExitStatus() == timeoutExitCode
	}
	return false
	}

	func (t *fuchsiaSSHTester) runSSHCommandWithRetry(ctx context.Context, command []string, stdout, stderr io.Writer) error {
	var cmdErr error
	const maxReconnectAttempts = 3
	retry.Retry(ctx, retry.WithMaxAttempts(t.connectionErrorRetryBackoff, maxReconnectAttempts), func() error {
	cmdErr = t.client.Run(ctx, command, stdout, stderr)
	if sshutil.IsConnectionError(cmdErr) {
	logger.Errorf(ctx, "attempting to reconnect over SSH after error: %v", cmdErr)
	if err := t.reconnect(ctx); err != nil {
	logger.Errorf(ctx, "%s: %v", constants.FailedToReconnectMsg, err)
	// If we fail to reconnect, continuing is likely hopeless.
	return nil
	}
	// Return non-ConnectionError because code in main.go will exit early if
	// it sees that. Since reconnection succeeded, we don't want that.
	// TODO(garymm): Clean this up; have main.go do its own connection recovery between tests.
	cmdErr = fmt.Errorf("%v", cmdErr)
	return cmdErr
	}
	// Not a connection error -> command passed or failed -> break retry loop.
	return nil
	}, nil)
	return cmdErr
	}

	// Return this error when the test is skipped.
	type TestSkippedError struct{}

	func (e *TestSkippedError) Error() string {
	return "test skipped"
	}

	func isTestSkippedErr(err error) bool {
	_, ok := err.(*TestSkippedError)
	return ok
	}

	// Test runs a test over SSH.
	func (t *fuchsiaSSHTester) Test(ctx context.Context, test testsharder.Test, stdout io.Writer, stderr io.Writer, _ string) (runtests.DataSinkReference, error) {
	sinks := runtests.DataSinkReference{}
	command, err := commandForTest(&test, t.useRuntests, dataOutputDir, t.perTestTimeout)
	if err != nil {
	return sinks, err
	}
	testErr := t.runSSHCommandWithRetry(ctx, command, stdout, stderr)

	if sshutil.IsConnectionError(testErr) {
	if err := t.serialSocket.runDiagnostics(ctx); err != nil {
	logger.Warningf(ctx, "failed to run serial diagnostics: %v", err)
	}
	return sinks, testErr
	}

	if t.isTimeoutError(test, testErr) {
	testErr = &timeoutError{t.perTestTimeout}
	}

	var sinkErr error
	if t.useRuntests && !strings.HasSuffix(test.PackageURL, componentV2Suffix) {
	startTime := time.Now()
	var sinksPerTest map[string]runtests.DataSinkReference
	if sinksPerTest, sinkErr = t.copier.GetReferences(dataOutputDir); sinkErr != nil {
	logger.Errorf(ctx, "failed to determine data sinks for test %q: %v", test.Name, sinkErr)
	} else {
	sinks = sinksPerTest[test.Name]
	}
	duration := time.Now().Sub(startTime)
	if sinks.Size() > 0 {
	logger.Debugf(ctx, "%d data sinks found in %v", sinks.Size(), duration)
	}
	}

	if testErr == nil {
	return sinks, sinkErr
	}
	return sinks, testErr
	}

	func (t fuchsiaSSHTester) EnsureSinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, outputs testOutputs) error {
	// Collect v2 references.
	v2Sinks, err := t.copier.GetReferences(dataOutputDirV2)
	if err != nil {
	// If we fail to get v2 sinks, just log the error but continue to copy v1 sinks.
	logger.Errorf(ctx, "failed to determine data sinks for v2 tests: %v", err)
	}
	var v2SinkRefs []runtests.DataSinkReference
	for _, ref := range v2Sinks {
	v2SinkRefs = append(v2SinkRefs, ref)
	}
	if len(v2SinkRefs) > 0 {
	if err := t.copySinks(ctx, v2SinkRefs, filepath.Join(t.localOutputDir, "v2")); err != nil {
	return err
	}
	outputs.updateDataSinks(v2Sinks, "v2")
	}
	return t.copySinks(ctx, sinkRefs, t.localOutputDir)
	}

	func (t *fuchsiaSSHTester) copySinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, localOutputDir string) error {
	startTime := time.Now()
	sinkMap, err := t.copier.Copy(sinkRefs, localOutputDir)
	if err != nil {
	return fmt.Errorf("failed to copy data sinks off target: %v", err)
	}
	copyDuration := time.Now().Sub(startTime)
	sinkRef := runtests.DataSinkReference{Sinks: sinkMap}
	numSinks := sinkRef.Size()
	if numSinks > 0 {
	logger.Debugf(ctx, "copied %d data sinks in %v", numSinks, copyDuration)
	}
	return nil
	}

	// RunSnapshot runs `snapshot` on the device.
	func (t *fuchsiaSSHTester) RunSnapshot(ctx context.Context, snapshotFile string) error {
	if snapshotFile == "" {
	return nil
	}
	snapshotOutFile, err := osmisc.CreateFile(filepath.Join(t.localOutputDir, snapshotFile))
	if err != nil {
	return fmt.Errorf("failed to create snapshot output file: %w", err)
	}
	defer snapshotOutFile.Close()
	startTime := time.Now()
	err = t.runSSHCommandWithRetry(ctx, []string{"/bin/snapshot"}, snapshotOutFile, os.Stderr)
	if err != nil {
	logger.Errorf(ctx, "%s: %v", constants.FailedToRunSnapshotMsg, err)
	}
	logger.Debugf(ctx, "ran snapshot in %v", time.Now().Sub(startTime))
	return err
	}

	// Close terminates the underlying SSH connection. The object is no longer
	// usable after calling this method.
	func (t *fuchsiaSSHTester) Close() error {
	defer t.client.Close()
	return t.copier.Close()
	}

	// FuchsiaSerialTester executes fuchsia tests over serial.
	type fuchsiaSerialTester struct {
	socket io.ReadWriteCloser
	perTestTimeout time.Duration
	localOutputDir string
	}

	func newFuchsiaSerialTester(ctx context.Context, serialSocketPath string, perTestTimeout time.Duration) (*fuchsiaSerialTester, error) {
	socket, err := serial.NewSocket(ctx, serialSocketPath)
	if err != nil {
	return nil, err
	}

	return &fuchsiaSerialTester{
	socket: socket,
	perTestTimeout: perTestTimeout,
	}, nil
	}

	// Exposed for testability.
	var newTestStartedContext = func(ctx context.Context) (context.Context, context.CancelFunc) {
	return context.WithTimeout(ctx, 5*time.Second)
	}

	// lastWriteSaver is an io.Writer that saves the bytes written in the last Write().
	type lastWriteSaver struct {
	buf []byte
	}

	func (w *lastWriteSaver) Write(p []byte) (int, error) {
	w.buf = make([]byte, len(p))
	copy(w.buf, p)
	return len(p), nil
	}

	func (t *fuchsiaSerialTester) Test(ctx context.Context, test testsharder.Test, stdout, _ io.Writer, _ string) (runtests.DataSinkReference, error) {
	// We don't collect data sinks for serial tests. Just return an empty DataSinkReference.
	sinks := runtests.DataSinkReference{}
	command, err := commandForTest(&test, true, "", t.perTestTimeout)
	if err != nil {
	return sinks, err
	}
	logger.Debugf(ctx, "starting: %v", command)

	// If a single read from the socket includes both the bytes that indicate the test started and the bytes
	// that indicate the test completed, then the startedReader will consume the bytes needed for detecting
	// completion. Thus we save the last read from the socket and replay it when searching for completion.
	lastWrite := &lastWriteSaver{}
	startedReader := iomisc.NewMatchingReader(io.TeeReader(t.socket, lastWrite), [][]byte{[]byte(runtests.StartedSignature + test.Name)})
	for ctx.Err() == nil {
	if err := serial.RunCommands(ctx, t.socket, []serial.Command{{command, 0}}); err != nil {
	return sinks, fmt.Errorf("failed to write to serial socket: %v", err)
	}
	startedCtx, cancel := newTestStartedContext(ctx)
	_, err := iomisc.ReadUntilMatch(startedCtx, startedReader)
	cancel()
	if err == nil {
	break
	}
	logger.Warningf(ctx, "test not started after timeout, retrying")
	}

	if ctx.Err() != nil {
	return sinks, ctx.Err()
	}

	success, err := runtests.TestPassed(ctx, io.TeeReader(
	// See comment above lastWrite declaration.
	io.MultiReader(bytes.NewReader(lastWrite.buf), t.socket),
	// To capture stdout.
	stdout),
	test.Name)

	if err != nil {
	return sinks, err
	} else if !success {
	return sinks, fmt.Errorf("test failed")
	}
	return sinks, nil
	}

	func (t fuchsiaSerialTester) EnsureSinks(_ context.Context, _ []runtests.DataSinkReference, _ testOutputs) error {
	return nil
	}

	func (t *fuchsiaSerialTester) RunSnapshot(_ context.Context, _ string) error {
	return nil
	}

	// Close terminates the underlying Serial socket connection. The object is no
	// longer usable after calling this method.
	func (t *fuchsiaSerialTester) Close() error {
	return t.socket.Close()
	}

	func commandForTest(test *testsharder.Test, useRuntests bool, remoteOutputDir string, timeout time.Duration) ([]string, error) {
	command := []string{}
	// For v2 coverage data, use run-test-suite instead of runtests and collect the data from the designated dataOutputDirV2 directory.
	if useRuntests && !strings.HasSuffix(test.PackageURL, componentV2Suffix) {
	command = []string{runtestsName}
	if remoteOutputDir != "" {
	command = append(command, "--output", remoteOutputDir)
	}
	if timeout > 0 {
	command = append(command, "-i", fmt.Sprintf("%d", int64(timeout.Seconds())))
	}
	if test.RealmLabel != "" {
	command = append(command, "--realm-label", test.RealmLabel)
	}
	if test.PackageURL != "" {
	command = append(command, test.PackageURL)
	} else {
	command = append(command, test.Path)
	}
	} else if test.PackageURL != "" {
	if strings.HasSuffix(test.PackageURL, componentV2Suffix) {
	command = []string{runTestSuiteName, "--filter-ansi"}
	if test.LogSettings.MaxSeverity != "" {
	command = append(command, "--max-severity-logs", fmt.Sprintf("%s", test.LogSettings.MaxSeverity))
	}
	if test.Parallel != 0 {
	command = append(command, "--parallel", fmt.Sprintf("%d", test.Parallel))
	}
	// TODO(fxbug.dev/49262): Once fixed, combine timeout flag setting for v1 and v2.
	if timeout > 0 {
	command = append(command, "--timeout", fmt.Sprintf("%d", int64(timeout.Seconds())))
	}
	} else {
	command = []string{runTestComponentName}
	if test.LogSettings.MaxSeverity != "" {
	command = append(command, fmt.Sprintf("--max-log-severity=%s", test.LogSettings.MaxSeverity))
	}

	if timeout > 0 {
	command = append(command, fmt.Sprintf("--timeout=%d", int64(timeout.Seconds())))
	}

	// run-test-component supports realm-label but run-test-suite does not
	if test.RealmLabel != "" {
	command = append(command, "--realm-label", test.RealmLabel)
	}
	}
	command = append(command, test.PackageURL)
	} else {
	return nil, fmt.Errorf("PackageURL is not set and useRuntests is false for %q", test.Name)
	}
	return command, nil
	}