blob: 6c4bcb44868c818564c8e3f5b2c997b6e3cd3d16 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"context"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"path/filepath"
"strings"
"time"
"go.fuchsia.dev/fuchsia/tools/debug/elflib"
"go.fuchsia.dev/fuchsia/tools/integration/testsharder"
"go.fuchsia.dev/fuchsia/tools/lib/iomisc"
"go.fuchsia.dev/fuchsia/tools/lib/logger"
"go.fuchsia.dev/fuchsia/tools/lib/osmisc"
"go.fuchsia.dev/fuchsia/tools/lib/retry"
"go.fuchsia.dev/fuchsia/tools/lib/subprocess"
"go.fuchsia.dev/fuchsia/tools/net/sshutil"
"go.fuchsia.dev/fuchsia/tools/serial"
"go.fuchsia.dev/fuchsia/tools/testing/runtests"
"go.fuchsia.dev/fuchsia/tools/testing/testrunner/constants"
"golang.org/x/crypto/ssh"
)
const (
// A test output directory within persistent storage.
dataOutputDir = "/data/infra/testrunner"
// TODO(fxb/73171): Fix this path.
// The output data directory for component v2 tests.
dataOutputDirV2 = "/tmp/test_manager:0/children/debug_data:0/data"
// Various tools for running tests.
runtestsName = "runtests"
runTestComponentName = "run-test-component"
runTestSuiteName = "run-test-suite"
componentV2Suffix = ".cm"
// Returned by both run-test-component and run-test-suite to indicate the
// test timed out.
timeoutExitCode = 21
// Printed to the serial console when ready to accept user input.
serialConsoleCursor = "\n$"
llvmProfileEnvKey = "LLVM_PROFILE_FILE"
llvmProfileExtension = ".profraw"
llvmProfileSinkType = "llvm-profile"
)
type timeoutError struct {
timeout time.Duration
}
func (e *timeoutError) Error() string {
return fmt.Sprintf("test killed because timeout reached (%v)", e.timeout)
}
// For testability
type cmdRunner interface {
Run(ctx context.Context, command []string, stdout, stderr io.Writer) error
}
// For testability
var newRunner = func(dir string, env []string) cmdRunner {
return &subprocess.Runner{Dir: dir, Env: env}
}
// For testability
type sshClient interface {
Close()
Reconnect(ctx context.Context) error
Run(ctx context.Context, command []string, stdout, stderr io.Writer) error
}
// For testability
type dataSinkCopier interface {
GetReferences(remoteDir string) (map[string]runtests.DataSinkReference, error)
Copy(sinks []runtests.DataSinkReference, localDir string) (runtests.DataSinkMap, error)
Reconnect() error
Close() error
}
// For testability
type serialClient interface {
runDiagnostics(ctx context.Context) error
}
// subprocessTester executes tests in local subprocesses.
type subprocessTester struct {
env []string
dir string
perTestTimeout time.Duration
localOutputDir string
getModuleBuildIDs func(string) ([]string, error)
}
func getModuleBuildIDs(test string) ([]string, error) {
f, err := os.Open(test)
if err != nil {
return nil, err
}
buildIDs, err := elflib.GetBuildIDs(filepath.Base(test), f)
if err != nil {
return nil, err
}
var asStrings []string
for _, id := range buildIDs {
asStrings = append(asStrings, hex.EncodeToString(id))
}
return asStrings, nil
}
// NewSubprocessTester returns a SubprocessTester that can execute tests
// locally with a given working directory and environment.
func newSubprocessTester(dir string, env []string, localOutputDir string, perTestTimeout time.Duration) *subprocessTester {
return &subprocessTester{
dir: dir,
env: env,
perTestTimeout: perTestTimeout,
localOutputDir: localOutputDir,
getModuleBuildIDs: getModuleBuildIDs,
}
}
func (t *subprocessTester) Test(ctx context.Context, test testsharder.Test, stdout io.Writer, stderr io.Writer, outDir string) (runtests.DataSinkReference, error) {
sinkRef := runtests.DataSinkReference{}
if test.Path == "" {
return sinkRef, fmt.Errorf("test %q has no `path` set", test.Name)
}
// Some tests read testOutDirEnvKey so ensure they get their own output dir.
if err := os.MkdirAll(outDir, 0770); err != nil {
return sinkRef, err
}
// Might as well emit any profiles directly to the output directory.
// TODO(fxbug.dev/61208): until this is resolved, we make the assumption
// that the binaries are statically linked and will only produce one
// profile on execution. Once build IDs are embedded in profiles
// automatically, we can switch to a more flexible scheme where, say,
// we set
// LLVM_PROFILE_FILE=<output dir>/<test-specific namsepace>/%p.profraw
// and then record any .profraw file written to that directory as an
// emitted profile.
profileRel := filepath.Join(llvmProfileSinkType, test.Path+llvmProfileExtension)
profileAbs := filepath.Join(t.localOutputDir, profileRel)
os.MkdirAll(filepath.Dir(profileAbs), os.ModePerm)
r := newRunner(t.dir, append(
t.env,
fmt.Sprintf("%s=%s", testOutDirEnvKey, outDir),
// When host-side tests are instrumented for profiling, executing
// them will write a profile to the location under this environment variable.
fmt.Sprintf("%s=%s", llvmProfileEnvKey, profileAbs),
))
if t.perTestTimeout > 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, t.perTestTimeout)
defer cancel()
}
err := r.Run(ctx, []string{test.Path}, stdout, stderr)
if err == context.DeadlineExceeded {
err = &timeoutError{t.perTestTimeout}
}
if exists, profileErr := osmisc.FileExists(profileAbs); profileErr != nil {
logger.Errorf(ctx, "unable to determine whether a profile was emitted: %v", profileErr)
} else if exists {
// TODO(fxbug.dev/61208): delete determination of build IDs once
// profiles embed this information.
var buildIDs []string
buildIDs, profileErr = t.getModuleBuildIDs(test.Path)
if profileErr == nil {
sinkRef.Sinks = runtests.DataSinkMap{
llvmProfileSinkType: []runtests.DataSink{
{
Name: filepath.Base(profileRel),
File: profileRel,
BuildIDs: buildIDs,
},
},
}
} else {
logger.Warningf(ctx, "failed to read module build IDs from %q", test.Path)
}
}
return sinkRef, err
}
func (t *subprocessTester) EnsureSinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, _ *testOutputs) error {
// Nothing to actually copy; if any profiles were emitted, they would have
// been written directly to the output directory. We verify here that all
// recorded data sinks are actually present.
numSinks := 0
for _, ref := range sinkRefs {
for _, sinks := range ref.Sinks {
for _, sink := range sinks {
abs := filepath.Join(t.localOutputDir, sink.File)
exists, err := osmisc.FileExists(abs)
if err != nil {
return fmt.Errorf("unable to determine if local data sink %q exists: %v", sink.File, err)
} else if !exists {
return fmt.Errorf("expected a local data sink %q, but no such file exists", sink.File)
}
numSinks++
}
}
}
if numSinks > 0 {
logger.Debugf(ctx, "local data sinks present: %d", numSinks)
}
return nil
}
func (t *subprocessTester) RunSnapshot(_ context.Context, _ string) error {
return nil
}
func (t *subprocessTester) Close() error {
return nil
}
type serialSocket struct {
socketPath string
}
func (s *serialSocket) runDiagnostics(ctx context.Context) error {
if s.socketPath == "" {
return fmt.Errorf("serialSocketPath not set")
}
socket, err := serial.NewSocket(ctx, s.socketPath)
if err != nil {
return fmt.Errorf("newSerialSocket failed: %v", err)
}
defer socket.Close()
return serial.RunDiagnostics(ctx, socket)
}
// fuchsiaSSHTester executes fuchsia tests over an SSH connection.
type fuchsiaSSHTester struct {
client sshClient
copier dataSinkCopier
useRuntests bool
localOutputDir string
perTestTimeout time.Duration
connectionErrorRetryBackoff retry.Backoff
serialSocket serialClient
}
// newFuchsiaSSHTester returns a fuchsiaSSHTester associated to a fuchsia
// instance of given nodename, the private key paired with an authorized one
// and the directive of whether `runtests` should be used to execute the test.
func newFuchsiaSSHTester(ctx context.Context, addr net.IPAddr, sshKeyFile, localOutputDir, serialSocketPath string, useRuntests bool, perTestTimeout time.Duration) (*fuchsiaSSHTester, error) {
key, err := ioutil.ReadFile(sshKeyFile)
if err != nil {
return nil, fmt.Errorf("failed to read SSH key file: %w", err)
}
config, err := sshutil.DefaultSSHConfig(key)
if err != nil {
return nil, fmt.Errorf("failed to create an SSH client config: %w", err)
}
client, err := sshutil.NewClient(
ctx,
sshutil.ConstantAddrResolver{
Addr: &net.TCPAddr{
IP: addr.IP,
Port: sshutil.SSHPort,
Zone: addr.Zone,
},
},
config,
sshutil.DefaultConnectBackoff(),
)
if err != nil {
return nil, fmt.Errorf("failed to establish an SSH connection: %w", err)
}
copier, err := runtests.NewDataSinkCopier(client)
if err != nil {
return nil, err
}
return &fuchsiaSSHTester{
client: client,
copier: copier,
useRuntests: useRuntests,
localOutputDir: localOutputDir,
perTestTimeout: perTestTimeout,
connectionErrorRetryBackoff: retry.NewConstantBackoff(time.Second),
serialSocket: &serialSocket{serialSocketPath},
}, nil
}
func (t *fuchsiaSSHTester) reconnect(ctx context.Context) error {
if err := t.client.Reconnect(ctx); err != nil {
return fmt.Errorf("failed to reestablish SSH connection: %w", err)
}
if err := t.copier.Reconnect(); err != nil {
return fmt.Errorf("failed to reconnect data sink copier: %w", err)
}
return nil
}
func (t *fuchsiaSSHTester) isTimeoutError(test testsharder.Test, err error) bool {
if t.perTestTimeout <= 0 {
return false
}
if exitErr, ok := err.(*ssh.ExitError); ok {
return exitErr.Waitmsg.ExitStatus() == timeoutExitCode
}
return false
}
func (t *fuchsiaSSHTester) runSSHCommandWithRetry(ctx context.Context, command []string, stdout, stderr io.Writer) error {
var cmdErr error
const maxReconnectAttempts = 3
retry.Retry(ctx, retry.WithMaxAttempts(t.connectionErrorRetryBackoff, maxReconnectAttempts), func() error {
cmdErr = t.client.Run(ctx, command, stdout, stderr)
if sshutil.IsConnectionError(cmdErr) {
logger.Errorf(ctx, "attempting to reconnect over SSH after error: %v", cmdErr)
if err := t.reconnect(ctx); err != nil {
logger.Errorf(ctx, "%s: %v", constants.FailedToReconnectMsg, err)
// If we fail to reconnect, continuing is likely hopeless.
return nil
}
// Return non-ConnectionError because code in main.go will exit early if
// it sees that. Since reconnection succeeded, we don't want that.
// TODO(garymm): Clean this up; have main.go do its own connection recovery between tests.
cmdErr = fmt.Errorf("%v", cmdErr)
return cmdErr
}
// Not a connection error -> command passed or failed -> break retry loop.
return nil
}, nil)
return cmdErr
}
// Return this error when the test is skipped.
type TestSkippedError struct{}
func (e *TestSkippedError) Error() string {
return "test skipped"
}
func isTestSkippedErr(err error) bool {
_, ok := err.(*TestSkippedError)
return ok
}
// Test runs a test over SSH.
func (t *fuchsiaSSHTester) Test(ctx context.Context, test testsharder.Test, stdout io.Writer, stderr io.Writer, _ string) (runtests.DataSinkReference, error) {
sinks := runtests.DataSinkReference{}
command, err := commandForTest(&test, t.useRuntests, dataOutputDir, t.perTestTimeout)
if err != nil {
return sinks, err
}
testErr := t.runSSHCommandWithRetry(ctx, command, stdout, stderr)
if sshutil.IsConnectionError(testErr) {
if err := t.serialSocket.runDiagnostics(ctx); err != nil {
logger.Warningf(ctx, "failed to run serial diagnostics: %v", err)
}
return sinks, testErr
}
if t.isTimeoutError(test, testErr) {
testErr = &timeoutError{t.perTestTimeout}
}
var sinkErr error
if t.useRuntests && !strings.HasSuffix(test.PackageURL, componentV2Suffix) {
startTime := time.Now()
var sinksPerTest map[string]runtests.DataSinkReference
if sinksPerTest, sinkErr = t.copier.GetReferences(dataOutputDir); sinkErr != nil {
logger.Errorf(ctx, "failed to determine data sinks for test %q: %v", test.Name, sinkErr)
} else {
sinks = sinksPerTest[test.Name]
}
duration := time.Now().Sub(startTime)
if sinks.Size() > 0 {
logger.Debugf(ctx, "%d data sinks found in %v", sinks.Size(), duration)
}
}
if testErr == nil {
return sinks, sinkErr
}
return sinks, testErr
}
func (t *fuchsiaSSHTester) EnsureSinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, outputs *testOutputs) error {
// Collect v2 references.
v2Sinks, err := t.copier.GetReferences(dataOutputDirV2)
if err != nil {
// If we fail to get v2 sinks, just log the error but continue to copy v1 sinks.
logger.Errorf(ctx, "failed to determine data sinks for v2 tests: %v", err)
}
var v2SinkRefs []runtests.DataSinkReference
for _, ref := range v2Sinks {
v2SinkRefs = append(v2SinkRefs, ref)
}
if len(v2SinkRefs) > 0 {
if err := t.copySinks(ctx, v2SinkRefs, filepath.Join(t.localOutputDir, "v2")); err != nil {
return err
}
outputs.updateDataSinks(v2Sinks, "v2")
}
return t.copySinks(ctx, sinkRefs, t.localOutputDir)
}
func (t *fuchsiaSSHTester) copySinks(ctx context.Context, sinkRefs []runtests.DataSinkReference, localOutputDir string) error {
startTime := time.Now()
sinkMap, err := t.copier.Copy(sinkRefs, localOutputDir)
if err != nil {
return fmt.Errorf("failed to copy data sinks off target: %v", err)
}
copyDuration := time.Now().Sub(startTime)
sinkRef := runtests.DataSinkReference{Sinks: sinkMap}
numSinks := sinkRef.Size()
if numSinks > 0 {
logger.Debugf(ctx, "copied %d data sinks in %v", numSinks, copyDuration)
}
return nil
}
// RunSnapshot runs `snapshot` on the device.
func (t *fuchsiaSSHTester) RunSnapshot(ctx context.Context, snapshotFile string) error {
if snapshotFile == "" {
return nil
}
snapshotOutFile, err := osmisc.CreateFile(filepath.Join(t.localOutputDir, snapshotFile))
if err != nil {
return fmt.Errorf("failed to create snapshot output file: %w", err)
}
defer snapshotOutFile.Close()
startTime := time.Now()
err = t.runSSHCommandWithRetry(ctx, []string{"/bin/snapshot"}, snapshotOutFile, os.Stderr)
if err != nil {
logger.Errorf(ctx, "%s: %v", constants.FailedToRunSnapshotMsg, err)
}
logger.Debugf(ctx, "ran snapshot in %v", time.Now().Sub(startTime))
return err
}
// Close terminates the underlying SSH connection. The object is no longer
// usable after calling this method.
func (t *fuchsiaSSHTester) Close() error {
defer t.client.Close()
return t.copier.Close()
}
// FuchsiaSerialTester executes fuchsia tests over serial.
type fuchsiaSerialTester struct {
socket io.ReadWriteCloser
perTestTimeout time.Duration
localOutputDir string
}
func newFuchsiaSerialTester(ctx context.Context, serialSocketPath string, perTestTimeout time.Duration) (*fuchsiaSerialTester, error) {
socket, err := serial.NewSocket(ctx, serialSocketPath)
if err != nil {
return nil, err
}
return &fuchsiaSerialTester{
socket: socket,
perTestTimeout: perTestTimeout,
}, nil
}
// Exposed for testability.
var newTestStartedContext = func(ctx context.Context) (context.Context, context.CancelFunc) {
return context.WithTimeout(ctx, 5*time.Second)
}
// lastWriteSaver is an io.Writer that saves the bytes written in the last Write().
type lastWriteSaver struct {
buf []byte
}
func (w *lastWriteSaver) Write(p []byte) (int, error) {
w.buf = make([]byte, len(p))
copy(w.buf, p)
return len(p), nil
}
func (t *fuchsiaSerialTester) Test(ctx context.Context, test testsharder.Test, stdout, _ io.Writer, _ string) (runtests.DataSinkReference, error) {
// We don't collect data sinks for serial tests. Just return an empty DataSinkReference.
sinks := runtests.DataSinkReference{}
command, err := commandForTest(&test, true, "", t.perTestTimeout)
if err != nil {
return sinks, err
}
logger.Debugf(ctx, "starting: %v", command)
// If a single read from the socket includes both the bytes that indicate the test started and the bytes
// that indicate the test completed, then the startedReader will consume the bytes needed for detecting
// completion. Thus we save the last read from the socket and replay it when searching for completion.
lastWrite := &lastWriteSaver{}
startedReader := iomisc.NewMatchingReader(io.TeeReader(t.socket, lastWrite), [][]byte{[]byte(runtests.StartedSignature + test.Name)})
for ctx.Err() == nil {
if err := serial.RunCommands(ctx, t.socket, []serial.Command{{command, 0}}); err != nil {
return sinks, fmt.Errorf("failed to write to serial socket: %v", err)
}
startedCtx, cancel := newTestStartedContext(ctx)
_, err := iomisc.ReadUntilMatch(startedCtx, startedReader)
cancel()
if err == nil {
break
}
logger.Warningf(ctx, "test not started after timeout, retrying")
}
if ctx.Err() != nil {
return sinks, ctx.Err()
}
success, err := runtests.TestPassed(ctx, io.TeeReader(
// See comment above lastWrite declaration.
io.MultiReader(bytes.NewReader(lastWrite.buf), t.socket),
// To capture stdout.
stdout),
test.Name)
if err != nil {
return sinks, err
} else if !success {
return sinks, fmt.Errorf("test failed")
}
return sinks, nil
}
func (t *fuchsiaSerialTester) EnsureSinks(_ context.Context, _ []runtests.DataSinkReference, _ *testOutputs) error {
return nil
}
func (t *fuchsiaSerialTester) RunSnapshot(_ context.Context, _ string) error {
return nil
}
// Close terminates the underlying Serial socket connection. The object is no
// longer usable after calling this method.
func (t *fuchsiaSerialTester) Close() error {
return t.socket.Close()
}
func commandForTest(test *testsharder.Test, useRuntests bool, remoteOutputDir string, timeout time.Duration) ([]string, error) {
command := []string{}
// For v2 coverage data, use run-test-suite instead of runtests and collect the data from the designated dataOutputDirV2 directory.
if useRuntests && !strings.HasSuffix(test.PackageURL, componentV2Suffix) {
command = []string{runtestsName}
if remoteOutputDir != "" {
command = append(command, "--output", remoteOutputDir)
}
if timeout > 0 {
command = append(command, "-i", fmt.Sprintf("%d", int64(timeout.Seconds())))
}
if test.RealmLabel != "" {
command = append(command, "--realm-label", test.RealmLabel)
}
if test.PackageURL != "" {
command = append(command, test.PackageURL)
} else {
command = append(command, test.Path)
}
} else if test.PackageURL != "" {
if strings.HasSuffix(test.PackageURL, componentV2Suffix) {
command = []string{runTestSuiteName, "--filter-ansi"}
if test.LogSettings.MaxSeverity != "" {
command = append(command, "--max-severity-logs", fmt.Sprintf("%s", test.LogSettings.MaxSeverity))
}
if test.Parallel != 0 {
command = append(command, "--parallel", fmt.Sprintf("%d", test.Parallel))
}
// TODO(fxbug.dev/49262): Once fixed, combine timeout flag setting for v1 and v2.
if timeout > 0 {
command = append(command, "--timeout", fmt.Sprintf("%d", int64(timeout.Seconds())))
}
} else {
command = []string{runTestComponentName}
if test.LogSettings.MaxSeverity != "" {
command = append(command, fmt.Sprintf("--max-log-severity=%s", test.LogSettings.MaxSeverity))
}
if timeout > 0 {
command = append(command, fmt.Sprintf("--timeout=%d", int64(timeout.Seconds())))
}
// run-test-component supports realm-label but run-test-suite does not
if test.RealmLabel != "" {
command = append(command, "--realm-label", test.RealmLabel)
}
}
command = append(command, test.PackageURL)
} else {
return nil, fmt.Errorf("PackageURL is not set and useRuntests is false for %q", test.Name)
}
return command, nil
}