// blob: c50cadb0d348e65e510f920008923bfe3e90fccb [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package main
import (
"bufio"
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"path/filepath"
"sync"
"time"
"go.fuchsia.dev/fuchsia/tools/bootserver"
"go.fuchsia.dev/fuchsia/tools/botanist"
"go.fuchsia.dev/fuchsia/tools/botanist/constants"
"go.fuchsia.dev/fuchsia/tools/botanist/target"
"go.fuchsia.dev/fuchsia/tools/lib/environment"
"go.fuchsia.dev/fuchsia/tools/lib/flagmisc"
"go.fuchsia.dev/fuchsia/tools/lib/logger"
"go.fuchsia.dev/fuchsia/tools/lib/runner"
"go.fuchsia.dev/fuchsia/tools/lib/syslog"
"go.fuchsia.dev/fuchsia/tools/net/sshutil"
"go.fuchsia.dev/fuchsia/tools/serial"
"github.com/google/subcommands"
"golang.org/x/sync/errgroup"
)
// RunCommand is a Command implementation for booting a device and running a
// given command locally. Its fields are populated from command-line flags in
// SetFlags.
type RunCommand struct {
// configFile is the path to the file of target configurations (-config).
configFile string
// imageManifest is the path to an image manifest (-images).
imageManifest string
// netboot tells botanist to netboot (and not to pave).
netboot bool
// zirconArgs are kernel command-line arguments to pass on boot.
zirconArgs flagmisc.StringsValue
// timeout is the duration allowed for the command to finish execution.
timeout time.Duration
// syslogFile, if nonempty, is the file to which the system's logs will be written.
syslogFile string
// sshKey is the path to a private SSH user key.
sshKey string
// serialLogFile, if nonempty, is the file where the system's serial logs will be written.
serialLogFile string
// repoURL specifies the URL of a package repository.
repoURL string
// blobURL optionally specifies the URL of where a package repository's blobs may be served from.
// Defaults to $repoURL/blobs.
blobURL string
}
// Name returns the name under which this subcommand is registered.
func (*RunCommand) Name() string {
	const name = "run"
	return name
}
// Usage returns the usage banner for the subcommand: the invocation form
// followed by the "flags:" heading.
func (*RunCommand) Usage() string {
	return "\nbotanist run [flags...] [command...]\nflags:\n"
}
// Synopsis returns a one-line description of the subcommand.
func (*RunCommand) Synopsis() string {
	const synopsis = "boots a device and runs a local command"
	return synopsis
}
// SetFlags registers the subcommand's flags on f, binding each flag to the
// corresponding RunCommand field.
func (r *RunCommand) SetFlags(f *flag.FlagSet) {
	// Every string-valued flag defaults to the empty string, so they are
	// registered from a table.
	stringFlags := []struct {
		dst   *string
		name  string
		usage string
	}{
		{&r.configFile, "config", "path to file of device config"},
		{&r.imageManifest, "images", "path to an image manifest"},
		{&r.syslogFile, "syslog", "file to write the systems logs to"},
		{&r.sshKey, "ssh", "file containing a private SSH user key; if not provided, a private key will be generated."},
		{&r.serialLogFile, "serial-log", "file to write the serial logs to."},
		{&r.repoURL, "repo", "URL at which to configure a package repository; if the placeholder of \"localhost\" will be resolved and scoped as appropriate"},
		{&r.blobURL, "blobs", "URL at which to serve a package repository's blobs; if the placeholder of \"localhost\" will be resolved and scoped as appropriate"},
	}
	for _, sf := range stringFlags {
		f.StringVar(sf.dst, sf.name, "", sf.usage)
	}

	// The remaining flags have other types or non-empty defaults.
	f.BoolVar(&r.netboot, "netboot", false, "if set, botanist will not pave; but will netboot instead")
	f.Var(&r.zirconArgs, "zircon-args", "kernel command-line arguments")
	f.DurationVar(&r.timeout, "timeout", 10*time.Minute, "duration allowed for the command to finish execution.")
}
// execute loads the target configurations from r.configFile, derives a target
// from each entry, sets up serial log collection, starts every target, runs
// args as a local command against the first target, and finally stops the
// targets. Setup failures are returned directly; otherwise the result is
// whatever waiting on the errgroup that owns the background goroutines yields.
func (r *RunCommand) execute(ctx context.Context, args []string) error {
// cancel is also invoked by the command goroutine below to tell the other
// goroutines to exit once the command has finished.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
opts := target.Options{
Netboot: r.netboot,
SSHKey: r.sshKey,
}
// The config file must contain a JSON list; each element describes one target.
data, err := ioutil.ReadFile(r.configFile)
if err != nil {
return fmt.Errorf("%s: %v", constants.ReadConfigFileErrorMsg, err)
}
var objs []json.RawMessage
if err := json.Unmarshal(data, &objs); err != nil {
return fmt.Errorf("could not unmarshal config file as a JSON list: %v", err)
}
var targets []target.Target
for _, obj := range objs {
t, err := deriveTarget(ctx, obj, opts)
if err != nil {
return err
}
targets = append(targets, t)
}
if len(targets) == 0 {
return fmt.Errorf("no targets found")
}
// This is the primary target that a command will be run against and that
// logs will be streamed from.
t0 := targets[0]
// Modify the zirconArgs passed to the kernel on boot to enable serial on x64.
// arm64 devices should already be enabling kernel.serial at compile time.
// We need to pass this in to all devices (even those without a serial line)
// to prevent race conditions that only occur when the option isn't present.
// TODO (fxbug.dev/10480): Move this back to being invoked in the if clause.
r.zirconArgs = append(r.zirconArgs, "kernel.serial=legacy")
// Disable usb mass storage to determine if it affects NUC stability.
// TODO(rudymathu): Remove this once stability is achieved.
r.zirconArgs = append(r.zirconArgs, "driver.usb_mass_storage.disable")
// From here on ctx refers to the errgroup's context, which shadows the
// cancellable context created above.
eg, ctx := errgroup.WithContext(ctx)
// socketPath initially comes from the environment; if that route is not
// taken and the primary target has a serial device, it is replaced below
// with the path of a freshly created socket.
socketPath := os.Getenv(constants.SerialSocketEnvKey)
var conn net.Conn
if socketPath != "" && r.serialLogFile != "" {
// If a serial server was created earlier in the stack, use
// the socket to copy to the serial log file.
serialLog, err := os.Create(r.serialLogFile)
if err != nil {
return err
}
defer serialLog.Close()
conn, err = net.Dial("unix", socketPath)
if err != nil {
return err
}
eg.Go(func() error {
logger.Debugf(ctx, "starting serial collection")
// Copy each line from the serial mux to the log file.
b := bufio.NewReader(conn)
for {
line, err := b.ReadString('\n')
if err != nil {
// Errors recognized by serial.IsErrNetClosing end the loop without
// failing the group; conn is closed by the command goroutine below
// once it finishes.
if !serial.IsErrNetClosing(err) {
return fmt.Errorf("%s: %w", constants.SerialReadErrorMsg, err)
}
return nil
}
if _, err := io.WriteString(serialLog, line); err != nil {
return fmt.Errorf("failed to write line to serial log: %w", err)
}
}
})
} else if t0.Serial() != nil {
// Otherwise, spin up a serial server now.
defer t0.Serial().Close()
sOpts := serial.ServerOptions{}
if r.serialLogFile != "" {
serialLog, err := os.Create(r.serialLogFile)
if err != nil {
return err
}
defer serialLog.Close()
sOpts.AuxiliaryOutput = serialLog
}
s := serial.NewServer(t0.Serial(), sOpts)
socketPath = createSocketPath()
addr := &net.UnixAddr{Name: socketPath, Net: "unix"}
l, err := net.ListenUnix("unix", addr)
if err != nil {
return err
}
eg.Go(func() error {
// Server errors are only reported while ctx has not been cancelled.
if err := s.Run(ctx, l); err != nil && ctx.Err() == nil {
return fmt.Errorf("serial server error: %w", err)
}
return nil
})
}
// Watch every target in the background. target.ErrUnimplemented and errors
// observed after ctx has been cancelled are not treated as failures.
for _, t := range targets {
t := t
eg.Go(func() error {
if err := t.Wait(ctx); err != nil && err != target.ErrUnimplemented && ctx.Err() == nil {
return fmt.Errorf("target %s failed: %w", t.Nodename(), err)
}
return nil
})
}
// The command goroutine: start the targets, run the command against t0, then
// stop the targets and release the other goroutines.
eg.Go(func() error {
// Signal other goroutines to exit.
defer cancel()
if conn != nil {
defer conn.Close()
}
if err := r.startTargets(ctx, targets, socketPath); err != nil {
return fmt.Errorf("%s: %w", constants.FailedToStartTargetMsg, err)
}
// Stop the targets with a fresh one-minute context rather than ctx, which
// may already have been cancelled by the time this deferred call runs.
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
r.stopTargets(ctx, targets)
}()
return r.runAgainstTarget(ctx, t0, args, socketPath)
})
return eg.Wait()
}
// startTargets starts all of the given targets concurrently and blocks until
// every one has started or one of them fails. Targets are paved unless
// r.netboot is set, in which case they are netbooted. serialSocketPath is
// forwarded to each target's Start call.
func (r *RunCommand) startTargets(ctx context.Context, targets []target.Target, serialSocketPath string) error {
	mode := bootserver.ModePave
	if r.netboot {
		mode = bootserver.ModeNetboot
	}

	// We wait until targets have started before running the subcommand against the zeroth one.
	group, groupCtx := errgroup.WithContext(ctx)
	for i := range targets {
		tgt := targets[i]
		group.Go(func() error {
			// TODO(fxbug.dev/47910): Move outside gofunc once we get rid of downloading or ensure that it only happens once.
			images, closeImages, err := bootserver.GetImages(groupCtx, r.imageManifest, mode)
			if err != nil {
				return err
			}
			// Release the images once this target's Start call has returned.
			defer closeImages()
			return tgt.Start(groupCtx, images, r.zirconArgs, serialSocketPath)
		})
	}
	return group.Wait()
}
// stopTargets stops all of the given targets in parallel and returns once
// every Stop call has returned. Errors from Stop are discarded.
func (r *RunCommand) stopTargets(ctx context.Context, targets []target.Target) {
	group, groupCtx := errgroup.WithContext(ctx)
	for i := range targets {
		tgt := targets[i]
		group.Go(func() error {
			return tgt.Stop(groupCtx)
		})
	}
	// The combined result is not reported to the caller.
	_ = group.Wait()
}
// runAgainstTarget runs args as a local subprocess whose environment describes
// target t, bounded by r.timeout.
//
// The subprocess always receives the target's nodename and the serial socket
// path. Unless netbooting, the function additionally:
//   - determines the target's address (a configured address if the target
//     provides one, otherwise by resolving the nodename) and exports it,
//   - opens an SSH connection to the target,
//   - registers the package repository when r.repoURL is set,
//   - streams syslogs to r.syslogFile when it is set, and
//   - exports the path of the SSH key.
//
// It returns an error if any of the setup steps fails, or if the subprocess
// fails or exceeds the timeout.
func (r *RunCommand) runAgainstTarget(ctx context.Context, t target.Target, args []string, socketPath string) error {
	subprocessEnv := map[string]string{
		constants.NodenameEnvKey:     t.Nodename(),
		constants.SerialSocketEnvKey: socketPath,
	}

	// If |netboot| is true, then we assume that fuchsia is not provisioned
	// with a netstack; in this case, do not try to establish a connection.
	if !r.netboot {
		p, err := ioutil.ReadFile(t.SSHKey())
		if err != nil {
			return err
		}
		config, err := sshutil.DefaultSSHConfig(p)
		if err != nil {
			return err
		}

		sshAddr := net.TCPAddr{
			Port: sshutil.SSHPort,
		}
		// Prefer an address supplied by the target's configuration.
		if ct, ok := t.(target.ConfiguredTarget); ok {
			if addr := ct.Address(); len(addr) != 0 {
				sshAddr.IP = addr
				subprocessEnv[constants.DeviceAddrEnvKey] = addr.String()
			}
		}
		if sshAddr.IP == nil {
			// Resolution gets its own 90 second budget, scoped to this closure
			// so that the timeout does not apply to the rest of the function.
			ipv4Addr, ipv6Addr, err := func() (net.IP, net.IPAddr, error) {
				ctx, cancel := context.WithTimeout(ctx, 90*time.Second)
				defer cancel()
				return botanist.ResolveIP(ctx, t.Nodename())
			}()
			if err != nil {
				return fmt.Errorf("could not resolve IP address of %s: %w", t.Nodename(), err)
			}
			if ipv4Addr != nil {
				sshAddr.IP = ipv4Addr
				logger.Infof(ctx, "IPv4 address of %s found: %s", t.Nodename(), ipv4Addr)
				subprocessEnv[constants.IPv4AddrEnvKey] = ipv4Addr.String()
				if _, ok := subprocessEnv[constants.DeviceAddrEnvKey]; !ok {
					subprocessEnv[constants.DeviceAddrEnvKey] = ipv4Addr.String()
				}
			} else {
				logger.Warningf(ctx, "could not resolve IPv4 address of %s", t.Nodename())
			}
			// When both families resolve, SSH uses the IPv6 address (it
			// overwrites sshAddr here) while the device address variable keeps
			// the IPv4 value set above.
			if ipv6Addr.IP != nil {
				sshAddr.IP = ipv6Addr.IP
				sshAddr.Zone = ipv6Addr.Zone
				logger.Infof(ctx, "IPv6 address of %s found: %s", t.Nodename(), &ipv6Addr)
				subprocessEnv[constants.IPv6AddrEnvKey] = ipv6Addr.String()
				if _, ok := subprocessEnv[constants.DeviceAddrEnvKey]; !ok {
					subprocessEnv[constants.DeviceAddrEnvKey] = ipv6Addr.String()
				}
			} else {
				logger.Warningf(ctx, "could not resolve IPv6 address of %s", t.Nodename())
			}
		}
		if sshAddr.IP == nil {
			// Reachable when ResolveIP times out because no error is returned.
			dumpThreadsOverSerial(ctx, socketPath)
			return fmt.Errorf("%s for %s", constants.FailedToResolveIPErrorMsg, t.Nodename())
		}

		client, err := sshutil.NewClient(ctx, &sshAddr, config, sshutil.DefaultConnectBackoff())
		if err != nil {
			return err
		}
		defer client.Close()

		if r.repoURL != "" {
			if err := botanist.AddPackageRepository(ctx, client, r.repoURL, r.blobURL); err != nil {
				return fmt.Errorf("%s: %w", constants.PackageRepoSetupErrorMsg, err)
			}
		}
		if r.syslogFile != "" {
			stopStreaming, err := r.startSyslogStream(ctx, client)
			if err != nil {
				return err
			}
			// Stop streaming syslogs after we've finished running the command.
			// Registered after client.Close, so it runs before the client is
			// closed.
			defer stopStreaming()
		}
		subprocessEnv[constants.SSHKeyEnvKey] = t.SSHKey()
	}

	// Run the provided command against t0, adding |subprocessEnv| into
	// its environment.
	environ := os.Environ()
	for k, v := range subprocessEnv {
		environ = append(environ, fmt.Sprintf("%s=%s", k, v))
	}
	cmdRunner := runner.SubprocessRunner{
		Env: environ,
	}

	ctx, cancel := context.WithTimeout(ctx, r.timeout)
	defer cancel()
	if err := cmdRunner.Run(ctx, args, os.Stdout, os.Stderr); err != nil {
		return fmt.Errorf("command %s with timeout %s failed: %w", args, r.timeout, err)
	}
	return nil
}

// dumpThreadsOverSerial invokes `threads` over the serial socket at socketPath
// to dump process state to the logs. The command is sent twice, five seconds
// apart, to identify hanging processes. Failures are logged and otherwise
// ignored; the connection is closed before returning.
func dumpThreadsOverSerial(ctx context.Context, socketPath string) {
	conn, err := net.Dial("unix", socketPath)
	if err != nil {
		logger.Errorf(ctx, "failed to open serial socket %s to invoke threads: %s", socketPath, err)
		return
	}
	defer conn.Close()

	for i := 0; i < 2; i++ {
		if _, err := io.WriteString(conn, "\r\nthreads --all-processes\r\n"); err != nil {
			logger.Errorf(ctx, "failed to send threads over serial socket: %s", err)
		}
		time.Sleep(5 * time.Second)
	}
}
// startSyslogStream uses the SSH client to start streaming syslogs from the
// fuchsia target to r.syslogFile, in a background goroutine. It returns a
// function that cancels the streaming and waits for the goroutine to finish,
// which should be deferred by the caller. By the time that function returns,
// the syslog file has been closed.
//
// An error is returned only if the syslog file cannot be created; in that
// case no goroutine is started.
func (r *RunCommand) startSyslogStream(ctx context.Context, client *sshutil.Client) (stopStreaming func(), err error) {
	syslogger := syslog.NewSyslogger(client)

	f, err := os.Create(r.syslogFile)
	if err != nil {
		return nil, err
	}

	ctx, cancel := context.WithCancel(ctx)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		// Deferred calls run last-in-first-out: Done is registered first so
		// that the file is closed before anyone blocked in wg.Wait is released.
		defer wg.Done()
		defer f.Close()
		syslogger.Stream(ctx, f)
	}()

	// The caller should call this function when they want to stop streaming syslogs.
	return func() {
		// Signal syslogger.Stream to stop and wait for it to finish before
		// returning. This makes sure syslogger.Stream finishes any necessary
		// clean-up (e.g. closing any open SSH sessions) before the SSH client
		// is closed.
		cancel()
		wg.Wait()
	}, nil
}
// Execute is the subcommand entry point. It requires at least one positional
// argument (the command to run), ensures the environment is set up, expands
// environment variable references in the command arguments and in the
// repository/blob URLs, and then runs the command via execute.
//
// It returns ExitUsageError when no command is given, ExitFailure when
// environment setup or execution fails, and ExitSuccess otherwise.
func (r *RunCommand) Execute(ctx context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
	rawArgs := f.Args()
	if len(rawArgs) == 0 {
		return subcommands.ExitUsageError
	}

	cleanUp, err := environment.Ensure()
	if err != nil {
		logger.Errorf(ctx, "failed to setup environment: %v", err)
		return subcommands.ExitFailure
	}
	defer cleanUp()

	// Expansion happens only after the environment has been ensured.
	expanded := make([]string, len(rawArgs))
	for i := range rawArgs {
		expanded[i] = os.ExpandEnv(rawArgs[i])
	}
	r.blobURL = os.ExpandEnv(r.blobURL)
	r.repoURL = os.ExpandEnv(r.repoURL)

	err = r.execute(ctx, expanded)
	if err == nil {
		return subcommands.ExitSuccess
	}
	logger.Errorf(ctx, "%s", err)
	return subcommands.ExitFailure
}
// createSocketPath returns a path in the OS temporary directory of the form
// "serial<32 hex digits>.sock". The hex digits encode 16 bytes read from
// crypto/rand, which makes a collision with an existing path highly
// improbable.
func createSocketPath() string {
	var nonce [16]byte
	// The result of rand.Read is not checked; any bytes it does not fill
	// remain zero.
	rand.Read(nonce[:])

	name := "serial" + hex.EncodeToString(nonce[:]) + ".sock"
	return filepath.Join(os.TempDir(), name)
}
// deriveTarget constructs a target from a single JSON object taken from the
// config list. The object's "type" field selects the kind of target ("aemu",
// "qemu", "device" or "gce"); the whole object is then decoded again as that
// kind's configuration and passed, together with opts, to the matching
// constructor. An unrecognized type or a decoding failure yields an error.
func deriveTarget(ctx context.Context, obj []byte, opts target.Options) (target.Target, error) {
	// First pass: decode only the discriminator.
	var header struct {
		Type string `json:"type"`
	}
	if err := json.Unmarshal(obj, &header); err != nil {
		return nil, fmt.Errorf("object in list has no \"type\" field: %v", err)
	}

	switch kind := header.Type; kind {
	case "aemu", "qemu":
		// Both emulator flavors share the QEMU configuration format.
		var cfg target.QEMUConfig
		if err := json.Unmarshal(obj, &cfg); err != nil {
			return nil, fmt.Errorf("invalid QEMU config found: %v", err)
		}
		if kind == "aemu" {
			return target.NewAEMUTarget(cfg, opts)
		}
		return target.NewQEMUTarget(cfg, opts)
	case "device":
		var cfg target.DeviceConfig
		if err := json.Unmarshal(obj, &cfg); err != nil {
			return nil, fmt.Errorf("invalid device config found: %v", err)
		}
		return target.NewDeviceTarget(ctx, cfg, opts)
	case "gce":
		var cfg target.GCEConfig
		if err := json.Unmarshal(obj, &cfg); err != nil {
			return nil, fmt.Errorf("invalid GCE config found: %v", err)
		}
		return target.NewGCETarget(ctx, cfg, opts)
	default:
		return nil, fmt.Errorf("unknown type found: %q", kind)
	}
}