blob: 351163f2e4851a263bdba790c5504dacf0c7b681 [file] [log] [blame]
// Copyright 2015 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// Package vm provides an abstract test machine (VM, physical machine, etc)
// interface for the rest of the system.
// For convenience test machines are subsequently collectively called VMs.
// Package wraps vmimpl package interface with some common functionality
// and higher-level interface.
package vm
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
"github.com/google/syzkaller/pkg/mgrconfig"
"github.com/google/syzkaller/pkg/osutil"
"github.com/google/syzkaller/pkg/report"
"github.com/google/syzkaller/pkg/stats"
"github.com/google/syzkaller/sys/targets"
"github.com/google/syzkaller/vm/vmimpl"
// Import all VM implementations, so that users only need to import vm.
_ "github.com/google/syzkaller/vm/adb"
_ "github.com/google/syzkaller/vm/bhyve"
_ "github.com/google/syzkaller/vm/cuttlefish"
_ "github.com/google/syzkaller/vm/gce"
_ "github.com/google/syzkaller/vm/gvisor"
_ "github.com/google/syzkaller/vm/isolated"
_ "github.com/google/syzkaller/vm/proxyapp"
_ "github.com/google/syzkaller/vm/qemu"
_ "github.com/google/syzkaller/vm/starnix"
_ "github.com/google/syzkaller/vm/vmm"
_ "github.com/google/syzkaller/vm/vmware"
)
type Pool struct {
impl vmimpl.Pool
workdir string
template string
timeouts targets.Timeouts
activeCount int32
hostFuzzer bool
statOutputReceived *stats.Val
}
type Instance struct {
pool *Pool
impl vmimpl.Instance
workdir string
index int
onClose func()
}
var (
Shutdown = vmimpl.Shutdown
ErrTimeout = vmimpl.ErrTimeout
_ BootErrorer = vmimpl.BootError{}
_ InfraErrorer = vmimpl.InfraError{}
)
type BootErrorer interface {
BootError() (string, []byte)
}
type InfraErrorer interface {
InfraError() (string, []byte)
}
// vmType splits the VM type from any suffix (separated by ":"). This is mostly
// useful for the "proxyapp" type, where pkg/build needs to specify/handle
// sub-types.
func vmType(fullName string) string {
name, _, _ := strings.Cut(fullName, ":")
return name
}
// AllowsOvercommit returns if the instance type allows overcommit of instances
// (i.e. creation of instances out-of-thin-air). Overcommit is used during image
// and patch testing in syz-ci when it just asks for more than specified in config
// instances. Generally virtual machines (qemu, gce) support overcommit,
// while physical machines (adb, isolated) do not. Strictly speaking, we should
// never use overcommit and use only what's specified in config, because we
// override resource limits specified in config (e.g. can OOM). But it works and
// makes lots of things much simpler.
func AllowsOvercommit(typ string) bool {
return vmimpl.Types[vmType(typ)].Overcommit
}
// UseNetCompression says if it's beneficial to use network compression for this VM type.
// Local VMs (qemu) generally don't benefit from compression, while remote machines may benefit.
func UseNetCompression(typ string) bool {
return vmimpl.Types[vmType(typ)].NetCompression
}
// Create creates a VM pool that can be used to create individual VMs.
func Create(cfg *mgrconfig.Config, debug bool) (*Pool, error) {
typ, ok := vmimpl.Types[vmType(cfg.Type)]
if !ok {
return nil, fmt.Errorf("unknown instance type '%v'", cfg.Type)
}
env := &vmimpl.Env{
Name: cfg.Name,
OS: cfg.TargetOS,
Arch: cfg.TargetVMArch,
Workdir: cfg.Workdir,
Image: cfg.Image,
SSHKey: cfg.SSHKey,
SSHUser: cfg.SSHUser,
Timeouts: cfg.Timeouts,
Debug: debug,
Config: cfg.VM,
KernelSrc: cfg.KernelSrc,
}
impl, err := typ.Ctor(env)
if err != nil {
return nil, err
}
return &Pool{
impl: impl,
workdir: env.Workdir,
template: cfg.WorkdirTemplate,
timeouts: cfg.Timeouts,
hostFuzzer: cfg.SysTarget.HostFuzzer,
statOutputReceived: stats.Create("vm output", "Bytes of VM console output received",
stats.Graph("traffic"), stats.Rate{}, stats.FormatMB),
}, nil
}
func (pool *Pool) Count() int {
return pool.impl.Count()
}
func (pool *Pool) Create(index int) (*Instance, error) {
if index < 0 || index >= pool.Count() {
return nil, fmt.Errorf("invalid VM index %v (count %v)", index, pool.Count())
}
workdir, err := osutil.ProcessTempDir(pool.workdir)
if err != nil {
return nil, fmt.Errorf("failed to create instance temp dir: %w", err)
}
if pool.template != "" {
if err := osutil.CopyDirRecursively(pool.template, filepath.Join(workdir, "template")); err != nil {
return nil, err
}
}
impl, err := pool.impl.Create(workdir, index)
if err != nil {
os.RemoveAll(workdir)
return nil, err
}
atomic.AddInt32(&pool.activeCount, 1)
return &Instance{
pool: pool,
impl: impl,
workdir: workdir,
index: index,
onClose: func() { atomic.AddInt32(&pool.activeCount, -1) },
}, nil
}
// TODO: Integration or end-to-end testing is needed.
//
// https://github.com/google/syzkaller/pull/3269#discussion_r967650801
func (pool *Pool) Close() error {
if pool.activeCount != 0 {
panic("all the instances should be closed before pool.Close()")
}
if closer, ok := pool.impl.(io.Closer); ok {
return closer.Close()
}
return nil
}
func (inst *Instance) Copy(hostSrc string) (string, error) {
return inst.impl.Copy(hostSrc)
}
func (inst *Instance) Forward(port int) (string, error) {
return inst.impl.Forward(port)
}
type ExitCondition int
const (
// The program is allowed to exit after timeout.
ExitTimeout = ExitCondition(1 << iota)
// The program is allowed to exit with no errors.
ExitNormal
// The program is allowed to exit with errors.
ExitError
)
type StopChan <-chan bool
type InjectOutput <-chan []byte
type OutputSize int
// An early notification that the command has finished / VM crashed.
type EarlyFinishCb func()
// Run runs cmd inside of the VM (think of ssh cmd) and monitors command execution
// and the kernel console output. It detects kernel oopses in output, lost connections, hangs, etc.
// Returns command+kernel output and a non-symbolized crash report (nil if no error happens).
// Accepted options:
// - StopChan: stop channel can be used to prematurely stop the command
// - ExitCondition: says which exit modes should be considered as errors/OK
// - OutputSize: how much output to keep/return
func (inst *Instance) Run(timeout time.Duration, reporter *report.Reporter, command string, opts ...any) (
[]byte, *report.Report, error) {
exit := ExitNormal
var stop <-chan bool
var injected <-chan []byte
var finished func()
outputSize := beforeContextDefault
for _, o := range opts {
switch opt := o.(type) {
case ExitCondition:
exit = opt
case StopChan:
stop = opt
case OutputSize:
outputSize = int(opt)
case InjectOutput:
injected = (<-chan []byte)(opt)
case EarlyFinishCb:
finished = opt
default:
panic(fmt.Sprintf("unknown option %#v", opt))
}
}
outc, errc, err := inst.impl.Run(timeout, stop, command)
if err != nil {
return nil, nil, err
}
mon := &monitor{
inst: inst,
outc: outc,
injected: injected,
errc: errc,
finished: finished,
reporter: reporter,
beforeContext: outputSize,
exit: exit,
lastExecuteTime: time.Now(),
}
rep := mon.monitorExecution()
return mon.output, rep, nil
}
func (inst *Instance) Info() ([]byte, error) {
if ii, ok := inst.impl.(vmimpl.Infoer); ok {
return ii.Info()
}
return nil, nil
}
func (inst *Instance) PprofPort() int {
if inst.pool.hostFuzzer {
// In the fuzzing on host mode, fuzzers are always on the same network.
// Don't set up pprof endpoints in this case.
return 0
}
if ii, ok := inst.impl.(vmimpl.PprofPortProvider); ok {
return ii.PprofPort()
}
return vmimpl.PprofPort
}
func (inst *Instance) diagnose(rep *report.Report) ([]byte, bool) {
if rep == nil {
panic("rep is nil")
}
return inst.impl.Diagnose(rep)
}
func (inst *Instance) Close() {
inst.impl.Close()
os.RemoveAll(inst.workdir)
inst.onClose()
}
type monitor struct {
inst *Instance
outc <-chan []byte
injected <-chan []byte
finished func()
errc <-chan error
reporter *report.Reporter
exit ExitCondition
output []byte
beforeContext int
matchPos int
lastExecuteTime time.Time
}
func (mon *monitor) monitorExecution() *report.Report {
ticker := time.NewTicker(tickerPeriod * mon.inst.pool.timeouts.Scale)
defer ticker.Stop()
for {
select {
case err := <-mon.errc:
switch err {
case nil:
// The program has exited without errors,
// but wait for kernel output in case there is some delayed oops.
crash := ""
if mon.exit&ExitNormal == 0 {
crash = lostConnectionCrash
}
return mon.extractError(crash)
case ErrTimeout:
if mon.exit&ExitTimeout == 0 {
return mon.extractError(timeoutCrash)
}
return nil
default:
// Note: connection lost can race with a kernel oops message.
// In such case we want to return the kernel oops.
crash := ""
if mon.exit&ExitError == 0 {
crash = lostConnectionCrash
}
return mon.extractError(crash)
}
case out, ok := <-mon.outc:
if !ok {
mon.outc = nil
continue
}
mon.inst.pool.statOutputReceived.Add(len(out))
if rep := mon.appendOutput(out); rep != nil {
return rep
}
case out := <-mon.injected:
if rep := mon.appendOutput(out); rep != nil {
return rep
}
case <-ticker.C:
// Detect both "no output whatsoever" and "kernel episodically prints
// something to console, but fuzzer is not actually executing programs".
if time.Since(mon.lastExecuteTime) > mon.inst.pool.timeouts.NoOutput {
return mon.extractError(noOutputCrash)
}
case <-Shutdown:
return nil
}
}
}
func (mon *monitor) appendOutput(out []byte) *report.Report {
lastPos := len(mon.output)
mon.output = append(mon.output, out...)
if bytes.Contains(mon.output[lastPos:], executingProgram1) ||
bytes.Contains(mon.output[lastPos:], executingProgram2) {
mon.lastExecuteTime = time.Now()
}
if mon.reporter.ContainsCrash(mon.output[mon.matchPos:]) {
return mon.extractError("unknown error")
}
if len(mon.output) > 2*mon.beforeContext {
copy(mon.output, mon.output[len(mon.output)-mon.beforeContext:])
mon.output = mon.output[:mon.beforeContext]
}
// Find the starting position for crash matching on the next iteration.
// We step back from the end of output by maxErrorLength to handle the case
// when a crash line is currently split/incomplete. And then we try to find
// the preceding '\n' to have a full line. This is required to handle
// the case when a particular pattern is ignored as crash, but a suffix
// of the pattern is detected as crash (e.g. "ODEBUG:" is trimmed to "BUG:").
mon.matchPos = len(mon.output) - maxErrorLength
for i := 0; i < maxErrorLength; i++ {
if mon.matchPos <= 0 || mon.output[mon.matchPos-1] == '\n' {
break
}
mon.matchPos--
}
if mon.matchPos < 0 {
mon.matchPos = 0
}
return nil
}
func (mon *monitor) extractError(defaultError string) *report.Report {
if mon.finished != nil {
// If the caller wanted an early notification, provide it.
mon.finished()
}
diagOutput, diagWait := []byte{}, false
if defaultError != "" {
diagOutput, diagWait = mon.inst.diagnose(mon.createReport(defaultError))
}
// Give it some time to finish writing the error message.
// But don't wait for "no output", we already waited enough.
if defaultError != noOutputCrash || diagWait {
mon.waitForOutput()
}
if bytes.Contains(mon.output, []byte(fuzzerPreemptedStr)) {
return nil
}
if defaultError == "" && mon.reporter.ContainsCrash(mon.output[mon.matchPos:]) {
// We did not call Diagnose above because we thought there is no error, so call it now.
diagOutput, diagWait = mon.inst.diagnose(mon.createReport(defaultError))
if diagWait {
mon.waitForOutput()
}
}
rep := mon.createReport(defaultError)
if rep == nil {
return nil
}
if len(diagOutput) > 0 {
rep.Output = append(rep.Output, vmDiagnosisStart...)
rep.Output = append(rep.Output, diagOutput...)
}
return rep
}
func (mon *monitor) createReport(defaultError string) *report.Report {
rep := mon.reporter.ParseFrom(mon.output, mon.matchPos)
if rep == nil {
if defaultError == "" {
return nil
}
return &report.Report{
Title: defaultError,
Output: mon.output,
Suppressed: report.IsSuppressed(mon.reporter, mon.output),
}
}
start := rep.StartPos - mon.beforeContext
if start < 0 {
start = 0
}
end := rep.EndPos + afterContext
if end > len(rep.Output) {
end = len(rep.Output)
}
rep.Output = rep.Output[start:end]
rep.StartPos -= start
rep.EndPos -= start
return rep
}
func (mon *monitor) waitForOutput() {
timer := time.NewTimer(waitForOutputTimeout * mon.inst.pool.timeouts.Scale)
defer timer.Stop()
for {
select {
case out, ok := <-mon.outc:
if !ok {
return
}
mon.output = append(mon.output, out...)
case <-timer.C:
return
case <-Shutdown:
return
}
}
}
const (
maxErrorLength = 256
lostConnectionCrash = "lost connection to test machine"
noOutputCrash = "no output from test machine"
timeoutCrash = "timed out"
fuzzerPreemptedStr = "SYZ-FUZZER: PREEMPTED"
vmDiagnosisStart = "\nVM DIAGNOSIS:\n"
)
var (
executingProgram1 = []byte("executing program") // syz-fuzzer, syz-runner output
executingProgram2 = []byte("executed programs:") // syz-execprog output
beforeContextDefault = 1024 << 10
afterContext = 128 << 10
tickerPeriod = 10 * time.Second
waitForOutputTimeout = 10 * time.Second
)