// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package main

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net"
	"os"
	"os/exec"
	"time"

	devicePkg "go.fuchsia.dev/infra/devices"
	"go.fuchsia.dev/tools/netboot"
)

// usage is the help text that the flag.Usage hook installed in init writes to
// stderr, ahead of the per-flag defaults.
const usage = `usage: health_checker [options]

Checks the health of the attached device by checking to see if it can
discover and ping the device's netsvc address. A healthy device should be
running in Zedboot.
`

// Command line flag values. They are bound to their flags in init and read in
// main after flag.Parse:
//   - timeout is passed to netboot.NewClient (-timeout).
//   - configFile is passed to devicePkg.CreateDeviceTargets (-config).
//   - rebootIfUnhealthy gates the call to attemptReboot for devices that
//     were found unhealthy (-reboot).
var (
	timeout           time.Duration
	configFile        string
	rebootIfUnhealthy bool
)

const (
	// healthyState and unhealthyState are the two values that checkHealth
	// stores in HealthCheckResult.State.
	healthyState   = "healthy"
	unhealthyState = "unhealthy"

	// logFile is the path that main appends its log output to.
	logFile = "/tmp/health_checker.log"

	// zedbootWaitDuration bounds how long attemptReboot keeps polling for
	// Zedboot after a restart before it falls back to a powercycle.
	zedbootWaitDuration = 1 * time.Minute

	// zedbootCheckInterval is the pause attemptReboot takes before each
	// Zedboot ping while it waits for the device to come back up.
	zedbootCheckInterval = 10 * time.Second
)

// HealthCheckResult is the outcome of a health check for a single device.
// A slice of these is serialized to JSON and printed by
// printHealthCheckResults.
type HealthCheckResult struct {
	// Nodename is the nodename of the device that was checked.
	Nodename string `json:"nodename"`

	// State is the health status of the device (either "healthy" or "unhealthy").
	State string `json:"state"`

	// ErrorMsg is the error message provided by the health check. It is
	// empty for a healthy device; main appends the error from a failed
	// reboot attempt to it.
	ErrorMsg string `json:"error_msg"`
}

// pingZedboot asks n to discover the netsvc address for nodename and then
// sends that address a single IPv6 ping using the system ping command.
//
// It returns nil only when both steps succeed; otherwise the returned error
// says which step failed and includes the underlying error.
func pingZedboot(n *netboot.Client, nodename string) error {
	addr, err := n.Discover(nodename, false)
	if err != nil {
		return fmt.Errorf("Failed to discover netsvc addr: %v.", err)
	}

	// Carry the zone along so the address stays usable if it is link-local.
	target := &net.IPAddr{IP: addr.IP, Zone: addr.Zone}
	pingArgs := []string{"-6", target.String(), "-c", "1"}
	_, pingErr := exec.Command("ping", pingArgs...).Output()
	if pingErr != nil {
		return fmt.Errorf("Failed to ping netsvc addr %s: %v.", target, pingErr)
	}
	return nil
}

// ensureNotFuchsia verifies that the device is not answering on its Fuchsia
// address. It asks n to discover the Fuchsia address for nodename and sends it
// a single IPv6 ping.
//
// A failed ping is the passing case and yields nil. An error is returned when
// the address cannot be discovered, or when the ping succeeds (the device is
// running Fuchsia rather than Zedboot).
func ensureNotFuchsia(n *netboot.Client, nodename string) error {
	addr, err := n.Discover(nodename, true)
	if err != nil {
		return fmt.Errorf("Failed to discover fuchsia addr: %v.", err)
	}

	target := &net.IPAddr{IP: addr.IP, Zone: addr.Zone}
	ping := exec.Command("ping", "-6", target.String(), "-c", "1")
	if _, pingErr := ping.Output(); pingErr != nil {
		// Nothing answered on the Fuchsia address, which is what we want.
		return nil
	}
	return fmt.Errorf("Device is in Fuchsia, should be in Zedboot.")
}

// checkHealth runs the health checks for nodename and reports the outcome.
//
// The device must pass pingZedboot and then ensureNotFuchsia, in that order;
// the second check is skipped if the first fails. The result is marked
// unhealthy, with the failing check's error text as ErrorMsg, on the first
// failure, and healthy with an empty ErrorMsg otherwise.
func checkHealth(n *netboot.Client, nodename string) HealthCheckResult {
	log.Printf("Checking health for %s", nodename)

	result := HealthCheckResult{Nodename: nodename, State: healthyState}

	checkErr := pingZedboot(n, nodename)
	if checkErr == nil {
		checkErr = ensureNotFuchsia(n, nodename)
	}
	if checkErr != nil {
		result.State = unhealthyState
		result.ErrorMsg = checkErr.Error()
	}
	return result
}

// printHealthCheckResults writes checkResults to stdout as a single line of
// JSON followed by a newline. A nil slice is encoded as JSON null.
//
// It returns the marshalling error, if any, in which case nothing is printed.
func printHealthCheckResults(checkResults []HealthCheckResult) error {
	encoded, err := json.Marshal(checkResults)
	if err != nil {
		return err
	}
	fmt.Printf("%s\n", encoded)
	return nil
}

func init() {
	flag.Usage = func() {
		fmt.Fprint(os.Stderr, usage)
		flag.PrintDefaults()
	}

	// First set the flags ...
	flag.StringVar(&configFile, "config", "/etc/botanist/config.json",
		"The path of the json config file that contains the nodename of the device. Format is defined in https://go.fuchsia.dev/tools/+/master/botanist/common.go")
	flag.DurationVar(&timeout, "timeout", 10*time.Second,
		"The timeout for checking each device. The format should be a value acceptable to time.ParseDuration.")
	flag.BoolVar(&rebootIfUnhealthy, "reboot", false, "If true, attempt to reboot the device if unhealthy.")
}

// attemptReboot tries to get device back into Zedboot.
//
// It first restarts the device and then polls pingZedboot, pausing
// zedbootCheckInterval before each attempt, for up to zedbootWaitDuration. It
// returns nil as soon as a ping succeeds. If Zedboot is still unreachable once
// the wait expires, it powercycles the device and returns the result of that
// call; Zedboot is not checked again after the powercycle.
//
// An error from the restart is returned immediately. If ctx is cancelled while
// waiting, the wait is abandoned and ctx.Err() is returned.
func attemptReboot(ctx context.Context, n *netboot.Client, device *devicePkg.DeviceTarget) error {
	log.Printf("Attempting reboot for %s", device.Nodename())

	// Attempt to restart the device via serial/SSH.
	if err := device.Restart(ctx); err != nil {
		return err
	}

	// Wait for Zedboot to come back up.
	log.Printf("Waiting for Zedboot to come back up.")
	start := time.Now()
	for time.Since(start) < zedbootWaitDuration {
		// Pause before probing, but stop waiting if the caller has given up.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(zedbootCheckInterval):
		}
		if err := pingZedboot(n, device.Nodename()); err == nil {
			return nil
		}
	}

	// If we get here, Zedboot never came back up, so we need to powercycle the device.
	log.Printf("Zedboot did not come up; attempting powercycle")
	return device.Powercycle(ctx)
}

// main checks the health of every device target created from the -config
// file and prints the collected results to stdout as JSON.
//
// Device targets are created before logging is redirected, so a failure there
// is reported through the default logger. After that, log output is appended
// to logFile. When -reboot is set, attemptReboot is called once for each
// unhealthy device; the reported state is not re-evaluated afterwards, and a
// failed attempt has its error appended to the result's ErrorMsg.
func main() {
	flag.Parse()
	client := netboot.NewClient(timeout)
	ctx := context.Background()

	devices, err := devicePkg.CreateDeviceTargets(ctx, configFile, nil)
	if err != nil {
		log.Fatal(err)
	}

	// From here on, send log output to the log file.
	logOutput, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		log.Fatal(err)
	}
	defer logOutput.Close()
	log.SetOutput(logOutput)

	var results []HealthCheckResult
	for _, device := range devices {
		result := checkHealth(client, device.Nodename())
		log.Printf("state=%s, error_msg=%s", result.State, result.ErrorMsg)

		if rebootIfUnhealthy && result.State == unhealthyState {
			rebootErr := attemptReboot(ctx, client, device)
			if rebootErr == nil {
				log.Printf("reboot succeeded for %s", device.Nodename())
			} else {
				log.Printf("reboot failed with error: %s", rebootErr.Error())
				result.ErrorMsg += "; " + rebootErr.Error()
			}
		}

		results = append(results, result)
	}

	if err := printHealthCheckResults(results); err != nil {
		log.Fatal(err)
	}
}
