blob: 3c2d007e5562e2ab49c6d6f7a78c2c8db29b1b51 [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <hypervisor/acpi.h>
#include <hypervisor/balloon.h>
#include <hypervisor/block.h>
#include <hypervisor/guest.h>
#include <hypervisor/io_apic.h>
#include <hypervisor/io_port.h>
#include <hypervisor/pci.h>
#include <hypervisor/uart.h>
#include <hypervisor/vcpu.h>
#include <zircon/process.h>
#include <zircon/syscalls.h>
#include <zircon/syscalls/hypervisor.h>
#include <fbl/unique_ptr.h>
#include <virtio/balloon.h>
#include "linux.h"
#include "zircon.h"
// Size passed to create_vmo() for the guest physical memory VMO (1 << 30).
static const uint64_t kVmoSize = UINT64_C(1) << 30;

// Mapping permissions used by create_vmo(): readable and writable.
static const uint32_t kMapFlags = ZX_VM_FLAG_PERM_WRITE | ZX_VM_FLAG_PERM_READ;

// Unused memory above this threshold may be reclaimed by the balloon.
// Defaults to 1024 pages; main() overwrites it when -p is given.
static uint32_t balloon_threshold_pages = 1024u;
// Prints the command-line help text to stderr.
//
// |cmd| is the program name substituted into the first line of the text.
// Always returns ZX_ERR_INVALID_ARGS so callers can `return usage(cmd);`.
static zx_status_t usage(const char* cmd) {
    // Everything after the first line is static text, so it is emitted with a
    // single fputs() call instead of one fprintf() per option.
    static const char kOptionsHelp[] =
        "\n"
        "OPTIONS:\n"
        "\t-b [block.bin] Use file 'block.bin' as a virtio-block device.\n"
        "\t-r [ramdisk.bin] Use file 'ramdisk.bin' as a ramdisk.\n"
        "\t-c [cmdline] Use string 'cmdline' as the kernel command line.\n"
        "\t-m [seconds] Poll the virtio-balloon device every 'seconds' seconds\n"
        "\t and adjust the balloon size based on the amount of\n"
        "\t unused guest memory.\n"
        "\t-p [pages] Number of unused pages to allow the guest to\n"
        "\t retain. Has no effect unless -m is also used.\n"
        "\t-d Demand-page balloon deflate requests.\n"
        "\n";

    fprintf(stderr, "usage: %s [OPTIONS] kernel.bin\n", cmd);
    fputs(kOptionsHelp, stderr);
    return ZX_ERR_INVALID_ARGS;
}
// Creates a VMO of |size| bytes and maps it into the root VMAR of this process
// with kMapFlags.
//
// On success, |*vmo| receives the VMO handle and |*addr| the address of the
// mapping, and ZX_OK is returned.
// On failure the status of the failing syscall is returned. If the VMO was
// created but could not be mapped, the handle is closed and |*vmo| is set to
// ZX_HANDLE_INVALID so that the handle is not leaked.
static zx_status_t create_vmo(uint64_t size, uintptr_t* addr, zx_handle_t* vmo) {
    zx_status_t status = zx_vmo_create(size, 0, vmo);
    if (status != ZX_OK)
        return status;

    status = zx_vmar_map(zx_vmar_root_self(), 0, *vmo, 0, size, kMapFlags, addr);
    if (status != ZX_OK) {
        // The VMO is useless to the caller without its mapping; release it.
        zx_handle_close(*vmo);
        *vmo = ZX_HANDLE_INVALID;
    }
    return status;
}
static void balloon_stats_handler(const virtio_balloon_stat_t* stats, size_t len, void* ctx) {
balloon_t* balloon = static_cast<balloon_t*>(ctx);
for (size_t i = 0; i < len; ++i) {
if (stats[i].tag != VIRTIO_BALLOON_S_AVAIL)
continue;
mtx_lock(&balloon->mutex);
uint32_t current_pages = balloon->config.num_pages;
mtx_unlock(&balloon->mutex);
uint32_t available_pages = static_cast<uint32_t>(stats[i].val / VIRTIO_BALLOON_PAGE_SIZE);
uint32_t target_pages = current_pages + (available_pages - balloon_threshold_pages);
if (current_pages == target_pages)
return;
printf("virtio-balloon: adjusting target pages %#x -> %#x.\n",
current_pages, target_pages);
zx_status_t status = balloon_update_num_pages(balloon, target_pages);
if (status != ZX_OK)
fprintf(stderr, "Error %d updating balloon size.\n", status);
return;
}
}
// Arguments handed to balloon_stats_task() through its void* context.
struct balloon_task_args {
    // Balloon device whose stats are requested.
    balloon_t* balloon;
    // Value passed to zx_deadline_after() between two stats requests.
    zx_duration_t interval;
};
typedef struct balloon_task_args balloon_task_args_t;
static int balloon_stats_task(void* ctx) {
fbl::unique_ptr<balloon_task_args_t> args(static_cast<balloon_task_args_t*>(ctx));
while (true) {
zx_nanosleep(zx_deadline_after(args->interval));
balloon_request_stats(args->balloon, &balloon_stats_handler, args->balloon);
}
return ZX_OK;
}
// Starts a detached thread running balloon_stats_task() for |balloon| with the
// given |interval|.
//
// Returns ZX_OK once the thread has been created and detached, or
// ZX_ERR_INTERNAL (after printing a message to stderr) if either step fails.
static zx_status_t poll_balloon_stats(balloon_t* balloon, zx_duration_t interval) {
    balloon_task_args_t* task_args = new balloon_task_args_t{balloon, interval};

    thrd_t worker;
    int rc = thrd_create(&worker, balloon_stats_task, task_args);
    if (rc != thrd_success) {
        fprintf(stderr, "Failed to create balloon thread %d\n", rc);
        // The thread never started, so the arguments are still owned here.
        delete task_args;
        return ZX_ERR_INTERNAL;
    }

    // From here on the running thread owns |task_args|.
    rc = thrd_detach(worker);
    if (rc == thrd_success)
        return ZX_OK;

    fprintf(stderr, "Failed to detach balloon thread %d\n", rc);
    return ZX_ERR_INTERNAL;
}
// Parses |str| as a base-10 integer in the range [1, max].
//
// Returns true and stores the value in |*out| on success. Returns false,
// leaving |*out| untouched, if |str| is empty, has trailing characters,
// contains a '-' (strtoull would otherwise silently negate the value), is
// zero, or exceeds |max|. |max| must be smaller than ULLONG_MAX so that the
// saturated value strtoull returns on overflow is rejected too.
static bool parse_positive_uint(const char* str, uint64_t max, uint64_t* out) {
    if (strchr(str, '-') != nullptr)
        return false;
    char* end = nullptr;
    const unsigned long long value = strtoull(str, &end, 10);
    if (end == str || *end != '\0')
        return false;
    if (value == 0 || value > max)
        return false;
    *out = value;
    return true;
}

// Entry point: parses the command line, builds a guest backed by a kVmoSize
// VMO, loads the kernel image named by the first positional argument (trying
// setup_zircon() first and setup_linux() if that reports
// ZX_ERR_NOT_SUPPORTED), wires up the emulated devices and finally runs the
// VCPU loop.
//
// Returns the zx_status_t of the first step that fails, or the result of
// vcpu_loop().
int main(int argc, char** argv) {
    const char* cmd = basename(argv[0]);
    const char* block_path = NULL;
    const char* ramdisk_path = NULL;
    const char* cmdline = NULL;
    zx_duration_t balloon_poll_interval = 0;
    bool balloon_deflate_on_demand = false;
    int opt;
    while ((opt = getopt(argc, argv, "b:r:c:m:dp:")) != -1) {
        switch (opt) {
        case 'b':
            block_path = optarg;
            break;
        case 'r':
            ramdisk_path = optarg;
            break;
        case 'c':
            cmdline = optarg;
            break;
        case 'm': {
            // Bound the value so that ZX_SEC(seconds) cannot overflow.
            // NOTE(review): assumes zx_duration_t is at least 63 bits wide - confirm.
            const uint64_t max_seconds =
                static_cast<uint64_t>(INT64_MAX) / static_cast<uint64_t>(ZX_SEC(1));
            uint64_t seconds = 0;
            if (!parse_positive_uint(optarg, max_seconds, &seconds)) {
                fprintf(stderr, "Invalid balloon interval %s. Must be an integer greater than 0\n",
                        optarg);
                return ZX_ERR_INVALID_ARGS;
            }
            balloon_poll_interval = ZX_SEC(seconds);
            break;
        }
        case 'd':
            balloon_deflate_on_demand = true;
            break;
        case 'p': {
            // Reject values that do not fit in the 32-bit threshold instead of
            // silently truncating them.
            uint64_t pages = 0;
            if (!parse_positive_uint(optarg, UINT32_MAX, &pages)) {
                fprintf(stderr, "Invalid balloon threshold %s. Must be an integer greater than 0\n",
                        optarg);
                return ZX_ERR_INVALID_ARGS;
            }
            balloon_threshold_pages = static_cast<uint32_t>(pages);
            break;
        }
        default:
            return usage(cmd);
        }
    }
    // The kernel image path is a mandatory positional argument.
    if (optind >= argc)
        return usage(cmd);
    argc -= optind;
    argv += optind;

    uintptr_t addr;
    zx_handle_t physmem_vmo;
    zx_status_t status = create_vmo(kVmoSize, &addr, &physmem_vmo);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create guest physical memory\n");
        return status;
    }

    zx_handle_t resource;
    status = guest_get_resource(&resource);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to get resource\n");
        return status;
    }

    zx_handle_t guest;
    status = zx_guest_create(resource, 0, physmem_vmo, &guest);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create guest\n");
        return status;
    }
    zx_handle_close(resource);

    uintptr_t pt_end_off;
    status = guest_create_page_table(addr, kVmoSize, &pt_end_off);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create page table\n");
        return status;
    }

    status = guest_create_acpi_table(addr, kVmoSize, pt_end_off);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create ACPI table\n");
        return status;
    }

    // Prepare the OS image
    int fd = open(argv[0], O_RDONLY);
    if (fd < 0) {
        fprintf(stderr, "Failed to open kernel image \"%s\"\n", argv[0]);
        return ZX_ERR_IO;
    }

    // Load the first page in to allow OS detection without requiring
    // us to seek backwards later.
    uintptr_t first_page = addr + kVmoSize - PAGE_SIZE;
    ssize_t ret = read(fd, (void*)first_page, PAGE_SIZE);
    if (ret != PAGE_SIZE) {
        fprintf(stderr, "Failed to read first page of kernel\n");
        close(fd);
        return ZX_ERR_IO;
    }

    uintptr_t guest_ip;
    uintptr_t bootdata_off = 0;

    char guest_cmdline[PATH_MAX];
    const char* zircon_fmt_string = "TERM=uart %s";
    snprintf(guest_cmdline, PATH_MAX, zircon_fmt_string, cmdline ? cmdline : "");
    status = setup_zircon(addr, kVmoSize, first_page, pt_end_off, fd, ramdisk_path,
                          guest_cmdline, &guest_ip, &bootdata_off);

    // Not recognised by setup_zircon(): try loading the image with setup_linux().
    if (status == ZX_ERR_NOT_SUPPORTED) {
        const char* linux_fmt_string = "earlyprintk=serial,ttyS,115200 console=ttyS0,115200 "
                                       "io_delay=none acpi_rsdp=%#lx %s";
        snprintf(guest_cmdline, PATH_MAX, linux_fmt_string, pt_end_off, cmdline ? cmdline : "");
        status = setup_linux(addr, kVmoSize, first_page, fd, ramdisk_path, guest_cmdline, &guest_ip,
                             &bootdata_off);
    }
    // The image file is no longer needed, whether or not loading succeeded.
    close(fd);
    if (status == ZX_ERR_NOT_SUPPORTED) {
        fprintf(stderr, "Unknown kernel\n");
        return status;
    } else if (status != ZX_OK) {
        fprintf(stderr, "Failed to load kernel\n");
        return status;
    }

#if __x86_64__
    uintptr_t apic_addr;
    zx_handle_t apic_vmo;
    status = create_vmo(PAGE_SIZE, &apic_addr, &apic_vmo);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create VCPU local APIC memory\n");
        return status;
    }
#endif // __x86_64__

    zx_vcpu_create_args_t args = {
        guest_ip,
#if __x86_64__
        0 /* cr3 */,
        apic_vmo,
#endif // __x86_64__
    };
    zx_handle_t vcpu;
    status = zx_vcpu_create(guest, 0, &args, &vcpu);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create VCPU\n");
        return status;
    }

    guest_ctx_t guest_ctx;
    memset(&guest_ctx, 0, sizeof(guest_ctx));

    // Setup IO APIC.
    io_apic_t io_apic;
    guest_ctx.io_apic = &io_apic;
    io_apic_init(&io_apic);

    // Setup IO ports.
    io_port_t io_port;
    guest_ctx.io_port = &io_port;
    io_port_init(&io_port);

    // Setup PCI.
    pci_bus_t bus;
    guest_ctx.bus = &bus;
    status = pci_bus_init(&bus, &io_apic);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to create PCI bus.\n");
        return status;
    }

    // Setup UART.
    uart_t uart;
    guest_ctx.uart = &uart;
    uart_init(&uart, &io_apic);
    status = uart_async(&uart, guest);
    if (status != ZX_OK)
        return status;

    // Setup block device. Only connected when -b was given.
    block_t block;
    pci_device_t* virtio_block = &block.virtio_device.pci_device;
    if (block_path != NULL) {
        status = block_init(&block, block_path, addr, kVmoSize);
        if (status != ZX_OK)
            return status;
        status = pci_bus_connect(&bus, virtio_block, PCI_DEVICE_VIRTIO_BLOCK);
        if (status != ZX_OK)
            return status;
        status = pci_device_async(virtio_block, guest);
        if (status != ZX_OK)
            return status;
    }

    // Setup memory balloon.
    balloon_t balloon;
    balloon_init(&balloon, addr, kVmoSize, physmem_vmo);
    balloon.deflate_on_demand = balloon_deflate_on_demand;
    status = pci_bus_connect(&bus, &balloon.virtio_device.pci_device,
                             PCI_DEVICE_VIRTIO_BALLOON);
    if (status != ZX_OK)
        return status;
    status = pci_device_async(&balloon.virtio_device.pci_device, guest);
    if (status != ZX_OK)
        return status;
    // Stats polling is only started when -m was given. The result is not
    // checked: poll_balloon_stats() prints its own error and the guest still
    // starts without polling.
    if (balloon_poll_interval > 0)
        poll_balloon_stats(&balloon, balloon_poll_interval);

    vcpu_ctx_t vcpu_ctx;
    vcpu_init(&vcpu_ctx);
    vcpu_ctx.vcpu = vcpu;
#if __x86_64__
    vcpu_ctx.local_apic.apic_addr = (void*)apic_addr;
#endif // __x86_64__
    vcpu_ctx.guest_ctx = &guest_ctx;

    // Setup Local APIC.
    vcpu_ctx.local_apic.vcpu = vcpu;
    status = io_apic_register_local_apic(&io_apic, 0, &vcpu_ctx.local_apic);
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to register Local APIC with IO APIC.\n");
        return status;
    }

    zx_vcpu_state_t vcpu_state;
    memset(&vcpu_state, 0, sizeof(vcpu_state));
#if __x86_64__
    // Hand the offset reported by the kernel loader to the guest in rsi.
    vcpu_state.rsi = bootdata_off;
#endif // __x86_64__
    status = zx_vcpu_write_state(vcpu, ZX_VCPU_STATE, &vcpu_state, sizeof(vcpu_state));
    if (status != ZX_OK) {
        fprintf(stderr, "Failed to write VCPU state\n");
        return status;
    }

    return vcpu_loop(&vcpu_ctx);
}