blob: 404146269e0938392a0d9a6500e081b464c40c12 [file] [log] [blame]
/*
* Copyright 2021-2022 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#include <assert.h>
#include <dlfcn.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <IOKit/IODataQueueClient.h>
#include <IOKit/IOKitLib.h>
#include <mach/mach.h>
#include "util/compiler.h"
#include "util/u_hexdump.h"
#include "decode.h"
#include "dyld_interpose.h"
#include "util.h"
/*
 * Toggle bit 29 of an identifier. The wrappers in this file apply it to shmem
 * IDs before handing them to the decoder as synthetic handles; applying it
 * twice yields the original value.
 *
 * NOTE(review): presumably this keeps shmem handles from colliding with the
 * handles returned by AGX_SELECTOR_ALLOCATE_MEM, which are tracked unmodified
 * -- confirm.
 *
 * The argument is parenthesized so that low-precedence expressions (e.g. a
 * ternary) are toggled as a whole rather than only their last operand.
 */
#define HANDLE(x) ((x) ^ (1 << 29))
/*
* This section contains the minimal set of definitions to trace the macOS
* (IOKit) interface to the AGX accelerator.
* They are not used under Linux.
*
* Information in this file was originally determined independently. More
* recently, names have been augmented via the oob_timestamp code sample from
* Project Zero [1]
*
* [1] https://bugs.chromium.org/p/project-zero/issues/detail?id=1986
*/
/* NOTE(review): not referenced in the visible code; presumably the service
* type used when opening the AGX connection -- confirm.
*/
#define AGX_SERVICE_TYPE 0x100005
/*
* Selector numbers passed to the IOConnectCall*Method family on the AGX
* connection. wrap_Method switches on these to decide how a call is dumped
* and whether it creates or frees memory the decoder should track. Selectors
* without a name here are dumped generically.
*/
enum agx_selector {
AGX_SELECTOR_GET_GLOBAL_IDS = 0x6,
/* Also used by wrap_Method to pick out the connection to trace */
AGX_SELECTOR_SET_API = 0x7,
AGX_SELECTOR_CREATE_COMMAND_QUEUE = 0x8,
AGX_SELECTOR_FREE_COMMAND_QUEUE = 0x9,
AGX_SELECTOR_ALLOCATE_MEM = 0xA,
AGX_SELECTOR_FREE_MEM = 0xB,
AGX_SELECTOR_CREATE_SHMEM = 0xF,
AGX_SELECTOR_FREE_SHMEM = 0x10,
AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE = 0x11,
AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12,
AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E,
AGX_SELECTOR_GET_VERSION = 0x2A,
/* NOTE(review): presumably the total selector count; unused in the visible
* code -- confirm.
*/
AGX_NUM_SELECTORS = 0x33
};
/*
* Command entry found in the input struct of
* AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS. wrap_Method reads one entry from the
* start of the input struct and uses only the two shmem IDs. The struct is
* packed; its fields add up to 40 bytes.
*/
struct IOAccelCommandQueueSubmitArgs_Command {
/* Shmem ID of the command buffer, passed to the decoder via HANDLE() */
uint32_t command_buffer_shmem_id;
/* Shmem ID of the segment list, passed to the decoder via HANDLE() */
uint32_t segment_list_shmem_id;
uint64_t unk1B; // 0, new in 12.x
uint64_t notify_1;
uint64_t notify_2;
uint32_t unk2;
uint32_t unk3;
} __attribute__((packed));
/*
* Output struct of AGX_SELECTOR_ALLOCATE_MEM as interpreted by wrap_Method.
* The struct is packed and its fields add up to 0x50 bytes, the output size
* wrap_Method asserts for that selector.
*/
struct agx_allocate_resource_resp {
/* Returned GPU virtual address */
uint64_t gpu_va;
/* Returned CPU virtual address */
uint64_t cpu;
uint32_t unk4[3];
/* Handle used to identify the resource in the segment list */
uint32_t handle;
/* Size of the root resource from which we are allocated. If this is not a
* suballocation, this is equal to the size.
*/
uint64_t root_size;
/* Globally unique identifier for the resource, shown in Instruments */
uint32_t guid;
uint32_t unk11[7];
/* Maximum size of the suballocation. For a suballocation, this equals:
*
* sub_size = root_size - (sub_cpu - root_cpu)
*
* For root allocations, this equals the size.
*
* wrap_Method uses this value as the tracked size of the allocation.
*/
uint64_t sub_size;
} __attribute__((packed));
/*
* Wrap IOKit entrypoints to intercept communication between the AGX kernel
* extension and userspace clients. IOKit prototypes are public from the IOKit
* source release.
*/
/* Connection that most recently issued AGX_SELECTOR_SET_API. wrap_Method only
* traces calls made on this connection; it stays 0 until the first SET_API.
*/
mach_port_t metal_connection = 0;
/* Decoder context shared by all traced calls; created on first use in
* wrap_Method.
*/
struct agxdecode_ctx *decode_ctx = NULL;
/*
 * Interposed replacement for IOConnectCallMethod.
 *
 * Calls on connections other than the one guessed to be Metal are forwarded
 * untouched. For the traced connection the inputs are dumped, the real method
 * is invoked, and then either the memory-related selectors update the
 * decoder's allocation tracking or the outputs are dumped.
 *
 * Parameters are exactly those of IOConnectCallMethod. Returns the value
 * returned by the real IOConnectCallMethod.
 */
kern_return_t
wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t *input,
            uint32_t inputCnt, const void *inputStruct, size_t inputStructCnt,
            uint64_t *output, uint32_t *outputCnt, void *outputStruct,
            size_t *outputStructCntP)
{
   /* The decoder context is created lazily by the first intercepted call */
   if (!decode_ctx) {
      decode_ctx = agxdecode_new_context(0);
   }

   /* Heuristic guess which connection is Metal, skip over I/O from everything
    * else. This is technically wrong but it works in practice, and reduces the
    * surface area we need to wrap.
    */
   if (selector == AGX_SELECTOR_SET_API) {
      metal_connection = connection;
   } else if (metal_connection != connection) {
      return IOConnectCallMethod(connection, selector, input, inputCnt,
                                 inputStruct, inputStructCnt, output, outputCnt,
                                 outputStruct, outputStructCntP);
   }

   printf("Selector %u, %X, %X\n", selector, connection, metal_connection);

   /* Check the arguments make sense: each buffer comes with its count */
   assert((input != NULL) == (inputCnt != 0));
   assert((inputStruct != NULL) == (inputStructCnt != 0));
   assert((output != NULL) == (outputCnt != NULL));
   assert((outputStruct != NULL) == (outputStructCntP != NULL));

   /* Dump inputs */
   switch (selector) {
   case AGX_SELECTOR_SET_API:
      assert(input == NULL && output == NULL && outputStruct == NULL);
      assert(inputStruct != NULL && inputStructCnt == 16);
      /* The 16-byte input is printed as a string, so it must be terminated */
      assert(((const uint8_t *)inputStruct)[15] == 0x0);
      printf("%X: SET_API(%s)\n", connection, (const char *)inputStruct);
      break;

   case AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS: {
      assert(output == NULL && outputStruct == NULL);

      /* The scalar count is not asserted for this selector, so do not read
       * input[0] unless it is actually there.
       */
      printf("%X: SUBMIT_COMMAND_BUFFERS command queue id:%" PRIx64 " %p\n",
             connection, inputCnt ? input[0] : 0, inputStruct);
      u_hexdump(stdout, inputStruct, inputStructCnt, true);

      /* Only the first command in the input struct is decoded, and only its
       * two leading shmem IDs are read; skip decoding if they are not present.
       */
      const struct IOAccelCommandQueueSubmitArgs_Command *cmd = inputStruct;

      if (inputStructCnt >= sizeof(cmd->command_buffer_shmem_id) +
                               sizeof(cmd->segment_list_shmem_id)) {
         agxdecode_cmdstream(decode_ctx,
                             HANDLE(cmd->command_buffer_shmem_id),
                             HANDLE(cmd->segment_list_shmem_id), true);
      }

      agxdecode_next_frame();

      /* The submission is also dumped generically below */
      FALLTHROUGH;
   }

   default:
      printf("%X: call %X (out %p, %zu)", connection, selector,
             (void *)outputStructCntP,
             outputStructCntP ? *outputStructCntP : (size_t)0);

      for (uint32_t i = 0; i < inputCnt; ++i)
         printf(" %" PRIx64, input[i]);

      if (inputStructCnt) {
         printf(", struct:\n");
         u_hexdump(stdout, inputStruct, inputStructCnt, true);
      } else {
         printf("\n");
      }

      break;
   }

   /* Invoke the real method */
   kern_return_t ret = IOConnectCallMethod(
      connection, selector, input, inputCnt, inputStruct, inputStructCnt,
      output, outputCnt, outputStruct, outputStructCntP);

   if (ret != 0)
      printf("return %u\n", (unsigned)ret);

   /* Track allocations for later analysis (dumping, disassembly, etc). The
    * tracking cases are skipped when the call failed: the outputs cannot be
    * trusted then, and a failed free should not untrack anything.
    */
   switch (selector) {
   case AGX_SELECTOR_CREATE_SHMEM: {
      if (ret != 0)
         break;

      assert(inputCnt == 2);
      assert(outputStruct != NULL && outputStructCntP != NULL);
      assert((*outputStructCntP) == 0x10);

      uint8_t type = input[1];

      assert(type <= 2);
      if (type == 2)
         printf("(cmdbuf with error reporting)\n");

      /* The 16-byte output is read as a 64-bit CPU pointer followed by two
       * 32-bit words: the size, then the shmem ID.
       */
      const uint64_t *ptr = outputStruct;
      const uint32_t *words = (const uint32_t *)(ptr + 1);

      /* Construct a synthetic GEM handle for the shmem */
      agxdecode_track_alloc(decode_ctx, &(struct agx_bo){
                                           .handle = HANDLE(words[1]),
                                           ._map = (void *)(uintptr_t)*ptr,
                                           .size = words[0],
                                        });
      break;
   }

   case AGX_SELECTOR_ALLOCATE_MEM: {
      if (ret != 0)
         break;

      assert(outputStruct != NULL && outputStructCntP != NULL);
      assert((*outputStructCntP) == 0x50);

      const struct agx_allocate_resource_resp *resp = outputStruct;

      /* NOTE(review): nothing in this file frees the VA record; presumably
       * the decoder owns it once the BO is tracked -- confirm.
       */
      struct agx_va *va = malloc(sizeof(*va));

      if (va == NULL) {
         fprintf(stderr, "wrap: out of memory, allocation not tracked\n");
         break;
      }

      va->addr = resp->gpu_va;
      va->size_B = resp->sub_size;

      agxdecode_track_alloc(decode_ctx, &(struct agx_bo){
                                           .size = resp->sub_size,
                                           .handle = resp->handle,
                                           .va = va,
                                           ._map = (void *)(uintptr_t)resp->cpu,
                                        });
      break;
   }

   case AGX_SELECTOR_FREE_MEM: {
      if (ret != 0)
         break;

      assert(inputCnt == 1);
      assert(inputStruct == NULL);
      assert(output == NULL);
      assert(outputStruct == NULL);

      agxdecode_track_free(decode_ctx, &(struct agx_bo){.handle = input[0]});
      break;
   }

   case AGX_SELECTOR_FREE_SHMEM: {
      if (ret != 0)
         break;

      assert(inputCnt == 1);
      assert(inputStruct == NULL);
      assert(output == NULL);
      assert(outputStruct == NULL);

      /* Shmems were tracked under their synthetic handle */
      agxdecode_track_free(decode_ctx,
                           &(struct agx_bo){.handle = HANDLE(input[0])});
      break;
   }

   default:
      /* Dump the outputs */
      if (outputCnt) {
         printf("%u scalars: ", *outputCnt);

         for (uint32_t i = 0; i < *outputCnt; ++i)
            printf("%" PRIx64 " ", output[i]);

         printf("\n");
      }

      if (outputStructCntP) {
         printf(" struct\n");
         u_hexdump(stdout, outputStruct, *outputStructCntP, true);

         /* Selector 2 has no name in agx_selector; its output struct is
          * treated as starting with a pointer to a further buffer.
          */
         if (selector == 2) {
            /* Dump linked buffer as well */
            void **o = outputStruct;
            u_hexdump(stdout, *o, 64, true);
         }
      }

      printf("\n");
      break;
   }

   return ret;
}
/*
 * Interposed replacement for IOConnectCallAsyncMethod.
 *
 * Dumps the inputs and the async references, invokes the real method, then
 * dumps the return code and the outputs. Unlike wrap_Method, every connection
 * is traced and nothing is tracked in the decoder.
 *
 * Parameters are exactly those of IOConnectCallAsyncMethod. Returns the value
 * returned by the real IOConnectCallAsyncMethod.
 */
kern_return_t
wrap_AsyncMethod(mach_port_t connection, uint32_t selector,
                 mach_port_t wakePort, uint64_t *reference,
                 uint32_t referenceCnt, const uint64_t *input,
                 uint32_t inputCnt, const void *inputStruct,
                 size_t inputStructCnt, uint64_t *output, uint32_t *outputCnt,
                 void *outputStruct, size_t *outputStructCntP)
{
   /* Check the arguments make sense: each buffer comes with its count */
   assert((input != NULL) == (inputCnt != 0));
   assert((inputStruct != NULL) == (inputStructCnt != 0));
   assert((output != NULL) == (outputCnt != NULL));
   assert((outputStruct != NULL) == (outputStructCntP != NULL));

   printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector,
          wakePort, (void *)outputStructCntP,
          outputStructCntP ? *outputStructCntP : (size_t)0);

   for (uint32_t i = 0; i < inputCnt; ++i)
      printf(" %" PRIx64, input[i]);

   if (inputStructCnt) {
      printf(", struct:\n");
      u_hexdump(stdout, inputStruct, inputStructCnt, true);
   } else {
      printf("\n");
   }

   printf(", references: ");
   for (uint32_t i = 0; i < referenceCnt; ++i)
      printf(" %" PRIx64, reference[i]);
   printf("\n");

   /* Invoke the real method */
   kern_return_t ret = IOConnectCallAsyncMethod(
      connection, selector, wakePort, reference, referenceCnt, input, inputCnt,
      inputStruct, inputStructCnt, output, outputCnt, outputStruct,
      outputStructCntP);

   /* Terminate the line so the return code does not run into the scalar
    * count printed next.
    */
   printf("return %u\n", (unsigned)ret);

   /* Dump the outputs */
   if (outputCnt) {
      printf("%u scalars: ", *outputCnt);

      for (uint32_t i = 0; i < *outputCnt; ++i)
         printf("%" PRIx64 " ", output[i]);

      printf("\n");
   }

   if (outputStructCntP) {
      printf(" struct\n");
      u_hexdump(stdout, outputStruct, *outputStructCntP, true);

      /* Selector 2 has no name in agx_selector; its output struct is treated
       * as starting with a pointer to a further buffer.
       */
      if (selector == 2) {
         /* Dump linked buffer as well */
         void **o = outputStruct;
         u_hexdump(stdout, *o, 64, true);
      }
   }

   printf("\n");
   return ret;
}
/*
 * Interposed replacement for IOConnectCallStructMethod: a struct-only call is
 * traced as a wrap_Method call with no scalar inputs or outputs.
 */
kern_return_t
wrap_StructMethod(mach_port_t connection, uint32_t selector,
                  const void *inputStruct, size_t inputStructCnt,
                  void *outputStruct, size_t *outputStructCntP)
{
   const uint64_t *const scalars_in = NULL;
   uint64_t *const scalars_out = NULL;
   uint32_t *const scalars_out_count = NULL;

   kern_return_t status = wrap_Method(connection, selector, scalars_in, 0,
                                      inputStruct, inputStructCnt, scalars_out,
                                      scalars_out_count, outputStruct,
                                      outputStructCntP);
   return status;
}
/*
 * Interposed replacement for IOConnectCallAsyncStructMethod: a struct-only
 * async call is traced as a wrap_AsyncMethod call with no scalar inputs or
 * outputs.
 */
kern_return_t
wrap_AsyncStructMethod(mach_port_t connection, uint32_t selector,
                       mach_port_t wakePort, uint64_t *reference,
                       uint32_t referenceCnt, const void *inputStruct,
                       size_t inputStructCnt, void *outputStruct,
                       size_t *outputStructCnt)
{
   const uint64_t *const scalars_in = NULL;
   uint64_t *const scalars_out = NULL;
   uint32_t *const scalars_out_count = NULL;

   kern_return_t status = wrap_AsyncMethod(
      connection, selector, wakePort, reference, referenceCnt, scalars_in, 0,
      inputStruct, inputStructCnt, scalars_out, scalars_out_count,
      outputStruct, outputStructCnt);
   return status;
}
/*
 * Interposed replacement for IOConnectCallScalarMethod: a scalar-only call is
 * traced as a wrap_Method call with no input or output struct.
 */
kern_return_t
wrap_ScalarMethod(mach_port_t connection, uint32_t selector,
                  const uint64_t *input, uint32_t inputCnt, uint64_t *output,
                  uint32_t *outputCnt)
{
   const void *const struct_in = NULL;
   void *const struct_out = NULL;
   size_t *const struct_out_size = NULL;

   kern_return_t status =
      wrap_Method(connection, selector, input, inputCnt, struct_in, 0, output,
                  outputCnt, struct_out, struct_out_size);
   return status;
}
/*
 * Interposed replacement for IOConnectCallAsyncScalarMethod: a scalar-only
 * async call is traced as a wrap_AsyncMethod call with no input or output
 * struct.
 */
kern_return_t
wrap_AsyncScalarMethod(mach_port_t connection, uint32_t selector,
                       mach_port_t wakePort, uint64_t *reference,
                       uint32_t referenceCnt, const uint64_t *input,
                       uint32_t inputCnt, uint64_t *output, uint32_t *outputCnt)
{
   const void *const struct_in = NULL;
   void *const struct_out = NULL;
   size_t *const struct_out_size = NULL;

   kern_return_t status = wrap_AsyncMethod(
      connection, selector, wakePort, reference, referenceCnt, input, inputCnt,
      struct_in, 0, output, outputCnt, struct_out, struct_out_size);
   return status;
}
/*
 * Interposed replacement for IODataQueueAllocateNotificationPort: allocates
 * the port with the real function, logs it and returns it unchanged.
 *
 * Declared with (void) so the definition is a real prototype; an empty
 * parameter list leaves the parameters unspecified in C11.
 */
mach_port_t
wrap_DataQueueAllocateNotificationPort(void)
{
   mach_port_t ret = IODataQueueAllocateNotificationPort();
   printf("Allocated notif port %X\n", ret);
   return ret;
}
/*
 * Interposed replacement for IOConnectSetNotificationPort: logs the arguments
 * and forwards them unchanged to the real function, returning its result.
 */
kern_return_t
wrap_SetNotificationPort(io_connect_t connect, uint32_t type, mach_port_t port,
                         uintptr_t reference)
{
   /* The reference is widened to 64 bits so PRIx64 matches the argument */
   printf(
      "Set notification port connect=%X, type=%X, port=%X, reference=%" PRIx64
      "\n",
      connect, type, port, (uint64_t)reference);

   return IOConnectSetNotificationPort(connect, type, port, reference);
}
/*
 * Interposed replacement for IODataQueueWaitForAvailableData: logs the
 * notification port, performs the real wait, then logs and returns its
 * result.
 */
IOReturn
wrap_DataQueueWaitForAvailableData(IODataQueueMemory *dataQueue,
                                   mach_port_t notificationPort)
{
   printf("Waiting for data queue at notif port %X\n", notificationPort);

   const IOReturn status =
      IODataQueueWaitForAvailableData(dataQueue, notificationPort);

   printf("ret=%X\n", status);
   return status;
}
/*
 * Interposed replacement for IODataQueuePeek: logs that a peek happened and
 * returns whatever the real function returns.
 */
IODataQueueEntry *
wrap_DataQueuePeek(IODataQueueMemory *dataQueue)
{
   printf("Peeking data queue\n");

   IODataQueueEntry *entry = IODataQueuePeek(dataQueue);
   return entry;
}
/*
 * Interposed replacement for IODataQueueDequeue: logs the request, performs
 * the real dequeue, logs the result and hex-dumps the bytes that were
 * returned.
 *
 * Neither `data` nor `dataSize` is dereferenced when NULL, so callers that
 * dequeue an entry without asking for its contents are traced without
 * crashing the wrapper. The payload is only dumped when the dequeue
 * succeeded, since the buffer holds no entry otherwise.
 *
 * Returns the value returned by the real IODataQueueDequeue.
 */
IOReturn
wrap_DataQueueDequeue(IODataQueueMemory *dataQueue, void *data,
                      uint32_t *dataSize)
{
   printf("Dequeueing (dataQueue=%p, data=%p, buffer %u)\n", (void *)dataQueue,
          data, dataSize ? *dataSize : 0);

   IOReturn ret = IODataQueueDequeue(dataQueue, data, dataSize);

   /* Read back the size only after the call, which may have updated it */
   uint32_t got = dataSize ? *dataSize : 0;

   printf("Return \"%s\", got %u bytes\n", mach_error_string(ret), got);

   /* 0 is the success code (kIOReturnSuccess) */
   if (ret == 0 && data != NULL) {
      const uint8_t *bytes = data;

      for (uint32_t i = 0; i < got; ++i) {
         printf("%02X ", bytes[i]);
      }
   }

   printf("\n");
   return ret;
}
/*
* Interposition table: each entry pairs a wrapper defined in this file with
* the IOKit function it stands in for. DYLD_INTERPOSE comes from
* dyld_interpose.h.
*/
/* IOConnectCall*Method family */
DYLD_INTERPOSE(wrap_Method, IOConnectCallMethod);
DYLD_INTERPOSE(wrap_AsyncMethod, IOConnectCallAsyncMethod);
DYLD_INTERPOSE(wrap_StructMethod, IOConnectCallStructMethod);
DYLD_INTERPOSE(wrap_AsyncStructMethod, IOConnectCallAsyncStructMethod);
DYLD_INTERPOSE(wrap_ScalarMethod, IOConnectCallScalarMethod);
DYLD_INTERPOSE(wrap_AsyncScalarMethod, IOConnectCallAsyncScalarMethod);
/* Notification ports and data queues */
DYLD_INTERPOSE(wrap_SetNotificationPort, IOConnectSetNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueAllocateNotificationPort,
IODataQueueAllocateNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueWaitForAvailableData,
IODataQueueWaitForAvailableData);
DYLD_INTERPOSE(wrap_DataQueuePeek, IODataQueuePeek);
DYLD_INTERPOSE(wrap_DataQueueDequeue, IODataQueueDequeue);