blob: 6a94a442808846396b730406d7a6c4e0f41d2686 [file] [log] [blame]
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/stat.h>
#include "exec_stack.h"
#include "bytecode.h"
#include "jv_alloc.h"
#include "jq_parser.h"
#include "locfile.h"
#include "jv.h"
#include "jq.h"
#include "parser.h"
#include "builtin.h"
#include "util.h"
#include "linker.h"
struct jq_state {
void (*nomem_handler)(void *);
void *nomem_handler_data;
struct bytecode* bc;
jq_msg_cb err_cb;
void *err_cb_data;
jv error;
struct stack stk;
stack_ptr curr_frame;
stack_ptr stk_top;
stack_ptr fork_top;
jv path;
jv value_at_path;
int subexp_nest;
int debug_trace_enabled;
int initial_execution;
unsigned next_label;
jv attrs;
jq_input_cb input_cb;
void *input_cb_data;
jq_msg_cb debug_cb;
void *debug_cb_data;
};
struct closure {
struct bytecode* bc; // jq bytecode
stack_ptr env; // jq stack address of closed frame
};
// locals for any function called: either a closure or a local variable
union frame_entry {
struct closure closure;
jv localvar;
};
// jq function call frame
struct frame {
struct bytecode* bc; // jq bytecode for callee
stack_ptr env; // jq stack address of frame to return to
stack_ptr retdata; // jq stack address to unwind to on RET
uint16_t* retaddr; // jq bytecode return address
union frame_entry entries[0]; // nclosures + nlocals
};
static int frame_size(struct bytecode* bc) {
return sizeof(struct frame) + sizeof(union frame_entry) * (bc->nclosures + bc->nlocals);
}
static struct frame* frame_current(struct jq_state* jq) {
struct frame* fp = stack_block(&jq->stk, jq->curr_frame);
stack_ptr next = *stack_block_next(&jq->stk, jq->curr_frame);
if (next) {
struct frame* fpnext = stack_block(&jq->stk, next);
struct bytecode* bc = fpnext->bc;
assert(fp->retaddr >= bc->code && fp->retaddr < bc->code + bc->codelen);
} else {
assert(fp->retaddr == 0);
}
return fp;
}
static stack_ptr frame_get_level(struct jq_state* jq, int level) {
stack_ptr fr = jq->curr_frame;
for (int i=0; i<level; i++) {
struct frame* fp = stack_block(&jq->stk, fr);
fr = fp->env;
}
return fr;
}
static jv* frame_local_var(struct jq_state* jq, int var, int level) {
struct frame* fr = stack_block(&jq->stk, frame_get_level(jq, level));
assert(var >= 0);
assert(var < fr->bc->nlocals);
return &fr->entries[fr->bc->nclosures + var].localvar;
}
static struct closure make_closure(struct jq_state* jq, uint16_t* pc) {
uint16_t level = *pc++;
uint16_t idx = *pc++;
stack_ptr fridx = frame_get_level(jq, level);
struct frame* fr = stack_block(&jq->stk, fridx);
if (idx & ARG_NEWCLOSURE) {
// A new closure closing the frame identified by level, and with
// the bytecode body of the idx'th subfunction of that frame
int subfn_idx = idx & ~ARG_NEWCLOSURE;
assert(subfn_idx < fr->bc->nsubfunctions);
struct closure cl = {fr->bc->subfunctions[subfn_idx],
fridx};
return cl;
} else {
// A reference to a closure from the frame identified by level; copy
// it as-is
int closure = idx;
assert(closure >= 0);
assert(closure < fr->bc->nclosures);
return fr->entries[closure].closure;
}
}
static struct frame* frame_push(struct jq_state* jq, struct closure callee,
uint16_t* argdef, int nargs) {
stack_ptr new_frame_idx = stack_push_block(&jq->stk, jq->curr_frame, frame_size(callee.bc));
struct frame* new_frame = stack_block(&jq->stk, new_frame_idx);
new_frame->bc = callee.bc;
new_frame->env = callee.env;
assert(nargs == new_frame->bc->nclosures);
union frame_entry* entries = new_frame->entries;
for (int i=0; i<nargs; i++) {
entries->closure = make_closure(jq, argdef + i * 2);
entries++;
}
for (int i=0; i<callee.bc->nlocals; i++) {
entries->localvar = jv_invalid();
entries++;
}
jq->curr_frame = new_frame_idx;
return new_frame;
}
static void frame_pop(struct jq_state* jq) {
assert(jq->curr_frame);
struct frame* fp = frame_current(jq);
if (stack_pop_will_free(&jq->stk, jq->curr_frame)) {
int nlocals = fp->bc->nlocals;
for (int i=0; i<nlocals; i++) {
jv_free(*frame_local_var(jq, i, 0));
}
}
jq->curr_frame = stack_pop_block(&jq->stk, jq->curr_frame, frame_size(fp->bc));
}
void stack_push(jq_state *jq, jv val) {
assert(jv_is_valid(val));
jq->stk_top = stack_push_block(&jq->stk, jq->stk_top, sizeof(jv));
jv* sval = stack_block(&jq->stk, jq->stk_top);
*sval = val;
}
jv stack_pop(jq_state *jq) {
jv* sval = stack_block(&jq->stk, jq->stk_top);
jv val = *sval;
if (!stack_pop_will_free(&jq->stk, jq->stk_top)) {
val = jv_copy(val);
}
jq->stk_top = stack_pop_block(&jq->stk, jq->stk_top, sizeof(jv));
assert(jv_is_valid(val));
return val;
}
// Like stack_pop(), but assert !stack_pop_will_free() and replace with
// jv_null() on the stack.
jv stack_popn(jq_state *jq) {
jv* sval = stack_block(&jq->stk, jq->stk_top);
jv val = *sval;
if (!stack_pop_will_free(&jq->stk, jq->stk_top)) {
*sval = jv_null();
}
jq->stk_top = stack_pop_block(&jq->stk, jq->stk_top, sizeof(jv));
assert(jv_is_valid(val));
return val;
}
struct forkpoint {
stack_ptr saved_data_stack;
stack_ptr saved_curr_frame;
int path_len, subexp_nest;
jv value_at_path;
uint16_t* return_address;
};
struct stack_pos {
stack_ptr saved_data_stack, saved_curr_frame;
};
struct stack_pos stack_get_pos(jq_state* jq) {
struct stack_pos sp = {jq->stk_top, jq->curr_frame};
return sp;
}
void stack_save(jq_state *jq, uint16_t* retaddr, struct stack_pos sp){
jq->fork_top = stack_push_block(&jq->stk, jq->fork_top, sizeof(struct forkpoint));
struct forkpoint* fork = stack_block(&jq->stk, jq->fork_top);
fork->saved_data_stack = jq->stk_top;
fork->saved_curr_frame = jq->curr_frame;
fork->path_len =
jv_get_kind(jq->path) == JV_KIND_ARRAY ? jv_array_length(jv_copy(jq->path)) : 0;
fork->value_at_path = jv_copy(jq->value_at_path);
fork->subexp_nest = jq->subexp_nest;
fork->return_address = retaddr;
jq->stk_top = sp.saved_data_stack;
jq->curr_frame = sp.saved_curr_frame;
}
static int path_intact(jq_state *jq, jv curr) {
if (jq->subexp_nest == 0 && jv_get_kind(jq->path) == JV_KIND_ARRAY) {
return jv_identical(curr, jv_copy(jq->value_at_path));
} else {
jv_free(curr);
return 1;
}
}
static void path_append(jq_state* jq, jv component, jv value_at_path) {
if (jq->subexp_nest == 0 && jv_get_kind(jq->path) == JV_KIND_ARRAY) {
int n1 = jv_array_length(jv_copy(jq->path));
jq->path = jv_array_append(jq->path, component);
int n2 = jv_array_length(jv_copy(jq->path));
assert(n2 == n1 + 1);
jv_free(jq->value_at_path);
jq->value_at_path = value_at_path;
} else {
jv_free(component);
jv_free(value_at_path);
}
}
uint16_t* stack_restore(jq_state *jq){
while (!stack_pop_will_free(&jq->stk, jq->fork_top)) {
if (stack_pop_will_free(&jq->stk, jq->stk_top)) {
jv_free(stack_pop(jq));
} else if (stack_pop_will_free(&jq->stk, jq->curr_frame)) {
frame_pop(jq);
} else {
assert(0);
}
}
if (jq->fork_top == 0) {
return 0;
}
struct forkpoint* fork = stack_block(&jq->stk, jq->fork_top);
uint16_t* retaddr = fork->return_address;
jq->stk_top = fork->saved_data_stack;
jq->curr_frame = fork->saved_curr_frame;
int path_len = fork->path_len;
if (jv_get_kind(jq->path) == JV_KIND_ARRAY) {
assert(path_len >= 0);
jq->path = jv_array_slice(jq->path, 0, path_len);
} else {
assert(path_len == 0);
}
jv_free(jq->value_at_path);
jq->value_at_path = fork->value_at_path;
jq->subexp_nest = fork->subexp_nest;
jq->fork_top = stack_pop_block(&jq->stk, jq->fork_top, sizeof(struct forkpoint));
return retaddr;
}
static void jq_reset(jq_state *jq) {
while (stack_restore(jq)) {}
assert(jq->stk_top == 0);
assert(jq->fork_top == 0);
assert(jq->curr_frame == 0);
stack_reset(&jq->stk);
jv_free(jq->error);
jq->error = jv_null();
if (jv_get_kind(jq->path) != JV_KIND_INVALID)
jv_free(jq->path);
jq->path = jv_null();
jv_free(jq->value_at_path);
jq->value_at_path = jv_null();
jq->subexp_nest = 0;
}
void jq_report_error(jq_state *jq, jv value) {
assert(jq->err_cb);
// callback must jv_free() its jv argument
jq->err_cb(jq->err_cb_data, value);
}
static void set_error(jq_state *jq, jv value) {
// Record so try/catch can find it.
jv_free(jq->error);
jq->error = value;
}
#define ON_BACKTRACK(op) ((op)+NUM_OPCODES)
jv jq_next(jq_state *jq) {
jv cfunc_input[MAX_CFUNCTION_ARGS];
jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data);
uint16_t* pc = stack_restore(jq);
assert(pc);
int raising;
int backtracking = !jq->initial_execution;
jq->initial_execution = 0;
assert(jv_get_kind(jq->error) == JV_KIND_NULL);
while (1) {
uint16_t opcode = *pc;
raising = 0;
if (jq->debug_trace_enabled) {
dump_operation(frame_current(jq)->bc, pc);
printf("\t");
const struct opcode_description* opdesc = opcode_describe(opcode);
stack_ptr param = 0;
if (!backtracking) {
int stack_in = opdesc->stack_in;
if (stack_in == -1) stack_in = pc[1];
for (int i=0; i<stack_in; i++) {
if (i == 0) {
param = jq->stk_top;
} else {
printf(" | ");
param = *stack_block_next(&jq->stk, param);
}
if (!param) break;
jv_dump(jv_copy(*(jv*)stack_block(&jq->stk, param)), JV_PRINT_REFCOUNT);
//printf("<%d>", jv_get_refcnt(param->val));
//printf(" -- ");
//jv_dump(jv_copy(jq->path), 0);
}
} else {
printf("\t<backtracking>");
}
printf("\n");
}
if (backtracking) {
opcode = ON_BACKTRACK(opcode);
backtracking = 0;
raising = !jv_is_valid(jq->error);
}
pc++;
switch (opcode) {
default: assert(0 && "invalid instruction");
case TOP: break;
case LOADK: {
jv v = jv_array_get(jv_copy(frame_current(jq)->bc->constants), *pc++);
assert(jv_is_valid(v));
jv_free(stack_pop(jq));
stack_push(jq, v);
break;
}
case GENLABEL: {
stack_push(jq, JV_OBJECT(jv_string("__jq"), jv_number(jq->next_label++)));
break;
}
case DUP: {
jv v = stack_pop(jq);
stack_push(jq, jv_copy(v));
stack_push(jq, v);
break;
}
case DUPN: {
jv v = stack_popn(jq);
stack_push(jq, jv_copy(v));
stack_push(jq, v);
break;
}
case DUP2: {
jv keep = stack_pop(jq);
jv v = stack_pop(jq);
stack_push(jq, jv_copy(v));
stack_push(jq, keep);
stack_push(jq, v);
break;
}
case SUBEXP_BEGIN: {
jv v = stack_pop(jq);
stack_push(jq, jv_copy(v));
stack_push(jq, v);
jq->subexp_nest++;
break;
}
case SUBEXP_END: {
assert(jq->subexp_nest > 0);
jq->subexp_nest--;
jv a = stack_pop(jq);
jv b = stack_pop(jq);
stack_push(jq, a);
stack_push(jq, b);
break;
}
case POP: {
jv_free(stack_pop(jq));
break;
}
case APPEND: {
jv v = stack_pop(jq);
uint16_t level = *pc++;
uint16_t vidx = *pc++;
jv* var = frame_local_var(jq, vidx, level);
assert(jv_get_kind(*var) == JV_KIND_ARRAY);
*var = jv_array_append(*var, v);
break;
}
case INSERT: {
jv stktop = stack_pop(jq);
jv v = stack_pop(jq);
jv k = stack_pop(jq);
jv objv = stack_pop(jq);
assert(jv_get_kind(objv) == JV_KIND_OBJECT);
if (jv_get_kind(k) == JV_KIND_STRING) {
stack_push(jq, jv_object_set(objv, k, v));
stack_push(jq, stktop);
} else {
char errbuf[15];
set_error(jq, jv_invalid_with_msg(jv_string_fmt("Cannot use %s (%s) as object key",
jv_kind_name(jv_get_kind(k)),
jv_dump_string_trunc(jv_copy(k), errbuf, sizeof(errbuf)))));
jv_free(stktop);
jv_free(v);
jv_free(k);
jv_free(objv);
goto do_backtrack;
}
break;
}
case ON_BACKTRACK(RANGE):
case RANGE: {
uint16_t level = *pc++;
uint16_t v = *pc++;
jv* var = frame_local_var(jq, v, level);
jv max = stack_pop(jq);
if (raising) goto do_backtrack;
if (jv_get_kind(*var) != JV_KIND_NUMBER ||
jv_get_kind(max) != JV_KIND_NUMBER) {
set_error(jq, jv_invalid_with_msg(jv_string_fmt("Range bounds must be numeric")));
jv_free(max);
goto do_backtrack;
} else if (jv_number_value(jv_copy(*var)) >= jv_number_value(jv_copy(max))) {
/* finished iterating */
goto do_backtrack;
} else {
jv curr = jv_copy(*var);
*var = jv_number(jv_number_value(*var) + 1);
struct stack_pos spos = stack_get_pos(jq);
stack_push(jq, jv_copy(max));
stack_save(jq, pc - 3, spos);
stack_push(jq, curr);
}
break;
}
// FIXME: loadv/storev may do too much copying/freeing
case LOADV: {
uint16_t level = *pc++;
uint16_t v = *pc++;
jv* var = frame_local_var(jq, v, level);
if (jq->debug_trace_enabled) {
printf("V%d = ", v);
jv_dump(jv_copy(*var), 0);
printf(" (%d)\n", jv_get_refcnt(*var));
}
jv_free(stack_pop(jq));
stack_push(jq, jv_copy(*var));
break;
}
// Does a load but replaces the variable with null
case LOADVN: {
uint16_t level = *pc++;
uint16_t v = *pc++;
jv* var = frame_local_var(jq, v, level);
if (jq->debug_trace_enabled) {
printf("V%d = ", v);
jv_dump(jv_copy(*var), 0);
printf(" (%d)\n", jv_get_refcnt(*var));
}
jv_free(stack_popn(jq));
stack_push(jq, *var);
*var = jv_null();
break;
}
case STOREV: {
uint16_t level = *pc++;
uint16_t v = *pc++;
jv* var = frame_local_var(jq, v, level);
jv val = stack_pop(jq);
if (jq->debug_trace_enabled) {
printf("V%d = ", v);
jv_dump(jv_copy(val), 0);
printf(" (%d)\n", jv_get_refcnt(val));
}
jv_free(*var);
*var = val;
break;
}
case STORE_GLOBAL: {
// Get the constant
jv val = jv_array_get(jv_copy(frame_current(jq)->bc->constants), *pc++);
assert(jv_is_valid(val));
// Store the var
uint16_t level = *pc++;
uint16_t v = *pc++;
jv* var = frame_local_var(jq, v, level);
if (jq->debug_trace_enabled) {
printf("V%d = ", v);
jv_dump(jv_copy(val), 0);
printf(" (%d)\n", jv_get_refcnt(val));
}
jv_free(*var);
*var = val;
break;
}
case PATH_BEGIN: {
jv v = stack_pop(jq);
stack_push(jq, jq->path);
stack_save(jq, pc - 1, stack_get_pos(jq));
stack_push(jq, jv_number(jq->subexp_nest));
stack_push(jq, jq->value_at_path);
stack_push(jq, jv_copy(v));
jq->path = jv_array();
jq->value_at_path = v; // next INDEX operation must index into v
jq->subexp_nest = 0;
break;
}
case PATH_END: {
jv v = stack_pop(jq);
// detect invalid path expression like path(.a | reverse)
if (!path_intact(jq, jv_copy(v))) {
char errbuf[30];
jv msg = jv_string_fmt(
"Invalid path expression with result %s",
jv_dump_string_trunc(v, errbuf, sizeof(errbuf)));
set_error(jq, jv_invalid_with_msg(msg));
goto do_backtrack;
}
jv_free(v); // discard value, only keep path
jv old_value_at_path = stack_pop(jq);
int old_subexp_nest = (int)jv_number_value(stack_pop(jq));
jv path = jq->path;
jq->path = stack_pop(jq);
struct stack_pos spos = stack_get_pos(jq);
stack_push(jq, jv_copy(path));
stack_save(jq, pc - 1, spos);
stack_push(jq, path);
jq->subexp_nest = old_subexp_nest;
jv_free(jq->value_at_path);
jq->value_at_path = old_value_at_path;
break;
}
case ON_BACKTRACK(PATH_BEGIN):
case ON_BACKTRACK(PATH_END): {
jv_free(jq->path);
jq->path = stack_pop(jq);
goto do_backtrack;
}
case INDEX:
case INDEX_OPT: {
jv t = stack_pop(jq);
jv k = stack_pop(jq);
// detect invalid path expression like path(reverse | .a)
if (!path_intact(jq, jv_copy(t))) {
char keybuf[15];
char objbuf[30];
jv msg = jv_string_fmt(
"Invalid path expression near attempt to access element %s of %s",
jv_dump_string_trunc(k, keybuf, sizeof(keybuf)),
jv_dump_string_trunc(t, objbuf, sizeof(objbuf)));
set_error(jq, jv_invalid_with_msg(msg));
goto do_backtrack;
}
jv v = jv_get(t, jv_copy(k));
if (jv_is_valid(v)) {
path_append(jq, k, jv_copy(v));
stack_push(jq, v);
} else {
jv_free(k);
if (opcode == INDEX)
set_error(jq, v);
else
jv_free(v);
goto do_backtrack;
}
break;
}
case JUMP: {
uint16_t offset = *pc++;
pc += offset;
break;
}
case JUMP_F: {
uint16_t offset = *pc++;
jv t = stack_pop(jq);
jv_kind kind = jv_get_kind(t);
if (kind == JV_KIND_FALSE || kind == JV_KIND_NULL) {
pc += offset;
}
stack_push(jq, t); // FIXME do this better
break;
}
case EACH:
case EACH_OPT: {
jv container = stack_pop(jq);
// detect invalid path expression like path(reverse | .[])
if (!path_intact(jq, jv_copy(container))) {
char errbuf[30];
jv msg = jv_string_fmt(
"Invalid path expression near attempt to iterate through %s",
jv_dump_string_trunc(container, errbuf, sizeof(errbuf)));
set_error(jq, jv_invalid_with_msg(msg));
goto do_backtrack;
}
stack_push(jq, container);
stack_push(jq, jv_number(-1));
// fallthrough
}
case ON_BACKTRACK(EACH):
case ON_BACKTRACK(EACH_OPT): {
int idx = jv_number_value(stack_pop(jq));
jv container = stack_pop(jq);
int keep_going, is_last = 0;
jv key, value;
if (jv_get_kind(container) == JV_KIND_ARRAY) {
if (opcode == EACH || opcode == EACH_OPT) idx = 0;
else idx = idx + 1;
int len = jv_array_length(jv_copy(container));
keep_going = idx < len;
is_last = idx == len - 1;
if (keep_going) {
key = jv_number(idx);
value = jv_array_get(jv_copy(container), idx);
}
} else if (jv_get_kind(container) == JV_KIND_OBJECT) {
if (opcode == EACH || opcode == EACH_OPT) idx = jv_object_iter(container);
else idx = jv_object_iter_next(container, idx);
keep_going = jv_object_iter_valid(container, idx);
if (keep_going) {
key = jv_object_iter_key(container, idx);
value = jv_object_iter_value(container, idx);
}
} else {
assert(opcode == EACH || opcode == EACH_OPT);
if (opcode == EACH) {
char errbuf[15];
set_error(jq,
jv_invalid_with_msg(jv_string_fmt("Cannot iterate over %s (%s)",
jv_kind_name(jv_get_kind(container)),
jv_dump_string_trunc(jv_copy(container), errbuf, sizeof(errbuf)))));
}
keep_going = 0;
}
if (!keep_going || raising) {
if (keep_going)
jv_free(value);
jv_free(container);
goto do_backtrack;
} else if (is_last) {
// we don't need to make a backtrack point
jv_free(container);
path_append(jq, key, jv_copy(value));
stack_push(jq, value);
} else {
struct stack_pos spos = stack_get_pos(jq);
stack_push(jq, container);
stack_push(jq, jv_number(idx));
stack_save(jq, pc - 1, spos);
path_append(jq, key, jv_copy(value));
stack_push(jq, value);
}
break;
}
do_backtrack:
case BACKTRACK: {
pc = stack_restore(jq);
if (!pc) {
if (!jv_is_valid(jq->error)) {
jv error = jq->error;
jq->error = jv_null();
return error;
}
return jv_invalid();
}
backtracking = 1;
break;
}
case FORK_OPT:
case FORK: {
stack_save(jq, pc - 1, stack_get_pos(jq));
pc++; // skip offset this time
break;
}
case ON_BACKTRACK(FORK_OPT): {
if (jv_is_valid(jq->error)) {
// `try EXP ...` backtracked here (no value, `empty`), so we backtrack more
jv_free(stack_pop(jq));
goto do_backtrack;
}
// `try EXP ...` exception caught in EXP
jv_free(stack_pop(jq)); // free the input
stack_push(jq, jv_invalid_get_msg(jq->error)); // push the error's message
jq->error = jv_null();
uint16_t offset = *pc++;
pc += offset;
break;
}
case ON_BACKTRACK(FORK): {
if (raising) goto do_backtrack;
uint16_t offset = *pc++;
pc += offset;
break;
}
case CALL_BUILTIN: {
int nargs = *pc++;
jv top = stack_pop(jq);
jv* in = cfunc_input;
in[0] = top;
for (int i = 1; i < nargs; i++) {
in[i] = stack_pop(jq);
}
struct cfunction* function = &frame_current(jq)->bc->globals->cfunctions[*pc++];
typedef jv (*func_1)(jq_state*,jv);
typedef jv (*func_2)(jq_state*,jv,jv);
typedef jv (*func_3)(jq_state*,jv,jv,jv);
typedef jv (*func_4)(jq_state*,jv,jv,jv,jv);
typedef jv (*func_5)(jq_state*,jv,jv,jv,jv,jv);
switch (function->nargs) {
case 1: top = ((func_1)function->fptr)(jq, in[0]); break;
case 2: top = ((func_2)function->fptr)(jq, in[0], in[1]); break;
case 3: top = ((func_3)function->fptr)(jq, in[0], in[1], in[2]); break;
case 4: top = ((func_4)function->fptr)(jq, in[0], in[1], in[2], in[3]); break;
case 5: top = ((func_5)function->fptr)(jq, in[0], in[1], in[2], in[3], in[4]); break;
// FIXME: a) up to 7 arguments (input + 6), b) should assert
// because the compiler should not generate this error.
default: return jv_invalid_with_msg(jv_string("Function takes too many arguments"));
}
if (jv_is_valid(top)) {
stack_push(jq, top);
} else if (jv_invalid_has_msg(jv_copy(top))) {
set_error(jq, top);
goto do_backtrack;
} else {
// C-coded function returns invalid w/o msg? -> backtrack, as if
// it had returned `empty`
goto do_backtrack;
}
break;
}
case TAIL_CALL_JQ:
case CALL_JQ: {
/*
* Bytecode layout here:
*
* CALL_JQ
* <nclosures> (i.e., number of call arguments)
* <callee closure> (what we're calling)
* <nclosures' worth of closures> (frame reference + code pointer)
*
* <next instruction (to return to)>
*
* Each closure consists of two uint16_t values: a "level"
* identifying the frame to be closed over, and an index.
*
* The level is a relative number of call frames reachable from
* the currently one; 0 -> current frame, 1 -> previous frame, and
* so on.
*
* The index is either an index of the closed frame's subfunctions
* or of the closed frame's parameter closures. If the latter,
* that closure will be passed, else the closed frame's pointer
* and the subfunction's code will form the closure to be passed.
*
* See make_closure() for more information.
*/
jv input = stack_pop(jq);
uint16_t nclosures = *pc++;
uint16_t* retaddr = pc + 2 + nclosures*2;
stack_ptr retdata = jq->stk_top;
struct frame* new_frame;
struct closure cl = make_closure(jq, pc);
if (opcode == TAIL_CALL_JQ) {
retaddr = frame_current(jq)->retaddr;
retdata = frame_current(jq)->retdata;
frame_pop(jq);
}
new_frame = frame_push(jq, cl, pc + 2, nclosures);
new_frame->retdata = retdata;
new_frame->retaddr = retaddr;
pc = new_frame->bc->code;
stack_push(jq, input);
break;
}
case RET: {
jv value = stack_pop(jq);
assert(jq->stk_top == frame_current(jq)->retdata);
uint16_t* retaddr = frame_current(jq)->retaddr;
if (retaddr) {
// function return
pc = retaddr;
frame_pop(jq);
} else {
// top-level return, yielding value
struct stack_pos spos = stack_get_pos(jq);
stack_push(jq, jv_null());
stack_save(jq, pc - 1, spos);
return value;
}
stack_push(jq, value);
break;
}
case ON_BACKTRACK(RET): {
// resumed after top-level return
goto do_backtrack;
}
}
}
}
jv jq_format_error(jv msg) {
if (jv_get_kind(msg) == JV_KIND_NULL ||
(jv_get_kind(msg) == JV_KIND_INVALID && !jv_invalid_has_msg(jv_copy(msg)))) {
jv_free(msg);
fprintf(stderr, "jq: error: out of memory\n");
return jv_null();
}
if (jv_get_kind(msg) == JV_KIND_STRING)
return msg; // expected to already be formatted
if (jv_get_kind(msg) == JV_KIND_INVALID)
msg = jv_invalid_get_msg(msg);
if (jv_get_kind(msg) == JV_KIND_NULL)
return jq_format_error(msg); // ENOMEM
// Invalid with msg; prefix with "jq: error: "
if (jv_get_kind(msg) != JV_KIND_INVALID) {
if (jv_get_kind(msg) == JV_KIND_STRING)
return jv_string_fmt("jq: error: %s", jv_string_value(msg));
msg = jv_dump_string(msg, JV_PRINT_INVALID);
if (jv_get_kind(msg) == JV_KIND_STRING)
return jv_string_fmt("jq: error: %s", jv_string_value(msg));
return jq_format_error(jv_null()); // ENOMEM
}
// An invalid inside an invalid!
return jq_format_error(jv_invalid_get_msg(msg));
}
// XXX Refactor into a utility function that returns a jv and one that
// uses it and then prints that jv's string as the complete error
// message.
static void default_err_cb(void *data, jv msg) {
msg = jq_format_error(msg);
fprintf((FILE *)data, "%s\n", jv_string_value(msg));
jv_free(msg);
}
jq_state *jq_init(void) {
jq_state *jq;
jq = jv_mem_alloc_unguarded(sizeof(*jq));
if (jq == NULL)
return NULL;
jq->bc = 0;
jq->next_label = 0;
stack_init(&jq->stk);
jq->stk_top = 0;
jq->fork_top = 0;
jq->curr_frame = 0;
jq->error = jv_null();
jq->err_cb = default_err_cb;
jq->err_cb_data = stderr;
jq->attrs = jv_object();
jq->path = jv_null();
jq->value_at_path = jv_null();
return jq;
}
void jq_set_error_cb(jq_state *jq, jq_msg_cb cb, void *data) {
if (cb == NULL) {
jq->err_cb = default_err_cb;
jq->err_cb_data = stderr;
} else {
jq->err_cb = cb;
jq->err_cb_data = data;
}
}
void jq_get_error_cb(jq_state *jq, jq_msg_cb *cb, void **data) {
*cb = jq->err_cb;
*data = jq->err_cb_data;
}
void jq_set_nomem_handler(jq_state *jq, void (*nomem_handler)(void *), void *data) {
jv_nomem_handler(nomem_handler, data);
jq->nomem_handler = nomem_handler;
jq->nomem_handler_data = data;
}
void jq_start(jq_state *jq, jv input, int flags) {
jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data);
jq_reset(jq);
struct closure top = {jq->bc, -1};
struct frame* top_frame = frame_push(jq, top, 0, 0);
top_frame->retdata = 0;
top_frame->retaddr = 0;
stack_push(jq, input);
stack_save(jq, jq->bc->code, stack_get_pos(jq));
if (flags & JQ_DEBUG_TRACE) {
jq->debug_trace_enabled = 1;
} else {
jq->debug_trace_enabled = 0;
}
jq->initial_execution = 1;
}
void jq_teardown(jq_state **jq) {
jq_state *old_jq = *jq;
if (old_jq == NULL)
return;
*jq = NULL;
jq_reset(old_jq);
bytecode_free(old_jq->bc);
old_jq->bc = 0;
jv_free(old_jq->attrs);
jv_mem_free(old_jq);
}
static int ret_follows(uint16_t *pc) {
if (*pc == RET)
return 1;
if (*pc++ != JUMP)
return 0;
return ret_follows(pc + *pc + 1); // FIXME, might be ironic
}
/*
* Look for tail calls that can be optimized: tail calls with no
* references left to the current frame.
*
* We're staring at this bytecode layout:
*
* CALL_JQ
* <nclosures>
* <callee closure> (2 units)
* <nclosures closures> (2 units each)
* <next instruction>
*
* A closure is:
*
* <level> (a relative frame count chased via the current frame's env)
* <index> (an index of a subfunction or closure in that frame)
*
* We're looking for:
*
* a) the next instruction is a RET or a chain of unconditional JUMPs
* that ends in a RET, and
*
* b) none of the closures -callee included- have level == 0.
*/
static uint16_t tail_call_analyze(uint16_t *pc) {
assert(*pc == CALL_JQ);
pc++;
// + 1 for the callee closure
for (uint16_t nclosures = *pc++ + 1; nclosures > 0; pc++, nclosures--) {
if (*pc++ == 0)
return CALL_JQ;
}
if (ret_follows(pc))
return TAIL_CALL_JQ;
return CALL_JQ;
}
static struct bytecode *optimize_code(struct bytecode *bc) {
uint16_t *pc = bc->code;
// FIXME: Don't mutate bc->code...
while (pc < bc->code + bc->codelen) {
switch (*pc) {
case CALL_JQ:
*pc = tail_call_analyze(pc);
break;
// Other bytecode optimizations here. A peephole optimizer would
// fit right in.
default: break;
}
pc += bytecode_operation_length(pc);
}
return bc;
}
static struct bytecode *optimize(struct bytecode *bc) {
for (int i=0; i<bc->nsubfunctions; i++) {
bc->subfunctions[i] = optimize(bc->subfunctions[i]);
}
return optimize_code(bc);
}
int jq_compile_args(jq_state *jq, const char* str, jv args) {
jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data);
assert(jv_get_kind(args) == JV_KIND_ARRAY);
struct locfile* locations;
locations = locfile_init(jq, "<top-level>", str, strlen(str));
block program;
jq_reset(jq);
if (jq->bc) {
bytecode_free(jq->bc);
jq->bc = 0;
}
int nerrors = load_program(jq, locations, &program);
if (nerrors == 0) {
jv_array_foreach(args, i, arg) {
jv name = jv_object_get(jv_copy(arg), jv_string("name"));
jv value = jv_object_get(arg, jv_string("value"));
program = gen_var_binding(gen_const(value), jv_string_value(name), program);
jv_free(name);
}
nerrors = builtins_bind(jq, &program);
if (nerrors == 0) {
nerrors = block_compile(program, &jq->bc, locations);
}
}
if (nerrors)
jq_report_error(jq, jv_string_fmt("jq: %d compile %s", nerrors, nerrors > 1 ? "errors" : "error"));
if (jq->bc)
jq->bc = optimize(jq->bc);
jv_free(args);
locfile_free(locations);
return jq->bc != NULL;
}
int jq_compile(jq_state *jq, const char* str) {
return jq_compile_args(jq, str, jv_array());
}
jv jq_get_jq_origin(jq_state *jq) {
return jq_get_attr(jq, jv_string("JQ_ORIGIN"));
}
jv jq_get_prog_origin(jq_state *jq) {
return jq_get_attr(jq, jv_string("PROGRAM_ORIGIN"));
}
jv jq_get_lib_dirs(jq_state *jq) {
return jq_get_attr(jq, jv_string("JQ_LIBRARY_PATH"));
}
void jq_set_attrs(jq_state *jq, jv attrs) {
assert(jv_get_kind(attrs) == JV_KIND_OBJECT);
jv_free(jq->attrs);
jq->attrs = attrs;
}
void jq_set_attr(jq_state *jq, jv attr, jv val) {
jq->attrs = jv_object_set(jq->attrs, attr, val);
}
jv jq_get_attr(jq_state *jq, jv attr) {
return jv_object_get(jv_copy(jq->attrs), attr);
}
void jq_dump_disassembly(jq_state *jq, int indent) {
dump_disassembly(indent, jq->bc);
}
void jq_set_input_cb(jq_state *jq, jq_input_cb cb, void *data) {
jq->input_cb = cb;
jq->input_cb_data = data;
}
void jq_get_input_cb(jq_state *jq, jq_input_cb *cb, void **data) {
*cb = jq->input_cb;
*data = jq->input_cb_data;
}
void jq_set_debug_cb(jq_state *jq, jq_msg_cb cb, void *data) {
jq->debug_cb = cb;
jq->debug_cb_data = data;
}
void jq_get_debug_cb(jq_state *jq, jq_msg_cb *cb, void **data) {
*cb = jq->debug_cb;
*data = jq->debug_cb_data;
}