blob: 3aad4ab31313dbe401deaed3133d953b9f9a9d1e [file] [log] [blame]
#ifndef _GNU_SOURCE
#define _GNU_SOURCE // for strdup
#endif
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "compile.h"
#include "bytecode.h"
#include "locfile.h"
#include "jv_alloc.h"
#include "linker.h"
/*
The intermediate representation for jq filters is as a sequence of
struct inst, which form a doubly-linked list via the next and prev
pointers.
A "block" represents a sequence of "struct inst", which may be
empty.
Blocks are generated by the parser bottom-up, so may have free
variables (refer to things not defined). See inst.bound_by and
inst.symbol.
*/
struct inst {
struct inst* next;
struct inst* prev;
opcode op;
struct {
uint16_t intval;
struct inst* target;
jv constant;
const struct cfunction* cfunc;
} imm;
struct locfile* locfile;
location source;
// Binding
// An instruction requiring binding (for parameters/variables/functions)
// is in one of three states:
// inst->bound_by = NULL - Unbound free variable
// inst->bound_by = inst - This instruction binds a variable
// inst->bound_by = other - Uses variable bound by other instruction
// Unbound instructions (references to other things that may or may not
// exist) are created by "gen_foo_unbound", and bindings are created by
// block_bind(definition, body), which binds all instructions in
// body which are unboudn and refer to "definition" by name.
struct inst* bound_by;
char* symbol;
int nformals;
int nactuals;
block subfn; // used by CLOSURE_CREATE (body of function)
block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)
// This instruction is compiled as part of which function?
// (only used during block_compile)
struct bytecode* compiled;
int bytecode_pos; // position just after this insn
};
static inst* inst_new(opcode op) {
inst* i = jv_mem_alloc(sizeof(inst));
i->next = i->prev = 0;
i->op = op;
i->bytecode_pos = -1;
i->bound_by = 0;
i->symbol = 0;
i->nformals = -1;
i->nactuals = -1;
i->subfn = gen_noop();
i->arglist = gen_noop();
i->source = UNKNOWN_LOCATION;
i->locfile = 0;
return i;
}
static void inst_free(struct inst* i) {
jv_mem_free(i->symbol);
block_free(i->subfn);
block_free(i->arglist);
if (i->locfile)
locfile_free(i->locfile);
if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
jv_free(i->imm.constant);
}
jv_mem_free(i);
}
static block inst_block(inst* i) {
block b = {i,i};
return b;
}
int block_is_single(block b) {
return b.first && b.first == b.last;
}
static inst* block_take(block* b) {
if (b->first == 0) return 0;
inst* i = b->first;
if (i->next) {
i->next->prev = 0;
b->first = i->next;
i->next = 0;
} else {
b->first = 0;
b->last = 0;
}
return i;
}
block gen_location(location loc, struct locfile* l, block b) {
for (inst* i = b.first; i; i = i->next) {
if (i->source.start == UNKNOWN_LOCATION.start &&
i->source.end == UNKNOWN_LOCATION.end) {
i->source = loc;
i->locfile = locfile_retain(l);
}
}
return b;
}
block gen_noop() {
block b = {0,0};
return b;
}
int block_is_noop(block b) {
return (b.first == 0 && b.last == 0);
}
block gen_op_simple(opcode op) {
assert(opcode_describe(op)->length == 1);
return inst_block(inst_new(op));
}
block gen_const(jv constant) {
assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
inst* i = inst_new(LOADK);
i->imm.constant = constant;
return inst_block(i);
}
block gen_const_global(jv constant, const char *name) {
assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
(OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
inst* i = inst_new(STORE_GLOBAL);
i->imm.constant = constant;
i->symbol = strdup(name);
return inst_block(i);
}
int block_is_const(block b) {
return (block_is_single(b) && b.first->op == LOADK);
}
jv_kind block_const_kind(block b) {
assert(block_is_const(b));
return jv_get_kind(b.first->imm.constant);
}
jv block_const(block b) {
assert(block_is_const(b));
return jv_copy(b.first->imm.constant);
}
block gen_op_target(opcode op, block target) {
assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
assert(target.last);
inst* i = inst_new(op);
i->imm.target = target.last;
return inst_block(i);
}
block gen_op_targetlater(opcode op) {
assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
inst* i = inst_new(op);
i->imm.target = 0;
return inst_block(i);
}
void inst_set_target(block b, block target) {
assert(block_is_single(b));
assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
assert(target.last);
b.first->imm.target = target.last;
}
block gen_op_unbound(opcode op, const char* name) {
assert(opcode_describe(op)->flags & OP_HAS_BINDING);
inst* i = inst_new(op);
i->symbol = strdup(name);
return inst_block(i);
}
block gen_op_var_fresh(opcode op, const char* name) {
assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
return block_bind(gen_op_unbound(op, name),
gen_noop(), OP_HAS_VARIABLE);
}
block gen_op_bound(opcode op, block binder) {
assert(block_is_single(binder));
block b = gen_op_unbound(op, binder.first->symbol);
b.first->bound_by = binder.first;
return b;
}
static void inst_join(inst* a, inst* b) {
assert(a && b);
assert(!a->next);
assert(!b->prev);
a->next = b;
b->prev = a;
}
void block_append(block* b, block b2) {
if (b2.first) {
if (b->last) {
inst_join(b->last, b2.first);
} else {
b->first = b2.first;
}
b->last = b2.last;
}
}
block block_join(block a, block b) {
block c = a;
block_append(&c, b);
return c;
}
int block_has_only_binders_and_imports(block binders, int bindflags) {
bindflags |= OP_HAS_BINDING;
for (inst* curr = binders.first; curr; curr = curr->next) {
if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
return 0;
}
}
return 1;
}
static int inst_is_binder(inst *i, int bindflags) {
return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
}
int block_has_only_binders(block binders, int bindflags) {
bindflags |= OP_HAS_BINDING;
bindflags &= ~OP_BIND_WILDCARD;
for (inst* curr = binders.first; curr; curr = curr->next) {
if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
return 0;
}
}
return 1;
}
// Count a binder's (function) formal params
static int block_count_formals(block b) {
int args = 0;
if (b.first->op == CLOSURE_CREATE_C)
return b.first->imm.cfunc->nargs - 1;
for (inst* i = b.first->arglist.first; i; i = i->next) {
assert(i->op == CLOSURE_PARAM);
args++;
}
return args;
}
// Count a call site's actual params
static int block_count_actuals(block b) {
int args = 0;
for (inst* i = b.first; i; i = i->next) {
switch (i->op) {
default: assert(0 && "Unknown function type"); break;
case CLOSURE_CREATE:
case CLOSURE_PARAM:
case CLOSURE_CREATE_C:
args++;
break;
}
}
return args;
}
static int block_count_refs(block binder, block body) {
int nrefs = 0;
for (inst* i = body.first; i; i = i->next) {
if (i != binder.first && i->bound_by == binder.first) {
nrefs++;
}
// counting recurses into closures
nrefs += block_count_refs(binder, i->subfn);
// counting recurses into argument list
nrefs += block_count_refs(binder, i->arglist);
}
return nrefs;
}
static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
assert(block_is_single(binder));
assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
assert(binder.first->symbol);
assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
assert(break_distance >= 0);
binder.first->bound_by = binder.first;
if (binder.first->nformals == -1)
binder.first->nformals = block_count_formals(binder);
int nrefs = 0;
for (inst* i = body.first; i; i = i->next) {
int flags = opcode_describe(i->op)->flags;
if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
(!strcmp(i->symbol, binder.first->symbol) ||
// Check for break/break2/break3; see parser.y
((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
i->symbol[2] == '\0'))) {
// bind this instruction
if (i->op == CALL_JQ && i->nactuals == -1)
i->nactuals = block_count_actuals(i->arglist);
if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
i->bound_by = binder.first;
nrefs++;
}
} else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
!strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
!strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
// Increment the break distance required for this binder to match
// a break whenever we come across a STOREV of *anonlabel...
break_distance++;
}
// binding recurses into closures
nrefs += block_bind_subblock(binder, i->subfn, bindflags, break_distance);
// binding recurses into argument list
nrefs += block_bind_subblock(binder, i->arglist, bindflags, break_distance);
}
return nrefs;
}
static int block_bind_each(block binder, block body, int bindflags) {
assert(block_has_only_binders(binder, bindflags));
bindflags |= OP_HAS_BINDING;
int nrefs = 0;
for (inst* curr = binder.first; curr; curr = curr->next) {
nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
}
return nrefs;
}
block block_bind(block binder, block body, int bindflags) {
block_bind_each(binder, body, bindflags);
return block_join(binder, body);
}
block block_bind_library(block binder, block body, int bindflags, const char* libname) {
bindflags |= OP_HAS_BINDING;
int nrefs = 0;
int matchlen = strlen(libname);
char* matchname = calloc(1,matchlen+2+1);
if (libname[0] != '\0') {
strcpy(matchname,libname);
strcpy(matchname+matchlen,"::");
matchlen += 2;
}
assert(block_has_only_binders(binder, bindflags));
for (inst *curr = binder.first; curr; curr = curr->next) {
int bindflags2 = bindflags;
char* cname = curr->symbol;
char* tname = malloc(strlen(curr->symbol)+matchlen+1);
strcpy(tname, matchname);
strcpy(tname+matchlen,cname);
// Ew
if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;
// This mutation is ugly, even if we undo it
curr->symbol = tname;
nrefs += block_bind_subblock(inst_block(curr), body, bindflags2, 0);
curr->symbol = cname;
free(tname);
}
free(matchname);
return body; // We don't return a join because we don't want those sticking around...
}
// Bind binder to body and throw away any defs in binder not referenced
// (directly or indirectly) from body.
block block_bind_referenced(block binder, block body, int bindflags) {
assert(block_has_only_binders(binder, bindflags));
bindflags |= OP_HAS_BINDING;
block refd = gen_noop();
block unrefd = gen_noop();
int nrefs;
for (int last_kept = 0, kept = 0; ; ) {
for (inst* curr; (curr = block_take(&binder));) {
block b = inst_block(curr);
nrefs = block_bind_each(b, body, bindflags);
// Check if this binder is referenced from any of the ones we
// already know are referenced by body.
nrefs += block_count_refs(b, refd);
nrefs += block_count_refs(b, body);
if (nrefs) {
refd = BLOCK(refd, b);
kept++;
} else {
unrefd = BLOCK(unrefd, b);
}
}
if (kept == last_kept)
break;
last_kept = kept;
binder = unrefd;
unrefd = gen_noop();
}
block_free(unrefd);
return block_join(refd, body);
}
block block_drop_unreferenced(block body) {
inst* curr;
block refd = gen_noop();
block unrefd = gen_noop();
int drop;
do {
drop = 0;
while ((curr = block_take(&body)) && curr->op != TOP) {
block b = inst_block(curr);
if (block_count_refs(b,refd) + block_count_refs(b,body) == 0) {
unrefd = BLOCK(unrefd, b);
drop++;
} else {
refd = BLOCK(refd, b);
}
}
if (curr && curr->op == TOP) {
body = BLOCK(inst_block(curr),body);
}
body = BLOCK(refd, body);
refd = gen_noop();
} while (drop != 0);
block_free(unrefd);
return body;
}
jv block_take_imports(block* body) {
jv imports = jv_array();
inst* top = NULL;
if (body->first && body->first->op == TOP) {
top = block_take(body);
}
while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
inst* dep = block_take(body);
if (dep->op == DEPS) {
imports = jv_array_append(imports, jv_copy(dep->imm.constant));
}
inst_free(dep);
}
if (top) {
*body = block_join(inst_block(top),*body);
}
return imports;
}
block gen_module(block metadata) {
inst* i = inst_new(MODULEMETA);
i->imm.constant = block_const(metadata);
if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
block_free(metadata);
return inst_block(i);
}
jv block_module_meta(block b) {
if (b.first != NULL && b.first->op == MODULEMETA)
return jv_copy(b.first->imm.constant);
return jv_null();
}
block gen_import(const char* name, block metadata, const char* as, int is_data) {
assert(metadata.first == NULL || block_is_const(metadata));
inst* i = inst_new(DEPS);
jv meta;
if (block_is_const(metadata))
meta = block_const(metadata);
else
meta = jv_object();
meta = jv_object_set(meta, jv_string("as"), jv_string(as));
meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
i->imm.constant = meta;
block_free(metadata);
return inst_block(i);
}
block gen_function(const char* name, block formals, block body) {
inst* i = inst_new(CLOSURE_CREATE);
for (inst* i = formals.last; i; i = i->prev) {
if (i->op == CLOSURE_PARAM_REGULAR) {
i->op = CLOSURE_PARAM;
body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
}
block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
}
i->subfn = body;
i->symbol = strdup(name);
i->arglist = formals;
block b = inst_block(i);
block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
return b;
}
block gen_param_regular(const char* name) {
return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
}
block gen_param(const char* name) {
return gen_op_unbound(CLOSURE_PARAM, name);
}
block gen_lambda(block body) {
return gen_function("@lambda", gen_noop(), body);
}
block gen_call(const char* name, block args) {
block b = gen_op_unbound(CALL_JQ, name);
b.first->arglist = args;
return b;
}
block gen_subexp(block a) {
return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
}
block gen_both(block a, block b) {
block jump = gen_op_targetlater(JUMP);
block fork = gen_op_target(FORK, jump);
block c = BLOCK(fork, a, jump, b);
inst_set_target(jump, c);
return c;
}
block gen_const_object(block expr) {
int is_const = 1;
jv o = jv_object();
jv k = jv_null();
jv v = jv_null();
for (inst *i = expr.first; i; i = i->next) {
if (i->op != SUBEXP_BEGIN ||
i->next == NULL ||
i->next->op != LOADK ||
i->next->next == NULL ||
i->next->next->op != SUBEXP_END) {
is_const = 0;
break;
}
k = jv_copy(i->next->imm.constant);
i = i->next->next->next;
if (i == NULL ||
i->op != SUBEXP_BEGIN ||
i->next == NULL ||
i->next->op != LOADK ||
i->next->next == NULL ||
i->next->next->op != SUBEXP_END) {
is_const = 0;
break;
}
v = jv_copy(i->next->imm.constant);
i = i->next->next->next;
if (i == NULL || i->op != INSERT) {
is_const = 0;
break;
}
o = jv_object_set(o, k, v);
k = jv_null();
v = jv_null();
}
if (!is_const) {
jv_free(o);
jv_free(k);
jv_free(v);
block b = {0,0};
return b;
}
block_free(expr);
return gen_const(o);
}
static block gen_const_array(block expr) {
/*
* An expr of all constant elements looks like this:
*
* 0009 FORK 0027
* 0011 FORK 0023
* 0013 FORK 0019
* 0015 LOADK 1
* 0017 JUMP 0021
* 0019 LOADK 2
* 0021 JUMP 0025
* 0023 LOADK 3
* 0025 JUMP 0029
* 0027 LOADK 4
*
* That's: N-1 commas for N elements, N LOADKs, and a JUMP between
* every LOADK. The sequence ends in a LOADK. Any deviation and it's
* not a list of constants.
*
* Here we check for this pattern almost exactly. We don't check that
* the targets of the FORK and JUMP instructions are in the right
* sequence.
*/
int all_const = 1;
int commas = 0;
int normal = 1;
jv a = jv_array();
for (inst *i = expr.first; i; i = i->next) {
if (i->op == FORK) {
commas++;
if (i->imm.target == NULL || i->imm.target->op != JUMP ||
jv_array_length(jv_copy(a)) > 0) {
normal = 0;
break;
}
} else if (all_const && i->op == LOADK) {
if (i->next != NULL && i->next->op != JUMP) {
normal = 0;
break;
}
a = jv_array_append(a, jv_copy(i->imm.constant));
} else if (i->op != JUMP || i->imm.target == NULL ||
i->imm.target->op != LOADK) {
all_const = 0;
}
}
if (all_const && normal &&
(expr.last == NULL || expr.last->op == LOADK) &&
jv_array_length(jv_copy(a)) == commas + 1) {
block_free(expr);
return gen_const(a);
}
jv_free(a);
block b = {0,0};
return b;
}
block gen_collect(block expr) {
block const_array = gen_const_array(expr);
if (const_array.first != NULL)
return const_array;
block array_var = gen_op_var_fresh(STOREV, "collect");
block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);
block tail = BLOCK(gen_op_bound(APPEND, array_var),
gen_op_simple(BACKTRACK));
return BLOCK(c,
gen_op_target(FORK, tail),
expr,
tail,
gen_op_bound(LOADVN, array_var));
}
block gen_reduce(const char* varname, block source, block init, block body) {
block res_var = gen_op_var_fresh(STOREV, "reduce");
block loop = BLOCK(gen_op_simple(DUPN),
source,
block_bind(gen_op_unbound(STOREV, varname),
BLOCK(gen_op_bound(LOADVN, res_var),
body,
gen_op_bound(STOREV, res_var)),
OP_HAS_VARIABLE),
gen_op_simple(BACKTRACK));
return BLOCK(gen_op_simple(DUP),
init,
res_var,
gen_op_target(FORK, loop),
loop,
gen_op_bound(LOADVN, res_var));
}
block gen_foreach(const char* varname, block source, block init, block update, block extract) {
block output = gen_op_targetlater(JUMP);
block state_var = gen_op_var_fresh(STOREV, "foreach");
block loop = BLOCK(gen_op_simple(DUPN),
// get a value from the source expression:
source,
// bind the $varname to that value for all the code in
// this block_bind() to see:
block_bind(gen_op_unbound(STOREV, varname),
// load the loop state variable
BLOCK(gen_op_bound(LOADVN, state_var),
// generate updated state
update,
// save the updated state for value extraction
gen_op_simple(DUP),
// save new state
gen_op_bound(STOREV, state_var),
// extract an output...
extract,
// ...and output it by jumping
// past the BACKTRACK that comes
// right after the loop body,
// which in turn is there
// because...
//
// (Incidentally, extract can also
// backtrack, e.g., if it calls
// empty, in which case we don't
// get here.)
output),
OP_HAS_VARIABLE));
block foreach = BLOCK(gen_op_simple(DUP),
init,
state_var,
gen_op_target(FORK, loop),
loop,
// ...at this point `foreach`'s original input
// will be on top of the stack, and we don't
// want to output it, so we backtrack.
gen_op_simple(BACKTRACK));
inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop
return foreach;
}
block gen_definedor(block a, block b) {
// var found := false
block found_var = gen_op_var_fresh(STOREV, "found");
block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);
// if found, backtrack. Otherwise execute b
block backtrack = gen_op_simple(BACKTRACK);
block tail = BLOCK(gen_op_simple(DUP),
gen_op_bound(LOADV, found_var),
gen_op_target(JUMP_F, backtrack),
backtrack,
gen_op_simple(POP),
b);
// try again
block if_notfound = gen_op_simple(BACKTRACK);
// found := true, produce result
block if_found = BLOCK(gen_op_simple(DUP),
gen_const(jv_true()),
gen_op_bound(STOREV, found_var),
gen_op_target(JUMP, tail));
return BLOCK(init,
gen_op_target(FORK, if_notfound),
a,
gen_op_target(JUMP_F, if_found),
if_found,
if_notfound,
tail);
}
int block_has_main(block top) {
for (inst *c = top.first; c; c = c->next) {
if (c->op == TOP)
return 1;
}
return 0;
}
int block_is_funcdef(block b) {
if (b.first != NULL && b.first->op == CLOSURE_CREATE)
return 1;
return 0;
}
block gen_condbranch(block iftrue, block iffalse) {
iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
}
block gen_and(block a, block b) {
// a and b = if a then (if b then true else false) else false
return BLOCK(gen_op_simple(DUP), a,
gen_condbranch(BLOCK(gen_op_simple(POP),
b,
gen_condbranch(gen_const(jv_true()),
gen_const(jv_false()))),
BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
}
block gen_or(block a, block b) {
// a or b = if a then true else (if b then true else false)
return BLOCK(gen_op_simple(DUP), a,
gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
BLOCK(gen_op_simple(POP),
b,
gen_condbranch(gen_const(jv_true()),
gen_const(jv_false())))));
}
block gen_var_binding(block var, const char* name, block body) {
// var bindings can be added after coding the program; leave the TOP first.
block top = gen_noop();
if (body.first && body.first->op == TOP)
top = inst_block(block_take(&body));
return BLOCK(top, gen_op_simple(DUP), var,
block_bind(gen_op_unbound(STOREV, name),
body, OP_HAS_VARIABLE));
}
// Like gen_var_binding(), but bind `break`'s wildcard unbound variable
static block gen_wildvar_binding(block var, const char* name, block body) {
return BLOCK(gen_op_simple(DUP), var,
block_bind(gen_op_unbound(STOREV, name),
body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
}
block gen_cond(block cond, block iftrue, block iffalse) {
return BLOCK(gen_op_simple(DUP), cond,
gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
BLOCK(gen_op_simple(POP), iffalse)));
}
block gen_try_handler(block handler) {
// Quite a pain just to hide jq's internal errors.
return gen_cond(// `if type=="object" and .__jq
gen_and(gen_call("_equal",
BLOCK(gen_lambda(gen_const(jv_string("object"))),
gen_lambda(gen_noop()))),
BLOCK(gen_subexp(gen_const(jv_string("__jq"))),
gen_noop(),
gen_op_simple(INDEX))),
// `then error`
gen_call("error", gen_noop()),
// `else HANDLER end`
handler);
}
block gen_try(block exp, block handler) {
/*
* Produce something like:
* FORK_OPT <address of handler>
* <exp>
* JUMP <end of handler>
* <handler>
*
* If this is not an internal try/catch, then catch and re-raise
* internal errors to prevent them from leaking.
*
* The handler will only execute if we backtrack to the FORK_OPT with
* an error (exception). If <exp> produces no value then FORK_OPT
* will backtrack (propagate the `empty`, as it were. If <exp>
* produces a value then we'll execute whatever bytecode follows this
* sequence.
*/
if (!handler.first && !handler.last)
// A hack to deal with `.` as the handler; we could use a real NOOP here
handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP), handler);
exp = BLOCK(exp, gen_op_target(JUMP, handler));
return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler);
}
block gen_label(const char *label, block exp) {
block cond = gen_call("_equal",
BLOCK(gen_lambda(gen_noop()),
gen_lambda(gen_op_unbound(LOADV, label))));
return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
BLOCK(gen_op_simple(POP),
// try exp catch if . == $label
// then empty
// else error end
//
// Can't use gen_binop(), as that's firmly
// stuck in parser.y as it refers to things
// like EQ.
gen_try(exp,
gen_cond(cond,
gen_op_simple(BACKTRACK),
gen_call("error", gen_noop())))));
}
block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
inst* i = inst_new(CLOSURE_CREATE_C);
i->imm.cfunc = &cfunctions[cfunc];
i->symbol = strdup(i->imm.cfunc->name);
code = block_bind(inst_block(i), code, OP_IS_CALL_PSEUDO);
}
return code;
}
static uint16_t nesting_level(struct bytecode* bc, inst* target) {
uint16_t level = 0;
assert(bc && target->compiled);
while (bc && target->compiled != bc) {
level++;
bc = bc->parent;
}
assert(bc && bc == target->compiled);
return level;
}
static int count_cfunctions(block b) {
int n = 0;
for (inst* i = b.first; i; i = i->next) {
if (i->op == CLOSURE_CREATE_C) n++;
n += count_cfunctions(i->subfn);
}
return n;
}
// Expands call instructions into a calling sequence
static int expand_call_arglist(block* b) {
int errors = 0;
block ret = gen_noop();
for (inst* curr; (curr = block_take(b));) {
if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
if (!curr->bound_by) {
if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
else
locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, block_count_actuals(curr->arglist));
errors++;
// don't process this instruction if it's not well-defined
ret = BLOCK(ret, inst_block(curr));
continue;
}
}
block prelude = gen_noop();
if (curr->op == CALL_JQ) {
int actual_args = 0, desired_args = 0;
// We expand the argument list as a series of instructions
switch (curr->bound_by->op) {
default: assert(0 && "Unknown function type"); break;
case CLOSURE_CREATE:
case CLOSURE_PARAM: {
block callargs = gen_noop();
for (inst* i; (i = block_take(&curr->arglist));) {
assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
block b = inst_block(i);
switch (i->op) {
default: assert(0 && "Unknown type of parameter"); break;
case CLOSURE_REF:
block_append(&callargs, b);
break;
case CLOSURE_CREATE:
block_append(&prelude, b);
block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
break;
}
actual_args++;
}
curr->imm.intval = actual_args;
curr->arglist = callargs;
if (curr->bound_by->op == CLOSURE_CREATE) {
for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
assert(i->op == CLOSURE_PARAM);
desired_args++;
}
}
break;
}
case CLOSURE_CREATE_C: {
for (inst* i; (i = block_take(&curr->arglist)); ) {
assert(i->op == CLOSURE_CREATE); // FIXME
block body = i->subfn;
i->subfn = gen_noop();
inst_free(i);
// arguments should be pushed in reverse order, prepend them to prelude
errors += expand_call_arglist(&body);
prelude = BLOCK(gen_subexp(body), prelude);
actual_args++;
}
assert(curr->op == CALL_JQ);
curr->op = CALL_BUILTIN;
curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
assert(curr->bound_by->op == CLOSURE_CREATE_C);
desired_args = curr->bound_by->imm.cfunc->nargs - 1;
assert(!curr->arglist.first);
break;
}
}
assert(actual_args == desired_args); // because now handle this above
}
ret = BLOCK(ret, prelude, inst_block(curr));
}
*b = ret;
return errors;
}
static int compile(struct bytecode* bc, block b) {
int errors = 0;
int pos = 0;
int var_frame_idx = 0;
bc->nsubfunctions = 0;
errors += expand_call_arglist(&b);
b = BLOCK(b, gen_op_simple(RET));
jv localnames = jv_array();
for (inst* curr = b.first; curr; curr = curr->next) {
if (!curr->next) assert(curr == b.last);
int length = opcode_describe(curr->op)->length;
if (curr->op == CALL_JQ) {
for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
length += 2;
}
}
pos += length;
curr->bytecode_pos = pos;
curr->compiled = bc;
assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
curr->bound_by == curr) {
curr->imm.intval = var_frame_idx++;
localnames = jv_array_append(localnames, jv_string(curr->symbol));
}
if (curr->op == CLOSURE_CREATE) {
assert(curr->bound_by == curr);
curr->imm.intval = bc->nsubfunctions++;
}
if (curr->op == CLOSURE_CREATE_C) {
assert(curr->bound_by == curr);
int idx = bc->globals->ncfunctions++;
bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
jv_string(curr->symbol));
bc->globals->cfunctions[idx] = *curr->imm.cfunc;
curr->imm.intval = idx;
}
}
bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
if (bc->nsubfunctions) {
bc->subfunctions = jv_mem_alloc(sizeof(struct bytecode*) * bc->nsubfunctions);
for (inst* curr = b.first; curr; curr = curr->next) {
if (curr->op == CLOSURE_CREATE) {
struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
bc->subfunctions[curr->imm.intval] = subfn;
subfn->globals = bc->globals;
subfn->parent = bc;
subfn->nclosures = 0;
subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
jv params = jv_array();
for (inst* param = curr->arglist.first; param; param = param->next) {
assert(param->op == CLOSURE_PARAM);
assert(param->bound_by == param);
param->imm.intval = subfn->nclosures++;
param->compiled = subfn;
params = jv_array_append(params, jv_string(param->symbol));
}
subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
errors += compile(subfn, curr->subfn);
curr->subfn = gen_noop();
}
}
} else {
bc->subfunctions = 0;
}
bc->codelen = pos;
uint16_t* code = jv_mem_alloc(sizeof(uint16_t) * bc->codelen);
bc->code = code;
pos = 0;
jv constant_pool = jv_array();
int maxvar = -1;
if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
const struct opcode_description* op = opcode_describe(curr->op);
if (op->length == 0)
continue;
code[pos++] = curr->op;
assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
if (curr->op == CALL_BUILTIN) {
assert(curr->bound_by->op == CLOSURE_CREATE_C);
assert(!curr->arglist.first);
code[pos++] = (uint16_t)curr->imm.intval;
code[pos++] = curr->bound_by->imm.intval;
} else if (curr->op == CALL_JQ) {
assert(curr->bound_by->op == CLOSURE_CREATE ||
curr->bound_by->op == CLOSURE_PARAM);
code[pos++] = (uint16_t)curr->imm.intval;
code[pos++] = nesting_level(bc, curr->bound_by);
code[pos++] = curr->bound_by->imm.intval |
(curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
code[pos++] = nesting_level(bc, arg->bound_by);
code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
}
} else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
// STORE_GLOBAL: constant global, basically
code[pos++] = jv_array_length(jv_copy(constant_pool));
constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
code[pos++] = nesting_level(bc, curr->bound_by);
uint16_t var = (uint16_t)curr->bound_by->imm.intval;
code[pos++] = var;
} else if (op->flags & OP_HAS_CONSTANT) {
code[pos++] = jv_array_length(jv_copy(constant_pool));
constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
} else if (op->flags & OP_HAS_VARIABLE) {
code[pos++] = nesting_level(bc, curr->bound_by);
uint16_t var = (uint16_t)curr->bound_by->imm.intval;
code[pos++] = var;
if (var > maxvar) maxvar = var;
} else if (op->flags & OP_HAS_BRANCH) {
assert(curr->imm.target->bytecode_pos != -1);
assert(curr->imm.target->bytecode_pos > pos); // only forward branches
code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
pos++;
} else if (op->length > 1) {
assert(0 && "codegen not implemented for this operation");
}
}
bc->constants = constant_pool;
bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
block_free(b);
return errors;
}
int block_compile(block b, struct bytecode** out) {
struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
bc->parent = 0;
bc->nclosures = 0;
bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
int ncfunc = count_cfunctions(b);
bc->globals->ncfunctions = 0;
bc->globals->cfunctions = jv_mem_alloc(sizeof(struct cfunction) * ncfunc);
bc->globals->cfunc_names = jv_array();
bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
int nerrors = compile(bc, b);
assert(bc->globals->ncfunctions == ncfunc);
if (nerrors > 0) {
bytecode_free(bc);
*out = 0;
} else {
*out = bc;
}
return nerrors;
}
void block_free(block b) {
struct inst* next;
for (struct inst* curr = b.first; curr; curr = next) {
next = curr->next;
inst_free(curr);
}
}