blob: d0143d5fe89206fad49870c87aeeb8a60c7366d3 [file] [log] [blame]
/*
Copyright 2002-2003 John Plevyak, All Rights Reserved
*/
#include "d.h"
static const char *mybasename(const char *str) {
const char *base = strrchr(str, '/');
return base ? base+1 : str;
}
typedef struct ScannerBlock {
int state_index;
int scanner_index;
int block_index;
ScanState **chars;
ScanStateTransition **transitions;
} ScannerBlock;
typedef Vec(ScannerBlock*) VecScannerBlock;
typedef Vec(State *) VecState;
static int
scanner_size(State *s) {
if (s->scanner.states.n < 255 && s->scanner.transitions.n < 255)
return 1;
if (s->scanner.states.n < 32384 && s->scanner.transitions.n < 32384)
return 2;
return 4;
}
static char *
make_type(int i) {
switch (i) {
case 1: return "unsigned char";
case 2: return "unsigned short";
case 4: return "unsigned int";
default: d_fail("bad case"); return "";
}
}
static char *
scanner_type(State *s) {
return make_type(scanner_size(s));
}
static char *
make_u_type(int i) {
switch (i) {
case 1: return "uint8";
case 2: return "uint16";
case 4: return "uint32";
default: d_fail("bad case"); return "";
}
}
static char *
scanner_u_type(State *s) {
return make_u_type(scanner_size(s));
}
static uint32
scanner_block_hash_fn(ScannerBlock *b, hash_fns_t *fns) {
uint32 hash = 0;
int i, block_size = (int)fns->data[0];
ScanState **sb = b->chars;
for (i = 0; i < block_size; i++) {
hash *= 17;
hash += sb[i] ? sb[i]->index + 2 : 1;
}
return hash;
}
static int
scanner_block_cmp_fn(ScannerBlock *a, ScannerBlock *b, hash_fns_t *fns) {
int i, block_size = (int)fns->data[0];
ScanState **sa = a->chars;
ScanState **sb = b->chars;
for (i = 0; i < block_size; i++) {
if (sa[i] == sb[i])
continue;
if (!sa[i] || !sb[i])
return 1;
if (sa[i]->index != sb[i]->index)
return 1;
}
return 0;
}
hash_fns_t
scanner_block_fns = {
(hash_fn_t)scanner_block_hash_fn,
(cmp_fn_t)scanner_block_cmp_fn,
{0, 0}
};
static uint32
trans_scanner_block_hash_fn(ScannerBlock *b, hash_fns_t *fns) {
uint32 hash = 0;
int i, block_size = (int)fns->data[0];
ScanStateTransition **sb = b->transitions;
for (i = 0; i < block_size; i++) {
hash *= 3;
hash += sb[i] ? sb[i]->index + 1 : 0;
}
return hash;
}
static int
trans_scanner_block_cmp_fn(ScannerBlock *a, ScannerBlock *b, hash_fns_t *fns) {
int i, block_size = (int)fns->data[0];
ScanStateTransition **sa = a->transitions;
ScanStateTransition **sb = b->transitions;
for (i = 0; i < block_size; i++) {
if (sa[i] == sb[i])
continue;
if (!sa[i] || !sb[i])
return 1;
if (sa[i]->index != sb[i]->index)
return 1;
}
return 0;
}
hash_fns_t
trans_scanner_block_fns = {
(hash_fn_t)trans_scanner_block_hash_fn,
(cmp_fn_t)trans_scanner_block_cmp_fn,
{0, 0}
};
static uint32
shift_hash_fn(Action *sa, hash_fns_t *fns) {
return sa->term->index;
}
static int
shift_cmp_fn(Action *sa, Action *sb, hash_fns_t *fns) {
return sa->term->index != sb->term->index;
}
hash_fns_t
shift_fns = {
(hash_fn_t)shift_hash_fn,
(cmp_fn_t)shift_cmp_fn,
{0, 0}
};
static void
write_scanner_data_as_C(FILE *fp, Grammar *g, char *tag) {
State *s;
ScannerBlock *vsblock, *xv, *yv;
VecScannerBlock scanner_block_hash[4], *pscanner_block_hash;
VecScannerBlock trans_scanner_block_hash[4], *ptrans_scanner_block_hash;
VecAction shift_hash;
int nvsblocks, ivsblock, i, j, k, x, xx;
Action *a;
VecScanState *ss;
char speculative_code[256];
Term *t;
/* shift_actions */
for (i = 0; i < g->terminals.n; i++) {
t = g->terminals.v[i];
if (t->regex_production && t->regex_production->rules.v[0]->speculative_code.code)
sprintf(speculative_code, "d_speculative_reduction_code_%d_%d_%s",
t->regex_production->index, t->regex_production->rules.v[0]->index, tag);
else
strcpy(speculative_code, "NULL");
fprintf(fp,
"D_Shift d_shift_%d_%s = { %d, %d, %d, %d, %d, %s };\n",
i, tag,
g->terminals.v[i]->index + g->productions.n,
g->terminals.v[i]->scan_kind,
g->terminals.v[i]->op_assoc,
g->terminals.v[i]->op_priority,
g->terminals.v[i]->term_priority,
speculative_code);
}
fprintf(fp,"\n");
/* scanners */
nvsblocks = 0;
for (i = 0; i < g->states.n; i++)
nvsblocks += g->states.v[i]->scanner.states.n * g->scanner_blocks;
vsblock = MALLOC((nvsblocks ? nvsblocks : 1) * sizeof(ScannerBlock));
for (i = 0; i < 4; i++) {
vec_clear(&scanner_block_hash[i]);
vec_clear(&trans_scanner_block_hash[i]);
}
scanner_block_fns.data[0] = (void*)g->scanner_block_size;
scanner_block_fns.data[1] = (void*)g;
trans_scanner_block_fns.data[0] = (void*)g->scanner_block_size;
trans_scanner_block_fns.data[1] = (void*)g;
/* shift */
vec_clear(&shift_hash);
ivsblock = 0;
for (i = 0; i < g->states.n; i++) {
s = g->states.v[i];
ss = &s->scanner.states;
/* build shifts */
if (s->shift_actions.n && !s->same_shifts) {
fprintf(fp, "D_Shift *d_shifts_%d_%s[] = {\n", i, tag);
for (j = 0; j < s->shift_actions.n; j++) {
a = s->shift_actions.v[j];
fprintf(fp, "&d_shift_%d_%s%s", a->term->index, tag,
j == s->shift_actions.n - 1 ? ", NULL" : ", ");
}
fprintf(fp, "};\n\n");
}
/* build accepts differences */
for (j = 0; j < s->scanner.transitions.n; j++) {
VecAction *va = &s->scanner.transitions.v[j]->accepts_diff;
fprintf(fp, "D_Shift *d_accepts_diff_%d_%d_%s[] = {",
i, j, tag);
for (k = 0; k < va->n; k++)
fprintf(fp, "&d_shift_%d_%s,", va->v[k]->term->index, tag);
fprintf(fp, "0};\n");
}
if (s->scanner.transitions.n) {
fprintf(fp, "D_Shift **d_accepts_diff_%d_%s[] = {\n", i, tag);
for (j = 0; j < s->scanner.transitions.n; j++) {
fprintf(fp, "d_accepts_diff_%d_%d_%s%s",
i, j, tag, j == s->scanner.transitions.n - 1 ? "\n" : ",\n");
}
fprintf(fp, "};\n\n");
}
/* build scanner_block_hash */
pscanner_block_hash = &scanner_block_hash[scanner_size(s)-1];
ptrans_scanner_block_hash = &trans_scanner_block_hash[scanner_size(s)-1];
for (j = 0; j < ss->n; j++) {
if (!s->same_shifts) {
for (k = 0; k < g->scanner_blocks; k++) {
vsblock[ivsblock].state_index = s->index;
vsblock[ivsblock].scanner_index = j;
vsblock[ivsblock].block_index = k;
vsblock[ivsblock].chars =
(void*)&ss->v[j]->chars[k * g->scanner_block_size];
vsblock[ivsblock].transitions =
(void*)&ss->v[j]->transition[k * g->scanner_block_size];
xv = &vsblock[ivsblock];
ivsblock++;
assert(ivsblock <= nvsblocks);
/* output state scanner blocks */
yv = set_add_fn(pscanner_block_hash, xv, &scanner_block_fns);
if (xv == yv) {
fprintf(fp,
"%s d_scanner_%d_%d_%d_%s[SCANNER_BLOCK_SIZE] = {\n",
scanner_type(s), i, j, k, tag);
for (x = 0; x < g->scanner_block_size; x++) {
xx = x + k * g->scanner_block_size;
fprintf(fp, "%d", ss->v[j]->chars[xx] ?
ss->v[j]->chars[xx]->index + 1 : 0);
if (x != g->scanner_block_size) fprintf(fp, ",");
if (x % 16 == 15) fprintf(fp, "\n");
}
fprintf(fp, "};\n\n");
}
if (s->scan_kind != D_SCAN_LONGEST) {
/* output accept_diff scanner blocks */
yv = set_add_fn(ptrans_scanner_block_hash, xv,
&trans_scanner_block_fns);
if (xv == yv) {
fprintf(fp,
"%s d_accepts_diff_%d_%d_%d_%s[SCANNER_BLOCK_SIZE] = {\n",
scanner_type(s), i, j, k, tag);
for (x = 0; x < g->scanner_block_size; x++) {
xx = x + k * g->scanner_block_size;
fprintf(fp, "%d", ss->v[j]->transition[xx]->index);
if (x != g->scanner_block_size) fprintf(fp, ",");
if (x % 16 == 15) fprintf(fp, "\n");
}
fprintf(fp, "};\n\n");
}
}
}
/* output shifts */
if (ss->v[j]->accepts.n) {
for (k = 0; k < ss->v[j]->accepts.n; k++) {
Action *a = ss->v[j]->accepts.v[k], *aa;
char tmp[256];
sprintf(tmp, "d_shift_%d_%d_%s", i, j, tag);
if (ss->v[j]->accepts.n == 1) {
a->temp_string = strdup(tmp);
aa = set_add_fn(&shift_hash, a, &shift_fns);
if (aa != a)
continue;
}
/* output shifts */
if (!k)
fprintf(fp, "D_Shift *%s[] = { ", tmp);
fprintf(fp, "&d_shift_%d_%s%s",
a->term->index, tag, k == ss->v[j]->accepts.n - 1 ?
", NULL};\n\n" : ", ");
}
}
}
}
}
for (i = 0; i < g->states.n; i++) {
s = g->states.v[i];
ss = &s->scanner.states;
ivsblock = 0;
if (ss->n && !s->same_shifts) {
/* output scanner state transition tables */
fprintf(fp, "SB_%s d_scanner_%d_%s[%d] = {\n",
scanner_u_type(s), i, tag, ss->n);
pscanner_block_hash = &scanner_block_hash[scanner_size(s)-1];
for (j = 0; j < ss->n; j++) {
Action *a;
if (ss->v[j]->accepts.n) {
a = ss->v[j]->accepts.v[0];
if (ss->v[j]->accepts.n == 1) {
a = set_add_fn(&shift_hash, a, &shift_fns);
fprintf(fp, "{ %s, {", a->temp_string);
} else
fprintf(fp, "{ d_shift_%d_%d_%s, {", i, j, tag);
} else
fprintf(fp, "{ NULL, {");
for (k = 0; k < g->scanner_blocks; k++) {
ScannerBlock vs;
vs.state_index = s->index;
vs.scanner_index = j;
vs.block_index = k;
vs.chars = (void*)&ss->v[j]->chars[k * g->scanner_block_size];
vs.transitions =
(void*)&ss->v[j]->transition[k * g->scanner_block_size];
xv = &vs;
yv = set_add_fn(pscanner_block_hash, xv, &scanner_block_fns);
assert(yv != xv);
fprintf(fp, "d_scanner_%d_%d_%d_%s",
yv->state_index, yv->scanner_index,
yv->block_index, tag);
if (k != g->scanner_blocks-1) {
fprintf(fp, ", ");
if ((k % 2) == 1) fprintf(fp,"\n ");
}
}
if (j != ss->n-1)
fprintf(fp, "}},\n");
else
fprintf(fp, "}}\n");
}
fprintf(fp, "};\n\n");
if (s->scan_kind != D_SCAN_LONGEST) {
/* output scanner accepts diffs tables */
fprintf(fp, "SB_trans_%s d_transition_%d_%s[%d] = {\n",
scanner_u_type(s), i, tag, ss->n);
ptrans_scanner_block_hash =
&trans_scanner_block_hash[scanner_size(s)-1];
for (j = 0; j < ss->n; j++) {
fprintf(fp, "{{ ");
for (k = 0; k < g->scanner_blocks; k++) {
ScannerBlock vs;
vs.state_index = s->index;
vs.scanner_index = j;
vs.block_index = k;
vs.chars = (void*)&ss->v[j]->chars[k * g->scanner_block_size];
vs.transitions =
(void*)&ss->v[j]->transition[k * g->scanner_block_size];
xv = &vs;
yv = set_add_fn(ptrans_scanner_block_hash, xv,
&trans_scanner_block_fns);
assert(yv != xv);
fprintf(fp, "d_accepts_diff_%d_%d_%d_%s",
yv->state_index, yv->scanner_index,
yv->block_index, tag);
if (k != g->scanner_blocks-1) {
fprintf(fp, ", ");
if ((k % 2) == 1) fprintf(fp,"\n ");
}
}
if (j != ss->n-1)
fprintf(fp, "}},\n");
else
fprintf(fp, "}}\n");
}
fprintf(fp, "};\n\n");
}
}
}
for (i = 0; i < 4; i++)
vec_free(&scanner_block_hash[i]);
FREE(vsblock);
}
#define reduction_index(_r) \
((_r)->same_reduction ? (_r)->same_reduction->index : (_r)->index)
static void
write_goto_data_as_C(FILE *fp, Grammar *g, char *tag) {
Vec(long) vgoto;
State *s;
uint8 *goto_valid = NULL;
int i, j, x, again, lowest, nvalid_bytes, sym, lowest_sym;
nvalid_bytes = ((g->productions.n + g->terminals.n) + 7) / 8;
goto_valid = MALLOC(nvalid_bytes);
vec_clear(&vgoto);
for (i = 0; i < g->states.n; i++) {
s = g->states.v[i];
if (s->gotos.n) {
/* check for goto on token */
for (j = 0; j < s->gotos.n; j++)
if (s->gotos.v[j]->elem->kind == ELEM_TERM &&
s->gotos.v[j]->elem->e.term->kind == TERM_TOKEN)
s->goto_on_token = 1;
/* find lowest goto, set valid bits */
memset(goto_valid, 0, nvalid_bytes);
lowest = 0;
lowest_sym = elem_symbol(g, s->gotos.v[0]->elem);
SET_BIT(goto_valid, lowest_sym);
for (j = 1; j < s->gotos.n; j++) {
sym = elem_symbol(g, s->gotos.v[j]->elem);
SET_BIT(goto_valid, sym);
if (sym < lowest_sym) {
lowest = j;
lowest_sym = sym;
}
}
/* insert into vgoto */
again = 1;
while (again) {
again = 0;
for (j = 0; j < s->gotos.n; j++) {
x = elem_symbol(g, s->gotos.v[j]->elem);
x -= lowest_sym;
while (vgoto.n <= x)
vec_add(&vgoto, 0);
if (vgoto.v[x]) {
again = 1;
/* undo the damage */
for (--j;j >= 0;j--) {
x = elem_symbol(g, s->gotos.v[j]->elem);
x -= lowest_sym;
vgoto.v[x] = 0;
}
lowest_sym--;
break;
} else
vgoto.v[x] = s->gotos.v[j]->state->index + 1;
}
}
s->goto_table_offset = lowest_sym;
/* valid bits */
fprintf(fp, "unsigned char d_goto_valid_%d_%s[] = {\n", i, tag);
for (j = 0; j < nvalid_bytes; j++)
fprintf(fp, "0x%x%s", goto_valid[j],
j == nvalid_bytes - 1 ? "" : ", ");
fprintf(fp, "};\n");
} else
s->goto_table_offset = -INT_MAX;
/* reduce_actions */
if (s->reduce_actions.n) {
fprintf(fp, "D_Reduction *d_reductions_%d_%s[] = {", i, tag);
for (j = 0; j < s->reduce_actions.n; j++)
fprintf(fp, "&d_reduction_%d_%s%s",
reduction_index(s->reduce_actions.v[j]->rule), tag,
j == s->reduce_actions.n - 1 ? "" : ", ");
fprintf(fp, "};\n");
}
/* modified_reduce_actions */
if (s->right_epsilon_hints.n) {
fprintf(fp, "D_RightEpsilonHint d_right_epsilon_hints_%d_%s[] = {",
i, tag);
for (j = 0; j < s->right_epsilon_hints.n; j++) {
fprintf(fp, "{ %d, %d, &d_reduction_%d_%s}%s",
s->right_epsilon_hints.v[j]->depth,
s->right_epsilon_hints.v[j]->state->index,
reduction_index(s->right_epsilon_hints.v[j]->rule), tag,
j == s->right_epsilon_hints.n - 1 ? "" : ", ");
}
fprintf(fp, "};\n");
}
}
/* gotos */
if (vgoto.n) {
fprintf(fp, "unsigned short d_gotos_%s[%d] = {\n", tag, vgoto.n);
for (j = 0; j < vgoto.n; j++) {
fprintf(fp, "%d", vgoto.v[j]);
if (j != vgoto.n - 1) fprintf(fp, ",");
if (j % 16 == 15) fprintf(fp, "\n");
}
fprintf(fp, "};\n\n");
} else
fprintf(fp, "unsigned short d_gotos_%s[1] = {0};\n", tag);
}
static void
write_scanner_code_as_C(FILE *fp, Grammar *g, char *tag) {
int i, j, l;
Action *a;
State *s;
for (i = 0; i < g->states.n; i++) {
s = g->states.v[i];
for (j = 0; j < s->shift_actions.n; j++) {
a = s->shift_actions.v[j];
if (a->kind == ACTION_SHIFT && a->term->kind == TERM_CODE) {
if (!s->scanner_code) {
s->scanner_code = 1;
fprintf(fp, "int d_scan_code_%d_%s(char **as, int *col, int *line,"
"unsigned short *symbol, int *term_priority,"
"unsigned char *op_assoc, int *op_priority) {\n"
" int res;\n",
i, tag);
}
fprintf(fp, " if ((res = ");
l = strlen(a->term->string);
if (a->term->string[l - 1] == ')') {
fwrite(a->term->string, l - 1, 1, fp);
fprintf(fp, ", ");
} else
fprintf(fp, "%s(", a->term->string);
fprintf(fp, "as, col, line, op_assoc, op_priority))) {\n"
" *symbol = %d;\n"
" *term_priority = %d;\n"
" return res;\n"
" }\n",
a->term->index + g->productions.n,
a->term->term_priority);
}
}
if (s->scanner_code)
fprintf(fp, " return 0;\n}\n\n");
}
}
static int
find_symbol(Grammar *g, char *s, char *e, int kind) {
while (*s && isspace(*s)) s++;
if (e > s) {
if (kind == D_SYMBOL_NTERM) {
Production *p;
if ((p = lookup_production(g, s, e-s)))
return p->index;
} else if (kind == D_SYMBOL_STRING) {
int i;
int found = -1;
for (i = 0; i < g->terminals.n;i++)
if (g->terminals.v[i]->kind == TERM_STRING &&
g->terminals.v[i]->string_len == (e-s) &&
!strncmp(s, g->terminals.v[i]->string, e-s)) {
if (found > 0) {
d_fail("attempt to find symbol for non-unique string '%s'\n",
g->terminals.v[i]->string);
} else
found = i;
}
if (found > 0)
return found + g->productions.n;
}
}
return -1;
}
static void
write_code_as_C(FILE *fp, Grammar *g, Rule *r, char *code,
int line, char *pathname)
{
char *c;
fprintf(fp, "{\n");
if (g->write_line_directives)
fprintf(fp, "#line %d \"%s\"\n", line, mybasename(pathname));
c = code;
while (*c) {
if (*c == '$') {
c++;
if (*c == '#') {
fprintf(fp, "(_n_children)");
c++;
} else if (*c == 'g') {
fprintf(fp, "(D_PN(_ps, _offset)->globals)");
c++;
} else if (*c == 'n') {
++c;
if (isdigit(*c)) {
int n = atoi(c);
fprintf(fp, "(*(D_PN(_children[%d], _offset)))", n);
if (n > r->elems.n-1)
d_fail("$nXXXX greater than number of children at line %d", line);
while (isdigit(*c)) c++;
} else
fprintf(fp, "(*(D_PN(_ps, _offset)))");
} else if (*c == '$') {
fprintf(fp, "(D_PN(_ps, _offset)->user)");
c++;
} else if (isdigit(*c)) {
int n = atoi(c);
fprintf(fp, "(D_PN(_children[%d], _offset)->user)", n);
while (isdigit(*c)) c++;
} else if (*c == '{') {
char *e = ++c, *a;
while (*e && *e != '}' && !isspace(*e)) e++;
a = e;
if (isspace(*a)) a++;
while (*a && *a != '}') a++;
if (STREQ(c, e-c, "reject")) {
fprintf(fp, " return -1 ");
} else if (STREQ(c, e-c, "free_below")) {
fprintf(fp, " free_D_ParseTreeBelow(_parser, (D_PN(_ps, _offset)))");
} else if (STREQ(c, e-c, "scope")) {
fprintf(fp, "(D_PN(_ps, _offset)->scope)");
} else if (STREQ(c, e-c, "parser")) {
fprintf(fp, "_parser");
} else if (STREQ(c, e-c, "nterm")) {
fprintf(fp, "%d", find_symbol(g, e, a, D_SYMBOL_NTERM));
} else if (STREQ(c, e-c, "string")) {
fprintf(fp, "%d", find_symbol(g, e, a, D_SYMBOL_STRING));
} else if (STREQ(c, e-c, "pass")) {
D_Pass *p = find_pass(g, e, a);
if (!p)
d_fail("unknown pass '%s' line %d", dup_str(e, a), line);
fprintf(fp, "%d", p->index);
} else
d_fail("bad $ escape in code line %u\n", line);
c = a + 1;
} else
d_fail("bad $ escape in code line %u\n", line);
} else {
fputc(*c, fp);
c++;
}
}
fprintf(fp, " return 0;");
fprintf(fp, "}\n\n");
}
static void
write_global_code_as_C(FILE *fp, Grammar *g, char *tag) {
int i;
char *c;
for (i = 0; i < g->ncode; i++) {
if (g->write_line_directives)
fprintf(fp, "#line %d \"%s\"\n", g->code[i].line, mybasename(g->pathname));
c = g->code[i].code;
while (*c) {
if (*c == '$') {
c++;
if (*c == '{') {
char *e = ++c, *a;
while (*e && *e != '}' && !isspace(*e)) ++e;
a = e;
if (isspace(*a)) ++a;
while (*a && *a != '}') a++;
if (STREQ(c, e-c, "nterm")) {
fprintf(fp, "%d", find_symbol(g, e, a, D_SYMBOL_NTERM));
} else if (STREQ(c, e-c, "string")) {
fprintf(fp, "%d", find_symbol(g, e, a, D_SYMBOL_STRING));
} else
d_fail("bad $ escape in code line %u\n", g->code[i].line + i);
c = a + 1;
}
else
d_fail("bad $ escape in code line %u\n", g->code[i].line + i);
} else {
fputc(*c, fp);
c++;
}
}
fprintf(fp, "\n");
}
}
static char * reduction_args = "(void *_ps, void **_children, int _n_children, int _offset, D_Parser *_parser)";
static void
write_reductions_as_C(FILE *fp, Grammar *g, char *tag) {
int i, j, k, l, pmax;
Production *p, *pdefault;
Rule *r, *rdefault = NULL;
char final_code[256], speculative_code[256], pass_code[256];
pdefault = lookup_production(g, "_", 1);
if (pdefault) {
rdefault = pdefault->rules.v[0];
fprintf(fp, "int d_speculative_reduction_code_%d_%d_%s%s;\n",
rdefault->prod->index, rdefault->index, tag, reduction_args);
fprintf(fp, "int d_final_reduction_code_%d_%d_%s%s;\n",
rdefault->prod->index, rdefault->index, tag, reduction_args);
fprintf(fp, "extern D_ReductionCode d_pass_code_%d_%d_%s[];\n",
rdefault->prod->index, rdefault->index, tag);
for (i = 0; i < rdefault->pass_code.n; i++)
fprintf(fp, "int d_pass_code_%d_%d_%d_%s%s;\n",
i, rdefault->prod->index, rdefault->index, tag, reduction_args);
}
for (i = 0; i < g->productions.n; i++) {
p = g->productions.v[i];
for (j = p->rules.n - 1; j >= 0; j--) {
r = p->rules.v[j];
for (k = 0; k < j; k++)
if (r->elems.n == p->rules.v[k]->elems.n &&
r->speculative_code.code == p->rules.v[k]->speculative_code.code &&
r->final_code.code == p->rules.v[k]->final_code.code &&
r->op_priority == p->rules.v[k]->op_priority &&
r->op_assoc == p->rules.v[k]->op_assoc &&
r->rule_priority == p->rules.v[k]->rule_priority &&
r->rule_assoc == p->rules.v[k]->rule_assoc &&
r->action_index == p->rules.v[k]->action_index)
{
if (r->pass_code.n != p->rules.v[k]->pass_code.n)
continue;
for (l = 0; l < r->pass_code.n; l++) {
if (!r->pass_code.v[l] && !p->rules.v[k]->pass_code.v[l])
continue;
if (!r->pass_code.v[l] || !p->rules.v[k]->pass_code.v[l])
goto Lcontinue;
if (r->pass_code.v[l]->code != p->rules.v[k]->pass_code.v[l]->code)
goto Lcontinue;
}
r->same_reduction = p->rules.v[k];
break;
Lcontinue:;
}
}
for (j = 0; j < p->rules.n; j++) {
r = p->rules.v[j];
if (r->same_reduction)
continue;
if (r->speculative_code.code) {
fprintf(fp, "int d_speculative_reduction_code_%d_%d_%s%s ",
r->prod->index, r->index, tag, reduction_args);
write_code_as_C(fp, g, r, r->speculative_code.code, r->speculative_code.line, g->pathname);
}
if (r->final_code.code) {
fprintf(fp, "int d_final_reduction_code_%d_%d_%s%s ",
r->prod->index, r->index, tag, reduction_args);
write_code_as_C(fp, g, r, r->final_code.code, r->final_code.line, g->pathname);
}
for (k = 0; k < r->pass_code.n; k++) {
if (r->pass_code.v[k]) {
fprintf(fp, "int d_pass_code_%d_%d_%d_%s%s ",
k, r->prod->index, r->index, tag, reduction_args);
write_code_as_C(fp, g, r, r->pass_code.v[k]->code, r->pass_code.v[k]->line, g->pathname);
}
}
if (r->speculative_code.code)
sprintf(speculative_code, "d_speculative_reduction_code_%d_%d_%s",
r->prod->index, r->index, tag);
else if (rdefault && rdefault->speculative_code.code)
sprintf(speculative_code, "d_speculative_reduction_code_%d_%d_%s",
rdefault->prod->index, rdefault->index, tag);
else
strcpy(speculative_code, "NULL");
if (r->final_code.code)
sprintf(final_code, "d_final_reduction_code_%d_%d_%s", r->prod->index, r->index, tag);
else if (rdefault && rdefault->final_code.code)
sprintf(final_code, "d_final_reduction_code_%d_%d_%s",
rdefault->prod->index, rdefault->index, tag);
else
strcpy(final_code, "NULL");
pmax = r->pass_code.n;
if (r->pass_code.n || (rdefault && rdefault->pass_code.n)) {
if (rdefault && rdefault->pass_code.n > pmax)
pmax = rdefault->pass_code.n;
if (!r->pass_code.n)
sprintf(pass_code, "d_pass_code_%d_%d_%s",
rdefault->prod->index, rdefault->index, tag);
else {
sprintf(pass_code, "d_pass_code_%d_%d_%s",
r->prod->index, r->index, tag);
fprintf(fp, "D_ReductionCode %s[] = {", pass_code);
for (k = 0; k < pmax; k++) {
if (r->pass_code.n > k && r->pass_code.v[k])
fprintf(fp, "d_pass_code_%d_%d_%d_%s%s", k, r->prod->index, r->index, tag,
k < pmax-1 ? ", " : "");
else
if (rdefault && rdefault->pass_code.n > k && rdefault->pass_code.v[k])
fprintf(fp, "d_pass_code_%d_%d_%d_%s%s", k, rdefault->prod->index,
rdefault->index, tag, k < pmax-1 ? ", " : "");
else
fprintf(fp, "NULL%s", k < pmax-1 ? ", " : "");
}
fprintf(fp, "};\n\n");
}
} else
strcpy(pass_code, "NULL");
fprintf(fp, "D_Reduction d_reduction_%d_%s = ", r->index, tag);
fprintf(fp, "{%d, %d, %s, %s, %d, %d, %d, %d, %d, %d, %s};\n",
r->elems.n, r->prod->index,
speculative_code, final_code,
r->op_assoc, r->rule_assoc,
r->op_priority, r->rule_priority,
r->prod->internal ? -1 : r->action_index,
pmax, pass_code);
}
}
}
static uint32
er_hint_hash_fn(State *a, hash_fns_t *fns) {
VecHint *sa = &a->error_recovery_hints;
uint32 hash = 0, i;
Term *ta;
for (i = 0; i < sa->n; i++) {
ta = sa->v[i]->rule->elems.v[sa->v[i]->rule->elems.n - 1]->e.term;
hash += (sa->v[i]->depth + 1) * 13;
hash += strhashl(ta->string, ta->string_len);
if (sa->v[i]->rule)
hash += sa->v[i]->rule->prod->index * 10007;
}
return hash;
}
static int
er_hint_cmp_fn(State *a, State *b, hash_fns_t *fns) {
int i;
VecHint *sa = &a->error_recovery_hints, *sb = &b->error_recovery_hints;
Term *ta, *tb;
if (sa->n != sb->n)
return 1;
for (i = 0; i < sa->n; i++) {
ta = sa->v[i]->rule->elems.v[sa->v[i]->rule->elems.n - 1]->e.term;
tb = sb->v[i]->rule->elems.v[sb->v[i]->rule->elems.n - 1]->e.term;
if (sa->v[i]->depth != sb->v[i]->depth ||
strcmp(ta->string, tb->string) ||
sa->v[i]->rule->prod->index != sb->v[i]->rule->prod->index)
return 1;
}
return 0;
}
hash_fns_t
er_hint_hash_fns = {
(hash_fn_t)er_hint_hash_fn,
(cmp_fn_t)er_hint_cmp_fn,
{0, 0}
};
static void
write_error_data_as_C(FILE *fp, Grammar *g, VecState *er_hash, char *tag) {
int i, j;
State *s;
Term *t;
State *h;
char *ss;
if (g->states.n) {
for (i = 0; i < g->states.n; i++) {
s = g->states.v[i];
if (s->error_recovery_hints.n) {
h = set_add_fn(er_hash, s, &er_hint_hash_fns);
if (h == s) {
fprintf(fp,
"D_ErrorRecoveryHint d_error_recovery_hints_%d_%s[] = {%s",
i, tag, s->error_recovery_hints.n > 1 ? "\n" : "");
for (j = 0; j < s->error_recovery_hints.n; j++) {
t = s->error_recovery_hints.v[j]->rule->elems.v[
s->error_recovery_hints.v[j]->rule->elems.n - 1]->e.term;
ss = escape_string(t->string);
fprintf(fp, "{ %d, %d, \"%s\"}%s",
s->error_recovery_hints.v[j]->depth,
s->error_recovery_hints.v[j]->rule->prod->index,
ss,
j == s->error_recovery_hints.n - 1 ? "" : ",\n");
FREE(ss);
}
fprintf(fp, "};\n");
}
}
}
}
}
static char *scan_kind_strings[] = {"D_SCAN_ALL", "D_SCAN_LONGEST", "D_SCAN_MIXED", NULL};
static void
write_state_data_as_C(FILE *fp, Grammar *g, VecState *er_hash, char *tag) {
int i;
State *s, *h;
fprintf(fp, "\nD_State d_states_%s[] = ", tag);
if (g->states.n) {
fprintf(fp, "{\n");
for (i = 0; i < g->states.n; i++) {
s = g->states.v[i];
fprintf(fp, "{ ");
if (s->gotos.n)
fprintf(fp,"d_goto_valid_%d_%s, ", i, tag);
else
fprintf(fp,"NULL, ");
fprintf(fp,"%d, ", s->goto_table_offset);
if (s->reduce_actions.n)
fprintf(fp, "{ %d, d_reductions_%d_%s}, ", s->reduce_actions.n,
i, tag);
else
fprintf(fp, "{ 0, NULL}, ");
if (s->right_epsilon_hints.n)
fprintf(fp, "{ %d, d_right_epsilon_hints_%d_%s}, ",
s->right_epsilon_hints.n, i, tag);
else
fprintf(fp, "{ 0, NULL}, ");
if (s->error_recovery_hints.n) {
h = set_add_fn(er_hash, s, &er_hint_hash_fns);
fprintf(fp, "{ %d, d_error_recovery_hints_%d_%s}, ",
s->error_recovery_hints.n, h->index, tag);
} else
fprintf(fp, "{ 0, NULL}, ");
if (s->shift_actions.n)
fprintf(fp, "d_shifts_%d_%s, ",
s->same_shifts ? s->same_shifts->index : i, tag);
else if (s->scanner_code || (g->scanner.code && s->goto_on_token))
fprintf(fp, "D_SHIFTS_CODE, ");
else
fprintf(fp, "NULL, ");
if (g->scanner.code) {
if (s->goto_on_token)
fprintf(fp, "%s, ", g->scanner.code);
else
fprintf(fp, "NULL, ");
} else if (s->scanner_code)
fprintf(fp, "d_scan_code_%d_%s, ", i, tag);
else
fprintf(fp, "NULL, ");
if (s->scanner.states.n)
fprintf(fp, "(void*)d_scanner_%d_%s, ",
s->same_shifts ? s->same_shifts->index : i, tag);
else
fprintf(fp, "NULL, ");
fprintf(fp, "sizeof(%s), ", scanner_type(s));
fprintf(fp, s->accept ? "1, " : "0, ");
fprintf(fp, "%s, ", scan_kind_strings[s->scan_kind]);
if (s->scan_kind != D_SCAN_LONGEST && s->scanner.states.n)
fprintf(fp, "(void*)d_transition_%d_%s, ",
s->same_shifts ? s->same_shifts->index : i, tag);
else
fprintf(fp, "NULL, ");
if (s->scan_kind != D_SCAN_LONGEST && s->scanner.states.n)
fprintf(fp, "d_accepts_diff_%d_%s, ",
s->same_shifts ? s->same_shifts->index : i, tag);
else
fprintf(fp, "(D_Shift***)NULL, ");
if (s->reduces_to)
fprintf(fp, "%d", s->reduces_to->index);
else
fprintf(fp, "-1");
fprintf(fp, "}%s\n", i == g->states.n - 1 ? "" : ",");
}
fprintf(fp, "};\n\n");
} else
fprintf(fp, "{{0, {0, NULL}, 0, NULL}};\n\n");
}
static int
write_header_as_C(Grammar *g, char *base_pathname, char *tag) {
char pathname[FILENAME_MAX];
int i, tokens = 0, states = 0, col;
FILE *hfp;
for (i = 0; i < g->terminals.n; i++)
if (g->terminals.v[i]->kind == TERM_TOKEN)
tokens = 1;
if (g->states_for_all_nterms)
states = 1;
else
for (i = 0; i < g->productions.n; i++)
if (state_for_declaration(g, i))
states = 1;
if (g->write_header > 0 || (g->write_header < 0 && (tokens || states))) {
strcpy(pathname, base_pathname);
strcat(pathname, ".d_parser.h");
hfp = stdout; //fopen(pathname, "w");
if (!hfp)
d_fail("unable to open `%s` for write\n", pathname);
fprintf(hfp, "#ifndef _%s_h\n", tag);
fprintf(hfp, "#define _%s_h\n", tag);
if (tokens) {
if (!g->token_type) {
for (i = 0; i < g->terminals.n; i++)
if (g->terminals.v[i]->kind == TERM_TOKEN)
fprintf(hfp, "#define %s \t%d\n",
g->terminals.v[i]->string,
g->terminals.v[i]->index + g->productions.n);
} else {
fprintf(hfp, "enum D_Tokens_%s {\n", tag);
col = 0;
for (i = 0; i < g->terminals.n; i++) {
if (g->terminals.v[i]->kind == TERM_TOKEN) {
col += g->terminals.v[i]->string_len + 7;
if (col > 70) { printf("\n"); col = 0; }
fprintf(hfp, "%s = %d%s",
g->terminals.v[i]->string,
g->terminals.v[i]->index + g->productions.n,
i == g->terminals.n-1 ? "" : ", ");
}
}
fprintf(hfp, "\n};\n");
}
}
if (states) {
for (i = 0; i < g->productions.n; i++)
if (!g->productions.v[i]->internal && g->productions.v[i]->elem)
fprintf(hfp, "#define D_START_STATE_%s \t%d\n",
g->productions.v[i]->name, g->productions.v[i]->state->index);
}
fprintf(hfp, "#endif\n");
//fclose(hfp);
return 1;
}
return 0;
}
static char *d_internal[] = {"D_SYMBOL_NTERM", "D_SYMBOL_INTERNAL"};
static char *d_symbol[] = {
"D_SYMBOL_STRING", "D_SYMBOL_REGEX", "D_SYMBOL_CODE", "D_SYMBOL_TOKEN" };
static void
write_symbol_data_as_C(FILE *fp, Grammar *g, char *tag) {
int i;
fprintf(fp, "D_Symbol d_symbols_%s[] = {\n", tag);
for (i = 0; i < g->productions.n; i++)
fprintf(fp, "{%s, \"%s\", %d},\n",
d_internal[g->productions.v[i]->internal ? 1 : 0],
g->productions.v[i]->name,
g->productions.v[i]->name_len);
for (i = 0; i < g->terminals.n; i++) {
char *s = escape_string(g->terminals.v[i]->string); /* so it is a string */
char *ss = escape_string(s); /* so it is printable */
fprintf(fp, "{%s, \"%s\", %d},\n", d_symbol[g->terminals.v[i]->kind],
ss, (int) strlen(ss));
FREE(s);
FREE(ss);
}
fprintf(fp, "};\n\n");
}
static void
write_passes_as_C(FILE *fp, Grammar *g, char *tag) {
int i;
if (g->passes.n) {
fprintf(fp, "D_Pass d_passes_%s[] = {\n", tag);
for (i = 0; i < g->passes.n; i++) {
D_Pass *p = g->passes.v[i];
fprintf(fp, "{\"%s\", %d, 0x%X, %d}%s\n",
p->name, p->name_len, p->kind, p->index,
i < g->passes.n-1 ? ", " : "");
}
fprintf(fp, "};\n\n");
}
}
void
write_parser_tables_as_C(Grammar *g, char *base_pathname, char *tag) {
char pathname[FILENAME_MAX];
FILE *fp;
int whitespace_production = 0, header;
VecState er_hash;
Production *p;
vec_clear(&er_hash);
strcpy(pathname, base_pathname);
strcat(pathname, ".d_parser.c");
fp = stdout; //fopen(pathname, "w");
if (!fp)
d_fail("unable to open `%s` for write\n", pathname);
header = write_header_as_C(g, base_pathname, tag);
write_global_code_as_C(fp, g, tag);
fprintf(fp, "#include \"dparse.h\"\n");
if (header)
fprintf(fp, "#include \"%s.d_parser.h\"\n", base_pathname);
fprintf(fp, "\n");
write_reductions_as_C(fp, g, tag);
write_scanner_data_as_C(fp, g, tag);
write_scanner_code_as_C(fp, g, tag);
write_goto_data_as_C(fp, g, tag);
write_error_data_as_C(fp, g, &er_hash, tag);
write_state_data_as_C(fp, g, &er_hash, tag);
write_symbol_data_as_C(fp, g, tag);
write_passes_as_C(fp, g, tag);
if ((p = lookup_production(g, "whitespace", sizeof("whitespace")-1)))
whitespace_production = p->state->index;
fprintf(fp, "D_ParserTables parser_tables_%s = {\n", tag);
fprintf(fp, "%d, ", g->states.n);
fprintf(fp, "d_states_%s, ", tag);
fprintf(fp, "d_gotos_%s, ", tag);
fprintf(fp, "%d, ", whitespace_production);
fprintf(fp, "%d, ", g->productions.n + g->terminals.n);
fprintf(fp, "d_symbols_%s, ", tag);
if (g->default_white_space)
fprintf(fp, "%s, ", g->default_white_space);
else
fprintf(fp, "NULL, ");
fprintf(fp, "%d, ", g->passes.n);
if (g->passes.n)
fprintf(fp, "d_passes_%s, ", tag);
else
fprintf(fp, "NULL, ");
if (g->save_parse_tree)
fprintf(fp, "1");
else
fprintf(fp, "0");
fprintf(fp, "};\n");
//fclose(fp);
}
int
write_ctables(Grammar *g) {
g->scanner_block_size = 256/g->scanner_blocks;
write_parser_tables_as_C(g, g->pathname,
*g->grammar_ident ? g->grammar_ident : NULL);
return 0;
}