blob: 12f970570dd7a421b25db5296ab10ed2893e6852 [file] [log] [blame]
/*
* field.c - routines for dealing with fields and record parsing
*/
/*
* Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 1, or (at your option)
* any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GAWK; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "gawk.h"
/* routines used below whose definitions are not part of this file */
extern void assoc_clear();
extern int a_get_three();
extern int get_rs();
/* forward declarations of helpers defined later in this file */
static char *get_fs();
static int re_split();
static int parse_fields();
static void set_element();
char *line_buf = NULL; /* holds current input line */
static char *parse_extent; /* marks where to restart parse of record */
static int parse_high_water=0; /* field number that we have parsed so far */
/* empty value handed to set_field() for a field beyond the end of the record */
static char f_empty[] = "";
static char *save_fs = " "; /* save current value of FS when line is read,
* to be used in deferred parsing
*/
NODE **fields_arr; /* array of pointers to the field nodes */
NODE node0; /* node for $0 which never gets free'd */
/*
 * nonzero while $0 still matches the fields: set_record() sets it to 1,
 * get_field() clears it when a field numbered > 0 is about to be assigned
 */
int node0_valid = 1; /* $(>0) has not been changed yet */
/*
 * init_fields - set up the field table before any record is read.
 *
 * Allocates fields_arr with room for exactly one slot and makes that
 * slot refer to node0, the statically allocated node standing for $0,
 * which starts out as the empty string.
 */
void
init_fields()
{
    emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");

    /* node0 is static storage: flag it so that its buffer is never freed */
    node0.flags = (STR|PERM);
    node0.stptr = "";
    node0.stref = 0;
    node0.type = Node_val;

    fields_arr[0] = &node0;
}
/*
 * set_field - install a newly parsed string as the value of field `num'.
 *
 * Danger!  Must only be called for fields we know have just been blanked,
 * or fields we know don't exist yet: whatever pointer is currently in the
 * slot is overwritten without being released.
 *
 * The field table is grown when `num' exceeds the largest field number
 * ever stored, any slots between the last parsed field and `num' are
 * pointed at Nnull_string, and parse_high_water is moved up to `num'.
 */
/*ARGSUSED*/
static void
set_field(num, str, len, dummy)
int num;
char *str;
int len;
NODE *dummy;	/* not used -- just to make interface same as set_element */
{
    static int nf_high_water = 0;	/* largest field number the table can hold */
    NODE *fld;
    int gap;

    if (nf_high_water < num) {
        erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
        nf_high_water = num;
    }

    /* fields skipped over between the last parsed one and this one are null */
    gap = parse_high_water + 1;
    while (gap < num)
        fields_arr[gap++] = Nnull_string;

    fld = make_string(str, len);
    (void) force_number(fld);
    fields_arr[num] = fld;

    parse_high_water = num;
}
/*
 * rebuild_record - someone assigned a value to $(something); fix up $0
 * to be right.
 *
 * Joins fields 1 .. parse_high_water, separated by the current string
 * value of OFS, into a scratch buffer, makes a new string node from it,
 * releases the node currently in fields_arr[0] through do_deref() and
 * installs the new node there.
 */
static void
rebuild_record()
{
    register int tlen;
    register NODE *tmp;
    NODE *ofs;
    char *ops;
    register char *cops;
    register NODE **ptr;
    register int ofslen;

    ofs = force_string(OFS_node->var_value);
    ofslen = ofs->stlen;

    /* first pass: force every field to a string and add up the lengths */
    tlen = 0;
    for (ptr = &fields_arr[parse_high_water]; ptr > &fields_arr[0]; ptr--) {
        tmp = force_string(*ptr);
        tlen += tmp->stlen;
    }

    /*
     * One separator goes between each pair of adjacent fields.  With
     * fewer than two fields there are none; without this test a field
     * count of zero would make the length negative and under-allocate
     * the buffer below.
     */
    if (parse_high_water > 1)
        tlen += (parse_high_water - 1) * ofslen;

    emalloc(ops, char *, tlen + 1, "rebuild_record");
    cops = ops;
    ops[0] = '\0';

    /* second pass: copy the fields and separators into place */
    for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) {
        tmp = *ptr;
        if (tmp->stlen == 1)
            *cops++ = tmp->stptr[0];
        else if (tmp->stlen != 0) {
            memcpy(cops, tmp->stptr, tmp->stlen);
            cops += tmp->stlen;
        }
        /* no separator after the last field */
        if (ptr != &fields_arr[parse_high_water]) {
            if (ofslen == 1)
                *cops++ = ofs->stptr[0];
            else if (ofslen != 0) {
                memcpy(cops, ofs->stptr, ofslen);
                cops += ofslen;
            }
        }
    }

    /* the scratch buffer is released right after the node is made from it */
    tmp = make_string(ops, tlen);
    free(ops);

    /* drop the old $0 and put the rebuilt one in its place */
    deref = fields_arr[0];
    do_deref();
    fields_arr[0] = tmp;
}
/*
 * set_record - make a new input record current.
 *
 * Sets up $0, but defers parsing the rest of the line until a reference
 * is made to $(>0) or to NF.  At that point, only as much as necessary
 * is parsed.
 *
 * NF is set to -1, every field parsed from the previous record is
 * released, and the parse state is reset.  Only when `buf' is the
 * input line buffer itself is $0 pointed at it (through node0) and the
 * current field separator saved for the deferred parse.
 */
void
set_record(buf, cnt)
char *buf;
int cnt;
{
    register int fld;

    /* NF is not known until the record has actually been split */
    assign_number(&NF_node->var_value, (AWKNUM)-1);

    /* let go of the fields belonging to the previous record */
    fld = 1;
    while (fld <= parse_high_water) {
        deref = fields_arr[fld];
        do_deref();
        fld++;
    }
    parse_high_water = 0;
    node0_valid = 1;

    if (buf != line_buf)
        return;

    deref = fields_arr[0];
    do_deref();
    save_fs = get_fs();

    node0.flags = (STR|PERM);	/* never free buf */
    node0.stref = 1;
    node0.stlen = cnt;
    node0.stptr = buf;
    node0.type = Node_val;
    fields_arr[0] = &node0;
}
/*
 * get_field - return the address of the fields_arr slot for field `num',
 * parsing as much of the current record as is needed to reach it.
 *
 * num:    field number; 0 means the whole record ($0).  The value HUGE-1
 *         is treated as "parse everything" and is replaced by the number
 *         of fields actually found.
 * assign: nonzero when the field is the target of an assignment.
 *
 * Side effects visible here: may clear node0_valid, may advance
 * parse_extent and parse_high_water (through set_field), may rebuild $0,
 * and may store a new value in NF.
 */
NODE **
get_field(num, assign)
int num;
int assign; /* this field is on the LHS of an assign */
{
int n;
/*
* if requesting whole line but some other field has been altered,
* then the whole line must be rebuilt
*/
if (num == 0 && (node0_valid == 0 || assign)) {
/* first, parse remainder of input record */
/* NF == -1 is the value set_record() stores before any parsing is done */
if (NF_node->var_value->numbr == -1) {
if (parse_high_water == 0)
parse_extent = node0.stptr;
n = parse_fields(HUGE-1, &parse_extent,
node0.stlen - (parse_extent - node0.stptr),
save_fs, set_field, (NODE *)NULL);
assign_number(&NF_node->var_value, (AWKNUM)n);
}
if (node0_valid == 0)
rebuild_record();
return &fields_arr[0];
}
/* an assignment to a field other than $0 makes the stored $0 stale */
if (num > 0 && assign)
node0_valid = 0;
if (num <= parse_high_water) /* we have already parsed this field */
return &fields_arr[num];
if (parse_high_water == 0 && num > 0) /* starting at the beginning */
parse_extent = fields_arr[0]->stptr;
/*
* parse up to num fields, calling set_field() for each, and saving
* in parse_extent the point where the parse left off
*/
n = parse_fields(num, &parse_extent,
fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
save_fs, set_field, (NODE *)NULL);
/* "parse everything" request: the field wanted is the last one found */
if (num == HUGE-1)
num = n;
if (n < num) { /* requested field number beyond end of record;
* set_field will just extend the number of fields,
* with empty fields
*/
set_field(num, f_empty, 0, (NODE *) NULL);
/*
* if this field is on the LHS of an assignment, then we want to
* set NF to this value, below
*/
if (assign)
n = num;
}
/*
* if we reached the end of the record, set NF to the number of fields
* so far. Note that num might actually refer to a field that
* is beyond the end of the record, but we won't set NF to that value at
* this point, since this is only a reference to the field and NF
* only gets set if the field is assigned to -- in this case n has
* been set to num above
*/
/* NOTE(review): relies on the record text being NUL-terminated -- confirm */
if (*parse_extent == '\0')
assign_number(&NF_node->var_value, (AWKNUM)n);
return &fields_arr[num];
}
/*
 * parse_fields - split a buffer into fields, handing each one to `set'.
 *
 * this is called both from get_field() and from do_split()
 *
 * Field numbering continues from parse_high_water, except when up_to is
 * HUGE, in which case it starts again from zero.  Parsing stops once
 * field number up_to has been produced or the buffer is exhausted.
 *
 * A separator longer than one character is treated as a regular
 * expression and matched with re_split(); a single space means "runs of
 * blanks and tabs, leading ones ignored"; any other single character
 * separates fields literally.
 *
 * Every scanning loop tests `scan < end' before looking at `*scan', so
 * the byte at `end' is never read.
 *
 * Returns the number of the last field passed to `set'; *buf is updated
 * to the point where scanning stopped.
 */
static int
parse_fields(up_to, buf, len, fs, set, n)
int up_to;	/* parse only up to this field number */
char **buf;	/* on input: string to parse; on output: point to start next */
int len;	/* number of characters available at *buf */
register char *fs;	/* field separator string */
void (*set) ();	/* routine to set the value of the parsed field */
NODE *n;	/* passed through untouched as the last argument of `set' */
{
    char *s = *buf;
    register char *field;
    register char *scan;
    register char *end = s + len;
    int NF = parse_high_water;
    char rs = get_rs();

    if (up_to == HUGE)
        NF = 0;

    if (*fs && *(fs + 1) != '\0') {	/* fs is a regexp */
        struct re_registers reregs;

        scan = s;
        /* rs == 0 with FS a single space: skip leading newlines and blanks */
        if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) {
            while (scan < end
                   && (*scan == '\n' || *scan == ' ' || *scan == '\t'))
                scan++;
        }
        s = scan;
        while (scan < end
               && re_split(scan, (int)(end - scan), fs, &reregs) != -1
               && NF < up_to) {
            if (reregs.end[0] == 0) {	/* null match */
                /* step over one character and search again */
                scan++;
                if (scan == end) {
                    (*set)(++NF, s, scan - s, n);
                    up_to = NF;
                    break;
                }
                continue;
            }
            /* the field runs from s up to the start of the match */
            (*set)(++NF, s, scan - s + reregs.start[0], n);
            scan += reregs.end[0];
            s = scan;
        }
        /* whatever follows the last separator is the final field */
        if (NF != up_to && scan <= end) {
            if (!(rs == 0 && scan == end)) {
                (*set)(++NF, scan, (int)(end - scan), n);
                scan = end;
            }
        }
        *buf = scan;
        return (NF);
    }

    for (scan = s; scan < end && NF < up_to; scan++) {
        /*
         * special case: fs is single space, strip leading
         * whitespace
         */
        if (*fs == ' ') {
            while (scan < end && (*scan == ' ' || *scan == '\t'))
                scan++;
            if (scan >= end)
                break;
        }
        field = scan;
        if (*fs == ' ')
            while (scan < end && *scan != ' ' && *scan != '\t')
                scan++;
        else {
            while (scan < end && *scan != *fs)
                scan++;
            /*
             * a separator in the very last position: set the field
             * here, and let the call below set one more, empty, field
             */
            if (rs && scan == end-1 && *scan == *fs) {
                (*set)(++NF, field, (int)(scan - field), n);
                field = scan;
            }
        }
        (*set)(++NF, field, (int)(scan - field), n);
        if (scan == end)
            break;
    }
    *buf = scan;
    return NF;
}
/*
 * re_split - search `buf' for the next match of the separator regexp `fs'.
 *
 * The compiled pattern is kept between calls, together with a copy of
 * the separator text it was built from.  It is thrown away and rebuilt
 * when the separator text differs from the saved copy or when, outside
 * strict mode, the truth value of IGNORECASE no longer agrees with
 * whether the pattern has a translate table.
 *
 * Returns whatever re_search() returns for a search over the first
 * `len' characters of `buf'; match positions are left in *reregsp.
 * Does not return if the separator fails to compile.
 */
static int
re_split(buf, len, fs, reregsp)
char *buf, *fs;
int len;
struct re_registers *reregsp;
{
    typedef struct re_pattern_buffer RPAT;
    static RPAT *rp;
    static char *last_fs = NULL;
    int stale;

    stale = (last_fs != NULL && !STREQ(fs, last_fs));
    if (!stale && rp && ! strict)
        stale = ((IGNORECASE_node->var_value->numbr != 0)
                 != (rp->translate != NULL));

    if (stale) {
        /* fs has changed or IGNORECASE has changed */
        free(rp->buffer);
        free(rp->fastmap);
        free((char *) rp);
        free(last_fs);
        last_fs = NULL;
    }

    if (last_fs == NULL) {	/* first time, or pattern just discarded */
        emalloc(rp, RPAT *, sizeof(RPAT), "re_split");
        memset((char *) rp, 0, sizeof(RPAT));
        emalloc(rp->buffer, char *, 8, "re_split");
        rp->allocated = 8;
        emalloc(rp->fastmap, char *, 256, "re_split");

        /* remember the text so that a later change can be noticed */
        emalloc(last_fs, char *, strlen(fs) + 1, "re_split");
        (void) strcpy(last_fs, fs);

        rp->translate = (! strict && IGNORECASE_node->var_value->numbr != 0.0)
                        ? casetable : NULL;

        if (re_compile_pattern(fs, strlen(fs), rp) != NULL)
            fatal("illegal regular expression for FS: `%s'", fs);
    }

    return re_search(rp, buf, len, 0, len, reregsp);
}
NODE *
do_split(tree)
NODE *tree;
{
NODE *t1, *t2, *t3;
register char *splitc;
char *s;
NODE *n;
if (a_get_three(tree, &t1, &t2, &t3) < 3)
splitc = get_fs();
else
splitc = force_string(t3)->stptr;
n = t2;
if (t2->type == Node_param_list)
n = stack_ptr[t2->param_cnt];
if (n->type != Node_var && n->type != Node_var_array)
fatal("second argument of split is not a variable");
assoc_clear(n);
tree = force_string(t1);
s = tree->stptr;
return tmp_number((AWKNUM)
parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n));
}
/*
 * get_fs - return the field separator text to use for parsing.
 *
 * Normally this is simply the string value of FS.  When get_rs()
 * returns 0, newline has to separate fields as well, so short values
 * of FS are rewritten into a static buffer:
 *
 *	FS is a single space	->  "[ \n]+"
 *	FS is one other char c	->  "[c\n]"
 *	FS is empty		->  "\n"
 *
 * Longer values of FS are returned unchanged.  The static buffer is
 * overwritten by the next call that rewrites FS.
 *
 * NOTE(review): the character is placed in the bracket expression as
 * is; whether characters such as `^' then behave as intended depends on
 * the regexp syntax in use -- confirm.
 */
static char *
get_fs()
{
    static char buf[10];
    register NODE *fsval;

    fsval = force_string(FS_node->var_value);

    if (get_rs() != 0)
        return fsval->stptr;

    if (fsval->stlen == 0) {
        (void) strcpy(buf, "\n");
        return buf;
    }
    if (fsval->stlen != 1)
        return fsval->stptr;

    if (fsval->stptr[0] == ' ')
        (void) strcpy(buf, "[ \n]+");
    else
        sprintf(buf, "[%c\n]", fsval->stptr[0]);
    return buf;
}
/*
 * set_element - store one piece produced by parse_fields() in an array.
 *
 * The element of array `n' subscripted by the number `num' is set to a
 * new string node made from the `len' characters at `s'.  Same calling
 * interface as set_field().
 */
static void
set_element(num, s, len, n)
int num;
char *s;
int len;
NODE *n;
{
    NODE **slot;

    slot = assoc_lookup(n, tmp_number((AWKNUM) (num)));
    *slot = make_string(s, len);
}