gno/usr.bin/awk/tran.c
tribby 0b6d503134 awk 2.0 for GNO/ME, including modified Bell Labs source code, test cases,
and output files for test case comparison. See README.gno for implementation
notes. NOTE: some test cases in tests/dotests are commented-out because
they depend on pipes working and/or the "sort" utility.
1998-04-07 16:19:01 +00:00

495 lines
13 KiB
C

/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
/* $Id: tran.c,v 1.2 1998/04/07 16:14:02 tribby Exp $ */
#ifdef __GNO__
segment "tran";
#endif
#define DEBUG
#include <stdio.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "awk.h"
#include "ytab.h"
#define FULLTAB 2 /* rehash when table gets this x full */
#define GROWTAB 4 /* grow table by this factor */
Array *symtab; /* main symbol table */
char **FS; /* initial field sep */
char **RS; /* initial record sep */
char **OFS; /* output field sep */
char **ORS; /* output record sep */
char **OFMT; /* output format for numbers */
char **CONVFMT; /* format for conversions in getsval */
Awkfloat *NF; /* number of fields in current record */
Awkfloat *NR; /* number of current record */
Awkfloat *FNR; /* number of current record in current file */
char **FILENAME; /* current filename argument */
Awkfloat *ARGC; /* number of arguments from command line */
char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH; /* length of same */
Cell *nrloc; /* NR */
Cell *nfloc; /* NF */
Cell *fnrloc; /* FNR */
Array *ARGVtab; /* symbol table containing ARGV[...] */
Array *ENVtab; /* symbol table containing ENVIRON[...] */
Cell *rstartloc; /* RSTART */
Cell *rlengthloc; /* RLENGTH */
Cell *symtabloc; /* SYMTAB */
Cell *nullloc; /* a guaranteed empty cell */
Node *nullnode; /* zero&null, converted into a node for comparisons */
Cell *literal0;
extern Cell **fldtab;
void syminit(void) /* initialize symbol table with builtin vars */
{
literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
/* this is used for if(x)... tests: */
nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
nullnode = celltonode(nullloc, CCON);
FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
NF = &nfloc->fval;
nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
NR = &nrloc->fval;
fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
FNR = &fnrloc->fval;
SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
RSTART = &rstartloc->fval;
rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
RLENGTH = &rlengthloc->fval;
symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
symtabloc->sval = (char *) symtab;
}
void arginit(int ac, char **av) /* set up ARGV and ARGC */
{
Cell *cp;
int i;
char temp[50];
ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
sprintf(temp, "%d", i);
if (isnumber(*av))
setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
else
setsymtab(temp, *av, 0.0, STR, ARGVtab);
av++;
}
}
void envinit(char **envp) /* set up ENVIRON variable */
{
Cell *cp;
char *p;
cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
ENVtab = makesymtab(NSYMTAB);
cp->sval = (char *) ENVtab;
for ( ; *envp; envp++) {
if ((p = strchr(*envp, '=')) == NULL)
continue;
*p++ = 0; /* split into two strings at = */
if (isnumber(p))
setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
else
setsymtab(*envp, p, 0.0, STR, ENVtab);
p[-1] = '='; /* restore in case env is passed down to a shell */
}
}
Array *makesymtab(int n) /* make a new symbol table */
{
Array *ap;
Cell **tp;
ap = (Array *) malloc(sizeof(Array));
tp = (Cell **) calloc(n, sizeof(Cell *));
if (ap == NULL || tp == NULL)
ERROR "out of space in makesymtab" FATAL;
ap->nelem = 0;
ap->size = n;
ap->tab = tp;
return(ap);
}
void freesymtab(Cell *ap) /* free a symbol table */
{
Cell *cp, *temp;
Array *tp;
int i;
if (!isarr(ap))
return;
tp = (Array *) ap->sval;
if (tp == NULL)
return;
for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp != NULL; cp = temp) {
xfree(cp->nval);
if (freeable(cp))
xfree(cp->sval);
temp = cp->cnext; /* avoids freeing then using */
free(cp);
}
tp->tab[i] = 0;
}
free(tp->tab);
free(tp);
}
void freeelem(Cell *ap, char *s) /* free elem s from ap (i.e., ap["s"] */
{
Array *tp;
Cell *p, *prev = NULL;
int h;
tp = (Array *) ap->sval;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
if (strcmp(s, p->nval) == 0) {
if (prev == NULL) /* 1st one */
tp->tab[h] = p->cnext;
else /* middle somewhere */
prev->cnext = p->cnext;
if (freeable(p))
xfree(p->sval);
free(p->nval);
free(p);
tp->nelem--;
return;
}
}
Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
{
int h;
Cell *p;
if (n != NULL && (p = lookup(n, tp)) != NULL) {
dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) );
return(p);
}
p = (Cell *) malloc(sizeof(Cell));
if (p == NULL)
ERROR "out of space for symbol table at %s", n FATAL;
p->nval = tostring(n);
p->sval = s ? tostring(s) : tostring("");
p->fval = f;
p->tval = t;
p->csub = CUNK;
p->ctype = OCELL;
tp->nelem++;
if (tp->nelem > FULLTAB * tp->size)
rehash(tp);
h = hash(n, tp->size);
p->cnext = tp->tab[h];
tp->tab[h] = p;
dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
p, p->nval, p->sval, p->fval, p->tval) );
return(p);
}
int hash(char *s, int n) /* form hash value for string s */
{
#ifndef __GNO__
unsigned hashval;
#else /* Need 32 bits; int on Apple IIGS is only 16 */
unsigned long hashval;
#endif
#if defined(__NOASM__) || !defined(__ORCAC__)
for (hashval = 0; *s != '\0'; s++)
hashval = (*s + 31 * hashval);
#else
/* Use equivalent 65816 code to speed up Apple IIGS execution */
unsigned long newhash;
asm{
stz hashval ; hashval = 0
stz hashval+2
ldy #0x0000 ; index into s = 0
forloop:
lda [s],y ; Get next character of s
and #0x00FF ; (one byte only)
beq done ; Done if '\0'
tax ; Hold character in X-reg
; Calculate hasval, but instead of multiplying old value
; by 31, shift it left by 5 (mult by 32) and subtract.
; This is equivalent to
; hashval = (hashval << 5) - hashval + *s
lda hashval ; Use newhash to accumulate
sta newhash ; shifted hash value.
lda hashval+2
sta newhash+2
asl newhash ; First shift (*2)
rol newhash+2
asl newhash ; Second (*4)
rol newhash+2
asl newhash ; Third (*8)
rol newhash+2
asl newhash ; Fourth (*16)
rol newhash+2
asl newhash ; Fifth (*32)
rol newhash+2 ; (newhash = hashval*32)
sec ; newhash -= hashval
lda newhash
sbc hashval
sta newhash
lda newhash+2
sbc hashval+2
sta newhash+2
clc ; newhash += s[y]
txa ; (Restore s[y] from x-register)
adc newhash
bcc nocarry
inc newhash+2
nocarry:
sta hashval ; hashval = newhash
lda newhash+2
sta hashval+2
iny ; Bump index into s
bra forloop ; and continue in loop.
done: ; When done, return hashval % n
}
#endif
return hashval % n;
}
void rehash(Array *tp) /* rehash items in small table into big one */
{
int i, nh, nsz;
Cell *cp, *op, **np;
nsz = GROWTAB * tp->size;
np = (Cell **) calloc(nsz, sizeof(Cell *));
if (np == NULL) /* can't do it, but can keep running. */
return; /* someone else will run out later. */
for (i = 0; i < tp->size; i++) {
for (cp = tp->tab[i]; cp; cp = op) {
op = cp->cnext;
nh = hash(cp->nval, nsz);
cp->cnext = np[nh];
np[nh] = cp;
}
}
free(tp->tab);
tp->tab = np;
tp->size = nsz;
}
Cell *lookup(char *s, Array *tp) /* look for s in tp */
{
Cell *p, *prev = NULL;
int h;
h = hash(s, tp->size);
for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
if (strcmp(s, p->nval) == 0)
return(p); /* found it */
return(NULL); /* not found */
}
Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
{
int fldno;
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
donerec = 0; /* mark $0 invalid */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
dprintf( ("setting field %d to %g\n", fldno, f) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
}
vp->tval &= ~STR; /* mark string invalid */
vp->tval |= NUM; /* mark number ok */
dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
return vp->fval = f;
}
void funnyvar(Cell *vp, char *rw)
{
if (isarr(vp))
ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
if (vp->tval & FCN)
ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
ERROR "funny variable %p: n=%s s=\"%s\" f=%g t=%o",
vp, vp->nval, vp->sval, vp->fval, vp->tval WARNING;
}
char *setsval(Cell *vp, char *s) /* set string val of a Cell */
{
char *t;
int fldno;
dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "assign to");
if (isfld(vp)) {
donerec = 0; /* mark $0 invalid */
fldno = atoi(vp->nval);
if (fldno > *NF)
newfld(fldno);
dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
} else if (isrec(vp)) {
donefld = 0; /* mark $1... invalid */
donerec = 1;
}
t = tostring(s); /* in case it's self-assign */
vp->tval &= ~NUM;
vp->tval |= STR;
if (freeable(vp))
xfree(vp->sval);
vp->tval &= ~DONTFREE;
dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
return(vp->sval = t);
}
Awkfloat getfval(Cell *vp) /* get float val of a Cell */
{
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
if (isfld(vp) && donefld == 0)
fldbld();
else if (isrec(vp) && donerec == 0)
recbld();
if (!isnum(vp)) { /* not a number */
vp->fval = atof(vp->sval); /* best guess */
if (isnumber(vp->sval) && !(vp->tval&CON))
vp->tval |= NUM; /* make NUM only sparingly */
}
dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
return(vp->fval);
}
char *getsval(Cell *vp) /* get string val of a Cell */
{
char s[100]; /* BUG: unchecked */
double dtemp;
if ((vp->tval & (NUM | STR)) == 0)
funnyvar(vp, "read value of");
if (isfld(vp) && donefld == 0)
fldbld();
else if (isrec(vp) && donerec == 0)
recbld();
if (isstr(vp) == 0) {
if (freeable(vp))
xfree(vp->sval);
if (modf(vp->fval, &dtemp) == 0) /* it's integral */
sprintf(s, "%.30g", vp->fval);
else
sprintf(s, *CONVFMT, vp->fval);
vp->sval = tostring(s);
vp->tval &= ~DONTFREE;
vp->tval |= STR;
}
dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
return(vp->sval);
}
char *tostring(char *s) /* make a copy of string s */
{
char *p;
p = (char *) malloc(strlen(s)+1);
if (p == NULL)
ERROR "out of space in tostring on %s", s FATAL;
strcpy(p, s);
return(p);
}
char *qstring(char *s, int delim) /* collect string up to next delim */
{
int c, n;
char *buf = 0, *bp;
if ((buf = (char *) malloc(strlen(s)+3)) == NULL)
ERROR "out of space in qstring(%s)", s);
for (bp = buf; (c = *s) != delim; s++) {
if (c == '\n')
ERROR "newline in string %.10s...", buf SYNTAX;
else if (c != '\\')
*bp++ = c;
else { /* \something */
switch (c = *++s) {
case '\\': *bp++ = '\\'; break;
case 'n': *bp++ = '\n'; break;
case 't': *bp++ = '\t'; break;
case 'b': *bp++ = '\b'; break;
case 'f': *bp++ = '\f'; break;
case 'r': *bp++ = '\r'; break;
default:
if (!isdigit(c)) {
*bp++ = c;
break;
}
n = c - '0';
if (isdigit(s[1])) {
n = 8 * n + *++s - '0';
if (isdigit(s[1]))
n = 8 * n + *++s - '0';
}
*bp++ = n;
break;
}
}
}
*bp++ = 0;
return buf;
}