gno/usr.bin/awk/tests/awk.parser
tribby ddb82cb2e0 Remaining files for awk 2.0 that were left out of the previous checkin.
Maybe someday I'll become adept at using cvs...
1998-04-07 17:06:53 +00:00

172 lines
4.8 KiB
Plaintext

# awk.parser - recursive-descent translator for part of awk
# input: awk program (very restricted subset)
# output: C code to implement the awk program
# Entry point: the whole translation is driven from the BEGIN rule,
# so input is consumed by the parser itself via getline.
BEGIN {
	program()
}
# advance - lexical analyzer: load the next token into the global tok.
#   Refills the global buffer `line` with getline whenever it is exhausted,
#   strips leading blanks/tabs, and splits off one token: an identifier,
#   a number, a relational operator, or any other single character.
#   Returns the token; once input ends, tok becomes "(eof)" and stays so.
function advance() {
	if (tok == "(eof)")
		return "(eof)"
	# Strip blanks BEFORE testing for an empty buffer: a line holding only
	# blanks, or trailing blanks after the last token, must trigger a
	# refill instead of falling through to the error below.
	for (;;) {
		sub(/^[ \t]+/, "", line)
		if (length(line) > 0)
			break
		# <= 0 stops on end of input (0) and on a read error (-1)
		if ((getline line) <= 0)
			return tok = "(eof)"
	}
	if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/) ||		# identifier
	    match(line, /^-?([0-9]+\.?[0-9]*|\.[0-9]+)/) ||	# number
	    match(line, /^(<|<=|==|!=|>=|>)/) ||		# relational
	    match(line, /^./)) {				# everything else
		tok = substr(line, 1, RLENGTH)
		line = substr(line, RLENGTH+1)
		return tok
	}
	# defensive: line is non-empty here, so /^./ above should always match
	error("line " NR " incomprehensible at " line)
}
# gen - print one line of generated code, preceded by nt tabs.
#   s          text of the line (a newline is appended)
#   indent, i  locals
# nt is the global nesting depth maintained by the parsing functions.
function gen(s,    indent, i) {
	# Build the indentation in a loop: a fixed-width tab literal would
	# silently flatten any nesting deeper than its length.
	indent = ""
	for (i = 0; i < nt; i++)
		indent = indent "\t"
	printf("%s%s\n", indent, s)
}
# eat - consume the current token, which must equal s.
#   s  the token text the grammar requires at this point
# On a mismatch, reports the offending input line and stops via error().
function eat(s) {
	# NR (current input line number), not NF (field count), locates the error
	if (tok != s)
		error("line " NR ": saw " tok ", expected " s)
	advance()
}
# nl - skip over any run of newline and semicolon tokens
function nl() {
	for (;;) {
		if (tok != "\n" && tok != ";")
			return
		advance()
	}
}
# error - write "Error: <s>" to standard error (through cat) and
# terminate the translator with exit status 1.
function error(s) {
	print "Error: " s | "cat 1>&2"
	exit 1
}
# program - top-level rule:
#   optional BEGIN statlist, pattern-action statements, optional END statlist.
# Anything left over after the END action is an error.
function program() {
	advance()			# prime the lookahead token
	if (tok == "BEGIN") {
		eat("BEGIN")
		statlist()
	}
	pastats()
	if (tok == "END") {
		eat("END")
		statlist()
	}
	if (tok == "(eof)")
		return
	error("program continues after END")
}
# pastats - translate every pattern-action statement up to END or end of
# input, wrapping the generated code in a "while (getrec())" loop.
function pastats() {
	gen("while (getrec()) {")
	nt++
	while (!(tok == "END" || tok == "(eof)"))
		pastat()
	nt--
	gen("}")
}
# pastat - one pattern-action statement:
#   { action }   |   pattern { action }   |   pattern
function pastat() {
	if (tok == "{") {		# action only
		statlist()
		return
	}
	# pattern, optionally followed by an action
	gen("if (" pattern() ") {")
	nt++
	if (tok != "{")			# default action is print $0
		gen("print(field(0));")
	else
		statlist()
	nt--
	gen("}")
}
# pattern - a pattern is parsed as an expression; returns its translation
function pattern() {
	return expr()
}
# statlist - "{" stat stat ... "}", with separators allowed after each brace
function statlist() {
	eat("{")
	nl()
	for (; tok != "}"; )
		stat()
	eat("}")
	nl()
}
# stat - one statement: print, if, while, a braced list, or a simple
# statement; trailing newlines/semicolons are absorbed afterwards.
function stat() {
	if (tok == "print") {
		eat("print")
		gen("print(" exprlist() ");")
	} else if (tok == "if") {
		ifstat()
	} else if (tok == "while") {
		whilestat()
	} else if (tok == "{") {
		statlist()
	} else {
		gen(simplestat() ";")
	}
	nl()
}
# ifstat - if ( expr ) stat [ else stat ]
# The generated branches are always wrapped in braces.
function ifstat() {
	eat("if")
	eat("(")
	gen("if (" expr() ") {")
	eat(")")
	nl()
	nt++
	stat()
	if (tok == "else") {		# optional else
		eat("else")
		nl()
		nt--			# "} else {" sits at the level of the "if"
		gen("} else {")
		nt++
		stat()
	}
	nt--
	gen("}")
}
# whilestat - while ( expr ) stat
function whilestat() {
	eat("while")
	eat("(")
	gen("while (" expr() ") {")
	eat(")")
	nl()
	nt++
	stat()
	nt--
	gen("}")
}
# simplestat - ident = expr | name(exprlist)
#   lhs is a local.  Returns the translated statement without the ";".
function simplestat( lhs) {
	lhs = ident()
	if (tok != "=")
		return lhs
	eat("=")
	return "assign(" lhs ", " expr() ")"
}
# exprlist - expr , expr , ...  (at least one expr is required)
#   n, e are locals.  Returns the translations joined by ", ".
function exprlist( n, e) {
	e = expr()
	n = 1
	while (tok == ",") {
		advance()
		e = e ", " expr()
		n++
	}
	return e
}
# expr - rel | rel relop rel
#   e, op are locals; callers invoke expr() with no arguments.
#   Returns the translated expression as nested eval("op", lhs, rhs) calls.
function expr(e,    op) {
	e = rel()
	# op must be local: expr() recurses through parenthesised factors, and
	# a shared global would be overwritten by the inner call.
	# The pattern is anchored so only a whole relational token qualifies.
	while (tok ~ /^(<|<=|==|!=|>=|>)$/) {
		op = tok
		advance()
		e = sprintf("eval(\"%s\", %s, %s)", op, e, rel())
	}
	return e
}
# rel - term | term [+-] term
#   op, e are used as locals.  Returns nested eval("op", lhs, rhs) text.
function rel(op, e) {
	for (e = term(); tok == "+" || tok == "-"; ) {
		op = tok
		advance()
		e = "eval(\"" op "\", " e ", " term() ")"
	}
	return e
}
# term - fact | fact [*/%] fact
#   op, e are used as locals.  Returns nested eval("op", lhs, rhs) text.
function term(op, e) {
	for (e = fact(); tok == "*" || tok == "/" || tok == "%"; ) {
		op = tok
		advance()
		e = "eval(\"" op "\", " e ", " fact() ")"
	}
	return e
}
# fact - (expr) | $fact | ident | number
#   e is a local.  Returns the translated factor; any other token is
#   reported through error().
function fact( e) {
	if (tok == "(") {
		eat("("); e = expr(); eat(")")
		return "(" e ")"
	} else if (tok == "$") {
		eat("$")
		return "field(" fact() ")"
	} else if (tok ~ /^[A-Za-z_][A-Za-z_0-9]*/) {
		# same identifier pattern as advance() and ident(), so names
		# containing or starting with "_" are accepted here too
		return ident()
	} else if (tok ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)/) {
		e = tok
		advance()
		return "num((float)" e ")"
	} else
		error("unexpected " tok " at line " NR)
}
# ident - name | name[expr] | name(exprlist)
#   id, e are locals.  Returns the name itself, an array(name, index)
#   reference, or a call name(args).
function ident( id, e) {
	if (!match(tok, /^[A-Za-z_][A-Za-z_0-9]*/))
		error("unexpected " tok " at line " NR)
	id = tok
	advance()
	if (tok == "[") {		# array
		eat("[")
		e = expr()
		eat("]")
		return "array(" id ", " e ")"
	}
	if (tok == "(") {		# function call
		eat("(")
		if (tok != ")")
			e = exprlist()	# e stays empty for a call with no arguments
		eat(")")
		return id "(" e ")"	# calls are statements
	}
	return id			# variable
}