# awk.parser - recursive-descent translator for part of awk # input: awk program (very restricted subset) # output: C code to implement the awk program BEGIN { program() } function advance() { # lexical analyzer; returns next token if (tok == "(eof)") return "(eof)" while (length(line) == 0) if (getline line == 0) return tok = "(eof)" sub(/^[ \t]+/, "", line) # remove white space if (match(line, /^[A-Za-z_][A-Za-z_0-9]*/) || # identifier match(line, /^-?([0-9]+\.?[0-9]*|\.[0-9]+)/) || # number match(line, /^(<|<=|==|!=|>=|>)/) || # relational match(line, /^./)) { # everything else tok = substr(line, 1, RLENGTH) line = substr(line, RLENGTH+1) return tok } error("line " NR " incomprehensible at " line) } function gen(s) { # print s with nt leading tabs printf("%s%s\n", substr("\t\t\t\t\t\t\t\t\t", 1, nt), s) } function eat(s) { # read next token if s == tok if (tok != s) error("line " NF ": saw " tok ", expected " s) advance() } function nl() { # absorb newlines and semicolons while (tok == "\n" || tok == ";") advance() } function error(s) { print "Error: " s | "cat 1>&2"; exit 1 } function program() { advance() if (tok == "BEGIN") { eat("BEGIN"); statlist() } pastats() if (tok == "END") { eat("END"); statlist() } if (tok != "(eof)") error("program continues after END") } function pastats() { gen("while (getrec()) {"); nt++ while (tok != "END" && tok != "(eof)") pastat() nt--; gen("}") } function pastat() { # pattern-action statement if (tok == "{") # action only statlist() else { # pattern-action gen("if (" pattern() ") {"); nt++ if (tok == "{") statlist() else # default action is print $0 gen("print(field(0));") nt--; gen("}") } } function pattern() { return expr() } function statlist() { eat("{"); nl(); while (tok != "}") stat(); eat("}"); nl() } function stat() { if (tok == "print") { eat("print"); gen("print(" exprlist() ");") } else if (tok == "if") ifstat() else if (tok == "while") whilestat() else if (tok == "{") statlist() else gen(simplestat() ";") nl() } function ifstat() { eat("if"); eat("("); gen("if (" expr() ") {"); eat(")"); nl(); nt++ stat() if (tok == "else") { # optional else eat("else") nl(); nt--; gen("} else {"); nt++ stat() } nt--; gen("}") } function whilestat() { eat("while"); eat("("); gen("while (" expr() ") {"); eat(")"); nl() nt++; stat(); nt--; gen("}") } function simplestat( lhs) { # ident = expr | name(exprlist) lhs = ident() if (tok == "=") { eat("=") return "assign(" lhs ", " expr() ")" } else return lhs } function exprlist( n, e) { # expr , expr , ... e = expr() # has to be at least one for (n = 1; tok == ","; n++) { advance() e = e ", " expr() } return e } function expr(e) { # rel | rel relop rel e = rel() while (tok ~ /<|<=|==|!=|>=|>/) { op = tok advance() e = sprintf("eval(\"%s\", %s, %s)", op, e, rel()) } return e } function rel(op, e) { # term | term [+-] term e = term() while (tok == "+" || tok == "-") { op = tok advance() e = sprintf("eval(\"%s\", %s, %s)", op, e, term()) } return e } function term(op, e) { # fact | fact [*/%] fact e = fact() while (tok == "*" || tok == "/" || tok == "%") { op = tok advance() e = sprintf("eval(\"%s\", %s, %s)", op, e, fact()) } return e } function fact( e) { # (expr) | $fact | ident | number if (tok == "(") { eat("("); e = expr(); eat(")") return "(" e ")" } else if (tok == "$") { eat("$") return "field(" fact() ")" } else if (tok ~ /^[A-Za-z][A-Za-z0-9]*/) { return ident() } else if (tok ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)/) { e = tok advance() return "num((float)" e ")" } else error("unexpected " tok " at line " NR) } function ident( id, e) { # name | name[expr] | name(exprlist) if (!match(tok, /^[A-Za-z_][A-Za-z_0-9]*/)) error("unexpected " tok " at line " NR) id = tok advance() if (tok == "[") { # array eat("["); e = expr(); eat("]") return "array(" id ", " e ")" } else if (tok == "(") { # function call eat("(") if (tok != ")") { e = exprlist() eat(")") } else eat(")") return id "(" e ")" # calls are statements } else return id # variable }