mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
Initial checkin of burg documentation files
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@3786 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
633a5b1aac
commit
4f372f0f41
84
support/tools/Burg/Doc/Makefile
Normal file
84
support/tools/Burg/Doc/Makefile
Normal file
@ -0,0 +1,84 @@
|
||||
# $Id$
|
||||
|
||||
#CFLAGS =
|
||||
#CFLAGS = -O
|
||||
#CFLAGS = -O -DNOLEX
|
||||
# Active compiler flags: -g emits debug information and -DDEBUG defines the
# DEBUG preprocessor macro.  The commented-out CFLAGS lines around this one
# are alternative settings; enable exactly one.
CFLAGS = -g -DDEBUG
|
||||
#CFLAGS = -g -DNOLEX -DDEBUG
|
||||
|
||||
# Inventory of the C source files.  y.tab.c is generated from gram.y and is
# therefore not listed.  No rule in this Makefile references SRCS.
SRCS = \
	be.c burs.c closure.c delta.c fe.c item.c lex.c \
	list.c main.c map.c nonterminal.c operator.c pattern.c plank.c \
	queue.c rule.c string.c symtab.c table.c trim.c zalloc.c
|
||||
|
||||
# First group of objects linked into burg.  These are declared to depend on
# b.h only.
BU_OBJS = \
	burs.o closure.o delta.o item.o list.o map.o nonterminal.o \
	operator.o pattern.o queue.o rule.o table.o trim.o zalloc.o
|
||||
|
||||
# Second group of objects linked into burg, including the yacc-generated
# parser (y.tab.o).  These are declared to depend on both b.h and fe.h.
FE_OBJS = \
	be.o fe.o lex.o main.o plank.o string.o symtab.o y.tab.o
|
||||
|
||||
# Default goal: build burg and run its regression test (see the `test` rule).
# Declared phony so that a stray file named `all` cannot make the goal appear
# up to date.
all: test
.PHONY: all
|
||||
|
||||
# Link the burg executable from both object groups.
burg: $(BU_OBJS) $(FE_OBJS)
	$(CC) $(CFLAGS) -o $@ $(BU_OBJS) $(FE_OBJS)
|
||||
|
||||
# Generate the parser source and its token header from the yacc grammar.
# A single yacc run writes both files, so only y.tab.c carries the recipe:
# naming both files on one rule line declares two independent rules, and a
# parallel build (`make -j`) could then run yacc twice at the same time.
y.tab.c: gram.y
	$(YACC) -d gram.y

# y.tab.h is a by-product of the rule above.  If the header has been removed
# while y.tab.c still exists, discard y.tab.c and regenerate both files.
y.tab.h: y.tab.c
	@test -f $@ || rm -f y.tab.c
	@test -f $@ || $(MAKE) y.tab.c
|
||||
|
||||
# Remove generated files: objects, yacc output, core dumps, the burg binary,
# LaTeX by-products, and the files written by `make test`.
# Declared phony so that a file named `clean` cannot disable the target.
clean:
	rm -f *.o y.tab.h y.tab.c core burg *.aux *.log *.dvi sample sample.c tmp
.PHONY: clean
|
||||
|
||||
# Header dependencies.  Every object depends on b.h; the FE_OBJS group also
# depends on fe.h; lex.o additionally needs the yacc-generated token header.
$(BU_OBJS) $(FE_OBJS): b.h
$(FE_OBJS): fe.h
lex.o: y.tab.h
|
||||
|
||||
# Typeset the manual.  LaTeX is run twice so that the cross-reference data
# written to doc.aux by the first pass can be read by the second.  The passes
# are chained with `&&` so that a failing first pass stops the build instead
# of having its exit status discarded.
doc.dvi: doc.tex
	latex doc && latex doc
|
||||
|
||||
# Regression test.  The first command generates sample.c from sample.gr, then
# compiles and runs it.  Each following command regenerates the output through
# a different invocation form (file argument, -o, -O0, -=) and uses cmp to
# check that the result is byte-identical to sample.c.
# The compiler is invoked through $(CC) so that it matches the one used to
# build burg.  Declared phony because no file named `test` is produced.
test: burg sample.gr
	./burg -I <sample.gr >sample.c && $(CC) $(CFLAGS) -o sample sample.c && ./sample
	./burg -I sample.gr >tmp && cmp tmp sample.c
	./burg -I <sample.gr -o tmp && cmp tmp sample.c
	./burg -I sample.gr -o tmp && cmp tmp sample.c
	./burg -I -O0 <sample.gr >tmp && cmp tmp sample.c
	./burg -I -= <sample.gr >tmp && cmp tmp sample.c
.PHONY: test
|
50
support/tools/Burg/Doc/doc.aux
Normal file
50
support/tools/Burg/Doc/doc.aux
Normal file
@ -0,0 +1,50 @@
|
||||
\relax
|
||||
\bibstyle{alpha}
|
||||
\citation{aho-twig-toplas}
|
||||
\citation{appel-87}
|
||||
\citation{balachandran-complang}
|
||||
\citation{kron-phd}
|
||||
\citation{hoffmann-jacm}
|
||||
\citation{hatcher-popl}
|
||||
\citation{chase-popl}
|
||||
\citation{pelegri-popl}
|
||||
\citation{pelegri-phd}
|
||||
\citation{wilhelm-tr}
|
||||
\citation{henry-budp}
|
||||
\citation{fraser-henry-spe-91}
|
||||
\citation{proebsting-91}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{1}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {2}Input}{1}}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces A Sample Tree Grammar}}{2}}
|
||||
\newlabel{fig-tree-grammar}{{1}{2}}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces EBNF Grammar for Tree Grammars for {\sc Burg}\ }}{3}}
|
||||
\newlabel{fig-grammar-grammar}{{2}{3}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3}Output}{3}}
|
||||
\citation{aho-johnson-dp-classic}
|
||||
\citation{fraser-henry-spe-91}
|
||||
\citation{henry-budp}
|
||||
\citation{pelegri-phd}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4}Debugging}{6}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {5}Running {\sc Burg}\ }{6}}
|
||||
\newlabel{sec-man-page}{{5}{6}}
|
||||
\citation{pelegri-popl}
|
||||
\citation{henry-budp}
|
||||
\citation{balachandran-complang}
|
||||
\citation{proebsting-91}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A Diverging Tree Grammar}}{7}}
|
||||
\newlabel{fig-diverge-grammar}{{3}{7}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {6}Acknowledgements}{7}}
|
||||
\bibcite{aho-twig-toplas}{AGT89}
|
||||
\bibcite{aho-johnson-dp-classic}{AJ76}
|
||||
\bibcite{appel-87}{App87}
|
||||
\bibcite{balachandran-complang}{BDB90}
|
||||
\bibcite{wilhelm-tr}{BMW87}
|
||||
\bibcite{chase-popl}{Cha87}
|
||||
\bibcite{fraser-henry-spe-91}{FH91}
|
||||
\bibcite{hatcher-popl}{HC86}
|
||||
\bibcite{henry-budp}{Hen89}
|
||||
\bibcite{hoffmann-jacm}{HO82}
|
||||
\bibcite{kron-phd}{Kro75}
|
||||
\bibcite{pelegri-phd}{PL87}
|
||||
\bibcite{pelegri-popl}{PLG88}
|
||||
\bibcite{proebsting-91}{Pro91}
|
BIN
support/tools/Burg/Doc/doc.dvi
Normal file
BIN
support/tools/Burg/Doc/doc.dvi
Normal file
Binary file not shown.
157
support/tools/Burg/Doc/doc.log
Normal file
157
support/tools/Burg/Doc/doc.log
Normal file
@ -0,0 +1,157 @@
|
||||
This is TeX, Version 3.14159 (Web2C 7.3.2) (format=latex 2000.8.30) 4 JUN 2001 13:20
|
||||
**doc
|
||||
(doc.tex
|
||||
LaTeX2e <2000/06/01>
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/latex209.def
|
||||
File: latex209.def 1998/05/13 v0.52 Standard LaTeX file
|
||||
|
||||
|
||||
Entering LaTeX 2.09 COMPATIBILITY MODE
|
||||
*************************************************************
|
||||
!!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!!
|
||||
|
||||
This mode attempts to provide an emulation of the LaTeX 2.09
|
||||
author environment so that OLD documents can be successfully
|
||||
processed. It should NOT be used for NEW documents!
|
||||
|
||||
New documents should use Standard LaTeX conventions and start
|
||||
with the \documentclass command.
|
||||
|
||||
Compatibility mode is UNLIKELY TO WORK with LaTeX 2.09 style
|
||||
files that change any internal macros, especially not with
|
||||
those that change the FONT SELECTION or OUTPUT ROUTINES.
|
||||
|
||||
Therefore such style files MUST BE UPDATED to use
|
||||
Current Standard LaTeX: LaTeX2e.
|
||||
If you suspect that you may be using such a style file, which
|
||||
is probably very, very old by now, then you should attempt to
|
||||
get it updated by sending a copy of this error message to the
|
||||
author of that file.
|
||||
*************************************************************
|
||||
|
||||
\footheight=\dimen102
|
||||
\@maxsep=\dimen103
|
||||
\@dblmaxsep=\dimen104
|
||||
\@cla=\count79
|
||||
\@clb=\count80
|
||||
\mscount=\count81
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/tracefnt.sty
|
||||
Package: tracefnt 1997/05/29 v3.0j Standard LaTeX package (font tracing)
|
||||
\tracingfonts=\count82
|
||||
LaTeX Info: Redefining \selectfont on input line 96.
|
||||
)
|
||||
\symbold=\mathgroup4
|
||||
\symsans=\mathgroup5
|
||||
\symtypewriter=\mathgroup6
|
||||
\symitalic=\mathgroup7
|
||||
\symsmallcaps=\mathgroup8
|
||||
\symslanted=\mathgroup9
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 288.
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathsf on input line 289.
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathtt on input line 290.
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathit on input line 296.
|
||||
LaTeX Info: Redefining \em on input line 306.
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/latexsym.sty
|
||||
Package: latexsym 1998/08/17 v2.2e Standard LaTeX package (lasy symbols)
|
||||
\symlasy=\mathgroup10
|
||||
LaTeX Font Info: Overwriting symbol font `lasy' in version `bold'
|
||||
(Font) U/lasy/m/n --> U/lasy/b/n on input line 42.
|
||||
)
|
||||
LaTeX Font Info: Redeclaring math delimiter \lgroup on input line 370.
|
||||
LaTeX Font Info: Redeclaring math delimiter \rgroup on input line 372.
|
||||
LaTeX Font Info: Redeclaring math delimiter \bracevert on input line 374.
|
||||
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/config/latex209.cf
|
||||
g
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/tools/rawfonts.sty
|
||||
Compatibility mode: package `' requested, but `rawfonts' provided.
|
||||
Package: rawfonts 1994/05/08 Low-level LaTeX 2.09 font compatibility
|
||||
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/tools/somedefs.sty
|
||||
Package: somedefs 1994/06/01 Toolkit for optional definitions
|
||||
)
|
||||
LaTeX Font Info: Try loading font information for U+lasy on input line 44.
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/ulasy.fd
|
||||
File: ulasy.fd 1998/08/17 v2.2eLaTeX symbol font definitions
|
||||
)))) (/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/article.
|
||||
cls
|
||||
Document Class: article 2000/05/19 v1.4b Standard LaTeX document class
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/size11.clo
|
||||
File: size11.clo 2000/05/19 v1.4b Standard LaTeX file (size option)
|
||||
)
|
||||
\c@part=\count83
|
||||
\c@section=\count84
|
||||
\c@subsection=\count85
|
||||
\c@subsubsection=\count86
|
||||
\c@paragraph=\count87
|
||||
\c@subparagraph=\count88
|
||||
\c@figure=\count89
|
||||
\c@table=\count90
|
||||
\abovecaptionskip=\skip41
|
||||
\belowcaptionskip=\skip42
|
||||
Compatibility mode: definition of \rm ignored.
|
||||
Compatibility mode: definition of \sf ignored.
|
||||
Compatibility mode: definition of \tt ignored.
|
||||
Compatibility mode: definition of \bf ignored.
|
||||
Compatibility mode: definition of \it ignored.
|
||||
Compatibility mode: definition of \sl ignored.
|
||||
Compatibility mode: definition of \sc ignored.
|
||||
LaTeX Info: Redefining \cal on input line 501.
|
||||
LaTeX Info: Redefining \mit on input line 502.
|
||||
\bibindent=\dimen105
|
||||
)
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/pstex/fullpage.sty
|
||||
) (doc.aux)
|
||||
\openout1 = `doc.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <12> on input line 33.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <8> on input line 33.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <6> on input line 33.
|
||||
LaTeX Font Info: Try loading font information for OMS+cmtt on input line 100
|
||||
.
|
||||
LaTeX Font Info: No file OMScmtt.fd. on input line 100.
|
||||
LaTeX Font Warning: Font shape `OMS/cmtt/m/n' undefined
|
||||
(Font) using `OMS/cmsy/m/n' instead
|
||||
(Font) for symbol `textbraceleft' on input line 100.
|
||||
[1
|
||||
|
||||
]
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <10.95> on input line 150.
|
||||
[2] [3] [4] [5] [6]
|
||||
Overfull \hbox (1.38191pt too wide) in paragraph at lines 480--484
|
||||
[]\OT1/cmr/m/n/10.95 Emit code for \OT1/cmtt/m/n/10.95 burm[]arity\OT1/cmr/m/n/
|
||||
10.95 , \OT1/cmtt/m/n/10.95 burm[]child\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.95
|
||||
burm[]cost\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.95 burm[]ntname\OT1/cmr/m/n/10
|
||||
.95 , \OT1/cmtt/m/n/10.95 burm[]op[]label\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.
|
||||
95 burm[]opname\OT1/cmr/m/n/10.95 ,
|
||||
[]
|
||||
|
||||
[7] [8] [9] (doc.aux)
|
||||
LaTeX Font Warning: Some font shapes were not available, defaults substituted.
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
543 strings out of 12968
|
||||
6147 string characters out of 289029
|
||||
446019 words of memory out of 1453895
|
||||
3433 multiletter control sequences out of 10000+10000
|
||||
23403 words of font info for 87 fonts, out of 400000 for 2000
|
||||
14 hyphenation exceptions out of 1000
|
||||
21i,6n,20p,308b,283s stack positions out of 300i,100n,500p,50000b,4000s
|
||||
|
||||
Output written on doc.dvi (9 pages, 29856 bytes).
|
596
support/tools/Burg/Doc/doc.tex
Normal file
596
support/tools/Burg/Doc/doc.tex
Normal file
@ -0,0 +1,596 @@
|
||||
\documentstyle[11pt,fullpage]{article}
|
||||
\begin{document}
|
||||
|
||||
\def\AddSpace#1{\ifcat#1a\ \fi#1} % if next is a letter, add a space
|
||||
\def\YACC#1{{\sc Yacc}\AddSpace#1}
|
||||
\def\TWIG#1{{\sc Twig}\AddSpace#1}
|
||||
\def\PROG#1{{\sc Burg}\AddSpace#1}
|
||||
\def\PARSER#1{{\sc Burm}\AddSpace#1}
|
||||
\def\CODEGEN#1{{\sc Codegen}\AddSpace#1}
|
||||
|
||||
\title{{\sc Burg} --- Fast Optimal Instruction Selection and Tree Parsing}
|
||||
\author{
|
||||
Christopher W. Fraser \\
|
||||
AT\&T Bell Laboratories \\
|
||||
600 Mountain Avenue 2C-464 \\
|
||||
Murray Hill, NJ 07974-0636 \\
|
||||
{\tt cwf@research.att.com}
|
||||
\and
|
||||
Robert R. Henry \\
|
||||
Tera Computer Company \\
|
||||
400 N. 34th St., Suite 300 \\
|
||||
Seattle, WA 98103-8600 \\
|
||||
{\tt rrh@tera.com}
|
||||
\and
|
||||
Todd A. Proebsting \\
|
||||
Dept. of Computer Sciences \\
|
||||
University of Wisconsin \\
|
||||
Madison, WI 53706 \\
|
||||
{\tt todd@cs.wisc.edu}
|
||||
}
|
||||
\date{December 1991}
|
||||
|
||||
\maketitle
|
||||
\bibliographystyle{alpha}
|
||||
\newcommand\term[1]{{\it #1}}
|
||||
\newcommand\secref[1]{\S\ref{#1}}
|
||||
\newcommand\figref[1]{Figure~\ref{#1}}
|
||||
%
|
||||
% rationale table making
|
||||
%
|
||||
{\catcode`\^^M=13 \gdef\Obeycr{\catcode`\^^M=13 \def^^M{\\}}%
|
||||
\gdef\Restorecr{\catcode`\^^M=5 }} %
|
||||
|
||||
%
|
||||
% for printing out options
|
||||
%
|
||||
\newcommand\option[1]{% #1=option character
|
||||
{\tt -#1}%
|
||||
}
|
||||
\newcommand\var[1]{%
|
||||
{\tt #1}%
|
||||
}
|
||||
\section{Overview}
|
||||
|
||||
\PROG is a program that generates a fast tree parser using BURS
|
||||
(Bottom-Up Rewrite System) technology. It accepts a cost-augmented
|
||||
tree grammar and emits a C program that discovers in linear time an
|
||||
optimal parse of trees in the language described by the grammar. \PROG
|
||||
has been used to construct fast optimal instruction selectors for use
|
||||
in code generation. \PROG addresses many of the problems addressed by
|
||||
{\sc Twig}~\cite{aho-twig-toplas,appel-87}, but it is somewhat less flexible and
|
||||
much faster. \PROG is available via anonymous \var{ftp} from
|
||||
\var{kaese.cs.wisc.edu}. The compressed \var{shar} file
|
||||
\var{pub/burg.shar.Z} holds the complete distribution.
|
||||
|
||||
This document describes only that fraction of the BURS model that is
|
||||
required to use \PROG. Readers interested in more detail might start
|
||||
with Reference~\cite{balachandran-complang}. Other relevant documents
|
||||
include References~\cite{kron-phd,hoffmann-jacm,hatcher-popl,chase-popl,pelegri-popl,pelegri-phd,wilhelm-tr,henry-budp,fraser-henry-spe-91,proebsting-91}.
|
||||
|
||||
\section{Input}
|
||||
|
||||
\PROG accepts a tree grammar and emits a BURS tree parser.
|
||||
\figref{fig-tree-grammar} shows a sample grammar that implements a very
|
||||
simple instruction selector.
|
||||
\begin{figure}
|
||||
\begin{verbatim}
|
||||
%{
|
||||
#define NODEPTR_TYPE treepointer
|
||||
#define OP_LABEL(p) ((p)->op)
|
||||
#define LEFT_CHILD(p) ((p)->left)
|
||||
#define RIGHT_CHILD(p) ((p)->right)
|
||||
#define STATE_LABEL(p) ((p)->state_label)
|
||||
#define PANIC printf
|
||||
%}
|
||||
%start reg
|
||||
%term Assign=1 Constant=2 Fetch=3 Four=4 Mul=5 Plus=6
|
||||
%%
|
||||
con: Constant = 1 (0);
|
||||
con: Four = 2 (0);
|
||||
addr: con = 3 (0);
|
||||
addr: Plus(con,reg) = 4 (0);
|
||||
addr: Plus(con,Mul(Four,reg)) = 5 (0);
|
||||
reg: Fetch(addr) = 6 (1);
|
||||
reg: Assign(addr,reg) = 7 (1);
|
||||
\end{verbatim}
|
||||
\caption{A Sample Tree Grammar\label{fig-tree-grammar}}
|
||||
\end{figure}
|
||||
\PROG grammars are structurally similar to \YACC's. Comments follow C
|
||||
conventions. Text between ``\var{\%\{}'' and ``\var{\%\}}'' is called
|
||||
the \term{configuration section}; there may be several such segments.
|
||||
All are concatenated and copied verbatim into the head of the generated
|
||||
parser, which is called \PARSER. Text after the second ``\var{\%\%}'',
|
||||
if any, is also copied verbatim into \PARSER, at the end.
|
||||
|
||||
The configuration section configures \PARSER for the trees being parsed
|
||||
and the client's environment. This section must define
|
||||
\var{NODEPTR\_TYPE} to be a visible typedef symbol for a pointer to a
|
||||
node in the subject tree. \PARSER invokes \var{OP\_LABEL(p)},
|
||||
\var{LEFT\_CHILD(p)}, and \var{RIGHT\_CHILD(p)} to read the operator
|
||||
and children from the node pointed to by \var{p}. It invokes
|
||||
\var{PANIC} when it detects an error. If the configuration section
|
||||
defines these operations as macros, they are implemented in-line;
|
||||
otherwise, they must be implemented as functions. The section on
|
||||
debugging elaborates on \var{PANIC}.
|
||||
|
||||
\PARSER computes and stores a single integral \term{state} in each node
|
||||
of the subject tree. The configuration section must define a macro
|
||||
\var{STATE\_LABEL(p)} to access the state field of the node pointed to
|
||||
by \var{p}. A macro is required because \PROG uses it as an lvalue. A
|
||||
C \var{short} is usually the right choice; typical code generation
|
||||
grammars require 100--1000 distinct state labels.
|
||||
|
||||
The tree grammar follows the configuration section.
|
||||
\figref{fig-grammar-grammar} gives an EBNF grammar for \PROG tree
|
||||
grammars.
|
||||
\begin{figure}
|
||||
\begin{verbatim}
|
||||
grammar: {dcl} '%%' {rule}
|
||||
|
||||
dcl: '%start' Nonterminal
|
||||
dcl: '%term' { Identifier '=' Integer }
|
||||
|
||||
rule: Nonterminal ':' tree '=' Integer cost ';'
|
||||
cost: /* empty */
|
||||
cost: '(' Integer { ',' Integer } ')'
|
||||
|
||||
tree: Term '(' tree ',' tree ')'
|
||||
tree: Term '(' tree ')'
|
||||
tree: Term
|
||||
tree: Nonterminal
|
||||
\end{verbatim}
|
||||
\caption{EBNF Grammar for Tree Grammars for \PROG\ \label{fig-grammar-grammar}}
|
||||
\end{figure}
|
||||
Comments, the text between ``\var{\%\{}'' and ``\var{\%\}}'', and the
|
||||
text after the optional second ``\var{\%\%}'' are treated lexically, so
|
||||
the figure omits them. In the EBNF grammar, quoted text must appear
|
||||
literally, \var{Nonterminal} and \var{Integer} are self-explanatory,
|
||||
and \var{Term} denotes an identifier previously declared as a
|
||||
terminal. {\tt\{$X$\}} denotes zero or more instances of $X$.
|
||||
|
||||
Text before the first ``\var{\%\%}'' declares the start symbol and the
|
||||
terminals or operators in subject trees. All terminals must be
|
||||
declared; each line of such declarations begins with \var{\%term}.
|
||||
Each terminal has a fixed arity, which \PROG infers from the rules using that terminal.
|
||||
\PROG restricts terminals to have at most two children. Each terminal
|
||||
is declared with a positive, unique, integral \term{external symbol
|
||||
number} after a ``\var{=}''. \var{OP\_LABEL(p)} must return the valid
|
||||
external symbol number for \var{p}. Ideally, external symbol numbers
|
||||
form a dense enumeration. Non-terminals are not declared, but the
|
||||
start symbol may be declared with a line that begins with
|
||||
\var{\%start}.
|
||||
|
||||
Text after the first ``\var{\%\%}'' declares the rules. A tree grammar
|
||||
is like a context-free grammar: it has rules, non-terminals,
|
||||
terminals, and a special start non-terminal. The right-hand side of a
|
||||
rule, called the \term{pattern}, is a tree. Tree patterns appear in
|
||||
prefix parenthesized form. Every non-terminal denotes a tree. A chain
|
||||
rule is a rule whose pattern is another non-terminal. If no start
|
||||
symbol is declared, \PROG uses the non-terminal defined by the first
|
||||
rule. \PROG needs a single start symbol; grammars for which it is
|
||||
natural to use multiple start symbols must be augmented with an
|
||||
artificial start symbol that derives, with zero cost, the grammar's
|
||||
natural start symbols. \PARSER will automatically select one
|
||||
that costs least for any given tree.
|
||||
|
||||
\PROG accepts no embedded semantic actions like \YACC's, because no one
|
||||
format suited all intended applications. Instead, each rule has a
|
||||
positive, unique, integral \term{external rule number}, after the
|
||||
pattern and preceded by a ``\var{=}''. Ideally, external rule numbers
|
||||
form a dense enumeration. \PARSER uses these numbers to report the
|
||||
matching rule to a user-supplied routine, which must implement any
|
||||
desired semantic action; see below. Humans may select these integers
|
||||
by hand, but \PROG is intended as a \term{server} for building BURS
|
||||
tree parsers. Thus some \PROG clients will consume a richer
|
||||
description and translate it into \PROG's simpler input.
|
||||
|
||||
Rules end with a vector of non-negative, integer costs, in parentheses
|
||||
and separated by commas. If the cost vector is omitted, then all
|
||||
elements are assumed to be zero. \PROG retains only the first four
|
||||
elements of the list. The cost of a derivation is the sum of the costs
|
||||
for all rules applied in the derivation. Arithmetic on cost vectors
|
||||
treats each member of the vector independently. The tree parser finds
|
||||
the cheapest parse of the subject tree. It breaks ties arbitrarily.
|
||||
By default, \PROG uses only the \term{principal cost} of each cost
|
||||
vector, which defaults to the first element, but options described
|
||||
below provide alternatives.
|
||||
|
||||
\section{Output}
|
||||
|
||||
\PARSER traverses the subject tree twice. The first pass or
|
||||
\term{labeller} runs bottom-up and left-to-right, visiting each node
|
||||
exactly once. Each node is labeled with a state, a single number that
|
||||
encodes all full and partial optimal pattern matches viable at that
|
||||
node. The second pass or \term{reducer} traverses the subject tree
|
||||
top-down. The reducer accepts a tree node's state label and a
|
||||
\term{goal} non-terminal --- initially the root's state label and the
|
||||
start symbol --- which combine to determine the rule to be applied at
|
||||
that node. By construction, the rule has the given goal non-terminal
|
||||
as its left-hand side. The rule's pattern identifies the subject
|
||||
subtrees and goal non-terminals for all recursive visits. Here, a
|
||||
``subtree'' is not necessarily an immediate child of the current node.
|
||||
Patterns with interior operators cause the reducer to skip the
|
||||
corresponding subject nodes, so the reducer may proceed directly to
|
||||
grandchildren, great-grandchildren, and so on. On the other hand,
|
||||
chain rules cause the reducer to revisit the current subject node, with
|
||||
a new goal
|
||||
non-terminal, so \term{x} is also regarded as a subtree of \term{x}.
|
||||
|
||||
As the reducer visits (and possibly revisits) each node, user-supplied
|
||||
code implements semantic action side effects and controls the order in
|
||||
which subtrees are visited. The labeller is self-contained, but the
|
||||
reducer combines code from \PROG with code from the user, so \PARSER
|
||||
does not stand alone.
|
||||
|
||||
The \PARSER that is generated by \PROG provides primitives for
|
||||
labelling and reducing trees. These mechanisms are a compromise
|
||||
between expressibility, abstraction, simplicity, flexibility and
|
||||
efficiency. Clients may combine primitives into labellers and reducers
|
||||
that can traverse trees in arbitrary ways, and they may call semantic
|
||||
routines when and how they wish during traversal. Also, \PROG
|
||||
generates a few higher level routines that implement common
|
||||
combinations of primitives, and it generates mechanisms that help debug
|
||||
the tree parse.
|
||||
|
||||
\PROG generates the labeller as a function named \var{burm\_label} with
|
||||
the signature
|
||||
\begin{verbatim}
|
||||
extern int burm_label(NODEPTR_TYPE p);
|
||||
\end{verbatim}
|
||||
It labels the entire subject tree pointed to by \var{p} and returns the
|
||||
root's state label. State zero labels unmatched trees. The trees may
|
||||
be corrupt or merely inconsistent with the grammar.
|
||||
|
||||
The simpler \var{burm\_state} is \var{burm\_label} without the
|
||||
code to traverse the tree and to read and write its fields. It may be
|
||||
used to integrate labelling into user-supplied traversal code. A
|
||||
typical signature is
|
||||
\begin{verbatim}
|
||||
extern int burm_state(int op, int leftstate, int rightstate);
|
||||
\end{verbatim}
|
||||
It accepts an external symbol number for a node and the labels for the
|
||||
node's left and right children. It returns the state label to assign
|
||||
to that node. For unary operators, the last argument is ignored; for
|
||||
leaves, the last two arguments are ignored. In general, \PROG
|
||||
generates a \var{burm\_state} that accepts the maximum number of child
|
||||
states required by the input grammar. For example, if the grammar
|
||||
includes no binary operators, then \var{burm\_state} will have the
|
||||
signature
|
||||
\begin{verbatim}
|
||||
extern int burm_state(int op, int leftstate);
|
||||
\end{verbatim}
|
||||
This feature is included to permit future expansion to operators with
|
||||
more than two children.
|
||||
|
||||
The user must write the reducer, but \PARSER writes code and data that
|
||||
help. The primary aid is
|
||||
\begin{verbatim}
|
||||
extern int burm_rule(int state, int goalnt);
|
||||
\end{verbatim}
|
||||
which accepts a tree's state label and a goal non-terminal and returns the
|
||||
external rule number of a rule. The rule will have matched the tree
|
||||
and have the goal non-terminal on the left-hand side; \var{burm\_rule}
|
||||
returns zero when the tree labelled with the given state did not match
|
||||
the goal non-terminal. For the initial, root-level call, \var{goalnt}
|
||||
must be one, and \PARSER exports an array that identifies the values
|
||||
for nested calls:
|
||||
\begin{verbatim}
|
||||
extern short *burm_nts[] = { ... };
|
||||
\end{verbatim}
|
||||
is an array indexed by external rule numbers. Each element points to a
|
||||
zero-terminated vector of short integers, which encode the goal
|
||||
non-terminals for that rule's pattern, left-to-right. The user needs
|
||||
only these two externals to write a complete reducer, but a third
|
||||
external simplifies some applications:
|
||||
\begin{verbatim}
|
||||
extern NODEPTR_TYPE *burm_kids(NODEPTR_TYPE p, int eruleno, NODEPTR_TYPE kids[]);
|
||||
\end{verbatim}
|
||||
accepts the address of a tree \var{p}, an external rule number, and an
|
||||
empty vector of pointers to trees. The procedure assumes that \var{p}
|
||||
matched the given rule, and it fills in the vector with the subtrees (in
|
||||
the sense described above) of \var{p} that must be reduced recursively.
|
||||
\var{kids} is returned. It is not zero-terminated.
|
||||
|
||||
The simple user code below labels and then fully reduces a subject tree;
|
||||
the reducer prints the tree cover. \var{burm\_string} is defined below.
|
||||
\begin{verbatim}
|
||||
parse(NODEPTR_TYPE p) {
|
||||
burm_label(p); /* label the tree */
|
||||
reduce(p, 1, 0); /* and reduce it */
|
||||
}
|
||||
|
||||
reduce(NODEPTR_TYPE p, int goalnt, int indent) {
|
||||
int eruleno = burm_rule(STATE_LABEL(p), goalnt); /* matching rule number */
|
||||
short *nts = burm_nts[eruleno]; /* subtree goal non-terminals */
|
||||
NODEPTR_TYPE kids[10]; /* subtree pointers */
|
||||
int i;
|
||||
|
||||
for (i = 0; i < indent; i++)
|
||||
printf("."); /* print indented ... */
|
||||
printf("%s\n", burm_string[eruleno]); /* ... text of rule */
|
||||
burm_kids(p, eruleno, kids); /* initialize subtree pointers */
|
||||
for (i = 0; nts[i]; i++) /* traverse subtrees left-to-right */
|
||||
reduce(kids[i], nts[i], indent+1); /* and print them recursively */
|
||||
}
|
||||
\end{verbatim}
|
||||
The reducer may recursively traverse subtrees in any order, and it may
|
||||
interleave arbitrary semantic actions with recursive traversals.
|
||||
Multiple reducers may be written, to implement multi-pass algorithms
|
||||
or independent single-pass algorithms.
|
||||
|
||||
For each non-terminal $x$, \PROG emits a preprocessor directive to
|
||||
equate \var{burm\_}$x$\var{\_NT} with $x$'s integral encoding. It also
|
||||
defines a macro \var{burm\_}$x$\var{\_rule(a)} that is equivalent to
|
||||
\var{burm\_rule(a,}$x$\var{)}. For the grammar in
|
||||
\figref{fig-tree-grammar}, \PROG emits
|
||||
\begin{verbatim}
|
||||
#define burm_reg_NT 1
|
||||
#define burm_con_NT 2
|
||||
#define burm_addr_NT 3
|
||||
#define burm_reg_rule(a) ...
|
||||
#define burm_con_rule(a) ...
|
||||
#define burm_addr_rule(a) ...
|
||||
\end{verbatim}
|
||||
Such symbols are visible only to the code after the second
|
||||
``\var{\%\%}''. If the symbols \var{burm\_}$x$\var{\_NT} are needed
|
||||
elsewhere, extract them from the \PARSER source.
|
||||
|
||||
The \option{I} option directs \PROG to emit an encoding of the input
|
||||
that may help the user produce diagnostics. The vectors
|
||||
\begin{verbatim}
|
||||
extern char *burm_opname[];
|
||||
extern char burm_arity[];
|
||||
\end{verbatim}
|
||||
hold the name and number of children, respectively, for each terminal.
|
||||
They are indexed by the terminal's external symbol number. The vectors
|
||||
\begin{verbatim}
|
||||
extern char *burm_string[];
|
||||
extern short burm_cost[][4];
|
||||
\end{verbatim}
|
||||
hold the text and cost vector for each rule. They are indexed by the
|
||||
external rule number. The zero-terminated vector
|
||||
\begin{verbatim}
|
||||
extern char *burm_ntname[];
|
||||
\end{verbatim}
|
||||
is indexed by \var{burm\_}$x$\var{\_NT} and holds the name of
|
||||
non-terminal $x$. Finally, the procedures
|
||||
\begin{verbatim}
|
||||
extern int burm_op_label(NODEPTR_TYPE p);
|
||||
extern int burm_state_label(NODEPTR_TYPE p);
|
||||
extern NODEPTR_TYPE burm_child(NODEPTR_TYPE p, int index);
|
||||
\end{verbatim}
|
||||
are callable versions of the configuration macros.
|
||||
\var{burm\_child(p,0)} implements \var{LEFT\_CHILD(p)}, and
|
||||
\var{burm\_child(p,1)} implements \var{RIGHT\_CHILD(p)}. A sample use
|
||||
is the grammar-independent expression
|
||||
\var{burm\_opname[burm\_op\_label(p)]}, which yields the textual name
|
||||
for the operator in the tree node pointed to by \var{p}.
|
||||
|
||||
A complete tree parser can be assembled from just \var{burm\_state},
|
||||
\var{burm\_rule}, and \var{burm\_nts}, which use none of the
|
||||
configuration section except \var{PANIC}. The generated routines that
|
||||
use the rest of the configuration section are compiled only if the
|
||||
configuration section defines \var{STATE\_LABEL}, so they can be
|
||||
omitted if the user prefers to hide the tree structure from \PARSER.
|
||||
This course may be wise if, say, the tree structure is defined in a
|
||||
large header file with symbols that might collide with \PARSER's.
|
||||
|
||||
\PARSER selects an optimal parse without dynamic programming at compile
|
||||
time~\cite{aho-johnson-dp-classic}. Instead, \PROG does the dynamic
|
||||
programming at compile-compile time, as it builds \PARSER.
|
||||
Consequently, \PARSER parses quickly. Similar labellers have taken as
|
||||
few as 15 instructions per node, and reducers as few as 35 per node
|
||||
visited~\cite{fraser-henry-spe-91}.
|
||||
|
||||
\section{Debugging}
|
||||
|
||||
\PARSER invokes \var{PANIC} when an error prevents it from proceeding.
|
||||
\var{PANIC} has the same signature as \var{printf}. It should pass its
|
||||
arguments to \var{printf} if diagnostics are desired and then either
|
||||
abort (say via \var{exit}) or recover (say via \var{longjmp}). If it
|
||||
returns, \PARSER aborts. Some errors are not caught.
|
||||
|
||||
\PROG assumes a robust preprocessor, so it omits full consistency
|
||||
checking and error recovery. \PROG constructs a set of states using a
|
||||
closure algorithm like that used in LR table construction. \PROG
|
||||
considers all possible trees generated by the tree grammar and
|
||||
summarizes infinite sets of trees with finite sets. The summary
|
||||
records the cost of those trees but actually manipulates the
|
||||
differences in costs between viable alternatives using a dynamic
|
||||
programming algorithm. Reference~\cite{henry-budp} elaborates.
|
||||
|
||||
Some grammars derive trees whose optimal parses depend on arbitrarily
|
||||
distant data. When this happens, \PROG and the tree grammar
|
||||
\term{cost-diverge}, and \PROG attempts to build an infinite
|
||||
set of states; it first thrashes and ultimately exhausts
|
||||
memory and exits. For example, the tree grammar in
|
||||
\figref{fig-diverge-grammar}
|
||||
\begin{figure}
|
||||
\begin{verbatim}
|
||||
%term Const=17 RedFetch=20 GreenFetch=21 Plus=22
|
||||
%%
|
||||
reg: GreenFetch(green_reg) = 10 (0);
|
||||
reg: RedFetch(red_reg) = 11 (0);
|
||||
|
||||
green_reg: Const = 20 (0);
|
||||
green_reg: Plus(green_reg,green_reg) = 21 (1);
|
||||
|
||||
red_reg: Const = 30 (0);
|
||||
red_reg: Plus(red_reg,red_reg) = 31 (2);
|
||||
\end{verbatim}
|
||||
\caption{A Diverging Tree Grammar\label{fig-diverge-grammar}}
|
||||
\end{figure}
|
||||
diverges, since non-terminals \var{green\_reg} and \var{red\_reg}
|
||||
derive identical infinite trees with different costs. If the cost of
|
||||
rule 31 is changed to 1, then the grammar does not diverge.
|
||||
|
||||
Practical tree grammars describing instruction selection do not
|
||||
cost-diverge because infinite trees are derived from non-terminals
|
||||
that model temporary registers. Machines can move data between
|
||||
different types of registers for a small bounded cost, and the rules
|
||||
for these instructions prevent divergence. For example, if
|
||||
\figref{fig-diverge-grammar} included rules to move data between red
|
||||
and green registers, the grammar would not diverge. If a bona fide
|
||||
machine grammar appears to make \PROG loop, try a host with more
|
||||
memory. To apply \PROG to problems other than instruction selection,
|
||||
be prepared to consult the literature on
|
||||
cost-divergence~\cite{pelegri-phd}.
|
||||
|
||||
\section{Running \PROG\ }\label{sec-man-page}
|
||||
|
||||
\PROG reads a tree grammar and writes a \PARSER in C. \PARSER can be
|
||||
compiled by itself or included in another file. When suitably named
|
||||
with the \option{p} option, disjoint instances of \PARSER should link
|
||||
together without name conflicts. The command:
|
||||
\begin{flushleft}
|
||||
\var{burg} [ {\it arguments} ] [ {\it file} ]
|
||||
\end{flushleft}
|
||||
invokes \PROG. If a {\it file} is named, \PROG expects its grammar
|
||||
there; otherwise it reads the standard input. The options include:
|
||||
\def\Empty{}
|
||||
%
|
||||
\newcommand\odescr[2]{% #1=option character, #2=optional argument
|
||||
\gdef\Arg2{#2}%
|
||||
\item[\option{#1}\ifx\Arg2\Empty\else{{\it #2}}\fi]
|
||||
}
|
||||
\begin{description}
|
||||
%
|
||||
\odescr{c}{} $N$
|
||||
Abort if any relative cost exceeds $N$, which keeps \PROG from looping on
|
||||
diverging grammars. Several
|
||||
references~\cite{pelegri-popl,henry-budp,balachandran-complang,proebsting-91}
|
||||
explain relative costs.
|
||||
%
|
||||
\odescr{d}{}
|
||||
Report a few statistics and flag unused rules and terminals.
|
||||
%
|
||||
\odescr{o}{} {\it file}
|
||||
Write parser into {\it file}. Otherwise it writes to the standard output.
|
||||
%
|
||||
\odescr{p}{} {\it prefix}
|
||||
Start exported names with {\it prefix}. The default is \var{burm}.
|
||||
%
|
||||
\odescr{t}{}
|
||||
Generate smaller tables faster, but all goal non-terminals passed to
|
||||
\var{burm\_rule} must come from an appropriate \var{burm\_nts}. Using
|
||||
\var{burm\_}$x$\var{\_NT} instead may give unpredictable results.
|
||||
%
|
||||
\odescr{I}{}
|
||||
Emit code for \var{burm\_arity}, \var{burm\_child}, \var{burm\_cost},
|
||||
\var{burm\_ntname}, \var{burm\_op\_label}, \var{burm\_opname},
|
||||
\var{burm\_state\_label}, and \var{burm\_string}.
|
||||
%
|
||||
\odescr{O}{} $N$
|
||||
Change the principal cost to $N$. Elements of each cost vector are
|
||||
numbered from zero.
|
||||
%
|
||||
\odescr{=}{}
|
||||
Compare costs lexicographically, using all costs in the given order.
|
||||
This option slows \PROG and may produce a larger parser. Increases
|
||||
range from small to astronomical.
|
||||
\end{description}
|
||||
|
||||
\section{Acknowledgements}
|
||||
|
||||
The first \PROG was adapted by the second author from his \CODEGEN
|
||||
package, which was developed at the University of Washington with
|
||||
partial support from NSF Grant CCR-88-01806. It was unbundled from
|
||||
\CODEGEN with the support of Tera Computer. The current \PROG was
|
||||
written by the third author with the support of NSF grant
|
||||
CCR-8908355. The interface, documentation, and testing involved
|
||||
all three authors.
|
||||
|
||||
Comments from a large group at the 1991 Dagstuhl Seminar on Code
|
||||
Generation improved \PROG's interface. Robert Giegerich and Susan
|
||||
Graham organized the workshop, and the International Conference and
|
||||
Research Center for Computer Science, Schloss Dagstuhl, provided an
|
||||
ideal environment for such collaboration. Beta-testers included Helmut
|
||||
Emmelmann, Dave Hanson, John Hauser, Hugh Redelmeier, and Bill Waite.
|
||||
|
||||
\begin{thebibliography}{BMW87}
|
||||
|
||||
\bibitem[AGT89]{aho-twig-toplas}
|
||||
Alfred~V. Aho, Mahadevan Ganapathi, and Steven W.~K. Tjiang.
|
||||
\newblock Code generation using tree matching and dynamic programming.
|
||||
\newblock {\em ACM Transactions on Programming Languages and Systems},
|
||||
11(4):491--516, October 1989.
|
||||
|
||||
\bibitem[AJ76]{aho-johnson-dp-classic}
|
||||
Alfred~V. Aho and Steven~C. Johnson.
|
||||
\newblock Optimal code generation for expression trees.
|
||||
\newblock {\em Journal of the ACM}, 23(3):458--501, July 1976.
|
||||
|
||||
\bibitem[App87]{appel-87}
|
||||
Andrew~W. Appel.
|
||||
\newblock Concise specification of locally optimal code generators.
|
||||
\newblock Technical report CS-TR-080-87, Princeton University, 1987.
|
||||
|
||||
\bibitem[BDB90]{balachandran-complang}
|
||||
A.~Balachandran, D.~M. Dhamdhere, and S.~Biswas.
|
||||
\newblock Efficient retargetable code generation using bottom-up tree pattern
|
||||
matching.
|
||||
\newblock {\em Computer Languages}, 15(3):127--140, 1990.
|
||||
|
||||
\bibitem[BMW87]{wilhelm-tr}
|
||||
J\"{u}rgen B\"{o}rstler, Ulrich M\"{o}nche, and Reinhard Wilhelm.
|
||||
\newblock Table compression for tree automata.
|
||||
\newblock Technical Report Aachener Informatik-Berichte No. 87-12, RWTH Aachen,
|
||||
Fachgruppe Informatik, Aachen, Fed. Rep. of Germany, 1987.
|
||||
|
||||
\bibitem[Cha87]{chase-popl}
|
||||
David~R. Chase.
|
||||
\newblock An improvement to bottom up tree pattern matching.
|
||||
\newblock {\em Fourteenth Annual ACM Symposium on Principles of Programming
|
||||
Languages}, pages 168--177, January 1987.
|
||||
|
||||
\bibitem[FH91]{fraser-henry-spe-91}
|
||||
Christopher~W. Fraser and Robert~R. Henry.
|
||||
\newblock Hard-coding bottom-up code generation tables to save time and space.
|
||||
\newblock {\em Software---Practice \& Experience}, 21(1):1--12, January 1991.
|
||||
|
||||
\bibitem[HC86]{hatcher-popl}
|
||||
Philip~J. Hatcher and Thomas~W. Christopher.
|
||||
\newblock High-quality code generation via bottom-up tree pattern matching.
|
||||
\newblock {\em Thirteenth Annual ACM Symposium on Principles of Programming
|
||||
Languages}, pages 119--130, January 1986.
|
||||
|
||||
\bibitem[Hen89]{henry-budp}
|
||||
Robert~R. Henry.
|
||||
\newblock Encoding optimal pattern selection in a table-driven bottom-up
|
||||
tree-pattern matcher.
|
||||
\newblock Technical Report 89-02-04, University of Washington Computer Science
|
||||
Department, Seattle, WA, February 1989.
|
||||
|
||||
\bibitem[HO82]{hoffmann-jacm}
|
||||
Christoph Hoffmann and Michael~J. O'Donnell.
|
||||
\newblock Pattern matching in trees.
|
||||
\newblock {\em Journal of the ACM}, 29(1):68--95, January 1982.
|
||||
|
||||
\bibitem[Kro75]{kron-phd}
|
||||
H.~H. Kron.
|
||||
\newblock {\em Tree Templates and Subtree Transformational Grammars}.
|
||||
\newblock PhD thesis, UC Santa Cruz, December 1975.
|
||||
|
||||
\bibitem[PL87]{pelegri-phd}
|
||||
Eduardo Pelegri-Llopart.
|
||||
\newblock {\em Tree Transformations in Compiler Systems}.
|
||||
\newblock PhD thesis, UC Berkeley, December 1987.
|
||||
|
||||
\bibitem[PLG88]{pelegri-popl}
|
||||
Eduardo Pelegri-Llopart and Susan~L. Graham.
|
||||
\newblock Optimal code generation for expression trees: An application of
|
||||
{BURS} theory.
|
||||
\newblock {\em Fifteenth Annual ACM Symposium on Principles of Programming
|
||||
Languages}, pages 294--308, January 1988.
|
||||
|
||||
\bibitem[Pro91]{proebsting-91}
|
||||
Todd~A. Proebsting.
|
||||
\newblock Simple and efficient {BURS} table generation.
|
||||
\newblock Technical report, Department of Computer Sciences, University of
|
||||
Wisconsin, 1991.
|
||||
|
||||
\end{thebibliography}
|
||||
|
||||
\end{document}
|
||||
|
84
utils/Burg/Doc/Makefile
Normal file
84
utils/Burg/Doc/Makefile
Normal file
@ -0,0 +1,84 @@
|
||||
# $Id$
|
||||
|
||||
# Compiler flags. Exactly one CFLAGS assignment is active (-g -DDEBUG); the
# commented-out lines are alternative settings kept for reference.
#CFLAGS =
|
||||
#CFLAGS = -O
|
||||
#CFLAGS = -O -DNOLEX
|
||||
CFLAGS = -g -DDEBUG
|
||||
#CFLAGS = -g -DNOLEX -DDEBUG
|
||||
|
||||
# List of the C source files. NOTE(review): no rule visible in this Makefile
# references SRCS, and the yacc-generated y.tab.c is not part of the list.
SRCS = \
|
||||
be.c \
|
||||
burs.c \
|
||||
closure.c \
|
||||
delta.c \
|
||||
fe.c \
|
||||
item.c \
|
||||
lex.c \
|
||||
list.c \
|
||||
main.c \
|
||||
map.c \
|
||||
nonterminal.c \
|
||||
operator.c \
|
||||
pattern.c \
|
||||
plank.c \
|
||||
queue.c \
|
||||
rule.c \
|
||||
string.c \
|
||||
symtab.c \
|
||||
table.c \
|
||||
trim.c \
|
||||
zalloc.c
|
||||
|
||||
# First object group linked into burg. These objects are declared to depend
# on b.h only (see the header dependency lines further down).
BU_OBJS = \
|
||||
burs.o \
|
||||
closure.o \
|
||||
delta.o \
|
||||
item.o \
|
||||
list.o \
|
||||
map.o \
|
||||
nonterminal.o \
|
||||
operator.o \
|
||||
pattern.o \
|
||||
queue.o \
|
||||
rule.o \
|
||||
table.o \
|
||||
trim.o \
|
||||
zalloc.o
|
||||
|
||||
# Second object group linked into burg. These objects are declared to depend
# on both b.h and fe.h; the group includes y.tab.o, whose source y.tab.c is
# generated from gram.y by the yacc rule.
FE_OBJS = \
|
||||
be.o \
|
||||
fe.o \
|
||||
lex.o \
|
||||
main.o \
|
||||
plank.o \
|
||||
string.o \
|
||||
symtab.o \
|
||||
y.tab.o
|
||||
|
||||
# Default goal (first rule in the file): `all` depends on `test`, which in
# turn builds burg and runs the self-test.
# NOTE(review): all, clean and test are not declared .PHONY, so a file with
# one of those names in this directory would mask the target.
all: test
|
||||
|
||||
# Link the burg executable from both object groups.
burg: $(BU_OBJS) $(FE_OBJS)
|
||||
$(CC) -o burg $(CFLAGS) $(BU_OBJS) $(FE_OBJS)
|
||||
|
||||
# Generate the parser from gram.y; `yacc -d` writes both y.tab.c and y.tab.h
# in a single invocation.
# NOTE(review): an explicit rule with two targets is treated as two
# independent rules, so a parallel build may run yacc twice — confirm
# whether -j builds need to be supported.
y.tab.c y.tab.h: gram.y
|
||||
yacc -d gram.y
|
||||
|
||||
# Remove build products: objects, yacc output, core files, the burg binary,
# LaTeX by-products (*.aux *.log *.dvi) and the files written by the test
# target (sample, sample.c, tmp).
clean:
|
||||
rm -f *.o y.tab.h y.tab.c core burg *.aux *.log *.dvi sample sample.c tmp
|
||||
|
||||
# Header dependencies: every object depends on b.h, the FE_OBJS group also
# depends on fe.h, and lex.o needs the yacc-generated y.tab.h.
$(FE_OBJS): b.h
|
||||
$(BU_OBJS): b.h
|
||||
$(FE_OBJS): fe.h
|
||||
|
||||
lex.o: y.tab.h
|
||||
|
||||
# Build the manual. latex is run twice so that the labels and citations
# written to doc.aux by the first pass can be resolved by the second.
# The passes are chained with `&&` so a failed first pass stops the rule
# instead of running the second pass regardless.
doc.dvi: doc.tex
|
||||
latex doc && latex doc
|
||||
|
||||
# Self-test. The first command generates sample.c from sample.gr with -I,
# then compiles and runs it. Each following command regenerates the parser
# through a different invocation (file argument instead of stdin, -o instead
# of stdout, -O0, -=) and uses cmp to check that the result is byte-identical
# to sample.c.
# NOTE(review): the sample is compiled with a hard-coded `cc`, whereas the
# burg link rule uses $(CC).
test: burg sample.gr
|
||||
./burg -I <sample.gr >sample.c && cc $(CFLAGS) -o sample sample.c && ./sample
|
||||
./burg -I sample.gr >tmp && cmp tmp sample.c
|
||||
./burg -I <sample.gr -o tmp && cmp tmp sample.c
|
||||
./burg -I sample.gr -o tmp && cmp tmp sample.c
|
||||
./burg -I -O0 <sample.gr >tmp && cmp tmp sample.c
|
||||
./burg -I -= <sample.gr >tmp && cmp tmp sample.c
|
50
utils/Burg/Doc/doc.aux
Normal file
50
utils/Burg/Doc/doc.aux
Normal file
@ -0,0 +1,50 @@
|
||||
\relax
|
||||
\bibstyle{alpha}
|
||||
\citation{aho-twig-toplas}
|
||||
\citation{appel-87}
|
||||
\citation{balachandran-complang}
|
||||
\citation{kron-phd}
|
||||
\citation{hoffmann-jacm}
|
||||
\citation{hatcher-popl}
|
||||
\citation{chase-popl}
|
||||
\citation{pelegri-popl}
|
||||
\citation{pelegri-phd}
|
||||
\citation{wilhelm-tr}
|
||||
\citation{henry-budp}
|
||||
\citation{fraser-henry-spe-91}
|
||||
\citation{proebsting-91}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1}Overview}{1}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {2}Input}{1}}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces A Sample Tree Grammar}}{2}}
|
||||
\newlabel{fig-tree-grammar}{{1}{2}}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces EBNF Grammar for Tree Grammars for {\sc Burg}\ }}{3}}
|
||||
\newlabel{fig-grammar-grammar}{{2}{3}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3}Output}{3}}
|
||||
\citation{aho-johnson-dp-classic}
|
||||
\citation{fraser-henry-spe-91}
|
||||
\citation{henry-budp}
|
||||
\citation{pelegri-phd}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4}Debugging}{6}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {5}Running {\sc Burg}\ }{6}}
|
||||
\newlabel{sec-man-page}{{5}{6}}
|
||||
\citation{pelegri-popl}
|
||||
\citation{henry-budp}
|
||||
\citation{balachandran-complang}
|
||||
\citation{proebsting-91}
|
||||
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A Diverging Tree Grammar}}{7}}
|
||||
\newlabel{fig-diverge-grammar}{{3}{7}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {6}Acknowledgements}{7}}
|
||||
\bibcite{aho-twig-toplas}{AGT89}
|
||||
\bibcite{aho-johnson-dp-classic}{AJ76}
|
||||
\bibcite{appel-87}{App87}
|
||||
\bibcite{balachandran-complang}{BDB90}
|
||||
\bibcite{wilhelm-tr}{BMW87}
|
||||
\bibcite{chase-popl}{Cha87}
|
||||
\bibcite{fraser-henry-spe-91}{FH91}
|
||||
\bibcite{hatcher-popl}{HC86}
|
||||
\bibcite{henry-budp}{Hen89}
|
||||
\bibcite{hoffmann-jacm}{HO82}
|
||||
\bibcite{kron-phd}{Kro75}
|
||||
\bibcite{pelegri-phd}{PL87}
|
||||
\bibcite{pelegri-popl}{PLG88}
|
||||
\bibcite{proebsting-91}{Pro91}
|
BIN
utils/Burg/Doc/doc.dvi
Normal file
BIN
utils/Burg/Doc/doc.dvi
Normal file
Binary file not shown.
157
utils/Burg/Doc/doc.log
Normal file
157
utils/Burg/Doc/doc.log
Normal file
@ -0,0 +1,157 @@
|
||||
This is TeX, Version 3.14159 (Web2C 7.3.2) (format=latex 2000.8.30) 4 JUN 2001 13:20
|
||||
**doc
|
||||
(doc.tex
|
||||
LaTeX2e <2000/06/01>
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/latex209.def
|
||||
File: latex209.def 1998/05/13 v0.52 Standard LaTeX file
|
||||
|
||||
|
||||
Entering LaTeX 2.09 COMPATIBILITY MODE
|
||||
*************************************************************
|
||||
!!WARNING!! !!WARNING!! !!WARNING!! !!WARNING!!
|
||||
|
||||
This mode attempts to provide an emulation of the LaTeX 2.09
|
||||
author environment so that OLD documents can be successfully
|
||||
processed. It should NOT be used for NEW documents!
|
||||
|
||||
New documents should use Standard LaTeX conventions and start
|
||||
with the \documentclass command.
|
||||
|
||||
Compatibility mode is UNLIKELY TO WORK with LaTeX 2.09 style
|
||||
files that change any internal macros, especially not with
|
||||
those that change the FONT SELECTION or OUTPUT ROUTINES.
|
||||
|
||||
Therefore such style files MUST BE UPDATED to use
|
||||
Current Standard LaTeX: LaTeX2e.
|
||||
If you suspect that you may be using such a style file, which
|
||||
is probably very, very old by now, then you should attempt to
|
||||
get it updated by sending a copy of this error message to the
|
||||
author of that file.
|
||||
*************************************************************
|
||||
|
||||
\footheight=\dimen102
|
||||
\@maxsep=\dimen103
|
||||
\@dblmaxsep=\dimen104
|
||||
\@cla=\count79
|
||||
\@clb=\count80
|
||||
\mscount=\count81
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/tracefnt.sty
|
||||
Package: tracefnt 1997/05/29 v3.0j Standard LaTeX package (font tracing)
|
||||
\tracingfonts=\count82
|
||||
LaTeX Info: Redefining \selectfont on input line 96.
|
||||
)
|
||||
\symbold=\mathgroup4
|
||||
\symsans=\mathgroup5
|
||||
\symtypewriter=\mathgroup6
|
||||
\symitalic=\mathgroup7
|
||||
\symsmallcaps=\mathgroup8
|
||||
\symslanted=\mathgroup9
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 288.
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathsf on input line 289.
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathtt on input line 290.
|
||||
LaTeX Font Info: Redeclaring math alphabet \mathit on input line 296.
|
||||
LaTeX Info: Redefining \em on input line 306.
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/latexsym.sty
|
||||
Package: latexsym 1998/08/17 v2.2e Standard LaTeX package (lasy symbols)
|
||||
\symlasy=\mathgroup10
|
||||
LaTeX Font Info: Overwriting symbol font `lasy' in version `bold'
|
||||
(Font) U/lasy/m/n --> U/lasy/b/n on input line 42.
|
||||
)
|
||||
LaTeX Font Info: Redeclaring math delimiter \lgroup on input line 370.
|
||||
LaTeX Font Info: Redeclaring math delimiter \rgroup on input line 372.
|
||||
LaTeX Font Info: Redeclaring math delimiter \bracevert on input line 374.
|
||||
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/config/latex209.cf
|
||||
g
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/tools/rawfonts.sty
|
||||
Compatibility mode: package `' requested, but `rawfonts' provided.
|
||||
Package: rawfonts 1994/05/08 Low-level LaTeX 2.09 font compatibility
|
||||
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/tools/somedefs.sty
|
||||
Package: somedefs 1994/06/01 Toolkit for optional definitions
|
||||
)
|
||||
LaTeX Font Info: Try loading font information for U+lasy on input line 44.
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/ulasy.fd
|
||||
File: ulasy.fd 1998/08/17 v2.2eLaTeX symbol font definitions
|
||||
)))) (/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/article.
|
||||
cls
|
||||
Document Class: article 2000/05/19 v1.4b Standard LaTeX document class
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/base/size11.clo
|
||||
File: size11.clo 2000/05/19 v1.4b Standard LaTeX file (size option)
|
||||
)
|
||||
\c@part=\count83
|
||||
\c@section=\count84
|
||||
\c@subsection=\count85
|
||||
\c@subsubsection=\count86
|
||||
\c@paragraph=\count87
|
||||
\c@subparagraph=\count88
|
||||
\c@figure=\count89
|
||||
\c@table=\count90
|
||||
\abovecaptionskip=\skip41
|
||||
\belowcaptionskip=\skip42
|
||||
Compatibility mode: definition of \rm ignored.
|
||||
Compatibility mode: definition of \sf ignored.
|
||||
Compatibility mode: definition of \tt ignored.
|
||||
Compatibility mode: definition of \bf ignored.
|
||||
Compatibility mode: definition of \it ignored.
|
||||
Compatibility mode: definition of \sl ignored.
|
||||
Compatibility mode: definition of \sc ignored.
|
||||
LaTeX Info: Redefining \cal on input line 501.
|
||||
LaTeX Info: Redefining \mit on input line 502.
|
||||
\bibindent=\dimen105
|
||||
)
|
||||
(/usr/dcs/software/supported/encap/TeX/share/texmf/tex/latex/pstex/fullpage.sty
|
||||
) (doc.aux)
|
||||
\openout1 = `doc.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 2.
|
||||
LaTeX Font Info: ... okay on input line 2.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <12> on input line 33.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <8> on input line 33.
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <6> on input line 33.
|
||||
LaTeX Font Info: Try loading font information for OMS+cmtt on input line 100
|
||||
.
|
||||
LaTeX Font Info: No file OMScmtt.fd. on input line 100.
|
||||
LaTeX Font Warning: Font shape `OMS/cmtt/m/n' undefined
|
||||
(Font) using `OMS/cmsy/m/n' instead
|
||||
(Font) for symbol `textbraceleft' on input line 100.
|
||||
[1
|
||||
|
||||
]
|
||||
LaTeX Font Info: External font `cmex10' loaded for size
|
||||
(Font) <10.95> on input line 150.
|
||||
[2] [3] [4] [5] [6]
|
||||
Overfull \hbox (1.38191pt too wide) in paragraph at lines 480--484
|
||||
[]\OT1/cmr/m/n/10.95 Emit code for \OT1/cmtt/m/n/10.95 burm[]arity\OT1/cmr/m/n/
|
||||
10.95 , \OT1/cmtt/m/n/10.95 burm[]child\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.95
|
||||
burm[]cost\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.95 burm[]ntname\OT1/cmr/m/n/10
|
||||
.95 , \OT1/cmtt/m/n/10.95 burm[]op[]label\OT1/cmr/m/n/10.95 , \OT1/cmtt/m/n/10.
|
||||
95 burm[]opname\OT1/cmr/m/n/10.95 ,
|
||||
[]
|
||||
|
||||
[7] [8] [9] (doc.aux)
|
||||
LaTeX Font Warning: Some font shapes were not available, defaults substituted.
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
543 strings out of 12968
|
||||
6147 string characters out of 289029
|
||||
446019 words of memory out of 1453895
|
||||
3433 multiletter control sequences out of 10000+10000
|
||||
23403 words of font info for 87 fonts, out of 400000 for 2000
|
||||
14 hyphenation exceptions out of 1000
|
||||
21i,6n,20p,308b,283s stack positions out of 300i,100n,500p,50000b,4000s
|
||||
|
||||
Output written on doc.dvi (9 pages, 29856 bytes).
|
596
utils/Burg/Doc/doc.tex
Normal file
596
utils/Burg/Doc/doc.tex
Normal file
@ -0,0 +1,596 @@
|
||||
\documentstyle[11pt,fullpage]{article}
|
||||
\begin{document}
|
||||
|
||||
\def\AddSpace#1{\ifcat#1a\ \fi#1} % if next is a letter, add a space
|
||||
\def\YACC#1{{\sc Yacc}\AddSpace#1}
|
||||
\def\TWIG#1{{\sc Twig}\AddSpace#1}
|
||||
\def\PROG#1{{\sc Burg}\AddSpace#1}
|
||||
\def\PARSER#1{{\sc Burm}\AddSpace#1}
|
||||
\def\CODEGEN#1{{\sc Codegen}\AddSpace#1}
|
||||
|
||||
\title{{\sc Burg} --- Fast Optimal Instruction Selection and Tree Parsing}
|
||||
\author{
|
||||
Christopher W. Fraser \\
|
||||
AT\&T Bell Laboratories \\
|
||||
600 Mountain Avenue 2C-464 \\
|
||||
Murray Hill, NJ 07974-0636 \\
|
||||
{\tt cwf@research.att.com}
|
||||
\and
|
||||
Robert R. Henry \\
|
||||
Tera Computer Company \\
|
||||
400 N. 34th St., Suite 300 \\
|
||||
Seattle, WA 98103-8600 \\
|
||||
{\tt rrh@tera.com}
|
||||
\and
|
||||
Todd A. Proebsting \\
|
||||
Dept. of Computer Sciences \\
|
||||
University of Wisconsin \\
|
||||
Madison, WI 53706 \\
|
||||
{\tt todd@cs.wisc.edu}
|
||||
}
|
||||
\date{December 1991}
|
||||
|
||||
\maketitle
|
||||
\bibliographystyle{alpha}
|
||||
\newcommand\term[1]{{\it #1}}
|
||||
\newcommand\secref[1]{\S\ref{#1}}
|
||||
\newcommand\figref[1]{Figure~\ref{#1}}
|
||||
%
|
||||
% rationale table making
|
||||
%
|
||||
{\catcode`\^^M=13 \gdef\Obeycr{\catcode`\^^M=13 \def^^M{\\}}%
|
||||
\gdef\Restorecr{\catcode`\^^M=5 }} %
|
||||
|
||||
%
|
||||
% for printing out options
|
||||
%
|
||||
\newcommand\option[1]{% #1=option character
|
||||
{\tt -#1}%
|
||||
}
|
||||
\newcommand\var[1]{%
|
||||
{\tt #1}%
|
||||
}
|
||||
\section{Overview}
|
||||
|
||||
\PROG is a program that generates a fast tree parser using BURS
|
||||
(Bottom-Up Rewrite System) technology. It accepts a cost-augmented
|
||||
tree grammar and emits a C program that discovers in linear time an
|
||||
optimal parse of trees in the language described by the grammar. \PROG
|
||||
has been used to construct fast optimal instruction selectors for use
|
||||
in code generation. \PROG addresses many of the problems addressed by
|
||||
{\sc Twig}~\cite{aho-twig-toplas,appel-87}, but it is somewhat less flexible and
|
||||
much faster. \PROG is available via anonymous \var{ftp} from
|
||||
\var{kaese.cs.wisc.edu}. The compressed \var{shar} file
|
||||
\var{pub/burg.shar.Z} holds the complete distribution.
|
||||
|
||||
This document describes only that fraction of the BURS model that is
|
||||
required to use \PROG. Readers interested in more detail might start
|
||||
with Reference~\cite{balachandran-complang}. Other relevant documents
|
||||
include References~\cite{kron-phd,hoffmann-jacm,hatcher-popl,chase-popl,pelegri-popl,pelegri-phd,wilhelm-tr,henry-budp,fraser-henry-spe-91,proebsting-91}.
|
||||
|
||||
\section{Input}
|
||||
|
||||
\PROG accepts a tree grammar and emits a BURS tree parser.
|
||||
\figref{fig-tree-grammar} shows a sample grammar that implements a very
|
||||
simple instruction selector.
|
||||
\begin{figure}
|
||||
\begin{verbatim}
|
||||
%{
|
||||
#define NODEPTR_TYPE treepointer
|
||||
#define OP_LABEL(p) ((p)->op)
|
||||
#define LEFT_CHILD(p) ((p)->left)
|
||||
#define RIGHT_CHILD(p) ((p)->right)
|
||||
#define STATE_LABEL(p) ((p)->state_label)
|
||||
#define PANIC printf
|
||||
%}
|
||||
%start reg
|
||||
%term Assign=1 Constant=2 Fetch=3 Four=4 Mul=5 Plus=6
|
||||
%%
|
||||
con: Constant = 1 (0);
|
||||
con: Four = 2 (0);
|
||||
addr: con = 3 (0);
|
||||
addr: Plus(con,reg) = 4 (0);
|
||||
addr: Plus(con,Mul(Four,reg)) = 5 (0);
|
||||
reg: Fetch(addr) = 6 (1);
|
||||
reg: Assign(addr,reg) = 7 (1);
|
||||
\end{verbatim}
|
||||
\caption{A Sample Tree Grammar\label{fig-tree-grammar}}
|
||||
\end{figure}
|
||||
\PROG grammars are structurally similar to \YACC's. Comments follow C
|
||||
conventions. Text between ``\var{\%\{}'' and ``\var{\%\}}'' is called
|
||||
the \term{configuration section}; there may be several such segments.
|
||||
All are concatenated and copied verbatim into the head of the generated
|
||||
parser, which is called \PARSER. Text after the second ``\var{\%\%}'',
|
||||
if any, is also copied verbatim into \PARSER, at the end.
|
||||
|
||||
The configuration section configures \PARSER for the trees being parsed
|
||||
and the client's environment. This section must define
|
||||
\var{NODEPTR\_TYPE} to be a visible typedef symbol for a pointer to a
|
||||
node in the subject tree. \PARSER invokes \var{OP\_LABEL(p)},
|
||||
\var{LEFT\_CHILD(p)}, and \var{RIGHT\_CHILD(p)} to read the operator
|
||||
and children from the node pointed to by \var{p}. It invokes
|
||||
\var{PANIC} when it detects an error. If the configuration section
|
||||
defines these operations as macros, they are implemented in-line;
|
||||
otherwise, they must be implemented as functions. The section on
|
||||
diagnostics elaborates on \var{PANIC}.
|
||||
|
||||
\PARSER computes and stores a single integral \term{state} in each node
|
||||
of the subject tree. The configuration section must define a macro
|
||||
\var{STATE\_LABEL(p)} to access the state field of the node pointed to
|
||||
by \var{p}. A macro is required because \PROG uses it as an lvalue. A
|
||||
C \var{short} is usually the right choice; typical code generation
|
||||
grammars require 100--1000 distinct state labels.
|
||||
|
||||
The tree grammar follows the configuration section.
|
||||
\figref{fig-grammar-grammar} gives an EBNF grammar for \PROG tree
|
||||
grammars.
|
||||
\begin{figure}
|
||||
\begin{verbatim}
|
||||
grammar: {dcl} '%%' {rule}
|
||||
|
||||
dcl: '%start' Nonterminal
|
||||
dcl: '%term' { Identifier '=' Integer }
|
||||
|
||||
rule: Nonterminal ':' tree '=' Integer cost ';'
|
||||
cost: /* empty */
|
||||
cost: '(' Integer { ',' Integer } ')'
|
||||
|
||||
tree: Term '(' tree ',' tree ')'
|
||||
tree: Term '(' tree ')'
|
||||
tree: Term
|
||||
tree: Nonterminal
|
||||
\end{verbatim}
|
||||
\caption{EBNF Grammar for Tree Grammars for \PROG\ \label{fig-grammar-grammar}}
|
||||
\end{figure}
|
||||
Comments, the text between ``\var{\%\{}'' and ``\var{\%\}}'', and the
|
||||
text after the optional second ``\var{\%\%}'' are treated lexically, so
|
||||
the figure omits them. In the EBNF grammar, quoted text must appear
|
||||
literally, \var{Nonterminal} and \var{Integer} are self-explanatory,
|
||||
and \var{Term} denotes an identifier previously declared as a
|
||||
terminal. {\tt\{$X$\}} denotes zero or more instances of $X$.
|
||||
|
||||
Text before the first ``\var{\%\%}'' declares the start symbol and the
|
||||
terminals or operators in subject trees. All terminals must be
|
||||
declared; each line of such declarations begins with \var{\%term}.
|
||||
Each terminal has fixed arity, which \PROG infers from the rules using that terminal.
|
||||
\PROG restricts terminals to have at most two children. Each terminal
|
||||
is declared with a positive, unique, integral \term{external symbol
|
||||
number} after a ``\var{=}''. \var{OP\_LABEL(p)} must return the valid
|
||||
external symbol number for \var{p}. Ideally, external symbol numbers
|
||||
form a dense enumeration. Non-terminals are not declared, but the
|
||||
start symbol may be declared with a line that begins with
|
||||
\var{\%start}.
|
||||
|
||||
Text after the first ``\var{\%\%}'' declares the rules. A tree grammar
|
||||
is like a context-free grammar: it has rules, non-terminals,
|
||||
terminals, and a special start non-terminal. The right-hand side of a
|
||||
rule, called the \term{pattern}, is a tree. Tree patterns appear in
|
||||
prefix parenthesized form. Every non-terminal denotes a tree. A chain
|
||||
rule is a rule whose pattern is another non-terminal. If no start
|
||||
symbol is declared, \PROG uses the non-terminal defined by the first
|
||||
rule. \PROG needs a single start symbol; grammars for which it is
|
||||
natural to use multiple start symbols must be augmented with an
|
||||
artificial start symbol that derives, with zero cost, the grammar's
|
||||
natural start symbols. \PARSER will automatically select one
|
||||
that costs least for any given tree.
|
||||
|
||||
\PROG accepts no embedded semantic actions like \YACC's, because no one
|
||||
format suited all intended applications. Instead, each rule has a
|
||||
positive, unique, integral \term{external rule number}, after the
|
||||
pattern and preceded by a ``\var{=}''. Ideally, external rule numbers
|
||||
form a dense enumeration. \PARSER uses these numbers to report the
|
||||
matching rule to a user-supplied routine, which must implement any
|
||||
desired semantic action; see below. Humans may select these integers
|
||||
by hand, but \PROG is intended as a \term{server} for building BURS
|
||||
tree parsers. Thus some \PROG clients will consume a richer
|
||||
description and translate it into \PROG's simpler input.
|
||||
|
||||
Rules end with a vector of non-negative, integer costs, in parentheses
|
||||
and separated by commas. If the cost vector is omitted, then all
|
||||
elements are assumed to be zero. \PROG retains only the first four
|
||||
elements of the list. The cost of a derivation is the sum of the costs
|
||||
for all rules applied in the derivation. Arithmetic on cost vectors
|
||||
treats each member of the vector independently. The tree parser finds
|
||||
the cheapest parse of the subject tree. It breaks ties arbitrarily.
|
||||
By default, \PROG uses only the \term{principal cost} of each cost
|
||||
vector, which defaults to the first element, but options described
|
||||
below provide alternatives.
|
||||
|
||||
\section{Output}
|
||||
|
||||
\PARSER traverses the subject tree twice. The first pass or
|
||||
\term{labeller} runs bottom-up and left-to-right, visiting each node
|
||||
exactly once. Each node is labelled with a state, a single number that
|
||||
encodes all full and partial optimal pattern matches viable at that
|
||||
node. The second pass or \term{reducer} traverses the subject tree
|
||||
top-down. The reducer accepts a tree node's state label and a
|
||||
\term{goal} non-terminal --- initially the root's state label and the
|
||||
start symbol --- which combine to determine the rule to be applied at
|
||||
that node. By construction, the rule has the given goal non-terminal
|
||||
as its left-hand side. The rule's pattern identifies the subject
|
||||
subtrees and goal non-terminals for all recursive visits. Here, a
|
||||
``subtree'' is not necessarily an immediate child of the current node.
|
||||
Patterns with interior operators cause the reducer to skip the
|
||||
corresponding subject nodes, so the reducer may proceed directly to
|
||||
grandchildren, great-grandchildren, and so on. On the other hand,
|
||||
chain rules cause the reducer to revisit the current subject node, with
|
||||
a new goal
|
||||
non-terminal, so \term{x} is also regarded as a subtree of \term{x}.
|
||||
|
||||
As the reducer visits (and possibly revisits) each node, user-supplied
|
||||
code implements semantic action side effects and controls the order in
|
||||
which subtrees are visited. The labeller is self-contained, but the
|
||||
reducer combines code from \PROG with code from the user, so \PARSER
|
||||
does not stand alone.
|
||||
|
||||
The \PARSER that is generated by \PROG provides primitives for
|
||||
labelling and reducing trees. These mechanisms are a compromise
|
||||
between expressibility, abstraction, simplicity, flexibility and
|
||||
efficiency. Clients may combine primitives into labellers and reducers
|
||||
that can traverse trees in arbitrary ways, and they may call semantic
|
||||
routines when and how they wish during traversal. Also, \PROG
|
||||
generates a few higher level routines that implement common
|
||||
combinations of primitives, and it generates mechanisms that help debug
|
||||
the tree parse.
|
||||
|
||||
\PROG generates the labeller as a function named \var{burm\_label} with
|
||||
the signature
|
||||
\begin{verbatim}
|
||||
extern int burm_label(NODEPTR_TYPE p);
|
||||
\end{verbatim}
|
||||
It labels the entire subject tree pointed to by \var{p} and returns the
|
||||
root's state label. State zero labels unmatched trees. The trees may
|
||||
be corrupt or merely inconsistent with the grammar.
|
||||
|
||||
The simpler \var{burm\_state} is \var{burm\_label} without the
|
||||
code to traverse the tree and to read and write its fields. It may be
|
||||
used to integrate labelling into user-supplied traversal code. A
|
||||
typical signature is
|
||||
\begin{verbatim}
|
||||
extern int burm_state(int op, int leftstate, int rightstate);
|
||||
\end{verbatim}
|
||||
It accepts an external symbol number for a node and the labels for the
|
||||
node's left and right children. It returns the state label to assign
|
||||
to that node. For unary operators, the last argument is ignored; for
|
||||
leaves, the last two arguments are ignored. In general, \PROG
|
||||
generates a \var{burm\_state} that accepts the maximum number of child
|
||||
states required by the input grammar. For example, if the grammar
|
||||
includes no binary operators, then \var{burm\_state} will have the
|
||||
signature
|
||||
\begin{verbatim}
|
||||
extern int burm_state(int op, int leftstate);
|
||||
\end{verbatim}
|
||||
This feature is included to permit future expansion to operators with
|
||||
more than two children.
|
||||
|
||||
The user must write the reducer, but \PARSER writes code and data that
|
||||
help. Primary is
|
||||
\begin{verbatim}
|
||||
extern int burm_rule(int state, int goalnt);
|
||||
\end{verbatim}
|
||||
which accepts a tree's state label and a goal non-terminal and returns the
|
||||
external rule number of a rule. The rule will have matched the tree
|
||||
and have the goal non-terminal on the left-hand side; \var{burm\_rule}
|
||||
returns zero when the tree labelled with the given state did not match
|
||||
the goal non-terminal. For the initial, root-level call, \var{goalnt}
|
||||
must be one, and \PARSER exports an array that identifies the values
|
||||
for nested calls:
|
||||
\begin{verbatim}
|
||||
extern short *burm_nts[] = { ... };
|
||||
\end{verbatim}
|
||||
is an array indexed by external rule numbers. Each element points to a
|
||||
zero-terminated vector of short integers, which encode the goal
|
||||
non-terminals for that rule's pattern, left-to-right. The user needs
|
||||
only these two externals to write a complete reducer, but a third
|
||||
external simplifies some applications:
|
||||
\begin{verbatim}
|
||||
extern NODEPTR_TYPE *burm_kids(NODEPTR_TYPE p, int eruleno, NODEPTR_TYPE kids[]);
|
||||
\end{verbatim}
|
||||
accepts the address of a tree \var{p}, an external rule number, and an
|
||||
empty vector of pointers to trees. The procedure assumes that \var{p}
|
||||
matched the given rule, and it fills in the vector with the subtrees (in
|
||||
the sense described above) of \var{p} that must be reduced recursively.
|
||||
\var{kids} is returned. It is not zero-terminated.
|
||||
|
||||
The simple user code below labels and then fully reduces a subject tree;
|
||||
the reducer prints the tree cover. \var{burm\_string} is defined below.
|
||||
\begin{verbatim}
|
||||
parse(NODEPTR_TYPE p) {
|
||||
burm_label(p); /* label the tree */
|
||||
reduce(p, 1, 0); /* and reduce it */
|
||||
}
|
||||
|
||||
reduce(NODEPTR_TYPE p, int goalnt, int indent) {
|
||||
int eruleno = burm_rule(STATE_LABEL(p), goalnt); /* matching rule number */
|
||||
short *nts = burm_nts[eruleno]; /* subtree goal non-terminals */
|
||||
NODEPTR_TYPE kids[10]; /* subtree pointers */
|
||||
int i;
|
||||
|
||||
for (i = 0; i < indent; i++)
|
||||
printf("."); /* print indented ... */
|
||||
printf("%s\n", burm_string[eruleno]); /* ... text of rule */
|
||||
burm_kids(p, eruleno, kids); /* initialize subtree pointers */
|
||||
for (i = 0; nts[i]; i++) /* traverse subtrees left-to-right */
|
||||
reduce(kids[i], nts[i], indent+1); /* and print them recursively */
|
||||
}
|
||||
\end{verbatim}
|
||||
The reducer may recursively traverse subtrees in any order, and it may
|
||||
interleave arbitrary semantic actions with recursive traversals.
|
||||
Multiple reducers may be written, to implement multi-pass algorithms
|
||||
or independent single-pass algorithms.
|
||||
|
||||
For each non-terminal $x$, \PROG emits a preprocessor directive to
|
||||
equate \var{burm\_}$x$\var{\_NT} with $x$'s integral encoding. It also
|
||||
defines a macro \var{burm\_}$x$\var{\_rule(a)} that is equivalent to
|
||||
\var{burm\_rule(a,}$x$\var{)}. For the grammar in
|
||||
\figref{fig-tree-grammar}, \PROG emits
|
||||
\begin{verbatim}
|
||||
#define burm_reg_NT 1
|
||||
#define burm_con_NT 2
|
||||
#define burm_addr_NT 3
|
||||
#define burm_reg_rule(a) ...
|
||||
#define burm_con_rule(a) ...
|
||||
#define burm_addr_rule(a) ...
|
||||
\end{verbatim}
|
||||
Such symbols are visible only to the code after the second
|
||||
``\var{\%\%}''. If the symbols \var{burm\_}$x$\var{\_NT} are needed
|
||||
elsewhere, extract them from the \PARSER source.
|
||||
|
||||
The \option{I} option directs \PROG to emit an encoding of the input
|
||||
that may help the user produce diagnostics. The vectors
|
||||
\begin{verbatim}
|
||||
extern char *burm_opname[];
|
||||
extern char burm_arity[];
|
||||
\end{verbatim}
|
||||
hold the name and number of children, respectively, for each terminal.
|
||||
They are indexed by the terminal's external symbol number. The vectors
|
||||
\begin{verbatim}
|
||||
extern char *burm_string[];
|
||||
extern short burm_cost[][4];
|
||||
\end{verbatim}
|
||||
hold the text and cost vector for each rule. They are indexed by the
|
||||
external rule number. The zero-terminated vector
|
||||
\begin{verbatim}
|
||||
extern char *burm_ntname[];
|
||||
\end{verbatim}
|
||||
is indexed by \var{burm\_}$x$\var{\_NT} and holds the name of
|
||||
non-terminal $x$. Finally, the procedures
|
||||
\begin{verbatim}
|
||||
extern int burm_op_label(NODEPTR_TYPE p);
|
||||
extern int burm_state_label(NODEPTR_TYPE p);
|
||||
extern NODEPTR_TYPE burm_child(NODEPTR_TYPE p, int index);
|
||||
\end{verbatim}
|
||||
are callable versions of the configuration macros.
|
||||
\var{burm\_child(p,0)} implements \var{LEFT\_CHILD(p)}, and
|
||||
\var{burm\_child(p,1)} implements \var{RIGHT\_CHILD(p)}. A sample use
|
||||
is the grammar-independent expression
|
||||
\var{burm\_opname[burm\_op\_label(p)]}, which yields the textual name
|
||||
for the operator in the tree node pointed to by \var{p}.
|
||||
|
||||
A complete tree parser can be assembled from just \var{burm\_state},
|
||||
\var{burm\_rule}, and \var{burm\_nts}, which use none of the
|
||||
configuration section except \var{PANIC}. The generated routines that
|
||||
use the rest of the configuration section are compiled only if the
|
||||
configuration section defines \var{STATE\_LABEL}, so they can be
|
||||
omitted if the user prefers to hide the tree structure from \PARSER.
|
||||
This course may be wise if, say, the tree structure is defined in a
|
||||
large header file with symbols that might collide with \PARSER's.
|
||||
|
||||
\PARSER selects an optimal parse without dynamic programming at compile
|
||||
time~\cite{aho-johnson-dp-classic}. Instead, \PROG does the dynamic
|
||||
programming at compile-compile time, as it builds \PARSER.
|
||||
Consequently, \PARSER parses quickly. Similar labellers have taken as
|
||||
few as 15 instructions per node, and reducers as few as 35 per node
|
||||
visited~\cite{fraser-henry-spe-91}.
|
||||
|
||||
\section{Debugging}
|
||||
|
||||
\PARSER invokes \var{PANIC} when an error prevents it from proceeding.
|
||||
\var{PANIC} has the same signature as \var{printf}. It should pass its
|
||||
arguments to \var{printf} if diagnostics are desired and then either
|
||||
abort (say via \var{exit}) or recover (say via \var{longjmp}). If it
|
||||
returns, \PARSER aborts. Some errors are not caught.
|
||||
|
||||
\PROG assumes a robust preprocessor, so it omits full consistency
|
||||
checking and error recovery. \PROG constructs a set of states using a
|
||||
closure algorithm like that used in LR table construction. \PROG
|
||||
considers all possible trees generated by the tree grammar and
|
||||
summarizes infinite sets of trees with finite sets. The summary
|
||||
records the cost of those trees but actually manipulates the
|
||||
differences in costs between viable alternatives using a dynamic
|
||||
programming algorithm. Reference~\cite{henry-budp} elaborates.
|
||||
|
||||
Some grammars derive trees whose optimal parses depend on arbitrarily
|
||||
distant data. When this happens, \PROG and the tree grammar
|
||||
\term{cost diverge}, and \PROG attempts to build an infinite
|
||||
set of states; it first thrashes and ultimately exhausts
|
||||
memory and exits. For example, the tree grammar in
|
||||
\figref{fig-diverge-grammar}
|
||||
\begin{figure}
|
||||
\begin{verbatim}
|
||||
%term Const=17 RedFetch=20 GreenFetch=21 Plus=22
|
||||
%%
|
||||
reg: GreenFetch(green_reg) = 10 (0);
|
||||
reg: RedFetch(red_reg) = 11 (0);
|
||||
|
||||
green_reg: Const = 20 (0);
|
||||
green_reg: Plus(green_reg,green_reg) = 21 (1);
|
||||
|
||||
red_reg: Const = 30 (0);
|
||||
red_reg: Plus(red_reg,red_reg) = 31 (2);
|
||||
\end{verbatim}
|
||||
\caption{A Diverging Tree Grammar\label{fig-diverge-grammar}}
|
||||
\end{figure}
|
||||
diverges, since non-terminals \var{green\_reg} and \var{red\_reg}
|
||||
derive identical infinite trees with different costs. If the cost of
|
||||
rule 31 is changed to 1, then the grammar does not diverge.
|
||||
|
||||
Practical tree grammars describing instruction selection do not
|
||||
cost-diverge because infinite trees are derived from non-terminals
|
||||
that model temporary registers. Machines can move data between
|
||||
different types of registers for a small bounded cost, and the rules
|
||||
for these instructions prevent divergence. For example, if
|
||||
\figref{fig-diverge-grammar} included rules to move data between red
|
||||
and green registers, the grammar would not diverge. If a bona fide
|
||||
machine grammar appears to make \PROG loop, try a host with more
|
||||
memory. To apply \PROG to problems other than instruction selection,
|
||||
be prepared to consult the literature on
|
||||
cost-divergence~\cite{pelegri-phd}.
|
||||
|
||||
\section{Running \PROG\ }\label{sec-man-page}
|
||||
|
||||
\PROG reads a tree grammar and writes a \PARSER in C. \PARSER can be
|
||||
compiled by itself or included in another file. When suitably named
|
||||
with the \option{p} option, disjoint instances of \PARSER should link
|
||||
together without name conflicts. The command:
|
||||
\begin{flushleft}
|
||||
\var{burg} [ {\it arguments} ] [ {\it file} ]
|
||||
\end{flushleft}
|
||||
invokes \PROG. If a {\it file} is named, \PROG expects its grammar
|
||||
there; otherwise it reads the standard input. The options include:
|
||||
\def\Empty{}
|
||||
%
|
||||
\newcommand\odescr[2]{% #1=option character, #2=optional argument
|
||||
\gdef\Arg2{#2}%
|
||||
\item[\option{#1}\ifx\Arg2\Empty\else{{\it #2}}\fi]
|
||||
}
|
||||
\begin{description}
|
||||
%
|
||||
\odescr{c}{} $N$
|
||||
Abort if any relative cost exceeds $N$, which keeps \PROG from looping on
|
||||
diverging grammars. Several
|
||||
references~\cite{pelegri-popl,henry-budp,balachandran-complang,proebsting-91}
|
||||
explain relative costs.
|
||||
%
|
||||
\odescr{d}{}
|
||||
Report a few statistics and flag unused rules and terminals.
|
||||
%
|
||||
\odescr{o}{} {\it file}
|
||||
Write parser into {\it file}. Otherwise it writes to the standard output.
|
||||
%
|
||||
\odescr{p}{} {\it prefix}
|
||||
Start exported names with {\it prefix}. The default is \var{burm}.
|
||||
%
|
||||
\odescr{t}{}
|
||||
Generate smaller tables faster, but all goal non-terminals passed to
|
||||
\var{burm\_rule} must come from an appropriate \var{burm\_nts}. Using
|
||||
\var{burm\_}$x$\var{\_NT} instead may give unpredictable results.
|
||||
%
|
||||
\odescr{I}{}
|
||||
Emit code for \var{burm\_arity}, \var{burm\_child}, \var{burm\_cost},
|
||||
\var{burm\_ntname}, \var{burm\_op\_label}, \var{burm\_opname},
|
||||
\var{burm\_state\_label}, and \var{burm\_string}.
|
||||
%
|
||||
\odescr{O}{} $N$
|
||||
Change the principal cost to $N$. Elements of each cost vector are
|
||||
numbered from zero.
|
||||
%
|
||||
\odescr{=}{}
|
||||
Compare costs lexicographically, using all costs in the given order.
|
||||
This option slows \PROG and may produce a larger parser. Increases
|
||||
range from small to astronomical.
|
||||
\end{description}
|
||||
|
||||
\section{Acknowledgements}
|
||||
|
||||
The first \PROG was adapted by the second author from his \CODEGEN
|
||||
package, which was developed at the University of Washington with
|
||||
partial support from NSF Grant CCR-88-01806. It was unbundled from
|
||||
\CODEGEN with the support of Tera Computer. The current \PROG was
|
||||
written by the third author with the support of NSF grant
|
||||
CCR-8908355. The interface, documentation, and testing involved
|
||||
all three authors.
|
||||
|
||||
Comments from a large group at the 1991 Dagstuhl Seminar on Code
|
||||
Generation improved \PROG's interface. Robert Giegerich and Susan
|
||||
Graham organized the workshop, and the International Conference and
|
||||
Research Center for Computer Science, Schloss Dagstuhl, provided an
|
||||
ideal environment for such collaboration. Beta-testers included Helmut
|
||||
Emmelmann, Dave Hanson, John Hauser, Hugh Redelmeier, and Bill Waite.
|
||||
|
||||
\begin{thebibliography}{BMW87}
|
||||
|
||||
\bibitem[AGT89]{aho-twig-toplas}
|
||||
Alfred~V. Aho, Mahadevan Ganapathi, and Steven W.~K. Tjiang.
|
||||
\newblock Code generation using tree matching and dynamic programming.
|
||||
\newblock {\em ACM Transactions on Programming Languages and Systems},
|
||||
11(4):491--516, October 1989.
|
||||
|
||||
\bibitem[AJ76]{aho-johnson-dp-classic}
|
||||
Alfred~V. Aho and Steven~C. Johnson.
|
||||
\newblock Optimal code generation for expression trees.
|
||||
\newblock {\em Journal of the ACM}, 23(3):488--501, July 1976.
|
||||
|
||||
\bibitem[App87]{appel-87}
|
||||
Andrew~W. Appel.
|
||||
\newblock Concise specification of locally optimal code generators.
|
||||
\newblock Technical report CS-TR-080-87, Princeton University, 1987.
|
||||
|
||||
\bibitem[BDB90]{balachandran-complang}
|
||||
A.~Balachandran, D.~M. Dhamdhere, and S.~Biswas.
|
||||
\newblock Efficient retargetable code generation using bottom-up tree pattern
|
||||
matching.
|
||||
\newblock {\em Computer Languages}, 15(3):127--140, 1990.
|
||||
|
||||
\bibitem[BMW87]{wilhelm-tr}
|
||||
J\"{u}rgen B\"{o}rstler, Ulrich M\"{o}ncke, and Reinhard Wilhelm.
|
||||
\newblock Table compression for tree automata.
|
||||
\newblock Technical Report Aachener Informatik-Berichte No. 87-12, RWTH Aachen,
|
||||
Fachgruppe Informatik, Aachen, Fed. Rep. of Germany, 1987.
|
||||
|
||||
\bibitem[Cha87]{chase-popl}
|
||||
David~R. Chase.
|
||||
\newblock An improvement to bottom up tree pattern matching.
|
||||
\newblock {\em Fourteenth Annual ACM Symposium on Principles of Programming
|
||||
Languages}, pages 168--177, January 1987.
|
||||
|
||||
\bibitem[FH91]{fraser-henry-spe-91}
|
||||
Christopher~W. Fraser and Robert~R. Henry.
|
||||
\newblock Hard-coding bottom-up code generation tables to save time and space.
|
||||
\newblock {\em Software---Practice \& Experience}, 21(1):1--12, January 1991.
|
||||
|
||||
\bibitem[HC86]{hatcher-popl}
|
||||
Philip~J. Hatcher and Thomas~W. Christopher.
|
||||
\newblock High-quality code generation via bottom-up tree pattern matching.
|
||||
\newblock {\em Thirteenth Annual ACM Symposium on Principles of Programming
|
||||
Languages}, pages 119--130, January 1986.
|
||||
|
||||
\bibitem[Hen89]{henry-budp}
|
||||
Robert~R. Henry.
|
||||
\newblock Encoding optimal pattern selection in a table-driven bottom-up
|
||||
tree-pattern matcher.
|
||||
\newblock Technical Report 89-02-04, University of Washington Computer Science
|
||||
Department, Seattle, WA, February 1989.
|
||||
|
||||
\bibitem[HO82]{hoffmann-jacm}
|
||||
Christoph Hoffmann and Michael~J. O'Donnell.
|
||||
\newblock Pattern matching in trees.
|
||||
\newblock {\em Journal of the ACM}, 29(1):68--95, January 1982.
|
||||
|
||||
\bibitem[Kro75]{kron-phd}
|
||||
H.~H. Kron.
|
||||
\newblock {\em Tree Templates and Subtree Transformational Grammars}.
|
||||
\newblock PhD thesis, UC Santa Cruz, December 1975.
|
||||
|
||||
\bibitem[PL87]{pelegri-phd}
|
||||
Eduardo Pelegri-Llopart.
|
||||
\newblock {\em Tree Transformations in Compiler Systems}.
|
||||
\newblock PhD thesis, UC Berkeley, December 1987.
|
||||
|
||||
\bibitem[PLG88]{pelegri-popl}
|
||||
Eduardo Pelegri-Llopart and Susan~L. Graham.
|
||||
\newblock Optimal code generation for expression trees: An application of
|
||||
{BURS} theory.
|
||||
\newblock {\em Fifteenth Annual ACM Symposium on Principles of Programming
|
||||
Languages}, pages 294--308, January 1988.
|
||||
|
||||
\bibitem[Pro91]{proebsting-91}
|
||||
Todd~A. Proebsting.
|
||||
\newblock Simple and efficient {BURS} table generation.
|
||||
\newblock Technical report, Department of Computer Sciences, University of
|
||||
Wisconsin, 1991.
|
||||
|
||||
\end{thebibliography}
|
||||
|
||||
\end{document}
|
||||
|
Loading…
Reference in New Issue
Block a user