Initial revision

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@2 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2001-06-06 20:29:01 +00:00
parent 8d0afd3d32
commit 009505452b
145 changed files with 19198 additions and 0 deletions

7
Makefile Normal file
View File

@ -0,0 +1,7 @@
# Top-level makefile: this directory is the root of the tree (LEVEL = .) and
# the 'lib' and 'tools' subdirectories are built through the shared rules
# pulled in from Makefile.common.
LEVEL = .
DIRS = lib tools
include $(LEVEL)/Makefile.common
# 'test' first brings 'all' up to date, then runs make inside the test
# directory.
test :: all
cd test; $(MAKE)

170
Makefile.common Normal file
View File

@ -0,0 +1,170 @@
# Makefile.common
#
# This file is included by all of the LLVM makefiles. This file defines common
# rules to do things like compile a .cpp file or generate dependency info.
# These are platform dependent, so this is the file used to specify these
# system dependent operations.
#
# The following functionality may be set by setting incoming variables:
#
# 1. LEVEL - The level of the current subdirectory from the top of the
# MagicStats view. This level should be expressed as a path, for
# example, ../.. for two levels deep.
#
# 2. DIRS - A list of subdirectories to be built. Fake targets are set up
# so that each of the targets "all", "install", and "clean" builds
# the subdirectories before the local target.
#
# 3. Source - If specified, this sets the source code filenames. If this
# is not set, it defaults to be all of the .cpp, .c, .y, and .l files
# in the current directory.
#
# Default Rule: 'all' is declared before any other target in this file so
# that it can serve as the default goal (assuming the including makefile
# defines no rule ahead of its 'include' line). It is a double-colon rule
# because later sections of this file attach further, independent 'all ::'
# rules to the same target.
all ::
# Default for install is to at least build everything...
# (declared here with no prerequisites; the DIRS section below adds some).
install ::
#--------------------------------------------------------------------
# Installation configuration options...
#--------------------------------------------------------------------
#BinInstDir=/usr/local/bin
#LibInstDir=/usr/local/lib/xxx
#DocInstDir=/usr/doc/xxx
#---------------------------------------------------------
# Compilation options...
#---------------------------------------------------------
# Add -L options to the link command lines...
# LibPathsO lists the Release output directory of each library under
# $(LEVEL)/lib; it is appended to the optimized link command (LinkO).
LibPathsO = -L $(LEVEL)/lib/VMCore/Release \
-L $(LEVEL)/lib/Assembly/Parser/Release \
-L $(LEVEL)/lib/Assembly/Writer/Release \
-L $(LEVEL)/lib/Analysis/Release \
-L $(LEVEL)/lib/Bytecode/Writer/Release \
-L $(LEVEL)/lib/Bytecode/Reader/Release \
-L $(LEVEL)/lib/Optimizations/Release
# LibPathsG is the same list with the trailing "Release" of each word replaced
# by "Debug" (a make substitution reference); it is used by LinkG.
LibPathsG = $(LibPathsO:Release=Debug)
# Enable this for profiling support with 'gprof'
# (Prof is expanded into the compile, link and shared-library commands below.)
#Prof = -pg
# Options shared by every compile: warnings plus the project include directory.
# TODO: Get rid of exceptions! : -fno-exceptions -fno-rtti
CompileCommonOpts = $(Prof) -Wall -Winline -W -Wwrite-strings -Wno-unused -I$(LEVEL)/include
# Compile a file, don't link...
# Naming convention used throughout this file: a trailing G is the debug
# variant (-g), a trailing O is the optimized variant (-O3).
Compile = $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $(CompileCommonOpts)
# Debug compile: debug info and the _DEBUG macro.
CompileG = $(Compile) -g -D_DEBUG
# Optimized compile: defines NDEBUG and enables extra code-generation flags.
# Add This for DebugMalloc: -fno-defer-pop
CompileO = $(Compile) -O3 -DNDEBUG -finline-functions -felide-constructors -fnonnull-objects -freg-struct-return -fshort-enums
# Link final executable
Link = $(CXX) $(Prof)
LinkG = $(Link) -g $(LibPathsG)
LinkO = $(Link) -O3 $(LibPathsO)
# Create a .so file; used in the LIBRARYNAME section to link the object files
# into lib$(LIBRARYNAME).so.
# NOTE(review): '-G' versus the commented-out '-shared' looks like a
# toolchain-specific choice of shared-library flag -- confirm for your linker.
#MakeSO = $(CXX) -shared $(Prof)
MakeSO = $(CXX) -G $(Prof)
MakeSOG = $(MakeSO) -g
MakeSOO = $(MakeSO) -O3
# Create dependency file from CPP file, send to stdout.
Depend = $(CXX) -MM -I$(LEVEL)/include $(CPPFLAGS)
# Archive a bunch of .o files into a .a file...
AR = ar cq
#----------------------------------------------------------
# Source includes all of the cpp files, and objects are derived from the
# source files...
# If the including makefile did not set Source, default to every .cpp, .c, .y
# and .l file in the current directory.
ifndef Source
Source = $(wildcard *.cpp *.c *.y *.l)
endif
# One .o name per source basename. $(sort) also removes duplicate words, so a
# basename that appears with more than one extension yields a single object.
Objs = $(sort $(addsuffix .o,$(basename $(Source))))
# The same object list placed in the Release/ and Debug/ output directories.
ObjectsO = $(addprefix Release/,$(Objs))
ObjectsG = $(addprefix Debug/,$(Objs))
#---------------------------------------------------------
# Handle the DIRS option
#
# For every directory D in DIRS, the targets all/install/clean gain a
# prerequisite D/.makeall, D/.makeinstall or D/.makeclean respectively.
# The pattern rule below serves all three: it changes into the directory part
# of the target ($(@D)) and runs a sub-make whose goal is the target name with
# the leading "D/.make" removed, i.e. "all", "clean" or "install".
#---------------------------------------------------------
ifdef DIRS # Only do this if we're using DIRS!
all :: $(addsuffix /.makeall , $(DIRS))
install :: $(addsuffix /.makeinstall, $(DIRS))
clean :: $(addsuffix /.makeclean , $(DIRS))
%/.makeall %/.makeclean %/.makeinstall:
cd $(@D); $(MAKE) $(subst $(@D)/.make,,$@)
endif
#---------------------------------------------------------
# Handle the LIBRARYNAME option - used when building libs...
#
# When LIBRARYNAME is set, a shared library lib$(LIBRARYNAME).so is linked
# from the directory's objects. Rules exist for both the Release and the Debug
# flavour, but only the Debug library is attached to 'all' (see TODO below).
# LibSubDirs and LibLinkOpts are not defined in this file; presumably the
# including makefile may set them -- they expand to nothing otherwise.
#---------------------------------------------------------
ifdef LIBRARYNAME
LIBNAME_O := Release/lib$(LIBRARYNAME).so
LIBNAME_G := Debug/lib$(LIBRARYNAME).so
all:: $(LIBNAME_G)
#$(LIBNAME_O)
# TODO: Enable optimized builds
# Release library: needs the Release objects and the Release/ directory stamp.
$(LIBNAME_O): $(ObjectsO) $(LibSubDirs) Release/.dir
@echo ======= Linking $(LIBRARYNAME) release library =======
$(MakeSOO) -o $@ $(ObjectsO) $(LibSubDirs) $(LibLinkOpts)
# Debug library: needs the Debug objects and the Debug/ directory stamp.
$(LIBNAME_G): $(ObjectsG) $(LibSubDirs) Debug/.dir
@echo ======= Linking $(LIBRARYNAME) debug library =======
$(MakeSOG) -o $@ $(ObjectsG) $(LibSubDirs) $(LibLinkOpts)
endif
#---------------------------------------------------------
# Create dependencies for the cpp files...
# The compiler's dependency output names "<stem>.o" as the target; sed rewrites
# that to "Release/<stem>.o Debug/<stem>.o Depend/<stem>.d" so that both object
# flavours and the dependency file itself share the same prerequisites.
Depend/%.d: %.cpp Depend/.dir
$(Depend) $< | sed 's|$*\.o *|Release/& Debug/& Depend/$(@F)|g' > $@
# Create .o files in the Release/ and Debug/ directories from the .cpp files...
# The */.dir prerequisites are the directory stamps made by the %/.dir rule,
# so the output directories exist before compiling.
Release/%.o: %.cpp Release/.dir Depend/.dir
$(CompileO) $< -o $@
Debug/%.o: %.cpp Debug/.dir Depend/.dir
$(CompileG) $< -o $@
# Create a .cpp source file from a flex input file... this uses sed to cut down
# on the warnings emitted by GCC...
# The scanner is written to stdout (flex -t) and piped through three filters:
# lines starting with "find_rule" are deleted, and the definitions of yyunput
# and yy_flex_realloc are marked inline.
%.cpp: %.l
flex -t $< | sed '/^find_rule/d' | sed 's/void yyunput/inline void yyunput/' | sed 's/void \*yy_flex_realloc/inline void *yy_flex_realloc/' > $@
# Rule for building the bison parsers...
# -d also emits a header; -p sets the symbol prefix, derived here from the
# grammar file name with a trailing "Parser.y" removed. Bison's .tab.c/.tab.h
# outputs are then renamed to the .cpp/.h names this rule promises.
%.cpp %.h : %.y
bison -d -p $(<:%Parser.y=%) $(basename $@).y
mv -f $(basename $@).tab.c $(basename $@).cpp
mv -f $(basename $@).tab.h $(basename $@).h
# To create the directories...
# "<dir>/.dir" is a stamp file: the rule creates <dir> (and any parents) and
# then writes the current date into the stamp so the target exists on disk.
%/.dir:
mkdir -p $(@D)
@date > $@
# Clean does not remove the output files... just the temporaries
# NOTE(review): the Debug/ and Release/ directories removed here are also
# where LIBNAME_G / LIBNAME_O place the linked libraries, so those are deleted
# as well -- confirm this matches the intent of the comment above.
clean::
rm -rf Debug Release Depend
rm -f core *.o *.d *.so *~ *.flc
# If dependencies were generated for the file that included this file,
# include the dependencies now...
#
# SourceDepend names one Depend/<stem>.d file per entry in Source; the include
# is skipped entirely when that list is empty.
SourceDepend = $(addsuffix .d,$(addprefix Depend/,$(basename $(Source))))
ifneq ($(SourceDepend),)
include $(SourceDepend)
endif

170
Makefile.rules Normal file
View File

@ -0,0 +1,170 @@
# Makefile.rules
#
# This file is included by all of the LLVM makefiles. This file defines common
# rules to do things like compile a .cpp file or generate dependency info.
# These are platform dependent, so this is the file used to specify these
# system dependent operations.
#
# The following functionality may be set by setting incoming variables:
#
# 1. LEVEL - The level of the current subdirectory from the top of the
# MagicStats view. This level should be expressed as a path, for
# example, ../.. for two levels deep.
#
# 2. DIRS - A list of subdirectories to be built. Fake targets are set up
# so that each of the targets "all", "install", and "clean" builds
# the subdirectories before the local target.
#
# 3. Source - If specified, this sets the source code filenames. If this
# is not set, it defaults to be all of the .cpp, .c, .y, and .l files
# in the current directory.
#
# Default Rule: 'all' is declared before any other target in this file so
# that it can serve as the default goal (assuming the including makefile
# defines no rule ahead of its 'include' line). It is a double-colon rule
# because later sections of this file attach further, independent 'all ::'
# rules to the same target.
all ::
# Default for install is to at least build everything...
# (declared here with no prerequisites; the DIRS section below adds some).
install ::
#--------------------------------------------------------------------
# Installation configuration options...
#--------------------------------------------------------------------
#BinInstDir=/usr/local/bin
#LibInstDir=/usr/local/lib/xxx
#DocInstDir=/usr/doc/xxx
#---------------------------------------------------------
# Compilation options...
#---------------------------------------------------------
# Add -L options to the link command lines...
# LibPathsO lists the Release output directory of each library under
# $(LEVEL)/lib; it is appended to the optimized link command (LinkO).
LibPathsO = -L $(LEVEL)/lib/VMCore/Release \
-L $(LEVEL)/lib/Assembly/Parser/Release \
-L $(LEVEL)/lib/Assembly/Writer/Release \
-L $(LEVEL)/lib/Analysis/Release \
-L $(LEVEL)/lib/Bytecode/Writer/Release \
-L $(LEVEL)/lib/Bytecode/Reader/Release \
-L $(LEVEL)/lib/Optimizations/Release
# LibPathsG is the same list with the trailing "Release" of each word replaced
# by "Debug" (a make substitution reference); it is used by LinkG.
LibPathsG = $(LibPathsO:Release=Debug)
# Enable this for profiling support with 'gprof'
# (Prof is expanded into the compile, link and shared-library commands below.)
#Prof = -pg
# Options shared by every compile: warnings plus the project include directory.
# TODO: Get rid of exceptions! : -fno-exceptions -fno-rtti
CompileCommonOpts = $(Prof) -Wall -Winline -W -Wwrite-strings -Wno-unused -I$(LEVEL)/include
# Compile a file, don't link...
# Naming convention used throughout this file: a trailing G is the debug
# variant (-g), a trailing O is the optimized variant (-O3).
Compile = $(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $(CompileCommonOpts)
# Debug compile: debug info and the _DEBUG macro.
CompileG = $(Compile) -g -D_DEBUG
# Optimized compile: defines NDEBUG and enables extra code-generation flags.
# Add This for DebugMalloc: -fno-defer-pop
CompileO = $(Compile) -O3 -DNDEBUG -finline-functions -felide-constructors -fnonnull-objects -freg-struct-return -fshort-enums
# Link final executable
Link = $(CXX) $(Prof)
LinkG = $(Link) -g $(LibPathsG)
LinkO = $(Link) -O3 $(LibPathsO)
# Create a .so file; used in the LIBRARYNAME section to link the object files
# into lib$(LIBRARYNAME).so.
# NOTE(review): '-G' versus the commented-out '-shared' looks like a
# toolchain-specific choice of shared-library flag -- confirm for your linker.
#MakeSO = $(CXX) -shared $(Prof)
MakeSO = $(CXX) -G $(Prof)
MakeSOG = $(MakeSO) -g
MakeSOO = $(MakeSO) -O3
# Create dependency file from CPP file, send to stdout.
Depend = $(CXX) -MM -I$(LEVEL)/include $(CPPFLAGS)
# Archive a bunch of .o files into a .a file...
AR = ar cq
#----------------------------------------------------------
# Source includes all of the cpp files, and objects are derived from the
# source files...
# If the including makefile did not set Source, default to every .cpp, .c, .y
# and .l file in the current directory.
ifndef Source
Source = $(wildcard *.cpp *.c *.y *.l)
endif
# One .o name per source basename. $(sort) also removes duplicate words, so a
# basename that appears with more than one extension yields a single object.
Objs = $(sort $(addsuffix .o,$(basename $(Source))))
# The same object list placed in the Release/ and Debug/ output directories.
ObjectsO = $(addprefix Release/,$(Objs))
ObjectsG = $(addprefix Debug/,$(Objs))
#---------------------------------------------------------
# Handle the DIRS option
#
# For every directory D in DIRS, the targets all/install/clean gain a
# prerequisite D/.makeall, D/.makeinstall or D/.makeclean respectively.
# The pattern rule below serves all three: it changes into the directory part
# of the target ($(@D)) and runs a sub-make whose goal is the target name with
# the leading "D/.make" removed, i.e. "all", "clean" or "install".
#---------------------------------------------------------
ifdef DIRS # Only do this if we're using DIRS!
all :: $(addsuffix /.makeall , $(DIRS))
install :: $(addsuffix /.makeinstall, $(DIRS))
clean :: $(addsuffix /.makeclean , $(DIRS))
%/.makeall %/.makeclean %/.makeinstall:
cd $(@D); $(MAKE) $(subst $(@D)/.make,,$@)
endif
#---------------------------------------------------------
# Handle the LIBRARYNAME option - used when building libs...
#
# When LIBRARYNAME is set, a shared library lib$(LIBRARYNAME).so is linked
# from the directory's objects. Rules exist for both the Release and the Debug
# flavour, but only the Debug library is attached to 'all' (see TODO below).
# LibSubDirs and LibLinkOpts are not defined in this file; presumably the
# including makefile may set them -- they expand to nothing otherwise.
#---------------------------------------------------------
ifdef LIBRARYNAME
LIBNAME_O := Release/lib$(LIBRARYNAME).so
LIBNAME_G := Debug/lib$(LIBRARYNAME).so
all:: $(LIBNAME_G)
#$(LIBNAME_O)
# TODO: Enable optimized builds
# Release library: needs the Release objects and the Release/ directory stamp.
$(LIBNAME_O): $(ObjectsO) $(LibSubDirs) Release/.dir
@echo ======= Linking $(LIBRARYNAME) release library =======
$(MakeSOO) -o $@ $(ObjectsO) $(LibSubDirs) $(LibLinkOpts)
# Debug library: needs the Debug objects and the Debug/ directory stamp.
$(LIBNAME_G): $(ObjectsG) $(LibSubDirs) Debug/.dir
@echo ======= Linking $(LIBRARYNAME) debug library =======
$(MakeSOG) -o $@ $(ObjectsG) $(LibSubDirs) $(LibLinkOpts)
endif
#---------------------------------------------------------
# Create dependencies for the cpp files...
# The compiler's dependency output names "<stem>.o" as the target; sed rewrites
# that to "Release/<stem>.o Debug/<stem>.o Depend/<stem>.d" so that both object
# flavours and the dependency file itself share the same prerequisites.
Depend/%.d: %.cpp Depend/.dir
$(Depend) $< | sed 's|$*\.o *|Release/& Debug/& Depend/$(@F)|g' > $@
# Create .o files in the Release/ and Debug/ directories from the .cpp files...
# The */.dir prerequisites are the directory stamps made by the %/.dir rule,
# so the output directories exist before compiling.
Release/%.o: %.cpp Release/.dir Depend/.dir
$(CompileO) $< -o $@
Debug/%.o: %.cpp Debug/.dir Depend/.dir
$(CompileG) $< -o $@
# Create a .cpp source file from a flex input file... this uses sed to cut down
# on the warnings emitted by GCC...
# The scanner is written to stdout (flex -t) and piped through three filters:
# lines starting with "find_rule" are deleted, and the definitions of yyunput
# and yy_flex_realloc are marked inline.
%.cpp: %.l
flex -t $< | sed '/^find_rule/d' | sed 's/void yyunput/inline void yyunput/' | sed 's/void \*yy_flex_realloc/inline void *yy_flex_realloc/' > $@
# Rule for building the bison parsers...
# -d also emits a header; -p sets the symbol prefix, derived here from the
# grammar file name with a trailing "Parser.y" removed. Bison's .tab.c/.tab.h
# outputs are then renamed to the .cpp/.h names this rule promises.
%.cpp %.h : %.y
bison -d -p $(<:%Parser.y=%) $(basename $@).y
mv -f $(basename $@).tab.c $(basename $@).cpp
mv -f $(basename $@).tab.h $(basename $@).h
# To create the directories...
# "<dir>/.dir" is a stamp file: the rule creates <dir> (and any parents) and
# then writes the current date into the stamp so the target exists on disk.
%/.dir:
mkdir -p $(@D)
@date > $@
# Clean does not remove the output files... just the temporaries
# NOTE(review): the Debug/ and Release/ directories removed here are also
# where LIBNAME_G / LIBNAME_O place the linked libraries, so those are deleted
# as well -- confirm this matches the intent of the comment above.
clean::
rm -rf Debug Release Depend
rm -f core *.o *.d *.so *~ *.flc
# If dependencies were generated for the file that included this file,
# include the dependencies now...
#
# SourceDepend names one Depend/<stem>.d file per entry in Source; the include
# is skipped entirely when that list is empty.
SourceDepend = $(addsuffix .d,$(addprefix Depend/,$(basename $(Source))))
ifneq ($(SourceDepend),)
include $(SourceDepend)
endif

50
docs/ChrisNotes.txt Normal file
View File

@ -0,0 +1,50 @@
* Provide a pass that eliminates critical edges from the CFG
* Provide a print hook to print out xvcg format files for vis
* I need to provide an option to the bytecode loader to ignore memory
dependence edges. Instead, the VM would just treat memory operations
(load, store, getfield, putfield, call) as pinned instructions.
* I need to have a way to prevent taking the address of a constant pool
reference. You should only be able to take the address of a variable.
Maybe taking the address of a constant copies it? What about virtual
function tables? Maybe a const pointer would be better...
* Structures should be accessed something like this: ubyte is ok. Limits
structure size to 256 members. This can be fixed later by either:
1. adding a variant that takes ushort
2. Splitting structures into nested structures each of half size
<float> %f = loadfield *{int, {float}} Str, ubyte 1, ubyte 0
storefield float %f, *{int, {float}} Str, ubyte 1, ubyte 0
* I'm noticing me writing a lot of code that looks like this (dtor material here):
ConstPool.dropAllReferences();
ConstPool.delete_all();
ConstPool.setParent(0);
~ConstPool
* Need a way to attach bytecode block info at various levels of asm code.
* Rename "ConstantPool" to "ConstPool"
* Maybe ConstantPool objects should keep themselves sorted as things are
inserted.
* Need to be able to inflate recursive types. %x = { *%x }, %x = %x ()
* Recognize and save comments in assembly and bytecode format
* Encode line number table in bytecode (like #line), optional table
* Encode negative relative offsets in the bytecode file
* Implement switch to switch on a constant pool array of type:
[{ label, int }] or [label] (lookup vs index switch)
* Apparently bison has a %pure_parser option. Maybe useful for Assembly/Parser
* Implement a header file that can read either assembly or bytecode, implement
a writer that can output either based on what is read with this reader..
* Implement the following derived types:
* structure/record { int %foo, int %bar} or { %foo = int, int }
* pointer int *
* "packed format", like this: [4 x sbyte]: Packed SIMD datatype
* Maybe 'tailcall' also?
* It might be nice to support enumerations of some sort... especially for use
as a compiler IR
* Include a method level bytecode block that defines a mapping between values
and registers that defines a minimally register allocated code. This can
make me finally address how to encode extensions in assembly.
* Bytecode reader should use extensions that may or may not be linked into the
application to read blocks. Thus an easy way to ignore symbol table info
would be to not link in that reader into the app.

View File

@ -0,0 +1,74 @@
Date: Sat, 18 Nov 2000 09:19:35 -0600 (CST)
From: Vikram Adve <vadve@cs.uiuc.edu>
To: Chris Lattner <lattner@cs.uiuc.edu>
Subject: a few thoughts
I've been mulling over the virtual machine problem and I had some
thoughts about some things for us to think about and discuss:
1. We need to be clear on our goals for the VM. Do we want to emphasize
portability and safety like the Java VM? Or shall we focus on the
architecture interface first (i.e., consider the code generation and
processor issues), since the architecture interface question is also
important for portable Java-type VMs?
This is important because the audiences for these two goals are very
different. Architects and many compiler people care much more about
the second question. The Java compiler and OS community care much more
about the first one.
Also, while the architecture interface question is important for
Java-type VMs, the design constraints are very different.
2. Design issues to consider (an initial list that we should continue
to modify). Note that I'm not trying to suggest actual solutions here,
but just various directions we can pursue:
a. A single-assignment VM, which we've both already been thinking about.
b. A strongly-typed VM. One question is do we need the types to be
explicitly declared or should they be inferred by the dynamic compiler?
c. How do we get more high-level information into the VM while keeping
to a low-level VM design?
o Explicit array references as operands? An alternative is
to have just an array type, and let the index computations be
separate 3-operand instructions.
o Explicit instructions to handle aliasing, e.g.:
-- an instruction to say "I speculate that these two values are not
aliased, but check at runtime", like speculative execution in
EPIC?
-- or an instruction to check whether two values are aliased and
execute different code depending on the answer, somewhat like
predicated code in EPIC
o (This one is a difficult but powerful idea.)
A "thread-id" field on every instruction that allows the static
compiler to generate a set of parallel threads, and then have
the runtime compiler and hardware do what they please with it.
This has very powerful uses, but thread-id on every instruction
is expensive in terms of instruction size and code size.
We would need to compactly encode it somehow.
Also, this will require some reading on at least two other
projects:
-- Multiscalar architecture from Wisconsin
-- Simultaneous multithreading architecture from Washington
o Or forget all this and stick to a traditional instruction set?
BTW, on an unrelated note, after the meeting yesterday, I did remember
that you had suggested doing instruction scheduling on SSA form instead
of a dependence DAG earlier in the semester. When we talked about
it yesterday, I didn't remember where the idea had come from but I
remembered later. Just giving credit where it's due...
Perhaps you can save the above as a file under RCS so you and I can
continue to expand on this.
--Vikram

View File

@ -0,0 +1,199 @@
Date: Sun, 19 Nov 2000 16:23:57 -0600 (CST)
From: Chris Lattner <sabre@nondot.org>
To: Vikram Adve <vadve@cs.uiuc.edu>
Subject: Re: a few thoughts
Okay... here are a few of my thoughts on this (it's good to know that we
think so alike!):
> 1. We need to be clear on our goals for the VM. Do we want to emphasize
> portability and safety like the Java VM? Or shall we focus on the
> architecture interface first (i.e., consider the code generation and
> processor issues), since the architecture interface question is also
> important for portable Java-type VMs?
I foresee the architecture looking kinda like this: (which is completely
subject to change)
1. The VM code is NOT guaranteed safe in a java sense. Doing so makes it
basically impossible to support C like languages. Besides that,
certifying a register based language as safe at run time would be a
pretty expensive operation to have to do. Additionally, we would like
to be able to statically eliminate many bounds checks in Java
programs... for example.
2. Instead, we can do the following (eventually):
* Java bytecode is used as our "safe" representation (to avoid
reinventing something that we don't add much value to). When the
user chooses to execute Java bytecodes directly (ie, not
precompiled) the runtime compiler can do some very simple
transformations (JIT style) to convert it into valid input for our
VM. Performance is not wonderful, but it works right.
* The file is scheduled to be compiled (rigorously) at a later
time. This could be done by some background process or by a second
processor in the system during idle time or something...
* To keep things "safe" ie to enforce a sandbox on Java/foreign code,
we could sign the generated VM code with a host specific private
key. Then before the code is executed/loaded, we can check to see if
the trusted compiler generated the code. This would be much quicker
than having to validate consistency (especially if bounds checks have
been removed, for example)
> This is important because the audiences for these two goals are very
> different. Architects and many compiler people care much more about
> the second question. The Java compiler and OS community care much more
> about the first one.
3. By focusing on a more low level virtual machine, we have much more room
for value add. The nice safe "sandbox" VM can be provided as a layer
on top of it. It also lets us focus on the more interesting compilers
related projects.
> 2. Design issues to consider (an initial list that we should continue
> to modify). Note that I'm not trying to suggest actual solutions here,
> but just various directions we can pursue:
Understood. :)
> a. A single-assignment VM, which we've both already been thinking
> about.
Yup, I think that this makes a lot of sense. I am still intrigued,
however, by the prospect of a minimally allocated VM representation... I
think that it could have definite advantages for certain applications
(think very small machines, like PDAs). I don't, however, think that our
initial implementations should focus on this. :)
Here are some other auxiliary goals that I think we should consider:
1. Primary goal: Support a high performance dynamic compilation
system. This means that we have an "ideal" division of labor between
the runtime and static compilers. Of course, the other goals of the
system somewhat reduce the importance of this point (f.e. portability
reduces performance, but hopefully not much)
2. Portability to different processors. Since we are most familiar with
x86 and solaris, I think that these two are excellent candidates when
we get that far...
3. Support for all languages & styles of programming (general purpose
VM). This is the point that disallows java style bytecodes, where all
array refs are checked for bounds, etc...
4. Support linking between different language families. For example, call
C functions directly from Java without using the nasty/slow/gross JNI
layer. This involves several subpoints:
A. Support for languages that require garbage collectors and integration
with languages that don't. As a base point, we could insist on
always using a conservative GC, but implement free as a noop, f.e.
> b. A strongly-typed VM. One question is do we need the types to be
> explicitly declared or should they be inferred by the dynamic
> compiler?
B. This is kind of similar to another idea that I have: make OOP
constructs (virtual function tables, class hierarchies, etc) explicit
in the VM representation. I believe that the number of additional
constructs would be fairly low, but would give us lots of important
information... something else that would/could be important is to
have exceptions as first class types so that they would be handled in
a uniform way for the entire VM... so that C functions can call Java
functions for example...
> c. How do we get more high-level information into the VM while keeping
> to a low-level VM design?
> o Explicit array references as operands? An alternative is
> to have just an array type, and let the index computations be
> separate 3-operand instructions.
C. In the model I was thinking of (subject to change of course), we
would just have an array type (distinct from the pointer
types). This would allow us to have arbitrarily complex index
expressions, while still distinguishing "load" from "Array load",
for example. Perhaps also, switch jump tables would be first class
types as well? This would allow better reasoning about the program.
5. Support dynamic loading of code from various sources. Already
mentioned above was the example of loading java bytecodes, but we want
to support dynamic loading of VM code as well. This makes the job of
the runtime compiler much more interesting: it can do interprocedural
optimizations that the static compiler can't do, because it doesn't
have all of the required information (for example, inlining from
shared libraries, etc...)
6. Define a set of generally useful annotations to add to the VM
representation. For example, a function can be analysed to see if it
has any side effects when run... also, the MOD/REF sets could be
calculated, etc... we would have to determine what is reasonable. This
would generally be used to make IP optimizations cheaper for the
runtime compiler...
> o Explicit instructions to handle aliasing, e.g.:
> -- an instruction to say "I speculate that these two values are not
> aliased, but check at runtime", like speculative execution in
> EPIC?
> -- or an instruction to check whether two values are aliased and
> execute different code depending on the answer, somewhat like
> predicated code in EPIC
These are also very good points... if this can be determined at compile
time. I think that an epic style of representation (not the instruction
packing, just the information presented) could be a very interesting model
to use... more later...
> o (This one is a difficult but powerful idea.)
> A "thread-id" field on every instruction that allows the static
> compiler to generate a set of parallel threads, and then have
> the runtime compiler and hardware do what they please with it.
> This has very powerful uses, but thread-id on every instruction
> is expensive in terms of instruction size and code size.
> We would need to compactly encode it somehow.
Yes yes yes! :) I think it would be *VERY* useful to include this kind
of information (which EPIC architectures *implicitly* encode). The trend
that we are seeing supports this greatly:
1. Commodity processors are getting massive SIMD support:
* Intel/Amd MMX/MMX2
* AMD's 3Dnow!
* Intel's SSE/SSE2
* Sun's VIS
2. SMP is becoming much more common, especially in the server space.
3. Multiple processors on a die are right around the corner.
If nothing else, not designing this in would severely limit our future
expansion of the project...
> Also, this will require some reading on at least two other
> projects:
> -- Multiscalar architecture from Wisconsin
> -- Simultaneous multithreading architecture from Washington
>
> o Or forget all this and stick to a traditional instruction set?
Heh... :) Well, from a pure research point of view, it is almost more
attractive to go with the most extreme/different ISA possible. On one axis
you get safety and conservatism, and on the other you get degree of
influence that the results have. Of course the problem with pure research
is that often times there is no concrete product of the research... :)
> BTW, on an unrelated note, after the meeting yesterday, I did remember
> that you had suggested doing instruction scheduling on SSA form instead
> of a dependence DAG earlier in the semester. When we talked about
> it yesterday, I didn't remember where the idea had come from but I
> remembered later. Just giving credit where it's due...
:) Thanks.
> Perhaps you can save the above as a file under RCS so you and I can
> continue to expand on this.
I think it makes sense to do so when we get our ideas more formalized and
bounce it back and forth a couple of times... then I'll do a more formal
writeup of our goals and ideas. Obviously our first implementation will
not want to do all of the stuff that I pointed out above... but we will
want to design the project so that we do not artificially limit ourselves
at some time in the future...
Anyways, let me know what you think about these ideas... and if they sound
reasonable...
-Chris

View File

@ -0,0 +1,30 @@
From: Chris Lattner [mailto:sabre@nondot.org]
Sent: Wednesday, December 06, 2000 6:41 PM
To: Vikram S. Adve
Subject: Additional idea with respect to encoding
Here's another idea with respect to keeping the common case instruction
size down (less than 32 bits ideally):
Instead of encoding an instruction to operate on two register numbers,
have it operate on two negative offsets based on the current register
number. Therefore, instead of using:
r57 = add r55, r56 (r57 is the implicit dest register, of course)
We could use:
r57 = add -2, -1
My guess is that most SSA references are to recent values (especially if
they correspond to expressions like (x+y*z+p*q/ ...), so the negative
numbers would tend to stay small, even at the end of the procedure (where
the implicit register destination number could be quite large). Of course
the negative sign is redundant, so you would be storing small integers
almost all of the time, and 5-6 bits worth of register number would be
plenty for most cases...
What do you think?
-Chris

View File

@ -0,0 +1,83 @@
SUMMARY
-------
We met to discuss the LLVM instruction format and bytecode representation:
ISSUES RESOLVED
---------------
1. We decided that we shall use a flat namespace to represent our
variables in SSA form, as opposed to having a two dimensional namespace
of the original variable and the SSA instance subscript.
ARGUMENT AGAINST:
* A two dimensional namespace would be valuable when doing alias
analysis because the extra information can help limit the scope of
analysis.
ARGUMENT FOR:
* Including this information would require that all users of the LLVM
bytecode would have to parse and handle it. This would slow down the
common case and inflate the instruction representation with another
infinite variable space.
REASONING:
* It was decided that because original variable sources could be
reconstructed from SSA form in linear time, that it would be an
unjustified expense for the common case to include the extra
information for one optimization. Alias analysis itself is typically
greater than linear in asymptotic complexity, so this extra analysis
would not affect the runtime of the optimization in a significant
way. Additionally, this would be an unlikely optimization to do at
runtime.
IDEAS TO CONSIDER
-----------------
1. Including dominator information in the LLVM bytecode
representation. This is one example of an analysis result that may be
packaged with the bytecodes themselves. As a conceptual implementation
idea, we could include an immediate dominator number for each basic block
in the LLVM bytecode program. Basic blocks could be numbered according
to the order of occurrence in the bytecode representation.
2. Including loop header and body information. This would facilitate
detection of intervals and natural loops.
UNRESOLVED ISSUES
-----------------
1. Will oSUIF provide enough of an infrastructure to support the research
that we will be doing? We know that it has less than stellar
performance, but hope that this will be of little importance for our
static compiler. This could affect us if we decided to do some IP
research. Also we do not yet understand the level of exception support
currently implemented.
2. Should we consider the requirements of a direct hardware implementation
of the LLVM when we design it? If so, several design issues should
have their priorities shifted. The other option is to focus on a
software layer interpreting the LLVM in all cases.
3. Should we use some form of packetized format to improve forward
compatibility? For example, we could design the system to encode a
packet type and length field before analysis information, to allow a
runtime to skip information that it didn't understand in a bytecode
stream. The obvious benefit would be for compatibility, the drawback
is that it would tend to splinter the 'standard' LLVM definition.
4. Should we use fixed length instructions or variable length
instructions? Fetching variable length instructions is expensive (for
either hardware or software based LLVM runtimes), but we have several
'infinite' spaces that instructions operate in (SSA register numbers,
type spaces, or packet length [if packets were implemented]). Several
options were mentioned including:
A. Using 16 or 32 bit numbers, which would be 'big enough'
B. A scheme similar to how UTF-8 works, to encode infinite numbers
while keeping small numbers small.
C. Use something similar to Huffman encoding, so that the most common
numbers are the smallest.
-Chris

View File

@ -0,0 +1,39 @@
Date: Wed, 31 Jan 2001 12:04:33 -0600
From: Vikram S. Adve <vadve@cs.uiuc.edu>
To: Chris Lattner <lattner@cs.uiuc.edu>
Subject: another thought
I have a budding idea about making LLVM a little more ambitious: a
customizable runtime system that can be used to implement language-specific
virtual machines for many different languages. E.g., a C vm, a C++ vm, a
Java vm, a Lisp vm, ..
The idea would be that LLVM would provide a standard set of runtime features
(some low-level like standard assembly instructions with code generation and
static and runtime optimization; some higher-level like type-safety and
perhaps a garbage collection library). Each language vm would select the
runtime features needed for that language, extending or customizing them as
needed. Most of the machine-dependent code-generation and optimization
features as well as low-level machine-independent optimizations (like PRE)
could be provided by LLVM and should be sufficient for any language,
simplifying the language compiler. (This would also help interoperability
between languages.) Also, some or most of the higher-level
machine-independent features like type-safety and access safety should be
reusable by different languages, with minor extensions. The language
compiler could then focus on language-specific analyses and optimizations.
The risk is that this sounds like a universal IR -- something that the
compiler community has tried and failed to develop for decades, and is
universally skeptical about. No matter what we say, we won't be able to
convince anyone that we have a universal IR that will work. We need to
think about whether LLVM is different or if it has something novel that might
convince people. E.g., the idea of providing a package of separable
features that different languages select from. Also, using SSA with or
without type-safety as the intermediate representation.
One interesting starting point would be to discuss how a JVM would be
implemented on top of LLVM a bit more. That might give us clues on how to
structure LLVM to support one or more language VMs.
--Vikram

View File

@ -0,0 +1,67 @@
Date: Tue, 6 Feb 2001 20:27:37 -0600 (CST)
From: Chris Lattner <sabre@nondot.org>
To: Vikram S. Adve <vadve@cs.uiuc.edu>
Subject: Type notation debate...
This is the way that I am currently planning on implementing types:
Primitive Types:
type ::= void|bool|sbyte|ubyte|short|ushort|int|uint|long|ulong
Method:
typelist ::= typelisth | /*empty*/
typelisth ::= type | typelisth ',' type
type ::= type (typelist)
Arrays (without and with size):
type ::= '[' type ']' | '[' INT ',' type ']'
Pointer:
type ::= type '*'
Structure:
type ::= '{' typelist '}'
Packed:
type ::= '<' INT ',' type '>'
Simple examples:
[[ %4, int ]] - array of (array of 4 (int))
[ { int, int } ] - Array of structure
[ < %4, int > ] - Array of 128 bit SIMD packets
int (int, [[ %4, int ]]) - Method taking a 2d array and int, returning int
Okay before you comment, please look at:
http://www.research.att.com/~bs/devXinterview.html
Search for "In another interview, you defined the C declarator syntax as
an experiment that failed. However, this syntactic construct has been
around for 27 years and perhaps more; why do you consider it problematic
(except for its cumbersome syntax)?" and read that response for me. :)
Now with this syntax, his example would be represented as:
[ %10, bool (int, int) * ] *
vs
bool (*(*)[10])(int, int)
in C.
Basically, my argument for this type construction system is that it is
VERY simple to use and understand (although it IS different than C, it is
very simple and straightforward, which C is NOT). In fact, I would assert
that most programmers TODAY do not understand pointers to member
functions, and have to look up an example when they have to write them.
In my opinion, it is critically important to have clear and concise type
specifications, because types are going to be all over the programs.
Let me know your thoughts on this. :)
-Chris

View File

@ -0,0 +1,75 @@
Date: Thu, 8 Feb 2001 08:42:04 -0600
From: Vikram S. Adve <vadve@cs.uiuc.edu>
To: Chris Lattner <sabre@nondot.org>
Subject: RE: Type notation debate...
Chris,
> Okay before you comment, please look at:
>
> http://www.research.att.com/~bs/devXinterview.html
I read this argument. Even before that, I was already in agreement with you
and him that the C declarator syntax is difficult and confusing.
But in fact, if you read the entire answer carefully, he came to the same
conclusion I do: that you have to go with familiar syntax over logical
syntax because familiarity is such a strong force:
"However, familiarity is a strong force. To compare, in English, we
live
more or less happily with the absurd rules for "to be" (am, are, is, been,
was, were, ...) and all attempts to simplify are treated with contempt or
(preferably) humor. It be a curious world and it always beed."
> Basically, my argument for this type construction system is that it is
> VERY simple to use and understand (although it IS different than C, it is
> very simple and straightforward, which C is NOT). In fact, I would assert
> that most programmers TODAY do not understand pointers to member
> functions, and have to look up an example when they have to write them.
Again, I don't disagree with this at all. But to some extent this
particular problem is inherently difficult. Your syntax for the above
example may be easier for you to read because this is the way you have been
thinking about it. Honestly, I don't find it much easier than the C syntax.
In either case, I would have to look up an example to write pointers to
member functions.
But pointers to member functions are nowhere near as common as arrays. And
the old array syntax:
type [ int, int, ...]
is just much more familiar and clear to people than anything new you
introduce, no matter how logical it is. Introducing a new syntax that may
make function pointers easier but makes arrays much more difficult seems
very risky to me.
> In my opinion, it is critically important to have clear and concise type
> specifications, because types are going to be all over the programs.
I absolutely agree. But the question is, what is more clear and concise?
The syntax programmers are used to out of years of experience or a new
syntax that they have never seen that has a more logical structure. I think
the answer is the former. Sometimes, you have to give up a better idea
because you can't overcome sociological barriers to it. Qwerty keyboards
and Windows are two classic examples of bad technology that are difficult to
root out.
P.S. Also, while I agree that most of your syntax is more logical, there is
one part that isn't:
Arrays (without and with size):
type ::= '[' type ']' | '[' INT ',' type ']'.
The arrays with size lists the dimensions and the type in a single list.
That is just too confusing:
[10, 40, int]
This seems to be a 3-D array where the third dimension is something strange.
It is too confusing to have a list of 3 things, some of which are dimensions
and one is a type. Either of the following would be better:
array [10, 40] of int
or
int [10, 40]
--Vikram

View File

@ -0,0 +1,53 @@
Date: Thu, 8 Feb 2001 14:31:05 -0600 (CST)
From: Chris Lattner <sabre@nondot.org>
To: Vikram S. Adve <vadve@cs.uiuc.edu>
Subject: RE: Type notation debate...
> Arrays (without and with size):
> type ::= '[' type ']' | '[' INT ',' type ']'.
>
> The arrays with size lists the dimensions and the type in a single list.
> That is just too confusing:
> [10, 40, int]
> This seems to be a 3-D array where the third dimension is something strange.
> It is too confusing to have a list of 3 things, some of which are dimensions
> and one is a type.
The above grammar indicates that there is only one integer parameter, ie
the upper bound. The lower bound is always implied to be zero, for
several reasons:
* As a low level VM, we want to expose addressing computations
explicitly. Since the lower bound must always be known in a high level
language statically, the language front end can do the translation
automatically.
* This fits more closely with what Java needs, ie what we need in the
short term. Java arrays are always zero based.
If a two element list is too confusing, I would recommend an alternate
syntax of:
type ::= '[' type ']' | '[' INT 'x' type ']'.
For example:
[12 x int]
[12x int]
[ 12 x [ 4x int ]]
Which is syntactically nicer, and more explicit.
> Either of the following would be better:
> array [10, 40] of int
I considered this approach for arrays in general (ie array of int/ array
of 12 int), but found that it made declarations WAY too long. Remember
that because of the nature of llvm, you get a lot of types strewn all over
the program, and using the 'typedef' like facility is not a wonderful
option, because then types aren't explicit anymore.
I find this email interesting, because you contradict the previous email
you sent, where you recommend that we stick to C syntax....
-Chris

View File

@ -0,0 +1,89 @@
> But in fact, if you read the entire answer carefully, he came to the same
> conclusion I do: that you have to go with familiar syntax over logical
> syntax because familiarity is such a strong force:
> "However, familiarity is a strong force. To compare, in English, we
live
> more or less happily with the absurd rules for "to be" (am, are, is, been,
> was, were, ...) and all attempts to simplify are treated with contempt or
> (preferably) humor. It be a curious world and it always beed."
Although you have to remember that his situation was considerably
different than ours. He was in a position where he was designing a high
level language that had to be COMPATIBLE with C. Our language is such
that a new person would have to learn the new, different, syntax
anyways. Making them learn about the type system does not seem like much
of a stretch from learning the opcodes and how SSA form works, and how
everything ties together...
> > Basically, my argument for this type construction system is that it is
> > VERY simple to use and understand (although it IS different than C, it is
> > very simple and straightforward, which C is NOT). In fact, I would assert
> > that most programmers TODAY do not understand pointers to member
> > functions, and have to look up an example when they have to write them.
> Again, I don't disagree with this at all. But to some extent this
> particular problem is inherently difficult. Your syntax for the above
> example may be easier for you to read because this is the way you have been
> thinking about it. Honestly, I don't find it much easier than the C syntax.
> In either case, I would have to look up an example to write pointers to
> member functions.
I would argue that because the lexical structure of the language is self
consistent, any person who spent a significant amount of time programming
in LLVM directly would understand how to do it without looking it up in a
manual. The reason this does not work for C is because you rarely have to
declare these pointers, and the syntax is inconsistent with the method
declaration and calling syntax.
> But pointers to member functions are nowhere near as common as arrays.
Very true. If you're implementing an object oriented language, however,
remember that you have to do all the pointer to member function stuff
yourself.... so every time you invoke a virtual method one is involved
(instead of having C++ hide it for you behind "syntactic sugar").
> And the old array syntax:
> type [ int, int, ...]
> is just much more familiar and clear to people than anything new you
> introduce, no matter how logical it is.
Erm... excuse me but how is this the "old array syntax"? If you are
arguing for consistency with C, you should be asking for 'type int []',
which is significantly different than the above (besides, the above
introduces a new operator and duplicates information
needlessly). Basically what I am suggesting is exactly the above without
the fluff. So instead of:
type [ int, int, ...]
you use:
type [ int ]
> Introducing a new syntax that may
> make function pointers easier but makes arrays much more difficult seems
> very risky to me.
This is not about function pointers. This is about consistency in the
type system, and consistency with the rest of the language. The point
above does not make arrays any more difficult to use, and makes the
structure of types much more obvious than the "c way".
> > In my opinion, it is critically important to have clear and concise type
> > specifications, because types are going to be all over the programs.
>
> I absolutely agree. But the question is, what is more clear and concise?
> The syntax programmers are used to out of years of experience or a new
> syntax that they have never seen that has a more logical structure. I think
> the answer is the former. Sometimes, you have to give up a better idea
> because you can't overcome sociological barriers to it. Qwerty keyboards
> and Windows are two classic examples of bad technology that are difficult to
> root out.
Very true, but you seem to be advocating a completely different Type
system than C has, in addition to it not offering the advantages of clear
structure that the system I recommended does... so you seem to not have a
problem with changing this, just with what I change it to. :)
-Chris

View File

@ -0,0 +1,120 @@
Ok, here are my comments and suggestions about the LLVM instruction set.
We should discuss some now, but can discuss many of them later, when we
revisit synchronization, type inference, and other issues.
(We have discussed some of the comments already.)
o We should consider eliminating the type annotation in cases where it is
essentially obvious from the instruction type, e.g., in br, it is obvious
that the first arg. should be a bool and the other args should be labels:
br bool <cond>, label <iftrue>, label <iffalse>
I think your point was that making all types explicit improves clarity
and readability. I agree to some extent, but it also comes at the cost
of verbosity. And when the types are obvious from people's experience
(e.g., in the br instruction), it doesn't seem to help as much.
o On reflection, I really like your idea of having the two different switch
types (even though they encode implementation techniques rather than
semantics). It should simplify building the CFG and my guess is it could
enable some significant optimizations, though we should think about which.
o In the lookup-indirect form of the switch, is there a reason not to make
the val-type uint? Most HLL switch statements (including Java and C++)
require that anyway. And it would also make the val-type uniform
in the two forms of the switch.
I did see the switch-on-bool examples and, while cute, we can just use
the branch instructions in that particular case.
o I agree with your comment that we don't need 'neg'.
o There's a trade-off with the cast instruction:
+ it avoids having to define all the upcasts and downcasts that are
valid for the operands of each instruction (you probably have thought
of other benefits also)
- it could make the bytecode significantly larger because there could
be a lot of cast operations
o Making the second arg. to 'shl' a ubyte seems good enough to me.
255 positions seems adequate for several generations of machines
and is more compact than uint.
o I still have some major concerns about including malloc and free in the
language (either as builtin functions or instructions). LLVM must be
able to represent code from many different languages. Languages such as
C, C++, Java and Fortran 90 would not be able to use our malloc anyway
because each of them will want to provide a library implementation of it.
This gets even worse when code from different languages is linked
into a single executable (which is fairly common in large apps).
Having a single malloc would just not suffice, and instead would simply
complicate the picture further because it adds an extra variant in
addition to the one each language provides.
Instead, providing a default library version of malloc and free
(and perhaps a malloc_gc with garbage collection instead of free)
would make a good implementation available to anyone who wants it.
I don't recall all your arguments in favor so let's discuss this again,
and soon.
o 'alloca' on the other hand sounds like a good idea, and the
implementation seems fairly language-independent so it doesn't have the
problems with malloc listed above.
o About indirect call:
Your option #2 sounded good to me. I'm not sure I understand your
concern about an explicit 'icall' instruction?
o A pair of important synchronization instr'ns to think about:
load-linked
store-conditional
o Other classes of instructions that are valuable for pipeline performance:
conditional-move
predicated instructions
o I believe tail calls are relatively easy to identify; do you know why
.NET has a tailcall instruction?
o I agree that we need a static data space. Otherwise, emulating global
data gets unnecessarily complex.
o About explicit parallelism:
We once talked about adding a symbolic thread-id field to each
instruction. (It could be optional so single-threaded codes are
not penalized.) This could map well to multi-threaded architectures
while providing easy ILP for single-threaded ones. But it is probably
too radical an idea to include in a base version of LLVM. Instead, it
could be a great topic for a separate study.
What is the semantics of the IA64 stop bit?
o And finally, another thought about the syntax for arrays :-)
Although this syntax:
array <dimension-list> of <type>
is verbose, it will be used only in the human-readable assembly code so
size should not matter. I think we should consider it because I find it
to be the clearest syntax. It could even make arrays of function
pointers somewhat readable.

View File

@ -0,0 +1,245 @@
From: Chris Lattner <sabre@nondot.org>
To: "Vikram S. Adve" <vadve@cs.uiuc.edu>
Subject: Re: LLVM Feedback
I've included your feedback in the /home/vadve/lattner/llvm/docs directory
so that it will live in CVS eventually with the rest of LLVM. I've
significantly updated the documentation to reflect the changes you
suggested, as specified below:
> We should consider eliminating the type annotation in cases where it is
> essentially obvious from the instruction type:
> br bool <cond>, label <iftrue>, label <iffalse>
> I think your point was that making all types explicit improves clarity
> and readability. I agree to some extent, but it also comes at the
> cost of verbosity. And when the types are obvious from people's
> experience (e.g., in the br instruction), it doesn't seem to help as
> much.
Very true. We should discuss this more, but my reasoning is more of a
consistency argument. There are VERY few instructions that can have all
of the types eliminated, and doing so when available unnecessarily makes
the language more difficult to handle. Especially when you see 'int
%this' and 'bool %that' all over the place, I think it would be
disorienting to see:
br %predicate, %iftrue, %iffalse
for branches. Even just typing that once gives me the creeps. ;) Like I
said, we should probably discuss this further in person...
> On reflection, I really like your idea of having the two different
> switch types (even though they encode implementation techniques rather
> than semantics). It should simplify building the CFG and my guess is it
> could enable some significant optimizations, though we should think
> about which.
Great. I added a note to the switch section commenting on how the VM
should just use the instruction type as a hint, and that the
implementation may choose alternate representations (such as predicated
branches).
> In the lookup-indirect form of the switch, is there a reason not to
> make the val-type uint?
No. This was something I was debating for a while, and didn't really feel
strongly about either way. It is common to switch on other types in HLL's
(for example signed int's are particularly common), but in this case, all
that will be added is an additional 'cast' instruction. I removed that
from the spec.
> I agree with your comment that we don't need 'neg'
Removed.
> There's a trade-off with the cast instruction:
> + it avoids having to define all the upcasts and downcasts that are
> valid for the operands of each instruction (you probably have
> thought of other benefits also)
> - it could make the bytecode significantly larger because there could
> be a lot of cast operations
+ You NEED casts to represent things like:
void foo(float);
...
int x;
...
foo(x);
in a language like C. Even in a Java like language, you need upcasts
and some way to implement dynamic downcasts.
+ Not all forms of instructions take every type (for example you can't
shift by a floating point number of bits), thus SOME programs will need
implicit casts.
To be efficient and to avoid your '-' point above, we just have to be
careful to specify that the instructions shall operate on all common
types, therefore casting should be relatively uncommon. For example all
of the arithmetic operations work on almost all data types.
> Making the second arg. to 'shl' a ubyte seems good enough to me.
> 255 positions seems adequate for several generations of machines
Okay, that comment is removed.
> and is more compact than uint.
No, it isn't. Remember that the bytecode encoding saves value slots into
the bytecode instructions themselves, not constant values. This is
another case where we may introduce more cast instructions (but we will
also reduce the number of opcode variants that must be supported by a
virtual machine). Because most shifts are by constant values, I don't
think that we'll have to cast many shifts. :)
> I still have some major concerns about including malloc and free in the
> language (either as builtin functions or instructions).
Agreed. How about this proposal:
malloc/free are either built in functions or actual opcodes. They provide
all of the type safety that the document would indicate, blah blah
blah. :)
Now, because of all of the excellent points that you raised, an
implementation may want to override the default malloc/free behavior of
the program. To do this, they simply implement a "malloc" and
"free" function. The virtual machine will then be defined to use the user
defined malloc/free function (which return/take void*'s, not type'd
pointers like the builtin function would) if one is available, otherwise
fall back on a system malloc/free.
Does this sound like a good compromise? It would give us all of the
typesafety/elegance in the language while still allowing the user to do
all the cool stuff they want to...
> 'alloca' on the other hand sounds like a good idea, and the
> implementation seems fairly language-independent so it doesn't have the
> problems with malloc listed above.
Okay, once we get the above stuff figured out, I'll put it all in the
spec.
> About indirect call:
> Your option #2 sounded good to me. I'm not sure I understand your
> concern about an explicit 'icall' instruction?
I worry too much. :) The other alternative has been removed. 'icall' is
now up in the instruction list next to 'call'.
> I believe tail calls are relatively easy to identify; do you know why
> .NET has a tailcall instruction?
Although I am just guessing, I believe it probably has to do with the fact
that they want languages like Haskell and lisp to be efficiently runnable
on their VM. Of course this means that the VM MUST implement tail calls
'correctly', or else life will suck. :) I would put this into a future
feature bin, because it could be pretty handy...
> A pair of important synchronization instr'ns to think about:
> load-linked
> store-conditional
What is 'load-linked'? I think that (at least for now) I should add these
to the 'possible extensions' section, because they are not immediately
needed...
> Other classes of instructions that are valuable for pipeline
> performance:
> conditional-move
> predicated instructions
Conditional move is effectively a special case of a predicated
instruction... and I think that all predicated instructions can possibly
be implemented later in LLVM. It would significantly change things, and
it doesn't seem to be very necessary right now. It would seem to
complicate flow control analysis a LOT in the virtual machine. I would
tend to prefer that a predicated architecture like IA64 convert from a
"basic block" representation to a predicated rep as part of its dynamic
compilation phase. Also, if a basic block contains ONLY a move, then
that can be trivially translated into a conditional move...
> I agree that we need a static data space. Otherwise, emulating global
> data gets unnecessarily complex.
Definitely. Also a later item though. :)
> We once talked about adding a symbolic thread-id field to each
> ..
> Instead, it could a great topic for a separate study.
Agreed. :)
> What is the semantics of the IA64 stop bit?
Basically, the IA64 writes instructions like this:
mov ...
add ...
sub ...
op xxx
op xxx
;;
mov ...
add ...
sub ...
op xxx
op xxx
;;
Where the ;; delimits a group of instructions with no dependencies between
them, which can all be executed concurrently (to the limits of the
available functional units). The ;; gets translated into a bit set in one
of the opcodes.
The advantages of this representation are that you don't have to do some
kind of 'thread id scheduling' pass by having to specify ahead of time how
many threads to use, and the representation doesn't have a per instruction
overhead...
> And finally, another thought about the syntax for arrays :-)
> Although this syntax:
> array <dimension-list> of <type>
> is verbose, it will be used only in the human-readable assembly code so
> size should not matter. I think we should consider it because I find it
> to be the clearest syntax. It could even make arrays of function
> pointers somewhat readable.
My only comment will be to give you an example of why this is a bad
idea. :)
Here is an example of using the switch statement (with my recommended
syntax):
switch uint %val, label %otherwise,
[%3 x {uint, label}] [ { uint %57, label %l1 },
{ uint %20, label %l2 },
{ uint %14, label %l3 } ]
Here it is with the syntax you are proposing:
switch uint %val, label %otherwise,
array %3 of {uint, label}
array of {uint, label}
{ uint %57, label %l1 },
{ uint %20, label %l2 },
{ uint %14, label %l3 }
Which is ambiguous and very verbose. It would be possible to specify
constants with [] brackets as in my syntax, which would look like this:
switch uint %val, label %otherwise,
array %3 of {uint, label} [ { uint %57, label %l1 },
{ uint %20, label %l2 },
{ uint %14, label %l3 } ]
But then the syntax is inconsistent between type definition and constant
definition (why do []'s enclose the constants but not the types??).
Anyways, I'm sure that there is much debate still to be had over
this... :)
-Chris
http://www.nondot.org/~sabre/os/
http://www.nondot.org/MagicStats/
http://korbit.sourceforge.net/

View File

@ -0,0 +1,39 @@
Date: Tue, 13 Feb 2001 13:29:52 -0600 (CST)
From: Chris Lattner <sabre@nondot.org>
To: Vikram S. Adve <vadve@cs.uiuc.edu>
Subject: LLVM Concerns...
I've updated the documentation to include load store and allocation
instructions (please take a look and let me know if I'm on the right
track):
file:/home/vadve/lattner/llvm/docs/LangRef.html#memoryops
I have a couple of concerns I would like to bring up:
1. Reference types
Right now, I've spec'd out the language to have a pointer type, which
works fine for lots of stuff... except that Java really has
references: constrained pointers that cannot be manipulated: added and
subtracted, moved, etc... Do we want to have a type like this? It
could be very nice for analysis (pointer always points to the start of
an object, etc...) and more closely matches Java semantics. The
pointer type would be kept for C++ like semantics. Through analysis,
C++ pointers could be promoted to references in the LLVM
representation.
2. Our "implicit" memory references in assembly language:
After thinking about it, this model has two problems:
A. If you do pointer analysis and realize that two stores are
independent and can share the same memory source object, there is
no way to represent this in either the bytecode or assembly.
B. When parsing assembly/bytecode, we effectively have to do a full
SSA generation/PHI node insertion pass to build the dependencies
when we don't want the "pinned" representation. This is not
cool.
I'm tempted to make memory references explicit in both the assembly and
bytecode to get around this... what do you think?
-Chris

View File

@ -0,0 +1,47 @@
Date: Tue, 13 Feb 2001 18:25:42 -0600
From: Vikram S. Adve <vadve@cs.uiuc.edu>
To: Chris Lattner <sabre@nondot.org>
Subject: RE: LLVM Concerns...
> 1. Reference types
> Right now, I've spec'd out the language to have a pointer type, which
> works fine for lots of stuff... except that Java really has
> references: constrained pointers that cannot be manipulated: added and
> subtracted, moved, etc... Do we want to have a type like this? It
> could be very nice for analysis (pointer always points to the start of
> an object, etc...) and more closely matches Java semantics. The
> pointer type would be kept for C++ like semantics. Through analysis,
> C++ pointers could be promoted to references in the LLVM
> representation.
You're right, having references would be useful. Even for C++ the *static*
compiler could generate references instead of pointers with fairly
straightforward analysis. Let's include a reference type for now. But I'm
also really concerned that LLVM is becoming big and complex and (perhaps)
too high-level. After we get some initial performance results, we may have
a clearer idea of what our goals should be and we should revisit this
question then.
> 2. Our "implicit" memory references in assembly language:
> After thinking about it, this model has two problems:
> A. If you do pointer analysis and realize that two stores are
> independent and can share the same memory source object,
not sure what you meant by "share the same memory source object"
> there is
> no way to represent this in either the bytecode or assembly.
> B. When parsing assembly/bytecode, we effectively have to do a full
> SSA generation/PHI node insertion pass to build the dependencies
> when we don't want the "pinned" representation. This is not
> cool.
I understand the concern. But again, let's focus on the performance first
and then look at the language design issues. E.g., it would be good to know
how big the bytecode files are before expanding them further. I am pretty
keen to explore the implications of LLVM for mobile devices. Both bytecode
size and power consumption are important to consider there.
--Vikram

View File

@ -0,0 +1,12 @@
By Chris:
LLVM has been designed with two primary goals in mind. First we strive to enable the best possible division of labor between static and dynamic compilers, and second, we need a flexible and powerful interface between these two complementary stages of compilation. We feel that providing a solution to these two goals will yield an excellent solution to the performance problem faced by modern architectures and programming languages.
A key insight into current compiler and runtime systems is that a compiler may fall in anywhere in a "continuum of compilation" to do its job. On one side, scripting languages statically compile nothing and dynamically compile (or equivalently, interpret) everything. On the far other side, traditional static compilers process everything statically and nothing dynamically. These approaches have typically been seen as a tradeoff between performance and portability. On a deeper level, however, there are two reasons that optimal system performance may be obtained by a system somewhere in between these two extremes: Dynamic application behavior and social constraints.
From a technical perspective, pure static compilation cannot ever give optimal performance in all cases, because applications have varying dynamic behavior that the static compiler cannot take into consideration. Even compilers that support profile guided optimization generate poor code in the real world, because using such optimization tunes that application to one particular usage pattern, whereas real programs (as opposed to benchmarks) often have several different usage patterns.
On a social level, static compilation is a very shortsighted solution to the performance problem. Instruction set architectures (ISAs) continuously evolve, and each implementation of an ISA (a processor) must choose a set of tradeoffs that make sense in the market context that it is designed for. With every new processor introduced, the vendor faces two fundamental problems: First, there is a lag time between when a processor is introduced to when compilers generate quality code for the architecture. Secondly, even when compilers catch up to the new architecture there is often a large body of legacy code that was compiled for previous generations and will not or can not be upgraded. Thus a large percentage of code running on a processor may be compiled quite sub-optimally for the current characteristics of the dynamic execution environment.
For these reasons, LLVM has been designed from the beginning as a long-term solution to these problems. Its design allows the large body of platform independent, static, program optimizations currently in compilers to be reused unchanged in their current form. It also provides important static type information to enable powerful dynamic and link time optimizations to be performed quickly and efficiently. This combination enables an increase in effective system performance for real world environments.

View File

@ -0,0 +1,202 @@
Meeting notes: Implementation idea: Exception Handling in C++/Java
The 5/18/01 meeting discussed ideas for implementing exceptions in LLVM.
We decided that the best solution requires a set of library calls provided by
the VM, as well as an extension to the LLVM function invocation syntax.
The LLVM function invocation instruction previously looked like this (ignoring
types):
call func(arg1, arg2, arg3)
The extension discussed today adds an optional "with" clause that
associates a label with the call site. The new syntax looks like this:
call func(arg1, arg2, arg3) with funcCleanup
This funcCleanup label always stays tightly associated with the call site (being
encoded directly into the call opcode itself), and should be used whenever
there is cleanup work that needs to be done for the current function if
an exception is thrown by func (or if we are in a try block).
To support this, the VM/Runtime provide the following simple library
functions (all syntax in this document is very abstract):
typedef struct { something } %frame;
The VM must export a "frame type", that is an opaque structure used to
implement different types of stack walking that may be used by various
language runtime libraries. We imagine that it would be typical to
represent a frame with a PC and frame pointer pair, although that is not
required.
%frame getStackCurrentFrame();
Get a frame object for the current function. Note that if the current
function was inlined into its caller, the "current" frame will belong to
the "caller".
bool isFirstFrame(%frame f);
Returns true if the specified frame is the top level (first activated) frame
for this thread. For the main thread, this corresponds to the main()
function, for a spawned thread, it corresponds to the thread function.
%frame getNextFrame(%frame f);
Return the previous frame on the stack, i.e. the frame of f's caller (the
walk proceeds towards the first frame). This function is undefined if f
satisfies the predicate isFirstFrame(f).
Label *getFrameLabel(%frame f);
If a label was associated with f (as discussed below), this function returns
it. Otherwise, it returns a null pointer.
doNonLocalBranch(Label *L);
At this point, it is not clear whether this should be a function or
intrinsic. It should probably be an intrinsic in LLVM, but we'll deal with
this issue later.
Here is a motivating example that illustrates how these facilities could be
used to implement the C++ exception model:
void TestFunction(...) {
A a; B b;
foo(); // Any function call may throw
bar();
C c;
try {
D d;
baz();
} catch (int) {
...int Stuff...
// execution continues after the try block: the exception is consumed
} catch (double) {
...double stuff...
throw; // Exception is propagated
}
}
This function would compile to approximately the following code (heavy
pseudo code follows):
Func:
%a = alloca A
A::A(%a) // These ctors & dtors could throw, but we ignore this
%b = alloca B // minor detail for this example
B::B(%b)
call foo() with fooCleanup // An exception in foo is propagated to fooCleanup
call bar() with barCleanup // An exception in bar is propagated to barCleanup
%c = alloca C
C::C(c)
%d = alloca D
D::D(d)
call baz() with bazCleanup // An exception in baz is propagated to bazCleanup
d->~D();
EndTry: // This label corresponds to the end of the try block
c->~C() // These could also throw, these are also ignored
b->~B()
a->~A()
return
Note that this is a very straightforward and literal translation: exactly
what we want for zero cost (when unused) exception handling. Especially on
platforms with many registers (i.e., the IA64) setjmp/longjmp style exception
handling is *very* impractical. Also, the "with" clauses describe the
control flow paths explicitly so that analysis is not adversely affected.
The foo/barCleanup labels are implemented as:
TryCleanup: // Executed if an exception escapes the try block
c->~C()
barCleanup: // Executed if an exception escapes from bar()
// fall through
fooCleanup: // Executed if an exception escapes from foo()
b->~B()
a->~A()
Exception *E = getThreadLocalException()
call throw(E) // Implemented by the C++ runtime, described below
Which does the work one would expect. getThreadLocalException is a function
implemented by the C++ support library. It returns the current exception
object for the current thread. Note that we do not attempt to recycle the
shutdown code from before, because performance of the mainline code is
critically important. Also, obviously fooCleanup and barCleanup may be
merged and one of them eliminated. This just shows how the code generator
would most likely emit code.
The bazCleanup label is more interesting. Because the exception may be caught
by the try block, we must dispatch to its handler... but it does not exist
on the call stack (it does not have a VM Call->Label mapping installed), so
we must dispatch statically with a goto. The bazCleanup label thus appears as:
bazCleanup:
d->~D(); // destruct D as it goes out of scope when entering catch clauses
goto TryHandler
In general, TryHandler is not the same as bazCleanup, because multiple
function calls could be made from the try block. In this case, trivial
optimization could merge the two basic blocks. TryHandler is the code
that actually determines the type of exception, based on the Exception object
itself. For this discussion, assume that the exception object contains *at
least*:
1. A pointer to the RTTI info for the contained object
2. A pointer to the dtor for the contained object
3. The contained object itself
Note that it is necessary to maintain #1 & #2 in the exception object itself
because objects without virtual function tables may be thrown (as in this
example). Assuming this, TryHandler would look something like this:
TryHandler:
Exception *E = getThreadLocalException();
switch (E->RTTIType) {
case IntRTTIInfo:
...int Stuff... // The action to perform from the catch block
break;
case DoubleRTTIInfo:
...double Stuff... // The action to perform from the catch block
goto TryCleanup // This catch block rethrows the exception
break; // Redundant, eliminated by the optimizer
default:
goto TryCleanup // Exception not caught, rethrow
}
// Exception was consumed
if (E->dtor)
E->dtor(E->object) // Invoke the dtor on the object if it exists
goto EndTry // Continue mainline code...
And that is all there is to it.
The throw(E) function would then be implemented like this (which may be
inlined into the caller through standard optimization):
function throw(Exception *E) {
// Get the start of the stack trace...
%frame %f = call getStackCurrentFrame()
// Get the label information that corresponds to it
label * %L = call getFrameLabel(%f)
while (%L == 0 && !isFirstFrame(%f)) {
// Loop until a cleanup handler is found
%f = call getNextFrame(%f)
%L = call getFrameLabel(%f)
}
if (%L != 0) {
call setThreadLocalException(E) // Allow handlers access to this...
call doNonLocalBranch(%L)
}
// No handler found!
call BlowUp() // Ends up calling the terminate() method in use
}
That's a brief rundown of how C++ exception handling could be implemented in
llvm. Java would be very similar, except it only uses destructors to unlock
synchronized blocks, not to destroy data. Also, it uses two stack walks: a
nondestructive walk that builds a stack trace, then a destructive walk that
unwinds the stack as shown here.
It would be trivial to get exception interoperability between C++ and Java.

View File

@ -0,0 +1,45 @@
Date: Sat, 19 May 2001 19:09:13 -0500 (CDT)
From: Chris Lattner <sabre@nondot.org>
To: Vikram S. Adve <vadve@cs.uiuc.edu>
Subject: RE: Meeting writeup
> I read it through and it looks great!
Thanks!
> The finally clause in Java may need more thought. The code for this clause
> is like a subroutine because it needs to be entered from many points (end of
> try block and beginning of each catch block), and then needs to *return to
> the place from where the code was entered*. That's why JVM has the
> jsr/jsr_w instruction.
Hrm... I guess that is an implementation decision. It can either be
modelled as a subroutine (as java bytecodes do), which is really
gross... or it can be modelled as code duplication (emitted once inline,
then once in the exception path). Because this could, at worst,
slightly less than double the amount of code in a function (it is
bounded) I don't think this is a big deal. One of the really nice things
about the LLVM representation is that it still allows for runtime code
generation for exception paths (exception paths are not compiled until
needed). Obviously a static compiler couldn't do this though. :)
In this case, only one copy of the code would be compiled... until the
other one is needed on demand. Also this strategy fits with the "zero
cost" exception model... the standard case is not burdened with extra
branches or "call"s.
> I suppose you could save the return address in a particular register
> (specific to this finally block), jump to the finally block, and then at the
> end of the finally block, jump back indirectly through this register. It
> will complicate building the CFG but I suppose that can be handled. It is
> also unsafe in terms of checking where control returns (which is I suppose
> why the JVM doesn't use this).
I think that a code duplication method would be cleaner, and would avoid
the caveats that you mention. Also, it does not slow down the normal case
with an indirect branch...
Like everything, we can probably defer a final decision until later. :)
-Chris

View File

@ -0,0 +1,63 @@
Date: Fri, 1 Jun 2001 16:38:17 -0500 (CDT)
From: Chris Lattner <sabre@nondot.org>
To: Vikram S. Adve <vadve@cs.uiuc.edu>
Subject: Interesting: GCC passes
Take a look at this document (which describes the order of optimizations
that GCC performs):
http://gcc.gnu.org/onlinedocs/gcc_17.html
The rundown is that after RTL generation, the following happens:
1 . [t] jump optimization (jumps to jumps, etc)
2 . [t] Delete unreachable code
3 . Compute live ranges for CSE
4 . [t] Jump threading (jumps to jumps with identical or inverse conditions)
5 . [t] CSE
6 . *** Conversion to SSA
7 . [t] SSA Based DCE
8 . *** Conversion to LLVM
9 . UnSSA
10. GCSE
11. LICM
12. Strength Reduction
13. Loop unrolling
14. [t] CSE
15. [t] DCE
16. Instruction combination, register movement, scheduling... etc.
I've marked optimizations with a [t] to indicate things that I believe to
be relatively trivial to implement in LLVM itself. The time consuming
things to reimplement would be SSA based PRE, Strength reduction & loop
unrolling... these would be the major things we would miss out on if we
did LLVM creation from tree code [inlining and other high level
optimizations are done on the tree representation].
Given the lack of "strong" optimizations that would take a long time to
reimplement, I am leaning a bit more towards creating LLVM from the tree
code. Especially given that SGI has GPL'd their compiler, including many
SSA based optimizations that could be adapted (besides the fact that their
code looks MUCH nicer than GCC :)
Even if we choose to do LLVM code emission from RTL, we will almost
certainly want to move LLVM emission from step 8 down until at least CSE
has been rerun... which causes me to wonder if the SSA generation code
will still work (due to global variable dependencies and stuff). I assume
that it can be made to work, but might be a little more involved than we
would like.
I'm continuing to look at the Tree -> RTL code. It is pretty gross
because they do some of the translation a statement at a time, and some
of it a function at a time... I'm not quite clear why and how the
distinction is drawn, but it does not appear that there is a wonderful
place to attach extra info.
Anyways, I'm proceeding with the RTL -> LLVM conversion phase for now. We
can talk about this more on Monday.
Wouldn't it be nice if there were an obvious decision to be made? :)
-Chris

View File

@ -0,0 +1,71 @@
Date: Fri, 1 Jun 2001 17:08:44 -0500 (CDT)
From: Chris Lattner <sabre@nondot.org>
To: Vikram S. Adve <vadve@cs.uiuc.edu>
Subject: RE: Interesting: GCC passes
> That is very interesting. I agree that some of these could be done on LLVM
> at link-time, but it is the extra time required that concerns me. Link-time
> optimization is severely time-constrained.
If we were to reimplement any of these optimizations, I assume that we
could do them a translation unit at a time, just as GCC does now. This
would lead to a pipeline like this:
Static optimizations, xlation unit at a time:
.c --GCC--> .llvm --llvmopt--> .llvm
Link time optimizations:
.llvm --llvm-ld--> .llvm --llvm-link-opt--> .llvm
Of course, many optimizations could be shared between llvmopt and
llvm-link-opt, but they wouldn't need to be shared... Thus compile time
could be faster, because we are using a "smarter" IR (SSA based).
> BTW, about SGI, "borrowing" SSA-based optimizations from one compiler and
> putting it into another is not necessarily easier than re-doing it.
> Optimization code is usually heavily tied in to the specific IR they use.
Understood. The only reason that I brought this up is because SGI's IR is
more similar to LLVM than it is different in many respects (SSA based,
relatively low level, etc), and could be easily adapted. Also their
optimizations are written in C++ and are actually somewhat
structured... of course it would be no walk in the park, but it would be
much less time consuming to adapt, say, SSA-PRE than to rewrite it.
> But your larger point is valid that adding SSA based optimizations is
> feasible and should be fun. (Again, link time cost is the issue.)
Assuming linktime cost wasn't an issue, the question is:
Does using GCC's backend buy us anything?
> It also occurs to me that GCC is probably doing quite a bit of back-end
> optimization (step 16 in your list). Do you have a breakdown of that?
Not really. The irritating part of GCC is that it mixes it all up and
doesn't have a clean separation of concerns. A lot of the "back end
optimization" happens right along with other data optimizations (ie, CSE
of machine specific things).
As far as REAL back end optimizations go, it looks something like this:
1. Instruction combination: try to make CISCy instructions, if available
2. Register movement: try to get registers in the right places for the
architecture to avoid register to register moves. For example, try to get
the first argument of a function to naturally land in %o0 for sparc.
3. Instruction scheduling: 'nuff said :)
4. Register class preferencing: ??
5. Local register allocation
6. global register allocation
7. Spilling
8. Local regalloc
9. Jump optimization
10. Delay slot scheduling
11. Branch shortening for CISC machines
12. Instruction selection & peephole optimization
13. Debug info output
But none of this would be usable for LLVM anyways, unless we were using
GCC as a static compiler.
-Chris

1376
docs/LangRef.html Normal file

File diff suppressed because it is too large Load Diff

5
getsomesrcs.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/sh
# Print the subset of the source files that is usually worth opening in an
# editor.  Typical use: xemacs `./getsomesrcs.sh` &
#
# The full listing comes from ./getsrcs.sh; every path matching one of the
# patterns below is dropped from it.
./getsrcs.sh | grep -v \
  -e Assembly \
  -e Byte \
  -e '\.ll' \
  -e tools \
  -e Makefile \
  -e Opt \
  -e CommandLi \
  -e String \
  -e DataType

4
getsrcs.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# List every source file below the current directory (names matching
# *.[chyl]*).  Handy as input for grep.
#
# Paths matching any of the patterns below are left out of the listing; the
# patterns are ordinary grep regular expressions.
find . -name '*.[chyl]*' | grep -v \
  -e Lexer.cpp \
  -e llvmAsmParser.cpp \
  -e llvmAsmParser.h \
  -e '~$' \
  -e '\.ll$' \
  -e test \
  -e .flc

View File

@ -0,0 +1,113 @@
//===-- llvm/Analysis/ModuleAnalyzer.h - Module analysis driver --*- C++ -*-==//
//
// This class provides a nice interface to traverse a module in a predictable
// way. This is used by the AssemblyWriter, BytecodeWriter, and SlotCalculator
// to do analysis of a module.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_MODULEANALYZER_H
#define LLVM_ANALYSIS_MODULEANALYZER_H
#include "llvm/ConstantPool.h"
#include <set>
class Module;
class Method;
class BasicBlock;
class Instruction;
class ConstPoolVal;
class MethodType;
class MethodArgument;
class ModuleAnalyzer {
ModuleAnalyzer(const ModuleAnalyzer &); // do not impl
const ModuleAnalyzer &operator=(const ModuleAnalyzer &); // do not impl
public:
ModuleAnalyzer() {}
virtual ~ModuleAnalyzer() {}
protected:
// processModule - Driver function to call all of my subclasses virtual
// methods. Commonly called by derived type's constructor.
//
bool processModule(const Module *M);
//===--------------------------------------------------------------------===//
// Stages of processing Module level information
//
virtual bool processConstPool(const ConstantPool &CP, bool isMethod);
// processType - This callback occurs when an derived type is discovered
// at the class level. This activity occurs when processing a constant pool.
//
virtual bool processType(const Type *Ty) { return false; }
// processMethods - The default implementation of this method loops through
// all of the methods in the module and processModule's them.
//
virtual bool processMethods(const Module *M);
//===--------------------------------------------------------------------===//
// Stages of processing a constant pool
//
// processConstPoolPlane - Called once for every populated plane in the
// constant pool. The default action is to do nothing. The processConstPool
// method does the iteration over constants.
//
virtual bool processConstPoolPlane(const ConstantPool &CP,
const ConstantPool::PlaneType &Pl,
bool isMethod) {
return false;
}
// processConstant is called once per each constant in the constant pool. It
// traverses the constant pool such that it visits each constant in the
// order of its type. Thus, all 'int' typed constants shall be visited
// sequentially, etc...
//
virtual bool processConstant(const ConstPoolVal *CPV) { return false; }
// visitMethod - This member is called after the constant pool has been
// processed. The default implementation of this is a noop.
//
virtual bool visitMethod(const Method *M) { return false; }
//===--------------------------------------------------------------------===//
// Stages of processing Method level information
//
// (processConstPool is also used above, with the isMethod flag set to true)
//
// processMethod - Process all aspects of a method.
//
virtual bool processMethod(const Method *M);
// processMethodArgument - This member is called for every argument that
// is passed into the method.
//
virtual bool processMethodArgument(const MethodArgument *MA) { return false; }
// processBasicBlock - This member is called for each basic block in a methd.
//
virtual bool processBasicBlock(const BasicBlock *BB);
//===--------------------------------------------------------------------===//
// Stages of processing BasicBlock level information
//
// preProcessInstruction - This member is called for each Instruction in a
// method before processInstruction.
//
virtual bool preProcessInstruction(const Instruction *I);
// processInstruction - This member is called for each Instruction in a method
//
virtual bool processInstruction(const Instruction *I) { return false; }
private:
bool handleType(set<const Type *> &TypeSet, const Type *T);
};
#endif

View File

@ -0,0 +1,96 @@
//===-- llvm/Analysis/SlotCalculator.h - Calculate value slots ---*- C++ -*-==//
//
// This ModuleAnalyzer subclass calculates the slots that values will land in.
// This is useful for when writing bytecode or assembly out, because you have
// to know these things.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_SLOTCALCULATOR_H
#define LLVM_ANALYSIS_SLOTCALCULATOR_H
#include "llvm/Analysis/ModuleAnalyzer.h"
#include "llvm/SymTabValue.h"
#include <vector>
#include <map>
class SlotCalculator : public ModuleAnalyzer {
const Module *TheModule;
bool IgnoreNamedNodes; // Shall we not count named nodes?
typedef vector<const Value*> TypePlane;
vector <TypePlane> Table;
map<const Value *, unsigned> NodeMap;
// ModuleLevel - Used to keep track of which values belong to the module,
// and which values belong to the currently incorporated method.
//
vector <unsigned> ModuleLevel;
public:
SlotCalculator(const Module *M, bool IgnoreNamed);
SlotCalculator(const Method *M, bool IgnoreNamed);// Start out in incorp state
inline ~SlotCalculator() {}
// getValSlot returns < 0 on error!
int getValSlot(const Value *D) const;
inline unsigned getNumPlanes() const { return Table.size(); }
inline unsigned getModuleLevel(unsigned Plane) const {
return Plane < ModuleLevel.size() ? ModuleLevel[Plane] : 0;
}
inline const TypePlane &getPlane(unsigned Plane) const {
return Table[Plane];
}
// If you'd like to deal with a method, use these two methods to get its data
// into the SlotCalculator!
//
void incorporateMethod(const Method *M);
void purgeMethod();
protected:
// insertVal - Insert a value into the value table...
//
void insertVal(const Value *D);
// visitMethod - This member is called after the constant pool has been
// processed. The default implementation of this is a noop.
//
virtual bool visitMethod(const Method *M);
// processConstant is called once per each constant in the constant pool. It
// traverses the constant pool such that it visits each constant in the
// order of its type. Thus, all 'int' typed constants shall be visited
// sequentially, etc...
//
virtual bool processConstant(const ConstPoolVal *CPV);
// processType - This callback occurs when an derived type is discovered
// at the class level. This activity occurs when processing a constant pool.
//
virtual bool processType(const Type *Ty);
// processMethods - The default implementation of this method loops through
// all of the methods in the module and processModule's them. We don't want
// this (we want to explicitly visit them with incorporateMethod), so we
// disable it.
//
virtual bool processMethods(const Module *M) { return false; }
// processMethodArgument - This member is called for every argument that
// is passed into the method.
//
virtual bool processMethodArgument(const MethodArgument *MA);
// processBasicBlock - This member is called for each basic block in a methd.
//
virtual bool processBasicBlock(const BasicBlock *BB);
// processInstruction - This member is called for each Instruction in a methd.
//
virtual bool processInstruction(const Instruction *I);
};
#endif

View File

@ -0,0 +1,28 @@
//===-- llvm/Analysis/Verifier.h - Module Verifier ---------------*- C++ -*-==//
//
// This file defines the method verifier interface, that can be used for some
// sanity checking of input to the system.
//
// Note that this does not provide full 'java style' security and verifications,
// instead it just tries to ensure that code is well formed.
//
// To see what specifically is checked, look at the top of Verifier.cpp
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_VERIFIER_H
#define LLVM_ANALYSIS_VERIFIER_H
#include <vector>
#include <string>
class Module;
class Method;
// verify - Check a module or method for validity.  If errors are detected,
// error messages corresponding to the problem are added to the ErrorMsgs
// vector, and a value of true is returned.
//
// The standard library types are spelled with an explicit std:: so that this
// header does not depend on a using-directive being in effect where it is
// included.
//
bool verify(const Module *M, std::vector<std::string> &ErrorMsgs);
bool verify(const Method *M, std::vector<std::string> &ErrorMsgs);
#endif

View File

@ -0,0 +1,66 @@
//===-- llvm/assembly/Parser.h - Parser for VM assembly files ----*- C++ -*--=//
//
// These classes are implemented by the lib/AssemblyParser library.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ASSEMBLY_PARSER_H
#define LLVM_ASSEMBLY_PARSER_H
#include <string>
class Module;
class ToolCommandLine;
class ParseException;
// ParseAssemblyFile - The useful interface defined by this file.  Parses the
// ascii assembly input described by Opts and returns the resulting internal
// representation as a Module.
//
// The exception specification declares that ParseException is the only
// exception this function may throw.
//
// NOTE(review): ownership of the returned Module is not visible from this
// header -- presumably the caller is responsible for deleting it; confirm.
//
Module *ParseAssemblyFile(const ToolCommandLine &Opts) throw (ParseException);
//===------------------------------------------------------------------------===
// Helper Classes
//===------------------------------------------------------------------------===
// ParseException - For when an exceptional event is generated by the parser.
// This class lets you print out the exception message and find out where in
// the input the problem was detected.
//
// The object stores a *reference* to a ToolCommandLine (see Opts below), not a
// copy, so the referenced options object has to stay alive for as long as
// getOptions() may be called on the exception.
//
class ParseException {
public:
  ParseException(const ToolCommandLine &Opts, const std::string &message,
                 int LineNo = -1, int ColNo = -1);

  ParseException(const ParseException &E);

  // getMessage - Return the message passed in at construction time plus extra
  // information extracted from the options used to parse with...
  //
  // (Declaration kept exactly as-is: the definition lives outside this header
  // and must match it.)
  //
  const std::string getMessage() const;

  inline const std::string getRawMessage() const {   // Just the raw message...
    return Message;
  }

  inline const ToolCommandLine &getOptions() const {
    return Opts;                           // Get the options obj used to parse.
  }

  // getErrorLocation - Return the line and column number of the error in the
  // input source file.  The source filename can be derived from the
  // ToolCommandLine options in effect (see getOptions).  If positional
  // information is not applicable, these will return a value of -1.
  //
  // The return type is plain 'void'; the former 'const void' qualifier had no
  // effect and only provoked compiler warnings.
  //
  inline void getErrorLocation(int &Line, int &Column) const {
    Line = LineNo; Column = ColumnNo;
  }

private :
  const ToolCommandLine &Opts;
  std::string Message;
  int LineNo, ColumnNo;                               // -1 if not relevant

  // Assignment is private, so code outside the class cannot assign one
  // ParseException to another.
  ParseException &operator=(const ParseException &E); // objects by reference
};
#endif

View File

@ -0,0 +1,79 @@
//===-- llvm/assembly/Writer.h - Printer for VM assembly files ---*- C++ -*--=//
//
// This functionality is implemented by the lib/AssemblyWriter library.
// This library is used to print VM assembly language files to an iostream. It
// can print VM code at a variety of granularities, ranging from a whole class
// down to an individual instruction. This makes it useful for debugging.
//
// This library uses the Analysis library to figure out offsets for
// variables in the method tables...
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ASSEMBLY_WRITER_H
#define LLVM_ASSEMBLY_WRITER_H
#include <iostream>
#include "llvm/Type.h"
class Module;
class Method;
class BasicBlock;
class Instruction;
// WriteToAssembly - The only interface defined by this file.  Each overload
// converts the internal representation of one kind of object into an ascii
// bytestream, written to the stream 'o', that the parser can understand
// later... (the parser only understands whole classes though, so only the
// output for a whole Module can be read back in)
//
// One overload exists per printable granularity: Module, Method, BasicBlock,
// Instruction and ConstPoolVal.
//
void WriteToAssembly(const Module *Module, ostream &o);
void WriteToAssembly(const Method *Method, ostream &o);
void WriteToAssembly(const BasicBlock *BB, ostream &o);
void WriteToAssembly(const Instruction *In, ostream &o);
void WriteToAssembly(const ConstPoolVal *V, ostream &o);
// Stream insertion for pointers to the classes that WriteToAssembly knows how
// to print.  Every overload forwards to the matching WriteToAssembly overload
// and then hands the stream back so that insertions can be chained.
//
inline ostream &operator<<(ostream &o, const Module *C) {
  WriteToAssembly(C, o);
  return o;
}

inline ostream &operator<<(ostream &o, const Method *M) {
  WriteToAssembly(M, o);
  return o;
}

inline ostream &operator<<(ostream &o, const BasicBlock *B) {
  WriteToAssembly(B, o);
  return o;
}

inline ostream &operator<<(ostream &o, const Instruction *I) {
  WriteToAssembly(I, o);
  return o;
}

inline ostream &operator<<(ostream &o, const ConstPoolVal *I) {
  WriteToAssembly(I, o);
  return o;
}
// Print a type by name.  A null pointer is tolerated and shows up in the
// output as the placeholder text "<null Type>".
inline ostream &operator<<(ostream &o, const Type *T) {
  return T ? (o << T->getName()) : (o << "<null Type>");
}
// Print an arbitrary Value by dispatching on its dynamic kind, as reported by
// getValueType(), to the printer for the corresponding concrete class.
//
// A null pointer is tolerated and printed as "<null Value>", mirroring the
// behaviour of the 'const Type *' overload; previously a null Value was
// dereferenced unconditionally.
//
// NOTE(review): the casts below are C-style on purpose.  Several of the target
// classes are only forward declared in this header, and whether they are
// complete at this point depends on what the included headers pull in --
// confirm before converting these to static_cast.
//
inline ostream &operator<<(ostream &o, const Value *I) {
  if (!I) return o << "<null Value>";

  switch (I->getValueType()) {
  case Value::TypeVal:           return o << (const Type*)I;
  case Value::ConstantVal:       WriteToAssembly((const ConstPoolVal*)I, o); break;
  case Value::MethodArgumentVal: return o <<I->getType() << " " << I->getName();
  case Value::InstructionVal:    WriteToAssembly((const Instruction *)I, o); break;
  case Value::BasicBlockVal:     WriteToAssembly((const BasicBlock *)I, o); break;
  case Value::MethodVal:         WriteToAssembly((const Method *)I, o); break;
  case Value::ModuleVal:         WriteToAssembly((const Module *)I, o); break;
  default: return o << "<unknown value type: " << I->getValueType() << ">";
  }
  return o;   // Reached by the cases that print through WriteToAssembly
}
#endif

246
include/llvm/BasicBlock.h Normal file
View File

@ -0,0 +1,246 @@
//===-- llvm/BasicBlock.h - Represent a basic block in the VM ----*- C++ -*--=//
//
// This file contains the declaration of the BasicBlock class, which represents
// a single basic block in the VM.
//
// Note that basic blocks themselves are Def's, because they are referenced
// by instructions like branches and can go in switch tables and stuff...
//
// This may seem weird at first, but it's really pretty cool. :)
//
//===----------------------------------------------------------------------===//
//
// Note that well formed basic blocks are formed of a list of instructions
// followed by a single TerminatorInst instruction. TerminatorInst's may not
// occur in the middle of basic blocks, and must terminate the blocks.
//
// This code allows malformed basic blocks to occur, because it may be useful
// in the intermediate stage of analysis or modification of a program.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BASICBLOCK_H
#define LLVM_BASICBLOCK_H
#include "llvm/Value.h" // Get the definition of Value
#include "llvm/ValueHolder.h"
#include "llvm/InstrTypes.h"
#include <list>
class Instruction;
class Method;
class BasicBlock;
class TerminatorInst;
typedef UseTy<BasicBlock> BasicBlockUse;
class BasicBlock : public Value { // Basic blocks are data objects also
public:
typedef ValueHolder<Instruction, BasicBlock> InstListType;
private :
InstListType InstList;
friend class ValueHolder<BasicBlock,Method>;
void setParent(Method *parent);
public:
BasicBlock(const string &Name = "", Method *Parent = 0);
~BasicBlock();
// Specialize setName to take care of symbol table majik
virtual void setName(const string &name);
const Method *getParent() const { return (const Method*)InstList.getParent();}
Method *getParent() { return (Method*)InstList.getParent(); }
const InstListType &getInstList() const { return InstList; }
InstListType &getInstList() { return InstList; }
// getTerminator() - If this is a well formed basic block, then this returns
// a pointer to the terminator instruction. If it is not, then you get a null
// pointer back.
//
TerminatorInst *getTerminator();
const TerminatorInst *const getTerminator() const;
// hasConstantPoolReferences() - This predicate is true if there is a
// reference to this basic block in the constant pool for this method. For
// example, if a block is reached through a switch table, that table resides
// in the constant pool, and the basic block is reference from it.
//
bool hasConstantPoolReferences() const;
// dropAllReferences() - This function causes all the subinstructions to "let
// go" of all references that they are maintaining. This allows one to
// 'delete' a whole class at a time, even though there may be circular
// references... first all references are dropped, and all use counts go to
// zero. Then everything is delete'd for real. Note that no operations are
// valid on an object that has "dropped all references", except operator
// delete.
//
void dropAllReferences();
// splitBasicBlock - This splits a basic block into two at the specified
// instruction. Note that all instructions BEFORE the specified iterator stay
// as part of the original basic block, an unconditional branch is added to
// the new BB, and the rest of the instructions in the BB are moved to the new
// BB, including the old terminator. The newly formed BasicBlock is returned.
// This function invalidates the specified iterator.
//
// Note that this only works on well formed basic blocks (must have a
// terminator), and 'I' must not be the end of instruction list (which would
// cause a degenerate basic block to be formed, having a terminator inside of
// the basic block).
//
BasicBlock *splitBasicBlock(InstListType::iterator I);
//===--------------------------------------------------------------------===//
// Predecessor iterator code
//===--------------------------------------------------------------------===//
//
// This is used to figure out what basic blocks we could be coming from.
// The iterator types are instantiations of PredIterator over this block's
// use list (use_iterator / use_const_iterator).
//
// Forward declare iterator class template...
template <class _Ptr, class _USE_iterator> class PredIterator;
typedef PredIterator<BasicBlock*, use_iterator> pred_iterator;
typedef PredIterator<const BasicBlock*,
use_const_iterator> pred_const_iterator;
// Range accessors; the inline definitions follow the class body.
inline pred_iterator pred_begin() ;
inline pred_const_iterator pred_begin() const;
inline pred_iterator pred_end() ;
inline pred_const_iterator pred_end() const;
//===--------------------------------------------------------------------===//
// Successor iterator code
//===--------------------------------------------------------------------===//
//
// This is used to figure out what basic blocks we could be going to...
// The iterator types are instantiations of SuccIterator over a terminator
// instruction pointer and the basic block pointer type it yields.
//
// Forward declare iterator class template...
template <class _Term, class _BB> class SuccIterator;
typedef SuccIterator<TerminatorInst*, BasicBlock*> succ_iterator;
typedef SuccIterator<const TerminatorInst*,
const BasicBlock*> succ_const_iterator;
// Range accessors; the inline definitions follow the class body.
inline succ_iterator succ_begin() ;
inline succ_const_iterator succ_begin() const;
inline succ_iterator succ_end() ;
inline succ_const_iterator succ_end() const;
//===--------------------------------------------------------------------===//
// END of interesting code...
//===--------------------------------------------------------------------===//
//
// Thank god C++ compilers are good at stomping out tons of templated code...
//
// PredIterator - Walks the use list of a basic block and yields the parent
// block of every *instruction* that uses it, i.e. the block's predecessors.
// Uses that are not instructions (such as constant pool references) are
// skipped, because operator* can only map an Instruction to its parent.
//
template <class _Ptr, class _USE_iterator> // Predecessor Iterator
class PredIterator {
  const _Ptr ThisBB;
  _USE_iterator It;

  // skipNonInstructionUses - Advance It until it designates an instruction
  // use or reaches the end of ThisBB's use list.
  //
  inline void skipNonInstructionUses() {
    while (It != ThisBB->use_end() &&
           (*It)->getValueType() != Value::InstructionVal)
      ++It;
  }
public:
  typedef PredIterator<_Ptr,_USE_iterator> _Self;
  typedef bidirectional_iterator_tag iterator_category;
  typedef _Ptr pointer;

  // Begin iterator: start at the first use, stepping over any leading
  // non-instruction uses so that operator* is immediately valid.
  inline PredIterator(_Ptr BB) : ThisBB(BB), It(BB->use_begin()) {
    skipNonInstructionUses();
  }
  // End iterator: the bool argument is only a tag selecting this overload.
  inline PredIterator(_Ptr BB, bool) : ThisBB(BB), It(BB->use_end()) {}

  inline bool operator==(const _Self& x) const { return It == x.It; }
  inline bool operator!=(const _Self& x) const { return !operator==(x); }

  // Dereference: the current use must be an instruction; its parent is the
  // predecessor block.
  inline pointer operator*() const {
    assert ((*It)->getValueType() == Value::InstructionVal);
    return ((Instruction *)(*It))->getParent();
  }
  inline pointer *operator->() const { return &(operator*()); }

  // Preincrement: step once, then keep stepping while the use is not an
  // instruction.  (The previous condition tested "!= ConstantVal", which
  // skipped instruction uses and stopped on constants instead.)
  inline _Self& operator++() {
    ++It;
    skipNonInstructionUses();
    return *this;
  }
  inline _Self operator++(int) { // Postincrement
    _Self tmp = *this; ++*this; return tmp;
  }

  // NOTE(review): decrement does not skip non-instruction uses; behaviour
  // is unchanged from the original here.
  inline _Self& operator--() { --It; return *this; } // Predecrement
  inline _Self operator--(int) { // Postdecrement
    _Self tmp = *this; --*this; return tmp;
  }
};
// SuccIterator - Enumerates the successors of a terminator by index: it
// holds the terminator and a position in [0, getNumSuccessors()].
//
template <class _Term, class _BB> // Successor Iterator
class SuccIterator {
  const _Term Term;   // terminator being walked
  unsigned idx;       // current successor index
public:
  typedef SuccIterator<_Term, _BB> _Self;
  typedef forward_iterator_tag iterator_category;
  typedef _BB pointer;

  // One-argument form is the begin iterator; the bool overload (the value
  // is ignored) is the end iterator.
  inline SuccIterator(_Term T) : Term(T), idx(0) {}
  inline SuccIterator(_Term T, bool)
    : Term(T), idx(Term->getNumSuccessors()) {}

  // Equality looks at the index only.
  inline bool operator==(const _Self& x) const { return idx == x.idx; }
  inline bool operator!=(const _Self& x) const { return idx != x.idx; }

  inline pointer operator*() const { return Term->getSuccessor(idx); }
  inline pointer *operator->() const { return &(operator*()); }

  inline _Self& operator++() {   // Preincrement
    ++idx;
    return *this;
  }
  inline _Self operator++(int) { // Postincrement
    _Self Before(*this);
    ++idx;
    return Before;
  }

  inline _Self& operator--() {   // Predecrement
    --idx;
    return *this;
  }
  inline _Self operator--(int) { // Postdecrement
    _Self Before(*this);
    --idx;
    return Before;
  }
};
};
//===--------------------------------------------------------------------===//
// Implement some stuff prototyped above...
//===--------------------------------------------------------------------===//
// pred_begin - Build the begin predecessor iterator for this block.
inline BasicBlock::pred_iterator BasicBlock::pred_begin() {
  pred_iterator First(this);
  return First;
}
// pred_begin (const) - Build the begin predecessor iterator over a const
// block.
inline BasicBlock::pred_const_iterator BasicBlock::pred_begin() const {
  pred_const_iterator First(this);
  return First;
}
// pred_end - Build the end predecessor iterator; the extra bool argument
// selects the constructor overload for the end position.
inline BasicBlock::pred_iterator BasicBlock::pred_end() {
  pred_iterator Last(this, true);
  return Last;
}
// pred_end (const) - Build the end predecessor iterator over a const block;
// the extra bool argument selects the constructor overload for the end
// position.
inline BasicBlock::pred_const_iterator BasicBlock::pred_end() const {
  pred_const_iterator Last(this, true);
  return Last;
}
// succ_begin - Build the begin successor iterator from this block's
// terminator, as returned by getTerminator().
inline BasicBlock::succ_iterator BasicBlock::succ_begin() {
  succ_iterator First(getTerminator());
  return First;
}
// succ_begin (const) - Build the begin successor iterator from this const
// block's terminator, as returned by getTerminator().
inline BasicBlock::succ_const_iterator BasicBlock::succ_begin() const {
  succ_const_iterator First(getTerminator());
  return First;
}
// succ_end - Build the end successor iterator from this block's terminator;
// the extra bool argument selects the constructor overload for the end
// position.
inline BasicBlock::succ_iterator BasicBlock::succ_end() {
  succ_iterator Last(getTerminator(), true);
  return Last;
}
// succ_end (const) - Build the end successor iterator from this const
// block's terminator; the extra bool argument selects the constructor
// overload for the end position.
inline BasicBlock::succ_const_iterator BasicBlock::succ_end() const {
  succ_const_iterator Last(getTerminator(), true);
  return Last;
}
#endif

View File

@ -0,0 +1,33 @@
//===-- llvm/Bytecode/Format.h - VM bytecode file format info ----*- C++ -*--=//
//
// This header defines intrinsic constants that are useful to libraries that
// need to hack on bytecode files directly, like the reader and writer.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BYTECODE_FORMAT_H
#define LLVM_BYTECODE_FORMAT_H
// BytecodeFormat - Holder for the block ID constants used in bytecode files.
// The class only acts as a scope for the enum: its constructor is declared
// but intentionally never implemented.
//
class BytecodeFormat {
  BytecodeFormat();                  // do not implement
public:
  // ID Numbers that are used in bytecode files.  Every value is written out
  // explicitly so the numbering is visible at a glance.
  enum FileBlockIDs {
    // File level identifiers...
    Module           = 0x01,

    // Module subtypes:
    Method           = 0x11,
    ConstantPool     = 0x12,
    SymbolTable      = 0x13,
    ModuleGlobalInfo = 0x14,

    // Method subtypes (a method can also have ConstantPool and SymbolTable
    // blocks):
    MethodInfo       = 0x21,

    BasicBlock       = 0x31          // May contain many basic blocks
  };
};
#endif

View File

@ -0,0 +1,237 @@
//===-- llvm/Bytecode/Primitives.h - Bytecode file format prims --*- C++ -*--=//
//
// This header defines some basic functions for reading and writing basic
// primitive types to a bytecode stream.
//
// Using the routines defined in this file does not require linking to any
// libraries, as all of the services are small self contained units that are to
// be inlined as necessary.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BYTECODE_PRIMITIVES_H
#define LLVM_BYTECODE_PRIMITIVES_H
#include "llvm/Tools/DataTypes.h"
#include <string>
#include <vector>
//===----------------------------------------------------------------------===//
// Reading Primitives
//===----------------------------------------------------------------------===//
// read (unsigned) - Read a 32 bit little endian value from the buffer.
// Returns true on failure (fewer than 4 bytes left; Buf is not moved and
// Result is not written), false on success, in which case Buf is advanced
// past the value.
//
static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf,
                        unsigned &Result) {
  if (Buf+4 > EndBuf) return true;
#ifdef LITTLE_ENDIAN
  Result = *(unsigned*)Buf;
#else
  // Widen each byte to unsigned BEFORE shifting: a byte >= 0x80 promoted to
  // int and shifted left by 24 overflows a signed int.
  Result =  (unsigned)Buf[0]        | ((unsigned)Buf[1] << 8) |
           ((unsigned)Buf[2] << 16) | ((unsigned)Buf[3] << 24);
#endif
  Buf += 4;
  return false;
}
// read (uint64_t) - Read a 64 bit little endian value from the buffer.
// Returns true on failure (fewer than 8 bytes left; Buf is not moved and
// Result is not written), false on success, in which case Buf is advanced
// past the value.
//
static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf,
                        uint64_t &Result) {
  if (Buf+8 > EndBuf) return true;
#ifdef LITTLE_ENDIAN
  Result = *(uint64_t*)Buf;
#else
  // Assemble in 64 bit unsigned arithmetic.  Building the low word as an int
  // (as was done before) makes it negative whenever Buf[3] >= 0x80, and the
  // sign then extends into the upper 32 bits of the result.
  uint64_t Value = 0;
  for (unsigned Byte = 0; Byte != 8; ++Byte)
    Value |= (uint64_t)Buf[Byte] << (8 * Byte);
  Result = Value;
#endif
  Buf += 8;
  return false;
}
// read (int) - Signed flavor: the destination is reinterpreted as unsigned
// storage and filled in by the unsigned reader.  Same return convention
// (true on failure).
//
static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf,
                        int &Result) {
  unsigned &Bits = (unsigned &)Result;
  return read(Buf, EndBuf, Bits);
}
// read (int64_t) - Signed flavor: the destination is reinterpreted as
// uint64_t storage and filled in by the uint64_t reader.  Same return
// convention (true on failure).
//
static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf,
                        int64_t &Result) {
  uint64_t &Bits = (uint64_t &)Result;
  return read(Buf, EndBuf, Bits);
}
// read_vbr - Read an unsigned integer encoded in variable bitrate format:
// each byte carries 7 data bits (least significant group first) and its high
// bit says whether another byte follows.
//
// Returns true on failure, false on success.  Failure now covers:
//   * the buffer running out before a byte without the continue bit is seen
//     (this includes an empty buffer, which used to be read anyway), and
//   * an encoding longer than a 32 bit value can need, which used to shift
//     by more than the width of the result.
//
static inline bool read_vbr(const unsigned char *&Buf,
                            const unsigned char *EndBuf, unsigned &Result) {
  unsigned Shift = Result = 0;
  unsigned char Byte;
  do {
    if (Buf >= EndBuf) return true;      // Ran off the end of the buffer
    if (Shift >= 32) return true;        // Too many bytes for an unsigned
    Byte = *Buf++;
    Result |= (unsigned)(Byte & 0x7F) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return false;
}
// read_vbr (uint64_t) - 64 bit flavor of the variable bitrate reader: 7 data
// bits per byte, least significant group first, high bit set when another
// byte follows.
//
// Returns true on failure, false on success.  Failure now covers a buffer
// that ends before the value is complete (including an empty buffer) and an
// encoding with more bytes than a 64 bit value can need.
//
static inline bool read_vbr(const unsigned char *&Buf,
                            const unsigned char *EndBuf, uint64_t &Result) {
  unsigned Shift = 0; Result = 0;
  unsigned char Byte;
  do {
    if (Buf >= EndBuf) return true;      // Ran off the end of the buffer
    if (Shift >= 64) return true;        // Too many bytes for a uint64_t
    Byte = *Buf++;
    Result |= (uint64_t)(Byte & 0x7F) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  return false;
}
// read_vbr (signed) - Read a signed number stored in sign-magnitude format:
// the low bit of the decoded unsigned value is the sign, the remaining bits
// are the magnitude.  Returns true if the underlying unsigned read fails.
//
static inline bool read_vbr(const unsigned char *&Buf,
                            const unsigned char *EndBuf, int &Result) {
  unsigned Encoded;
  if (read_vbr(Buf, EndBuf, Encoded)) return true;

  const int Magnitude = (int)(Encoded >> 1);
  Result = (Encoded & 1) ? -Magnitude : Magnitude;
  return false;
}
// read_vbr (int64_t) - 64 bit signed flavor: the low bit of the decoded
// unsigned value is the sign, the remaining bits are the magnitude.  Returns
// true if the underlying unsigned read fails.
//
static inline bool read_vbr(const unsigned char *&Buf,
                            const unsigned char *EndBuf, int64_t &Result) {
  uint64_t Encoded;
  if (read_vbr(Buf, EndBuf, Encoded)) return true;

  const int64_t Magnitude = (int64_t)(Encoded >> 1);
  Result = (Encoded & 1) ? -Magnitude : Magnitude;
  return false;
}
// align32 - Round the read pointer up to a multiple of 32 bits.  The
// pointer's address itself is rounded (add 3, clear the low two bits).
// Returns true when the rounded pointer lies past EndBuf.
//
static inline bool align32(const unsigned char *&Buf,
                           const unsigned char *EndBuf) {
  unsigned long Addr = (unsigned long)(Buf+3);
  Addr &= ~3UL;
  Buf = (const unsigned char *)Addr;
  return EndBuf < Buf;
}
// read (string) - Read a length-prefixed string: a vbr encoded byte count
// followed by that many raw bytes.  When Aligned is set the read pointer is
// then rounded up to a 32 bit boundary.  Returns true on failure, false on
// success.
//
static inline bool read(const unsigned char *&Buf, const unsigned char *EndBuf,
                        string &Result, bool Aligned = true) {
  unsigned Size;
  if (read_vbr(Buf, EndBuf, Size)) return true;   // Failure reading size?

  // Compare against the bytes that are actually left rather than forming
  // Buf+Size: for a bogus, huge Size that pointer sum can wrap around and
  // slip past the bounds check.  read_vbr succeeded, so Buf <= EndBuf here.
  if ((uint64_t)(EndBuf - Buf) < Size) return true;   // Size invalid?

  Result = string((const char*)Buf, Size);
  Buf += Size;

  if (Aligned)                           // If we should stay aligned do so...
    if (align32(Buf, EndBuf)) return true;        // Failure aligning?
  return false;
}
//===----------------------------------------------------------------------===//
// Writing Primitives
//===----------------------------------------------------------------------===//
// output - Emit a 32 bit value in little endian byte order.  With the
// default pos of -1 the four bytes are appended to Out; otherwise they
// overwrite Out[pos..pos+3], so pos must lie in the valid portion of the
// vector.  This is meant to always be inlined so that only the relevant
// branch survives.
//
static inline void output(unsigned i, vector<unsigned char> &Out, int pos = -1){
#ifdef LITTLE_ENDIAN
  if (pos != -1) {
    *(unsigned*)&Out[pos] = i;
  } else {
    unsigned char *Raw = (unsigned char*)&i;
    Out.insert(Out.end(), Raw, Raw+4);
  }
#else
  // Be endian clean, little endian is our friend: peel the value apart one
  // byte at a time, least significant byte first.
  const bool Append = (pos == -1);
  for (int Byte = 0; Byte < 4; ++Byte) {
    const unsigned char Piece = (unsigned char)(i >> (8 * Byte));
    if (Append)
      Out.push_back(Piece);
    else
      Out[pos + Byte] = Piece;
  }
#endif
}
// output (int) - Signed flavor: converts the value to unsigned and appends
// it through the unsigned writer.
//
static inline void output(int i, vector<unsigned char> &Out) {
  const unsigned Bits = (unsigned)i;
  output(Bits, Out);
}
// output_vbr - Output an unsigned value using the least number of bytes
// possible.  This is useful because many of our "infinite" values are really
// very small most of the time... but can be large a few times...
//
// Data format used: if you read a byte with the high bit set, use the low
// seven bits as data and then read another byte...
//
// Note that using this may cause the output buffer to become unaligned...
//
static inline void output_vbr(uint64_t i, vector<unsigned char> &out) {
  // While more than seven bits remain, emit the low seven with the high bit
  // set to say that there is more coming.
  for (; i >= 0x80; i >>= 7)
    out.push_back(0x80 | (i & 0x7F));

  // The last byte fits in seven bits, so its high bit is clear.
  out.push_back((unsigned char)i);
}
// output_vbr (unsigned) - 32 bit flavor of the variable bitrate writer:
// seven data bits per byte, least significant group first, high bit set on
// every byte except the last.
//
static inline void output_vbr(unsigned i, vector<unsigned char> &out) {
  while (i >= 0x80) {
    // More than seven bits left: emit the low seven and flag a follow-up.
    out.push_back(0x80 | (i & 0x7F));
    i >>= 7;
  }
  out.push_back((unsigned char)i);   // Final byte, high bit clear
}
// output_vbr (int64_t) - Signed values are written in sign-magnitude form:
// the magnitude is shifted up one bit and the low order bit carries the sign
// (set for negative).  The result goes through the uint64_t writer.
//
static inline void output_vbr(int64_t i, vector<unsigned char> &out) {
  const bool Negative = i < 0;
  uint64_t Encoded = (uint64_t)(Negative ? -i : i) << 1;
  if (Negative)
    Encoded |= 1;                    // Set low order sign bit...
  output_vbr(Encoded, out);
}
// output_vbr (int) - Signed values are written in sign-magnitude form: the
// magnitude is shifted up one bit and the low order bit carries the sign
// (set for negative).  The result goes through the unsigned writer.
//
static inline void output_vbr(int i, vector<unsigned char> &out) {
  const bool Negative = i < 0;
  unsigned Encoded = (unsigned)(Negative ? -i : i) << 1;
  if (Negative)
    Encoded |= 1;                    // Set low order sign bit...
  output_vbr(Encoded, out);
}
// align32 - Emit the minimal number of bytes that will bring the output to
// 32 bit alignment.  The filler byte is 0xAB; nothing is written when the
// size is already a multiple of four.
//
static inline void align32(vector<unsigned char> &Out) {
  while (Out.size() & 3)
    Out.push_back((unsigned char)0xAB);
}
// output (string) - Write a string as a vbr encoded length (strings may have
// an arbitrary length) followed by its raw characters.  When Aligned is set
// the output is then padded to 32 bit alignment.
//
static inline void output(const string &s, vector<unsigned char> &Out,
                          bool Aligned = true) {
  output_vbr((unsigned)s.length(), Out);
  Out.insert(Out.end(), s.begin(), s.end());

  if (!Aligned) return;
  align32(Out);                      // Make sure we are now aligned...
}
#endif

View File

@ -0,0 +1,24 @@
//===-- llvm/Bytecode/Reader.h - Reader for VM bytecode files ----*- C++ -*--=//
//
// This functionality is implemented by the lib/BytecodeReader library.
// This library is used to read VM bytecode files from an iostream.
//
// Note that performance of this library is _crucial_ for performance of the
// JIT type applications, so we have designed the bytecode format to support
// quick reading.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BYTECODE_READER_H
#define LLVM_BYTECODE_READER_H
#include <string>
class Module;
// Parse bytecode and return the result as a Module pointer.  Two entry
// points are declared: one takes a file name, the other an in-memory buffer
// together with its size.
// NOTE(review): what is returned on a parse failure, and who owns the
// returned Module, is not visible from these declarations - confirm against
// the reader library.
//
Module *ParseBytecodeFile(const string &Filename);
Module *ParseBytecodeBuffer(const char *Buffer, unsigned BufferSize);
#endif

View File

@ -0,0 +1,25 @@
//===-- llvm/Bytecode/Writer.h - Writer for VM bytecode files ----*- C++ -*--=//
//
// This functionality is implemented by the lib/BytecodeWriter library.
// This library is used to write VM bytecode files to an iostream. First, you
// have to make a BytecodeStream object, which you can then put a class into
// by using operator <<.
//
// This library uses the Analysis library to figure out offsets for
// variables in the method tables...
//
// Note that performance of this library is not as crucial as performance of the
// bytecode reader (which is to be used in JIT type applications), so we have
// designed the bytecode format to support quick reading.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BYTECODE_WRITER_H
#define LLVM_BYTECODE_WRITER_H
#include <iostream.h>
class Module;
// WriteBytecodeToFile - Takes the module to write (C) and the output stream
// to write it to (Out).  Only the declaration is visible here.
// NOTE(review): the file header talks about a BytecodeStream object and
// operator<<, which does not match this function - confirm which is current.
//
void WriteBytecodeToFile(const Module *C, ostream &Out);
#endif

View File

@ -0,0 +1,234 @@
//===-- llvm/ConstPoolVals.h - Constant Value nodes --------------*- C++ -*--=//
//
// This file contains the declarations for the ConstPoolVal class and all of
// its subclasses, which represent the different type of constant pool values
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CONSTPOOLVALS_H
#define LLVM_CONSTPOOLVALS_H
#include "llvm/User.h"
#include "llvm/SymTabValue.h"
#include "llvm/Tools/DataTypes.h"
#include <vector>
class ArrayType;
class StructType;
//===----------------------------------------------------------------------===//
// ConstPoolVal Class
//===----------------------------------------------------------------------===//
class ConstPoolVal;
typedef UseTy<ConstPoolVal> ConstPoolUse;
// ConstPoolVal - Abstract base of all constant pool values.  It is a User
// constructed with the Value::ConstantVal tag, and it records the
// SymTabValue that contains it (null until setParent is called).
//
class ConstPoolVal : public User {
SymTabValue *Parent;
// setParent is private; the ValueHolder instantiation below is a friend and
// so can call it.
friend class ValueHolder<ConstPoolVal, SymTabValue>;
inline void setParent(SymTabValue *parent) {
Parent = parent;
}
public:
// A freshly constructed constant has no parent.
inline ConstPoolVal(const Type *Ty, const string &Name = "")
: User(Ty, Value::ConstantVal, Name) { Parent = 0; }
// Specialize setName to handle symbol table majik...
virtual void setName(const string &name);
// Static constructor to create a '0' constant of arbitrary type...
static ConstPoolVal *getNullConstant(const Type *Ty);
// clone() - Create a copy of 'this' value that is identical in all ways
// except the following:
// * The value has no parent
// * The value has no name
//
virtual ConstPoolVal *clone() const = 0;
// getStrValue / equals - Pure virtual; each concrete constant class declares
// its own override.
virtual string getStrValue() const = 0;
virtual bool equals(const ConstPoolVal *V) const = 0;
inline const SymTabValue *getParent() const { return Parent; }
inline SymTabValue *getParent() { return Parent; }
// Operand access.  The defaults here model a constant with no operands:
// getOperand() always returns 0 and setOperand() always returns false.
// Subclasses that do have operands return 0 from getOperand() when
// i >= the number of operands.
//
// Note that some subclasses may change this default no argument behavior
//
virtual Value *getOperand(unsigned i) { return 0; }
virtual const Value *getOperand(unsigned i) const { return 0; }
virtual bool setOperand(unsigned i, Value *Val) { return false; }
virtual void dropAllReferences() {}
};
//===----------------------------------------------------------------------===//
// Classes to represent constant pool variable defs
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
// ConstPoolBool - Boolean Values
//
// ConstPoolBool - A constant holding a single bool.
//
class ConstPoolBool : public ConstPoolVal {
bool Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolBool(const ConstPoolBool &CP);
public:
ConstPoolBool(bool V, const string &Name = "");
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolBool(*this); }
inline bool getValue() const { return Val; }
// setValue - Be careful... if there is more than one 'use' of this node, then
// they will ALL see the value that you set...
//
inline void setValue(bool v) { Val = v; }
};
//===---------------------------------------------------------------------------
// ConstPoolSInt - Signed Integer Values [sbyte, short, int, long]
//
// ConstPoolSInt - A signed integer constant; the value is stored as an
// int64_t whatever the Type passed to the constructor.
//
class ConstPoolSInt : public ConstPoolVal {
int64_t Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolSInt(const ConstPoolSInt &CP);
public:
ConstPoolSInt(const Type *Ty, int64_t V, const string &Name = "");
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolSInt(*this); }
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
// isValueValidForType - Static check taking a type and a candidate value.
// NOTE(review): presumably a range check of V against Ty; the definition is
// not visible here.
static bool isValueValidForType(const Type *Ty, int64_t V);
inline int64_t getValue() const { return Val; }
};
//===---------------------------------------------------------------------------
// ConstPoolUInt - Unsigned Integer Values [ubyte, ushort, uint, ulong]
//
// ConstPoolUInt - An unsigned integer constant; the value is stored as a
// uint64_t whatever the Type passed to the constructor.
//
class ConstPoolUInt : public ConstPoolVal {
uint64_t Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolUInt(const ConstPoolUInt &CP);
public:
ConstPoolUInt(const Type *Ty, uint64_t V, const string &Name = "");
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolUInt(*this); }
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
// isValueValidForType - Static check taking a type and a candidate value.
// NOTE(review): presumably a range check of V against Ty; the definition is
// not visible here.
static bool isValueValidForType(const Type *Ty, uint64_t V);
inline uint64_t getValue() const { return Val; }
};
//===---------------------------------------------------------------------------
// ConstPoolFP - Floating Point Values [float, double]
//
// ConstPoolFP - A floating point constant; the value is stored as a double
// whatever the Type passed to the constructor.
//
class ConstPoolFP : public ConstPoolVal {
double Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolFP(const ConstPoolFP &CP);
public:
ConstPoolFP(const Type *Ty, double V, const string &Name = "");
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolFP(*this); }
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
// isValueValidForType - Static check taking a type and a candidate value.
// NOTE(review): presumably checks that V is representable in Ty; the
// definition is not visible here.
static bool isValueValidForType(const Type *Ty, double V);
inline double getValue() const { return Val; }
};
//===---------------------------------------------------------------------------
// ConstPoolType - Type Declarations
//
// ConstPoolType - A constant whose value is itself a Type pointer.
//
class ConstPoolType : public ConstPoolVal {
const Type *Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolType(const ConstPoolType &CPT);
public:
ConstPoolType(const Type *V, const string &Name = "");
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolType(*this); }
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
inline const Type *getValue() const { return Val; }
};
//===---------------------------------------------------------------------------
// ConstPoolArray - Constant Array Declarations
//
// ConstPoolArray - A constant array.  The elements are kept as a vector of
// ConstPoolUse, and are exposed read-only as the operands of this value.
//
class ConstPoolArray : public ConstPoolVal {
vector<ConstPoolUse> Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolArray(const ConstPoolArray &CPT);
public:
ConstPoolArray(const ArrayType *T, vector<ConstPoolVal*> &V,
const string &Name = "");
// The destructor lets go of the element uses (Val is cleared).
inline ~ConstPoolArray() { dropAllReferences(); }
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolArray(*this); }
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
inline const vector<ConstPoolUse> &getValues() const { return Val; }
// Implement User stuff: operand i is element i, or 0 when i is out of range.
//
virtual Value *getOperand(unsigned i) {
return (i < Val.size()) ? Val[i] : 0;
}
virtual const Value *getOperand(unsigned i) const {
return (i < Val.size()) ? Val[i] : 0;
}
// setOperand fails! You can't change a constant!
// (The parameter named Val shadows the member here; it is unused.)
virtual bool setOperand(unsigned i, Value *Val) { return false; }
virtual void dropAllReferences() { Val.clear(); }
};
//===---------------------------------------------------------------------------
// ConstPoolStruct - Constant Struct Declarations
//
// ConstPoolStruct - A constant structure.  The members are kept as a vector
// of ConstPoolUse, and are exposed read-only as the operands of this value.
//
class ConstPoolStruct : public ConstPoolVal {
vector<ConstPoolUse> Val;
// Copy constructor is private; clone() below is its in-class user.
ConstPoolStruct(const ConstPoolStruct &CPT);
public:
ConstPoolStruct(const StructType *T, vector<ConstPoolVal*> &V,
const string &Name = "");
// The destructor lets go of the member uses (Val is cleared).
inline ~ConstPoolStruct() { dropAllReferences(); }
// clone - Returns a heap allocated copy made with the copy constructor.
virtual ConstPoolVal *clone() const { return new ConstPoolStruct(*this); }
virtual string getStrValue() const;
virtual bool equals(const ConstPoolVal *V) const;
inline const vector<ConstPoolUse> &getValues() const { return Val; }
// Implement User stuff: operand i is member i, or 0 when i is out of range.
//
virtual Value *getOperand(unsigned i) {
return (i < Val.size()) ? Val[i] : 0;
}
virtual const Value *getOperand(unsigned i) const {
return (i < Val.size()) ? Val[i] : 0;
}
// setOperand fails! You can't change a constant!
// (The parameter named Val shadows the member here; it is unused.)
virtual bool setOperand(unsigned i, Value *Val) { return false; }
virtual void dropAllReferences() { Val.clear(); }
};
#endif

View File

@ -0,0 +1,145 @@
//===-- ConstantHandling.h - Stuff for manipulating constants ----*- C++ -*--=//
//
// This file contains the declarations of some cool operators that allow you
// to do natural things with constant pool values.
//
// Unfortunately we can't overload operators on pointer types (like this:)
//
// inline bool operator==(const ConstPoolVal *V1, const ConstPoolVal *V2)
//
// so we must make do with references, even though it leads to some butt ugly
// looking code downstream. *sigh* (ex: ConstPoolVal *Result = *V1 + *v2; )
//
//===----------------------------------------------------------------------===//
//
// WARNING: These operators return pointers to newly 'new'd objects. You MUST
// make sure to free them if you don't want them hanging around. Also,
// note that these may return a null object if I don't know how to
// perform those operations on the specified constant types.
//
//===----------------------------------------------------------------------===//
//
// Implementation notes:
// This library is implemented this way for a reason: In most cases, we do
// not want to have to link the constant mucking code into an executable.
// We do, however want to tie some of this into the main type system, as an
// optional component. By using a mutable cache member in the Type class, we
// get exactly the kind of behavior we want.
//
// In the end, we get performance almost exactly the same as having a virtual
// function dispatch, but we don't have to put our virtual functions into the
// "Type" class, and we can implement functionality with templates. Good deal.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OPT_CONSTANTHANDLING_H
#define LLVM_OPT_CONSTANTHANDLING_H
#include "llvm/ConstPoolVals.h"
#include "llvm/Type.h"
//===----------------------------------------------------------------------===//
// Implement == directly...
//===----------------------------------------------------------------------===//
// operator== - Compare two constants of identical type (asserted) with
// ConstPoolVal::equals and return the answer as a newly allocated
// ConstPoolBool.
//
inline ConstPoolBool *operator==(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const bool Same = V1.equals(&V2);
  return new ConstPoolBool(Same);
}
//===----------------------------------------------------------------------===//
// Implement all other operators indirectly through TypeRules system
//===----------------------------------------------------------------------===//
// ConstRules - Abstract table of the operations that can be performed on
// constants of one type.  Concrete rule sets subclass this; callers obtain
// the rules for a constant through ConstRules::get.
//
// NOTE(review): 'not' is an alternative operator token in standard C++, so
// the member of that name will not be accepted by a conforming compiler.
// Renaming it would change the interface that subclasses implement, so it
// is left alone here.
//
class ConstRules {
protected:
  inline ConstRules() {}  // Can only be subclassed...
public:
  // Polymorphic base: make destruction through a ConstRules pointer well
  // defined.
  virtual ~ConstRules() {}

  // Unary Operators...
  virtual ConstPoolVal *neg(const ConstPoolVal *V) const = 0;
  virtual ConstPoolVal *not(const ConstPoolVal *V) const = 0;

  // Binary Operators...
  virtual ConstPoolVal *add(const ConstPoolVal *V1,
                            const ConstPoolVal *V2) const = 0;
  virtual ConstPoolVal *sub(const ConstPoolVal *V1,
                            const ConstPoolVal *V2) const = 0;
  virtual ConstPoolBool *lessthan(const ConstPoolVal *V1,
                                  const ConstPoolVal *V2) const = 0;

  // ConstRules::get - A type will cache its own type rules if one is needed...
  // we just want to make sure to hit the cache instead of doing it indirectly,
  // if possible...  When the type has no cached rules, fall back to find().
  //
  static inline const ConstRules *get(const ConstPoolVal &V) {
    const ConstRules *Result = V.getType()->getConstRules();
    return Result ? Result : find(V.getType());
  }
private :
  static const ConstRules *find(const Type *Ty);

  ConstRules(const ConstRules &);             // Do not implement
  ConstRules &operator=(const ConstRules &);  // Do not implement
};
// Unary minus - Dispatches to the 'neg' rule of the operand's ConstRules.
inline ConstPoolVal *operator-(const ConstPoolVal &V) {
  const ConstRules *Rules = ConstRules::get(V);
  return Rules->neg(&V);
}
// Logical not - Dispatches to the 'not' rule of the operand's ConstRules.
inline ConstPoolVal *operator!(const ConstPoolVal &V) {
  const ConstRules *Rules = ConstRules::get(V);
  return Rules->not(&V);
}
// Addition - Both operands must have the same type (asserted); dispatches
// to the 'add' rule selected by the first operand.
inline ConstPoolVal *operator+(const ConstPoolVal &V1, const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->add(&V1, &V2);
}
// Subtraction - Both operands must have the same type (asserted);
// dispatches to the 'sub' rule selected by the first operand.
inline ConstPoolVal *operator-(const ConstPoolVal &V1, const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->sub(&V1, &V2);
}
// Less than - Both operands must have the same type (asserted); dispatches
// to the 'lessthan' rule selected by the first operand.
inline ConstPoolBool *operator<(const ConstPoolVal &V1,
                                const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->lessthan(&V1, &V2);
}
//===----------------------------------------------------------------------===//
// Implement 'derived' operators based on what we already have...
//===----------------------------------------------------------------------===//
// Greater than - Derived from operator< with the operands swapped:
// V1 > V2 is V2 < V1.
inline ConstPoolBool *operator>(const ConstPoolVal &V1,
                                const ConstPoolVal &V2) {
  ConstPoolBool *Swapped = V2 < V1;
  return Swapped;
}
// Not equal - Derived from operator==: compute the equality constant and
// flip the boolean stored in it, i.e. !(V1 == V2).
inline ConstPoolBool *operator!=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Answer = V1 == V2;
  const bool WasEqual = Answer->getValue();
  Answer->setValue(!WasEqual);
  return Answer;
}
// Greater or equal - Derived from operator<: !(V1 < V2).
//
// The header of this file warns that these operators may return a null
// object when the operation is not known for the constant type.  In that
// case the null is handed back to the caller instead of being dereferenced.
//
inline ConstPoolBool *operator>=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Result = V1 < V2;
  if (Result)                                 // null => comparison unsupported
    Result->setValue(!Result->getValue());    // Invert value
  return Result;                              // !(V1 < V2)
}
// Less or equal - Derived from operator>: !(V1 > V2).
//
// The header of this file warns that these operators may return a null
// object when the operation is not known for the constant type.  In that
// case the null is handed back to the caller instead of being dereferenced.
//
inline ConstPoolBool *operator<=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Result = V1 > V2;
  if (Result)                                 // null => comparison unsupported
    Result->setValue(!Result->getValue());    // Invert value
  return Result;                              // !(V1 > V2)
}
#endif

View File

@ -0,0 +1,74 @@
//===-- llvm/ConstantPool.h - Define the constant pool class ------*- C++ -*-=//
//
// This file implements a constant pool that is split into different type
// planes. This allows searching for a typed object to go a little faster.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CONSTANTPOOL_H
#define LLVM_CONSTANTPOOL_H
#include <vector>
#include "llvm/ValueHolder.h"
class ConstPoolVal;
class SymTabValue;
class Type;
// ConstantPool - A pool of constants split into per-type "planes".  Each
// plane is a ValueHolder of ConstPoolVal; the pool keeps a vector of plane
// pointers plus the SymTabValue it belongs to.
//
class ConstantPool {
public:
typedef ValueHolder<ConstPoolVal, SymTabValue> PlaneType;
private:
typedef vector<PlaneType*> PlanesType;
PlanesType Planes;
SymTabValue *Parent;
inline void resize(unsigned size);
public:
inline ConstantPool(SymTabValue *P) { Parent = P; }
// The destructor calls delete_all().
inline ~ConstantPool() { delete_all(); }
inline SymTabValue *getParent() { return Parent; }
inline const SymTabValue *getParent() const { return Parent; }
void setParent(SymTabValue *STV);
void dropAllReferences(); // Drop all references to other constants
// Constant getPlane - Returns true if the type plane does not exist,
// otherwise updates the pointer to point to the correct plane.
//
bool getPlane(const Type *T, const PlaneType *&Plane) const;
bool getPlane(const Type *T, PlaneType *&Plane);
// Normal getPlane - Resizes constant pool to contain type even if it doesn't
// already have it.
//
PlaneType &getPlane(const Type *T);
// insert - Add constant into the symbol table...
void insert(ConstPoolVal *N);
bool remove(ConstPoolVal *N); // Returns true on failure
void delete_all();
// find - Search to see if a constant of the specified value is already in
// the constant table.  Overloads are also declared that take a Type instead
// of a value.
//
const ConstPoolVal *find(const ConstPoolVal *V) const;
ConstPoolVal *find(const ConstPoolVal *V) ;
const ConstPoolVal *find(const Type *Ty) const;
ConstPoolVal *find(const Type *Ty) ;
// Plane iteration support - iterators run over the vector of plane
// pointers, so dereferencing one yields a PlaneType*.
//
typedef PlanesType::iterator plane_iterator;
typedef PlanesType::const_iterator plane_const_iterator;
inline plane_iterator begin() { return Planes.begin(); }
inline plane_const_iterator begin() const { return Planes.begin(); }
inline plane_iterator end() { return Planes.end(); }
inline plane_const_iterator end() const { return Planes.end(); }
};
#endif

120
include/llvm/DerivedTypes.h Normal file
View File

@ -0,0 +1,120 @@
//===-- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*--=//
//
// This file contains the declarations of classes that represent "derived
// types". These are things like "arrays of x" or "structure of x, y, z" or
// "method returning x taking (y,z) as parameters", etc...
//
// The implementations of these classes live in the Type.cpp file.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DERIVED_TYPES_H
#define LLVM_DERIVED_TYPES_H
#include "llvm/Type.h"
#include <vector>
// Future derived types: SIMD packed format
// MethodType - The type of a method: a result type plus an ordered list of
// parameter types.  Instances are obtained through the static getMethodType
// member; copying is disabled.
//
class MethodType : public Type {
public:
typedef vector<const Type*> ParamTypes;
private:
const Type *ResultType;
ParamTypes ParamTys;
MethodType(const MethodType &); // Do not implement
const MethodType &operator=(const MethodType &); // Do not implement
protected:
// This should really be private, but it squelches a bogus warning
// from GCC to make them protected: warning: `class MethodType' only
// defines private constructors and has no friends
// Private ctor - Only can be created by a static member...
MethodType(const Type *Result, const vector<const Type*> &Params,
const string &Name);
public:
inline const Type *getReturnType() const { return ResultType; }
inline const ParamTypes &getParamTypes() const { return ParamTys; }
// getMethodType - Static accessor returning the MethodType for the given
// result and parameter types.
static const MethodType *getMethodType(const Type *Result,
const ParamTypes &Params);
};
// ArrayType - The type of an array: an element type plus an element count,
// where a count of -1 means the size is unbounded/unknown.  Instances are
// obtained through the static getArrayType member; copying is disabled.
//
class ArrayType : public Type {
private:
const Type *ElementType;
int NumElements; // >= 0 for sized array, -1 for unbounded/unknown array
ArrayType(const ArrayType &); // Do not implement
const ArrayType &operator=(const ArrayType &); // Do not implement
protected:
// This should really be private, but it squelches a bogus warning
// from GCC to make them protected: warning: `class ArrayType' only
// defines private constructors and has no friends
// Private ctor - Only can be created by a static member...
ArrayType(const Type *ElType, int NumEl, const string &Name);
public:
inline const Type *getElementType() const { return ElementType; }
inline int getNumElements() const { return NumElements; }
// isSized tests for >= 0 and isUnsized for exactly -1, so a count below -1
// would satisfy neither predicate.
inline bool isSized() const { return NumElements >= 0; }
inline bool isUnsized() const { return NumElements == -1; }
// getArrayType - Static accessor; NumElements defaults to -1 (unsized).
static const ArrayType *getArrayType(const Type *ElementType,
int NumElements = -1);
};
// StructType - The type of a structure: an ordered list of element types.
// Instances are obtained through the static getStructType member; copying
// is disabled.
//
class StructType : public Type {
public:
typedef vector<const Type*> ElementTypes;
private:
ElementTypes ETypes;
StructType(const StructType &); // Do not implement
const StructType &operator=(const StructType &); // Do not implement
protected:
// This should really be private, but it squelches a bogus warning
// from GCC to make them protected: warning: `class StructType' only
// defines private constructors and has no friends
// Private ctor - Only can be created by a static member...
StructType(const vector<const Type*> &Types, const string &Name);
public:
inline const ElementTypes &getElementTypes() const { return ETypes; }
// getStructType - Static accessor returning the StructType for the given
// element types.
static const StructType *getStructType(const ElementTypes &Params);
};
// PointerType - Type subclass describing a pointer; it records the type that
// is pointed to. Copying is disabled (declared but never implemented) and the
// constructor is not public, so instances are obtained through the static
// getPointerType() member.
//
class PointerType : public Type {
private:
const Type *ValueType;
PointerType(const PointerType &); // Do not implement
const PointerType &operator=(const PointerType &); // Do not implement
protected:
// This should really be private, but it squelches a bogus warning
// from GCC to make them protected: warning: `class PointerType' only
// defines private constructors and has no friends
// Private ctor - Only can be created by a static member...
PointerType(const Type *ElType);
public:
// getValueType - The type stored in ValueType (the pointed-to type).
inline const Type *getValueType() const { return ValueType; }
// getPointerType - Static accessor used to obtain a PointerType for the
// given element type. Defined out of line; presumably returns the single
// shared instance for that element type -- confirm against the
// implementation.
static const PointerType *getPointerType(const Type *ElementType);
};
#endif

174
include/llvm/Function.h Normal file
View File

@ -0,0 +1,174 @@
//===-- llvm/Method.h - Class to represent a single VM method ----*- C++ -*--=//
//
// This file contains the declaration of the Method class, which represents a
// single Method/function/procedure in the VM.
//
// Note that basic blocks themselves are Def's, because they are referenced
// by instructions like calls and can go in virtual function tables and stuff.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_METHOD_H
#define LLVM_METHOD_H
#include "llvm/SymTabValue.h"
#include "llvm/BasicBlock.h"
#include <list>
class Instruction;
class BasicBlock;
class MethodArgument;
class MethodType;
class Method;
class Module;
typedef UseTy<Method> MethodUse;
// Method - Represents a single method/function/procedure in the VM. A method
// holds a list of formal arguments and a list of basic blocks, and remembers
// the Module that contains it. It also provides inst_iterator, an iterator
// that walks every instruction of every basic block in order.
//
class Method : public SymTabValue {
public:
  typedef ValueHolder<MethodArgument, Method> ArgumentListType;
  typedef ValueHolder<BasicBlock    , Method> BasicBlocksType;
private:

  // Important things that make up a method!
  BasicBlocksType  BasicBlocks;    // The basic blocks
  ArgumentListType ArgumentList;   // The formal arguments

  Module *Parent;                  // The module that contains this method

  // Only the module's method list is allowed to reparent a method.
  friend class ValueHolder<Method,Module>;
  void setParent(Module *parent);

public:
  Method(const MethodType *Ty, const string &Name = "");
  ~Method();

  // Specialize setName to handle symbol table majik...
  virtual void setName(const string &name);

  const Type *getReturnType() const;
  const MethodType *getMethodType() const;

  // Is the body of this method unknown? (the basic block list is empty if so)
  // this is true for "extern"al methods.
  bool isMethodExternal() const { return BasicBlocks.empty(); }

  // Get the class structure that this method is contained inside of...
  inline Module *getParent() { return Parent; }
  inline const Module *getParent() const { return Parent; }

  // Direct access to the basic block list and the formal argument list.
  inline const BasicBlocksType  &getBasicBlocks() const { return BasicBlocks; }
  inline       BasicBlocksType  &getBasicBlocks()       { return BasicBlocks; }

  inline const ArgumentListType &getArgumentList() const{ return ArgumentList; }
  inline       ArgumentListType &getArgumentList()      { return ArgumentList; }

  // dropAllReferences() - This function causes all the subinstructions to "let
  // go" of all references that they are maintaining.  This allows one to
  // 'delete' a whole class at a time, even though there may be circular
  // references... first all references are dropped, and all use counts go to
  // zero.  Then everything is delete'd for real.  Note that no operations are
  // valid on an object that has "dropped all references", except operator
  // delete.
  //
  void dropAllReferences();

  //===--------------------------------------------------------------------===//
  // Method Instruction iterator code
  //===--------------------------------------------------------------------===//
  //
  template <class _BB_t, class _BB_i_t, class _BI_t, class _II_t>
  class InstIterator;
  typedef InstIterator<BasicBlocksType, BasicBlocksType::iterator,
                       BasicBlock::InstListType::iterator,
                       Instruction*> inst_iterator;
  typedef InstIterator<const BasicBlocksType, BasicBlocksType::const_iterator,
                       BasicBlock::InstListType::const_iterator,
                       const Instruction*> inst_const_iterator;

  // This inner class is used to implement inst_begin() & inst_end() for
  // inst_iterator and inst_const_iterator's.
  //
  // The iterator is a pair (BB, BI): the current basic block and the current
  // instruction inside it.  The end iterator has BB == BBs.end() and leaves BI
  // unset, which is why operator== ignores BI in that case.
  //
  template <class _BB_t, class _BB_i_t, class _BI_t, class _II_t>
  class InstIterator {
    typedef _BB_t   BBty;
    typedef _BB_i_t BBIty;
    typedef _BI_t   BIty;
    typedef _II_t   IIty;
    _BB_t  &BBs;      // BasicBlocksType
    _BB_i_t BB;       // BasicBlocksType::iterator
    _BI_t   BI;       // BasicBlock::InstListType::iterator
  public:
    typedef bidirectional_iterator_tag iterator_category;

    // begin ctor - Position on the first instruction of the first non-empty
    // basic block (or at end if there is none).
    template<class M> InstIterator(M &m)
      : BBs(m.getBasicBlocks()), BB(BBs.begin()) {    // begin ctor
      if (BB != BBs.end()) {
        BI = (*BB)->getInstList().begin();
        resyncInstructionIterator();
      }
    }

    // end ctor - The bool argument only selects this overload; its value is
    // ignored.
    template<class M> InstIterator(M &m, bool)
      : BBs(m.getBasicBlocks()), BB(BBs.end()) {    // end ctor
    }

    // Accessors to get at the underlying iterators...
    inline BBIty &getBasicBlockIterator()  { return BB; }
    inline BIty  &getInstructionIterator() { return BI; }

    inline IIty operator*()  const { return *BI; }
    // operator* yields the instruction pointer by value, so there is no object
    // whose address could be returned here; hand back the pointer itself so
    // that 'it->member' reaches the instruction.
    inline IIty operator->() const { return operator*(); }

    // Two iterators are equal if they are in the same basic block and at the
    // same instruction; BI is not compared for the end position.
    inline bool operator==(const InstIterator &y) const {
      return BB == y.BB && (BI == y.BI || BB == BBs.end());
    }
    inline bool operator!=(const InstIterator& y) const {
      return !operator==(y);
    }

    // resyncInstructionIterator - This should be called if the
    // InstructionIterator is modified outside of our control.  This resynchs
    // the internals of the InstIterator to a consistent state.
    //
    // Must not be called when BB is already at BBs.end(): BB is dereferenced
    // before it is tested.
    //
    inline void resyncInstructionIterator() {
      // The only way that the II could be broken is if it is now pointing to
      // the end() of the current BasicBlock and there are successor BBs.
      while (BI == (*BB)->getInstList().end()) {
        ++BB;
        if (BB == BBs.end()) break;
        BI = (*BB)->getInstList().begin();
      }
    }

    InstIterator& operator++() {
      ++BI;
      resyncInstructionIterator();   // Make sure it is still valid.
      return *this;
    }
    inline InstIterator operator++(int) {
      InstIterator tmp = *this; ++*this; return tmp;
    }

    // Step backwards, skipping over empty basic blocks and handling the end
    // position (where BI is not meaningful).
    InstIterator& operator--() {
      while (BB == BBs.end() || BI == (*BB)->getInstList().begin()) {
        --BB;
        BI = (*BB)->getInstList().end();
      }
      --BI;
      return *this;
    }
    inline InstIterator  operator--(int) {
      InstIterator tmp = *this; --*this; return tmp;
    }
  };

  inline inst_iterator       inst_begin()       { return inst_iterator(*this); }
  inline inst_iterator       inst_end()         { return inst_iterator(*this, true); }
  inline inst_const_iterator inst_begin() const { return inst_const_iterator(*this); }
  inline inst_const_iterator inst_end()   const { return inst_const_iterator(*this, true); }
};
#endif

131
include/llvm/InstrTypes.h Normal file
View File

@ -0,0 +1,131 @@
//===-- llvm/InstrTypes.h - Important Instruction subclasses -----*- C++ -*--=//
//
// This file defines various meta classes of instructions that exist in the VM
// representation. Specific concrete subclasses of these may be found in the
// i*.h files...
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INSTRUCTION_TYPES_H
#define LLVM_INSTRUCTION_TYPES_H
#include "llvm/Instruction.h"
#include <list>
#include <vector>
class Method;
class SymTabValue;
//===----------------------------------------------------------------------===//
// TerminatorInst Class
//===----------------------------------------------------------------------===//
// TerminatorInst - Subclasses of this class are all able to terminate a basic
// block. Thus, these are all the flow control type of operations.
//
// TerminatorInst - Abstract base for every instruction that can end a basic
// block, i.e. the flow control operations.
//
class TerminatorInst : public Instruction {
public:
  TerminatorInst(unsigned iType);
  inline ~TerminatorInst() {}

  // The Instruction interface is restated here as pure virtual: concrete
  // terminators have to supply all of it.
  virtual Instruction *clone() const = 0;
  virtual void dropAllReferences() = 0;
  virtual string getOpcode() const = 0;
  virtual bool setOperand(unsigned i, Value *Val) = 0;
  virtual const Value *getOperand(unsigned i) const = 0;

  // Successor access.  A terminator reports how many successor blocks it has
  // and returns them by index; an out of range 'idx' must yield a null
  // pointer.
  //
  virtual const BasicBlock *getSuccessor(unsigned idx) const = 0;
  virtual unsigned getNumSuccessors() const = 0;

  // Non-const convenience overload: forwards to the const virtual above and
  // strips the const from the result.
  inline BasicBlock *getSuccessor(unsigned idx) {
    const TerminatorInst *ConstThis = this;
    return const_cast<BasicBlock*>(ConstThis->getSuccessor(idx));
  }
};
//===----------------------------------------------------------------------===//
// UnaryOperator Class
//===----------------------------------------------------------------------===//
// UnaryOperator - Base class for instructions that take exactly one operand.
// The result type of the instruction is the type of that operand. getOpcode()
// is left pure virtual, so this class is still abstract.
//
class UnaryOperator : public Instruction {
Use Source;
public:
// S must be non-null: its type is read to initialize the Instruction base.
UnaryOperator(Value *S, unsigned iType, const string &Name = "")
: Instruction(S->getType(), iType, Name), Source(S, this) {
}
// Release the operand reference when the instruction goes away.
inline ~UnaryOperator() { dropAllReferences(); }
// clone - Build a new instruction with the same instruction type and source
// through the Instruction::getUnaryOperator factory.
virtual Instruction *clone() const {
return Instruction::getUnaryOperator(getInstType(), Source);
}
// dropAllReferences - Clear the single operand.
virtual void dropAllReferences() {
Source = 0;
}
virtual string getOpcode() const = 0;
virtual unsigned getNumOperands() const { return 1; }
// getOperand - Operand 0 is the source; any other index yields null.
virtual const Value *getOperand(unsigned i) const {
return (i == 0) ? Source : 0;
}
// setOperand - Replace operand 0. Returns false (and changes nothing) for
// any other index.
virtual bool setOperand(unsigned i, Value *Val) {
// assert(Val && "operand must not be null!");
if (i) return false;
Source = Val;
return true;
}
};
//===----------------------------------------------------------------------===//
// BinaryOperator Class
//===----------------------------------------------------------------------===//
// BinaryOperator - Base class for instructions that take exactly two operands
// of the same type. The result type of the instruction is the type of the
// first operand. getOpcode() is left pure virtual, so this class is still
// abstract.
//
class BinaryOperator : public Instruction {
Use Source1, Source2;
public:
// Both operands must be non-null and have identical types.
// NOTE(review): S1->getType() in the initializer list runs before the assert
// in the body, so a null S1 is dereferenced before it can be diagnosed.
BinaryOperator(unsigned iType, Value *S1, Value *S2,
const string &Name = "")
: Instruction(S1->getType(), iType, Name), Source1(S1, this),
Source2(S2, this){
assert(S1 && S2 && S1->getType() == S2->getType());
}
// Release both operand references when the instruction goes away.
inline ~BinaryOperator() { dropAllReferences(); }
// clone - Build a new instruction with the same instruction type and
// operands through the Instruction::getBinaryOperator factory.
virtual Instruction *clone() const {
return Instruction::getBinaryOperator(getInstType(), Source1, Source2);
}
// dropAllReferences - Clear both operands.
virtual void dropAllReferences() {
Source1 = Source2 = 0;
}
virtual string getOpcode() const = 0;
virtual unsigned getNumOperands() const { return 2; }
// getOperand - Operands 0 and 1 are the two sources; any other index yields
// null.
virtual const Value *getOperand(unsigned i) const {
return (i == 0) ? Source1 : ((i == 1) ? Source2 : 0);
}
// setOperand - Replace operand 0 or 1. Returns false (and changes nothing)
// for any other index. The type-match checks are currently commented out,
// so the new operand's type is not verified here.
virtual bool setOperand(unsigned i, Value *Val) {
// assert(Val && "operand must not be null!");
if (i == 0) {
Source1 = Val; //assert(Val->getType() == Source2->getType());
} else if (i == 1) {
Source2 = Val; //assert(Val->getType() == Source1->getType());
} else {
return false;
}
return true;
}
};
#endif

199
include/llvm/Instruction.h Normal file
View File

@ -0,0 +1,199 @@
//===-- llvm/Instruction.h - Instruction class definition --------*- C++ -*--=//
//
// This file contains the declaration of the Instruction class, which is the
// base class for all of the VM instructions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INSTRUCTION_H
#define LLVM_INSTRUCTION_H
#include "llvm/User.h"
class Type;
class BasicBlock;
class Method;
// Instruction - Abstract base class of all VM instructions. An instruction
// knows the basic block that contains it and carries an integer instruction
// type (iType) drawn from the opcode enumerations declared below. Operand
// access is provided through pure virtual accessors and through the
// op_iterator interface built on top of them.
//
class Instruction : public User {
  BasicBlock *Parent;
  unsigned iType;      // InstructionType

  // Only the owning basic block's instruction list may reparent an
  // instruction.
  friend class ValueHolder<Instruction,BasicBlock>;
  inline void setParent(BasicBlock *P) { Parent = P; }

public:
  Instruction(const Type *Ty, unsigned iType, const string &Name = "");
  virtual ~Instruction();  // Virtual dtor == good.

  // Specialize setName to handle symbol table majik...
  virtual void setName(const string &name);

  // clone() - Create a copy of 'this' instruction that is identical in all ways
  // except the following:
  //   * The instruction has no parent
  //   * The instruction has no name
  //
  virtual Instruction *clone() const = 0;

  // Accessor methods...
  //
  inline const BasicBlock *getParent() const { return Parent; }
  inline       BasicBlock *getParent()       { return Parent; }

  // NOTE(review): this is non-virtual and always returns false, while the
  // trailing comment says memory and call instructions should report true --
  // confirm how subclasses are expected to provide that.
  bool hasSideEffects() const { return false; }  // Memory & Call insts = true

  // ---------------------------------------------------------------------------
  // Implement the User interface
  // if i > the number of operands, then getOperand() returns 0, and setOperand
  // returns false.  setOperand() may also return false if the operand is of
  // the wrong type.
  //
  // The non-const getOperand forwards to the const virtual and strips the
  // const from the result.
  inline Value *getOperand(unsigned i) {
    return (Value*)((const Instruction *)this)->getOperand(i);
  }
  virtual const Value *getOperand(unsigned i) const = 0;
  virtual bool setOperand(unsigned i, Value *Val) = 0;
  virtual unsigned getNumOperands() const = 0;

  // ---------------------------------------------------------------------------
  // Operand Iterator interface...
  //
  template <class _Inst, class _Val> class OperandIterator;
  typedef OperandIterator<Instruction *, Value *> op_iterator;
  typedef OperandIterator<const Instruction *, const Value *> op_const_iterator;

  inline op_iterator       op_begin()      ;
  inline op_const_iterator op_begin() const;
  inline op_iterator       op_end()        ;
  inline op_const_iterator op_end()   const;

  // ---------------------------------------------------------------------------
  // Subclass classification... getInstType() returns a member of
  // one of the enums that is coming soon (down below)...
  //
  virtual string getOpcode() const = 0;

  unsigned getInstType() const { return iType; }

  // Each predicate tests whether iType falls in the half-open range
  // [First*Op, Num*Ops) of the matching enum below.
  inline bool isTerminator() const {   // Instance of TerminatorInst?
    return iType >= FirstTermOp && iType < NumTermOps;
  }
  inline bool isDefinition() const { return !isTerminator(); }
  inline bool isUnaryOp() const {
    return iType >= FirstUnaryOp && iType < NumUnaryOps;
  }
  inline bool isBinaryOp() const {
    return iType >= FirstBinaryOp && iType < NumBinaryOps;
  }

  // Factories that build the operator instruction matching opcode 'Op'.
  static Instruction *getBinaryOperator(unsigned Op, Value *S1, Value *S2);
  static Instruction *getUnaryOperator (unsigned Op, Value *Source);

  //----------------------------------------------------------------------
  // Exported enumerations...
  //
  // The enums are chained: each one starts numbering where the previous one
  // stopped, so every opcode has a distinct value and each group occupies a
  // contiguous range.
  //
  enum TermOps {       // These terminate basic blocks
    FirstTermOp = 1,
    Ret = 1, Br, Switch,
    NumTermOps         // Must remain at end of enum
  };

  enum UnaryOps {
    FirstUnaryOp = NumTermOps,
    Neg          = NumTermOps, Not,

    // Type conversions...
    ToBoolTy  ,
    ToUByteTy , ToSByteTy,  ToUShortTy, ToShortTy,
    ToUInt    , ToInt,      ToULongTy , ToLongTy,

    ToFloatTy , ToDoubleTy, ToArrayTy , ToPointerTy,

    NumUnaryOps        // Must remain at end of enum
  };

  enum BinaryOps {
    // Standard binary operators...
    FirstBinaryOp = NumUnaryOps,
    Add = NumUnaryOps, Sub, Mul, Div, Rem,

    // Logical operators...
    And, Or, Xor,

    // Binary comparison operators...
    SetEQ, SetNE, SetLE, SetGE, SetLT, SetGT,

    NumBinaryOps
  };

  enum MemoryOps {
    FirstMemoryOp = NumBinaryOps,
    Malloc = NumBinaryOps, Free,     // Heap management instructions
    Alloca,                          // Stack management instruction

    Load, Store,                     // Memory manipulation instructions.

    GetField, PutField,              // Structure manipulation instructions

    NumMemoryOps
  };

  enum OtherOps {
    FirstOtherOp = NumMemoryOps,
    PHINode      = NumMemoryOps,     // PHI node instruction
    Call,                            // Call a function

    Shl, Shr,                        // Shift operations...

    NumOps,                          // Must be the last 'op' defined.
    UserOp1, UserOp2                 // May be used internally to a pass...
  };

public:
  // OperandIterator - Iterates over the operands of one instruction by index,
  // using getOperand()/getNumOperands().  _Inst is the (possibly const)
  // instruction pointer type and _Val the (possibly const) operand pointer
  // type.
  //
  template <class _Inst, class _Val>         // Operand Iterator Implementation
  class OperandIterator {
    const _Inst Inst;
    unsigned idx;
  public:
    typedef OperandIterator<_Inst, _Val> _Self;
    typedef forward_iterator_tag iterator_category;
    typedef _Val pointer;

    inline OperandIterator(_Inst T) : Inst(T), idx(0) {}    // begin iterator
    // The bool argument only selects this overload; its value is ignored.
    inline OperandIterator(_Inst T, bool)
      : Inst(T), idx(Inst->getNumOperands()) {}             // end iterator

    // Only the operand index is compared, so both iterators are expected to
    // refer to the same instruction.
    inline bool operator==(const _Self& x) const { return idx == x.idx; }
    inline bool operator!=(const _Self& x) const { return !operator==(x); }

    inline pointer operator*() const { return Inst->getOperand(idx); }
    // operator* yields the operand pointer by value, so there is no object
    // whose address could be returned here; hand back the pointer itself so
    // that 'it->member' reaches the operand.
    inline pointer operator->() const { return operator*(); }

    inline _Self& operator++() { ++idx; return *this; } // Preincrement
    inline _Self operator++(int) { // Postincrement
      _Self tmp = *this; ++*this; return tmp;
    }

    inline _Self& operator--() { --idx; return *this; }  // Predecrement
    inline _Self operator--(int) { // Postdecrement
      _Self tmp = *this; --*this; return tmp;
    }
  };
};
// Out-of-line definitions of the operand iterator accessors.  They live after
// the class because OperandIterator is only complete at that point.  A begin
// iterator is built from the instruction alone; an end iterator additionally
// passes the 'true' tag that selects the end constructor.
inline Instruction::op_iterator Instruction::op_begin() {
  op_iterator First(this);
  return First;
}

inline Instruction::op_const_iterator Instruction::op_begin() const {
  op_const_iterator First(this);
  return First;
}

inline Instruction::op_iterator Instruction::op_end() {
  op_iterator PastLast(this, true);
  return PastLast;
}

inline Instruction::op_const_iterator Instruction::op_end() const {
  op_const_iterator PastLast(this, true);
  return PastLast;
}
#endif

38
include/llvm/Module.h Normal file
View File

@ -0,0 +1,38 @@
//===-- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*--=//
//
// This file contains the declarations for the Module class that is used to
// maintain all the information related to a VM module.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MODULE_H
#define LLVM_MODULE_H
#include "llvm/SymTabValue.h"
class Method;
// Module - Holds the information related to one VM module. In this header
// that is the list of methods the module contains, on top of whatever the
// SymTabValue base class provides.
//
class Module : public SymTabValue {
public:
// Container type used for the methods of this module.
typedef ValueHolder<Method, Module> MethodListType;
private:
MethodListType MethodList; // The Methods
public:
Module();
~Module();
// getMethodList - Direct access to the method list, in const and non-const
// flavours.
inline const MethodListType &getMethodList() const { return MethodList; }
inline MethodListType &getMethodList() { return MethodList; }
// dropAllReferences() - This function causes all the subinstructions to "let
// go" of all references that they are maintaining. This allows one to
// 'delete' a whole class at a time, even though there may be circular
// references... first all references are dropped, and all use counts go to
// zero. Then everything is delete'd for real. Note that no operations are
// valid on an object that has "dropped all references", except operator
// delete.
//
void dropAllReferences();
};
#endif

View File

@ -0,0 +1,95 @@
//===-- llvm/AllOpts.h - Header file to get all opt passes -------*- C++ -*--=//
//
// This file #include's all of the small optimization header files.
//
// Note that all optimizations return true if they modified the program, false
// if not.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OPT_ALLOPTS_H
#define LLVM_OPT_ALLOPTS_H
#include "llvm/Module.h"
#include "llvm/BasicBlock.h"
class Method;
class CallInst;
//===----------------------------------------------------------------------===//
// Helper functions
//
// ApplyOptToAllMethods - Run the per-method transformation 'Opt' on every
// method in the method list of module 'C'.  Every method is visited even after
// one of them reports a change.  Returns true if at least one invocation of
// 'Opt' returned true.
//
static inline bool ApplyOptToAllMethods(Module *C, bool (*Opt)(Method*)) {
  bool AnyChanged = false;

  // The end of the list is re-read on every iteration, exactly as a plain
  // for loop over the list would do.
  Module::MethodListType::iterator MI = C->getMethodList().begin();
  while (MI != C->getMethodList().end()) {
    if (Opt(*MI))
      AnyChanged = true;
    ++MI;
  }

  return AnyChanged;
}
//===----------------------------------------------------------------------===//
// Dead Code Elimination Pass
//
// Each entry point returns true if it modified the program, false if not.
//
bool DoDeadCodeElimination(Method *M); // DCE a method
bool DoRemoveUnusedConstants(SymTabValue *S); // RUC a method or class
bool DoDeadCodeElimination(Module *C); // DCE & RUC a whole class
//===----------------------------------------------------------------------===//
// Constant Propagation Pass
//
// (The function names keep their historical spelling.)
//
bool DoConstantPropogation(Method *M);
// Module overload: applies the per-method version to every method in C. The
// function name passed below resolves to the Method* overload because that is
// the pointer type ApplyOptToAllMethods expects.
static inline bool DoConstantPropogation(Module *C) {
return ApplyOptToAllMethods(C, DoConstantPropogation);
}
//===----------------------------------------------------------------------===//
// Method Inlining Pass
//
// DoMethodInlining - Use a heuristic based approach to inline methods that seem
// to look good.
//
bool DoMethodInlining(Method *M);
// Module overload: applies the per-method version to every method in C.
static inline bool DoMethodInlining(Module *C) {
return ApplyOptToAllMethods(C, DoMethodInlining);
}
// InlineMethod - This function forcibly inlines the called method into the
// basic block of the caller. This returns true if it is not possible to inline
// this call. The program is still in a well defined state if this occurs
// though.
//
// Note that this only does one level of inlining. For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream. Similarly this will inline a recursive
// method by one level.
//
bool InlineMethod(CallInst *C);
bool InlineMethod(BasicBlock::InstListType::iterator CI);// *CI must be CallInst
//===----------------------------------------------------------------------===//
// Symbol Stripping Pass
//
// DoSymbolStripping - Remove all symbolic information from a method
//
bool DoSymbolStripping(Method *M);
// DoSymbolStripping - Remove all symbolic information from all methods in a
// module
//
static inline bool DoSymbolStripping(Module *M) {
return ApplyOptToAllMethods(M, DoSymbolStripping);
}
// DoFullSymbolStripping - Remove all symbolic information from all methods
// in a module, and all module level symbols. (method names, etc...)
//
bool DoFullSymbolStripping(Module *M);
#endif

View File

@ -0,0 +1,96 @@
//===-- llvm/Analysis/SlotCalculator.h - Calculate value slots ---*- C++ -*-==//
//
// This ModuleAnalyzer subclass calculates the slots that values will land in.
// This is useful for when writing bytecode or assembly out, because you have
// to know these things.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_SLOTCALCULATOR_H
#define LLVM_ANALYSIS_SLOTCALCULATOR_H
#include "llvm/Analysis/ModuleAnalyzer.h"
#include "llvm/SymTabValue.h"
#include <vector>
#include <map>
// SlotCalculator - ModuleAnalyzer subclass that assigns slot numbers to
// values. Values are kept in a table of "planes" (one vector of values per
// plane) and a map records the slot assigned to each value. Module level
// values are distinguished from the values of the currently incorporated
// method through the ModuleLevel vector.
//
class SlotCalculator : public ModuleAnalyzer {
const Module *TheModule;
bool IgnoreNamedNodes; // Shall we not count named nodes?
// One plane is the ordered list of values that share a plane index.
typedef vector<const Value*> TypePlane;
vector <TypePlane> Table;
map<const Value *, unsigned> NodeMap;
// ModuleLevel - Used to keep track of which values belong to the module,
// and which values belong to the currently incorporated method.
//
vector <unsigned> ModuleLevel;
public:
SlotCalculator(const Module *M, bool IgnoreNamed);
SlotCalculator(const Method *M, bool IgnoreNamed);// Start out in incorp state
inline ~SlotCalculator() {}
// getValSlot returns < 0 on error!
int getValSlot(const Value *D) const;
// getNumPlanes - Number of planes currently in the table.
inline unsigned getNumPlanes() const { return Table.size(); }
// getModuleLevel - Entry of ModuleLevel for the given plane, or 0 when the
// plane index is beyond what ModuleLevel records.
inline unsigned getModuleLevel(unsigned Plane) const {
return Plane < ModuleLevel.size() ? ModuleLevel[Plane] : 0;
}
// getPlane - Direct access to one plane. The index is not range checked.
inline const TypePlane &getPlane(unsigned Plane) const {
return Table[Plane];
}
// If you'd like to deal with a method, use these two methods to get its data
// into the SlotCalculator!
//
void incorporateMethod(const Method *M);
void purgeMethod();
protected:
// insertVal - Insert a value into the value table...
//
void insertVal(const Value *D);
// visitMethod - This member is called after the constant pool has been
// processed. The default implementation of this is a noop.
//
virtual bool visitMethod(const Method *M);
// processConstant is called once per each constant in the constant pool. It
// traverses the constant pool such that it visits each constant in the
// order of its type. Thus, all 'int' typed constants shall be visited
// sequentially, etc...
//
virtual bool processConstant(const ConstPoolVal *CPV);
// processType - This callback occurs when a derived type is discovered
// at the class level. This activity occurs when processing a constant pool.
//
virtual bool processType(const Type *Ty);
// processMethods - The default implementation of this method loops through
// all of the methods in the module and processModule's them. We don't want
// this (we want to explicitly visit them with incorporateMethod), so we
// disable it.
//
virtual bool processMethods(const Module *M) { return false; }
// processMethodArgument - This member is called for every argument that
// is passed into the method.
//
virtual bool processMethodArgument(const MethodArgument *MA);
// processBasicBlock - This member is called for each basic block in a method.
//
virtual bool processBasicBlock(const BasicBlock *BB);
// processInstruction - This member is called for each Instruction in a method.
//
virtual bool processInstruction(const Instruction *I);
};
#endif

View File

@ -0,0 +1,51 @@
//===-- llvm/SymTabDef.h - Implement SymbolTable Defs ------------*- C++ -*--=//
//
// This subclass of Def implements a def that has a symbol table for keeping
// track of children. This is used by the DefHolder template class...
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SYMTABDEF_H
#define LLVM_SYMTABDEF_H
#include "llvm/Value.h" // Get the definition of Value
#include "llvm/ConstantPool.h"
class SymbolTable;
class ConstPoolVal;
// SymTabValue - A Value that carries a symbol table and a constant pool. It
// keeps a pointer to its own symbol table (which may be null, see below), a
// pointer to a parent symbol table, and owns a ConstantPool by value.
//
class SymTabValue : public Value {
public:
typedef ConstantPool ConstantPoolType;
private:
SymbolTable *SymTab, *ParentSymTab;
ConstantPool ConstPool; // The constant pool
protected:
// setParentSymTab - Record the parent symbol table; defined out of line.
void setParentSymTab(SymbolTable *ST);
public:
SymTabValue(const Type *Ty, ValueTy dty, const string &name = "");
~SymTabValue(); // Implemented in Def.cpp
// hasSymbolTable() - Returns true if there is a symbol table allocated to
// this object AND if there is at least one name in it!
//
bool hasSymbolTable() const;
// CAUTION: The current symbol table may be null if there are no names (ie,
// the symbol table is empty)
//
inline SymbolTable *getSymbolTable() { return SymTab; }
inline const SymbolTable *getSymbolTable() const { return SymTab; }
// getConstantPool - Direct access to the owned constant pool, in const and
// non-const flavours.
inline const ConstantPool &getConstantPool() const{ return ConstPool; }
inline ConstantPool &getConstantPool() { return ConstPool; }
// getSymbolTableSure is guaranteed to not return a null pointer, because if
// the method does not already have a symtab, one is created. Use this if
// you intend to put something into the symbol table for the method.
//
SymbolTable *getSymbolTableSure(); // Implemented in Def.cpp
};
#endif

View File

@ -0,0 +1,83 @@
//===-- llvm/SymbolTable.h - Implement a type planed symtab -------*- C++ -*-=//
//
// This file implements a symbol table that has planes broken up by type.
// Identical types may have overlapping symbol names as long as they are
// distinct.
//
// Note that this implements a chained symbol table. If a name being 'lookup'd
// isn't found in the current symbol table, then the parent symbol table is
// searched.
//
// This chaining behavior does NOT affect iterators though: only the lookup
// method
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SYMBOL_TABLE_H
#define LLVM_SYMBOL_TABLE_H
#include <vector>
#include <map>
#include <string>
class Value;
class Type;
// TODO: Change this back to vector<map<const string, Value *> >
// Make the vector be a data member, and base it on UniqueID's
// That should be much more efficient!
//
// SymbolTable - A symbol table organised as one "plane" per type: the outer
// map is keyed by Type and each plane maps a name to the Value carrying that
// name.  A table may have a parent table; per the file comment, lookup() falls
// back to the parent when a name is not found locally, while the iterator
// style accessors below only ever look at this table.
//
class SymbolTable : public map<const Type *, map<const string, Value *> > {
  typedef map<const string, Value *> VarMap;
  typedef map<const Type *, VarMap> super;

  SymbolTable *ParentSymTab;

  // SymTabValue is the only outside class allowed to rechain a table.
  friend class SymTabValue;
  inline void setParentSymTab(SymbolTable *P) { ParentSymTab = P; }

public:
  // Iterators over a single type plane (name -> Value entries).
  typedef VarMap::iterator type_iterator;
  typedef VarMap::const_iterator type_const_iterator;

  inline SymbolTable(SymbolTable *P = 0) { ParentSymTab = P; }
  ~SymbolTable();

  SymbolTable *getParentSymTab() { return ParentSymTab; }

  // lookup - Returns null on failure...
  Value *lookup(const Type *Ty, const string &name);

  // find - returns end(Ty->getIDNumber()) on failure...
  type_iterator type_find(const Type *Ty, const string &name);
  type_iterator type_find(const Value *D);

  // insert - Add named definition to the symbol table...
  void insert(Value *N);
  void remove(Value *N);
  Value *type_remove(const type_iterator &It);

  // type_size - Number of names in the plane for TypeID.  A type that has no
  // plane yet simply has zero names; the lookup result must not be
  // dereferenced in that case.
  inline unsigned type_size(const Type *TypeID) const {
    const_iterator Plane = find(TypeID);
    if (Plane == end()) return 0;
    return Plane->second.size();
  }

  // Note that type_begin / type_end only work if you know that an element of
  // TypeID is already in the symbol table!!!
  //
  inline type_iterator type_begin(const Type *TypeID) {
    return find(TypeID)->second.begin();
  }
  inline type_const_iterator type_begin(const Type *TypeID) const {
    return find(TypeID)->second.begin();
  }

  inline type_iterator type_end(const Type *TypeID) {
    return find(TypeID)->second.end();
  }
  inline type_const_iterator type_end(const Type *TypeID) const {
    return find(TypeID)->second.end();
  }
};
#endif

View File

@ -0,0 +1,126 @@
//===-- llvm/Tools/CommandLine.h - Command line parser for tools -*- C++ -*--=//
//
// This class implements a command line argument processor that is useful when
// creating a tool.
//
// This class is defined entirely inline so that you don't have to link to any
// libraries to use this.
//
// TODO: make this extensible by passing in arguments to be read.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_COMMANDLINE_H
#define LLVM_TOOLS_COMMANDLINE_H
#include <string.h>   // memmove
#include <string>
// ToolCommandLine - Minimal command line processor shared by the tools.  It
// understands three things:
//
//   <filename> or -   The input file ("-" means stdin).  Only the first one
//                     found is consumed.
//   -o <filename>     The output file ("-" means stdout).
//   -f                Force output.
//
// Arguments that are recognised are removed from argv (and argc is reduced to
// match); everything else is left in place for the tool to interpret.  If no
// output file is given and the input is not stdin, the output name is derived
// from the input name by swapping or appending the .ll/.bc suffix.
//
class ToolCommandLine {
public:
  // Parse argc/argv.  OutputBytecode selects which suffix a derived output
  // filename gets: ".bc" when true, ".ll" when false.
  inline ToolCommandLine(int &argc, char **argv, bool OutputBytecode = true);
  // Use the given filenames verbatim; Force is off.
  inline ToolCommandLine(const string &infn, const string &outfn = "-");
  inline ToolCommandLine(const ToolCommandLine &O);
  inline ToolCommandLine &operator=(const ToolCommandLine &O);

  inline bool getForce() const { return Force; }
  inline const string getInputFilename() const { return InputFilename; }
  inline const string getOutputFilename() const { return OutputFilename; }

private:
  // calculateOutputFilename - Derive OutputFilename from InputFilename.
  //
  // When producing bytecode a trailing ".ll" becomes ".bc"; when producing
  // assembly a trailing ".bc" becomes ".ll".  If the input does not end in the
  // expected suffix, or is three characters or shorter (so it consists of at
  // most the suffix itself), the new suffix is appended instead.
  //
  void calculateOutputFilename(bool OutputBytecode) {
    const char *OldSuffix = OutputBytecode ? ".ll" : ".bc";
    const char *NewSuffix = OutputBytecode ? ".bc" : ".ll";

    OutputFilename = InputFilename;
    const string::size_type Len = OutputFilename.length();

    if (Len > 3 && OutputFilename.compare(Len-3, 3, OldSuffix) == 0)
      OutputFilename.replace(Len-3, 3, NewSuffix);   // swap the suffix
    else
      OutputFilename += NewSuffix;                   // nothing to swap
  }

private:
  string InputFilename;           // Filename to read from.  If "-", use stdin.
  string OutputFilename;          // Filename to write to.   If "-", use stdout.
  bool   Force;                   // Force output (-f argument)
};

inline ToolCommandLine::ToolCommandLine(int &argc, char **argv, bool OutBC)
  : InputFilename("-"), OutputFilename("-"), Force(false) {
  bool FoundInputFN  = false;
  bool FoundOutputFN = false;
  bool FoundForce    = false;

  for (int i = 1; i < argc; i++) {
    int RemoveArg = 0;            // Number of argv entries consumed at i

    if (argv[i][0] == '-') {
      if (!FoundInputFN && argv[i][1] == 0) { // Is the current argument '-'
        InputFilename = argv[i];
        FoundInputFN = true;
        RemoveArg = 1;
      } else if (!FoundOutputFN && (argv[i][1] == 'o' && argv[i][2] == 0)) {
        // Is the argument -o?
        if (i+1 < argc) {        // Next arg is output fn
          OutputFilename = argv[i+1];
          FoundOutputFN = true;
          RemoveArg = 2;
        }
        // A trailing -o with no filename after it is left in argv untouched.
      } else if (!FoundForce && (argv[i][1] == 'f' && argv[i][2] == 0)) {
        Force = true;
        FoundForce = true;
        RemoveArg = 1;
      }
    } else if (!FoundInputFN) {     // Is the current argument '[^-].*'?
      InputFilename = argv[i];
      FoundInputFN = true;
      RemoveArg = 1;
    }

    if (RemoveArg) {
      argc -= RemoveArg;                           // Shift args over...
      // argc already holds the new count, so (argc-i) is exactly the number
      // of entries that followed the consumed ones.
      memmove(argv+i, argv+i+RemoveArg, (argc-i)*sizeof(char*));
      i--;                                         // Reprocess this argument...
    }
  }

  // Only invent an output name for a real input file; stdin keeps stdout.
  if (!FoundOutputFN && InputFilename != "-")
    calculateOutputFilename(OutBC);
}

inline ToolCommandLine::ToolCommandLine(const string &inf,
                                        const string &outf)
  : InputFilename(inf), OutputFilename(outf), Force(false) {
}

inline ToolCommandLine::ToolCommandLine(const ToolCommandLine &Opts)
  : InputFilename(Opts.InputFilename), OutputFilename(Opts.OutputFilename),
    Force(Opts.Force) {
}

inline ToolCommandLine &ToolCommandLine::operator=(const ToolCommandLine &Opts){
  InputFilename  = Opts.InputFilename;
  OutputFilename = Opts.OutputFilename;
  Force          = Opts.Force;
  return *this;
}
#endif

View File

@ -0,0 +1,26 @@
// TODO: This file sucks. Not only does it not work, but this stuff should be
// autoconfiscated anyways. Major FIXME
#ifndef LLVM_TOOLS_DATATYPES_H
#define LLVM_TOOLS_DATATYPES_H
// Should define the following:
// LITTLE_ENDIAN if applicable
// int64_t
// uint64_t
#ifdef LINUX
#include <stdint.h> // Defined by ISO C 99
#include <endian.h>
#else
#include <sys/types.h>
#ifdef _LITTLE_ENDIAN
#define LITTLE_ENDIAN 1
#endif
#endif
#endif

View File

@ -0,0 +1,63 @@
//===-- StringExtras.h - Useful string functions -----------------*- C++ -*--=//
//
// This file contains some functions that are useful when dealing with strings.
// No library is required when using these functions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_STRING_EXTRAS_H
#define LLVM_TOOLS_STRING_EXTRAS_H
#include <string>
#include "llvm/Tools/DataTypes.h"
// utostr - Convert an unsigned 64 bit value into its decimal string
// representation.  If isNeg is true a '-' is placed in front of the digits
// (this is how itostr prints negative numbers); note that this also applies
// when X is zero.
//
// The digits are generated from least to most significant, filling the buffer
// from its end towards its start.  A 64 bit value needs at most 20 digits, so
// 40 bytes is plenty for the digits, the sign and the terminating null.
//
static inline std::string utostr(uint64_t X, bool isNeg = false) {
  char Buffer[40];
  char *BufPtr = Buffer+39;

  *BufPtr = 0;                  // Null terminate buffer...
  if (X == 0) *--BufPtr = '0';  // Handle special case...

  while (X) {
    *--BufPtr = char('0' + X % 10);
    X /= 10;
  }

  if (isNeg) *--BufPtr = '-';   // Add negative sign...
  return std::string(BufPtr);
}
static inline string itostr(int64_t X) {
if (X < 0)
return utostr((uint64_t)-X, true);
else
return utostr((uint64_t)X);
}
// utostr - Convert an unsigned value into its decimal string representation.
// If isNeg is true a '-' is placed in front of the digits (this is how itostr
// prints negative numbers); note that this also applies when X is zero.
//
// The digits are generated from least to most significant, filling the buffer
// from its end towards its start.
//
static inline std::string utostr(unsigned X, bool isNeg = false) {
  char Buffer[20];
  char *BufPtr = Buffer+19;

  *BufPtr = 0;                  // Null terminate buffer...
  if (X == 0) *--BufPtr = '0';  // Handle special case...

  while (X) {
    *--BufPtr = char('0' + X % 10);
    X /= 10;
  }

  if (isNeg) *--BufPtr = '-';   // Add negative sign...
  return std::string(BufPtr);
}
static inline string itostr(int X) {
if (X < 0)
return utostr((unsigned)-X, true);
else
return utostr((unsigned)X);
}
#endif

116
include/llvm/Type.h Normal file
View File

@ -0,0 +1,116 @@
//===-- llvm/Type.h - Classes for handling data types ------------*- C++ -*--=//
//
// This file contains the declaration of the Type class. For more "Type" type
// stuff, look in DerivedTypes.h and Opt/ConstantHandling.h
//
// Note that instances of the Type class are immutable: once they are created,
// they are never changed. Also note that only one instance of a particular
// type is ever created. Thus seeing if two types are equal is a matter of
// doing a trivial pointer comparison.
//
// Types, once allocated, are never free'd.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TYPE_H
#define LLVM_TYPE_H
#include "llvm/Value.h"
class ConstRules;
class ConstPoolVal;
// Type - Base class of everything in the type system.  Each Type carries a
// PrimitiveID that says which kind of type it is and a unique ID number.  The
// constructor is protected, so Type objects are only created by subclasses.
//
class Type : public Value {
public:
  //===--------------------------------------------------------------------===//
  // Definitions of all of the base types for the Type system.  Based on this
  // value, you can cast to a "DerivedType" subclass (see DerivedTypes.h)
  // Note: If you add an element to this, you need to add an element to the
  // Type::getPrimitiveType function, or else things will break!
  //
  enum PrimitiveID {
    VoidTyID = 0, BoolTyID,        //  0, 1: Basics...
    UByteTyID,    SByteTyID,       //  2, 3: 8 bit types...
    UShortTyID,   ShortTyID,       //  4, 5: 16 bit types...
    UIntTyID,     IntTyID,         //  6, 7: 32 bit types...
    ULongTyID,    LongTyID,        //  8, 9: 64 bit types...
    FloatTyID,    DoubleTyID,      // 10,11: Floating point types...
    TypeTyID,                      // 12   : Type definitions
    LabelTyID,    LockTyID,        // 13,14: Labels... mutexes...

    // TODO: Kill FillerTyID.  It just makes FirstDerivedTyID = 0x10
    FillerTyID,                    // 15   : filler

    // Derived types... see DerivedTypes.h file...
    // Make sure FirstDerivedTyID stays up to date!!!
    MethodTyID,   ModuleTyID,      // Methods... Modules...
    ArrayTyID,    PointerTyID,     // Array... pointer...
    StructTyID,   PackedTyID,      // Structure... SIMD 'packed' format...
    //...

    NumPrimitiveIDs,               // Must remain as last defined ID
    FirstDerivedTyID = MethodTyID,
  };

private:
  PrimitiveID ID;   // The current base type of this type...
  unsigned    UID;  // The unique ID number for this class

  // ConstRulesImpl - See Opt/ConstantHandling.h for more info.  Declared
  // mutable so that setConstRules can be a const member.
  mutable const ConstRules *ConstRulesImpl;

protected:
  // ctor is protected, so only subclasses can create Type objects...
  Type(const string &Name, PrimitiveID id);

public:
  virtual ~Type() {}

  // isSigned - Return whether a numeric type is signed.  The base class
  // answer is always false.
  virtual bool isSigned() const { return false; }

  // isUnsigned - Return whether a numeric type is unsigned.  This is not
  // quite the complement of isSigned... nonnumeric types return false as they
  // do with isSigned.
  //
  virtual bool isUnsigned() const { return false; }

  // Accessors for the two identifiers stored in every type.
  inline unsigned    getUniqueID()    const { return UID; }
  inline PrimitiveID getPrimitiveID() const { return ID; }

  // getPrimitiveType/getUniqueIDType - Return a type based on an identifier.
  static const Type *getPrimitiveType(PrimitiveID IDNumber);
  static const Type *getUniqueIDType(unsigned UID);

  // Methods for dealing with constants uniformly.  See Opt/ConstantHandling.h
  // for more info on this...
  //
  inline const ConstRules *getConstRules() const { return ConstRulesImpl; }
  inline void setConstRules(const ConstRules *R) const { ConstRulesImpl = R; }

public:   // These are the builtin types that are always available...
  static const Type *VoidTy;
  static const Type *BoolTy;
  static const Type *SByteTy;
  static const Type *UByteTy;
  static const Type *ShortTy;
  static const Type *UShortTy;
  static const Type *IntTy;
  static const Type *UIntTy;
  static const Type *LongTy;
  static const Type *ULongTy;
  static const Type *FloatTy;
  static const Type *DoubleTy;
  static const Type *TypeTy;
  static const Type *LabelTy;
  static const Type *LockTy;

  // Here are some useful little methods to query what type derived types are
  // Note that all other types can just compare to see if this == Type::xxxTy;
  //
  inline bool isDerivedType()   const { return ID >= FirstDerivedTyID; }
  inline bool isPrimitiveType() const { return ID <  FirstDerivedTyID; }

  inline bool isLabelType()     const { return this == LabelTy; }
  inline bool isMethodType()    const { return ID == MethodTyID; }
  inline bool isModuleType()    const { return ID == ModuleTyID; }
  inline bool isArrayType()     const { return ID == ArrayTyID; }
  inline bool isPointerType()   const { return ID == PointerTyID; }
  inline bool isStructType()    const { return ID == StructTyID; }
};
#endif

47
include/llvm/User.h Normal file
View File

@ -0,0 +1,47 @@
//===-- llvm/User.h - User class definition ----------------------*- C++ -*--=//
//
// This class defines the interface that one who 'use's a Value must implement.
// Each instance of the Value class keeps track of what User's have handles
// to it.
//
// * Instructions are the largest class of User's.
// * Constants may be users of other constants (think arrays and stuff)
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_USER_H
#define LLVM_USER_H
#include "llvm/Value.h"
// User - Abstract interface for a Value that refers to other Values through
// numbered operands.  Subclasses supply the operand accessors and
// dropAllReferences(); the copy constructor is declared private and left
// unimplemented so that User objects cannot be copied.
//
class User : public Value {
User(const User &); // Do not implement
public:
User(const Type *Ty, ValueTy vty, const string &name = "");
virtual ~User() {}
// if i >= the number of operands (i.e. i is out of range), then getOperand()
// returns 0, and setOperand returns false. setOperand() may also return
// false if the operand is of the wrong type.
//
virtual Value *getOperand(unsigned i) = 0;
virtual const Value *getOperand(unsigned i) const = 0;
virtual bool setOperand(unsigned i, Value *Val) = 0;
// dropAllReferences() - This virtual function should be overridden to "let
// go" of all references that this user is maintaining. This allows one to
// 'delete' a whole class at a time, even though there may be circular
// references... first all references are dropped, and all use counts go to
// zero. Then everything is delete'd for real. Note that no operations are
// valid on an object that has "dropped all references", except operator
// delete.
//
virtual void dropAllReferences() = 0;
// replaceUsesOfWith - Replaces all references to the "From" definition with
// references to the "To" definition. (defined in Value.cpp)
//
void replaceUsesOfWith(Value *From, Value *To);
};
#endif

124
include/llvm/Value.h Normal file
View File

@ -0,0 +1,124 @@
//===-- llvm/Value.h - Definition of the Value class -------------*- C++ -*--=//
//
// This file defines the very important Value class. This is subclassed by a
// bunch of other important classes, like Def, Method, Module, Type, etc...
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_VALUE_H
#define LLVM_VALUE_H
#include <string>
#include <list>
class User;
class Type;
template<class ValueSubclass, class ItemParentType> class ValueHolder;
//===----------------------------------------------------------------------===//
// Value Class
//===----------------------------------------------------------------------===//
// Value - Base class of everything that has a type, an optional name, and a
// list of the Users that currently refer to it.  The ValueTy tag records which
// concrete kind of object a Value is, so clients can cast accordingly.
// Values are not copyable: the copy constructor is private and unimplemented.
//
class Value {
public:
  enum ValueTy {
    TypeVal,                // This is an instance of Type
    ConstantVal,            // This is an instance of ConstPoolVal
    MethodArgumentVal,      // This is an instance of MethodArgument
    InstructionVal,         // This is an instance of Instruction
    BasicBlockVal,          // This is an instance of BasicBlock
    MethodVal,              // This is an instance of Method
    ModuleVal,              // This is an instance of Module
  };

private:
  list<User *> Uses;        // Users that currently hold a reference to this
  string Name;              // Empty string means "no name"
  const Type *Ty;
  ValueTy VTy;

  Value(const Value &);     // Do not implement

protected:
  inline void setType(const Type *ty) { Ty = ty; }

public:
  Value(const Type *Ty, ValueTy vty, const string &name = "");
  virtual ~Value();

  inline const Type *getType() const { return Ty; }
  inline ValueTy getValueType() const { return VTy; }

  inline bool          hasName() const { return !Name.empty(); }
  inline const string &getName() const { return Name; }
  virtual void setName(const string &name) { Name = name; }

  // replaceAllUsesWith - Go through the uses list for this definition and make
  // each use point to "D" instead of "this".  After this completes, 'this's
  // use list should be empty.
  //
  void replaceAllUsesWith(Value *D);

  //----------------------------------------------------------------------
  // Methods for handling the list of uses of this DEF.
  //
  typedef list<User*>::iterator       use_iterator;
  typedef list<User*>::const_iterator use_const_iterator;

  // use_size - Return the number of entries on the use list.  This returns a
  // count, not a flag: use use_empty() to ask whether there are any uses.
  inline unsigned           use_size()  const { return Uses.size(); }
  inline bool               use_empty() const { return Uses.empty(); }
  inline use_iterator       use_begin()       { return Uses.begin(); }
  inline use_const_iterator use_begin() const { return Uses.begin(); }
  inline use_iterator       use_end()         { return Uses.end(); }
  inline use_const_iterator use_end()   const { return Uses.end(); }

  inline void use_push_back(User *I)  { Uses.push_back(I); }
  User *use_remove(use_iterator &I);

  // addUse/killUse - Register or unregister a User of this value.  These are
  // the hooks that UseTy calls to keep the use list up to date.
  inline void addUse(User *I)         { Uses.push_back(I); }
  void killUse(User *I);
};
// UseTy and its friendly typedefs (Use) are here to make keeping the "use"
// list of a definition node up-to-date really easy.
//
// UseTy - A pointer-like handle that a User embeds for every value it refers
// to.  The handle stores the value pointer (which may be null) together with
// the User that owns the handle, and keeps the value's use list in sync:
// whenever the handle starts pointing at a value it calls addUse(U) on it, and
// whenever it stops pointing at a value it calls killUse(U) on it.
//
template<class ValueSubclass>
class UseTy {
  ValueSubclass *Val;   // The value referred to, or null
  User *U;              // The user that owns this handle
public:
  inline UseTy(ValueSubclass *v, User *user) {
    Val = v; U = user;
    if (Val) Val->addUse(U);
  }

  inline ~UseTy() { if (Val) Val->killUse(U); }

  inline operator ValueSubclass *() const { return Val; }

  // Copy ctor - The copy is owned by the same User as the original and refers
  // to the same value.  Val starts out null so that the assignment below has
  // nothing to release.
  inline UseTy(const UseTy<ValueSubclass> &user) {
    Val = 0;
    U = user.U;
    operator=(user);
  }

  // Retarget this handle at V (which may be null), updating both use lists.
  inline ValueSubclass *operator=(ValueSubclass *V) {
    if (Val) Val->killUse(U);
    Val = V;
    if (V) V->addUse(U);
    return V;
  }

  inline       ValueSubclass *operator->()       { return Val; }
  inline const ValueSubclass *operator->() const { return Val; }

  // Retarget this handle at whatever 'user' refers to.  The owning User (U) is
  // left alone.  The source handle may be null, so the new value has to be
  // checked before it is told about its new user.
  inline UseTy<ValueSubclass> &operator=(const UseTy<ValueSubclass> &user) {
    if (Val) Val->killUse(U);
    Val = user.Val;
    if (Val) Val->addUse(U);
    return *this;
  }
};
typedef UseTy<Value> Use;
#endif

View File

@ -0,0 +1,86 @@
//===-- llvm/ValueHolder.h - Class to hold multiple values -------*- C++ -*--=//
//
// This defines a class that is used as a fancy Definition container. It is
// special because it helps keep the symbol table of the container method up to
// date with the goings on inside of it.
//
// This is used to represent things like the instructions of a basic block and
// the arguments to a method.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_VALUEHOLDER_H
#define LLVM_VALUEHOLDER_H
#include <vector>
class SymTabValue;
// ItemParentType ItemParent - I call setParent() on all of my
// "ValueSubclass" items, and this is the value that I pass in.
//
template<class ValueSubclass, class ItemParentType>
class ValueHolder {
// TODO: Should I use a deque instead of a vector?
vector<ValueSubclass*> ValueList;
ItemParentType *ItemParent;
SymTabValue *Parent;
ValueHolder(const ValueHolder &V); // DO NOT IMPLEMENT
public:
inline ValueHolder(ItemParentType *IP, SymTabValue *parent = 0) {
assert(IP && "Item parent may not be null!");
ItemParent = IP;
Parent = 0;
setParent(parent);
}
inline ~ValueHolder() {
// The caller should have called delete_all first...
assert(empty() && "ValueHolder contains definitions!");
assert(Parent == 0 && "Should have been unlinked from method!");
}
inline const SymTabValue *getParent() const { return Parent; }
inline SymTabValue *getParent() { return Parent; }
void setParent(SymTabValue *Parent); // Defined in ValueHolderImpl.h
inline unsigned size() const { return ValueList.size(); }
inline bool empty() const { return ValueList.empty(); }
inline const ValueSubclass *front() const { return ValueList.front(); }
inline ValueSubclass *front() { return ValueList.front(); }
inline const ValueSubclass *back() const { return ValueList.back(); }
inline ValueSubclass *back() { return ValueList.back(); }
//===--------------------------------------------------------------------===//
// sub-Definition iterator code
//===--------------------------------------------------------------------===//
//
typedef vector<ValueSubclass*>::iterator iterator;
typedef vector<ValueSubclass*>::const_iterator const_iterator;
inline iterator begin() { return ValueList.begin(); }
inline const_iterator begin() const { return ValueList.begin(); }
inline iterator end() { return ValueList.end(); }
inline const_iterator end() const { return ValueList.end(); }
void delete_all() { // Delete all removes and deletes all elements
// TODO: REMOVE FROM END OF VECTOR!!!
while (begin() != end()) {
iterator I = begin();
delete remove(I); // Delete all instructions...
}
}
// ValueHolder::remove(iterator &) this removes the element at the location
// specified by the iterator, and leaves the iterator pointing to the element
// that used to follow the element deleted.
//
ValueSubclass *remove(iterator &DI); // Defined in ValueHolderImpl.h
void remove(ValueSubclass *D); // Defined in ValueHolderImpl.h
inline void push_front(ValueSubclass *Inst); // Defined in ValueHolderImpl.h
inline void push_back(ValueSubclass *Inst); // Defined in ValueHolderImpl.h
};
#endif

140
include/llvm/iMemory.h Normal file
View File

@ -0,0 +1,140 @@
//===-- llvm/iMemory.h - Memory Operator node definitions --------*- C++ -*--=//
//
// This file contains the declarations of all of the memory related operators.
// This includes: malloc, free, alloca, load, store, getfield, putfield
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IMEMORY_H
#define LLVM_IMEMORY_H
#include "llvm/Instruction.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ConstPoolVals.h"
class ConstPoolType;
// AllocationInst - Common base class of the allocation instructions (see
// MallocInst and AllocaInst).  It has two operands: operand 0 is the
// ConstPoolType describing what is allocated, operand 1 is an optional array
// size (null when no size is given).
//
class AllocationInst : public Instruction {
protected:
  UseTy<ConstPoolType> TyVal;
  Use ArraySize;
public:
  AllocationInst(ConstPoolType *tyVal, Value *arrSize, unsigned iTy,
                 const string &Name = "")
    : Instruction(tyVal->getValue(), iTy, Name),
      TyVal(tyVal, this),
      ArraySize(arrSize, this) {

    // Make sure they didn't try to specify a size for an invalid type...
    assert(arrSize == 0 ||
           (getType()->getValueType()->isArrayType() &&
            ((const ArrayType*)getType()->getValueType())->isUnsized()) &&
           "Trying to allocate something other than unsized array, with size!");

    // Make sure that if a size is specified, that it is a uint!
    assert(arrSize == 0 || arrSize->getType() == Type::UIntTy &&
           "Malloc SIZE is not a 'uint'!");
  }
  inline ~AllocationInst() {}

  // getType - Overload to return most specific pointer type...
  inline const PointerType *getType() const {
    return (const PointerType*)Instruction::getType();
  }

  virtual Instruction *clone() const = 0;

  // Let go of both operands.
  inline virtual void dropAllReferences() { TyVal = 0; ArraySize = 0; }

  // setOperand - Replace operand 0 (the type) or operand 1 (the array size).
  // Any other index is rejected by returning false.
  virtual bool setOperand(unsigned i, Value *Val) {
    switch (i) {
    case 0:
      assert(!Val || Val->getValueType() == Value::ConstantVal);
      TyVal = (ConstPoolType*)Val;
      return true;

    case 1:
      // Make sure they didn't try to specify a size for an invalid type...
      assert(Val == 0 ||
             (getType()->getValueType()->isArrayType() &&
              ((const ArrayType*)getType()->getValueType())->isUnsized()) &&
             "Trying to allocate something other than unsized array, with size!");

      // Make sure that if a size is specified, that it is a uint!
      assert(Val == 0 || Val->getType() == Type::UIntTy &&
             "Malloc SIZE is not a 'uint'!");
      ArraySize = Val;
      return true;

    default:
      return false;
    }
  }

  virtual unsigned getNumOperands() const { return 2; }

  // getOperand - Operand 0 is the type, operand 1 the array size; anything
  // else yields null.
  virtual const Value *getOperand(unsigned i) const {
    if (i == 0) return TyVal;
    if (i == 1) return ArraySize;
    return 0;
  }
};
// MallocInst - AllocationInst constructed with the Instruction::Malloc
// opcode; getOpcode() reports it as "malloc".
//
class MallocInst : public AllocationInst {
public:
MallocInst(ConstPoolType *tyVal, Value *ArraySize = 0,
const string &Name = "")
: AllocationInst(tyVal, ArraySize, Instruction::Malloc, Name) {}
inline ~MallocInst() {}
// clone - Create a new MallocInst with the same type and array size operands.
// No name is passed, so the new instruction gets the default (empty) name.
virtual Instruction *clone() const {
return new MallocInst(TyVal, ArraySize);
}
virtual string getOpcode() const { return "malloc"; }
};
// AllocaInst - AllocationInst constructed with the Instruction::Alloca
// opcode; getOpcode() reports it as "alloca".
//
class AllocaInst : public AllocationInst {
public:
AllocaInst(ConstPoolType *tyVal, Value *ArraySize = 0,
const string &Name = "")
: AllocationInst(tyVal, ArraySize, Instruction::Alloca, Name) {}
inline ~AllocaInst() {}
// clone - Create a new AllocaInst with the same type and array size operands.
// No name is passed, so the new instruction gets the default (empty) name.
virtual Instruction *clone() const {
return new AllocaInst(TyVal, ArraySize);
}
virtual string getOpcode() const { return "alloca"; }
};
// FreeInst - Instruction of type void with a single operand: the pointer that
// is being freed.  The operand is asserted to be of pointer type both at
// construction time and whenever it is replaced with a non-null value.
//
class FreeInst : public Instruction {
protected:
  Use Pointer;
public:
  FreeInst(Value *Ptr, const string &Name = "")
    : Instruction(Type::VoidTy, Instruction::Free, Name),
      Pointer(Ptr, this) {
    assert(Ptr->getType()->isPointerType() && "Can't free nonpointer!");
  }
  inline ~FreeInst() {}

  virtual Instruction *clone() const { return new FreeInst(Pointer); }

  // Let go of the pointer operand.
  inline virtual void dropAllReferences() { Pointer = 0; }

  // setOperand - Only operand 0 exists; any other index is rejected by
  // returning false.
  virtual bool setOperand(unsigned i, Value *Val) {
    if (i != 0)
      return false;

    assert(!Val || Val->getType()->isPointerType() &&
           "Can't free nonpointer!");
    Pointer = Val;
    return true;
  }

  virtual unsigned getNumOperands() const { return 1; }

  // getOperand - Return the pointer for index 0, null for everything else.
  virtual const Value *getOperand(unsigned i) const {
    if (i == 0)
      return Pointer;
    return 0;
  }

  virtual string getOpcode() const { return "free"; }
};
#endif // LLVM_IMEMORY_H

48
include/llvm/iOperators.h Normal file
View File

@ -0,0 +1,48 @@
//===-- llvm/iOperators.h - Binary Operator node definitions -----*- C++ -*--=//
//
// This file contains the declarations of all of the Binary Operator classes.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IBINARY_H
#define LLVM_IBINARY_H
#include "llvm/InstrTypes.h"
//===----------------------------------------------------------------------===//
// Classes to represent Binary operators
//===----------------------------------------------------------------------===//
//
// All of these classes are subclasses of the BinaryOperator class...
//
// AddInst - BinaryOperator constructed with the Instruction::Add opcode on the
// operands S1 and S2; getOpcode() reports it as "add".
//
class AddInst : public BinaryOperator {
public:
AddInst(Value *S1, Value *S2, const string &Name = "")
: BinaryOperator(Instruction::Add, S1, S2, Name) {
}
virtual string getOpcode() const { return "add"; }
};
// SubInst - BinaryOperator constructed with the Instruction::Sub opcode on the
// operands S1 and S2; getOpcode() reports it as "sub".
//
class SubInst : public BinaryOperator {
public:
SubInst(Value *S1, Value *S2, const string &Name = "")
: BinaryOperator(Instruction::Sub, S1, S2, Name) {
}
virtual string getOpcode() const { return "sub"; }
};
// SetCondInst - BinaryOperator that additionally stores a BinaryOps value
// (OpType).  Both the constructor and getOpcode() are defined out of line.
// NOTE(review): presumably OpType selects which comparison is performed and
// getOpcode() derives its string from it -- confirm against the implementation.
//
class SetCondInst : public BinaryOperator {
BinaryOps OpType;
public:
SetCondInst(BinaryOps opType, Value *S1, Value *S2,
const string &Name = "");
virtual string getOpcode() const;
};
#endif

116
include/llvm/iOther.h Normal file
View File

@ -0,0 +1,116 @@
//===-- llvm/iOther.h - "Other" instruction node definitions -----*- C++ -*--=//
//
// This file contains the declarations for instructions that fall into the
// grandiose 'other' category...
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IOTHER_H
#define LLVM_IOTHER_H
#include "llvm/InstrTypes.h"
#include "llvm/Method.h"
#include <vector>
//===----------------------------------------------------------------------===//
// PHINode Class
//===----------------------------------------------------------------------===//
// PHINode - The PHINode class is used to represent the magical mystical PHI
// node, that can not exist in nature, but can be synthesized in a computer
// scientist's overactive imagination.
//
// TODO: FIXME: This representation is not good enough. Consider the following
// code:
// BB0: %x = int %0
// BB1: %y = int %1
// BB2: %z = phi int %0, %1 - Can't tell where constants come from!
//
// TOFIX: Store pair<Use,BasicBlockUse> instead of just <Use>
//
// PHINode - Instruction whose operands are the entries of IncomingValues, in
// order.  New incoming values are appended through addIncoming().
//
class PHINode : public Instruction {
vector<Use> IncomingValues;
// Copy ctor is private; it is what clone() uses to duplicate the node.
PHINode(const PHINode &PN);
public:
PHINode(const Type *Ty, const string &Name = "");
inline ~PHINode() { dropAllReferences(); }
virtual Instruction *clone() const { return new PHINode(*this); }
// Implement all of the functionality required by User...
//
virtual void dropAllReferences();
// Returns the i'th incoming value, or null if i is out of range.
virtual const Value *getOperand(unsigned i) const {
return (i < IncomingValues.size()) ? IncomingValues[i] : 0;
}
// Non-const flavor: forwards to the const version and casts away the const.
inline Value *getOperand(unsigned i) {
return (Value*)((const PHINode*)this)->getOperand(i);
}
virtual unsigned getNumOperands() const { return IncomingValues.size(); }
virtual bool setOperand(unsigned i, Value *Val);
virtual string getOpcode() const { return "phi"; }
void addIncoming(Value *D);
};
//===----------------------------------------------------------------------===//
// MethodArgument Class
//===----------------------------------------------------------------------===//
// MethodArgument - A Value tagged Value::MethodArgumentVal that remembers the
// Method it belongs to.  The parent starts out null and can only be changed
// through the private setParent(), which is made available to
// ValueHolder<MethodArgument,Method> by the friend declaration.
//
class MethodArgument : public Value { // Defined in the InstrType.cpp file
Method *Parent;
friend class ValueHolder<MethodArgument,Method>;
inline void setParent(Method *parent) { Parent = parent; }
public:
MethodArgument(const Type *Ty, const string &Name = "")
: Value(Ty, Value::MethodArgumentVal, Name) {
Parent = 0;
}
// Specialize setName to handle symbol table majik...
virtual void setName(const string &name);
inline const Method *getParent() const { return Parent; }
inline Method *getParent() { return Parent; }
};
//===----------------------------------------------------------------------===//
// Classes to represent function calls and method invocations
//===----------------------------------------------------------------------===//
// CallInst - Instruction that calls a Method.  Operand 0 is the called method
// (M); operands 1 .. Params.size() are the entries of Params, so the operand
// count is always Params.size()+1.
//
class CallInst : public Instruction {
MethodUse M;
vector<Use> Params;
// Copy ctor is private; it is what clone() uses to duplicate the call.
CallInst(const CallInst &CI);
public:
CallInst(Method *M, vector<Value*> &params, const string &Name = "");
inline ~CallInst() { dropAllReferences(); }
virtual string getOpcode() const { return "call"; }
virtual Instruction *clone() const { return new CallInst(*this); }
bool hasSideEffects() const { return true; }
const Method *getCalledMethod() const { return M; }
Method *getCalledMethod() { return M; }
// Implement all of the functionality required by Instruction...
//
virtual void dropAllReferences();
// Index 0 yields the called method, index i in [1, Params.size()] yields
// Params[i-1], and anything larger yields null.
virtual const Value *getOperand(unsigned i) const {
return i == 0 ? M : ((i <= Params.size()) ? Params[i-1] : 0);
}
// Non-const flavor: forwards to the const version and casts away the const.
inline Value *getOperand(unsigned i) {
return (Value*)((const CallInst*)this)->getOperand(i);
}
virtual unsigned getNumOperands() const { return Params.size()+1; }
virtual bool setOperand(unsigned i, Value *Val);
};
#endif

136
include/llvm/iTerminators.h Normal file
View File

@ -0,0 +1,136 @@
//===-- llvm/iTerminators.h - Terminator instruction nodes -------*- C++ -*--=//
//
// This file contains the declarations for all the subclasses of the
// Instruction class, which is itself defined in the Instruction.h file. In
// between these definitions and the Instruction class are classes that expose
// the SSA properties of each instruction, and that form the SSA graph.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ITERMINATORS_H
#define LLVM_ITERMINATORS_H
#include "llvm/InstrTypes.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
//===----------------------------------------------------------------------===//
// Classes to represent Basic Block "Terminator" instructions
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
// ReturnInst - Return a value (possibly void), from a method. Execution does
// not continue in this method any longer.
//
// ReturnInst - Terminator with at most one operand (the value being returned)
// and no successors.
//
class ReturnInst : public TerminatorInst {
Use Val; // Will be null if returning void...
// Copy ctor is private; it is what clone() uses to duplicate the instruction.
ReturnInst(const ReturnInst &RI);
public:
ReturnInst(Value *value = 0);
inline ~ReturnInst() { dropAllReferences(); }
virtual Instruction *clone() const { return new ReturnInst(*this); }
virtual string getOpcode() const { return "ret"; }
inline const Value *getReturnValue() const { return Val; }
inline Value *getReturnValue() { return Val; }
virtual void dropAllReferences();
// Operand 0 is the return value (possibly null); other indices yield null.
virtual const Value *getOperand(unsigned i) const {
return (i == 0) ? Val : 0;
}
inline Value *getOperand(unsigned i) { return (i == 0) ? Val : 0; }
virtual bool setOperand(unsigned i, Value *Val);
// The operand count is 1 when a value is returned and 0 when Val is null.
virtual unsigned getNumOperands() const { return Val != 0; }
// Additionally, they must provide a method to get at the successors of this
// terminator instruction. If 'idx' is out of range, a null pointer shall be
// returned.
//
// A return has no successors, so every index is out of range.
virtual const BasicBlock *getSuccessor(unsigned idx) const { return 0; }
virtual unsigned getNumSuccessors() const { return 0; }
};
//===---------------------------------------------------------------------------
// BranchInst - Conditional or Unconditional Branch instruction.
//
// BranchInst - Terminator holding a true destination, a false destination and
// a condition.  The branch counts as unconditional whenever the condition or
// the false destination is null (see isUnconditional).
//
class BranchInst : public TerminatorInst {
BasicBlockUse TrueDest, FalseDest;
Use Condition;
// Copy ctor is private; it is what clone() uses to duplicate the instruction.
BranchInst(const BranchInst &BI);
public:
// If cond = null, then is an unconditional br...
BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse = 0, Value *cond = 0);
inline ~BranchInst() { dropAllReferences(); }
virtual Instruction *clone() const { return new BranchInst(*this); }
virtual void dropAllReferences();
// True if either the condition or the false destination is missing.
inline bool isUnconditional() const {
return Condition == 0 || !FalseDest;
}
virtual string getOpcode() const { return "br"; }
// Non-const flavor: forwards to the const version and casts away the const.
inline Value *getOperand(unsigned i) {
return (Value*)((const BranchInst *)this)->getOperand(i);
}
virtual const Value *getOperand(unsigned i) const;
virtual bool setOperand(unsigned i, Value *Val);
// An unconditional branch has 1 operand, a conditional branch has 3.
virtual unsigned getNumOperands() const { return isUnconditional() ? 1 : 3; }
// Additionally, they must provide a method to get at the successors of this
// terminator instruction. If 'idx' is out of range, a null pointer shall be
// returned.
//
virtual const BasicBlock *getSuccessor(unsigned idx) const;
// One successor when unconditional, two when conditional.
virtual unsigned getNumSuccessors() const { return 1+!isUnconditional(); }
};
//===---------------------------------------------------------------------------
// SwitchInst - Multiway switch
//
// SwitchInst - Terminator holding a value, a default destination, and a list
// of (constant, destination) pairs that is extended through dest_push_back().
// NOTE(review): presumably control transfers to the destination whose constant
// matches the value, and to the default destination otherwise -- confirm
// against the implementation.
//
class SwitchInst : public TerminatorInst {
public:
// One entry of the destination table: a constant and the block paired with it.
typedef pair<ConstPoolUse, BasicBlockUse> dest_value;
private:
BasicBlockUse DefaultDest;
Use Val;
vector<dest_value> Destinations;
// Copy ctor is private; it is what clone() uses to duplicate the instruction.
SwitchInst(const SwitchInst &RI);
public:
typedef vector<dest_value>::iterator dest_iterator;
typedef vector<dest_value>::const_iterator dest_const_iterator;
SwitchInst(Value *Value, BasicBlock *Default);
inline ~SwitchInst() { dropAllReferences(); }
virtual Instruction *clone() const { return new SwitchInst(*this); }
void dest_push_back(ConstPoolVal *OnVal, BasicBlock *Dest);
virtual string getOpcode() const { return "switch"; }
// Non-const flavor: forwards to the const version and casts away the const.
inline Value *getOperand(unsigned i) {
return (Value*)((const SwitchInst*)this)->getOperand(i);
}
virtual const Value *getOperand(unsigned i) const;
virtual bool setOperand(unsigned i, Value *Val);
virtual unsigned getNumOperands() const;
virtual void dropAllReferences();
// Additionally, they must provide a method to get at the successors of this
// terminator instruction. If 'idx' is out of range, a null pointer shall be
// returned.
//
virtual const BasicBlock *getSuccessor(unsigned idx) const;
// One successor per entry in Destinations, plus one more.
virtual unsigned getNumSuccessors() const { return 1+Destinations.size(); }
};
#endif

19
include/llvm/iUnary.h Normal file
View File

@ -0,0 +1,19 @@
//===-- llvm/iUnary.h - Unary Operator node definitions ----------*- C++ -*--=//
//
// This file contains the declarations of all of the Unary Operator classes.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_IUNARY_H
#define LLVM_IUNARY_H
#include "llvm/InstrTypes.h"
//===----------------------------------------------------------------------===//
// Classes to represent Unary operators
//===----------------------------------------------------------------------===//
//
// All of these classes are subclasses of the UnaryOperator class...
//
#endif

7
lib/Analysis/Makefile Normal file
View File

@ -0,0 +1,7 @@
LEVEL = ../..
LIBRARYNAME = analysis
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,150 @@
//===-- llvm/Analysis/ModuleAnalyzer.cpp - Module analysis driver ----------==//
//
// This class provides a nice interface to traverse a module in a predictable
// way. This is used by the AssemblyWriter, BytecodeWriter, and SlotCalculator
// to do analysis of a module.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ModuleAnalyzer.h"
#include "llvm/ConstantPool.h"
#include "llvm/Method.h"
#include "llvm/Module.h"
#include "llvm/BasicBlock.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ConstPoolVals.h"
#include <map>
// processModule - Driver function to call all of my subclasses virtual methods.
//
// processModule - Entry point of the traversal.  The module level constant
// pool is processed first, then the methods.  A true result from either step
// stops the traversal and is returned to the caller.
//
bool ModuleAnalyzer::processModule(const Module *M) {
  // Short circuit evaluation keeps the methods from being visited if the
  // constant pool pass already returned true.
  return processConstPool(M->getConstantPool(), false) || processMethods(M);
}
// handleType - Call processType on the derived type T and, recursively, on
// every derived type it is built from.  TypeSet records the types that have
// already been seen so that each one is handled only once.  The component
// types are handled before T itself.  Returns true as soon as any recursive
// call returns true; otherwise returns the result of processType(T).
// Non-derived types and types already in TypeSet yield false immediately.
//
inline bool ModuleAnalyzer::handleType(set<const Type *> &TypeSet,
                                       const Type *T) {
  // Boring types, and types that were already found, need no work...
  if (!T->isDerivedType() || TypeSet.count(T) != 0)
    return false;

  TypeSet.insert(T);     // Remember the type before recursing into it

  // Recursively process interesting types...
  switch (T->getPrimitiveID()) {
  case Type::MethodTyID: {
    const MethodType *MethTy = (const MethodType *)T;
    if (handleType(TypeSet, MethTy->getReturnType())) return true;

    const MethodType::ParamTypes &ParamList = MethTy->getParamTypes();
    MethodType::ParamTypes::const_iterator PI = ParamList.begin();
    for (; PI != ParamList.end(); ++PI)
      if (handleType(TypeSet, *PI)) return true;
    break;
  }

  case Type::ArrayTyID: {
    const ArrayType *ArrTy = (const ArrayType *)T;
    if (handleType(TypeSet, ArrTy->getElementType())) return true;
    break;
  }

  case Type::StructTyID: {
    const StructType *StructTy = (const StructType*)T;
    const StructType::ElementTypes &Members = StructTy->getElementTypes();
    StructType::ElementTypes::const_iterator EI = Members.begin();
    for (; EI != Members.end(); ++EI)
      if (handleType(TypeSet, *EI)) return true;
    break;
  }

  case Type::PointerTyID: {
    const PointerType *PtrTy = (const PointerType *)T;
    if (handleType(TypeSet, PtrTy->getValueType())) return true;
    break;
  }

  default:
    // Unknown kinds are reported, but still handed to processType below.
    cerr << "ModuleAnalyzer::handleType, type unknown: '"
         << T->getName() << "'\n";
    break;
  }

  return processType(T);
}
bool ModuleAnalyzer::processConstPool(const ConstantPool &CP, bool isMethod) {
// TypeSet - Keep track of which types have already been processType'ed. We
// don't want to reprocess the same type more than once.
//
set<const Type *> TypeSet;
for (ConstantPool::plane_const_iterator PI = CP.begin();
PI != CP.end(); ++PI) {
const ConstantPool::PlaneType &Plane = **PI;
if (Plane.empty()) continue; // Skip empty type planes...
if (processConstPoolPlane(CP, Plane, isMethod)) return true;
for (ConstantPool::PlaneType::const_iterator CI = Plane.begin();
CI != Plane.end(); CI++) {
if ((*CI)->getType() == Type::TypeTy)
if (handleType(TypeSet, ((const ConstPoolType*)(*CI))->getValue()))
return true;
if (handleType(TypeSet, (*CI)->getType())) return true;
if (processConstant(*CI)) return true;
}
}
if (!isMethod) {
assert(CP.getParent()->getValueType() == Value::ModuleVal);
const Module *M = (const Module*)CP.getParent();
// Process the method types after the constant pool...
for (Module::MethodListType::const_iterator I = M->getMethodList().begin();
I != M->getMethodList().end(); I++) {
if (handleType(TypeSet, (*I)->getType())) return true;
if (visitMethod(*I)) return true;
}
}
return false;
}
bool ModuleAnalyzer::processMethods(const Module *M) {
for (Module::MethodListType::const_iterator I = M->getMethodList().begin();
I != M->getMethodList().end(); I++)
if (processMethod(*I)) return true;
return false;
}
// processMethod - Traverse a single method: first its arguments, then its
// constant pool, and finally its basic blocks, in order.  Returns true as soon
// as any of these steps returns true (the remaining steps are skipped), and
// false if everything was processed.
//
bool ModuleAnalyzer::processMethod(const Method *M) {
  // Loop over the arguments, processing them...
  const Method::ArgumentListType &ArgList = M->getArgumentList();
  for (Method::ArgumentListType::const_iterator AI = ArgList.begin();
       AI != ArgList.end(); AI++)
    if (processMethodArgument(*AI)) return true;

  // Loop over the constant pool, adding the constants to the table...  A true
  // result has to be propagated just like it is for every other step.
  if (processConstPool(M->getConstantPool(), true)) return true;

  // Loop over all the basic blocks, in order...
  Method::BasicBlocksType::const_iterator BBI = M->getBasicBlocks().begin();
  for (; BBI != M->getBasicBlocks().end(); BBI++)
    if (processBasicBlock(*BBI)) return true;
  return false;
}
bool ModuleAnalyzer::processBasicBlock(const BasicBlock *BB) {
// Process all of the instructions in the basic block
BasicBlock::InstListType::const_iterator Inst = BB->getInstList().begin();
for (; Inst != BB->getInstList().end(); Inst++) {
if (preProcessInstruction(*Inst) || processInstruction(*Inst)) return true;
}
return false;
}
// preProcessInstruction - Called by processBasicBlock on each instruction
// before processInstruction.  This implementation ignores I and returns false,
// which lets processBasicBlock carry on with the instruction.
//
bool ModuleAnalyzer::preProcessInstruction(const Instruction *I) {
return false;
}

2058
lib/AsmParser/Lexer.cpp Normal file

File diff suppressed because it is too large Load Diff

184
lib/AsmParser/Lexer.l Normal file
View File

@ -0,0 +1,184 @@
/*===-- Lexer.l - Scanner for llvm assembly files ----------------*- C++ -*--=//
//
// This file implements the flex scanner for LLVM assembly languages files.
//
//===------------------------------------------------------------------------=*/
%option prefix="llvmAsm"
%option yylineno
%option nostdinit
%option never-interactive
%option batch
%option noyywrap
%option nodefault
%option 8bit
%option outfile="Lexer.cpp"
%option ecs
%option noreject
%option noyymore
%{
#include "ParserInternals.h"
#include "llvm/BasicBlock.h"
#include "llvm/Method.h"
#include "llvm/Module.h"
#include <list>
#include "llvmAsmParser.h"
// RET_TOK - Store Instruction::Enum into the 'type' member of llvmAsmlval and
// return the token code 'sym'.  The expansion is two statements with no
// trailing semicolon, so it must only be used inside a braced action.
#define RET_TOK(type, Enum, sym) \
llvmAsmlval.type = Instruction::Enum; return sym
// TODO: All of the static identifiers are figured out by the lexer,
// these should be hashed.
// atoull - Convert an ascii string of decimal digits into its unsigned 64 bit
// value.  This does not have to do input error checking, because the input has
// already been matched by a digits-only regex.  The one failure that can still
// happen is a number that does not fit in 64 bits; that is reported through
// ThrowException.
//
uint64_t atoull(const char *Buffer) {
  const uint64_t MaxVal = ~(uint64_t)0;
  uint64_t Result = 0;
  for (; *Buffer; Buffer++) {
    const uint64_t Digit = (uint64_t)(*Buffer - '0');

    // Check before doing the arithmetic.  Comparing against the old value
    // after multiplying by ten misses overflows whose wrapped result happens
    // to be larger than the old value ("30000000000000000000" is one).
    if (Result > (MaxVal - Digit) / 10)
      ThrowException("constant bigger than 64 bits detected!");

    Result = Result * 10 + Digit;
  }
  return Result;
}
#define YY_NEVER_INTERACTIVE 1
%}
/* Comments start with a ; and go till end of line */
Comment ;.*
/* Variable(Def) identifiers start with a % sign */
VarID %[a-zA-Z$._][a-zA-Z$._0-9]*
/* Label identifiers end with a colon */
Label [a-zA-Z$._0-9]+:
/* Quoted names are non-empty and can contain any character except " */
StringConstant \"[^\"]+\"
/* E[PN]Integer: match positive and negative literal integer values that
* are preceded by a '%' character. These represent unnamed variable slots.
*/
EPInteger %[0-9]+
ENInteger %-[0-9]+
/* [PN]Integer: match positive and negative literal integer values */
PInteger [0-9]+
NInteger -[0-9]+
%%
{Comment} { /* Ignore comments for now */ }
begin { return BEGINTOK; }
end { return END; }
true { return TRUE; }
false { return FALSE; }
declare { return DECLARE; }
implementation { return IMPLEMENTATION; }
- { cerr << "deprecated argument '-' used!\n"; return '-'; }
bb { cerr << "deprecated type 'bb' used!\n"; llvmAsmlval.TypeVal = Type::LabelTy; return LABEL;}
void { llvmAsmlval.TypeVal = Type::VoidTy ; return VOID; }
bool { llvmAsmlval.TypeVal = Type::BoolTy ; return BOOL; }
sbyte { llvmAsmlval.TypeVal = Type::SByteTy ; return SBYTE; }
ubyte { llvmAsmlval.TypeVal = Type::UByteTy ; return UBYTE; }
short { llvmAsmlval.TypeVal = Type::ShortTy ; return SHORT; }
ushort { llvmAsmlval.TypeVal = Type::UShortTy; return USHORT; }
int { llvmAsmlval.TypeVal = Type::IntTy ; return INT; }
uint { llvmAsmlval.TypeVal = Type::UIntTy ; return UINT; }
long { llvmAsmlval.TypeVal = Type::LongTy ; return LONG; }
ulong { llvmAsmlval.TypeVal = Type::ULongTy ; return ULONG; }
float { llvmAsmlval.TypeVal = Type::FloatTy ; return FLOAT; }
double { llvmAsmlval.TypeVal = Type::DoubleTy; return DOUBLE; }
type { llvmAsmlval.TypeVal = Type::TypeTy ; return TYPE; }
label { llvmAsmlval.TypeVal = Type::LabelTy ; return LABEL; }
neg { RET_TOK(UnaryOpVal, Neg, NEG); }
not { RET_TOK(UnaryOpVal, Not, NOT); }
phi { return PHI; }
call { return CALL; }
add { RET_TOK(BinaryOpVal, Add, ADD); }
sub { RET_TOK(BinaryOpVal, Sub, SUB); }
mul { RET_TOK(BinaryOpVal, Mul, MUL); }
div { RET_TOK(BinaryOpVal, Div, DIV); }
rem { RET_TOK(BinaryOpVal, Rem, REM); }
setne { RET_TOK(BinaryOpVal, SetNE, SETNE); }
seteq { RET_TOK(BinaryOpVal, SetEQ, SETEQ); }
setlt { RET_TOK(BinaryOpVal, SetLT, SETLT); }
setgt { RET_TOK(BinaryOpVal, SetGT, SETGT); }
setle { RET_TOK(BinaryOpVal, SetLE, SETLE); }
setge { RET_TOK(BinaryOpVal, SetGE, SETGE); }
ret { RET_TOK(TermOpVal, Ret, RET); }
br { RET_TOK(TermOpVal, Br, BR); }
switch { RET_TOK(TermOpVal, Switch, SWITCH); }
malloc { RET_TOK(MemOpVal, Malloc, MALLOC); }
alloca { RET_TOK(MemOpVal, Alloca, ALLOCA); }
free { RET_TOK(MemOpVal, Free, FREE); }
load { RET_TOK(MemOpVal, Load, LOAD); }
store { RET_TOK(MemOpVal, Store, STORE); }
getfield { RET_TOK(MemOpVal, GetField, GETFIELD); }
putfield { RET_TOK(MemOpVal, PutField, PUTFIELD); }
{VarID} { llvmAsmlval.StrVal = strdup(yytext+1); return VAR_ID; }
{Label}         {
                  // Overwrite the trailing colon with a terminator, then give
                  // the parser its own copy of the bare label name.
                  char *LastCh = yytext + strlen(yytext) - 1;
                  *LastCh = 0;
                  llvmAsmlval.StrVal = strdup(yytext);
                  return LABELSTR;
                }
{StringConstant} {
                  // Cut off the closing quote, then copy starting just past
                  // the opening quote so only the quoted text is kept.
                  char *EndQuote = yytext + strlen(yytext) - 1;
                  *EndQuote = 0;
                  llvmAsmlval.StrVal = strdup(yytext + 1);
                  return STRINGCONSTANT;
                }
{PInteger} { llvmAsmlval.UInt64Val = atoull(yytext); return EUINT64VAL; }
{NInteger}      {
                  // Read the digits after the minus sign as a magnitude
                  const uint64_t Magnitude = atoull(yytext+1);
                  // The negative range reaches one further than the positive
                  const uint64_t Limit = (uint64_t)INT64_MAX+1;
                  if (Magnitude > Limit)
                    ThrowException("Constant too large for signed 64 bits!");
                  llvmAsmlval.SInt64Val = -Magnitude;
                  return ESINT64VAL;
                }
{EPInteger}     {
                  // Read the digits after the percent sign.  The result is
                  // stored in an unsigned, so reject numbers that would be
                  // truncated instead of quietly naming a different slot.
                  uint64_t Val = atoull(yytext+1);
                  if ((uint64_t)(unsigned)Val != Val)
                    ThrowException("Invalid value number (too large)!");
                  llvmAsmlval.UIntVal = (unsigned)Val;
                  return UINTVAL;
                }
{ENInteger}     {
                  // Read the digits after the percent and minus signs
                  const uint64_t Magnitude = atoull(yytext+2);
                  // The negative range reaches one further than the positive
                  const uint64_t Limit = (uint64_t)INT32_MAX+1;
                  if (Magnitude > Limit)
                    ThrowException("Constant too large for signed 32 bits!");
                  llvmAsmlval.SIntVal = -Magnitude;
                  return SINTVAL;
                }
[ \t\n] { /* Ignore whitespace */ }
. { /*printf("'%s'", yytext);*/ return yytext[0]; }
%%

7
lib/AsmParser/Makefile Normal file
View File

@ -0,0 +1,7 @@
LEVEL = ../../..
LIBRARYNAME = asmparser
include $(LEVEL)/Makefile.common

84
lib/AsmParser/Parser.cpp Normal file
View File

@ -0,0 +1,84 @@
//===- Parser.cpp - Main dispatch module for the Parser library -------------===
//
// This library implements the functionality defined in llvm/Assembly/Parser.h
//
//===------------------------------------------------------------------------===
#include "llvm/Analysis/Verifier.h"
#include "llvm/Module.h"
#include "ParserInternals.h"
#include <stdio.h> // for sprintf
// The useful interface defined by this file... Parse an ascii file, and return
// the internal representation in a nice slice'n'dice'able representation.
//
// Input is read from stdin when the input filename in Opts is "-", otherwise
// from that file.  A ParseException is thrown when the file cannot be opened
// or when the parsed module fails verification (the message then holds one
// verifier error per line); exceptions raised by the parser pass through.
// The input file is closed on every path.  The value returned is whatever
// RunVMAsmParser produced, which the code below allows to be null.
//
Module *ParseAssemblyFile(const ToolCommandLine &Opts) throw (ParseException) {
  FILE *F = stdin;

  if (Opts.getInputFilename() != "-")
    F = fopen(Opts.getInputFilename().c_str(), "r");

  if (F == 0) {
    throw ParseException(Opts, string("Could not open file '") +
                         Opts.getInputFilename() + "'");
  }

  Module *Result = 0;
  try {
    Result = RunVMAsmParser(Opts, F);
  } catch (...) {
    // Do not leak the input file when the parser bails out with an exception.
    if (F != stdin)
      fclose(F);
    throw;
  }

  if (F != stdin)
    fclose(F);

  if (Result) {  // Check to see that it is valid...
    vector<string> Errors;
    if (verify(Result, Errors)) {
      delete Result; Result = 0;

      string Message;
      for (unsigned i = 0; i < Errors.size(); i++)
        Message += Errors[i] + "\n";

      throw ParseException(Opts, Message);
    }
  }
  return Result;
}
//===------------------------------------------------------------------------===
// ParseException Class
//===------------------------------------------------------------------------===
// Build an exception that refers to the given options and carries a copy of
// the message together with the line and column it applies to.
ParseException::ParseException(const ToolCommandLine &opts,
                               const string &message, int lineNo, int colNo)
  : Opts(opts), Message(message), LineNo(lineNo), ColumnNo(colNo) {
}
// Copy constructor: refers to the same options as E and copies its message,
// line number and column number.
ParseException::ParseException(const ParseException &E)
  : Opts(E.Opts), Message(E.Message), LineNo(E.LineNo), ColumnNo(E.ColumnNo) {
}
// getMessage - Build the full diagnostic text, including info from options.
// The result has the form "<file>[:<line>[,<column>]]: <message>", where
// <file> is "<stdin>" when the input filename is "-".  The line is left out
// when LineNo is -1; the column is only printed when a line is printed and
// ColumnNo is not -1.
//
const string ParseException::getMessage() const {
  string Result;
  // Big enough for any int in decimal.  The old ten byte buffer was overrun by
  // sprintf as soon as a number needed ten or more characters.
  char Buffer[32];

  if (Opts.getInputFilename() == "-")
    Result += "<stdin>";
  else
    Result += Opts.getInputFilename();

  if (LineNo != -1) {
    snprintf(Buffer, sizeof(Buffer), "%d", LineNo);
    Result += string(":") + Buffer;
    if (ColumnNo != -1) {
      snprintf(Buffer, sizeof(Buffer), "%d", ColumnNo);
      Result += string(",") + Buffer;
    }
  }

  return Result + ": " + Message;
}

View File

@ -0,0 +1,159 @@
//===-- ParserInternals.h - Definitions internal to the parser ---*- C++ -*--=//
//
// This header file defines the various variables that are shared among the
// different components of the parser...
//
//===----------------------------------------------------------------------===//
#ifndef PARSER_INTERNALS_H
#define PARSER_INTERNALS_H
#include <stdio.h>
#define __STDC_LIMIT_MACROS
#include "llvm/InstrTypes.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/iOther.h"
#include "llvm/Method.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Parser.h"
#include "llvm/Tools/CommandLine.h"
#include "llvm/Tools/StringExtras.h"
class Module;
// Global variables exported from the lexer...
extern FILE *llvmAsmin;
extern int llvmAsmlineno;
// Globals exported by the parser...
extern const ToolCommandLine *CurOptions;
Module *RunVMAsmParser(const ToolCommandLine &Opts, FILE *F);
// ThrowException - Throw a ParseException for 'message', filled in with the
// current options (*CurOptions) and the line number the lexer is on.
//
// Going through this helper also guards against accidentally writing
// 'throw new ParseException' where 'throw ParseException' is meant.
//
static inline void ThrowException(const string &message) {
  // TODO: column number in exception
  const int CurLine = llvmAsmlineno;
  throw ParseException(*CurOptions, message, CurLine);
}
// ValID - Represents a reference of a definition of some sort.  This may either
// be a numeric reference or a symbolic (%var) reference.  This is just a
// discriminated union.
//
// Note that this class cannot be written in a straightforward manner with
// constructors and stuff because it goes in a union, and GCC doesn't like
// putting classes with ctor's in unions.  :(  Use the create* functions to make
// one, copy() to duplicate one, and destroy() to release the string it may own.
//
struct ValID {
  int Type;               // 0 = number, 1 = name, 2 = const pool,
                          // 3 = unsigned const pool, 4 = const string
  union {
    int      Num;         // If it's a numeric reference
    char    *Name;        // If it's a named reference.  Memory must be free'd.
    int64_t  ConstPool64; // Constant pool reference.  This is the value
    uint64_t UConstPool64;// Unsigned constant pool reference.
  };

  // Numeric reference (Type 0).
  static ValID create(int Num) {
    ValID D; D.Type = 0; D.Num = Num; return D;
  }
  // Named reference (Type 1).  The pointer is stored, not copied, and is later
  // released with free() by destroy().
  static ValID create(char *Name) {
    ValID D; D.Type = 1; D.Name = Name; return D;
  }
  // Signed constant (Type 2).
  static ValID create(int64_t Val) {
    ValID D; D.Type = 2; D.ConstPool64 = Val; return D;
  }
  // Unsigned constant (Type 3).
  static ValID create(uint64_t Val) {
    ValID D; D.Type = 3; D.UConstPool64 = Val; return D;
  }
  // String constant (Type 4).  Same ownership rule as the named form.
  static ValID create_conststr(char *Name) {
    ValID D; D.Type = 4; D.Name = Name; return D;
  }

  // destroy - Release the string held by the two string carrying forms.
  inline void destroy() {
    if (Type == 1 || Type == 4) free(Name);  // Free this strdup'd memory...
  }

  // copy - Return a duplicate.  The string carrying forms get their own
  // strdup'd string so that both copies can be destroyed independently.
  inline ValID copy() const {
    if (Type != 1 && Type != 4) return *this;
    ValID Result = *this;
    Result.Name = strdup(Name);
    return Result;
  }

  // getName - Render the reference as text: "#N" for numbers, the bare name
  // for names, the quoted text for strings and "%value" for constants.
  inline string getName() const {
    switch (Type) {
    case 0:  return string("#") + itostr(Num);
    case 1:  return Name;
    case 3: {
      // Print through the unsigned member; reading the signed one showed
      // values above INT64_MAX as negative numbers.
      char Buffer[24];
      sprintf(Buffer, "%llu", (unsigned long long)UConstPool64);
      return string("%") + Buffer;
    }
    case 4:  return string("\"") + Name + string("\"");
    default: return string("%") + itostr(ConstPool64);
    }
  }
};
template<class SuperType>
class PlaceholderDef : public SuperType {
ValID D;
// TODO: Placeholder def should hold Line #/Column # of definition in case
// there is an error resolving the defintition!
public:
PlaceholderDef(const Type *Ty, const ValID &d) : SuperType(Ty), D(d) {}
ValID &getDef() { return D; }
};
// InstPlaceHolderHelper - Instruction base for placeholders.  It is built with
// the UserOp1 opcode and an empty name, reports "placeholder" as its opcode
// string and has no operands.  clone() aborts.
//
struct InstPlaceHolderHelper : public Instruction {
  InstPlaceHolderHelper(const Type *Ty) : Instruction(Ty, UserOp1, "") {}

  virtual Instruction *clone() const { abort(); }
  virtual void dropAllReferences() {}
  virtual string getOpcode() const { return "placeholder"; }

  // No "operands"...
  virtual Value *getOperand(unsigned) { return 0; }
  virtual const Value *getOperand(unsigned) const { return 0; }
  virtual bool setOperand(unsigned, Value *) { return false; }
  virtual unsigned getNumOperands() const { return 0; }
};
// BBPlaceHolderHelper - BasicBlock base for placeholders.  The type argument
// is only checked (it must be a label type); the block itself is default
// constructed.
struct BBPlaceHolderHelper : public BasicBlock {
BBPlaceHolderHelper(const Type *Ty) : BasicBlock() {
assert(Ty->isLabelType());
}
};
// MethPlaceHolderHelper - Method base for placeholders.  Ty is cast to a
// MethodType and handed to the Method constructor; the assert checks that the
// cast was legitimate.
struct MethPlaceHolderHelper : public Method {
MethPlaceHolderHelper(const Type *Ty)
: Method((const MethodType*)Ty) {
assert(Ty->isMethodType() && "Method placeholders must be method types!");
}
};
typedef PlaceholderDef<InstPlaceHolderHelper> DefPlaceHolder;
typedef PlaceholderDef<BBPlaceHolderHelper> BBPlaceHolder;
typedef PlaceholderDef<MethPlaceHolderHelper> MethPlaceHolder;
//typedef PlaceholderDef<ModulePlaceHolderHelper> ModulePlaceHolder;
// getValIDFromPlaceHolder - Return the ValID stored in the placeholder Def.
// The primitive ID of Def's type selects which placeholder class Def is cast
// to: label -> BBPlaceHolder, method -> MethPlaceHolder, anything else ->
// DefPlaceHolder.
//
static inline ValID &getValIDFromPlaceHolder(Value *Def) {
  ValID *Found = 0;
  switch (Def->getType()->getPrimitiveID()) {
  case Type::LabelTyID:
    Found = &((BBPlaceHolder*)Def)->getDef();
    break;
  case Type::MethodTyID:
    Found = &((MethPlaceHolder*)Def)->getDef();
    break;
  //case Type::ModuleTyID: Found = &((ModulePlaceHolder*)Def)->getDef(); break;
  default:
    Found = &((DefPlaceHolder*)Def)->getDef();
    break;
  }
  return *Found;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
typedef union {
Module *ModuleVal;
Method *MethodVal;
MethodArgument *MethArgVal;
BasicBlock *BasicBlockVal;
TerminatorInst *TermInstVal;
Instruction *InstVal;
ConstPoolVal *ConstVal;
const Type *TypeVal;
list<MethodArgument*> *MethodArgList;
list<Value*> *ValueList;
list<const Type*> *TypeList;
list<pair<ConstPoolVal*, BasicBlock*> > *JumpTable;
vector<ConstPoolVal*> *ConstVector;
int64_t SInt64Val;
uint64_t UInt64Val;
int SIntVal;
unsigned UIntVal;
char *StrVal; // This memory is allocated by strdup!
ValID ValIDVal; // May contain memory allocated by strdup
Instruction::UnaryOps UnaryOpVal;
Instruction::BinaryOps BinaryOpVal;
Instruction::TermOps TermOpVal;
Instruction::MemoryOps MemOpVal;
} YYSTYPE;
#define ESINT64VAL 257
#define EUINT64VAL 258
#define SINTVAL 259
#define UINTVAL 260
#define VOID 261
#define BOOL 262
#define SBYTE 263
#define UBYTE 264
#define SHORT 265
#define USHORT 266
#define INT 267
#define UINT 268
#define LONG 269
#define ULONG 270
#define FLOAT 271
#define DOUBLE 272
#define STRING 273
#define TYPE 274
#define LABEL 275
#define VAR_ID 276
#define LABELSTR 277
#define STRINGCONSTANT 278
#define IMPLEMENTATION 279
#define TRUE 280
#define FALSE 281
#define BEGINTOK 282
#define END 283
#define DECLARE 284
#define PHI 285
#define CALL 286
#define RET 287
#define BR 288
#define SWITCH 289
#define NEG 290
#define NOT 291
#define TOINT 292
#define TOUINT 293
#define ADD 294
#define SUB 295
#define MUL 296
#define DIV 297
#define REM 298
#define SETLE 299
#define SETGE 300
#define SETLT 301
#define SETGT 302
#define SETEQ 303
#define SETNE 304
#define MALLOC 305
#define ALLOCA 306
#define FREE 307
#define LOAD 308
#define STORE 309
#define GETFIELD 310
#define PUTFIELD 311
extern YYSTYPE llvmAsmlval;

View File

@ -0,0 +1,954 @@
//===-- llvmAsmParser.y - Parser for llvm assembly files ---------*- C++ -*--=//
//
// This file implements the bison parser for LLVM assembly languages files.
//
//===------------------------------------------------------------------------=//
//
// TODO: Parse comments and add them to an internal node... so that they may
// be saved in the bytecode format as well as everything else. Very important
// for a general IR format.
//
%{
#include "ParserInternals.h"
#include "llvm/BasicBlock.h"
#include "llvm/Method.h"
#include "llvm/SymbolTable.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Assembly/Parser.h"
#include "llvm/ConstantPool.h"
#include "llvm/iTerminators.h"
#include "llvm/iMemory.h"
#include <list>
#include <utility> // Get definition of pair class
#include <stdio.h> // This embarasment is due to our flex lexer...
int yyerror(char *ErrorMsg); // Forward declarations to prevent "implicit
int yylex(); // declaration" of xxx warnings.
int yyparse();
static Module *ParserResult;
const ToolCommandLine *CurOptions = 0;
// This contains info used when building the body of a method. It is destroyed
// when the method is completed.
//
typedef vector<Value *> ValueList; // Numbered defs
static void ResolveDefinitions(vector<ValueList> &LateResolvers);
// PerModuleInfo - State kept while a module is being parsed.  The single
// instance is CurModule.
static struct PerModuleInfo {
Module *CurrentModule; // Module being built; reset to 0 by ModuleDone
vector<ValueList> Values; // Module level numbered definitions
vector<ValueList> LateResolveValues; // Placeholders still to be resolved
// ModuleDone - Resolve the outstanding module level placeholders, then drop
// the module level state.
void ModuleDone() {
// If we could not resolve some definitions at parsing time (forward
// references) resolve them now...
ResolveDefinitions(LateResolveValues);
Values.clear(); // Clear out module level definitions
CurrentModule = 0;
}
} CurModule;
static struct PerMethodInfo {
Method *CurrentMethod; // Pointer to current method being created
vector<ValueList> Values; // Keep track of numbered definitions
vector<ValueList> LateResolveValues;
inline PerMethodInfo() {
CurrentMethod = 0;
}
inline ~PerMethodInfo() {}
inline void MethodStart(Method *M) {
CurrentMethod = M;
}
void MethodDone() {
// If we could not resolve some blocks at parsing time (forward branches)
// resolve the branches now...
ResolveDefinitions(LateResolveValues);
Values.clear(); // Clear out method local definitions
CurrentMethod = 0;
}
} CurMeth; // Info for the current method...
//===----------------------------------------------------------------------===//
// Code to handle definitions of all the types
//===----------------------------------------------------------------------===//
// InsertValue - Give the unnamed value D the next free slot number of its
// type in ValueTab, growing the table when the type has no plane yet.  Values
// that have a name are left alone.
//
static void InsertValue(Value *D, vector<ValueList> &ValueTab = CurMeth.Values) {
  if (D->hasName()) return;            // Only numbered definitions get a slot

  const unsigned Plane = D->getType()->getUniqueID();
  if (Plane >= ValueTab.size())
    ValueTab.resize(Plane+1, ValueList());

  ValueTab[Plane].push_back(D);
}
// getVal - Find the value of the given type that the reference D stands for.
//
//  * Numbered references (D.Type == 0) are looked up in the module level table
//    first and then in the method level table.
//  * Named references (D.Type == 1) are looked up in the symbol table of the
//    current method, if there is one, and then in that of the module.  On
//    success the name held by D is freed.
//  * Constant references (D.Type == 2, 3, 4) create a constant of the requested
//    type, or reuse an equal one that is already in the active constant pool.
//    An exception is thrown if the value is not valid for the type.
//
// If a numbered or named reference cannot be found, the result depends on
// DoNotImprovise: when it is true, 0 is returned; otherwise a placeholder is
// created, recorded for later resolution and returned.
//
// Note that the parameter is called Type as well; 'Type::' below still names
// the class because a name followed by '::' is never looked up as a variable.
//
static Value *getVal(const Type *Type, ValID &D,
bool DoNotImprovise = false) {
switch (D.Type) {
case 0: { // Is it a numbered definition?
unsigned type = Type->getUniqueID();
unsigned Num = (unsigned)D.Num;
// Module constants occupy the lowest numbered slots...
if (type < CurModule.Values.size()) {
if (Num < CurModule.Values[type].size())
return CurModule.Values[type][Num];
// Not a module level slot: make the number relative to the method table
Num -= CurModule.Values[type].size();
}
// Make sure that our type is within bounds
if (CurMeth.Values.size() <= type)
break;
// Check that the number is within bounds...
if (CurMeth.Values[type].size() <= Num)
break;
return CurMeth.Values[type][Num];
}
case 1: { // Is it a named definition?
string Name(D.Name);
SymbolTable *SymTab = 0;
if (CurMeth.CurrentMethod)
SymTab = CurMeth.CurrentMethod->getSymbolTable();
Value *N = SymTab ? SymTab->lookup(Type, Name) : 0;
if (N == 0) {
// Not found in the method (or no method is active): try the module
SymTab = CurModule.CurrentModule->getSymbolTable();
if (SymTab)
N = SymTab->lookup(Type, Name);
if (N == 0) break;
}
D.destroy(); // Free old strdup'd memory...
return N;
}
case 2: // Is it a constant pool reference??
case 3: // Is it an unsigned const pool reference?
case 4:{ // Is it a string const pool reference?
ConstPoolVal *CPV = 0;
// Check to make sure that "Type" is an integral type, and that our
// value will fit into the specified type...
switch (D.Type) {
case 2:
if (Type == Type::BoolTy) { // Special handling for boolean data
CPV = new ConstPoolBool(D.ConstPool64 != 0);
} else {
if (!ConstPoolSInt::isValueValidForType(Type, D.ConstPool64))
ThrowException("Symbolic constant pool reference is invalid!");
CPV = new ConstPoolSInt(Type, D.ConstPool64);
}
break;
case 3:
// Try the value as unsigned first and fall back to a signed constant
if (!ConstPoolUInt::isValueValidForType(Type, D.UConstPool64)) {
if (!ConstPoolSInt::isValueValidForType(Type, D.ConstPool64)) {
ThrowException("Symbolic constant pool reference is invalid!");
} else { // This is really a signed reference. Transmogrify.
CPV = new ConstPoolSInt(Type, D.ConstPool64);
}
} else {
CPV = new ConstPoolUInt(Type, D.UConstPool64);
}
break;
case 4:
cerr << "FIXME: TODO: String constants [sbyte] not implemented yet!\n";
abort();
// The two statements below come after the abort() call above
//CPV = new ConstPoolString(D.Name);
D.destroy(); // Free the string memory
break;
}
assert(CPV && "How did we escape creating a constant??");
// Scan through the constant table and see if we already have loaded this
// constant.  The pool of the current method is used when a method is being
// parsed, the pool of the module otherwise.
//
ConstantPool &CP = CurMeth.CurrentMethod ?
CurMeth.CurrentMethod->getConstantPool() :
CurModule.CurrentModule->getConstantPool();
ConstPoolVal *C = CP.find(CPV); // Already have this constant?
if (C) {
delete CPV; // Didn't need this after all, oh well.
return C; // Yup, we already have one, recycle it!
}
CP.insert(CPV);
// Success, everything is kosher. Lets go!
return CPV;
} // End of case 2,3,4
} // End of switch
// If we reached here, we referenced either a symbol that we don't know about
// or an id number that hasn't been read yet. We may be referencing something
// forward, so just create an entry to be resolved later and get to it...
//
if (DoNotImprovise) return 0; // Do we just want a null to be returned?
// TODO: Attempt to coalesce nodes that are the same with previous ones.
Value *d = 0;
switch (Type->getPrimitiveID()) {
case Type::LabelTyID: d = new BBPlaceHolder(Type, D); break;
case Type::MethodTyID:
// Method placeholders are recorded in the module level table
d = new MethPlaceHolder(Type, D);
InsertValue(d, CurModule.LateResolveValues);
return d;
//case Type::ClassTyID: d = new ClassPlaceHolder(Type, D); break;
default: d = new DefPlaceHolder(Type, D); break;
}
assert(d != 0 && "How did we not make something?");
InsertValue(d, CurMeth.LateResolveValues);
return d;
}
//===----------------------------------------------------------------------===//
// Code to handle forward references in instructions
//===----------------------------------------------------------------------===//
//
// This code handles the late binding needed with statements that reference
// values not defined yet... for example, a forward branch, or the PHI node for
// a loop body.
//
// This keeps a table (CurMeth.LateResolveValues) of all such forward references
// and back patches them after we are done.
//
// ResolveDefinitions - Go through the placeholders recorded in LateResolvers
// (forward branches, phi operands of loops, etc...) and swap each one for the
// value it refers to.  The table is indexed by unique type ID.  A placeholder
// whose reference still cannot be found causes an exception; otherwise all of
// its uses are redirected to the real value and it is deleted.  The table is
// empty on return.
//
static void ResolveDefinitions(vector<ValueList> &LateResolvers) {
  for (unsigned ty = 0; ty < LateResolvers.size(); ty++) {
    while (!LateResolvers[ty].empty()) {
      Value *Placeholder = LateResolvers[ty].back();
      LateResolvers[ty].pop_back();

      ValID &Ref = getValIDFromPlaceHolder(Placeholder);
      Value *Real = getVal(Type::getUniqueIDType(ty), Ref, true);

      if (Real == 0) {
        if (Ref.Type == 1)
          ThrowException("Reference to an invalid definition: '" +Ref.getName() +
                         "' of type '" + Placeholder->getType()->getName() + "'");
        else
          ThrowException("Reference to an invalid definition: #" +itostr(Ref.Num)+
                         " of type '" + Placeholder->getType()->getName() + "'");
      }

      Placeholder->replaceAllUsesWith(Real);
      assert(Placeholder->use_empty());
      delete Placeholder;
    }
  }

  LateResolvers.clear();
}
// addConstValToConstantPool - Insert the constant C into the active constant
// pool (that of the current method if there is one, else that of the module),
// merging it with an equal constant that is already there whenever possible.
// This means that multiple references to %4, for example, will all get merged.
//
// Returns the constant that ends up representing C.  When that is an existing
// pool entry, C has been deleted.
//
static ConstPoolVal *addConstValToConstantPool(ConstPoolVal *C) {
  const bool InMethod = CurMeth.CurrentMethod != 0;
  vector<ValueList> &ValTab = InMethod ? CurMeth.Values : CurModule.Values;
  ConstantPool &CP = InMethod ? CurMeth.CurrentMethod->getConstantPool()
                              : CurModule.CurrentModule->getConstantPool();

  ConstPoolVal *Existing = CP.find(C);

  // Nothing to merge with, or both values carry a name of their own and so
  // have to stay separate: C goes into the pool and gets its own slot.
  if (Existing == 0 || (Existing->hasName() && C->hasName())) {
    CP.insert(C);
    InsertValue(C, ValTab);
    return C;
  }

  // From here on at most one of the two values has a name, and C is merged
  // into the existing entry.
  if (C->hasName()) {
    // Only the new value is named: propagate its name to the pool entry.
    Existing->setName(C->getName());
  } else if (Existing->hasName()) {
    // Only the pool entry is named.  InsertValue requires that the value have
    // no name to insert correctly (we want to fill the slot this constant
    // would have filled), so take the name off for the duration of the call.
    string Name = Existing->getName();
    Existing->setName("");
    InsertValue(Existing, ValTab);
    Existing->setName(Name);
  } else {
    // Neither value has a name, trivially merge them.
    InsertValue(Existing, ValTab);
  }

  delete C;               // Sorry, you're toast
  return Existing;
}
//===----------------------------------------------------------------------===//
// RunVMAsmParser - Define an interface to this parser
//===----------------------------------------------------------------------===//
//
// RunVMAsmParser - Parse the assembly text read from F and return the value
// the grammar left in ParserResult.  Opts is made available to the parser as
// CurOptions for the duration of the call.
//
Module *RunVMAsmParser(const ToolCommandLine &Opts, FILE *F) {
  // Point the lexer and parser globals at this input
  CurOptions = &Opts;
  llvmAsmin = F;
  llvmAsmlineno = 1;      // Reset the current line number...

  CurModule.CurrentModule = new Module();  // Allocate a new module to read

  yyparse();       // Parse the file.

  Module *Parsed = ParserResult;

  // Put the globals back; F is about to go away, don't use it anymore...
  ParserResult = 0;
  llvmAsmin = stdin;
  CurOptions = 0;

  return Parsed;
}
%}
%union {
Module *ModuleVal;
Method *MethodVal;
MethodArgument *MethArgVal;
BasicBlock *BasicBlockVal;
TerminatorInst *TermInstVal;
Instruction *InstVal;
ConstPoolVal *ConstVal;
const Type *TypeVal;
list<MethodArgument*> *MethodArgList;
list<Value*> *ValueList;
list<const Type*> *TypeList;
list<pair<ConstPoolVal*, BasicBlock*> > *JumpTable;
vector<ConstPoolVal*> *ConstVector;
int64_t SInt64Val;
uint64_t UInt64Val;
int SIntVal;
unsigned UIntVal;
char *StrVal; // This memory is allocated by strdup!
ValID ValIDVal; // May contain memory allocated by strdup
Instruction::UnaryOps UnaryOpVal;
Instruction::BinaryOps BinaryOpVal;
Instruction::TermOps TermOpVal;
Instruction::MemoryOps MemOpVal;
}
%type <ModuleVal> Module MethodList
%type <MethodVal> Method MethodHeader BasicBlockList
%type <BasicBlockVal> BasicBlock InstructionList
%type <TermInstVal> BBTerminatorInst
%type <InstVal> Inst InstVal MemoryInst
%type <ConstVal> ConstVal
%type <ConstVector> ConstVector
%type <MethodArgList> ArgList ArgListH
%type <MethArgVal> ArgVal
%type <ValueList> ValueRefList ValueRefListE
%type <TypeList> TypeList
%type <JumpTable> JumpTable
%type <ValIDVal> ValueRef ConstValueRef // Reference to a definition or BB
// Tokens and types for handling constant integer values
//
// ESINT64VAL - A negative number within long long range
%token <SInt64Val> ESINT64VAL
// EUINT64VAL - A positive number within uns. long long range
%token <UInt64Val> EUINT64VAL
%type <SInt64Val> EINT64VAL
%token <SIntVal> SINTVAL // Signed 32 bit ints...
%token <UIntVal> UINTVAL // Unsigned 32 bit ints...
%type <SIntVal> INTVAL
// Built in types...
%type <TypeVal> Types TypesV SIntType UIntType IntType
%token <TypeVal> VOID BOOL SBYTE UBYTE SHORT USHORT INT UINT LONG ULONG
%token <TypeVal> FLOAT DOUBLE STRING TYPE LABEL
%token <StrVal> VAR_ID LABELSTR STRINGCONSTANT
%type <StrVal> OptVAR_ID OptAssign
%token IMPLEMENTATION TRUE FALSE BEGINTOK END DECLARE
%token PHI CALL
// Basic Block Terminating Operators
%token <TermOpVal> RET BR SWITCH
// Unary Operators
%type <UnaryOpVal> UnaryOps // all the unary operators
%token <UnaryOpVal> NEG NOT
// Unary Conversion Operators
%token <UnaryOpVal> TOINT TOUINT
// Binary Operators
%type <BinaryOpVal> BinaryOps // all the binary operators
%token <BinaryOpVal> ADD SUB MUL DIV REM
// Binary Comparators
%token <BinaryOpVal> SETLE SETGE SETLT SETGT SETEQ SETNE
// Memory Instructions
// The tag has to name a member of the %union; the member for memory opcodes is
// MemOpVal, which is also what the lexer stores into for these tokens.
%token <MemOpVal> MALLOC ALLOCA FREE LOAD STORE GETFIELD PUTFIELD
%start Module
%%
// Handle constant integer size restriction and conversion...
//
// INTVAL is a signed 32 bit value.  A SINTVAL is passed through by the default
// action; a UINTVAL is accepted only if it does not exceed INT32_MAX.
//
INTVAL : SINTVAL
INTVAL : UINTVAL {
if ($1 > (uint32_t)INT32_MAX) // Outside of my range!
ThrowException("Value too large for type!");
$$ = (int32_t)$1;
}
// EINT64VAL is a signed 64 bit value.  An EUINT64VAL is accepted only if it
// does not exceed INT64_MAX.
EINT64VAL : ESINT64VAL // These have same type and can't cause problems...
EINT64VAL : EUINT64VAL {
if ($1 > (uint64_t)INT64_MAX) // Outside of my range!
ThrowException("Value too large for type!");
$$ = (int64_t)$1;
}
// Types includes all predefined types... except void, because you can't do
// anything with it except for certain specific things...
//
// User defined types are added later...
//
Types : BOOL | SBYTE | UBYTE | SHORT | USHORT | INT | UINT
Types : LONG | ULONG | FLOAT | DOUBLE | STRING | TYPE | LABEL
// TypesV includes all of 'Types', but it also includes the void type.
TypesV : Types | VOID
// Operations that are notably excluded from this list include:
// RET, BR, & SWITCH because they end basic blocks and are treated specially.
//
UnaryOps : NEG | NOT | TOINT | TOUINT
BinaryOps : ADD | SUB | MUL | DIV | REM
BinaryOps : SETLE | SETGE | SETLT | SETGT | SETEQ | SETNE
// Define some types that allow classification if we only want a particular
// thing...
SIntType : LONG | INT | SHORT | SBYTE
UIntType : ULONG | UINT | USHORT | UBYTE
IntType : SIntType | UIntType
// OptAssign - Optional "name =" prefix.  Yields the strdup'd name, which the
// rule using it has to free, or 0 when there is no prefix.
OptAssign : VAR_ID '=' {
$$ = $1;
}
| /*empty*/ {
$$ = 0;
}
// ConstVal - One constant: an integer, a boolean, a type, an array (with or
// without an explicit size) or a structure.  Each alternative yields a newly
// allocated constant; putting it into a constant pool is left to the rules
// that use ConstVal.
ConstVal : SIntType EINT64VAL { // integral constants
if (!ConstPoolSInt::isValueValidForType($1, $2))
ThrowException("Constant value doesn't fit in type!");
$$ = new ConstPoolSInt($1, $2);
}
| UIntType EUINT64VAL { // integral constants
if (!ConstPoolUInt::isValueValidForType($1, $2))
ThrowException("Constant value doesn't fit in type!");
$$ = new ConstPoolUInt($1, $2);
}
| BOOL TRUE { // Boolean constants
$$ = new ConstPoolBool(true);
}
| BOOL FALSE { // Boolean constants
$$ = new ConstPoolBool(false);
}
| STRING STRINGCONSTANT { // String constants
cerr << "FIXME: TODO: String constants [sbyte] not implemented yet!\n";
abort();
// The statement below comes after the abort() call above
//$$ = new ConstPoolString($2);
free($2);
}
| TYPE Types { // Type constants
$$ = new ConstPoolType($2);
}
| '[' Types ']' '[' ConstVector ']' { // Nonempty array constant
// Verify all elements are correct type!
const ArrayType *AT = ArrayType::getArrayType($2);
for (unsigned i = 0; i < $5->size(); i++) {
if ($2 != (*$5)[i]->getType())
ThrowException("Element #" + utostr(i) + " is not of type '" +
$2->getName() + "' as required!\nIt is of type '" +
(*$5)[i]->getType()->getName() + "'.");
}
$$ = new ConstPoolArray(AT, *$5);
delete $5;
}
| '[' Types ']' '[' ']' { // Empty array constant
vector<ConstPoolVal*> Empty;
$$ = new ConstPoolArray(ArrayType::getArrayType($2), Empty);
}
| '[' EUINT64VAL 'x' Types ']' '[' ConstVector ']' {
// Sized array constant: the element count must match the declared size
// Verify all elements are correct type!
const ArrayType *AT = ArrayType::getArrayType($4, (int)$2);
if ($2 != $7->size())
ThrowException("Type mismatch: constant sized array initialized with " +
utostr($7->size()) + " arguments, but has size of " +
itostr((int)$2) + "!");
for (unsigned i = 0; i < $7->size(); i++) {
if ($4 != (*$7)[i]->getType())
ThrowException("Element #" + utostr(i) + " is not of type '" +
$4->getName() + "' as required!\nIt is of type '" +
(*$7)[i]->getType()->getName() + "'.");
}
$$ = new ConstPoolArray(AT, *$7);
delete $7;
}
| '[' EUINT64VAL 'x' Types ']' '[' ']' {
// Sized array with an empty initializer: only size zero is accepted
if ($2 != 0)
ThrowException("Type mismatch: constant sized array initialized with 0"
" arguments, but has size of " + itostr((int)$2) + "!");
vector<ConstPoolVal*> Empty;
$$ = new ConstPoolArray(ArrayType::getArrayType($4, 0), Empty);
}
| '{' TypeList '}' '{' ConstVector '}' {
// Structure constant; the element types are not checked against the
// initializers in this action
StructType::ElementTypes Types($2->begin(), $2->end());
delete $2;
const StructType *St = StructType::getStructType(Types);
$$ = new ConstPoolStruct(St, *$5);
delete $5;
}
| '{' '}' '{' '}' {
const StructType *St =
StructType::getStructType(StructType::ElementTypes());
vector<ConstPoolVal*> Empty;
$$ = new ConstPoolStruct(St, Empty);
}
/*
| Types '*' ConstVal {
assert(0);
$$ = 0;
}
*/
// ConstVector - A comma separated list of one or more constants.  Every element
// is passed through addConstValToConstantPool, and the pointer that call
// returns is what gets appended to the vector.  The vector itself is allocated
// with new by the single element alternative; the rules that consume a
// ConstVector are expected to delete it.
ConstVector : ConstVector ',' ConstVal {
($$ = $1)->push_back(addConstValToConstantPool($3));
}
| ConstVal {
$$ = new vector<ConstPoolVal*>();
$$->push_back(addConstValToConstantPool($1));
}
// ConstPool - A possibly empty sequence of constant definitions, each of which
// may be given a name through OptAssign.  Every constant is handed to
// addConstValToConstantPool; the rule itself produces no semantic value.
ConstPool : ConstPool OptAssign ConstVal {
if ($2) {
// Name the constant, then release the name string.
$3->setName($2);
free($2);
}
addConstValToConstantPool($3);
}
| /* empty: end of list */ {
}
//===----------------------------------------------------------------------===//
// Rules to match Modules
//===----------------------------------------------------------------------===//
// Module rule: Capture the result of parsing the whole file into a result
// variable...  The module built up by MethodList is stored both in $$ and in
// ParserResult, after which CurModule.ModuleDone() is invoked.
//
Module : MethodList {
$$ = ParserResult = $1;
CurModule.ModuleDone();
}
// MethodList - The module level constant pool followed by the IMPLEMENTATION
// keyword and then zero or more methods.  The semantic value is the module
// being built (CurModule.CurrentModule); each parsed method is appended to its
// method list and CurMeth.MethodDone() is called once that is done.
MethodList : MethodList Method {
$1->getMethodList().push_back($2);
CurMeth.MethodDone();
$$ = $1;
}
| ConstPool IMPLEMENTATION {
$$ = CurModule.CurrentModule;
}
//===----------------------------------------------------------------------===//
// Rules to match Method Headers
//===----------------------------------------------------------------------===//
// OptVAR_ID - An optional variable name.  Yields the VAR_ID value when a name
// is present (default action), or 0 when it is omitted.
OptVAR_ID : VAR_ID | /*empty*/ { $$ = 0; }
// ArgVal - A single formal argument: a type followed by an optional name.
// Produces a newly allocated MethodArgument of that type.
ArgVal : Types OptVAR_ID {
$$ = new MethodArgument($1);
if ($2) { // Was the argument named?
$$->setName($2);
free($2); // The string was strdup'd, so free it now.
}
}
// ArgListH - A non-empty, comma separated list of arguments.  The rule is right
// recursive: the list is allocated when the last argument is reduced and every
// earlier argument is pushed onto the front, so the finished list is in source
// order.
ArgListH : ArgVal ',' ArgListH {
$$ = $3;
$3->push_front($1);
}
| ArgVal {
$$ = new list<MethodArgument*>();
$$->push_front($1);
}
// ArgList - A possibly empty argument list.  An empty list is represented by a
// null pointer rather than by an empty list object, so users must test for 0.
ArgList : ArgListH {
$$ = $1;
}
| /* empty */ {
$$ = 0;
}
// MethodHeaderH - The method prototype: return type, quoted method name and the
// parenthesized argument list.  The action builds the MethodType from the
// return and argument types, creates the Method, inserts it into the module
// level value table, starts the per-method state (CurMeth.MethodStart) and then
// moves the parsed arguments into the method's argument list.
MethodHeaderH : TypesV STRINGCONSTANT '(' ArgList ')' {
MethodType::ParamTypes ParamTypeList;
// $4 is null when the argument list is empty
if ($4)
for (list<MethodArgument*>::iterator I = $4->begin(); I != $4->end(); I++)
ParamTypeList.push_back((*I)->getType());
const MethodType *MT = MethodType::getMethodType($1, ParamTypeList);
Method *M = new Method(MT, $2);
free($2); // Free strdup'd memory!
InsertValue(M, CurModule.Values);
CurMeth.MethodStart(M);
// Add all of the arguments we parsed to the method...
if ($4) { // Is null if empty...
Method::ArgumentListType &ArgList = M->getArgumentList();
for (list<MethodArgument*>::iterator I = $4->begin(); I != $4->end(); I++) {
InsertValue(*I);
ArgList.push_back(*I);
}
delete $4; // We're now done with the argument list
}
}
// MethodHeader - The prototype, the method's constant pool and the BEGINTOK
// token.  Yields the method that is currently being parsed.
MethodHeader : MethodHeaderH ConstPool BEGINTOK {
$$ = CurMeth.CurrentMethod;
}
// Method - A complete method: header plus basic blocks (both carried by
// BasicBlockList) terminated by the END token.
Method : BasicBlockList END {
$$ = $1;
}
//===----------------------------------------------------------------------===//
// Rules to match Basic Blocks
//===----------------------------------------------------------------------===//
// ConstValueRef - A reference to a constant that is written directly in the
// source: a signed or unsigned integer, true/false (encoded as the 64 bit
// integers 1 and 0), or a quoted string.
ConstValueRef : ESINT64VAL { // A reference to a direct constant
$$ = ValID::create($1);
}
| EUINT64VAL {
$$ = ValID::create($1);
}
| TRUE {
$$ = ValID::create((int64_t)1);
}
| FALSE {
$$ = ValID::create((int64_t)0);
}
| STRINGCONSTANT { // Quoted strings work too... especially for methods
$$ = ValID::create_conststr($1);
}
// ValueRef - A reference to a definition...  This is either a numbered
// reference (INTVAL), a named reference (VAR_ID), or one of the direct
// constants accepted by ConstValueRef.  The result is a ValID.
ValueRef : INTVAL { // Is it an integer reference...?
$$ = ValID::create($1);
}
| VAR_ID { // It must be a named reference then...
$$ = ValID::create($1);
}
| ConstValueRef {
$$ = $1;
}
// Types - Any type that is not matched by TypesV alone.
//
// The user may refer to a user defined type by its typeplane... check for this
// now...  (first alternative).  The remaining alternatives build derived types:
// method types, unsized and sized arrays, structures and pointers.
//
Types : ValueRef {
// Look the reference up in the 'type' plane; a miss is a user error.
Value *D = getVal(Type::TypeTy, $1, true);
if (D == 0) ThrowException("Invalid user defined type: " + $1.getName());
assert (D->getValueType() == Value::ConstantVal &&
"Internal error! User defined type not in const pool!");
ConstPoolType *CPT = (ConstPoolType*)D;
$$ = CPT->getValue();
}
| TypesV '(' TypeList ')' { // Method derived type?
MethodType::ParamTypes Params($3->begin(), $3->end());
delete $3;
$$ = MethodType::getMethodType($1, Params);
}
| TypesV '(' ')' { // Method derived type?
MethodType::ParamTypes Params; // Empty list
$$ = MethodType::getMethodType($1, Params);
}
| '[' Types ']' {
// Array with no element count given
$$ = ArrayType::getArrayType($2);
}
| '[' EUINT64VAL 'x' Types ']' {
// Array with an explicit element count
$$ = ArrayType::getArrayType($4, (int)$2);
}
| '{' TypeList '}' {
StructType::ElementTypes Elements($2->begin(), $2->end());
delete $2;
$$ = StructType::getStructType(Elements);
}
| '{' '}' {
// Structure with no elements
$$ = StructType::getStructType(StructType::ElementTypes());
}
| Types '*' {
$$ = PointerType::getPointerType($1);
}
// TypeList - A comma separated list of one or more types, collected in source
// order into a list allocated with new.  The rules that use a TypeList delete
// it once its contents have been copied.
TypeList : Types {
$$ = new list<const Type*>();
$$->push_back($1);
}
| TypeList ',' Types {
($$=$1)->push_back($3);
}
// BasicBlockList - The method header followed by one or more basic blocks.  The
// semantic value is the method itself; each block is appended to its basic
// block list as it is reduced.
BasicBlockList : BasicBlockList BasicBlock {
$1->getBasicBlocks().push_back($2);
$$ = $1;
}
| MethodHeader BasicBlock { // Do not allow methods with 0 basic blocks
$$ = $1; // in them...
$1->getBasicBlocks().push_back($2);
}
// Basic blocks are terminated by branching instructions:
// br, br/cc, switch, ret
//
// The terminator is appended to the instruction list that InstructionList
// built, and the finished block is registered with InsertValue.  A block may
// optionally start with a label, which becomes the block's name.
//
BasicBlock : InstructionList BBTerminatorInst {
$1->getInstList().push_back($2);
InsertValue($1);
$$ = $1;
}
| LABELSTR InstructionList BBTerminatorInst {
$2->getInstList().push_back($3);
$2->setName($1);
free($1); // Free the strdup'd memory...
InsertValue($2);
$$ = $2;
}
// InstructionList - Zero or more non-terminator instructions.  The empty
// alternative is where the BasicBlock object is created; each instruction that
// follows is appended to that block.
InstructionList : InstructionList Inst {
$1->getInstList().push_back($2);
$$ = $1;
}
| /* empty */ {
$$ = new BasicBlock();
}
// BBTerminatorInst - The instruction that ends a basic block: ret (with or
// without a value), br (unconditional or conditional), or switch.
BBTerminatorInst : RET Types ValueRef {              // Return with a result...
    $$ = new ReturnInst(getVal($2, $3));
  }
  | RET VOID {                                       // Return with no result...
    $$ = new ReturnInst();
  }
  | BR LABEL ValueRef {                              // Unconditional Branch...
    $$ = new BranchInst((BasicBlock*)getVal(Type::LabelTy, $3));
  }                                                  // Conditional Branch...
  | BR BOOL ValueRef ',' LABEL ValueRef ',' LABEL ValueRef {
    // Operands are passed as: first label, second label, condition
    $$ = new BranchInst((BasicBlock*)getVal(Type::LabelTy, $6),
                        (BasicBlock*)getVal(Type::LabelTy, $9),
                        getVal(Type::BoolTy, $3));
  }
  | SWITCH IntType ValueRef ',' LABEL ValueRef '[' JumpTable ']' {
    // $3 is the value switched on, $6 the label given before the jump table
    SwitchInst *S = new SwitchInst(getVal($2, $3),
                                   (BasicBlock*)getVal(Type::LabelTy, $6));
    $$ = S;

    // Copy every (constant, destination) pair into the instruction
    list<pair<ConstPoolVal*, BasicBlock*> >::iterator I = $8->begin(),
                                                      end = $8->end();
    for (; I != end; I++)
      S->dest_push_back(I->first, I->second);

    delete $8;   // The list was new'd by the JumpTable rule; we own it now
  }
// JumpTable - One or more switch cases of the form
//    <inttype> <constant> , label <dest>
// collected into a list (allocated with new) of (constant, destination) pairs.
// The case value is looked up with getVal; a null result is reported as an
// error.  There is no empty alternative, so at least one case is required.
JumpTable : JumpTable IntType ConstValueRef ',' LABEL ValueRef {
$$ = $1;
ConstPoolVal *V = (ConstPoolVal*)getVal($2, $3, true);
if (V == 0)
ThrowException("May only switch on a constant pool value!");
// NOTE(review): $5 is the semantic value of the LABEL token, presumably the
// label type - confirm against the lexer.
$$->push_back(make_pair(V, (BasicBlock*)getVal($5, $6)));
}
| IntType ConstValueRef ',' LABEL ValueRef {
$$ = new list<pair<ConstPoolVal*, BasicBlock*> >();
ConstPoolVal *V = (ConstPoolVal*)getVal($1, $2, true);
if (V == 0)
ThrowException("May only switch on a constant pool value!");
$$->push_back(make_pair(V, (BasicBlock*)getVal($4, $5)));
}
// Inst - A single non-terminator instruction with an optional "name =" prefix.
// The instruction is named (if a name was given) and registered with
// InsertValue before being handed to InstructionList.
Inst : OptAssign InstVal {
    if ($1) {               // Is this definition named??
      $2->setName($1);      // if so, assign the name...
      free($1);             // ...and release the string, just as the ConstPool
    }                       // rule does for its OptAssign value.
    InsertValue($2);
    $$ = $2;
  }
// ValueRefList - A type followed by one or more comma separated value
// references.  Only the first reference is preceded by a type; every later
// reference is resolved using the type of the first element of the list.
ValueRefList : Types ValueRef { // Used for PHI nodes and call statements...
$$ = new list<Value*>();
$$->push_back(getVal($1, $2));
}
| ValueRefList ',' ValueRef {
$$ = $1;
$1->push_back(getVal($1->front()->getType(), $3));
}
// ValueRefListE - Just like ValueRefList, except that it may also be empty!
// The empty case yields a null pointer, not an empty list.
ValueRefListE : ValueRefList | /*empty*/ { $$ = 0; }
// InstVal - The body of a non-terminator instruction: a binary or unary
// operator, a PHI node, a call, or one of the memory instructions.
InstVal : BinaryOps Types ValueRef ',' ValueRef {
    // Both operands are resolved with the single type that was written
    $$ = Instruction::getBinaryOperator($1, getVal($2, $3), getVal($2, $5));
    if ($$ == 0)
      ThrowException("binary operator returned null!");
  }
  | UnaryOps Types ValueRef {
    $$ = Instruction::getUnaryOperator($1, getVal($2, $3));
    if ($$ == 0)
      ThrowException("unary operator returned null!");
  }
  | PHI ValueRefList {
    // The PHI node takes the type of the first incoming value
    $$ = new PHINode($2->front()->getType());
    while ($2->begin() != $2->end()) {
      // TODO: Ensure all types are the same...
      ((PHINode*)$$)->addIncoming($2->front());
      $2->pop_front();
    }
    delete $2;  // Free the list...
  }
  | CALL Types ValueRef '(' ValueRefListE ')' {
    if (!$2->isMethodType())
      ThrowException("Can only call methods: invalid type '" +
                     $2->getName() + "'!");

    const MethodType *Ty = (const MethodType*)$2;

    Value *V = getVal(Ty, $3);
    if (V->getValueType() != Value::MethodVal || V->getType() != Ty)
      ThrowException("Cannot call: " + $3.getName() + "!");

    // Create or access a new type that corresponds to the function call...
    vector<Value *> Params;

    if ($5) {
      // Pull out just the arguments...
      Params.insert(Params.begin(), $5->begin(), $5->end());
      delete $5;
    }

    // Loop through MethodType's arguments and ensure they are specified
    // correctly!  This is done even when no arguments were written ($5 == 0),
    // so that calling a method that requires arguments with an empty argument
    // list is diagnosed instead of silently building a malformed call.
    //
    MethodType::ParamTypes::const_iterator I = Ty->getParamTypes().begin();
    unsigned i;
    for (i = 0; i < Params.size() && I != Ty->getParamTypes().end(); ++i,++I){
      if (Params[i]->getType() != *I)
        ThrowException("Parameter " + utostr(i) + " is not of type '" +
                       (*I)->getName() + "'!");
    }

    // Either list running out before the other means the counts differ
    if (i != Params.size() || I != Ty->getParamTypes().end())
      ThrowException("Invalid number of parameters detected!");

    // Create the call node...
    $$ = new CallInst((Method*)V, Params);
  }
  | MemoryInst {
    $$ = $1;
  }
// MemoryInst - malloc, alloca and free.
//
// For malloc and alloca the instruction is handed a ConstPoolType describing a
// pointer to the written type; that constant is first passed through
// addConstValToConstantPool and the pointer it returns is the one used.  The
// forms that take an element count are rejected unless the written type is an
// array type for which isSized() is false.  free requires a pointer type.
MemoryInst : MALLOC Types {
ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2));
TyVal = addConstValToConstantPool(TyVal);
$$ = new MallocInst((ConstPoolType*)TyVal);
}
| MALLOC Types ',' UINT ValueRef {
if (!$2->isArrayType() || ((const ArrayType*)$2)->isSized())
ThrowException("Trying to allocate " + $2->getName() +
" as unsized array!");
// $5 is the number of elements to allocate, resolved using the UINT token
Value *ArrSize = getVal($4, $5);
ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2));
TyVal = addConstValToConstantPool(TyVal);
$$ = new MallocInst((ConstPoolType*)TyVal, ArrSize);
}
| ALLOCA Types {
ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2));
TyVal = addConstValToConstantPool(TyVal);
$$ = new AllocaInst((ConstPoolType*)TyVal);
}
| ALLOCA Types ',' UINT ValueRef {
if (!$2->isArrayType() || ((const ArrayType*)$2)->isSized())
ThrowException("Trying to allocate " + $2->getName() +
" as unsized array!");
Value *ArrSize = getVal($4, $5);
ConstPoolVal *TyVal = new ConstPoolType(PointerType::getPointerType($2));
TyVal = addConstValToConstantPool(TyVal);
$$ = new AllocaInst((ConstPoolType*)TyVal, ArrSize);
}
| FREE Types ValueRef {
if (!$2->isPointerType())
ThrowException("Trying to free nonpointer type " + $2->getName() + "!");
$$ = new FreeInst(getVal($2, $3));
}
%%
int yyerror(char *ErrorMsg) {
ThrowException(string("Parse error: ") + ErrorMsg);
return 0;
}

5
lib/Bytecode/Makefile Normal file
View File

@ -0,0 +1,5 @@
LEVEL = ../..
DIRS = Reader Writer
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,218 @@
//===- ReadConst.cpp - Code to read constants and constant pools ------------===
//
// This file implements functionality to deserialize constants and entire
// constant pools.
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
//===------------------------------------------------------------------------===
#include "llvm/Module.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/DerivedTypes.h"
#include "ReaderInternals.h"
bool BytecodeParser::parseTypeConstant(const uchar *&Buf, const uchar *EndBuf,
ConstPoolVal *&V) {
const Type *Val = 0;
unsigned PrimType;
if (read_vbr(Buf, EndBuf, PrimType)) return true;
if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) {
V = new ConstPoolType(Val); // It's just a primitive ID.
return false;
}
switch (PrimType) {
case Type::MethodTyID: {
unsigned Typ;
if (read_vbr(Buf, EndBuf, Typ)) return true;
const Type *RetType = getType(Typ);
if (RetType == 0) return true;
MethodType::ParamTypes Params;
if (read_vbr(Buf, EndBuf, Typ)) return true;
while (Typ) {
const Type *Ty = getType(Typ);
if (Ty == 0) return true;
Params.push_back(Ty);
if (read_vbr(Buf, EndBuf, Typ)) return true;
}
Val = MethodType::getMethodType(RetType, Params);
break;
}
case Type::ArrayTyID: {
unsigned ElTyp;
if (read_vbr(Buf, EndBuf, ElTyp)) return true;
const Type *ElementType = getType(ElTyp);
if (ElementType == 0) return true;
int NumElements;
if (read_vbr(Buf, EndBuf, NumElements)) return true;
Val = ArrayType::getArrayType(ElementType, NumElements);
break;
}
case Type::StructTyID: {
unsigned Typ;
StructType::ElementTypes Elements;
if (read_vbr(Buf, EndBuf, Typ)) return true;
while (Typ) { // List is terminated by void/0 typeid
const Type *Ty = getType(Typ);
if (Ty == 0) return true;
Elements.push_back(Ty);
if (read_vbr(Buf, EndBuf, Typ)) return true;
}
Val = StructType::getStructType(Elements);
break;
}
case Type::PointerTyID: {
unsigned ElTyp;
if (read_vbr(Buf, EndBuf, ElTyp)) return true;
const Type *ElementType = getType(ElTyp);
if (ElementType == 0) return true;
Val = PointerType::getPointerType(ElementType);
break;
}
default:
cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to deserialize"
<< " primitive Type " << PrimType << "\n";
return true;
}
V = new ConstPoolType(Val);
return false;
}
bool BytecodeParser::parseConstPoolValue(const uchar *&Buf,
const uchar *EndBuf,
const Type *Ty, ConstPoolVal *&V) {
switch (Ty->getPrimitiveID()) {
case Type::BoolTyID: {
unsigned Val;
if (read_vbr(Buf, EndBuf, Val)) return true;
if (Val != 0 && Val != 1) return true;
V = new ConstPoolBool(Val == 1);
break;
}
case Type::UByteTyID: // Unsigned integer types...
case Type::UShortTyID:
case Type::UIntTyID: {
unsigned Val;
if (read_vbr(Buf, EndBuf, Val)) return true;
if (!ConstPoolUInt::isValueValidForType(Ty, Val)) return true;
V = new ConstPoolUInt(Ty, Val);
break;
}
case Type::ULongTyID: {
uint64_t Val;
if (read_vbr(Buf, EndBuf, Val)) return true;
V = new ConstPoolUInt(Ty, Val);
break;
}
case Type::SByteTyID: // Unsigned integer types...
case Type::ShortTyID:
case Type::IntTyID: {
int Val;
if (read_vbr(Buf, EndBuf, Val)) return true;
if (!ConstPoolSInt::isValueValidForType(Ty, Val)) return 0;
V = new ConstPoolSInt(Ty, Val);
break;
}
case Type::LongTyID: {
int64_t Val;
if (read_vbr(Buf, EndBuf, Val)) return true;
V = new ConstPoolSInt(Ty, Val);
break;
}
case Type::TypeTyID:
if (parseTypeConstant(Buf, EndBuf, V)) return true;
break;
case Type::ArrayTyID: {
const ArrayType *AT = (const ArrayType*)Ty;
unsigned NumElements;
if (AT->isSized()) // Sized array, # elements stored in type!
NumElements = (unsigned)AT->getNumElements();
else // Unsized array, # elements stored in stream!
if (read_vbr(Buf, EndBuf, NumElements)) return true;
vector<ConstPoolVal *> Elements;
while (NumElements--) { // Read all of the elements of the constant.
unsigned Slot;
if (read_vbr(Buf, EndBuf, Slot)) return true;
Value *V = getValue(AT->getElementType(), Slot, false);
if (!V || V->getValueType() != Value::ConstantVal)
return true;
Elements.push_back((ConstPoolVal*)V);
}
V = new ConstPoolArray(AT, Elements);
break;
}
case Type::StructTyID: {
const StructType *ST = (const StructType*)Ty;
const StructType::ElementTypes &ET = ST->getElementTypes();
vector<ConstPoolVal *> Elements;
for (unsigned i = 0; i < ET.size(); ++i) {
unsigned Slot;
if (read_vbr(Buf, EndBuf, Slot)) return true;
Value *V = getValue(ET[i], Slot, false);
if (!V || V->getValueType() != Value::ConstantVal)
return true;
Elements.push_back((ConstPoolVal*)V);
}
V = new ConstPoolStruct(ST, Elements);
break;
}
default:
cerr << __FILE__ << ":" << __LINE__
<< ": Don't know how to deserialize constant value of type '"
<< Ty->getName() << "'\n";
return true;
}
return false;
}
bool BytecodeParser::ParseConstantPool(const uchar *&Buf, const uchar *EndBuf,
SymTabValue::ConstantPoolType &CP,
ValueTable &Tab) {
while (Buf < EndBuf) {
unsigned NumEntries, Typ;
if (read_vbr(Buf, EndBuf, NumEntries) ||
read_vbr(Buf, EndBuf, Typ)) return true;
const Type *Ty = getType(Typ);
if (Ty == 0) return true;
for (unsigned i = 0; i < NumEntries; i++) {
ConstPoolVal *I;
if (parseConstPoolValue(Buf, EndBuf, Ty, I)) return true;
#if 0
cerr << " Read const value: <" << I->getType()->getName()
<< ">: " << I->getStrValue() << endl;
#endif
insertValue(I, Tab);
CP.insert(I);
}
}
return Buf > EndBuf;
}

View File

@ -0,0 +1,213 @@
//===- ReadInst.cpp - Code to read an instruction from bytecode -------------===
//
// This file defines the mechanism to read an instruction from a bytecode
// stream.
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
// TODO: Change from getValue(Raw.Arg1) etc, to getArg(Raw, 1)
// Make it check type, so that casts are checked.
//
//===------------------------------------------------------------------------===
#include "llvm/iOther.h"
#include "llvm/iTerminators.h"
#include "llvm/iMemory.h"
#include "llvm/DerivedTypes.h"
#include "ReaderInternals.h"
bool BytecodeParser::ParseRawInst(const uchar *&Buf, const uchar *EndBuf,
RawInst &Result) {
unsigned Op, Typ;
if (read(Buf, EndBuf, Op)) return true;
Result.NumOperands = Op >> 30;
Result.Opcode = (Op >> 24) & 63;
switch (Result.NumOperands) {
case 1:
Result.Ty = getType((Op >> 12) & 4095);
Result.Arg1 = Op & 4095;
if (Result.Arg1 == 4095) // Handle special encoding for 0 operands...
Result.NumOperands = 0;
break;
case 2:
Result.Ty = getType((Op >> 16) & 255);
Result.Arg1 = (Op >> 8 ) & 255;
Result.Arg2 = (Op >> 0 ) & 255;
break;
case 3:
Result.Ty = getType((Op >> 18) & 63);
Result.Arg1 = (Op >> 12) & 63;
Result.Arg2 = (Op >> 6 ) & 63;
Result.Arg3 = (Op >> 0 ) & 63;
break;
case 0:
Buf -= 4; // Hrm, try this again...
if (read_vbr(Buf, EndBuf, Result.Opcode)) return true;
if (read_vbr(Buf, EndBuf, Typ)) return true;
Result.Ty = getType(Typ);
if (read_vbr(Buf, EndBuf, Result.NumOperands)) return true;
switch (Result.NumOperands) {
case 0:
cerr << "Zero Arg instr found!\n";
return true; // This encoding is invalid!
case 1:
if (read_vbr(Buf, EndBuf, Result.Arg1)) return true;
break;
case 2:
if (read_vbr(Buf, EndBuf, Result.Arg1) ||
read_vbr(Buf, EndBuf, Result.Arg2)) return true;
break;
case 3:
if (read_vbr(Buf, EndBuf, Result.Arg1) ||
read_vbr(Buf, EndBuf, Result.Arg2) ||
read_vbr(Buf, EndBuf, Result.Arg3)) return true;
break;
default:
if (read_vbr(Buf, EndBuf, Result.Arg1) ||
read_vbr(Buf, EndBuf, Result.Arg2)) return true;
// Allocate a vector to hold arguments 3, 4, 5, 6 ...
Result.VarArgs = new vector<unsigned>(Result.NumOperands-2);
for (unsigned a = 0; a < Result.NumOperands-2; a++)
if (read_vbr(Buf, EndBuf, (*Result.VarArgs)[a])) return true;
break;
}
if (align32(Buf, EndBuf)) return true;
break;
}
//cerr << "NO: " << Result.NumOperands << " opcode: " << Result.Opcode
// << " Ty: " << Result.Ty->getName() << " arg1: " << Result.Arg1 << endl;
return false;
}
// ParseInstruction - Read one instruction from the stream and build the
// corresponding Instruction object, returned through Res.
//
// The raw encoding is decoded by ParseRawInst; operands are slot numbers that
// are turned into values with getValue.  When an instruction has more than
// three operands, the operands past the second live in Raw.VarArgs, which this
// function deletes once it has consumed them.
//
// Returns true on error, false on success.
//
bool BytecodeParser::ParseInstruction(const uchar *&Buf, const uchar *EndBuf,
                                      Instruction *&Res) {
  RawInst Raw;
  if (ParseRawInst(Buf, EndBuf, Raw)) return true;

  if (Raw.Opcode >= Instruction::FirstUnaryOp &&
      Raw.Opcode <  Instruction::NumUnaryOps  && Raw.NumOperands == 1) {
    Res = Instruction::getUnaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1));
    return false;
  } else if (Raw.Opcode >= Instruction::FirstBinaryOp &&
             Raw.Opcode <  Instruction::NumBinaryOps  && Raw.NumOperands == 2) {
    Res = Instruction::getBinaryOperator(Raw.Opcode, getValue(Raw.Ty, Raw.Arg1),
                                         getValue(Raw.Ty, Raw.Arg2));
    return false;
  } else if (Raw.Opcode == Instruction::PHINode) {
    PHINode *PN = new PHINode(Raw.Ty);
    switch (Raw.NumOperands) {
    case 0: cerr << "Invalid phi node encountered!\n";
            delete PN;
            return true;
    case 1: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1)); break;
    case 2: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
            PN->addIncoming(getValue(Raw.Ty, Raw.Arg2)); break;
    case 3: PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
            PN->addIncoming(getValue(Raw.Ty, Raw.Arg2));
            PN->addIncoming(getValue(Raw.Ty, Raw.Arg3)); break;
    default:
      PN->addIncoming(getValue(Raw.Ty, Raw.Arg1));
      PN->addIncoming(getValue(Raw.Ty, Raw.Arg2));
      {
        vector<unsigned> &args = *Raw.VarArgs;
        for (unsigned i = 0; i < args.size(); i++)
          PN->addIncoming(getValue(Raw.Ty, args[i]));
      }
      delete Raw.VarArgs;
    }
    Res = PN;
    return false;
  } else if (Raw.Opcode == Instruction::Ret) {
    if (Raw.NumOperands == 0) {
      Res = new ReturnInst(); return false;
    } else if (Raw.NumOperands == 1) {
      Res = new ReturnInst(getValue(Raw.Ty, Raw.Arg1)); return false;
    }
    // Any other operand count falls through to the error at the bottom
  } else if (Raw.Opcode == Instruction::Br) {
    if (Raw.NumOperands == 1) {
      Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1));
      return false;
    } else if (Raw.NumOperands == 3) {
      Res = new BranchInst((BasicBlock*)getValue(Type::LabelTy, Raw.Arg1),
                           (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2),
                                        getValue(Type::BoolTy , Raw.Arg3));
      return false;
    }
    // Any other operand count falls through to the error at the bottom
  } else if (Raw.Opcode == Instruction::Switch) {
    SwitchInst *I =
      new SwitchInst(getValue(Raw.Ty, Raw.Arg1),
                     (BasicBlock*)getValue(Type::LabelTy, Raw.Arg2));
    Res = I;
    if (Raw.NumOperands < 3) return false;  // No destinations?  Weird.

    // Operands after the first two come in (value, label) pairs.  VarArgs only
    // exists when there are more than three operands.
    if (Raw.NumOperands == 3 || Raw.VarArgs->size() & 1) {
      cerr << "Switch statement with odd number of arguments!\n";
      if (Raw.NumOperands > 3) delete Raw.VarArgs;
      delete I;
      return true;
    }

    vector<unsigned> &args = *Raw.VarArgs;
    for (unsigned i = 0; i < args.size(); i += 2)
      I->dest_push_back((ConstPoolVal*)getValue(Raw.Ty, args[i]),
                        (BasicBlock*)getValue(Type::LabelTy, args[i+1]));

    delete Raw.VarArgs;
    return false;
  } else if (Raw.Opcode == Instruction::Call) {
    Method *M = (Method*)getValue(Raw.Ty, Raw.Arg1);
    if (M == 0) {
      if (Raw.NumOperands > 3) delete Raw.VarArgs;
      return true;
    }

    const MethodType::ParamTypes &PL = M->getMethodType()->getParamTypes();
    MethodType::ParamTypes::const_iterator It = PL.begin();

    // Operand 1 is the callee; every further operand is an argument and must
    // be matched by a parameter type.  It is checked against PL.end() before
    // each dereference so that a call with too many arguments is rejected
    // instead of reading past the end of the parameter list.
    vector<Value *> Params;
    switch (Raw.NumOperands) {
    case 0: cerr << "Invalid call instruction encountered!\n";
            return true;
    case 1: break;
    case 2: if (It == PL.end()) return true;
            Params.push_back(getValue(*It++, Raw.Arg2)); break;
    case 3: if (It == PL.end()) return true;
            Params.push_back(getValue(*It++, Raw.Arg2));
            if (It == PL.end()) return true;
            Params.push_back(getValue(*It++, Raw.Arg3)); break;
    default:
      if (It == PL.end()) { delete Raw.VarArgs; return true; }
      Params.push_back(getValue(*It++, Raw.Arg2));
      {
        vector<unsigned> &args = *Raw.VarArgs;
        for (unsigned i = 0; i < args.size(); i++) {
          if (It == PL.end()) { delete Raw.VarArgs; return true; }
          Params.push_back(getValue(*It++, args[i]));
        }
      }
      delete Raw.VarArgs;
    }
    if (It != PL.end()) return true;     // Too few arguments

    Res = new CallInst(M, Params);
    return false;
  } else if (Raw.Opcode == Instruction::Malloc) {
    if (Raw.NumOperands > 2) return true;
    // The optional second operand is looked up in the uint plane
    Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0;
    Res = new MallocInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz);
    return false;
  } else if (Raw.Opcode == Instruction::Alloca) {
    if (Raw.NumOperands > 2) return true;
    Value *Sz = (Raw.NumOperands == 2) ? getValue(Type::UIntTy, Raw.Arg2) : 0;
    Res = new AllocaInst((ConstPoolType*)getValue(Type::TypeTy, Raw.Arg1), Sz);
    return false;
  } else if (Raw.Opcode == Instruction::Free) {
    Value *Val = getValue(Raw.Ty, Raw.Arg1);
    // getValue can return null; treat that like any other malformed operand
    if (Val == 0 || !Val->getType()->isPointerType()) return true;
    Res = new FreeInst(Val);
    return false;
  }

  cerr << "Unrecognized instruction! " << Raw.Opcode << endl;
  return true;
}

View File

@ -0,0 +1,7 @@
LEVEL = ../../..
LIBRARYNAME = bcreader
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,478 @@
//===- Reader.cpp - Code to read bytecode files -----------------------------===
//
// This library implements the functionality defined in llvm/Bytecode/Reader.h
//
// Note that this library should be as fast as possible, reentrant, and
// threadsafe!!
//
// TODO: Make error message outputs be configurable depending on an option?
// TODO: Allow passing in an option to ignore the symbol table
//
//===------------------------------------------------------------------------===
#include "llvm/Bytecode/Reader.h"
#include "llvm/Bytecode/Format.h"
#include "llvm/Module.h"
#include "llvm/BasicBlock.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/iOther.h"
#include "ReaderInternals.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
if (Ty->isPrimitiveType()) {
Slot = Ty->getPrimitiveID();
} else {
TypeMapType::iterator I = TypeMap.find(Ty);
if (I == TypeMap.end()) return true; // Didn't find type!
Slot = I->second;
}
//cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl;
return false;
}
// getType - Map a type ID read from the stream to a Type.  Primitive IDs are
// resolved directly; anything else is looked up as a constant in the 'type'
// plane of the value tables.  Returns null if no such type has been read.
//
const Type *BytecodeParser::getType(unsigned ID) {
  if (const Type *Prim = Type::getPrimitiveType((Type::PrimitiveID)ID))
    return Prim;

  //cerr << "Looking up Type ID: " << ID << endl;

  // Not primitive: it must be a ConstPoolType that was read earlier.  No
  // placeholder is created when it is missing.
  const Value *TyVal = getValue(Type::TypeTy, ID, false);
  if (!TyVal) return 0;

  assert(TyVal->getType() == Type::TypeTy &&
         TyVal->getValueType() == Value::ConstantVal);

  return ((const ConstPoolType*)TyVal)->getValue();
}
// insertValue - Append Def to the plane of ValueTab that corresponds to its
// type, growing the table if that plane does not exist yet.
//
// When Def is a type constant, the type it describes is also recorded in
// TypeMap (unless already present) with the slot number it will be referred to
// by: its index in the plane, offset by FirstDerivedTyID and, for the method
// level table (Values), by the number of module level types.
//
// Returns true if the type of Def has no slot, false on success.
//
bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) {
  unsigned Plane;
  if (getTypeSlot(Def->getType(), Plane)) return true;

  if (Plane >= ValueTab.size())
    ValueTab.resize(Plane+1, ValueList());

  ValueList &Slots = ValueTab[Plane];

  //cerr << "insertValue Values[" << Plane << "][" << Slots.size()
  //     << "] = " << Def << endl;

  const bool IsTypeConstant = Plane == Type::TypeTyID &&
                              Def->getValueType() == Value::ConstantVal;
  if (IsTypeConstant) {
    const Type *Ty = ((const ConstPoolType*)Def)->getValue();

    unsigned Base = FirstDerivedTyID;
    if (&ValueTab == &Values)    // Take into consideration module level types
      Base += ModuleValues[Plane].size();

    // Only the first definition of a type gets to pick its slot number
    if (TypeMap.find(Ty) == TypeMap.end())
      TypeMap[Ty] = Slots.size()+Base;
  }

  Slots.push_back(Def);
  return false;
}
// getValue - Return the value that lives in slot oNum of the plane for type Ty.
//
// Slots are numbered with the module level values (ModuleValues) first and the
// values of the current method (Values) after them.  In the 'type' plane the
// primitive types come first and are not stored in the tables, so derived type
// slots are rebased by FirstDerivedTyID before the lookup.
//
// If the slot has not been filled in yet and Create is false, null is
// returned.  Otherwise a placeholder of the requested type is created,
// recorded for later resolution, and returned.  Null is also returned when the
// type has no slot or the placeholder could not be recorded.
//
Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
unsigned Num = oNum;
unsigned type; // The type plane it lives in...
if (getTypeSlot(Ty, type)) return 0; // TODO: true
if (type == Type::TypeTyID) { // The 'type' plane has implicit values
const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
if (T) return (Value*)T; // Asked for a primitive type...
// Otherwise, derived types need offset...
Num -= FirstDerivedTyID;
}
// Module level values come first in the numbering...
if (ModuleValues.size() > type) {
if (ModuleValues[type].size() > Num)
return ModuleValues[type][Num];
// ...so what remains is an index into the method level table.
Num -= ModuleValues[type].size();
}
if (Values.size() > type && Values[type].size() > Num)
return Values[type][Num];
if (!Create) return 0; // Do not create a placeholder?
// Not defined yet: hand out a placeholder that remembers the original slot.
Value *d = 0;
switch (Ty->getPrimitiveID()) {
case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break;
case Type::MethodTyID:
cerr << "Creating method pholder! : " << type << ":" << oNum << " "
<< Ty->getName() << endl;
d = new MethPHolder(Ty, oNum);
// Method placeholders are kept in their own table, LateResolveModuleValues.
insertValue(d, LateResolveModuleValues);
return d;
default: d = new DefPHolder(Ty, oNum); break;
}
assert(d != 0 && "How did we not make something?");
if (insertValue(d, LateResolveValues)) return 0;
return d;
}
// postResolveValues - Resolve every placeholder recorded in ValTab.  Each
// placeholder is removed from the table and looked up again by its type and
// original slot number; if the real value now exists, all uses of the
// placeholder are redirected to it and the placeholder is deleted.
//
// Returns true if at least one placeholder could not be resolved.  Such
// placeholders are deliberately not deleted (they may still have uses), so
// they are leaked.
//
bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
  bool SawUnresolved = false;

  for (unsigned Plane = 0; Plane < ValTab.size(); ++Plane) {
    ValueList &Pending = ValTab[Plane];

    // Work from the back of the plane until it is empty
    while (Pending.size() != 0) {
      Value *PHolder = Pending[Pending.size()-1];
      unsigned Slot = getValueIDNumberFromPlaceHolder(PHolder);
      Pending.pop_back();

      Value *Real = getValue(PHolder->getType(), Slot, false);
      if (Real != 0) {
        // Fixup all of the uses of this placeholder def...
        PHolder->replaceAllUsesWith(Real);

        // Now that all the uses are gone, delete the placeholder...
        delete PHolder;
        continue;
      }

      SawUnresolved = true;   // Unresolved thinger
      cerr << "Unresolvable reference found: <" << PHolder->getType()->getName()
           << ">:" << Slot << "!\n";
    }
  }

  return SawUnresolved;
}
bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
BasicBlock *&BB) {
BB = new BasicBlock();
while (Buf < EndBuf) {
Instruction *Def;
if (ParseInstruction(Buf, EndBuf, Def)) {
delete BB;
return true;
}
if (Def == 0) { delete BB; return true; }
if (insertValue(Def, Values)) { delete BB; return true; }
BB->getInstList().push_back(Def);
}
return false;
}
bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) {
while (Buf < EndBuf) {
// Symtab block header: [num entries][type id number]
unsigned NumEntries, Typ;
if (read_vbr(Buf, EndBuf, NumEntries) ||
read_vbr(Buf, EndBuf, Typ)) return true;
const Type *Ty = getType(Typ);
if (Ty == 0) return true;
for (unsigned i = 0; i < NumEntries; i++) {
// Symtab entry: [def slot #][name]
unsigned slot;
if (read_vbr(Buf, EndBuf, slot)) return true;
string Name;
if (read(Buf, EndBuf, Name, false)) // Not aligned...
return true;
Value *D = getValue(Ty, slot, false); // Find mapping...
if (D == 0) return true;
D->setName(Name);
}
}
return Buf > EndBuf;
}
bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
Module *C) {
// Clear out the local values table...
Values.clear();
if (MethodSignatureList.empty()) return true; // Unexpected method!
const MethodType *MTy = MethodSignatureList.front().first;
unsigned MethSlot = MethodSignatureList.front().second;
MethodSignatureList.pop_front();
Method *M = new Method(MTy);
const MethodType::ParamTypes &Params = MTy->getParamTypes();
for (MethodType::ParamTypes::const_iterator It = Params.begin();
It != Params.end(); It++) {
MethodArgument *MA = new MethodArgument(*It);
if (insertValue(MA, Values)) { delete M; return true; }
M->getArgumentList().push_back(MA);
}
while (Buf < EndBuf) {
unsigned Type, Size;
const uchar *OldBuf = Buf;
if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; }
switch (Type) {
case BytecodeFormat::ConstantPool:
if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) {
cerr << "Error reading constant pool!\n";
delete M; return true;
}
break;
case BytecodeFormat::BasicBlock: {
BasicBlock *BB;
if (ParseBasicBlock(Buf, Buf+Size, BB) ||
insertValue(BB, Values)) {
cerr << "Error parsing basic block!\n";
delete M; return true; // Parse error... :(
}
M->getBasicBlocks().push_back(BB);
break;
}
case BytecodeFormat::SymbolTable:
if (ParseSymbolTable(Buf, Buf+Size)) {
cerr << "Error reading method symbol table!\n";
delete M; return true;
}
break;
default:
Buf += Size;
if (OldBuf > Buf) return true; // Wrap around!
break;
}
if (align32(Buf, EndBuf)) {
delete M; // Malformed bc file, read past end of block.
return true;
}
}
if (postResolveValues(LateResolveValues) ||
postResolveValues(LateResolveModuleValues)) {
delete M; return true; // Unresolvable references!
}
Value *MethPHolder = getValue(MTy, MethSlot, false);
assert(MethPHolder && "Something is broken no placeholder found!");
assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?");
unsigned type; // Type slot
assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
getTypeSlot(MTy, type);
C->getMethodList().push_back(M);
// Replace placeholder with the real method pointer...
ModuleValues[type][MethSlot] = M;
// If anyone is using the placeholder make them use the real method instead
MethPHolder->replaceAllUsesWith(M);
// We don't need the placeholder anymore!
delete MethPHolder;
return false;
}
bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
Module *C) {
if (!MethodSignatureList.empty()) return true; // Two ModuleGlobal blocks?
// Read the method signatures for all of the methods that are coming, and
// create fillers in the Value tables.
unsigned MethSignature;
if (read_vbr(Buf, End, MethSignature)) return true;
while (MethSignature != Type::VoidTyID) { // List is terminated by Void
const Type *Ty = getType(MethSignature);
if (!Ty || !Ty->isMethodType()) {
cerr << "Method not meth type! ";
if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl;
return true;
}
// When the ModuleGlobalInfo section is read, we load the type of each method
// and the 'ModuleValues' slot that it lands in. We then load a placeholder
// into its slot to reserve it. When the method is loaded, this placeholder
// is replaced.
// Insert the placeholder...
Value *Def = new MethPHolder(Ty, 0);
insertValue(Def, ModuleValues);
// Figure out which entry of its typeslot it went into...
unsigned TypeSlot;
if (getTypeSlot(Def->getType(), TypeSlot)) return true;
unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
// Keep track of this information in a linked list that is emptied as
// methods are loaded...
//
MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo));
if (read_vbr(Buf, End, MethSignature)) return true;
}
if (align32(Buf, End)) return true;
// This is for future proofing... in the future extra fields may be added that
// we don't understand, so we transparently ignore them.
//
Buf = End;
return false;
}
// ParseModule - Read the top level module block, which must span exactly the
// rest of the buffer.  The block starts with the first derived type ID and is
// followed by nested blocks: module global info, constant pool, methods and
// symbol table.  Unknown block types are reported and skipped.
//
// On success the new module is returned through C and false is returned.  On
// error true is returned; if a module had been allocated it has been deleted
// and C must not be used.
//
bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
                                 Module *&C) {
  unsigned Type, Size;
  if (readBlock(Buf, EndBuf, Type, Size)) return true;
  if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
    return true;                      // Hrm, not a module block?

  MethodSignatureList.clear();                 // Just in case...

  // Read into instance variables...
  if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true;
  if (align32(Buf, EndBuf)) return true;

  C = new Module();

  while (Buf < EndBuf) {
    const uchar *OldBuf = Buf;
    if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; }
    switch (Type) {
    case BytecodeFormat::ModuleGlobalInfo:
      if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
        cerr << "Error reading class global info section!\n";
        delete C; return true;
      }
      break;

    case BytecodeFormat::ConstantPool:
      if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) {
        cerr << "Error reading class constant pool!\n";
        delete C; return true;
      }
      break;

    case BytecodeFormat::Method: {
      if (ParseMethod(Buf, Buf+Size, C)) {
        delete C; return true;               // Error parsing method
      }
      break;
    }

    case BytecodeFormat::SymbolTable:
      if (ParseSymbolTable(Buf, Buf+Size)) {
        cerr << "Error reading class symbol table!\n";
        delete C; return true;
      }
      break;

    default:
      cerr << "Unknown class block: " << Type << endl;
      Buf += Size;
      if (OldBuf > Buf) {       // Wrap around!
        delete C;               // Don't leak the module on this error path
        return true;
      }
      break;
    }
    if (align32(Buf, EndBuf)) { delete C; return true; }
  }

  // Every signature announced in the global info block must have been
  // consumed by a method block.
  if (!MethodSignatureList.empty()) {     // Expected more methods!
    delete C;                             // Don't leak the incomplete module
    return true;
  }
  return false;
}
// ParseBytecode - Check the 4 byte "llvm" signature at the start of the buffer
// and then parse the module that follows it.  Returns the parsed module, or
// null if the signature is wrong or parsing fails.
//
Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
  LateResolveValues.clear();

  // Expected signature: 'l', 'l', 'v', 'm' packed lowest byte first.
  const unsigned ExpectedSig = 'l' | ('l' << 8) | ('v' << 16) | ('m' << 24);

  unsigned Sig;
  if (read(Buf, EndBuf, Sig)) return 0;      // Could not read signature
  if (Sig != ExpectedSig) return 0;          // Invalid signature!

  Module *Result;
  return ParseModule(Buf, EndBuf, Result) ? 0 : Result;
}
// ParseBytecodeBuffer - Parse the Length bytes of bytecode starting at Buffer
// with a fresh parser.  Returns whatever BytecodeParser::ParseBytecode returns.
//
Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
  const uchar *BufferEnd = Buffer+Length;
  return BytecodeParser().ParseBytecode(Buffer, BufferEnd);
}
// ParseBytecodeFile - Parse and return the module stored in the named bytecode
// file.  If Filename is "-", the bytecode is read from standard input instead.
// Returns null if the input cannot be opened, read or mapped, if it is empty,
// or if it does not parse.
//
Module *ParseBytecodeFile(const string &Filename) {
  struct stat StatBuf;
  Module *Result = 0;

  if (Filename != string("-")) {        // Read from a file...
    // open() needs a NUL terminated string, which only c_str() guarantees.
    int FD = open(Filename.c_str(), O_RDONLY);
    if (FD == -1) return 0;

    if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; }

    int Length = StatBuf.st_size;
    if (Length == 0) { close(FD); return 0; }
    uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
                                 MAP_PRIVATE, FD, 0);
    if (Buffer == (uchar*)-1) { close(FD); return 0; }

    BytecodeParser Parser;
    Result = Parser.ParseBytecode(Buffer, Buffer+Length);

    munmap((char*)Buffer, Length);
    close(FD);
  } else {                              // Read from stdin
    size_t FileSize = 0;
    int BlockSize;
    uchar Buffer[4096], *FileData = 0;

    // Slurp all of stdin into a growing heap block, one buffer at a time.
    while ((BlockSize = read(0, Buffer, sizeof(Buffer)))) {
      if (BlockSize == -1) { free(FileData); return 0; }

      // Keep the old block reachable so it can be freed if realloc fails.
      uchar *NewData = (uchar*)realloc(FileData, FileSize+BlockSize);
      if (NewData == 0) { free(FileData); return 0; }
      FileData = NewData;

      memcpy(FileData+FileSize, Buffer, BlockSize);
      FileSize += BlockSize;
    }

    if (FileSize == 0) { free(FileData); return 0; }

#define ALIGN_PTRS 1
#if ALIGN_PTRS
    // Copy the data into an anonymous mapping before handing it to the parser.
    uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
                              MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (Buf == (uchar*)-1) { free(FileData); return 0; }
    memcpy(Buf, FileData, FileSize);    // Copy BEFORE releasing the source
    free(FileData);
#else
    uchar *Buf = FileData;
#endif

    BytecodeParser Parser;
    Result = Parser.ParseBytecode(Buf, Buf+FileSize);

#if ALIGN_PTRS
    munmap((char*)Buf, FileSize);   // Free mmap'd data area
#else
    free(FileData);                 // Free realloc'd block of memory
#endif
  }

  return Result;
}

View File

@ -0,0 +1,146 @@
//===-- ReaderInternals.h - Definitions internal to the reader ---*- C++ -*--=//
//
// This header file defines various stuff that is used by the bytecode reader.
//
//===----------------------------------------------------------------------===//
#ifndef READER_INTERNALS_H
#define READER_INTERNALS_H
#include "llvm/Bytecode/Primitives.h"
#include "llvm/SymTabValue.h"
#include "llvm/Method.h"
#include "llvm/Instruction.h"
#include <map>
#include <utility>
class BasicBlock;
class Method;
class Module;
class Type;
typedef unsigned char uchar;
// RawInst - The raw fields of one instruction, as they come out of the
// bytecode stream.
//
struct RawInst {
  unsigned NumOperands;
  unsigned Opcode;
  const Type *Ty;
  unsigned Arg1;
  unsigned Arg2;
  union {
    unsigned Arg3;
    vector<unsigned> *VarArgs;   // Contains arg #3,4,5... if NumOperands > 3
  };
};
// BytecodeParser - Holds the state needed while decoding one bytecode buffer.
// The only public entry point is ParseBytecode.
//
class BytecodeParser {
public:
  BytecodeParser() {
    // Start from the compiled-in default; ParseModule overwrites this with the
    // value stored at the start of the Module block.
    FirstDerivedTyID = Type::FirstDerivedTyID;
  }

  // ParseBytecode - Parse the bytecode in [Buf, EndBuf) and return the
  // resulting module, or null on failure.
  Module *ParseBytecode(const uchar *Buf, const uchar *EndBuf);

private:          // All of this data is transient across calls to ParseBytecode
  typedef vector<Value *> ValueList;
  typedef vector<ValueList> ValueTable;
  typedef map<const Type *, unsigned> TypeMapType;

  ValueTable Values, LateResolveValues;
  ValueTable ModuleValues, LateResolveModuleValues;
  TypeMapType TypeMap;

  // Read by ParseModule from the start of the Module block.
  unsigned FirstDerivedTyID;

  // When the ModuleGlobalInfo section is read, we load the type of each method
  // and the 'ModuleValues' slot that it lands in.  We then load a placeholder
  // into its slot to reserve it.  When the method is loaded, this placeholder
  // is replaced.
  //
  list<pair<const MethodType *, unsigned> > MethodSignatureList;

  // Block parsers.  In this file these return true on error.
  bool ParseModule          (const uchar * Buf, const uchar *End, Module *&);
  bool ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End, Module *);
  bool ParseSymbolTable     (const uchar *&Buf, const uchar *End);
  bool ParseMethod          (const uchar *&Buf, const uchar *End, Module *);
  bool ParseBasicBlock      (const uchar *&Buf, const uchar *End, BasicBlock *&);
  bool ParseInstruction     (const uchar *&Buf, const uchar *End, Instruction *&);
  bool ParseRawInst         (const uchar *&Buf, const uchar *End, RawInst &);

  bool ParseConstantPool(const uchar *&Buf, const uchar *EndBuf,
                         SymTabValue::ConstantPoolType &CP, ValueTable &Tab);

  bool parseConstPoolValue(const uchar *&Buf, const uchar *End,
                           const Type *Ty, ConstPoolVal *&V);
  bool parseTypeConstant  (const uchar *&Buf, const uchar *, ConstPoolVal *&);

  // Value/type table helpers.
  Value      *getValue(const Type *Ty, unsigned num, bool Create = true);
  const Type *getType(unsigned ID);

  // The two parameters used to share the name 'D', which is a redefinition
  // error; the table parameter is now named ValueTab.
  bool insertValue(Value *D, vector<ValueList> &ValueTab);
  bool postResolveValues(ValueTable &ValTab);

  bool getTypeSlot(const Type *Ty, unsigned &Slot);
};
// PlaceholderDef - Wraps one of the placeholder helper classes and attaches a
// numeric ID to it, retrievable with getID().
//
template<class SuperType>
class PlaceholderDef : public SuperType {
public:
  PlaceholderDef(const Type *Ty, unsigned id) : SuperType(Ty), ID(id) {}

  unsigned getID() { return ID; }

private:
  unsigned ID;
};
// InstPlaceHolderHelper - Instruction stand-in that has no operands.  It is
// created with the UserOp1 opcode and an empty name.
//
struct InstPlaceHolderHelper : public Instruction {
  InstPlaceHolderHelper(const Type *Ty) : Instruction(Ty, UserOp1, "") {}

  virtual void dropAllReferences() {}
  virtual string getOpcode() const { return "placeholder"; }

  // Placeholders are never cloned.
  virtual Instruction *clone() const { abort(); return 0; }

  // No "operands"...
  virtual Value *getOperand(unsigned) { return 0; }
  virtual const Value *getOperand(unsigned) const { return 0; }
  virtual bool setOperand(unsigned, Value *) { return false; }
  virtual unsigned getNumOperands() const { return 0; }
};
// BBPlaceHolderHelper - BasicBlock stand-in.  The type argument exists only so
// that the constructor matches the other placeholder helpers; it must be the
// label type.
//
struct BBPlaceHolderHelper : public BasicBlock {
  BBPlaceHolderHelper(const Type *Ty)
    : BasicBlock() {
    assert(Ty->isLabelType());
  }
};
// MethPlaceHolderHelper - Method stand-in, constructed from a type that must
// be a method type.
//
struct MethPlaceHolderHelper : public Method {
  MethPlaceHolderHelper(const Type *Ty) : Method((const MethodType*)Ty) {
    assert(Ty->isMethodType() && "Method placeholders must be method types!");
  }
};
// Concrete placeholder types: an ID-carrying PlaceholderDef wrapped around the
// instruction, basic block and method helper classes respectively.
typedef PlaceholderDef<InstPlaceHolderHelper> DefPHolder;
typedef PlaceholderDef<BBPlaceHolderHelper> BBPHolder;
typedef PlaceholderDef<MethPlaceHolderHelper> MethPHolder;
// getValueIDNumberFromPlaceHolder - Return the ID stored in a placeholder.
// The primitive ID of the placeholder's type selects which placeholder class
// the value is cast to: label -> BBPHolder, method -> MethPHolder, anything
// else -> DefPHolder.
//
static inline unsigned getValueIDNumberFromPlaceHolder(Value *Def) {
  const unsigned Kind = Def->getType()->getPrimitiveID();

  if (Kind == Type::LabelTyID)
    return ((BBPHolder*)Def)->getID();
  if (Kind == Type::MethodTyID)
    return ((MethPHolder*)Def)->getID();
  return ((DefPHolder*)Def)->getID();
}
// readBlock - Read a block header (type, then size) from the stream,
// advancing Buf.  Returns true if either read fails.  When DEBUG_OUTPUT is
// enabled the header is also dumped to cerr; the location printed is the
// position of Buf after the reads, modulo 4096.
//
static inline bool readBlock(const uchar *&Buf, const uchar *EndBuf,
                             unsigned &Type, unsigned &Size) {
  bool Failed = read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size);
#if DEBUG_OUTPUT
  // Go through unsigned long: casting a pointer straight to unsigned loses
  // bits (and is rejected) where pointers are wider than unsigned.
  cerr << "StartLoc = " << ((unsigned long)Buf & 4095)
       << " Type = " << Type << " Size = " << Size << endl;
#endif
  return Failed;
}
#endif

View File

@ -0,0 +1,154 @@
//===-- WriteConst.cpp - Functions for writing constants ---------*- C++ -*--=//
//
// This file implements the routines for encoding constants to a bytecode
// stream.
//
// Note that the performance of this library is not terribly important, because
// it shouldn't be used by JIT type applications... so it is not a huge focus
// at least. :)
//
//===----------------------------------------------------------------------===//
#include "WriterInternals.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/SymbolTable.h"
#include "llvm/DerivedTypes.h"
void BytecodeWriter::outputType(const Type *T) {
output_vbr((unsigned)T->getPrimitiveID(), Out);
// That's all there is to handling primitive types...
if (T->isPrimitiveType())
return; // We might do this if we alias a prim type: %x = type int
switch (T->getPrimitiveID()) { // Handle derived types now.
case Type::MethodTyID: {
const MethodType *MT = (const MethodType*)T;
int Slot = Table.getValSlot(MT->getReturnType());
assert(Slot != -1 && "Type used but not available!!");
output_vbr((unsigned)Slot, Out);
// Output all of the arguments...
MethodType::ParamTypes::const_iterator I = MT->getParamTypes().begin();
for (; I != MT->getParamTypes().end(); I++) {
Slot = Table.getValSlot(*I);
assert(Slot != -1 && "Type used but not available!!");
output_vbr((unsigned)Slot, Out);
}
// Terminate list with VoidTy
output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out);
break;
}
case Type::ArrayTyID: {
const ArrayType *AT = (const ArrayType*)T;
int Slot = Table.getValSlot(AT->getElementType());
assert(Slot != -1 && "Type used but not available!!");
output_vbr((unsigned)Slot, Out);
//cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl;
output_vbr(AT->getNumElements(), Out);
break;
}
case Type::StructTyID: {
const StructType *ST = (const StructType*)T;
// Output all of the element types...
StructType::ElementTypes::const_iterator I = ST->getElementTypes().begin();
for (; I != ST->getElementTypes().end(); I++) {
int Slot = Table.getValSlot(*I);
assert(Slot != -1 && "Type used but not available!!");
output_vbr((unsigned)Slot, Out);
}
// Terminate list with VoidTy
output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out);
break;
}
case Type::PointerTyID: {
const PointerType *PT = (const PointerType*)T;
int Slot = Table.getValSlot(PT->getValueType());
assert(Slot != -1 && "Type used but not available!!");
output_vbr((unsigned)Slot, Out);
break;
}
case Type::ModuleTyID:
case Type::PackedTyID:
default:
cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize"
<< " Type '" << T->getName() << "'\n";
break;
}
}
// outputConstant - Emit the encoding of one constant, selected by the
// primitive ID of its type:
//   bool:             1 or 0
//   unsigned/signed:  the integer value
//   type:             the type itself, via outputType
//   array:            the element count (unsized arrays only), then the slot
//                     of each element
//   struct:           the slot of each member
// Returns false on success.  Returns true, after reporting on cerr, for
// constants that cannot be serialized (floating point, void, label, and any
// other type).
//
bool BytecodeWriter::outputConstant(const ConstPoolVal *CPV) {
  switch (CPV->getType()->getPrimitiveID()) {
  case Type::BoolTyID:    // Boolean Types
    if (((const ConstPoolBool*)CPV)->getValue())
      output_vbr((unsigned)1, Out);
    else
      output_vbr((unsigned)0, Out);
    break;

  case Type::UByteTyID:   // Unsigned integer types...
  case Type::UShortTyID:
  case Type::UIntTyID:
  case Type::ULongTyID:
    output_vbr(((const ConstPoolUInt*)CPV)->getValue(), Out);
    break;

  case Type::SByteTyID:   // Signed integer types...
  case Type::ShortTyID:
  case Type::IntTyID:
  case Type::LongTyID:
    output_vbr(((const ConstPoolSInt*)CPV)->getValue(), Out);
    break;

  case Type::TypeTyID:     // Serialize type type
    outputType(((const ConstPoolType*)CPV)->getValue());
    break;

  case Type::ArrayTyID: {
    const ConstPoolArray *CPA = (const ConstPoolArray *)CPV;
    unsigned size = CPA->getValues().size();
    if (!((const ArrayType *)CPA->getType())->isSized())
      output_vbr(size, Out);            // Not for sized arrays!!!

    for (unsigned i = 0; i < size; i++) {
      int Slot = Table.getValSlot(CPA->getValues()[i]);
      assert(Slot != -1 && "Constant used but not available!!");
      output_vbr((unsigned)Slot, Out);
    }
    break;
  }

  case Type::StructTyID: {
    const ConstPoolStruct *CPS = (const ConstPoolStruct*)CPV;
    const vector<ConstPoolUse> &Vals = CPS->getValues();

    for (unsigned i = 0; i < Vals.size(); ++i) {
      int Slot = Table.getValSlot(Vals[i]);
      assert(Slot != -1 && "Constant used but not available!!");
      output_vbr((unsigned)Slot, Out);
    }
    break;
  }

  case Type::FloatTyID:    // Floating point types...
  case Type::DoubleTyID:
    // TODO: Floating point type serialization
  case Type::VoidTyID:
  case Type::LabelTyID:
  default:
    cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize"
         << " type '" << CPV->getType()->getName() << "'\n";
    // Nothing was written for this constant: report the failure instead of
    // claiming success.
    return true;
  }
  return false;
}

View File

@ -0,0 +1,184 @@
//===-- WriteInst.cpp - Functions for writing instructions -------*- C++ -*--=//
//
// This file implements the routines for encoding instruction opcodes to a
// bytecode stream.
//
// Note that the performance of this library is not terribly important, because
// it shouldn't be used by JIT type applications... so it is not a huge focus
// at least. :)
//
//===----------------------------------------------------------------------===//
#include "WriterInternals.h"
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instruction.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Tools/DataTypes.h"
#include <algorithm>
typedef unsigned char uchar;
// outputInstructionFormat0 - Output those wierd instructions that have a large
// number of operands or have large operands themselves...
//
// Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>]
//
static void outputInstructionFormat0(const Instruction *I,
const SlotCalculator &Table,
unsigned Type, vector<uchar> &Out) {
// Opcode must have top two bits clear...
output_vbr(I->getInstType(), Out); // Instruction Opcode ID
output_vbr(Type, Out); // Result type
unsigned NumArgs; // Count the number of arguments to the instruction
for (NumArgs = 0; I->getOperand(NumArgs); NumArgs++) /*empty*/;
output_vbr(NumArgs, Out);
for (unsigned i = 0; const Value *N = I->getOperand(i); i++) {
assert(i < NumArgs && "Count of arguments failed!");
int Slot = Table.getValSlot(N);
output_vbr((unsigned)Slot, Out);
}
align32(Out); // We must maintain correct alignment!
}
// outputInstructionFormat1 - Output one operand instructions, knowing that no
// operand index is >= 2^12.  The whole instruction is packed into one 32 bit
// word:
//
//   bits   Instruction format:
//   --------------------------
//   31-30: Opcode type, fixed to 1.
//   29-24: Opcode
//   23-12: Resulting type plane
//   11- 0: Operand #1 (if set to (2^12-1), then zero operands)
//
static void outputInstructionFormat1(const Instruction *I,
                                     const SlotCalculator &Table, int *Slots,
                                     unsigned Type, vector<uchar> &Out) {
  unsigned IType = I->getInstType();      // Instruction Opcode ID

  unsigned Opcode = 1u << 30;             // Format selector
  Opcode |= IType << 24;
  Opcode |= Type << 12;
  Opcode |= Slots[0];

  output(Opcode, Out);
}
// outputInstructionFormat2 - Output two operand instructions, knowing that no
// operand index is >= 2^8.  The whole instruction is packed into one 32 bit
// word:
//
//   bits   Instruction format:
//   --------------------------
//   31-30: Opcode type, fixed to 2.
//   29-24: Opcode
//   23-16: Resulting type plane
//   15- 8: Operand #1
//    7- 0: Operand #2
//
static void outputInstructionFormat2(const Instruction *I,
                                     const SlotCalculator &Table, int *Slots,
                                     unsigned Type, vector<uchar> &Out) {
  unsigned IType = I->getInstType();      // Instruction Opcode ID

  // All arithmetic is done on unsigned values: (2 << 30) does not fit in a
  // signed int, so shifting the int literal overflows.
  unsigned Opcode = (2u << 30) | (IType << 24) | (Type << 16) |
                    ((unsigned)Slots[0] << 8) | ((unsigned)Slots[1] << 0);

  output(Opcode, Out);
}
// outputInstructionFormat3 - Output three operand instructions, knowing that
// no operand index is >= 2^6.  The whole instruction is packed into one 32 bit
// word:
//
//   bits   Instruction format:
//   --------------------------
//   31-30: Opcode type, fixed to 3
//   29-24: Opcode
//   23-18: Resulting type plane
//   17-12: Operand #1
//   11- 6: Operand #2
//    5- 0: Operand #3
//
static void outputInstructionFormat3(const Instruction *I,
                                     const SlotCalculator &Table, int *Slots,
                                     unsigned Type, vector<uchar> &Out) {
  unsigned IType = I->getInstType();      // Instruction Opcode ID

  // All arithmetic is done on unsigned values: (3 << 30) does not fit in a
  // signed int, so shifting the int literal overflows.
  unsigned Opcode = (3u << 30) | (IType << 24) | (Type << 18) |
                    ((unsigned)Slots[0] << 12) | ((unsigned)Slots[1] << 6) |
                    ((unsigned)Slots[2] << 0);

  output(Opcode, Out);
}
// processInstruction - Encode one instruction into the output stream.  The
// operand slots and the type slot are looked up, and then the most compact
// encoding whose bit fields can hold all of them is selected.  Anything that
// does not fit a packed encoding is written with the general format 0.
// Always returns false.
//
bool BytecodeWriter::processInstruction(const Instruction *I) {
  assert(I->getInstType() < 64 && "Opcode too big???");

  unsigned NumOperands = 0;
  int MaxOpSlot = 0;
  int Slots[3]; Slots[0] = (1 << 12)-1;   // Marker for "zero operands"

  // Operands are enumerated by probing getOperand until it returns null.
  const Value *Def;
  while ((Def = I->getOperand(NumOperands))) {
    int slot = Table.getValSlot(Def);
    assert(slot != -1 && "Broken bytecode!");
    if (slot > MaxOpSlot) MaxOpSlot = slot;
    if (NumOperands < 3) Slots[NumOperands] = slot;
    NumOperands++;
  }

  // Figure out which type to encode with the instruction.  Typically we want
  // the type of the first parameter, as opposed to the type of the instruction
  // (for example, with setcc, we always know it returns bool, but the type of
  // the first param is actually interesting).  But if we have no arguments
  // we take the type of the instruction itself.
  //
  const Type *Ty;
  if (NumOperands)
    Ty = I->getOperand(0)->getType();
  else
    Ty = I->getType();

  int Slot = Table.getValSlot(Ty);
  assert(Slot != -1 && "Type not available!!?!");
  unsigned Type = (unsigned)Slot;

  // In every packed format the type field is exactly as wide as an operand
  // field, so the type slot has to pass the same size test as the operands.
  // Otherwise a large type slot would spill into the opcode bits.
  if (Slot > MaxOpSlot) MaxOpSlot = Slot;

  // Decide which instruction encoding to use.  This is determined primarily by
  // the number of operands, and secondarily by whether or not the max operand
  // will fit into the instruction encoding.  More operands == fewer bits per
  // operand.
  //
  switch (NumOperands) {
  case 0:
  case 1:
    if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops
      outputInstructionFormat1(I, Table, Slots, Type, Out);
      return false;
    }
    break;

  case 2:
    if (MaxOpSlot < (1 << 8)) {
      outputInstructionFormat2(I, Table, Slots, Type, Out);
      return false;
    }
    break;

  case 3:
    if (MaxOpSlot < (1 << 6)) {
      outputInstructionFormat3(I, Table, Slots, Type, Out);
      return false;
    }
    break;
  }

  // Too many operands, or a slot too large for the packed encodings.
  outputInstructionFormat0(I, Table, Type, Out);
  return false;
}

View File

@ -0,0 +1,7 @@
LEVEL = ../../..
LIBRARYNAME = bcwriter
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,195 @@
//===-- SlotCalculator.cpp - Calculate what slots values land in ------------=//
//
// This file implements a useful analysis step to figure out what numbered
// slots values in a program will land in (keeping track of per plane
// information as required).
//
// This is used primarily for when writing a file to disk, either in bytecode
// or source format.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/SlotCalculator.h"
#include "llvm/ConstantPool.h"
#include "llvm/Method.h"
#include "llvm/Module.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/iOther.h"
#include "llvm/DerivedTypes.h"
// SlotCalculator - Build the slot table for a whole module.  The primitive
// types are always inserted first so that each one's slot number equals its
// primitive ID.  A null module leaves the table with only those entries.
//
SlotCalculator::SlotCalculator(const Module *M, bool IgnoreNamed)
  : TheModule(M), IgnoreNamedNodes(IgnoreNamed) {
  // Preload table...
  for (unsigned ID = 0; ID < Type::FirstDerivedTyID; ++ID) {
    const Type *Prim = Type::getPrimitiveType((Type::PrimitiveID)ID);
    assert(Prim);
    insertVal(Prim);
  }

  if (M == 0) return;   // Empty table...

  bool Result = processModule(M);
  assert(Result == false && "Error in processModule!");
}
// SlotCalculator - Build the slot table for the module that contains method M
// and then incorporate M itself, so the calculator starts out in the
// "method incorporated" state.  If M is null or has no parent module, the
// table holds only the primitive types.
//
SlotCalculator::SlotCalculator(const Method *M, bool IgnoreNamed)
  : TheModule(M ? M->getParent() : 0), IgnoreNamedNodes(IgnoreNamed) {
  // Preload table... each primitive type lands in the slot equal to its ID.
  for (unsigned ID = 0; ID < Type::FirstDerivedTyID; ++ID) {
    const Type *Prim = Type::getPrimitiveType((Type::PrimitiveID)ID);
    assert(Prim);
    insertVal(Prim);
  }

  if (TheModule == 0) return;   // Empty table...

  bool Result = processModule(TheModule);
  assert(Result == false && "Error in processModule!");

  incorporateMethod(M);
}
void SlotCalculator::incorporateMethod(const Method *M) {
assert(ModuleLevel.size() == 0 && "Module already incorporated!");
// Save the Table state before we process the method...
for (unsigned i = 0; i < Table.size(); ++i) {
ModuleLevel.push_back(Table[i].size());
}
// Process the method to incorporate its values into our table
processMethod(M);
}
// purgeMethod - Undo incorporateMethod: shrink every plane back to the size
// recorded in ModuleLevel, drop the planes that did not exist before the
// method was incorporated, and remove the corresponding NodeMap entries.
//
void SlotCalculator::purgeMethod() {
  assert(ModuleLevel.size() != 0 && "Module not incorporated!");
  unsigned NumModuleTypes = ModuleLevel.size();

  // First, remove values from existing type planes
  for (unsigned i = 0; i < NumModuleTypes; ++i) {
    unsigned ModuleSize = ModuleLevel[i];  // Size of plane before method came
    while (Table[i].size() > ModuleSize) {
      // Erase by key: erase(find(x)) is undefined if x is not in the map,
      // which happens when the same value sits in the table more than once.
      NodeMap.erase(Table[i].back());      // Erase from nodemap
      Table[i].pop_back();                 // Shrink plane
    }
  }

  // We don't need this state anymore, free it up.
  ModuleLevel.clear();

  // Next, remove any type planes defined by the method...
  while (Table.size() > NumModuleTypes) {
    TypePlane &Plane = Table.back();
    while (!Plane.empty()) {
      NodeMap.erase(Plane.back());         // Erase from nodemap
      Plane.pop_back();                    // Shrink plane
    }
    Table.pop_back();                      // Nuke the plane, we don't like it.
  }
}
bool SlotCalculator::processConstant(const ConstPoolVal *CPV) {
//cerr << "Inserting constant: '" << CPV->getStrValue() << endl;
insertVal(CPV);
return false;
}
// processType - This callback occurs when an derived type is discovered
// at the class level. This activity occurs when processing a constant pool.
//
bool SlotCalculator::processType(const Type *Ty) {
//cerr << "processType: " << Ty->getName() << endl;
// TODO: Don't leak memory!!! Free this in the dtor!
insertVal(new ConstPoolType(Ty));
return false;
}
bool SlotCalculator::visitMethod(const Method *M) {
//cerr << "visitMethod: '" << M->getType()->getName() << "'\n";
insertVal(M);
return false;
}
bool SlotCalculator::processMethodArgument(const MethodArgument *MA) {
insertVal(MA);
return false;
}
// processBasicBlock - Give the basic block a slot in the table and then hand
// the block to the ModuleAnalyzer implementation.  Returns the result of that
// call instead of discarding it.
//
bool SlotCalculator::processBasicBlock(const BasicBlock *BB) {
  insertVal(BB);
  return ModuleAnalyzer::processBasicBlock(BB);  // Lets visit the instructions too!
}
bool SlotCalculator::processInstruction(const Instruction *I) {
insertVal(I);
return false;
}
int SlotCalculator::getValSlot(const Value *D) const {
map<const Value*, unsigned>::const_iterator I = NodeMap.find(D);
if (I == NodeMap.end()) return -1;
return (int)I->second;
}
// insertVal - Append D to the plane of its type and record its slot number in
// NodeMap.  Null values, void-typed values and (when IgnoreNamedNodes is set)
// named values are ignored.  The plane index is the primitive ID for primitive
// types and the type's own slot number for derived types.
//
void SlotCalculator::insertVal(const Value *D) {
if (D == 0) return;
// If this node does not contribute to a plane, or if the node has a
// name and we don't want names, then ignore the silly node...
//
if (D->getType() == Type::VoidTy || (IgnoreNamedNodes && D->hasName()))
return;
const Type *Typ = D->getType();
unsigned Ty = Typ->getPrimitiveID();
// Derived types do not use their primitive ID as the plane index; they use
// the slot that the type itself occupies in the table.
if (Typ->isDerivedType()) {
int DefSlot = getValSlot(Typ);
if (DefSlot == -1) { // Have we already entered this type?
// This can happen if a type is first seen in an instruction. For
// example, if you say 'malloc uint', this defines a type 'uint*' that
// may be undefined at this point.
//
cerr << "SHOULDNT HAPPEN Adding Type ba: " << Typ->getName() << endl;
assert(0 && "SHouldn't this be taken care of by processType!?!?!");
// The statements below are only reached when asserts are compiled out.
// Nope... add this to the Type plane now!
insertVal(Typ);
DefSlot = getValSlot(Typ);
assert(DefSlot >= 0 && "Type didn't get inserted correctly!");
}
Ty = (unsigned)DefSlot;
}
if (Table.size() <= Ty) // Make sure we have the type plane allocated...
Table.resize(Ty+1, TypePlane());
// Insert node into table and NodeMap...
// The slot is recorded before the push_back at the bottom, so it equals the
// index D will occupy in its plane.
NodeMap[D] = Table[Ty].size();
if (Typ == Type::TypeTy && // If it's a type constant, add the Type also
D->getValueType() != Value::TypeVal) {
assert(D->getValueType() == Value::ConstantVal &&
"All Type instances should be constant types!");
const ConstPoolType *CPT = (const ConstPoolType*)D;
int Slot = getValSlot(CPT->getValue());
if (Slot == -1) {
// Only add if it's not already here!
// The wrapped type gets the same slot number as the constant that names it.
NodeMap[CPT->getValue()] = Table[Ty].size();
} else if (!CPT->hasName()) { // If the type has no name...
// The wrapped type already has a slot: point D at that slot and return
// without adding D to the plane.
NodeMap[D] = (unsigned)Slot; // Don't readd type, merge.
return;
}
}
Table[Ty].push_back(D);
}

View File

@ -0,0 +1,96 @@
//===-- llvm/Analysis/SlotCalculator.h - Calculate value slots ---*- C++ -*-==//
//
// This ModuleAnalyzer subclass calculates the slots that values will land in.
// This is useful for when writing bytecode or assembly out, because you have
// to know these things.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_SLOTCALCULATOR_H
#define LLVM_ANALYSIS_SLOTCALCULATOR_H
#include "llvm/Analysis/ModuleAnalyzer.h"
#include "llvm/SymTabValue.h"
#include <vector>
#include <map>
// SlotCalculator - ModuleAnalyzer subclass that assigns every value a slot
// number within the plane of its type.  Values are kept per plane in Table,
// and NodeMap gives the reverse mapping from a value to its slot number.
//
class SlotCalculator : public ModuleAnalyzer {
  const Module *TheModule;
  bool IgnoreNamedNodes;     // Shall we not count named nodes?

  typedef vector<const Value*> TypePlane;
  vector<TypePlane> Table;               // One plane of values per type slot
  map<const Value *, unsigned> NodeMap;  // Value -> slot number

  // ModuleLevel - The size of each plane before a method was incorporated.
  // This separates the values that belong to the module from the values that
  // belong to the currently incorporated method.  Empty when no method is
  // incorporated.
  //
  vector<unsigned> ModuleLevel;

public:
  SlotCalculator(const Module *M, bool IgnoreNamed);
  SlotCalculator(const Method *M, bool IgnoreNamed);// Start out in incorp state
  ~SlotCalculator() {}

  // getValSlot returns < 0 on error!
  int getValSlot(const Value *D) const;

  unsigned getNumPlanes() const { return Table.size(); }

  // getModuleLevel - Number of entries the given plane had before the current
  // method was incorporated, or 0 if nothing was recorded for that plane.
  unsigned getModuleLevel(unsigned Plane) const {
    if (Plane >= ModuleLevel.size()) return 0;
    return ModuleLevel[Plane];
  }

  // getPlane - Direct access to one plane.  Plane must be < getNumPlanes().
  const TypePlane &getPlane(unsigned Plane) const { return Table[Plane]; }

  // If you'd like to deal with a method, use these two methods to get its data
  // into, and back out of, the SlotCalculator!
  //
  void incorporateMethod(const Method *M);
  void purgeMethod();

protected:
  // insertVal - Insert a value into the value table...
  //
  void insertVal(const Value *D);

  // visitMethod - This member is called after the constant pool has been
  // processed.  This implementation inserts the method into the table.
  //
  virtual bool visitMethod(const Method *M);

  // processConstant is called once per each constant in the constant pool.  It
  // traverses the constant pool such that it visits each constant in the
  // order of its type.  Thus, all 'int' typed constants shall be visited
  // sequentially, etc...
  //
  virtual bool processConstant(const ConstPoolVal *CPV);

  // processType - This callback occurs when a derived type is discovered
  // at the class level.  This activity occurs when processing a constant pool.
  //
  virtual bool processType(const Type *Ty);

  // processMethods - The default implementation of this method loops through
  // all of the methods in the module and processes each of them.  We don't
  // want this (we want to explicitly visit them with incorporateMethod), so we
  // disable it.
  //
  virtual bool processMethods(const Module *M) { return false; }

  // processMethodArgument - This member is called for every argument that
  // is passed into the method.
  //
  virtual bool processMethodArgument(const MethodArgument *MA);

  // processBasicBlock - This member is called for each basic block in a
  // method.
  //
  virtual bool processBasicBlock(const BasicBlock *BB);

  // processInstruction - This member is called for each Instruction in a
  // method.
  //
  virtual bool processInstruction(const Instruction *I);
};
#endif

View File

@ -0,0 +1,182 @@
//===-- Writer.cpp - Library for writing VM bytecode files -------*- C++ -*--=//
//
// This library implements the functionality defined in llvm/Bytecode/Writer.h
//
// This library uses the Analysis library to figure out offsets for
// variables in the method tables...
//
// Note that this file uses an unusual technique of outputting all the bytecode
// to a vector of unsigned char's, then copies the vector to an ostream. The
// reason for this is that we must do "seeking" in the stream to do back-
// patching, and some very important ostreams that we want to support (like
// pipes) do not support seeking. :( :( :(
//
// The choice of the vector data structure is influenced by the extremely fast
// "append" speed, plus the free "seek"/replace in the middle of the stream.
//
// Note that the performance of this library is not terribly important, because
// it shouldn't be used by JIT type applications... so it is not a huge focus
// at least. :)
//
//===----------------------------------------------------------------------===//
#include "WriterInternals.h"
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/SymbolTable.h"
#include "llvm/DerivedTypes.h"
#include <string.h>
#include <algorithm>
// BytecodeWriter - Serialize module M into the byte vector o.  All of the work
// happens in the constructor: the signature is written first, and everything
// else is emitted inside a single Module block.
//
BytecodeWriter::BytecodeWriter(vector<unsigned char> &o, const Module *M)
  : Out(o), Table(M, false) {

  outputSignature();

  // Emit the top level CLASS block.  It stays open until this constructor
  // returns.
  BytecodeBlock ModuleBlock(BytecodeFormat::Module, Out);

  // Block header payload: ID of the first non-primitive type, padded to a
  // 32 bit boundary.
  output_vbr((unsigned)Type::FirstDerivedTyID, Out);
  align32(Out);

  // Do the whole module now!
  processModule(M);

  // The class symbol table is only written when there is one.
  if (!M->hasSymbolTable())
    return;
  outputSymbolTable(*M->getSymbolTable());
}
// TODO: REMOVE
#include "llvm/Assembly/Writer.h"
// processConstPool - Emit a ConstantPool block holding the constants found in
// the slot table.  For every plane that contains constants, a header of
// [num entries][type id number] is written, followed by each constant.  For a
// method's pool (isMethod == true) the module-level entries of each plane are
// skipped so they are not emitted twice.  For the module's pool, the
// ModuleGlobalInfo block is emitted directly afterwards.  Always returns
// false.
//
bool BytecodeWriter::processConstPool(const ConstantPool &CP, bool isMethod) {
  {
    // Scoped so that the block object is destroyed, ending the bytecode block
    // section, before the module info block is written.
    BytecodeBlock CPool(BytecodeFormat::ConstantPool, Out);

    unsigned NumPlanes = Table.getNumPlanes();
    for (unsigned pno = 0; pno < NumPlanes; pno++) {
      const vector<const Value*> &Plane = Table.getPlane(pno);
      if (Plane.empty()) continue;          // Skip empty type planes...

      unsigned ValNo = 0;                   // Don't reemit module constants
      if (isMethod) ValNo = Table.getModuleLevel(pno);

      // The entry count precedes the entries, so count before writing.
      unsigned NumConstants = 0;
      for (unsigned vn = ValNo; vn < Plane.size(); vn++)
        if (Plane[vn]->getValueType() == Value::ConstantVal)
          NumConstants++;

      if (NumConstants == 0) continue;      // Skip empty type planes...

      // Output type header: [num entries][type id number]
      //
      output_vbr(NumConstants, Out);

      // Output the Type ID Number...
      int Slot = Table.getValSlot(Plane.front()->getType());
      assert (Slot != -1 && "Type in constant pool but not in method!!");
      output_vbr((unsigned)Slot, Out);

      for (; ValNo < Plane.size(); ValNo++) {
        const Value *V = Plane[ValNo];
        if (V->getValueType() == Value::ConstantVal)
          outputConstant((const ConstPoolVal*)V);
      }
    }
  }                                         // End bytecode block section!

  if (!isMethod) { // The ModuleInfoBlock follows directly after the c-pool
    assert(CP.getParent()->getValueType() == Value::ModuleVal);
    outputModuleInfoBlock((const Module*)CP.getParent());
  }

  return false;
}
void BytecodeWriter::outputModuleInfoBlock(const Module *M) {
BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfo, Out);
// Output the types of the methods in this class
Module::MethodListType::const_iterator I = M->getMethodList().begin();
while (I != M->getMethodList().end()) {
int Slot = Table.getValSlot((*I)->getType());
assert(Slot != -1 && "Module const pool is broken!");
assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!");
output_vbr((unsigned)Slot, Out);
I++;
}
output_vbr((unsigned)Table.getValSlot(Type::VoidTy), Out);
align32(Out);
}
// processMethod - Emit a Method block for M.  The method's values are added to
// the slot table for the duration of the call and removed again before
// returning, whether or not processing succeeded.  Returns true on error.
//
bool BytecodeWriter::processMethod(const Method *M) {
  BytecodeBlock MethodBlock(BytecodeFormat::Method, Out);

  Table.incorporateMethod(M);

  bool Failed = ModuleAnalyzer::processMethod(M);

  // If needed, output the symbol table for the method...
  if (!Failed && M->hasSymbolTable())
    outputSymbolTable(*M->getSymbolTable());

  // Always purge: leaving the method incorporated after an error would leave
  // the table holding this method's values.
  Table.purgeMethod();
  return Failed;
}
// processBasicBlock - Wrap the contents of the basic block in a BasicBlock
// bytecode block and hand the block to the ModuleAnalyzer implementation.
// Returns whatever that call returns.
//
bool BytecodeWriter::processBasicBlock(const BasicBlock *BB) {
  BytecodeBlock BBBlock(BytecodeFormat::BasicBlock, Out);
  bool Result = ModuleAnalyzer::processBasicBlock(BB);
  return Result;
}
// outputSymbolTable - Emit a SymbolTable block for MST.  For every type plane
// that has at least one entry the layout is:
//   [num entries][type slot #] then, per entry, [value slot #][name]
// Type planes with no entries are not written at all.
//
void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) {
  BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTable, Out);

  for (SymbolTable::const_iterator TI = MST.begin(); TI != MST.end(); ++TI) {
    SymbolTable::type_const_iterator I = MST.type_begin(TI->first);
    SymbolTable::type_const_iterator End = MST.type_end(TI->first);

    if (I != End) {        // Don't mess with an absent type...
      // Symtab block header: [num entries][type id number]
      output_vbr(MST.type_size(TI->first), Out);

      int Slot = Table.getValSlot(TI->first);
      assert(Slot != -1 && "Type in symtab, but not in table!");
      output_vbr(static_cast<unsigned>(Slot), Out);

      while (I != End) {
        // Symtab entry: [def slot #][name]
        Slot = Table.getValSlot(I->second);
        assert (Slot != -1 && "Value in symtab but not in method!!");
        output_vbr(static_cast<unsigned>(Slot), Out);
        output(I->first, Out, false);  // Don't force alignment...
        ++I;
      }
    }
  }
}
// WriteBytecodeToFile - Serialize module C into bytecode and write the bytes
// to Out, flushing the stream afterwards.  C must not be null.
//
void WriteBytecodeToFile(const Module *C, ostream &Out) {
  assert(C && "You can't write a null class!!");

  vector<unsigned char> Buffer;

  // This object populates buffer for us...
  BytecodeWriter BCW(Buffer, C);

  // Okay, write the vector out to the ostream now...  ostream::write takes a
  // 'const char*', so the unsigned bytes are reinterpreted explicitly, and
  // &Buffer[0] is only formed when the buffer actually holds data.
  if (!Buffer.empty())
    Out.write(reinterpret_cast<const char*>(&Buffer[0]), Buffer.size());
  Out.flush();
}

View File

@ -0,0 +1,74 @@
//===-- WriterInternals.h - Data structures shared by the Writer -*- C++ -*--=//
//
// This header defines the interface used between components of the bytecode
// writer.
//
// Note that the performance of this library is not terribly important, because
// it shouldn't be used by JIT type applications... so it is not a huge focus
// at least. :)
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H
#define LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H
#include "llvm/Bytecode/Writer.h"
#include "llvm/Bytecode/Format.h"
#include "llvm/Bytecode/Primitives.h"
#include "llvm/Analysis/SlotCalculator.h"
#include "llvm/Tools/DataTypes.h"
#include "llvm/Instruction.h"
// BytecodeWriter - ModuleAnalyzer subclass that serializes a module into a
// byte vector.  The virtual process* members override the ModuleAnalyzer
// traversal hooks; the output* members are private emission helpers.
//
class BytecodeWriter : public ModuleAnalyzer {
  vector<unsigned char> &Out;   // Destination buffer (owned by the caller)
  SlotCalculator Table;         // Maps values and types to slot numbers
public:
  BytecodeWriter(vector<unsigned char> &o, const Module *M);

protected:
  virtual bool processConstPool(const ConstantPool &CP, bool isMethod);
  virtual bool processMethod(const Method *M);
  virtual bool processBasicBlock(const BasicBlock *BB);
  virtual bool processInstruction(const Instruction *I);

private :
  // outputSignature - Append the four byte signature "llvm" to the buffer.
  void outputSignature() {
    const unsigned char *SigBegin = (const unsigned char*)"llvm";
    const unsigned char *SigEnd   = SigBegin + 4;
    Out.insert(Out.end(), SigBegin, SigEnd);
  }

  void outputModuleInfoBlock(const Module *M);
  void outputSymbolTable(const SymbolTable &ST);
  bool outputConstant(const ConstPoolVal *CPV);
  void outputType(const Type *T);
};
// BytecodeBlock - Scope guard that writes a bytecode block header and later
// fills in the block's size.  The constructor emits the block ID followed by
// a zero placeholder for the size and remembers where the block body starts.
// The destructor overwrites the placeholder with the number of bytes emitted
// since then and pads the buffer to a 32 bit boundary.
//
class BytecodeBlock {
  unsigned Loc;                    // Buffer offset of the first body byte
  vector<unsigned char> &Out;      // Buffer being written to

  BytecodeBlock(const BytecodeBlock &);    // do not implement
  void operator=(const BytecodeBlock &);   // do not implement
public:
  BytecodeBlock(unsigned ID, vector<unsigned char> &o) : Out(o) {
    output(ID, Out);
    output((unsigned)0, Out);      // Reserve the space for the block size...
    Loc = Out.size();
  }

  ~BytecodeBlock() {               // Do backpatch when block goes out of scope
    const unsigned BodySize  = (unsigned)(Out.size()-Loc);
    const int SizeFieldOffset = (int)Loc-4;  // Placeholder sits just before Loc
    output(BodySize, Out, SizeFieldOffset);
    align32(Out);                  // Blocks must ALWAYS be aligned
  }
};
#endif

5
lib/Makefile Normal file
View File

@ -0,0 +1,5 @@
# Relative path from this directory to the top of the source tree.
LEVEL = ..
# Subdirectories to build; Makefile.common builds these before local targets.
DIRS = VMCore Analysis Assembly Bytecode Optimizations
# Pull in the rules shared by all of the LLVM makefiles.
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,283 @@
//===- MethodInlining.cpp - Code to perform method inlining ---------------===//
//
// This file implements inlining of methods.
//
// Specifically, this:
// * Exports functionality to inline any method call
// * Inlines methods that consist of a single basic block
// * Is able to inline ANY method call
// . Has a smart heuristic for when to inline a method
//
// Notice that:
// * This pass has a habit of introducing duplicated constant pool entries,
// and also opens up a lot of opportunities for constant propagation. It is
// a good idea to run a constant propagation pass, then a DCE pass
// sometime after running this pass.
//
// TODO: Currently this throws away all of the symbol names in the method being
// inlined to try to avoid name clashes. Use a name if it's not taken
//
//===----------------------------------------------------------------------===//
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/iTerminators.h"
#include "llvm/iOther.h"
#include "llvm/Opt/AllOpts.h"
#include <algorithm>
#include <map>
#include "llvm/Assembly/Writer.h"
// RemapInstruction - Rewrite each operand of I to the value that ValueMap
// associates with it.  Operands that have no mapping are left alone when they
// are methods; any other unmapped operand is reported on cerr and trips the
// assertion.  Note that the lookup uses map::operator[], so an unmapped
// operand gains a null entry in ValueMap.
//
static inline void RemapInstruction(Instruction *I,
                                    map<const Value *, Value*> &ValueMap) {
  for (unsigned op = 0; const Value *Op = I->getOperand(op); ++op) {
    Value *V = ValueMap[Op];

    if (V) {                       // Usual case: a replacement is recorded
      I->setOperand(op, V);
      continue;
    }

    if (Op->getValueType() == Value::MethodVal)
      continue;                    // Methods don't get relocated

    // Unmapped, and not a method: dump what we know before asserting.
    cerr << "Val = " << endl << Op << "Addr = " << (void*)Op << endl;
    cerr << "Inst = " << I;
    assert(V && "Referenced value not in value map!");
    I->setOperand(op, V);
  }
}
// InlineMethod - This function forcibly inlines the called method into the
// basic block of the caller. This returns false if it is not possible to
// inline this call. The program is still in a well defined state if this
// occurs though.
// NOTE(review): as written, every path through this function either returns
// true or calls abort() (for a callee terminator other than Ret or Br); no
// path returns false.
//
// CIIt must refer to a call instruction that is embedded in a basic block
// which is itself embedded in a method (checked by the asserts below).
//
// Note that this only does one level of inlining. For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream. Similarly this will inline a recursive
// method by one level.
//
bool InlineMethod(BasicBlock::InstListType::iterator CIIt) {
assert((*CIIt)->getInstType() == Instruction::Call &&
"InlineMethod only works on CallInst nodes!");
assert((*CIIt)->getParent() && "Instruction not embedded in basic block!");
assert((*CIIt)->getParent()->getParent() && "Instruction not in method!");
CallInst *CI = (CallInst*)*CIIt;
const Method *CalledMeth = CI->getCalledMethod();
Method *CurrentMeth = CI->getParent()->getParent();
//cerr << "Inlining " << CalledMeth->getName() << " into "
// << CurrentMeth->getName() << endl;
BasicBlock *OrigBB = CI->getParent();
// Call splitBasicBlock - The original basic block now ends at the instruction
// immediately before the call. The original basic block now ends with an
// unconditional branch to NewBB, and NewBB starts with the call instruction.
//
BasicBlock *NewBB = OrigBB->splitBasicBlock(CIIt);
// Remove (unlink) the CallInst from the start of the new basic block.
// The CallInst itself stays alive until the very end of this function,
// because its operands are still needed to build the argument mapping.
NewBB->getInstList().remove(CI);
// If we have a return value generated by this call, convert it into a PHI
// node that gets values from each of the old RET instructions in the original
// method.
//
PHINode *PHI = 0;
if (CalledMeth->getReturnType() != Type::VoidTy) {
PHI = new PHINode(CalledMeth->getReturnType(), CI->getName());
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
//
NewBB->getInstList().push_front(PHI);
// Anything that used the result of the function call should now use the PHI
// node as their operand.
//
CI->replaceAllUsesWith(PHI);
}
// Keep a mapping between the original method's values and the new duplicated
// code's values. This includes all of: Method arguments, instruction values,
// constant pool entries, and basic blocks.
//
map<const Value *, Value*> ValueMap;
// Add the method arguments to the mapping: (start counting at 1 to skip the
// method reference itself)
// NOTE(review): the loop is bounded only by the call's operand list; PTI is
// never compared against the end of the callee's argument list, so this
// assumes the call does not pass more operands than the callee declares.
//
Method::ArgumentListType::const_iterator PTI =
CalledMeth->getArgumentList().begin();
for (unsigned a = 1; Value *Operand = CI->getOperand(a); ++a, ++PTI) {
ValueMap[*PTI] = Operand;
}
// NewBB maps to itself so that the branches created for returns below survive
// the remapping pass unchanged.
ValueMap[NewBB] = NewBB; // Returns get converted to reference NewBB
// Loop over all of the basic blocks in the method, inlining them as
// appropriate. Keep track of the first basic block of the method...
//
for (Method::BasicBlocksType::const_iterator BI =
CalledMeth->getBasicBlocks().begin();
BI != CalledMeth->getBasicBlocks().end(); BI++) {
const BasicBlock *BB = *BI;
assert(BB->getTerminator() && "BasicBlock doesn't have terminator!?!?");
// Create a new basic block to copy instructions into!
BasicBlock *IBB = new BasicBlock("", NewBB->getParent());
ValueMap[*BI] = IBB; // Add basic block mapping.
// Make sure to capture the mapping that a return will use...
// TODO: This assumes that the RET is returning a value computed in the same
// basic block as the return was issued from!
//
const TerminatorInst *TI = BB->getTerminator();
// Loop over all instructions copying them over...
// The last instruction in the list is skipped here (presumably the
// terminator TI, which is handled separately by the switch below). The
// clones still reference the callee's values until the remapping pass.
Instruction *NewInst;
for (BasicBlock::InstListType::const_iterator II = BB->getInstList().begin();
II != (BB->getInstList().end()-1); II++) {
IBB->getInstList().push_back((NewInst = (*II)->clone()));
ValueMap[*II] = NewInst; // Add instruction map to value.
}
// Copy over the terminator now...
switch (TI->getInstType()) {
case Instruction::Ret: {
const ReturnInst *RI = (const ReturnInst*)TI;
if (PHI) { // The PHI node should include this value!
assert(RI->getReturnValue() && "Ret should have value!");
assert(RI->getReturnValue()->getType() == PHI->getType() &&
"Ret value not consistent in method!");
// The callee's own value is recorded here; the PHI is remapped to the
// cloned value after all blocks have been processed.
PHI->addIncoming((Value*)RI->getReturnValue());
}
// Add a branch to the code that was after the original Call.
IBB->getInstList().push_back(new BranchInst(NewBB));
break;
}
case Instruction::Br:
// The cloned branch still targets the callee's blocks; its operands are
// fixed up by the remapping pass below.
IBB->getInstList().push_back(TI->clone());
break;
default:
cerr << "MethodInlining: Don't know how to handle terminator: " << TI;
abort();
}
}
// Copy over the constant pool...
// Every constant of the callee is cloned into the caller's pool, whether or
// not an equal constant already exists there.
//
const ConstantPool &CP = CalledMeth->getConstantPool();
ConstantPool &NewCP = CurrentMeth->getConstantPool();
for (ConstantPool::plane_const_iterator PI = CP.begin(); PI != CP.end(); ++PI){
ConstantPool::PlaneType &Plane = **PI;
for (ConstantPool::PlaneType::const_iterator I = Plane.begin();
I != Plane.end(); ++I) {
ConstPoolVal *NewVal = (*I)->clone(); // Copy existing constant
NewCP.insert(NewVal); // Insert the new copy into local const pool
ValueMap[*I] = NewVal; // Keep track of constant value mappings
}
}
// Loop over all of the instructions in the method, fixing up operand
// references as we go. This uses ValueMap to do all the hard work.
//
for (Method::BasicBlocksType::const_iterator BI =
CalledMeth->getBasicBlocks().begin();
BI != CalledMeth->getBasicBlocks().end(); BI++) {
const BasicBlock *BB = *BI;
BasicBlock *NBB = (BasicBlock*)ValueMap[BB];
// Loop over all instructions, fixing each one as we find it...
//
for (BasicBlock::InstListType::iterator II = NBB->getInstList().begin();
II != NBB->getInstList().end(); II++)
RemapInstruction(*II, ValueMap);
}
if (PHI) RemapInstruction(PHI, ValueMap); // Fix the PHI node also...
// Change the branch that used to go to NewBB to branch to the first basic
// block of the inlined method.
//
TerminatorInst *Br = OrigBB->getTerminator();
assert(Br && Br->getInstType() == Instruction::Br &&
"splitBasicBlock broken!");
Br->setOperand(0, ValueMap[CalledMeth->getBasicBlocks().front()]);
// Since we are now done with the CallInst, we can finally delete it.
delete CI;
return true;
}
// InlineMethod - Convenience overload: locate CI inside its parent basic
// block and hand the resulting iterator to the iterator based InlineMethod.
// CI must already be embedded in a basic block.
//
bool InlineMethod(CallInst *CI) {
  assert(CI->getParent() && "CallInst not embeded in BasicBlock!");
  BasicBlock *PBB = CI->getParent();
  BasicBlock::InstListType &Insts = PBB->getInstList();

  // Linear search for the call within its block's instruction list.
  BasicBlock::InstListType::iterator CallIt =
    find(Insts.begin(), Insts.end(), CI);

  assert(CallIt != PBB->getInstList().end() &&
         "CallInst has parent that doesn't contain CallInst?!?");
  return InlineMethod(CallIt);
}
static inline bool ShouldInlineMethod(const CallInst *CI, const Method *M) {
assert(CI->getParent() && CI->getParent()->getParent() &&
"Call not embedded into a method!");
// Don't inline a recursive call.
if (CI->getParent()->getParent() == M) return false;
// Don't inline something too big. This is a really crappy heuristic
if (M->getBasicBlocks().size() > 3) return false;
// Don't inline into something too big. This is a **really** crappy heuristic
if (CI->getParent()->getParent()->getBasicBlocks().size() > 10) return false;
// Go ahead and try just about anything else.
return true;
}
// DoMethodInlining - Scan BB for the first call instruction that the
// heuristic accepts and inline it.  At most one call is inlined per
// invocation; the result of InlineMethod is returned for that call, and false
// is returned when no call qualifies.
//
static inline bool DoMethodInlining(BasicBlock *BB) {
  BasicBlock::InstListType::iterator I = BB->getInstList().begin();
  while (I != BB->getInstList().end()) {
    if ((*I)->getInstType() == Instruction::Call) {
      CallInst *CI = (CallInst*)*I;
      Method *M = CI->getCalledMethod();

      // Check to see if we should inline this method
      if (ShouldInlineMethod(CI, M))
        return InlineMethod(I);
    }
    I++;
  }
  return false;
}
// DoMethodInlining - Run the inliner over every basic block of M.  Whenever a
// block reports that a call was inlined, scanning restarts from the first
// block, because inlining inserts new basic blocks and invalidates the
// iterator.  Returns true if anything was inlined.
//
bool DoMethodInlining(Method *M) {
  Method::BasicBlocksType &BBs = M->getBasicBlocks();
  bool Changed = false;

  Method::BasicBlocksType::iterator I = BBs.begin();
  while (I != BBs.end()) {
    if (!DoMethodInlining(*I)) {
      ++I;                  // Nothing inlined here, move to the next block
      continue;
    }
    Changed = true;
    I = BBs.begin();        // Block list changed: start over
  }
  return Changed;
}

View File

@ -0,0 +1,239 @@
//===- ConstantProp.cpp - Code to perform Constant Propagation ------------===//
//
// This file implements constant propagation and merging:
//
// Specifically, this:
// * Folds multiple identical constants in the constant pool together
// Note that if one is named and the other is not, that the result gets the
// original name.
// * Converts instructions like "add int %1, %2" into a direct def of %3 in
// the constant pool
// * Converts conditional branches on a constant boolean value into direct
// branches.
// * Converts phi nodes with one incoming def to the incoming def directly
// . Converts switch statements with one entry into a test & conditional
// branch
// . Converts switches on constant values into an unconditional branch.
//
// Notice that:
// * This pass has a habit of making definitions be dead. It is a good idea
// to run a DCE pass sometime after running this pass.
//
//===----------------------------------------------------------------------===//
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/iTerminators.h"
#include "llvm/iOther.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/ConstantPool.h"
#include "llvm/Opt/AllOpts.h"
#include "llvm/Opt/ConstantHandling.h"
// Merge identical constant values in the constant pool.
//
// TODO: We can do better than this simplistic N^2 algorithm...
//
static bool MergeConstantPoolReferences(ConstantPool &CP) {
bool Modified = false;
for (ConstantPool::plane_iterator PI = CP.begin(); PI != CP.end(); ++PI) {
for (ConstantPool::PlaneType::iterator I = (*PI)->begin();
I != (*PI)->end(); I++) {
ConstPoolVal *C = *I;
ConstantPool::PlaneType::iterator J = I;
for (++J; J != (*PI)->end(); J++) {
if (C->equals(*J)) {
Modified = true;
// Okay we know that *I == *J. So now we need to make all uses of *I
// point to *J.
//
C->replaceAllUsesWith(*J);
(*PI)->remove(I); // Remove C from constant pool...
if (C->hasName() && !(*J)->hasName()) // The merged constant inherits
(*J)->setName(C->getName()); // the old name...
delete C; // Delete the constant itself.
break; // Break out of inner for loop
}
}
}
}
return Modified;
}
// ConstantFoldUnaryInst - Try to fold the unary operator Op applied to the
// constant D.  Only Not and Neg are handled.  On success the folded constant
// is added to M's constant pool, replaces all uses of Op and inherits Op's
// name; Op is unlinked from its basic block and deleted, and true is
// returned.  Returns false, changing nothing, if no folded value is produced.
//
inline static bool
ConstantFoldUnaryInst(Method *M, Method::inst_iterator &DI,
                      UnaryOperator *Op, ConstPoolVal *D) {
  ConstPoolVal *Folded = 0;
  switch (Op->getInstType()) {
  case Instruction::Not: Folded = !*D; break;
  case Instruction::Neg: Folded = -*D; break;
  default:               return false;       // Not a foldable opcode
  }
  if (Folded == 0) return false;             // Nothing new to change...

  // Add the new value to the constant pool...
  M->getConstantPool().insert(Folded);

  // Replaces all of the uses of a variable with uses of the constant.
  Op->replaceAllUsesWith(Folded);

  // Remove the operator from the list of definitions...
  BasicBlock *Parent = Op->getParent();
  Parent->getInstList().remove(DI.getInstructionIterator());

  // The new constant inherits the old name of the operator...
  if (Op->hasName())
    Folded->setName(Op->getName());

  // Delete the operator now...
  delete Op;
  return true;
}
// ConstantFoldBinaryInst - Try to fold the binary operator Op applied to the
// constants D1 and D2.  Add, Sub and the six Set* comparisons are handled.
// On success the folded constant is added to M's constant pool, replaces all
// uses of Op and inherits Op's name; Op is unlinked from its basic block and
// deleted, and true is returned.  Returns false, changing nothing, if no
// folded value is produced.
//
inline static bool
ConstantFoldBinaryInst(Method *M, Method::inst_iterator &DI,
                       BinaryOperator *Op,
                       ConstPoolVal *D1, ConstPoolVal *D2) {
  ConstPoolVal *Folded = 0;
  switch (Op->getInstType()) {
  case Instruction::Add:   Folded = *D1 +  *D2; break;
  case Instruction::Sub:   Folded = *D1 -  *D2; break;

  case Instruction::SetEQ: Folded = *D1 == *D2; break;
  case Instruction::SetNE: Folded = *D1 != *D2; break;
  case Instruction::SetLE: Folded = *D1 <= *D2; break;
  case Instruction::SetGE: Folded = *D1 >= *D2; break;
  case Instruction::SetLT: Folded = *D1 <  *D2; break;
  case Instruction::SetGT: Folded = *D1 >  *D2; break;

  default:                 return false;     // Not a foldable opcode
  }
  if (Folded == 0) return false;             // Nothing new to change...

  // Add the new value to the constant pool...
  M->getConstantPool().insert(Folded);

  // Replaces all of the uses of a variable with uses of the constant.
  Op->replaceAllUsesWith(Folded);

  // Remove the operator from the list of definitions...
  BasicBlock *Parent = Op->getParent();
  Parent->getInstList().remove(DI.getInstructionIterator());

  // The new constant inherits the old name of the operator...
  if (Op->hasName())
    Folded->setName(Op->getName());

  // Delete the operator now...
  delete Op;
  return true;
}
// ConstantFoldTerminator - If T is a conditional branch whose condition
// (operand 2) is a constant, turn it into an unconditional branch to the
// destination selected by that constant: operand 0 when the constant is true,
// operand 1 otherwise.  Returns true if the branch was rewritten.
//
inline static bool ConstantFoldTerminator(TerminatorInst *T) {
  // Only branches are handled.
  if (T->getInstType() != Instruction::Br) return false;

  BranchInst *BI = (BranchInst*)T;
  if (BI->isUnconditional()) return false;   // Nothing to fold

  // Are we branching on constant?
  if (BI->getOperand(2)->getValueType() != Value::ConstantVal) return false;

  // YES.  Change to unconditional branch...
  ConstPoolBool *Cond = (ConstPoolBool*)BI->getOperand(2);
  const unsigned TakenIdx = Cond->getValue() ? 0 : 1;
  Value *Destination = BI->getOperand(TakenIdx);

  BI->setOperand(0, Destination);  // Set the unconditional destination
  BI->setOperand(1, 0);            // Clear the conditional destination
  BI->setOperand(2, 0);            // Clear the condition...
  return true;
}
// ConstantFoldInstruction - Try to simplify the instruction that II refers
// to.  Depending on the kind of instruction this:
//   * folds a binary operator whose two operands are both constants,
//   * folds a unary operator whose operand is a constant,
//   * folds a terminator (see ConstantFoldTerminator), or
//   * replaces a PHI node that has exactly one operand with that operand.
// Returns true if the instruction was changed or removed.
//
inline static bool
ConstantFoldInstruction(Method *M, Method::inst_iterator &II) {
  Instruction *Inst = *II;

  if (Inst->isBinaryOp()) {
    Value *D1 = Inst->getOperand(0);
    Value *D2 = Inst->getOperand(1);
    // Both operands are inspected unconditionally.
    const bool LHSIsConst = D1->getValueType() == Value::ConstantVal;
    const bool RHSIsConst = D2->getValueType() == Value::ConstantVal;
    if (LHSIsConst && RHSIsConst)
      return ConstantFoldBinaryInst(M, II, (BinaryOperator*)Inst,
                                    (ConstPoolVal*)D1, (ConstPoolVal*)D2);
    return false;
  }

  if (Inst->isUnaryOp()) {
    Value *D = Inst->getOperand(0);
    if (D->getValueType() == Value::ConstantVal)
      return ConstantFoldUnaryInst(M, II, (UnaryOperator*)Inst,
                                   (ConstPoolVal*)D);
    return false;
  }

  if (Inst->isTerminator())
    return ConstantFoldTerminator((TerminatorInst*)Inst);

  if (Inst->getInstType() != Instruction::PHINode)
    return false;

  // If it's a PHI node and only has one operand, replace it directly with
  // that operand.
  PHINode *PN = (PHINode*)Inst;
  assert(PN->getOperand(0) && "PHI Node must have at least one operand!");
  if (PN->getOperand(1) != 0)
    return false;                  // More than one operand: leave it alone

  Value *V = PN->getOperand(0);
  PN->replaceAllUsesWith(V);       // Replace all uses of this PHI

  // Unlink from basic block
  PN->getParent()->getInstList().remove(II.getInstructionIterator());

  if (PN->hasName())
    V->setName(PN->getName());     // Inherit PHINode name
  delete PN;                       // Finally, delete the node...
  return true;
}
// DoConstPropPass - Make one sweep over every instruction of M, attempting to
// constant fold each of them.  Returns true if something was changed, false
// if nothing was changed.
//
static bool DoConstPropPass(Method *M) {
  bool SomethingChanged = false;

#if 1
  for (Method::inst_iterator It = M->inst_begin(); It != M->inst_end(); ) {
    if (!ConstantFoldInstruction(M, It)) {
      ++It;                        // Nothing folded: step to the next one
      continue;
    }

    // A successful fold may have removed the current instruction, so the
    // iterator is not advanced here.  Its internals might be stale though;
    // tell it we might have broken it so that it can recover.
    SomethingChanged = true;
    It.resyncInstructionIterator();
  }
#else
  // Disabled alternative that walks the basic blocks directly.
  Method::BasicBlocksType::iterator BBIt = M->getBasicBlocks().begin();
  for (; BBIt != M->getBasicBlocks().end(); BBIt++) {
    BasicBlock *BB = *BBIt;

    BasicBlock::InstListType::iterator DI = BB->getInstList().begin();
    for (; DI != BB->getInstList().end(); DI++)
      SomethingChanged |= ConstantFoldInstruction(M, DI);
  }
#endif
  return SomethingChanged;
}
// DoConstantPropogation - Repeatedly constant fold the instructions of M
// until a pass makes no further change, then merge identical constants in
// M's constant pool.  Returns true if the method was modified in any way,
// false if it was left untouched.
//
bool DoConstantPropogation(Method *M) {
  bool Changed = false;

  // Fold constants until we make no progress...
  while (DoConstPropPass(M))
    Changed = true;

  // Merge identical constants last: this is important because we may have
  // just introduced constants that already exist!
  //
  if (MergeConstantPoolReferences(M->getConstantPool()))
    Changed = true;

  return Changed;
}

View File

@ -0,0 +1,193 @@
//===- DCE.cpp - Code to perform dead code elimination --------------------===//
//
// This file implements dead code elimination and basic block merging.
//
// Specifically, this:
// * removes definitions with no uses (including unused constants)
// * removes basic blocks with no predecessors
// * merges a basic block into its predecessor if there is only one and the
// predecessor only has one successor.
//
// TODO: This should REALLY be recursive instead of iterative. Right now, we
// scan linearly through values, removing unused ones as we go. The problem is
// that this may cause other earlier values to become unused. To make sure that
// we get them all, we iterate until things stop changing. Instead, when
// removing a value, recheck all of its operands to see if they are now unused.
// Piece of cake, and more efficient as well.
//
//===----------------------------------------------------------------------===//
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/iTerminators.h"
#include "llvm/Opt/AllOpts.h"
// ConstPoolDCE - DCE policy used by RemoveUnusedDefs for constant pool
// planes: the whole container is scanned (EndOffs of zero) and every value is
// eligible for removal once it has no uses.
//
struct ConstPoolDCE {
  enum { EndOffs = 0 };
  static bool isDCEable(const Value * /*unused*/) { return true; }
};
// BasicBlockDCE - DCE policy used by RemoveUnusedDefs for the instruction
// list of a basic block: the last element is never examined (EndOffs of one)
// and an instruction is eligible for removal only if it has no side effects.
//
struct BasicBlockDCE {
  enum { EndOffs = 1 };
  static bool isDCEable(const Instruction *I) {
    const bool HasEffects = I->hasSideEffects();
    return !HasEffects;
  }
};
// RemoveUnusedDefs - Delete every value in Vals that has no uses and that the
// policy class DCEController accepts via isDCEable().  The last
// DCEController::EndOffs elements of the container are never examined.
// Returns true if at least one value was removed.
//
// The DCEControl argument is only used to select the policy type; its value
// is not read.
//
template<class ValueSubclass, class ItemParentType, class DCEController>
static bool RemoveUnusedDefs(ValueHolder<ValueSubclass, ItemParentType> &Vals,
                             DCEController DCEControl) {
  bool Changed = false;
  typedef ValueHolder<ValueSubclass, ItemParentType> Container;

  const int Offset = DCEController::EndOffs;

  // 'typename' is required because Container depends on the template
  // parameters.
  typename Container::iterator DI = Vals.begin();

  // Keep going while more than Offset elements remain at or after DI.
  // Comparing the distance (rather than testing DI != end()-Offset) is also
  // safe for a container that holds fewer than Offset elements, where
  // end()-Offset would lie before begin().
  while (Vals.end() - DI > Offset) {
    // Look for un"used" definitions...
    if ((*DI)->use_empty() && DCEController::isDCEable(*DI)) {
      // Bye bye.  DI is deliberately not advanced after a removal.
      delete Vals.remove(DI);
      Changed = true;
    } else {
      ++DI;
    }
  }
  return Changed;
}
// DoRemoveUnusedConstants - Delete every constant in S's constant pool that
// has no uses, one type plane at a time.  Returns true if any constant was
// removed.
//
bool DoRemoveUnusedConstants(SymTabValue *S) {
  ConstantPool &CP = S->getConstantPool();
  bool Changed = false;

  ConstantPool::plane_iterator PI = CP.begin();
  while (PI != CP.end()) {
    if (RemoveUnusedDefs(**PI, ConstPoolDCE()))
      Changed = true;
    ++PI;
  }
  return Changed;
}
// ReplaceUsesWithConstant - Redirect every use of instruction I to a constant
// of I's type taken from the constant pool of the enclosing method.  The
// first constant already present in the matching type plane is reused; if
// there is none, a null constant of that type is created and inserted.
//
static void ReplaceUsesWithConstant(Instruction *I) {
  // Get the method level constant pool
  ConstantPool &CP = I->getParent()->getParent()->getConstantPool();

  ConstPoolVal *CPV = 0;
  ConstantPool::PlaneType *P;

  // getPlane yields false when the plane exists.
  const bool LookupFailed = CP.getPlane(I->getType(), P);
  if (!LookupFailed && !P->empty())
    CPV = P->front();              // Reuse an arbitrary existing constant

  if (!CPV) {
    // We don't have an existing constant to reuse.  Just add one.
    CPV = ConstPoolVal::getNullConstant(I->getType());
    CP.insert(CPV);
  }

  // Make all users of this instruction reference the constant instead
  I->replaceAllUsesWith(CPV);
}
// DoDCEPass - Perform one round of dead code elimination on M:
//   1. delete side effect free instructions that have no uses,
//   2. delete basic blocks (other than the first) that have no predecessors,
//   3. merge a block into its sole predecessor when that predecessor ends in
//      an unconditional branch, and
//   4. delete unused constants from the method's constant pool.
// Returns true if anything was changed, so callers can iterate to a fixed
// point.
//
static bool DoDCEPass(Method *M) {
  Method::BasicBlocksType::iterator BBIt;
  Method::BasicBlocksType &BBs = M->getBasicBlocks();
  bool Changed = false;

  // Loop through now and remove instructions that have no uses...
  for (BBIt = BBs.begin(); BBIt != BBs.end(); BBIt++)
    Changed |= RemoveUnusedDefs((*BBIt)->getInstList(), BasicBlockDCE());

  // Scan through and remove basic blocks that have no predecessors (except,
  // of course, the first one.  :)  (so skip first block)
  //
  for (BBIt = BBs.begin(), ++BBIt; BBIt != BBs.end(); BBIt++) {
    BasicBlock *BB = *BBIt;
    assert(BB->getTerminator() &&
           "Degenerate basic block encountered!");  // Empty bb???

    if (BB->pred_begin() == BB->pred_end() &&
        !BB->hasConstantPoolReferences()) {

      while (!BB->getInstList().empty()) {
        Instruction *I = BB->getInstList().front();
        // If this instruction is used, replace uses with an arbitrary
        // constant value.  Because control flow can't get here, we don't care
        // what we replace the value with.
        if (!I->use_empty()) ReplaceUsesWithConstant(I);

        // Remove the instruction from the basic block
        BasicBlock::InstListType::iterator f = BB->getInstList().begin();
        delete BB->getInstList().remove(f);
      }
      delete BBs.remove(BBIt);
      // NOTE(review): the iterator adjustment below relies on where remove()
      // leaves BBIt; confirm against the ValueHolder implementation.
      ++BBIt;  // remove puts use on the previous block, we want the next one
      Changed = true;
    }
  }

  // Loop through and merge basic blocks into their predecessor if there is
  // only one, and if there is only one successor of the predecessor.
  //
  for (BBIt = BBs.begin(); BBIt != BBs.end(); BBIt++) {
    BasicBlock *BB = *BBIt;

    // Is there exactly one predecessor to this block?
    BasicBlock::pred_iterator PI(BB->pred_begin());
    if (PI != BB->pred_end() && ++PI == BB->pred_end() &&
        !BB->hasConstantPoolReferences()) {
      BasicBlock *Pred = *BB->pred_begin();

      // A block whose only predecessor is itself (an unconditional branch
      // back to its own start) must not be merged: Pred and BB would be the
      // same block, so the instruction move below would never finish and the
      // block would end up deleted while still in use.
      if (Pred == BB) continue;

      TerminatorInst *Term = Pred->getTerminator();
      if (Term == 0) continue; // Err... malformed basic block!

      // Is it an unconditional branch?
      if (Term->getInstType() != Instruction::Br ||
          !((BranchInst*)Term)->isUnconditional())
        continue;  // Nope, maybe next time...

      Changed = true;

      // Make all branches to the predecessor now point to the successor...
      Pred->replaceAllUsesWith(BB);

      // Move all definitions in the predecessor to the successor...
      BasicBlock::InstListType::iterator DI = Pred->getInstList().end();
      assert(Pred->getTerminator() &&
             "Degenerate basic block encountered!");  // Empty bb???
      delete Pred->getInstList().remove(--DI); // Remove terminator

      // Peel instructions off the end of Pred and push them onto the front
      // of BB, which preserves their relative order.
      while (Pred->getInstList().begin() != (DI = Pred->getInstList().end())) {
        Instruction *Def = Pred->getInstList().remove(--DI); // Remove from end
        BB->getInstList().push_front(Def);                   // Add to front...
      }

      // Remove basic block from the method...
      // NOTE(review): this modifies the container that BBIt is iterating
      // over; confirm that remove(pointer) keeps BBIt usable.
      BBs.remove(Pred);

      // Always inherit predecessors name if it exists...
      if (Pred->hasName()) BB->setName(Pred->getName());

      // So long you waste of a basic block you...
      delete Pred;
    }
  }

  // Remove unused constants
  Changed |= DoRemoveUnusedConstants(M);
  return Changed;
}
// DoDeadCodeElimination - Removing dead code can expose more dead code, so
// keep running DCE passes over M until one of them changes nothing.  Returns
// true if any pass changed the method.
//
bool DoDeadCodeElimination(Method *M) {
  if (!DoDCEPass(M))
    return false;                  // First pass found nothing to do

  while (DoDCEPass(M)) {
    // Keep going until a pass reports no change.
  }
  return true;
}
// DoDeadCodeElimination - Run dead code elimination on every method of
// module C through ApplyOptToAllMethods, then repeatedly delete unused
// constants at module level until nothing more is removed.  Returns true if
// anything was changed.
//
bool DoDeadCodeElimination(Module *C) {
  bool Changed = ApplyOptToAllMethods(C, DoDeadCodeElimination);

  for (;;) {
    if (!DoRemoveUnusedConstants(C)) break;
    Changed = true;
  }
  return Changed;
}

View File

@ -0,0 +1,55 @@
//===- SymbolStripping.cpp - Code to strip symbols for methods and modules --=//
//
// This file implements stripping symbols out of symbol tables.
//
// Specifically, this allows you to strip all of the symbols out of:
// * A method
// * All methods in a module
// * All symbols in a module (all method symbols + all module scope symbols)
//
// Notice that:
// * This pass makes code much less readable, so it should only be used in
// situations where the 'strip' utility would be used (such as reducing
// code size, and making it harder to reverse engineer code).
//
//===----------------------------------------------------------------------===//
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/SymbolTable.h"
#include "llvm/Opt/AllOpts.h"
// StripSymbolTable - Clear the name of every value listed in SymTab.  A null
// SymTab is accepted and treated as having nothing to strip.  Returns true if
// at least one name was removed.
//
// Each type plane is drained from the front: setting a value's name to ""
// takes it out of the symbol table, so the plane shrinks on every iteration
// (the assert checks that the front entry really went away).
//
static bool StripSymbolTable(SymbolTable *SymTab) {
  if (SymTab == 0) return false;    // No symbol table?  No problem.
  bool RemovedSymbol = false;

  for (SymbolTable::iterator I = SymTab->begin(); I != SymTab->end(); ++I) {
    map<const string, Value *> &Plane = I->second;

    while (!Plane.empty()) {        // Found nonempty type plane!
      map<const string, Value *>::iterator B = Plane.begin();
      B->second->setName("");       // Set name to "", removing from symtab!
      RemovedSymbol = true;
      assert(Plane.begin() != B);
    }
  }

  return RemovedSymbol;
}
// DoSymbolStripping - Strip every symbol name out of method M's symbol table.
// Returns true if any name was removed.
//
bool DoSymbolStripping(Method *M) {
  SymbolTable *SymTab = M->getSymbolTable();
  return StripSymbolTable(SymTab);
}
// DoFullSymbolStripping - Remove all symbolic information from all methods
// in a module, and all module level symbols. (method names, etc...)  Returns
// true if any symbol was removed.
//
bool DoFullSymbolStripping(Module *M) {
  // Both steps always run, whatever the first one reports.
  // NOTE(review): DoSymbolStripping is called with a Module* here, so this
  // presumably resolves to a module level overload declared outside this
  // file -- confirm.
  const bool MethodSymsRemoved = DoSymbolStripping(M);
  const bool ModuleSymsRemoved = StripSymbolTable(M->getSymbolTable());
  return MethodSymsRemoved | ModuleSymsRemoved;
}

328
lib/VMCore/AsmWriter.cpp Normal file
View File

@ -0,0 +1,328 @@
//===-- Writer.cpp - Library for Printing VM assembly files ------*- C++ -*--=//
//
// This library implements the functionality defined in llvm/Assembly/Writer.h
//
// This library uses the Analysis library to figure out offsets for
// variables in the method tables...
//
// TODO: print out the type name instead of the full type if a particular type
// is in the symbol table...
//
//===----------------------------------------------------------------------===//
#include "llvm/Assembly/Writer.h"
#include "llvm/Analysis/SlotCalculator.h"
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/iOther.h"
#include "llvm/iMemory.h"
// AssemblyWriter - ModuleAnalyzer subclass that prints VM assembly to an
// output stream.  Both the stream and the slot calculator are held by
// reference and must outlive the writer.  The public write() overloads
// forward to the matching process* traversal hook.
//
class AssemblyWriter : public ModuleAnalyzer {
  ostream &Out;            // Stream the assembly text is printed to
  SlotCalculator &Table;   // Supplies slot numbers for unnamed values
public:
  AssemblyWriter(ostream &o, SlotCalculator &Tab) : Out(o), Table(Tab) {}

  void write(const Module *M)         { processModule(M);      }
  void write(const Method *M)         { processMethod(M);      }
  void write(const BasicBlock *BB)    { processBasicBlock(BB); }
  void write(const Instruction *I)    { processInstruction(I); }
  void write(const ConstPoolVal *CPV) { processConstant(CPV);  }

protected:
  virtual bool visitMethod(const Method *M);
  virtual bool processConstPool(const ConstantPool &CP, bool isMethod);
  virtual bool processConstant(const ConstPoolVal *CPV);
  virtual bool processMethod(const Method *M);
  virtual bool processMethodArgument(const MethodArgument *MA);
  virtual bool processBasicBlock(const BasicBlock *BB);
  virtual bool processInstruction(const Instruction *I);

private :
  void writeOperand(const Value *Op, bool PrintType, bool PrintName = true);
};
// visitMethod - Traversal hook invoked for each method.  The assembly writer
// prints nothing at this point, so this simply returns false.
//
bool AssemblyWriter::visitMethod(const Method *) {
  return false;
}
// processConstPool - Print a constant pool.  For a method's pool the argument
// list opened by processMethod is closed first and "begin" is printed after
// the constants; for a module's pool "implementation" follows the constants.
// Always returns false.
//
bool AssemblyWriter::processConstPool(const ConstantPool &CP, bool isMethod) {
  // Done printing arguments...
  if (isMethod) Out << ")\n";

  ModuleAnalyzer::processConstPool(CP, isMethod);

  Out << (isMethod ? "begin" : "implementation\n");
  return false;
}
// processConstant - Print one constant pool entry on its own line: an
// optional "%name = " prefix, the type and then the value.  An unnamed,
// non-void constant additionally gets a trailing comment that shows its type
// and slot number, or "<badref>" if it has no slot.  Always returns false.
//
bool AssemblyWriter::processConstant(const ConstPoolVal *CPV) {
  Out << "\t";

  // Print out name if it exists...
  const bool Named = CPV->hasName();
  if (Named)
    Out << "%" << CPV->getName() << " = ";

  // Print out the type...
  Out << CPV->getType();

  // Write the value out now...
  writeOperand(CPV, false, false);

  if (!Named && CPV->getType() != Type::VoidTy) {
    const int Slot = Table.getValSlot(CPV); // Print out the def slot taken...
    Out << "\t\t; <" << CPV->getType() << ">:";
    if (Slot < 0) Out << "<badref>";
    else          Out << Slot;
  }

  Out << endl;
  return false;
}
// processMethod - Print a whole method: the return type, the quoted method
// name and an opening parenthesis, then everything the base class traversal
// emits, and finally "end".  The method is incorporated into the slot table
// for the duration of the traversal.  Always returns false.
//
bool AssemblyWriter::processMethod(const Method *M) {
  // Print out the return type and name...
  Out << "\n";
  Out << M->getReturnType();
  Out << " \"" << M->getName() << "\"(";

  Table.incorporateMethod(M);
  ModuleAnalyzer::processMethod(M);
  Table.purgeMethod();

  Out << "end\n";
  return false;
}
// processMethodArgument - Print one formal argument of a method: a leading
// ", " for every argument but the first, the type, and then " %name" if the
// argument is named.  An unnamed argument that has no slot is flagged with
// "<badref>".  Always returns false.
//
bool AssemblyWriter::processMethodArgument(const MethodArgument *Arg) {
  // Insert commas as we go... the first arg doesn't get a comma
  const bool IsFirst =
    (Arg == Arg->getParent()->getArgumentList().front());
  if (!IsFirst) Out << ", ";

  // Output type...
  Out << Arg->getType();

  // Output name, if available...
  if (Arg->hasName()) {
    Out << " %" << Arg->getName();
  } else if (Table.getValSlot(Arg) < 0) {
    Out << "<badref>";
  }

  return false;
}
// processBasicBlock - Called for each basic block in a method.  A named block
// is introduced by a "name:" label line.  An unnamed block gets a
// "; <label>:slot" comment instead ("<badref>" if it has no slot).  The
// block's contents are then handled by ModuleAnalyzer::processBasicBlock.
// Always returns false.
//
bool AssemblyWriter::processBasicBlock(const BasicBlock *BB) {
  if (!BB->hasName()) {
    const int SlotNum = Table.getValSlot(BB);
    Out << "\t\t\t\t; <label>:";
    if (SlotNum < 0)
      Out << "<badref>\n";
    else
      Out << SlotNum << endl;     // Newline separates the label comments
  } else {
    Out << "\n" << BB->getName() << ":\n";
  }

  ModuleAnalyzer::processBasicBlock(BB);
  return false;
}
// processInstruction - This member is called for each Instruction in a method.
// It prints one line per instruction: a tab, an optional "%name = " prefix,
// the opcode, the operands (with opcode specific layouts for conditional
// branches, switch, void ret, call, malloc and alloca), and finally, for
// unnamed instructions whose type is not void, a comment holding the slot
// number and the use count.  Always returns false.
//
bool AssemblyWriter::processInstruction(const Instruction *I) {
Out << "\t";
// Print out name if it exists...
// NOTE(review): I is tested for null here but is dereferenced unconditionally
// by every statement below; presumably callers never pass null -- confirm.
if (I && I->hasName())
Out << "%" << I->getName() << " = ";
// Print out the opcode...
Out << I->getOpcode();
// Grab the first operand.  The operand loops below keep going until
// getOperand hands back a null pointer.
const Value *Operand = I->getOperand(0);
// Special case conditional branches to swizzle the condition out to the front
// (operand 2 is printed first, then operand 0, then operand 1).
if (I->getInstType() == Instruction::Br && I->getOperand(1)) {
writeOperand(I->getOperand(2), true);
Out << ",";
writeOperand(Operand, true);
Out << ",";
writeOperand(I->getOperand(1), true);
} else if (I->getInstType() == Instruction::Switch) {
// Special case switch statement to get formatting nice and correct...
// Operands 0 and 1 go on the first line; the remaining operands are printed
// in pairs, one pair per line, between '[' and ']'.
writeOperand(Operand , true); Out << ",";
writeOperand(I->getOperand(1), true); Out << " [";
for (unsigned op = 2; (Operand = I->getOperand(op)); op += 2) {
Out << "\n\t\t";
writeOperand(Operand, true); Out << ",";
writeOperand(I->getOperand(op+1), true);
}
Out << "\n\t]";
} else if (I->getInstType() == Instruction::Ret && !Operand) {
// A return with no operand is printed as "ret void".
Out << " void";
} else if (I->getInstType() == Instruction::Call) {
// Operand 0 is printed in front of the parenthesis; operands 1.. are the
// comma separated list inside it.
writeOperand(Operand, true);
Out << "(";
Operand = I->getOperand(1);
if (Operand) writeOperand(Operand, true);
for (unsigned op = 2; (Operand = I->getOperand(op)); ++op) {
Out << ",";
writeOperand(Operand, true);
}
Out << " )";
} else if (I->getInstType() == Instruction::Malloc ||
I->getInstType() == Instruction::Alloca) {
// Operand 0 is cast to a ConstPoolType whose value is cast to a PointerType;
// the type printed is that pointer type's getValueType().  Operand 1, when
// present, is printed after a comma.
Out << " " << ((const PointerType*)((ConstPoolType*)Operand)
->getValue())->getValueType();
if ((Operand = I->getOperand(1))) {
Out << ","; writeOperand(Operand, true);
}
} else if (Operand) { // Print the normal way...
// PrintAllTypes - Instructions who have operands of all the same type
// omit the type from all but the first operand. If the instruction has
// different type operands (for example br), then they are all printed.
bool PrintAllTypes = false;
const Type *TheType = Operand->getType();
unsigned i;
for (i = 1; (Operand = I->getOperand(i)); i++) {
if (Operand->getType() != TheType) {
PrintAllTypes = true; // We have differing types! Print them all!
break;
}
}
// When all operand types agree, the shared type is printed once up front.
if (!PrintAllTypes)
Out << " " << I->getOperand(0)->getType();
// Note: this loop declares its own 'i', distinct from the one above.
for (unsigned i = 0; (Operand = I->getOperand(i)); i++) {
if (i) Out << ",";
writeOperand(Operand, PrintAllTypes);
}
}
// Print a little comment after the instruction indicating which slot it
// occupies.
//
if (!I->hasName() && I->getType() != Type::VoidTy) {
int Slot = Table.getValSlot(I); // Print out the def slot taken...
Out << "\t\t; <" << I->getType() << ">:";
if (Slot >= 0) Out << Slot;
else Out << "<badref>";
Out << "\t[#uses=" << I->use_size() << "]"; // Output # uses
}
Out << endl;
return false;
}
// writeOperand - Print a single operand, always preceded by a space.
//   PrintType - when set, the operand's type is printed first.
//   PrintName - when set, a named operand is printed as "%name"; it also
//               controls whether a slot-less value is flagged as "<badref>".
// Operands that are not printed by name are printed either as their constant
// string value or as "%slot".
//
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType,
                                  bool PrintName) {
  if (PrintType)
    Out << " " << Operand->getType();

  // Named operands are the easy case.
  if (Operand->hasName() && PrintName) {
    Out << " %" << Operand->getName();
    return;
  }

  const int SlotNum = Table.getValSlot(Operand);

  // Constants are printed by value rather than by slot.
  if (Operand->getValueType() == Value::ConstantVal) {
    Out << " " << ((ConstPoolVal*)Operand)->getStrValue();
    return;
  }

  if (SlotNum >= 0)
    Out << " %" << SlotNum;
  else if (PrintName)
    Out << "<badref>";            // Value has no slot in the table
}
//===----------------------------------------------------------------------===//
// External Interface declarations
//===----------------------------------------------------------------------===//
void WriteToAssembly(const Module *M, ostream &o) {
if (M == 0) { o << "<null> module\n"; return; }
SlotCalculator SlotTable(M, true);
AssemblyWriter W(o, SlotTable);
W.write(M);
}
void WriteToAssembly(const Method *M, ostream &o) {
if (M == 0) { o << "<null> method\n"; return; }
SlotCalculator SlotTable(M->getParent(), true);
AssemblyWriter W(o, SlotTable);
W.write(M);
}
void WriteToAssembly(const BasicBlock *BB, ostream &o) {
if (BB == 0) { o << "<null> basic block\n"; return; }
SlotCalculator SlotTable(BB->getParent(), true);
AssemblyWriter W(o, SlotTable);
W.write(BB);
}
void WriteToAssembly(const ConstPoolVal *CPV, ostream &o) {
if (CPV == 0) { o << "<null> constant pool value\n"; return; }
SlotCalculator *SlotTable;
// A Constant pool value may have a parent that is either a method or a
// module. Untangle this now...
//
if (CPV->getParent() == 0 ||
CPV->getParent()->getValueType() == Value::MethodVal) {
SlotTable = new SlotCalculator((Method*)CPV->getParent(), true);
} else {
assert(CPV->getParent()->getValueType() == Value::ModuleVal);
SlotTable = new SlotCalculator((Module*)CPV->getParent(), true);
}
AssemblyWriter W(o, *SlotTable);
W.write(CPV);
delete SlotTable;
}
void WriteToAssembly(const Instruction *I, ostream &o) {
if (I == 0) { o << "<null> instruction\n"; return; }
SlotCalculator SlotTable(I->getParent() ? I->getParent()->getParent() : 0,
true);
AssemblyWriter W(o, SlotTable);
W.write(I);
}

113
lib/VMCore/BasicBlock.cpp Normal file
View File

@ -0,0 +1,113 @@
//===-- BasicBlock.cpp - Implement BasicBlock related functions --*- C++ -*--=//
//
// This file implements the BasicBlock class for the VMCore library.
//
//===----------------------------------------------------------------------===//
#include "llvm/ValueHolderImpl.h"
#include "llvm/BasicBlock.h"
#include "llvm/iTerminators.h"
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/SymbolTable.h"
#include "llvm/Type.h"
// Instantiate Templates - This ugliness is the price we have to pay
// for having a ValueHolderImpl.h file separate from ValueHolder.h! :(
// The explicit instantiation below is for the instruction list of a
// BasicBlock.
//
template class ValueHolder<Instruction, BasicBlock>;
// BasicBlock ctor - Create an empty, label-typed basic block called 'name'.
// When 'parent' is non-null the new block is appended to that method's list
// of basic blocks.
BasicBlock::BasicBlock(const string &name, Method *parent)
  : Value(Type::LabelTy, Value::BasicBlockVal, name), InstList(this, 0) {
  if (!parent)
    return;                        // Free-standing block: nothing to join
  parent->getBasicBlocks().push_back(this);
}
// BasicBlock dtor - References held by the contained instructions are dropped
// first, and only then are the instructions themselves deleted.
BasicBlock::~BasicBlock() {
dropAllReferences();
InstList.delete_all();
}
// Specialize setName to take care of symbol table magic.
//
// While a block belongs to a method, its name is an entry in that method's
// symbol table.  The entry for the old name is removed before the rename, and
// an entry for the new name is added afterwards.
//
// The insertion goes through getSymbolTableSure(), the same accessor that
// BasicBlock::setParent uses when it inserts, instead of getSymbolTable().
// The removal can keep using getSymbolTable(): a block that already has a
// name and a parent was put into that table when it got them.
void BasicBlock::setName(const string &name) {
  Method *P;
  if ((P = getParent()) && hasName()) P->getSymbolTable()->remove(this);
  Value::setName(name);
  if (P && hasName()) P->getSymbolTableSure()->insert(this);
}
// setParent - Move this block to method 'parent'.  A named block is taken out
// of the old parent's symbol table before the move and entered into the new
// parent's symbol table after it.
void BasicBlock::setParent(Method *parent) {
  Method *OldParent = getParent();
  if (OldParent && hasName())
    OldParent->getSymbolTable()->remove(this);

  InstList.setParent(parent);

  Method *NewParent = getParent();
  if (NewParent && hasName())
    NewParent->getSymbolTableSure()->insert(this);
}
// getTerminator - Return the last instruction of the block if it is a
// terminator.  Returns null for an empty block or when the last instruction
// is not a terminator.
TerminatorInst *BasicBlock::getTerminator() {
  if (!InstList.empty()) {
    Instruction *Last = InstList.back();
    if (Last->isTerminator())
      return (TerminatorInst*)Last;
  }
  return 0;
}
// getTerminator (const) - Return the last instruction of the block if it is a
// terminator.  Returns null for an empty block or when the last instruction
// is not a terminator.
const TerminatorInst *const BasicBlock::getTerminator() const {
  if (!InstList.empty()) {
    const Instruction *Last = InstList.back();
    if (Last->isTerminator())
      return (const TerminatorInst*)Last;
  }
  return 0;
}
void BasicBlock::dropAllReferences() {
for_each(InstList.begin(), InstList.end(),
std::mem_fun(&Instruction::dropAllReferences));
}
// hasConstantPoolReferences() - This predicate is true if there is a
// reference to this basic block in the constant pool for this method. For
// example, if a block is reached through a switch table, that table resides
// in the constant pool, and the basic block is reference from it.
//
bool BasicBlock::hasConstantPoolReferences() const {
for (use_const_iterator I = use_begin(), E = use_end(); I != E; ++I)
if ((*I)->getValueType() == ConstantVal)
return true;
return false;
}
// splitBasicBlock - This splits a basic block into two at the specified
// instruction.  Note that all instructions BEFORE the specified iterator stay
// as part of the original basic block, an unconditional branch is added to
// the new BB, and the rest of the instructions in the BB are moved to the new
// BB, including the old terminator.  This invalidates the iterator.
//
// Note that this only works on well formed basic blocks (must have a
// terminator), and 'I' must not be the end of instruction list (which would
// cause a degenerate basic block to be formed, having a terminator inside of
// the basic block).
//
// Returns the newly created basic block, which is created with the same
// parent as this one.
//
BasicBlock *BasicBlock::splitBasicBlock(InstListType::iterator I) {
  assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
  assert(I != InstList.end() &&
         "Trying to get me to create degenerate basic block!");

  // Remember which instruction the split happens at *before* the list is
  // modified: removing instructions invalidates 'I', so it must not be
  // dereferenced again once the loop below has started.
  Instruction *SplitPoint = *I;

  BasicBlock *New = new BasicBlock("", getParent());

  // Go from the end of the basic block through to the split point, moving
  // each instruction to the front of the new basic block...
  Instruction *Inst = 0;
  do {
    InstListType::iterator EndIt = InstList.end();
    Inst = InstList.remove(--EndIt);                  // Remove from end
    New->InstList.push_front(Inst);                   // Add to front
  } while (Inst != SplitPoint);   // Stop once the split point has been moved

  // Terminate the original block with an unconditional branch to the new one.
  InstList.push_back(new BranchInst(New));
  return New;
}

197
lib/VMCore/ConstantFold.cpp Normal file
View File

@ -0,0 +1,197 @@
//===- ConstantHandling.cpp - Implement ConstantHandling.h ----------------===//
//
// This file implements the various intrinsic operations, on constant values.
//
//===----------------------------------------------------------------------===//
#include "llvm/Opt/ConstantHandling.h"
//===----------------------------------------------------------------------===//
// TemplateRules Class
//===----------------------------------------------------------------------===//
//
// TemplateRules - Implement a subclass of ConstRules that provides all
// operations as noops. All other rules classes inherit from this class so
// that if functionality is needed in the future, it can simply be added here
// and to ConstRules without changing anything else...
//
// This class also provides subclasses with typesafe implementations of methods
// so that they don't have to do type casting.
//
// ArgType      - the constant class the incoming ConstPoolVal pointers are
//                cast to before being handed to the subclass.
// SubClassName - the deriving class; its static Neg/Not/Add/Sub/LessThan
//                members are what the virtual methods forward to.
//
template<class ArgType, class SubClassName>
class TemplateRules : public ConstRules {
//===--------------------------------------------------------------------===//
// Redirecting functions that cast to the appropriate types
//===--------------------------------------------------------------------===//
// Each virtual method casts its arguments to ArgType and calls the static
// member of the same (capitalized) name on SubClassName.
virtual ConstPoolVal *neg(const ConstPoolVal *V) const {
return SubClassName::Neg((const ArgType *)V);
}
virtual ConstPoolVal *not(const ConstPoolVal *V) const {
return SubClassName::Not((const ArgType *)V);
}
virtual ConstPoolVal *add(const ConstPoolVal *V1,
const ConstPoolVal *V2) const {
return SubClassName::Add((const ArgType *)V1, (const ArgType *)V2);
}
virtual ConstPoolVal *sub(const ConstPoolVal *V1,
const ConstPoolVal *V2) const {
return SubClassName::Sub((const ArgType *)V1, (const ArgType *)V2);
}
virtual ConstPoolBool *lessthan(const ConstPoolVal *V1,
const ConstPoolVal *V2) const {
return SubClassName::LessThan((const ArgType *)V1, (const ArgType *)V2);
}
//===--------------------------------------------------------------------===//
// Default "noop" implementations
//===--------------------------------------------------------------------===//
// These are what a subclass gets for any operation it does not define itself:
// a null result.
inline static ConstPoolVal *Neg(const ArgType *V) { return 0; }
inline static ConstPoolVal *Not(const ArgType *V) { return 0; }
inline static ConstPoolVal *Add(const ArgType *V1, const ArgType *V2) {
return 0;
}
inline static ConstPoolVal *Sub(const ArgType *V1, const ArgType *V2) {
return 0;
}
inline static ConstPoolBool *LessThan(const ArgType *V1, const ArgType *V2) {
return 0;
}
};
//===----------------------------------------------------------------------===//
// EmptyRules Class
//===----------------------------------------------------------------------===//
//
// EmptyRules provides a concrete base class of ConstRules that does nothing:
// it adds no operations of its own, so every operation is the null-returning
// default inherited from TemplateRules.  ConstRules::find hands out EmptyInst
// for every type that has no dedicated rules object.
//
static // EmptyInst is static
struct EmptyRules : public TemplateRules<ConstPoolVal, EmptyRules> {
} EmptyInst;
//===----------------------------------------------------------------------===//
// BoolRules Class
//===----------------------------------------------------------------------===//
//
// BoolRules - The ConstRules implementation used for the 'bool' type.  Each
// operation returns a freshly allocated ConstPoolBool.  Not is reachable
// through the ConstRules interface; Or and And are plain static helpers.
//
static   // BoolTyInst is static...
struct BoolRules : public TemplateRules<ConstPoolBool, BoolRules> {
  // Logical negation of V.
  inline static ConstPoolVal *Not(const ConstPoolBool *V) {
    const bool Flipped = !V->getValue();
    return new ConstPoolBool(Flipped);
  }

  // Non short-circuiting "or" of V1 and V2.
  inline static ConstPoolVal *Or(const ConstPoolBool *V1,
                                 const ConstPoolBool *V2) {
    return new ConstPoolBool(V1->getValue() | V2->getValue());
  }

  // Non short-circuiting "and" of V1 and V2.
  inline static ConstPoolVal *And(const ConstPoolBool *V1,
                                  const ConstPoolBool *V2) {
    return new ConstPoolBool(V1->getValue() & V2->getValue());
  }
} BoolTyInst;
//===----------------------------------------------------------------------===//
// DirectRules Class
//===----------------------------------------------------------------------===//
//
// DirectRules provides a concrete base class of ConstRules for a variety of
// different types.  This allows the C++ compiler to automatically generate our
// constant handling operations in a typesafe and accurate manner.
//
//   ConstPoolClass - the constant class that holds values of this type.
//   BuiltinType    - the C++ type the arithmetic is carried out in.
//   Ty             - address of the Type* that result constants are given.
//
// Every operation returns a newly allocated constant.  Arithmetic results are
// converted back to BuiltinType before the constant is built, so that they
// wrap the way the underlying type does.  Neg needs this just as much as Add
// and Sub: for a type narrower than int, unary minus works on the promoted
// value, and e.g. -(unsigned char)1 is the int -1, which is not a value of
// the unsigned char type.
//
template<class ConstPoolClass, class BuiltinType, const Type **Ty>
struct DirectRules
  : public TemplateRules<ConstPoolClass,
                         DirectRules<ConstPoolClass, BuiltinType, Ty> > {

  // Arithmetic negation, wrapped to BuiltinType.
  inline static ConstPoolVal *Neg(const ConstPoolClass *V) {
    BuiltinType Result = -(BuiltinType)V->getValue();
    return new ConstPoolClass(*Ty, Result);
  }

  // Logical not: the result is 1 when the value is zero, 0 otherwise.
  inline static ConstPoolVal *Not(const ConstPoolClass *V) {
    return new ConstPoolClass(*Ty, !(BuiltinType)V->getValue());
  }

  // Sum of V1 and V2, wrapped to BuiltinType.
  inline static ConstPoolVal *Add(const ConstPoolClass *V1,
                                  const ConstPoolClass *V2) {
    BuiltinType Result = (BuiltinType)V1->getValue() +
                         (BuiltinType)V2->getValue();
    return new ConstPoolClass(*Ty, Result);
  }

  // Difference of V1 and V2, wrapped to BuiltinType.
  inline static ConstPoolVal *Sub(const ConstPoolClass *V1,
                                  const ConstPoolClass *V2) {
    BuiltinType Result = (BuiltinType)V1->getValue() -
                         (BuiltinType)V2->getValue();
    return new ConstPoolClass(*Ty, Result);
  }

  // V1 < V2, compared as BuiltinType values.
  inline static ConstPoolBool *LessThan(const ConstPoolClass *V1,
                                        const ConstPoolClass *V2) {
    bool Result = (BuiltinType)V1->getValue() < (BuiltinType)V2->getValue();
    return new ConstPoolBool(Result);
  }
};
//===----------------------------------------------------------------------===//
// DirectRules Subclasses
//===----------------------------------------------------------------------===//
//
// Given the DirectRules class we can now implement lots of types with little
// code. Thank goodness C++ compilers are great at stomping out layers of
// templates... can you imagine having to do this all by hand? (/me is lazy :)
//
// One file-local rules object per primitive numeric type.  Each one pairs the
// constant class with the C++ type used for the arithmetic and the Type that
// results are tagged with.  ConstRules::find hands out their addresses.
static DirectRules<ConstPoolSInt, signed char , &Type::SByteTy> SByteTyInst;
static DirectRules<ConstPoolUInt, unsigned char , &Type::UByteTy> UByteTyInst;
static DirectRules<ConstPoolSInt, signed short, &Type::ShortTy> ShortTyInst;
static DirectRules<ConstPoolUInt, unsigned short, &Type::UShortTy> UShortTyInst;
static DirectRules<ConstPoolSInt, signed int , &Type::IntTy> IntTyInst;
static DirectRules<ConstPoolUInt, unsigned int , &Type::UIntTy> UIntTyInst;
static DirectRules<ConstPoolSInt, int64_t , &Type::LongTy> LongTyInst;
static DirectRules<ConstPoolUInt, uint64_t , &Type::ULongTy> ULongTyInst;
static DirectRules<ConstPoolFP , float , &Type::FloatTy> FloatTyInst;
static DirectRules<ConstPoolFP , double , &Type::DoubleTy> DoubleTyInst;
// ConstRules::find - Look up the rules object responsible for type Ty and
// store it in the type through setConstRules, so that later requests can be
// answered from the type itself.  Types without a dedicated rules object get
// EmptyInst.
//
const ConstRules *ConstRules::find(const Type *Ty) {
  const ConstRules *Rules = &EmptyInst;        // Used when nothing matches

  switch (Ty->getPrimitiveID()) {
  case Type::BoolTyID:   Rules = &BoolTyInst;   break;
  case Type::SByteTyID:  Rules = &SByteTyInst;  break;
  case Type::UByteTyID:  Rules = &UByteTyInst;  break;
  case Type::ShortTyID:  Rules = &ShortTyInst;  break;
  case Type::UShortTyID: Rules = &UShortTyInst; break;
  case Type::IntTyID:    Rules = &IntTyInst;    break;
  case Type::UIntTyID:   Rules = &UIntTyInst;   break;
  case Type::LongTyID:   Rules = &LongTyInst;   break;
  case Type::ULongTyID:  Rules = &ULongTyInst;  break;
  case Type::FloatTyID:  Rules = &FloatTyInst;  break;
  case Type::DoubleTyID: Rules = &DoubleTyInst; break;
  default:                                      break;
  }

  Ty->setConstRules(Rules);   // Cache the value for future short circuiting!
  return Rules;
}

145
lib/VMCore/ConstantFold.h Normal file
View File

@ -0,0 +1,145 @@
//===-- ConstantHandling.h - Stuff for manipulating constants ----*- C++ -*--=//
//
// This file contains the declarations of some cool operators that allow you
// to do natural things with constant pool values.
//
// Unfortunately we can't overload operators on pointer types (like this:)
//
// inline bool operator==(const ConstPoolVal *V1, const ConstPoolVal *V2)
//
// so we must make do with references, even though it leads to some butt ugly
// looking code downstream. *sigh* (ex: ConstPoolVal *Result = *V1 + *V2; )
//
//===----------------------------------------------------------------------===//
//
// WARNING: These operators return pointers to newly 'new'd objects. You MUST
// make sure to free them if you don't want them hanging around. Also,
// note that these may return a null object if I don't know how to
// perform those operations on the specified constant types.
//
//===----------------------------------------------------------------------===//
//
// Implementation notes:
// This library is implemented this way for a reason: In most cases, we do
// not want to have to link the constant mucking code into an executable.
// We do, however want to tie some of this into the main type system, as an
// optional component. By using a mutable cache member in the Type class, we
// get exactly the kind of behavior we want.
//
// In the end, we get performance almost exactly the same as having a virtual
// function dispatch, but we don't have to put our virtual functions into the
// "Type" class, and we can implement functionality with templates. Good deal.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OPT_CONSTANTHANDLING_H
#define LLVM_OPT_CONSTANTHANDLING_H
#include "llvm/ConstPoolVals.h"
#include "llvm/Type.h"
//===----------------------------------------------------------------------===//
// Implement == directly...
//===----------------------------------------------------------------------===//
// operator== - Compare two constants of identical type with
// ConstPoolVal::equals.  The answer comes back as a newly allocated
// ConstPoolBool.
inline ConstPoolBool *operator==(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const bool Same = V1.equals(&V2);
  return new ConstPoolBool(Same);
}
//===----------------------------------------------------------------------===//
// Implement all other operators indirectly through TypeRules system
//===----------------------------------------------------------------------===//
class ConstRules {
protected:
inline ConstRules() {} // Can only be subclassed...
public:
// Unary Operators...
virtual ConstPoolVal *neg(const ConstPoolVal *V) const = 0;
virtual ConstPoolVal *not(const ConstPoolVal *V) const = 0;
// Binary Operators...
virtual ConstPoolVal *add(const ConstPoolVal *V1,
const ConstPoolVal *V2) const = 0;
virtual ConstPoolVal *sub(const ConstPoolVal *V1,
const ConstPoolVal *V2) const = 0;
virtual ConstPoolBool *lessthan(const ConstPoolVal *V1,
const ConstPoolVal *V2) const = 0;
// ConstRules::get - A type will cache its own type rules if one is needed...
// we just want to make sure to hit the cache instead of doing it indirectly,
// if possible...
//
static inline const ConstRules *get(const ConstPoolVal &V) {
const ConstRules *Result = V.getType()->getConstRules();
return Result ? Result : find(V.getType());
}
private :
static const ConstRules *find(const Type *Ty);
ConstRules(const ConstRules &); // Do not implement
ConstRules &operator=(const ConstRules &); // Do not implement
};
// Unary minus - Negate V using the rules for its type.
inline ConstPoolVal *operator-(const ConstPoolVal &V) {
  const ConstRules *Rules = ConstRules::get(V);
  return Rules->neg(&V);
}
// operator! - Apply the 'not' operation of the rules for V's type.
inline ConstPoolVal *operator!(const ConstPoolVal &V) {
  const ConstRules *Rules = ConstRules::get(V);
  return Rules->not(&V);
}
// operator+ - Add two constants of identical type using the rules for that
// type.
inline ConstPoolVal *operator+(const ConstPoolVal &V1, const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->add(&V1, &V2);
}
// Binary minus - Subtract V2 from V1 (identical types) using the rules for
// that type.
inline ConstPoolVal *operator-(const ConstPoolVal &V1, const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->sub(&V1, &V2);
}
// operator< - Compare two constants of identical type using the rules for
// that type.
inline ConstPoolBool *operator<(const ConstPoolVal &V1,
                                const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->lessthan(&V1, &V2);
}
//===----------------------------------------------------------------------===//
// Implement 'derived' operators based on what we already have...
//===----------------------------------------------------------------------===//
// operator> - V1 > V2 is evaluated as V2 < V1.
inline ConstPoolBool *operator>(const ConstPoolVal &V1,
                                const ConstPoolVal &V2) {
  return operator<(V2, V1);
}
// operator!= - Evaluated as !(V1 == V2): the result of == is inverted in
// place and returned.
inline ConstPoolBool *operator!=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Answer = V1 == V2;
  const bool Inverted = !Answer->getValue();
  Answer->setValue(Inverted);
  return Answer;
}
// operator>= - Evaluated as !(V1 < V2): the result of < is inverted in place
// and returned.
//
// operator< returns null when the comparison is not supported for the type of
// the constants.  In that case null is passed on to the caller instead of
// being dereferenced.
inline ConstPoolBool *operator>=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Result = V1 < V2;
  if (Result == 0) return 0;              // Comparison not supported
  Result->setValue(!Result->getValue());  // Invert value
  return Result;                          // !(V1 < V2)
}
// operator<= - Evaluated as !(V1 > V2): the result of > is inverted in place
// and returned.
//
// operator> returns null when the comparison is not supported for the type of
// the constants.  In that case null is passed on to the caller instead of
// being dereferenced.
inline ConstPoolBool *operator<=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Result = V1 > V2;
  if (Result == 0) return 0;              // Comparison not supported
  Result->setValue(!Result->getValue());  // Invert value
  return Result;                          // !(V1 > V2)
}
#endif

View File

@ -0,0 +1,145 @@
//===-- ConstantHandling.h - Stuff for manipulating constants ----*- C++ -*--=//
//
// This file contains the declarations of some cool operators that allow you
// to do natural things with constant pool values.
//
// Unfortunately we can't overload operators on pointer types (like this:)
//
// inline bool operator==(const ConstPoolVal *V1, const ConstPoolVal *V2)
//
// so we must make do with references, even though it leads to some butt ugly
// looking code downstream. *sigh* (ex: ConstPoolVal *Result = *V1 + *V2; )
//
//===----------------------------------------------------------------------===//
//
// WARNING: These operators return pointers to newly 'new'd objects. You MUST
// make sure to free them if you don't want them hanging around. Also,
// note that these may return a null object if I don't know how to
// perform those operations on the specified constant types.
//
//===----------------------------------------------------------------------===//
//
// Implementation notes:
// This library is implemented this way for a reason: In most cases, we do
// not want to have to link the constant mucking code into an executable.
// We do, however want to tie some of this into the main type system, as an
// optional component. By using a mutable cache member in the Type class, we
// get exactly the kind of behavior we want.
//
// In the end, we get performance almost exactly the same as having a virtual
// function dispatch, but we don't have to put our virtual functions into the
// "Type" class, and we can implement functionality with templates. Good deal.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OPT_CONSTANTHANDLING_H
#define LLVM_OPT_CONSTANTHANDLING_H
#include "llvm/ConstPoolVals.h"
#include "llvm/Type.h"
//===----------------------------------------------------------------------===//
// Implement == directly...
//===----------------------------------------------------------------------===//
// operator== - Compare two constants of identical type with
// ConstPoolVal::equals.  The answer comes back as a newly allocated
// ConstPoolBool.
inline ConstPoolBool *operator==(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const bool Same = V1.equals(&V2);
  return new ConstPoolBool(Same);
}
//===----------------------------------------------------------------------===//
// Implement all other operators indirectly through TypeRules system
//===----------------------------------------------------------------------===//
class ConstRules {
protected:
inline ConstRules() {} // Can only be subclassed...
public:
// Unary Operators...
virtual ConstPoolVal *neg(const ConstPoolVal *V) const = 0;
virtual ConstPoolVal *not(const ConstPoolVal *V) const = 0;
// Binary Operators...
virtual ConstPoolVal *add(const ConstPoolVal *V1,
const ConstPoolVal *V2) const = 0;
virtual ConstPoolVal *sub(const ConstPoolVal *V1,
const ConstPoolVal *V2) const = 0;
virtual ConstPoolBool *lessthan(const ConstPoolVal *V1,
const ConstPoolVal *V2) const = 0;
// ConstRules::get - A type will cache its own type rules if one is needed...
// we just want to make sure to hit the cache instead of doing it indirectly,
// if possible...
//
static inline const ConstRules *get(const ConstPoolVal &V) {
const ConstRules *Result = V.getType()->getConstRules();
return Result ? Result : find(V.getType());
}
private :
static const ConstRules *find(const Type *Ty);
ConstRules(const ConstRules &); // Do not implement
ConstRules &operator=(const ConstRules &); // Do not implement
};
// Unary minus - Negate V using the rules for its type.
inline ConstPoolVal *operator-(const ConstPoolVal &V) {
  const ConstRules *Rules = ConstRules::get(V);
  return Rules->neg(&V);
}
// operator! - Apply the 'not' operation of the rules for V's type.
inline ConstPoolVal *operator!(const ConstPoolVal &V) {
  const ConstRules *Rules = ConstRules::get(V);
  return Rules->not(&V);
}
// operator+ - Add two constants of identical type using the rules for that
// type.
inline ConstPoolVal *operator+(const ConstPoolVal &V1, const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->add(&V1, &V2);
}
// Binary minus - Subtract V2 from V1 (identical types) using the rules for
// that type.
inline ConstPoolVal *operator-(const ConstPoolVal &V1, const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->sub(&V1, &V2);
}
// operator< - Compare two constants of identical type using the rules for
// that type.
inline ConstPoolBool *operator<(const ConstPoolVal &V1,
                                const ConstPoolVal &V2) {
  assert(V1.getType() == V2.getType() && "Constant types must be identical!");
  const ConstRules *Rules = ConstRules::get(V1);
  return Rules->lessthan(&V1, &V2);
}
//===----------------------------------------------------------------------===//
// Implement 'derived' operators based on what we already have...
//===----------------------------------------------------------------------===//
// operator> - V1 > V2 is evaluated as V2 < V1.
inline ConstPoolBool *operator>(const ConstPoolVal &V1,
                                const ConstPoolVal &V2) {
  return operator<(V2, V1);
}
// operator!= - Evaluated as !(V1 == V2): the result of == is inverted in
// place and returned.
inline ConstPoolBool *operator!=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Answer = V1 == V2;
  const bool Inverted = !Answer->getValue();
  Answer->setValue(Inverted);
  return Answer;
}
// operator>= - Evaluated as !(V1 < V2): the result of < is inverted in place
// and returned.
//
// operator< returns null when the comparison is not supported for the type of
// the constants.  In that case null is passed on to the caller instead of
// being dereferenced.
inline ConstPoolBool *operator>=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Result = V1 < V2;
  if (Result == 0) return 0;              // Comparison not supported
  Result->setValue(!Result->getValue());  // Invert value
  return Result;                          // !(V1 < V2)
}
// operator<= - Evaluated as !(V1 > V2): the result of > is inverted in place
// and returned.
//
// operator> returns null when the comparison is not supported for the type of
// the constants.  In that case null is passed on to the caller instead of
// being dereferenced.
inline ConstPoolBool *operator<=(const ConstPoolVal &V1,
                                 const ConstPoolVal &V2) {
  ConstPoolBool *Result = V1 > V2;
  if (Result == 0) return 0;              // Comparison not supported
  Result->setValue(!Result->getValue());  // Invert value
  return Result;                          // !(V1 > V2)
}
#endif

434
lib/VMCore/ConstantPool.cpp Normal file
View File

@ -0,0 +1,434 @@
//===-- iConstPool.cpp - Implement ConstPool instructions --------*- C++ -*--=//
//
// This file implements the ConstPool* classes...
//
//===----------------------------------------------------------------------===//
#define __STDC_LIMIT_MACROS // Get defs for INT64_MAX and friends...
#include "llvm/ConstPoolVals.h"
#include "llvm/ConstantPool.h"
#include "llvm/Tools/StringExtras.h" // itostr
#include "llvm/DerivedTypes.h"
#include "llvm/SymbolTable.h"
#include <algorithm>
#include <assert.h>
//===----------------------------------------------------------------------===//
// ConstantPool Class
//===----------------------------------------------------------------------===//
// setParent - Record STV as the owner of this constant pool and pass the new
// owner on to every type plane.
void ConstantPool::setParent(SymTabValue *STV) {
  Parent = STV;

  unsigned PlaneNo = 0;
  while (PlaneNo < Planes.size()) {
    Planes[PlaneNo]->setParent(Parent);
    ++PlaneNo;
  }
}
// getPlane (const) - Look up the plane that holds constants of type T.
// Returns true when no such plane exists, in which case Plane is left
// untouched; otherwise Plane is pointed at the plane and false is returned.
//
bool ConstantPool::getPlane(const Type *T, const PlaneType *&Plane) const {
  const unsigned PlaneNo = T->getUniqueID();
  if (PlaneNo < Planes.size()) {
    Plane = Planes[PlaneNo];
    return false;
  }
  return true;                     // No plane for this type
}
// getPlane - Look up the plane that holds constants of type T.  Returns true
// when no such plane exists, in which case Plane is left untouched; otherwise
// Plane is pointed at the plane and false is returned.
//
bool ConstantPool::getPlane(const Type *T, PlaneType *&Plane) {
  const unsigned PlaneNo = T->getUniqueID();
  if (PlaneNo < Planes.size()) {
    Plane = Planes[PlaneNo];
    return false;
  }
  return true;                     // No plane for this type
}
void ConstantPool::resize(unsigned size) {
unsigned oldSize = Planes.size();
Planes.resize(size, 0);
while (oldSize < size)
Planes[oldSize++] = new PlaneType(Parent, Parent);
}
// getPlane - Return the plane for type T, growing the pool first when the
// plane does not exist yet.
ConstantPool::PlaneType &ConstantPool::getPlane(const Type *T) {
  const unsigned PlaneNo = T->getUniqueID();
  if (PlaneNo >= Planes.size())
    resize(PlaneNo + 1);
  return *Planes[PlaneNo];
}
// insert - Append constant N to the plane for its type.  The plane is created
// first if the pool does not have it yet.
void ConstantPool::insert(ConstPoolVal *N) {
  getPlane(N->getType()).push_back(N);
}
// remove - Take constant N out of the plane for its type.  Returns true when
// N could not be found (no plane for its type, or not in that plane) and
// false when it was removed.
bool ConstantPool::remove(ConstPoolVal *N) {
  const unsigned PlaneNo = N->getType()->getUniqueID();
  if (PlaneNo >= Planes.size())
    return true;                   // Doesn't contain any of that type

  PlaneType *Plane = Planes[PlaneNo];
  PlaneType::iterator Pos = ::find(Plane->begin(), Plane->end(), N);
  if (Pos == Plane->end())
    return true;                   // Not in this pool

  Plane->remove(Pos);
  return false;
}
void ConstantPool::delete_all() {
dropAllReferences();
for (unsigned i = 0; i < Planes.size(); i++) {
Planes[i]->delete_all();
Planes[i]->setParent(0);
delete Planes[i];
}
Planes.clear();
}
void ConstantPool::dropAllReferences() {
for (unsigned i = 0; i < Planes.size(); i++)
for (PlaneType::iterator I = Planes[i]->begin();
I != Planes[i]->end(); I++)
(*I)->dropAllReferences();
}
struct EqualsConstant {
const ConstPoolVal *v;
inline EqualsConstant(const ConstPoolVal *V) { v = V; }
inline bool operator()(const ConstPoolVal *V) const {
return v->equals(V);
}
};
// find - Return the first constant in the pool that equals V, or null when
// the pool has no plane for V's type or no equal constant in that plane.
ConstPoolVal *ConstantPool::find(const ConstPoolVal *V) {
  const PlaneType *Plane;
  if (getPlane(V->getType(), Plane))
    return 0;                      // No plane for this type

  PlaneType::const_iterator Pos =
    find_if(Plane->begin(), Plane->end(), EqualsConstant(V));
  return Pos == Plane->end() ? 0 : *Pos;
}
// find (const) - Return the first constant in the pool that equals V, or null
// when the pool has no plane for V's type or no equal constant in that plane.
const ConstPoolVal *ConstantPool::find(const ConstPoolVal *V) const {
  const PlaneType *Plane;
  if (getPlane(V->getType(), Plane))
    return 0;                      // No plane for this type

  PlaneType::const_iterator Pos =
    find_if(Plane->begin(), Plane->end(), EqualsConstant(V));
  return Pos == Plane->end() ? 0 : *Pos;
}
// find - Return the constant in the Type::TypeTy plane that equals a
// ConstPoolType built from Ty, or null when that plane does not exist or
// holds no such constant.
ConstPoolVal *ConstantPool::find(const Type *Ty) {
  const PlaneType *Plane;
  if (getPlane(Type::TypeTy, Plane))
    return 0;                      // No plane of type constants

  // TODO: This is kinda silly - a temporary constant is built just so that
  // there is something to compare against.
  ConstPoolType Probe(Ty);
  PlaneType::const_iterator Pos =
    find_if(Plane->begin(), Plane->end(), EqualsConstant(&Probe));
  return Pos == Plane->end() ? 0 : *Pos;
}
// find (const) - Return the constant in the Type::TypeTy plane that equals a
// ConstPoolType built from Ty, or null when that plane does not exist or
// holds no such constant.
const ConstPoolVal *ConstantPool::find(const Type *Ty) const {
  const PlaneType *Plane;
  if (getPlane(Type::TypeTy, Plane))
    return 0;                      // No plane of type constants

  // TODO: This is kinda silly - a temporary constant is built just so that
  // there is something to compare against.
  ConstPoolType Probe(Ty);
  PlaneType::const_iterator Pos =
    find_if(Plane->begin(), Plane->end(), EqualsConstant(&Probe));
  return Pos == Plane->end() ? 0 : *Pos;
}
//===----------------------------------------------------------------------===//
// ConstPoolVal Class
//===----------------------------------------------------------------------===//
// Specialize setName to take care of symbol table majik
void ConstPoolVal::setName(const string &name) {
SymTabValue *P;
if ((P = getParent()) && hasName()) P->getSymbolTable()->remove(this);
Value::setName(name);
if (P && hasName()) P->getSymbolTable()->insert(this);
}
// getNullConstant - Static constructor that creates a '0' constant for the
// given type.  Bool, the signed and unsigned integer types, float and double
// are handled; every other type yields a null pointer.
//
ConstPoolVal *ConstPoolVal::getNullConstant(const Type *Ty) {
  ConstPoolVal *Zero = 0;      // Stays null for unsupported types

  switch (Ty->getPrimitiveID()) {
  case Type::BoolTyID:
    Zero = new ConstPoolBool(false);
    break;

  case Type::SByteTyID:
  case Type::ShortTyID:
  case Type::IntTyID:
  case Type::LongTyID:
    Zero = new ConstPoolSInt(Ty, 0);
    break;

  case Type::UByteTyID:
  case Type::UShortTyID:
  case Type::UIntTyID:
  case Type::ULongTyID:
    Zero = new ConstPoolUInt(Ty, 0);
    break;

  case Type::FloatTyID:
  case Type::DoubleTyID:
    Zero = new ConstPoolFP(Ty, 0);
    break;

  default:
    break;
  }
  return Zero;
}
//===----------------------------------------------------------------------===//
// ConstPoolXXX Classes
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Normal Constructors
// Construct a boolean constant of type Type::BoolTy holding V.
// NOTE(review): the default argument is spelled on this out-of-line
// definition; confirm the class declaration does not repeat it.
ConstPoolBool::ConstPoolBool(bool V, const string &Name = "")
  : ConstPoolVal(Type::BoolTy, Name) {
  this->Val = V;
}
// Construct a signed integer constant of type Ty holding V.  Asserts that V
// passes isValueValidForType for Ty.
ConstPoolSInt::ConstPoolSInt(const Type *Ty, int64_t V, const string &Name)
  : ConstPoolVal(Ty, Name) {
  //cerr << "value = " << (int)V << ": " << Ty->getName() << endl;
  assert(isValueValidForType(Ty, V) && "Value to large for type!");
  this->Val = V;
}
// Construct an unsigned integer constant of type Ty holding V.  Asserts that
// V passes isValueValidForType for Ty.
ConstPoolUInt::ConstPoolUInt(const Type *Ty, uint64_t V, const string &Name)
  : ConstPoolVal(Ty, Name) {
  //cerr << "Uvalue = " << (int)V << ": " << Ty->getName() << endl;
  assert(isValueValidForType(Ty, V) && "Value to large for type!");
  this->Val = V;
}
// Construct a floating point constant of type Ty holding V.  Asserts that V
// passes isValueValidForType for Ty.
ConstPoolFP::ConstPoolFP(const Type *Ty, double V, const string &Name)
  : ConstPoolVal(Ty, Name) {
  assert(isValueValidForType(Ty, V) && "Value to large for type!");
  this->Val = V;
}
// Construct a type constant: a value of type Type::TypeTy that wraps the
// type V.
ConstPoolType::ConstPoolType(const Type *V, const string &Name)
  : ConstPoolVal(Type::TypeTy, Name),
    Val(V) {
}
// Construct an array constant of type T from the element constants in V.
// Each element is asserted to have T's element type and is recorded as a use
// owned by this constant.
ConstPoolArray::ConstPoolArray(const ArrayType *T,
                               vector<ConstPoolVal*> &V,
                               const string &Name)
  : ConstPoolVal(T, Name) {
  unsigned i = 0;
  while (i < V.size()) {
    assert(V[i]->getType() == T->getElementType());
    Val.push_back(ConstPoolUse(V[i], this));
    ++i;
  }
}
// Construct a structure constant of type T from the member constants in V.
// Member i is asserted to have the i'th element type of T and is recorded as
// a use owned by this constant.
ConstPoolStruct::ConstPoolStruct(const StructType *T,
                                 vector<ConstPoolVal*> &V,
                                 const string &Name)
  : ConstPoolVal(T, Name) {
  const StructType::ElementTypes &ETypes = T->getElementTypes();

  // The per-member check below indexes ETypes with V's indices, so make sure
  // V cannot run past the end of the element type list before touching it.
  assert(V.size() <= ETypes.size() &&
         "More initializers than structure elements!");

  for (unsigned i = 0; i < V.size(); i++) {
    assert(V[i]->getType() == ETypes[i]);
    Val.push_back(ConstPoolUse(V[i], this));
  }
}
//===----------------------------------------------------------------------===//
// Copy Constructors
// Copy constructor: duplicates the boolean value.  The name is not passed on
// to the base class constructor.
ConstPoolBool::ConstPoolBool(const ConstPoolBool &CPB)
  : ConstPoolVal(Type::BoolTy) {
  this->Val = CPB.Val;
}
// Copy constructor: duplicates the type and signed value.  The name is not
// passed on to the base class constructor.
ConstPoolSInt::ConstPoolSInt(const ConstPoolSInt &CPSI)
  : ConstPoolVal(CPSI.getType()) {
  this->Val = CPSI.Val;
}
// Copy constructor: duplicates the type and unsigned value.  The name is not
// passed on to the base class constructor.
ConstPoolUInt::ConstPoolUInt(const ConstPoolUInt &CPUI)
  : ConstPoolVal(CPUI.getType()) {
  this->Val = CPUI.Val;
}
// Copy constructor: duplicates the type and floating point value.  The name
// is not passed on to the base class constructor.
ConstPoolFP::ConstPoolFP(const ConstPoolFP &CPFP)
  : ConstPoolVal(CPFP.getType()) {
  this->Val = CPFP.Val;
}
// Copy constructor: the copy wraps the same type as CPT.  The name is not
// passed on to the base class constructor.
ConstPoolType::ConstPoolType(const ConstPoolType &CPT)
  : ConstPoolVal(Type::TypeTy),
    Val(CPT.Val) {
}
// Copy constructor: the copy refers to the same element constants as CPA,
// each through a new use owned by the copy.
ConstPoolArray::ConstPoolArray(const ConstPoolArray &CPA)
  : ConstPoolVal(CPA.getType()) {
  unsigned i = 0;
  while (i < CPA.Val.size()) {
    Val.push_back(ConstPoolUse((ConstPoolVal*)CPA.Val[i], this));
    ++i;
  }
}
// Copy constructor: the copy refers to the same member constants as CPS,
// each through a new use owned by the copy.
ConstPoolStruct::ConstPoolStruct(const ConstPoolStruct &CPS)
  : ConstPoolVal(CPS.getType()) {
  unsigned i = 0;
  while (i < CPS.Val.size()) {
    Val.push_back(ConstPoolUse((ConstPoolVal*)CPS.Val[i], this));
    ++i;
  }
}
//===----------------------------------------------------------------------===//
// getStrValue implementations
// getStrValue - Spell the boolean as "true" or "false".
string ConstPoolBool::getStrValue() const {
  return Val ? "true" : "false";
}
// getStrValue - Render the signed value with itostr.
string ConstPoolSInt::getStrValue() const {
  string Text = itostr(Val);
  return Text;
}
// getStrValue - Render the unsigned value with utostr.
string ConstPoolUInt::getStrValue() const {
  string Text = utostr(Val);
  return Text;
}
// getStrValue - Printing of floating point constants is not implemented:
// the function asserts unconditionally, and when assertions are compiled out
// it returns a fixed placeholder string instead of the value.
string ConstPoolFP::getStrValue() const {
  assert(0 && "FP Constants Not implemented yet!!!!!!!!!!!");
  string Placeholder = "% FP Constants NI!" /* + dtostr(Val)*/;
  return Placeholder;
}
// getStrValue - A type constant prints as the name of the type it wraps.
string ConstPoolType::getStrValue() const {
  string TypeName = Val->getName();
  return TypeName;
}
// getStrValue - Render the array as "[ <type> <value>, <type> <value> ]".
// An array with no elements renders as "[ ]".
string ConstPoolArray::getStrValue() const {
  string Result = "[";
  for (unsigned i = 0; i < Val.size(); i++) {
    // The first element follows the bracket; later ones follow a comma.
    Result += (i == 0) ? " " : ", ";
    Result += Val[i]->getType()->getName() + " " + Val[i]->getStrValue();
  }
  return Result + " ]";
}
// getStrValue - Render the structure as "{ <type> <value>, <type> <value> }".
// A structure with no members renders as "{ }".
string ConstPoolStruct::getStrValue() const {
  string Result = "{";
  for (unsigned i = 0; i < Val.size(); i++) {
    // The first member follows the brace; later ones follow a comma.
    Result += (i == 0) ? " " : ", ";
    Result += Val[i]->getType()->getName() + " " + Val[i]->getStrValue();
  }
  return Result + " }";
}
//===----------------------------------------------------------------------===//
// equals implementations
bool ConstPoolBool::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
return ((ConstPoolBool*)V)->getValue() == Val;
}
bool ConstPoolSInt::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
return ((ConstPoolSInt*)V)->getValue() == Val;
}
bool ConstPoolUInt::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
return ((ConstPoolUInt*)V)->getValue() == Val;
}
bool ConstPoolFP::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
return ((ConstPoolFP*)V)->getValue() == Val;
}
bool ConstPoolType::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
return ((ConstPoolType*)V)->getValue() == Val;
}
bool ConstPoolArray::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
ConstPoolArray *AV = (ConstPoolArray*)V;
if (Val.size() != AV->Val.size()) return false;
for (unsigned i = 0; i < Val.size(); i++)
if (!Val[i]->equals(AV->Val[i])) return false;
return true;
}
bool ConstPoolStruct::equals(const ConstPoolVal *V) const {
assert(getType() == V->getType());
ConstPoolStruct *SV = (ConstPoolStruct*)V;
if (Val.size() != SV->Val.size()) return false;
for (unsigned i = 0; i < Val.size(); i++)
if (!Val[i]->equals(SV->Val[i])) return false;
return true;
}
//===----------------------------------------------------------------------===//
// isValueValidForType implementations
// isValueValidForType - Return true if Val lies within the range of the
// signed integer type Ty.  Every type that is not one of the four signed
// integer types is rejected.
bool ConstPoolSInt::isValueValidForType(const Type *Ty, int64_t Val) {
  switch (Ty->getPrimitiveID()) {
  // Signed types...
  case Type::SByteTyID:
    return (Val >= INT8_MIN && Val <= INT8_MAX);
  case Type::ShortTyID:
    return (Val >= INT16_MIN && Val <= INT16_MAX);
  case Type::IntTyID:
    return (Val >= INT32_MIN && Val <= INT32_MAX);
  case Type::LongTyID:
    return true;          // This is the largest type...

  default:
    return false;         // These can't be represented as integers!!!
  }

  // Every case above returns, so control never gets here.
  assert(0 && "WTF?");
  return false;
}
// isValueValidForType - Return true if Val lies within the range of the
// unsigned integer type Ty.  Every type that is not one of the four unsigned
// integer types is rejected.
bool ConstPoolUInt::isValueValidForType(const Type *Ty, uint64_t Val) {
  switch (Ty->getPrimitiveID()) {
  // Unsigned types...
  case Type::UByteTyID:
    return (Val <= UINT8_MAX);
  case Type::UShortTyID:
    return (Val <= UINT16_MAX);
  case Type::UIntTyID:
    return (Val <= UINT32_MAX);
  case Type::ULongTyID:
    return true;          // This is the largest type...

  default:
    return false;         // These can't be represented as integers!!!
  }

  // Every case above returns, so control never gets here.
  assert(0 && "WTF?");
  return false;
}
// isValueValidForType - Return true if a constant of floating point type Ty
// may hold Val.  Both float and double are accepted; every other type is
// rejected.
//
// Float used to fall into the default case and be rejected, which made the
// ConstPoolFP constructor assert for every float constant, including the
// zero that ConstPoolVal::getNullConstant builds for Type::FloatTyID.
//
bool ConstPoolFP::isValueValidForType(const Type *Ty, double Val) {
  switch (Ty->getPrimitiveID()) {
  default:
    return false;         // These can't be represented as floating point!

  // TODO: Figure out how to test if a double can be cast to a float!
  // Until then, float accepts every value, exactly like double.
  case Type::FloatTyID:
  case Type::DoubleTyID:
    return true;          // This is the largest type...
  }
}

75
lib/VMCore/Function.cpp Normal file
View File

@ -0,0 +1,75 @@
//===-- Method.cpp - Implement the Method class ------------------*- C++ -*--=//
//
// This file implements the Method class for the VMCore library.
//
//===----------------------------------------------------------------------===//
#include "llvm/ValueHolderImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/SymbolTable.h"
#include "llvm/Module.h"
#include "llvm/Method.h"
#include "llvm/BasicBlock.h"
#include "llvm/iOther.h"
// Instantiate Templates - This ugliness is the price we have to pay
// for having a ValueHolderImpl.h file separate from ValueHolder.h! :(
//
template class ValueHolder<MethodArgument, Method>;
template class ValueHolder<BasicBlock , Method>;
// Construct a method with signature Ty and the given name.  The basic block
// list and argument list are both tied to this method, and the method starts
// out with no parent module.
Method::Method(const MethodType *Ty, const string &name)
  : SymTabValue(Ty, Value::MethodVal, name),
    BasicBlocks(this),
    ArgumentList(this, this) {
  assert(Ty->isMethodType() && "Method signature must be of method type!");
  this->Parent = 0;
}
// Destroy the method: drop all references first, then delete every basic
// block, and finally delete the arguments and detach the argument list.
Method::~Method() {
  dropAllReferences();    // After this it is safe to delete instructions.

  // TODO: Should remove from the end, not the beginning of vector!
  for (;;) {
    BasicBlocksType::iterator First = BasicBlocks.begin();
    if (First == BasicBlocks.end())
      break;
    delete BasicBlocks.remove(First);
  }

  // Delete all of the method arguments and unlink from symbol table...
  ArgumentList.delete_all();
  ArgumentList.setParent(0);
}
// Specialize setName to take care of symbol table majik
void Method::setName(const string &name) {
Module *P;
if ((P = getParent()) && hasName()) P->getSymbolTable()->remove(this);
Value::setName(name);
if (P && getName() != "") P->getSymbolTableSure()->insert(this);
}
// setParent - Record the owning module and relink this method's symbol table
// to the module's table, or to nothing when the parent is null.
void Method::setParent(Module *parent) {
  Parent = parent;

  // Relink symbol tables together...
  if (Parent)
    setParentSymTab(Parent->getSymbolTableSure());
  else
    setParentSymTab(0);
}
// getReturnType - The return type recorded in this method's signature.
const Type *Method::getReturnType() const {
  const MethodType *Signature = (const MethodType *)getType();
  return Signature->getReturnType();
}
// getMethodType - This method's type, viewed as a MethodType.
const MethodType *Method::getMethodType() const {
  const MethodType *Signature = (const MethodType *)getType();
  return Signature;
}
// dropAllReferences() - This function causes all the subinstructions to "let
// go" of all references that they are maintaining. This allows one to
// 'delete' a whole class at a time, even though there may be circular
// references... first all references are dropped, and all use counts go to
// zero. Then everything is delete'd for real. Note that no operations are
// valid on an object that has "dropped all references", except operator
// delete.
//
void Method::dropAllReferences() {
for_each(BasicBlocks.begin(), BasicBlocks.end(),
std::mem_fun(&BasicBlock::dropAllReferences));
}

66
lib/VMCore/InstrTypes.cpp Normal file
View File

@ -0,0 +1,66 @@
//===-- InstrTypes.cpp - Implement Instruction subclasses --------*- C++ -*--=//
//
// This file implements the TerminatorInst, MethodArgument, and PHINode classes.
//
//===----------------------------------------------------------------------===//
#include "llvm/iOther.h"
#include "llvm/BasicBlock.h"
#include "llvm/Method.h"
#include "llvm/SymbolTable.h"
#include "llvm/Type.h"
#include <algorithm>
//===----------------------------------------------------------------------===//
// TerminatorInst Class
//===----------------------------------------------------------------------===//
// A terminator is constructed as an unnamed instruction of type void with
// the given instruction type code.
TerminatorInst::TerminatorInst(unsigned iType)
  : Instruction(Type::VoidTy, iType, "")
{
}
//===----------------------------------------------------------------------===//
// MethodArgument Class
//===----------------------------------------------------------------------===//
// Specialize setName to take care of symbol table majik
void MethodArgument::setName(const string &name) {
Method *P;
if ((P = getParent()) && hasName()) P->getSymbolTable()->remove(this);
Value::setName(name);
if (P && hasName()) P->getSymbolTable()->insert(this);
}
//===----------------------------------------------------------------------===//
// PHINode Class
//===----------------------------------------------------------------------===//
// Construct a PHI node of type Ty with the given name and no incoming values.
PHINode::PHINode(const Type *Ty, const string &name)
  : Instruction(Ty, Instruction::PHINode, name)
{
}
// Copy constructor: the copy has PN's type and refers to the same incoming
// values, each through a new use owned by the copy.  The name is not passed
// on to the base class constructor.
PHINode::PHINode(const PHINode &PN)
  : Instruction(PN.getType(), Instruction::PHINode) {
  unsigned i = 0;
  while (i < PN.IncomingValues.size()) {
    IncomingValues.push_back(Use(PN.IncomingValues[i], this));
    ++i;
  }
}
// dropAllReferences - Let go of every incoming value by emptying the list of
// uses.
void PHINode::dropAllReferences()
{
  IncomingValues.clear();
}
// setOperand - Replace incoming value number i with Val, which is asserted
// to be non-null.  Returns true on success and false when i is not the index
// of an existing incoming value.
bool PHINode::setOperand(unsigned i, Value *Val) {
  assert(Val && "PHI node must only reference nonnull definitions!");
  if (i < IncomingValues.size()) {
    IncomingValues[i] = Val;
    return true;
  }
  return false;
}
// addIncoming - Append D to the incoming values as a new use owned by this
// PHI node.
void PHINode::addIncoming(Value *D)
{
  IncomingValues.push_back(Use(D, this));
}

View File

@ -0,0 +1,61 @@
//===-- Instruction.cpp - Implement the Instruction class --------*- C++ -*--=//
//
// This file implements the Instruction class for the VMCore library.
//
//===----------------------------------------------------------------------===//
#include "llvm/Instruction.h"
#include "llvm/BasicBlock.h"
#include "llvm/Method.h"
#include "llvm/SymbolTable.h"
#include "llvm/iBinary.h"
#include "llvm/iUnary.h"
// Construct an instruction of type ty with instruction type code 'it' and
// the given name.  The instruction starts out with no parent.
Instruction::Instruction(const Type *ty, unsigned it, const string &Name)
  : User(ty, Value::InstructionVal, Name) {
  this->Parent = 0;
  this->iType = it;
}
// An instruction must have a null parent by the time it is destroyed; this
// is checked with an assertion.
Instruction::~Instruction()
{
  assert(getParent() == 0 && "Instruction still embeded in basic block!");
}
// Specialize setName to take care of symbol table majik
void Instruction::setName(const string &name) {
BasicBlock *P = 0; Method *PP = 0;
if ((P = getParent()) && (PP = P->getParent()) && hasName())
PP->getSymbolTable()->remove(this);
Value::setName(name);
if (PP && hasName()) PP->getSymbolTableSure()->insert(this);
}
// getBinaryOperator - Create the binary instruction selected by Op with
// operands S1 and S2.  Add, Sub and the six Set* comparisons are supported;
// for any other opcode a message is printed to cerr and null is returned.
Instruction *Instruction::getBinaryOperator(unsigned Op, Value *S1, Value *S2) {
  switch (Op) {
  case Add: return new AddInst(S1, S2);
  case Sub: return new SubInst(S1, S2);

  case SetLT: case SetGT:
  case SetLE: case SetGE:
  case SetEQ: case SetNE:
    return new SetCondInst((BinaryOps)Op, S1, S2);

  default:
    break;
  }

  cerr << "Don't know how to GetBinaryOperator " << Op << endl;
  return 0;
}
// getUnaryOperator - No unary opcode is handled here: for every Op a message
// is printed to cerr and null is returned.
Instruction *Instruction::getUnaryOperator(unsigned Op, Value *Source) {
  cerr << "Don't know how to GetUnaryOperator " << Op << endl;
  return 0;
}

7
lib/VMCore/Makefile Normal file
View File

@ -0,0 +1,7 @@
LEVEL = ../..
LIBRARYNAME = vmcore
include $(LEVEL)/Makefile.common

42
lib/VMCore/Module.cpp Normal file
View File

@ -0,0 +1,42 @@
//===-- Module.cpp - Implement the Module class ------------------*- C++ -*--=//
//
// This file implements the Module class for the VMCore library.
//
//===----------------------------------------------------------------------===//
#include "llvm/ValueHolderImpl.h"
#include "llvm/InstrTypes.h"
#include "llvm/BasicBlock.h"
#include "llvm/Method.h"
#include "llvm/Module.h"
// Instantiate Templates - This ugliness is the price we have to pay
// for having a ValueHolderImpl.h file separate from ValueHolder.h! :(
//
template class ValueHolder<Method, Module>;
// Construct an empty, unnamed module.  The module value is created with a
// null type for now, and the method list is tied to this module.
Module::Module()
  : SymTabValue(0/*TODO: REAL TYPE*/, Value::ModuleVal, ""),
    MethodList(this, this)
{
}
// Destroy the module: drop all references first, then delete every method
// and detach the method list.
Module::~Module()
{
  dropAllReferences();

  MethodList.delete_all();
  MethodList.setParent(0);
}
// dropAllReferences() - Make every method of this module "let go" of the
// references it is maintaining.  This is what allows a whole module to be
// deleted at once despite circular references: first all references are
// dropped so that use counts reach zero, then everything is delete'd for
// real.  Once an object has dropped its references, the only valid operation
// on it is operator delete.
//
void Module::dropAllReferences() {
  for (MethodListType::iterator MI = MethodList.begin();
       MI != MethodList.end(); ++MI) {
    (*MI)->dropAllReferences();
  }
}

View File

@ -0,0 +1,195 @@
//===-- SlotCalculator.cpp - Calculate what slots values land in ------------=//
//
// This file implements a useful analysis step to figure out what numbered
// slots values in a program will land in (keeping track of per plane
// information as required).
//
// This is used primarily for when writing a file to disk, either in bytecode
// or source format.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/SlotCalculator.h"
#include "llvm/ConstantPool.h"
#include "llvm/Method.h"
#include "llvm/Module.h"
#include "llvm/BasicBlock.h"
#include "llvm/ConstPoolVals.h"
#include "llvm/iOther.h"
#include "llvm/DerivedTypes.h"
// Build a slot table for the module M.  The primitive types are always
// inserted first; when M is null the table holds nothing else.
SlotCalculator::SlotCalculator(const Module *M, bool IgnoreNamed) {
  IgnoreNamedNodes = IgnoreNamed;
  TheModule = M;

  // Preload table... Make sure that all of the primitive types are in the table
  // and that their Primitive ID is equal to their slot #
  //
  unsigned i = 0;
  while (i < Type::FirstDerivedTyID) {
    assert(Type::getPrimitiveType((Type::PrimitiveID)i));
    insertVal(Type::getPrimitiveType((Type::PrimitiveID)i));
    ++i;
  }

  if (M == 0)
    return;               // Empty table...

  bool Result = processModule(M);
  assert(Result == false && "Error in processModule!");
}
// Build a slot table for the method M: the module that contains M is
// processed first and M is then incorporated on top of it.  The primitive
// types are always inserted first; when M is null or has no parent module
// the table holds nothing else.
SlotCalculator::SlotCalculator(const Method *M, bool IgnoreNamed) {
  IgnoreNamedNodes = IgnoreNamed;
  if (M)
    TheModule = M->getParent();
  else
    TheModule = 0;

  // Preload table... Make sure that all of the primitive types are in the table
  // and that their Primitive ID is equal to their slot #
  //
  unsigned i = 0;
  while (i < Type::FirstDerivedTyID) {
    assert(Type::getPrimitiveType((Type::PrimitiveID)i));
    insertVal(Type::getPrimitiveType((Type::PrimitiveID)i));
    ++i;
  }

  if (TheModule == 0)
    return;               // Empty table...

  bool Result = processModule(TheModule);
  assert(Result == false && "Error in processModule!");

  incorporateMethod(M);
}
// incorporateMethod - Add the values of method M to the table.  The size of
// every plane is saved in ModuleLevel beforehand so that purgeMethod can
// restore the table later.  Asserts that no method is already incorporated.
void SlotCalculator::incorporateMethod(const Method *M) {
  assert(ModuleLevel.size() == 0 && "Module already incorporated!");

  // Save the Table state before we process the method...
  unsigned PlaneNo = 0;
  while (PlaneNo < Table.size()) {
    ModuleLevel.push_back(Table[PlaneNo].size());
    ++PlaneNo;
  }

  // Process the method to incorporate its values into our table
  processMethod(M);
}
// purgeMethod - Undo incorporateMethod: shrink every plane back to the size
// saved in ModuleLevel, and remove the planes that did not exist when the
// sizes were saved.  Each value dropped from a plane is also erased from
// NodeMap.  Asserts that a method is currently incorporated.
//
// NodeMap entries are erased by key rather than through the iterator that
// find() returns: erasing the end iterator is undefined, whereas erasing a
// key that is not in the map is a harmless no-op.
//
void SlotCalculator::purgeMethod() {
  assert(ModuleLevel.size() != 0 && "Module not incorporated!");
  unsigned NumModuleTypes = ModuleLevel.size();

  // First, remove values from existing type planes
  for (unsigned i = 0; i < NumModuleTypes; ++i) {
    unsigned ModuleSize = ModuleLevel[i];  // Size of plane before method came

    while (Table[i].size() != ModuleSize) {
      NodeMap.erase(Table[i].back());      // Erase from nodemap
      Table[i].pop_back();                 // Shrink plane
    }
  }

  // We don't need this state anymore, free it up.
  ModuleLevel.clear();

  // Next, remove any type planes defined by the method...
  while (NumModuleTypes != Table.size()) {
    TypePlane &Plane = Table.back();
    while (Plane.size()) {
      NodeMap.erase(Plane.back());         // Erase from nodemap
      Plane.pop_back();                    // Shrink plane
    }

    Table.pop_back();                      // Nuke the plane, we don't like it.
  }
}
// processConstant - Give the constant a slot.  Always returns false.
bool SlotCalculator::processConstant(const ConstPoolVal *CPV)
{
  //cerr << "Inserting constant: '" << CPV->getStrValue() << endl;
  insertVal(CPV);
  return false;
}
// processType - This callback occurs when an derived type is discovered
// at the class level. This activity occurs when processing a constant pool.
//
bool SlotCalculator::processType(const Type *Ty) {
//cerr << "processType: " << Ty->getName() << endl;
// TODO: Don't leak memory!!! Free this in the dtor!
insertVal(new ConstPoolType(Ty));
return false;
}
// visitMethod - Give the method itself a slot.  Always returns false.
bool SlotCalculator::visitMethod(const Method *M)
{
  //cerr << "visitMethod: '" << M->getType()->getName() << "'\n";
  insertVal(M);
  return false;
}
// processMethodArgument - Give the argument a slot.  Always returns false.
bool SlotCalculator::processMethodArgument(const MethodArgument *MA)
{
  insertVal(MA);
  return false;
}
// processBasicBlock - Give the basic block a slot, then hand it to the
// ModuleAnalyzer implementation so its instructions are visited as well.
// The result of that call is ignored; this function always returns false.
bool SlotCalculator::processBasicBlock(const BasicBlock *BB)
{
  insertVal(BB);
  ModuleAnalyzer::processBasicBlock(BB);  // Lets visit the instructions too!
  return false;
}
// processInstruction - Give the instruction a slot.  Always returns false.
bool SlotCalculator::processInstruction(const Instruction *I)
{
  insertVal(I);
  return false;
}
int SlotCalculator::getValSlot(const Value *D) const {
map<const Value*, unsigned>::const_iterator I = NodeMap.find(D);
if (I == NodeMap.end()) return -1;
return (int)I->second;
}
// insertVal - Give D the next free slot in the plane that belongs to its
// type, recording the slot in NodeMap and appending D to that plane in Table.
// Null values, values of type void, and (when IgnoreNamedNodes is set) named
// values are ignored.
//
void SlotCalculator::insertVal(const Value *D) {
if (D == 0) return;
// If this node does not contribute to a plane, or if the node has a
// name and we don't want names, then ignore the silly node...
//
if (D->getType() == Type::VoidTy || (IgnoreNamedNodes && D->hasName()))
return;
// The plane index defaults to the primitive ID of D's type.  For a derived
// type it is replaced below by the slot number of the type itself.
const Type *Typ = D->getType();
unsigned Ty = Typ->getPrimitiveID();
if (Typ->isDerivedType()) {
int DefSlot = getValSlot(Typ);
if (DefSlot == -1) { // Have we already entered this type?
// This can happen if a type is first seen in an instruction. For
// example, if you say 'malloc uint', this defines a type 'uint*' that
// may be undefined at this point.
//
// With assertions enabled this path aborts at the assert(0) below; the
// recovery code after it only runs when assertions are compiled out.
cerr << "SHOULDNT HAPPEN Adding Type ba: " << Typ->getName() << endl;
assert(0 && "SHouldn't this be taken care of by processType!?!?!");
// Nope... add this to the Type plane now!
insertVal(Typ);
DefSlot = getValSlot(Typ);
assert(DefSlot >= 0 && "Type didn't get inserted correctly!");
}
Ty = (unsigned)DefSlot;
}
if (Table.size() <= Ty) // Make sure we have the type plane allocated...
Table.resize(Ty+1, TypePlane());
// Insert node into table and NodeMap...
// D's slot is the current size of its plane, i.e. the index that the
// push_back at the end of this function will give it.
NodeMap[D] = Table[Ty].size();
if (Typ == Type::TypeTy && // If it's a type constant, add the Type also
D->getValueType() != Value::TypeVal) {
assert(D->getValueType() == Value::ConstantVal &&
"All Type instances should be constant types!");
const ConstPoolType *CPT = (const ConstPoolType*)D;
int Slot = getValSlot(CPT->getValue());
if (Slot == -1) {
// Only add if it's not already here!
// The wrapped type is mapped to the same slot that D is about to occupy.
NodeMap[CPT->getValue()] = Table[Ty].size();
} else if (!CPT->hasName()) { // If the type has no name...
// The wrapped type already has a slot: point D at it and return without
// adding D to the plane.
NodeMap[D] = (unsigned)Slot; // Don't readd type, merge.
return;
}
}
Table[Ty].push_back(D);
}

106
lib/VMCore/SymbolTable.cpp Normal file
View File

@ -0,0 +1,106 @@
//===-- SymbolTable.cpp - Implement the SymbolTable class -------------------=//
//
// This file implements the SymbolTable class for the VMCore library.
//
//===----------------------------------------------------------------------===//
#include "llvm/SymbolTable.h"
#include "llvm/InstrTypes.h"
#ifndef NDEBUG
#include "llvm/BasicBlock.h" // Required for assertions to work.
#include "llvm/Type.h"
#endif
// In builds with assertions enabled, verify that the table is empty when it
// is destroyed: every value still present is reported on cerr, and an
// assertion fires if any were found.  Does nothing when NDEBUG is defined.
SymbolTable::~SymbolTable() {
#ifndef NDEBUG   // Only do this in -g mode...
  bool Good = true;
  for (iterator Plane = begin(); Plane != end(); ++Plane) {
    if (Plane->second.begin() == Plane->second.end())
      continue;           // Nothing left in this type plane

    for (type_iterator Sym = Plane->second.begin();
         Sym != Plane->second.end(); ++Sym)
      cerr << "Value still in symbol table! Type = "
           << Plane->first->getName()
           << " Name = " << Sym->first << endl;
    Good = false;
  }
  assert(Good && "Values remain in symbol table!");
#endif
}
// type_find - Look D up by its own type and name.  D is asserted to have a
// name.
SymbolTable::type_iterator SymbolTable::type_find(const Value *D)
{
  assert(D->hasName() && "type_find(Value*) only works on named nodes!");
  return type_find(D->getType(), D->getName());
}
// type_find - Look Name up in the plane for type Ty.  If the table has no
// plane for Ty yet, an empty one is created first.  The result is whatever
// the plane's own find() returns for Name.
SymbolTable::type_iterator SymbolTable::type_find(const Type *Ty,
                                                  const string &Name) {
  iterator I = find(Ty);
  if (I == end()) {       // Not in collection yet... insert dummy entry
    (*this)[Ty] = VarMap();
    I = find(Ty);
    assert(I != end() && "How did insert fail?");
  }

  return I->second.find(Name);
}
// lookup - Find the value named Name in the plane for type Ty.  When this
// table has no such entry the parent symbol table, if any, is consulted.
// Returns null on failure...
Value *SymbolTable::lookup(const Type *Ty, const string &Name) {
  iterator Plane = find(Ty);
  if (Plane != end()) {                     // We have symbols in that plane...
    type_iterator Sym = Plane->second.find(Name);
    if (Sym != Plane->second.end())         // and the name is in our hash table...
      return Sym->second;
  }

  if (ParentSymTab)
    return ParentSymTab->lookup(Ty, Name);
  return 0;
}
// remove - Take the named value N out of the table.  N is asserted to have a
// name and to be present in the plane for its type.
void SymbolTable::remove(Value *N) {
  assert(N->hasName() && "Value doesn't have name!");
  assert(type_find(N) != type_end(N->getType()) &&
         "Value not in symbol table!");

  type_iterator Entry = type_find(N);
  type_remove(Entry);
}
#define DEBUG_SYMBOL_TABLE 0
// type_remove - Erase the entry that It refers to from the plane for its
// value's type, and return the value that was stored there.
Value *SymbolTable::type_remove(const type_iterator &It) {
  Value *Result = It->second;

#if DEBUG_SYMBOL_TABLE
  cerr << this << " Removing Value: " << Result->getName() << endl;
#endif

  iterator Plane = find(Result->getType());
  Plane->second.erase(It);
  return Result;
}
// insert - Add the named value N to the plane for its type, creating the
// plane first if necessary.  N is asserted to have a name.  If lookup()
// already finds a value with the same type and name, a message is printed to
// cerr and the program aborts.
void SymbolTable::insert(Value *N) {
  assert(N->hasName() && "Value must be named to go into symbol table!");

  // TODO: The typeverifier should catch this when its implemented
  if (lookup(N->getType(), N->getName())) {
    cerr << "SymbolTable WARNING: Name already in symbol table: '"
         << N->getName() << "'\n";
    abort();  // TODO: REMOVE THIS
  }

#if DEBUG_SYMBOL_TABLE
  cerr << this << " Inserting definition: " << N->getName() << ": "
       << N->getType()->getName() << endl;
#endif

  iterator I = find(N->getType());
  if (I == end()) {       // Not in collection yet... insert dummy entry
    (*this)[N->getType()] = VarMap();
    I = find(N->getType());
    assert(I != end() && "How did insert fail?");
  }

  I->second.insert(make_pair(N->getName(), N));
}

Some files were not shown because too many files have changed in this diff Show More