mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-26 18:20:39 +00:00 
			
		
		
		
	git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78948 91177308-0d34-0410-b5e6-96231b3b80d8
		
			
				
	
	
		
			1256 lines
		
	
	
		
			42 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			1256 lines
		
	
	
		
			42 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
 | |
|                       "http://www.w3.org/TR/html4/strict.dtd">
 | |
| 
 | |
| <html>
 | |
| <head>
 | |
|   <title>Kaleidoscope: Implementing code generation to LLVM IR</title>
 | |
|   <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 | |
|   <meta name="author" content="Chris Lattner">
 | |
|   <link rel="stylesheet" href="../llvm.css" type="text/css">
 | |
| </head>
 | |
| 
 | |
| <body>
 | |
| 
 | |
| <div class="doc_title">Kaleidoscope: Code generation to LLVM IR</div>
 | |
| 
 | |
| <ul>
 | |
| <li><a href="index.html">Up to Tutorial Index</a></li>
 | |
| <li>Chapter 3
 | |
|   <ol>
 | |
|     <li><a href="#intro">Chapter 3 Introduction</a></li>
 | |
|     <li><a href="#basics">Code Generation Setup</a></li>
 | |
|     <li><a href="#exprs">Expression Code Generation</a></li>
 | |
|     <li><a href="#funcs">Function Code Generation</a></li>
 | |
|     <li><a href="#driver">Driver Changes and Closing Thoughts</a></li>
 | |
|     <li><a href="#code">Full Code Listing</a></li>
 | |
|   </ol>
 | |
| </li>
 | |
| <li><a href="LangImpl4.html">Chapter 4</a>: Adding JIT and Optimizer 
 | |
| Support</li>
 | |
| </ul>
 | |
| 
 | |
| <div class="doc_author">
 | |
|   <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
 | |
| </div>
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <div class="doc_section"><a name="intro">Chapter 3 Introduction</a></div>
 | |
| <!-- *********************************************************************** -->
 | |
| 
 | |
| <div class="doc_text">
 | |
| 
 | |
| <p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
 | |
| with LLVM</a>" tutorial.  This chapter shows you how to transform the <a 
 | |
| href="LangImpl2.html">Abstract Syntax Tree</a>, built in Chapter 2, into LLVM IR.
 | |
| This will teach you a little bit about how LLVM does things, as well as
 | |
| demonstrate how easy it is to use.  It's much more work to build a lexer and
 | |
| parser than it is to generate LLVM IR code. :)
 | |
| </p>
 | |
| 
 | |
| <p><b>Please note</b>: the code in this chapter and later chapters requires LLVM 2.2 or
 | |
| later.  LLVM 2.1 and before will not work with it.  Also note that you need
 | |
| to use a version of this tutorial that matches your LLVM release: If you are
 | |
| using an official LLVM release, use the version of the documentation included
 | |
| with your release or on the <a href="http://llvm.org/releases/">llvm.org 
 | |
| releases page</a>.</p>
 | |
| 
 | |
| </div>
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <div class="doc_section"><a name="basics">Code Generation Setup</a></div>
 | |
| <!-- *********************************************************************** -->
 | |
| 
 | |
| <div class="doc_text">
 | |
| 
 | |
| <p>
 | |
| In order to generate LLVM IR, we want some simple setup to get started.  First
 | |
| we define virtual code generation (codegen) methods in each AST class:</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| /// ExprAST - Base class for all expression nodes.
 | |
| class ExprAST {
 | |
| public:
 | |
|   virtual ~ExprAST() {}
 | |
|   <b>virtual Value *Codegen() = 0;</b>
 | |
| };
 | |
| 
 | |
| /// NumberExprAST - Expression class for numeric literals like "1.0".
 | |
| class NumberExprAST : public ExprAST {
 | |
|   double Val;
 | |
| public:
 | |
|   explicit NumberExprAST(double val) : Val(val) {}
 | |
|   <b>virtual Value *Codegen();</b>
 | |
| };
 | |
| ...
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>The Codegen() method says to emit IR for that AST node along with all the things it
 | |
| depends on, and they all return an LLVM Value object. 
 | |
| "Value" is the class used to represent a "<a 
 | |
| href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
 | |
| Assignment (SSA)</a> register" or "SSA value" in LLVM.  The most distinct aspect
 | |
| of SSA values is that their value is computed as the related instruction
 | |
| executes, and it does not get a new value until (and if) the instruction
 | |
| re-executes.  In other words, there is no way to "change" an SSA value.  For
 | |
| more information, please read up on <a 
 | |
| href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
 | |
| Assignment</a> - the concepts are really quite natural once you grok them.</p>
 | |
| 
 | |
| <p>Note that instead of adding virtual methods to the ExprAST class hierarchy,
 | |
| it could also make sense to use a <a
 | |
| href="http://en.wikipedia.org/wiki/Visitor_pattern">visitor pattern</a> or some
 | |
| other way to model this.  Again, this tutorial won't dwell on good software
 | |
| engineering practices: for our purposes, adding a virtual method is
 | |
| simplest.</p>
 | |
| 
 | |
| <p>The
 | |
| second thing we want is an "Error" method like we used for the parser, which will
 | |
| be used to report errors found during code generation (for example, use of an
 | |
| undeclared parameter):</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Value *ErrorV(const char *Str) { Error(Str); return 0; }
 | |
| 
 | |
| static Module *TheModule;
 | |
| static IRBuilder<> Builder(getGlobalContext());
 | |
| static std::map<std::string, Value*> NamedValues;
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>The static variables will be used during code generation.  <tt>TheModule</tt>
 | |
| is the LLVM construct that contains all of the functions and global variables in
 | |
| a chunk of code.  In many ways, it is the top-level structure that the LLVM IR
 | |
| uses to contain code.</p>
 | |
| 
 | |
| <p>The <tt>Builder</tt> object is a helper object that makes it easy to generate
 | |
| LLVM instructions.  Instances of the <a 
 | |
| href="http://llvm.org/doxygen/IRBuilder_8h-source.html"><tt>IRBuilder</tt></a> 
 | |
| class template keep track of the current place to insert instructions and have
 | |
| methods to create new instructions.</p>
 | |
| 
 | |
| <p>The <tt>NamedValues</tt> map keeps track of which values are defined in the
 | |
| current scope and what their LLVM representation is.  (In other words, it is a
 | |
| symbol table for the code).  In this form of Kaleidoscope, the only things that
 | |
| can be referenced are function parameters.  As such, function parameters will
 | |
| be in this map when generating code for their function body.</p>
 | |
| 
 | |
| <p>
 | |
| With these basics in place, we can start talking about how to generate code for
 | |
| each expression.  Note that this assumes that the <tt>Builder</tt> has been set
 | |
| up to generate code <em>into</em> something.  For now, we'll assume that this
 | |
| has already been done, and we'll just use it to emit code.
 | |
| </p>
 | |
| 
 | |
| </div>
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <div class="doc_section"><a name="exprs">Expression Code Generation</a></div>
 | |
| <!-- *********************************************************************** -->
 | |
| 
 | |
| <div class="doc_text">
 | |
| 
 | |
| <p>Generating LLVM code for expression nodes is very straightforward: less
 | |
| than 45 lines of commented code for all four of our expression nodes.  First
 | |
| we'll do numeric literals:</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Value *NumberExprAST::Codegen() {
 | |
|   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>In the LLVM IR, numeric constants are represented with the
 | |
| <tt>ConstantFP</tt> class, which holds the numeric value in an <tt>APFloat</tt>
 | |
| internally (<tt>APFloat</tt> has the capability of holding floating point
 | |
| constants of <em>A</em>rbitrary <em>P</em>recision).  This code basically just
 | |
| creates and returns a <tt>ConstantFP</tt>.  Note that in the LLVM IR
 | |
| constants are all uniqued together and shared.  For this reason, the API
 | |
| uses the "foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Value *VariableExprAST::Codegen() {
 | |
|   // Look this variable up in the function.
 | |
|   Value *V = NamedValues[Name];
 | |
|   return V ? V : ErrorV("Unknown variable name");
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>References to variables are also quite simple using LLVM.  In the simple version
 | |
| of Kaleidoscope, we assume that the variable has already been emitted somewhere
 | |
| and its value is available.  In practice, the only values that can be in the
 | |
| <tt>NamedValues</tt> map are function arguments.  This
 | |
| code simply checks to see that the specified name is in the map (if not, an 
 | |
| unknown variable is being referenced) and returns the value for it.  In future
 | |
| chapters, we'll add support for <a href="LangImpl5.html#for">loop induction 
 | |
| variables</a> in the symbol table, and for <a 
 | |
| href="LangImpl7.html#localvars">local variables</a>.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Value *BinaryExprAST::Codegen() {
 | |
|   Value *L = LHS->Codegen();
 | |
|   Value *R = RHS->Codegen();
 | |
|   if (L == 0 || R == 0) return 0;
 | |
|   
 | |
|   switch (Op) {
 | |
|   case '+': return Builder.CreateAdd(L, R, "addtmp");
 | |
|   case '-': return Builder.CreateSub(L, R, "subtmp");
 | |
|   case '*': return Builder.CreateMul(L, R, "multmp");
 | |
|   case '<':
 | |
|     L = Builder.CreateFCmpULT(L, R, "cmptmp");
 | |
|     // Convert bool 0/1 to double 0.0 or 1.0
 | |
|     return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), "booltmp");
 | |
|   default: return ErrorV("invalid binary operator");
 | |
|   }
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Binary operators start to get more interesting.  The basic idea here is that
 | |
| we recursively emit code for the left-hand side of the expression, then the 
 | |
| right-hand side, then we compute the result of the binary expression.  In this
 | |
| code, we do a simple switch on the opcode to create the right LLVM instruction.
 | |
| </p>
 | |
| 
 | |
| <p>In the example above, the LLVM builder class is starting to show its value.  
 | |
| IRBuilder knows where to insert the newly created instruction, all you have to
 | |
| do is specify what instruction to create (e.g. with <tt>CreateAdd</tt>), which
 | |
| operands to use (<tt>L</tt> and <tt>R</tt> here) and optionally provide a name
 | |
| for the generated instruction.</p>
 | |
| 
 | |
| <p>One nice thing about LLVM is that the name is just a hint.  For instance, if
 | |
| the code above emits multiple "addtmp" variables, LLVM will automatically
 | |
| provide each one with an increasing, unique numeric suffix.  Local value names
 | |
| for instructions are purely optional, but it makes it much easier to read the
 | |
| IR dumps.</p>
 | |
| 
 | |
| <p><a href="../LangRef.html#instref">LLVM instructions</a> are constrained by
 | |
| strict rules: for example, the Left and Right operands of
 | |
| an <a href="../LangRef.html#i_add">add instruction</a> must have the same
 | |
| type, and the result type of the add must match the operand types.  Because
 | |
| all values in Kaleidoscope are doubles, this makes for very simple code for add,
 | |
| sub and mul.</p>
 | |
| 
 | |
| <p>On the other hand, LLVM specifies that the <a 
 | |
| href="../LangRef.html#i_fcmp">fcmp instruction</a> always returns an 'i1' value
 | |
| (a one bit integer).  The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value.  In order to get these semantics, we combine the fcmp instruction with
 | |
| a <a href="../LangRef.html#i_uitofp">uitofp instruction</a>.  This instruction
 | |
| converts its input integer into a floating point value by treating the input
 | |
| as an unsigned value.  In contrast, if we used the <a 
 | |
| href="../LangRef.html#i_sitofp">sitofp instruction</a>, the Kaleidoscope '<'
 | |
| operator would return 0.0 and -1.0, depending on the input value.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Value *CallExprAST::Codegen() {
 | |
|   // Look up the name in the global module table.
 | |
|   Function *CalleeF = TheModule->getFunction(Callee);
 | |
|   if (CalleeF == 0)
 | |
|     return ErrorV("Unknown function referenced");
 | |
|   
 | |
|   // If argument mismatch error.
 | |
|   if (CalleeF->arg_size() != Args.size())
 | |
|     return ErrorV("Incorrect # arguments passed");
 | |
| 
 | |
|   std::vector<Value*> ArgsV;
 | |
|   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
 | |
|     ArgsV.push_back(Args[i]->Codegen());
 | |
|     if (ArgsV.back() == 0) return 0;
 | |
|   }
 | |
|   
 | |
|   return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Code generation for function calls is quite straightforward with LLVM.  The
 | |
| code above initially does a function name lookup in the LLVM Module's symbol
 | |
| table.  Recall that the LLVM Module is the container that holds all of the
 | |
| functions we are JIT'ing.  By giving each function the same name as what the
 | |
| user specifies, we can use the LLVM symbol table to resolve function names for
 | |
| us.</p>
 | |
| 
 | |
| <p>Once we have the function to call, we recursively codegen each argument that
 | |
| is to be passed in, and create an LLVM <a href="../LangRef.html#i_call">call
 | |
| instruction</a>.  Note that LLVM uses the native C calling conventions by
 | |
| default, allowing these calls to also call into standard library functions like
 | |
| "sin" and "cos", with no additional effort.</p>
 | |
| 
 | |
| <p>This wraps up our handling of the four basic expressions that we have so far
 | |
| in Kaleidoscope.  Feel free to go in and add some more.  For example, by 
 | |
| browsing the <a href="../LangRef.html">LLVM language reference</a> you'll find
 | |
| several other interesting instructions that are really easy to plug into our
 | |
| basic framework.</p>
 | |
| 
 | |
| </div>
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <div class="doc_section"><a name="funcs">Function Code Generation</a></div>
 | |
| <!-- *********************************************************************** -->
 | |
| 
 | |
| <div class="doc_text">
 | |
| 
 | |
| <p>Code generation for prototypes and functions must handle a number of
 | |
| details, which make their code less beautiful than expression code
 | |
| generation, but allow us to illustrate some important points.  First, let's
 | |
| talk about code generation for prototypes: they are used both for function 
 | |
| bodies and external function declarations.  The code starts with:</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Function *PrototypeAST::Codegen() {
 | |
|   // Make the function type:  double(double,double) etc.
 | |
|   std::vector<const Type*> Doubles(Args.size(), Type::getDoubleTy(getGlobalContext()));
 | |
|   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
 | |
|   
 | |
|   Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>This code packs a lot of power into a few lines.  Note first that this 
 | |
| function returns a "Function*" instead of a "Value*".  Because a "prototype"
 | |
| really talks about the external interface for a function (not the value computed
 | |
| by an expression), it makes sense for it to return the LLVM Function it
 | |
| corresponds to when codegen'd.</p>
 | |
| 
 | |
| <p>The call to <tt>FunctionType::get</tt> creates
 | |
| the <tt>FunctionType</tt> that should be used for a given Prototype.  Since all
 | |
| function arguments in Kaleidoscope are of type double, the first line creates
 | |
| a vector of "N" LLVM double types.  It then uses the <tt>FunctionType::get</tt>
 | |
| method to create a function type that takes "N" doubles as arguments, returns
 | |
| one double as a result, and that is not vararg (the false parameter indicates
 | |
| this).  Note that Types in LLVM are uniqued just like Constants are, so you
 | |
| don't "new" a type, you "get" it.</p>
 | |
| 
 | |
| <p>The final line above actually creates the function that the prototype will
 | |
| correspond to.  This indicates the type, linkage and name to use, as well as which
 | |
| module to insert into.  "<a href="../LangRef.html#linkage">external linkage</a>"
 | |
| means that the function may be defined outside the current module and/or that it
 | |
| is callable by functions outside the module.  The Name passed in is the name the
 | |
| user specified: since "<tt>TheModule</tt>" is specified, this name is registered
 | |
| in "<tt>TheModule</tt>"s symbol table, which is used by the function call code
 | |
| above.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|   // If F conflicted, there was already something named 'Name'.  If it has a
 | |
|   // body, don't allow redefinition or reextern.
 | |
|   if (F->getName() != Name) {
 | |
|     // Delete the one we just made and get the existing one.
 | |
|     F->eraseFromParent();
 | |
|     F = TheModule->getFunction(Name);
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>The Module symbol table works just like the Function symbol table when it
 | |
| comes to name conflicts: if a new function is created with a name that was previously
 | |
| added to the symbol table, it will get implicitly renamed when added to the
 | |
| Module.  The code above exploits this fact to determine if there was a previous
 | |
| definition of this function.</p>
 | |
| 
 | |
| <p>In Kaleidoscope, I choose to allow redefinitions of functions in two cases:
 | |
| first, we want to allow 'extern'ing a function more than once, as long as the
 | |
| prototypes for the externs match (since all arguments have the same type, we
 | |
| just have to check that the number of arguments match).  Second, we want to
 | |
| allow 'extern'ing a function and then defining a body for it.  This is useful
 | |
| when defining mutually recursive functions.</p>
 | |
| 
 | |
| <p>In order to implement this, the code above first checks to see if there is
 | |
| a collision on the name of the function.  If so, it deletes the function we just
 | |
| created (by calling <tt>eraseFromParent</tt>) and then calls 
 | |
| <tt>getFunction</tt> to get the existing function with the specified name.  Note
 | |
| that many APIs in LLVM have "erase" forms and "remove" forms.  The "remove" form
 | |
| unlinks the object from its parent (e.g. a Function from a Module) and returns
 | |
| it.  The "erase" form unlinks the object and then deletes it.</p>
 | |
|    
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|     // If F already has a body, reject this.
 | |
|     if (!F->empty()) {
 | |
|       ErrorF("redefinition of function");
 | |
|       return 0;
 | |
|     }
 | |
|     
 | |
|     // If F took a different number of args, reject.
 | |
|     if (F->arg_size() != Args.size()) {
 | |
|       ErrorF("redefinition of function with different # args");
 | |
|       return 0;
 | |
|     }
 | |
|   }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>In order to verify the logic above, we first check to see if the pre-existing
 | |
| function is "empty".  In this case, empty means that it has no basic blocks in
 | |
| it, which means it has no body.  If it has no body, it is a forward 
 | |
| declaration.  Since we don't allow anything after a full definition of the
 | |
| function, the code rejects a function that already has a body.  If the previous reference to a function
 | |
| was an 'extern', we simply verify that the number of arguments for that
 | |
| definition and this one match up.  If not, we emit an error.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|   // Set names for all arguments.
 | |
|   unsigned Idx = 0;
 | |
|   for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
 | |
|        ++AI, ++Idx) {
 | |
|     AI->setName(Args[Idx]);
 | |
|     
 | |
|     // Add arguments to variable symbol table.
 | |
|     NamedValues[Args[Idx]] = AI;
 | |
|   }
 | |
|   return F;
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>The last bit of code for prototypes loops over all of the arguments in the
 | |
| function, setting the name of the LLVM Argument objects to match, and registering
 | |
| the arguments in the <tt>NamedValues</tt> map for future use by the
 | |
| <tt>VariableExprAST</tt> AST node.  Once this is set up, it returns the Function
 | |
| object to the caller.  Note that we don't check for conflicting 
 | |
| argument names here (e.g. "extern foo(a b a)").  Doing so would be very
 | |
| straight-forward with the mechanics we have already used above.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| Function *FunctionAST::Codegen() {
 | |
|   NamedValues.clear();
 | |
|   
 | |
|   Function *TheFunction = Proto->Codegen();
 | |
|   if (TheFunction == 0)
 | |
|     return 0;
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Code generation for function definitions starts out simply enough: we just
 | |
| codegen the prototype (Proto) and verify that it is ok.  We then clear out the
 | |
| <tt>NamedValues</tt> map to make sure that there isn't anything in it from the
 | |
| last function we compiled.  Code generation of the prototype ensures that there
 | |
| is an LLVM Function object that is ready to go for us.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|   // Create a new basic block to start insertion into.
 | |
|   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
 | |
|   Builder.SetInsertPoint(BB);
 | |
|   
 | |
|   if (Value *RetVal = Body->Codegen()) {
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Now we get to the point where the <tt>Builder</tt> is set up.  The first
 | |
| line creates a new <a href="http://en.wikipedia.org/wiki/Basic_block">basic
 | |
| block</a> (named "entry"), which is inserted into <tt>TheFunction</tt>.  The
 | |
| second line then tells the builder that new instructions should be inserted into
 | |
| the end of the new basic block.  Basic blocks in LLVM are an important part
 | |
| of functions that define the <a 
 | |
| href="http://en.wikipedia.org/wiki/Control_flow_graph">Control Flow Graph</a>.
 | |
| Since we don't have any control flow, our functions will only contain one 
 | |
| block at this point.  We'll fix this in <a href="LangImpl5.html">Chapter 5</a> :).</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|   if (Value *RetVal = Body->Codegen()) {
 | |
|     // Finish off the function.
 | |
|     Builder.CreateRet(RetVal);
 | |
|     
 | |
|     // Validate the generated code, checking for consistency.
 | |
|     verifyFunction(*TheFunction);
 | |
|     return TheFunction;
 | |
|   }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Once the insertion point is set up, we call the <tt>Codegen()</tt> method for
 | |
| the root expression of the function.  If no error happens, this emits code to
 | |
| compute the expression into the entry block and returns the value that was
 | |
| computed.  Assuming no error, we then create an LLVM <a 
 | |
| href="../LangRef.html#i_ret">ret instruction</a>, which completes the function.
 | |
| Once the function is built, we call <tt>verifyFunction</tt>, which
 | |
| is provided by LLVM.  This function does a variety of consistency checks on the
 | |
| generated code, to determine if our compiler is doing everything right.  Using
 | |
| this is important: it can catch a lot of bugs.  Once the function is finished
 | |
| and validated, we return it.</p>
 | |
|   
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|   // Error reading body, remove function.
 | |
|   TheFunction->eraseFromParent();
 | |
|   return 0;
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>The only piece left here is handling of the error case.  For simplicity, we
 | |
| handle this by merely deleting the function we produced with the 
 | |
| <tt>eraseFromParent</tt> method.  This allows the user to redefine a function
 | |
| that they incorrectly typed in before: if we didn't delete it, it would live in
 | |
| the symbol table, with a body, preventing future redefinition.</p>
 | |
| 
 | |
| <p>This code does have a bug, though.  Since the <tt>PrototypeAST::Codegen</tt>
 | |
| can return a previously defined forward declaration, our code can actually delete
 | |
| a forward declaration.  There are a number of ways to fix this bug, see what you
 | |
| can come up with!  Here is a testcase:</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| extern foo(a b);     # ok, defines foo.
 | |
| def foo(a b) c;      # error, 'c' is invalid.
 | |
| def bar() foo(1, 2); # error, unknown function "foo"
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| </div>
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <div class="doc_section"><a name="driver">Driver Changes and 
 | |
| Closing Thoughts</a></div>
 | |
| <!-- *********************************************************************** -->
 | |
| 
 | |
| <div class="doc_text">
 | |
| 
 | |
| <p>
 | |
| For now, code generation to LLVM doesn't really get us much, except that we can
 | |
| look at the pretty IR calls.  The sample code inserts calls to Codegen into the
 | |
| "<tt>HandleDefinition</tt>", "<tt>HandleExtern</tt>" etc. functions, and then
 | |
| dumps out the LLVM IR.  This gives a nice way to look at the LLVM IR for simple
 | |
| functions.  For example:
 | |
| </p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| ready> <b>4+5</b>;
 | |
| Read top-level expression:
 | |
| define double @""() {
 | |
| entry:
 | |
|         %addtmp = add double 4.000000e+00, 5.000000e+00
 | |
|         ret double %addtmp
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Note how the parser turns the top-level expression into anonymous functions
 | |
| for us.  This will be handy when we add <a href="LangImpl4.html#jit">JIT 
 | |
| support</a> in the next chapter.  Also note that the code is very literally
 | |
| transcribed, no optimizations are being performed.  We will 
 | |
| <a href="LangImpl4.html#trivialconstfold">add optimizations</a> explicitly in
 | |
| the next chapter.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| ready> <b>def foo(a b) a*a + 2*a*b + b*b;</b>
 | |
| Read function definition:
 | |
| define double @foo(double %a, double %b) {
 | |
| entry:
 | |
|         %multmp = mul double %a, %a
 | |
|         %multmp1 = mul double 2.000000e+00, %a
 | |
|         %multmp2 = mul double %multmp1, %b
 | |
|         %addtmp = add double %multmp, %multmp2
 | |
|         %multmp3 = mul double %b, %b
 | |
|         %addtmp4 = add double %addtmp, %multmp3
 | |
|         ret double %addtmp4
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>This shows some simple arithmetic. Notice the striking similarity to the
 | |
| LLVM builder calls that we use to create the instructions.</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| ready> <b>def bar(a) foo(a, 4.0) + bar(31337);</b>
 | |
| Read function definition:
 | |
| define double @bar(double %a) {
 | |
| entry:
 | |
|         %calltmp = call double @foo( double %a, double 4.000000e+00 )
 | |
|         %calltmp1 = call double @bar( double 3.133700e+04 )
 | |
|         %addtmp = add double %calltmp, %calltmp1
 | |
|         ret double %addtmp
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>This shows some function calls.  Note that this function will take a long
 | |
| time to execute if you call it.  In the future we'll add conditional control 
 | |
| flow to actually make recursion useful :).</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| ready> <b>extern cos(x);</b>
 | |
| Read extern: 
 | |
| declare double @cos(double)
 | |
| 
 | |
| ready> <b>cos(1.234);</b>
 | |
| Read top-level expression:
 | |
| define double @""() {
 | |
| entry:
 | |
|         %calltmp = call double @cos( double 1.234000e+00 )
 | |
|         ret double %calltmp
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>This shows an extern for the libm "cos" function, and a call to it.</p>
 | |
| 
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| ready> <b>^D</b>
 | |
| ; ModuleID = 'my cool jit'
 | |
| 
 | |
| define double @""() {
 | |
| entry:
 | |
|         %addtmp = add double 4.000000e+00, 5.000000e+00
 | |
|         ret double %addtmp
 | |
| }
 | |
| 
 | |
| define double @foo(double %a, double %b) {
 | |
| entry:
 | |
|         %multmp = mul double %a, %a
 | |
|         %multmp1 = mul double 2.000000e+00, %a
 | |
|         %multmp2 = mul double %multmp1, %b
 | |
|         %addtmp = add double %multmp, %multmp2
 | |
|         %multmp3 = mul double %b, %b
 | |
|         %addtmp4 = add double %addtmp, %multmp3
 | |
|         ret double %addtmp4
 | |
| }
 | |
| 
 | |
| define double @bar(double %a) {
 | |
| entry:
 | |
|         %calltmp = call double @foo( double %a, double 4.000000e+00 )
 | |
|         %calltmp1 = call double @bar( double 3.133700e+04 )
 | |
|         %addtmp = add double %calltmp, %calltmp1
 | |
|         ret double %addtmp
 | |
| }
 | |
| 
 | |
| declare double @cos(double)
 | |
| 
 | |
| define double @""() {
 | |
| entry:
 | |
|         %calltmp = call double @cos( double 1.234000e+00 )
 | |
|         ret double %calltmp
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>When you quit the current demo, it dumps out the IR for the entire module
 | |
| generated.  Here you can see the big picture with all the functions referencing
 | |
| each other.</p>
 | |
| 
 | |
| <p>This wraps up the third chapter of the Kaleidoscope tutorial.  Up next, we'll
 | |
| describe how to <a href="LangImpl4.html">add JIT codegen and optimizer
 | |
| support</a> to this so we can actually start running code!</p>
 | |
| 
 | |
| </div>
 | |
| 
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <div class="doc_section"><a name="code">Full Code Listing</a></div>
 | |
| <!-- *********************************************************************** -->
 | |
| 
 | |
| <div class="doc_text">
 | |
| 
 | |
| <p>
 | |
| Here is the complete code listing for our running example, enhanced with the
 | |
| LLVM code generator.    Because this uses the LLVM libraries, we need to link
 | |
| them in.  To do this, we use the <a 
 | |
| href="http://llvm.org/cmds/llvm-config.html">llvm-config</a> tool to inform
 | |
| our makefile/command line about which options to use:</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
|    # Compile
 | |
|    g++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
 | |
|    # Run
 | |
|    ./toy
 | |
| </pre>
 | |
| </div>
 | |
| 
 | |
| <p>Here is the code:</p>
 | |
| 
 | |
| <div class="doc_code">
 | |
| <pre>
 | |
| // To build this:
 | |
| // See example below.
 | |
| 
 | |
| #include "llvm/DerivedTypes.h"
 | |
| #include "llvm/LLVMContext.h"
 | |
| #include "llvm/Module.h"
 | |
| #include "llvm/Analysis/Verifier.h"
 | |
| #include "llvm/Support/IRBuilder.h"
 | |
| #include <cstdio>
 | |
| #include <string>
 | |
| #include <map>
 | |
| #include <vector>
 | |
| using namespace llvm;
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Lexer
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
 | |
| // of these for known things.
 | |
| enum Token {
 | |
|   tok_eof = -1,
 | |
| 
 | |
|   // commands
 | |
|   tok_def = -2, tok_extern = -3,
 | |
| 
 | |
|   // primary
 | |
|   tok_identifier = -4, tok_number = -5,
 | |
| };
 | |
| 
 | |
| static std::string IdentifierStr;  // Filled in if tok_identifier
 | |
| static double NumVal;              // Filled in if tok_number
 | |
| 
 | |
| /// gettok - Return the next token from standard input.
 | |
| static int gettok() {
 | |
|   static int LastChar = ' ';
 | |
| 
 | |
|   // Skip any whitespace.
 | |
|   while (isspace(LastChar))
 | |
|     LastChar = getchar();
 | |
| 
 | |
|   if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
 | |
|     IdentifierStr = LastChar;
 | |
|     while (isalnum((LastChar = getchar())))
 | |
|       IdentifierStr += LastChar;
 | |
| 
 | |
|     if (IdentifierStr == "def") return tok_def;
 | |
|     if (IdentifierStr == "extern") return tok_extern;
 | |
|     return tok_identifier;
 | |
|   }
 | |
| 
 | |
|   if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
 | |
|     std::string NumStr;
 | |
|     do {
 | |
|       NumStr += LastChar;
 | |
|       LastChar = getchar();
 | |
|     } while (isdigit(LastChar) || LastChar == '.');
 | |
| 
 | |
|     NumVal = strtod(NumStr.c_str(), 0);
 | |
|     return tok_number;
 | |
|   }
 | |
| 
 | |
|   if (LastChar == '#') {
 | |
|     // Comment until end of line.
 | |
|     do LastChar = getchar();
 | |
|     while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
 | |
|     
 | |
|     if (LastChar != EOF)
 | |
|       return gettok();
 | |
|   }
 | |
|   
 | |
|   // Check for end of file.  Don't eat the EOF.
 | |
|   if (LastChar == EOF)
 | |
|     return tok_eof;
 | |
| 
 | |
|   // Otherwise, just return the character as its ascii value.
 | |
|   int ThisChar = LastChar;
 | |
|   LastChar = getchar();
 | |
|   return ThisChar;
 | |
| }
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Abstract Syntax Tree (aka Parse Tree)
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| /// ExprAST - Base class for all expression nodes.
 | |
| class ExprAST {
 | |
| public:
 | |
|   virtual ~ExprAST() {}
 | |
|   virtual Value *Codegen() = 0;
 | |
| };
 | |
| 
 | |
| /// NumberExprAST - Expression class for numeric literals like "1.0".
 | |
| class NumberExprAST : public ExprAST {
 | |
|   double Val;
 | |
| public:
 | |
|   explicit NumberExprAST(double val) : Val(val) {}
 | |
|   virtual Value *Codegen();
 | |
| };
 | |
| 
 | |
| /// VariableExprAST - Expression class for referencing a variable, like "a".
 | |
| class VariableExprAST : public ExprAST {
 | |
|   std::string Name;
 | |
| public:
 | |
|   explicit VariableExprAST(const std::string &name) : Name(name) {}
 | |
|   virtual Value *Codegen();
 | |
| };
 | |
| 
 | |
| /// BinaryExprAST - Expression class for a binary operator.
 | |
| class BinaryExprAST : public ExprAST {
 | |
|   char Op;
 | |
|   ExprAST *LHS, *RHS;
 | |
| public:
 | |
|   BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
 | |
|     : Op(op), LHS(lhs), RHS(rhs) {}
 | |
|   virtual Value *Codegen();
 | |
| };
 | |
| 
 | |
| /// CallExprAST - Expression class for function calls.
 | |
| /// Stores the name of the callee and the list of argument expressions.
 | |
| class CallExprAST : public ExprAST {
 | |
|   std::string Callee;
 | |
|   std::vector<ExprAST*> Args;
 | |
| public:
 | |
|   /// The argument list is only copied from, never modified, so it is taken by
 | |
|   /// const reference; this also lets callers pass a temporary or const vector.
 | |
|   CallExprAST(const std::string &callee, const std::vector<ExprAST*> &args)
 | |
|     : Callee(callee), Args(args) {}
 | |
|   virtual Value *Codegen();
 | |
| };
 | |
| 
 | |
| /// PrototypeAST - This class represents the "prototype" for a function,
 | |
| /// which captures its name and its argument names.
 | |
| class PrototypeAST {
 | |
|   std::string Name;
 | |
|   std::vector<std::string> Args;
 | |
| public:
 | |
|   PrototypeAST(const std::string &name, const std::vector<std::string> &args)
 | |
|     : Name(name), Args(args) {}
 | |
|   
 | |
|   Function *Codegen();
 | |
| };
 | |
| 
 | |
| /// FunctionAST - This class represents a function definition itself.
 | |
| class FunctionAST {
 | |
|   PrototypeAST *Proto;
 | |
|   ExprAST *Body;
 | |
| public:
 | |
|   FunctionAST(PrototypeAST *proto, ExprAST *body)
 | |
|     : Proto(proto), Body(body) {}
 | |
|   
 | |
|   Function *Codegen();
 | |
| };
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Parser
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
 | |
| /// token the parser is looking at.  getNextToken reads another token from the
 | |
| /// lexer and updates CurTok with its results.
 | |
| static int CurTok;
 | |
| static int getNextToken() {
 | |
|   return CurTok = gettok();
 | |
| }
 | |
| 
 | |
| /// BinopPrecedence - This holds the precedence for each binary operator that is
 | |
| /// defined.
 | |
| static std::map<char, int> BinopPrecedence;
 | |
| 
 | |
| /// GetTokPrecedence - Get the precedence of the pending binary operator token.
 | |
| /// Returns -1 when CurTok is not an ASCII character, or when it has no entry
 | |
| /// (or a non-positive entry) in BinopPrecedence.
 | |
| static int GetTokPrecedence() {
 | |
|   if (!isascii(CurTok))
 | |
|     return -1;
 | |
|   
 | |
|   // Make sure it's a declared binop.  Use find() rather than operator[], which
 | |
|   // would insert a zero entry into the table for every token that is queried.
 | |
|   std::map<char, int>::const_iterator It = BinopPrecedence.find(CurTok);
 | |
|   if (It == BinopPrecedence.end() || It->second <= 0) return -1;
 | |
|   return It->second;
 | |
| }
 | |
| 
 | |
| /// Error* - These are little helper functions for error handling.
 | |
| ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
 | |
| PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
 | |
| FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
 | |
| 
 | |
| static ExprAST *ParseExpression();
 | |
| 
 | |
| /// identifierexpr
 | |
| ///   ::= identifier
 | |
| ///   ::= identifier '(' expression* ')'
 | |
| static ExprAST *ParseIdentifierExpr() {
 | |
|   std::string IdName = IdentifierStr;
 | |
|   
 | |
|   getNextToken();  // eat identifier.
 | |
|   
 | |
|   if (CurTok != '(') // Simple variable ref.
 | |
|     return new VariableExprAST(IdName);
 | |
|   
 | |
|   // Call.
 | |
|   getNextToken();  // eat (
 | |
|   std::vector<ExprAST*> Args;
 | |
|   if (CurTok != ')') {
 | |
|     while (1) {
 | |
|       ExprAST *Arg = ParseExpression();
 | |
|       if (!Arg) return 0;
 | |
|       Args.push_back(Arg);
 | |
|     
 | |
|       if (CurTok == ')') break;
 | |
|     
 | |
|       if (CurTok != ',')
 | |
|         return Error("Expected ')' or ',' in argument list");
 | |
|       getNextToken();
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Eat the ')'.
 | |
|   getNextToken();
 | |
|   
 | |
|   return new CallExprAST(IdName, Args);
 | |
| }
 | |
| 
 | |
| /// numberexpr ::= number
 | |
| static ExprAST *ParseNumberExpr() {
 | |
|   ExprAST *Result = new NumberExprAST(NumVal);
 | |
|   getNextToken(); // consume the number
 | |
|   return Result;
 | |
| }
 | |
| 
 | |
| /// parenexpr ::= '(' expression ')'
 | |
| static ExprAST *ParseParenExpr() {
 | |
|   getNextToken();  // eat (.
 | |
|   ExprAST *V = ParseExpression();
 | |
|   if (!V) return 0;
 | |
|   
 | |
|   if (CurTok != ')')
 | |
|     return Error("expected ')'");
 | |
|   getNextToken();  // eat ).
 | |
|   return V;
 | |
| }
 | |
| 
 | |
| /// primary
 | |
| ///   ::= identifierexpr
 | |
| ///   ::= numberexpr
 | |
| ///   ::= parenexpr
 | |
| static ExprAST *ParsePrimary() {
 | |
|   switch (CurTok) {
 | |
|   default: return Error("unknown token when expecting an expression");
 | |
|   case tok_identifier: return ParseIdentifierExpr();
 | |
|   case tok_number:     return ParseNumberExpr();
 | |
|   case '(':            return ParseParenExpr();
 | |
|   }
 | |
| }
 | |
| 
 | |
| /// binoprhs
 | |
| ///   ::= ('+' primary)*
 | |
| static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
 | |
|   // If this is a binop, find its precedence.
 | |
|   while (1) {
 | |
|     int TokPrec = GetTokPrecedence();
 | |
|     
 | |
|     // If this is a binop that binds at least as tightly as the current binop,
 | |
|     // consume it, otherwise we are done.
 | |
|     if (TokPrec < ExprPrec)
 | |
|       return LHS;
 | |
|     
 | |
|     // Okay, we know this is a binop.
 | |
|     int BinOp = CurTok;
 | |
|     getNextToken();  // eat binop
 | |
|     
 | |
|     // Parse the primary expression after the binary operator.
 | |
|     ExprAST *RHS = ParsePrimary();
 | |
|     if (!RHS) return 0;
 | |
|     
 | |
|     // If BinOp binds less tightly with RHS than the operator after RHS, let
 | |
|     // the pending operator take RHS as its LHS.
 | |
|     int NextPrec = GetTokPrecedence();
 | |
|     if (TokPrec < NextPrec) {
 | |
|       RHS = ParseBinOpRHS(TokPrec+1, RHS);
 | |
|       if (RHS == 0) return 0;
 | |
|     }
 | |
|     
 | |
|     // Merge LHS/RHS.
 | |
|     LHS = new BinaryExprAST(BinOp, LHS, RHS);
 | |
|   }
 | |
| }
 | |
| 
 | |
| /// expression
 | |
| ///   ::= primary binoprhs
 | |
| ///
 | |
| static ExprAST *ParseExpression() {
 | |
|   ExprAST *LHS = ParsePrimary();
 | |
|   if (!LHS) return 0;
 | |
|   
 | |
|   return ParseBinOpRHS(0, LHS);
 | |
| }
 | |
| 
 | |
| /// prototype
 | |
| ///   ::= id '(' id* ')'
 | |
| static PrototypeAST *ParsePrototype() {
 | |
|   if (CurTok != tok_identifier)
 | |
|     return ErrorP("Expected function name in prototype");
 | |
| 
 | |
|   std::string FnName = IdentifierStr;
 | |
|   getNextToken();
 | |
|   
 | |
|   if (CurTok != '(')
 | |
|     return ErrorP("Expected '(' in prototype");
 | |
|   
 | |
|   std::vector<std::string> ArgNames;
 | |
|   while (getNextToken() == tok_identifier)
 | |
|     ArgNames.push_back(IdentifierStr);
 | |
|   if (CurTok != ')')
 | |
|     return ErrorP("Expected ')' in prototype");
 | |
|   
 | |
|   // success.
 | |
|   getNextToken();  // eat ')'.
 | |
|   
 | |
|   return new PrototypeAST(FnName, ArgNames);
 | |
| }
 | |
| 
 | |
| /// definition ::= 'def' prototype expression
 | |
| static FunctionAST *ParseDefinition() {
 | |
|   getNextToken();  // eat def.
 | |
|   PrototypeAST *Proto = ParsePrototype();
 | |
|   if (Proto == 0) return 0;
 | |
| 
 | |
|   if (ExprAST *E = ParseExpression())
 | |
|     return new FunctionAST(Proto, E);
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| /// toplevelexpr ::= expression
 | |
| static FunctionAST *ParseTopLevelExpr() {
 | |
|   if (ExprAST *E = ParseExpression()) {
 | |
|     // Make an anonymous proto.
 | |
|     PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
 | |
|     return new FunctionAST(Proto, E);
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| /// external ::= 'extern' prototype
 | |
| static PrototypeAST *ParseExtern() {
 | |
|   getNextToken();  // eat extern.
 | |
|   return ParsePrototype();
 | |
| }
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Code Generation
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| static Module *TheModule;
 | |
| static IRBuilder<> Builder(getGlobalContext());
 | |
| static std::map<std::string, Value*> NamedValues;
 | |
| 
 | |
| Value *ErrorV(const char *Str) { Error(Str); return 0; }
 | |
| 
 | |
| Value *NumberExprAST::Codegen() {
 | |
|   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 | |
| }
 | |
| 
 | |
| /// Codegen - Return the value bound to this variable's name in NamedValues,
 | |
| /// or report "Unknown variable name" and return null if there is none.
 | |
| Value *VariableExprAST::Codegen() {
 | |
|   // Look this variable up in the function.  find() is used instead of
 | |
|   // operator[] so that an unknown name is not inserted into the symbol table.
 | |
|   std::map<std::string, Value*>::iterator It = NamedValues.find(Name);
 | |
|   if (It == NamedValues.end() || It->second == 0)
 | |
|     return ErrorV("Unknown variable name");
 | |
|   return It->second;
 | |
| }
 | |
| 
 | |
| Value *BinaryExprAST::Codegen() {
 | |
|   Value *L = LHS->Codegen();
 | |
|   Value *R = RHS->Codegen();
 | |
|   if (L == 0 || R == 0) return 0;
 | |
|   
 | |
|   switch (Op) {
 | |
|   case '+': return Builder.CreateAdd(L, R, "addtmp");
 | |
|   case '-': return Builder.CreateSub(L, R, "subtmp");
 | |
|   case '*': return Builder.CreateMul(L, R, "multmp");
 | |
|   case '<':
 | |
|     L = Builder.CreateFCmpULT(L, R, "cmptmp");
 | |
|     // Convert bool 0/1 to double 0.0 or 1.0
 | |
|     return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), "booltmp");
 | |
|   default: return ErrorV("invalid binary operator");
 | |
|   }
 | |
| }
 | |
| 
 | |
| Value *CallExprAST::Codegen() {
 | |
|   // Look up the name in the global module table.
 | |
|   Function *CalleeF = TheModule->getFunction(Callee);
 | |
|   if (CalleeF == 0)
 | |
|     return ErrorV("Unknown function referenced");
 | |
|   
 | |
|   // If argument mismatch error.
 | |
|   if (CalleeF->arg_size() != Args.size())
 | |
|     return ErrorV("Incorrect # arguments passed");
 | |
| 
 | |
|   std::vector<Value*> ArgsV;
 | |
|   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
 | |
|     ArgsV.push_back(Args[i]->Codegen());
 | |
|     if (ArgsV.back() == 0) return 0;
 | |
|   }
 | |
|   
 | |
|   return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
 | |
| }
 | |
| 
 | |
| Function *PrototypeAST::Codegen() {
 | |
|   // Make the function type:  double(double,double) etc.
 | |
|   std::vector<const Type*> Doubles(Args.size(), Type::getDoubleTy(getGlobalContext()));
 | |
|   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
 | |
|   
 | |
|   Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
 | |
|   
 | |
|   // If F conflicted, there was already something named 'Name'.  If it has a
 | |
|   // body, don't allow redefinition or reextern.
 | |
|   if (F->getName() != Name) {
 | |
|     // Delete the one we just made and get the existing one.
 | |
|     F->eraseFromParent();
 | |
|     F = TheModule->getFunction(Name);
 | |
|     
 | |
|     // If F already has a body, reject this.
 | |
|     if (!F->empty()) {
 | |
|       ErrorF("redefinition of function");
 | |
|       return 0;
 | |
|     }
 | |
|     
 | |
|     // If F took a different number of args, reject.
 | |
|     if (F->arg_size() != Args.size()) {
 | |
|       ErrorF("redefinition of function with different # args");
 | |
|       return 0;
 | |
|     }
 | |
|   }
 | |
|   
 | |
|   // Set names for all arguments.
 | |
|   unsigned Idx = 0;
 | |
|   for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
 | |
|        ++AI, ++Idx) {
 | |
|     AI->setName(Args[Idx]);
 | |
|     
 | |
|     // Add arguments to variable symbol table.
 | |
|     NamedValues[Args[Idx]] = AI;
 | |
|   }
 | |
|   
 | |
|   return F;
 | |
| }
 | |
| 
 | |
| Function *FunctionAST::Codegen() {
 | |
|   NamedValues.clear();
 | |
|   
 | |
|   Function *TheFunction = Proto->Codegen();
 | |
|   if (TheFunction == 0)
 | |
|     return 0;
 | |
|   
 | |
|   // Create a new basic block to start insertion into.
 | |
|   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
 | |
|   Builder.SetInsertPoint(BB);
 | |
|   
 | |
|   if (Value *RetVal = Body->Codegen()) {
 | |
|     // Finish off the function.
 | |
|     Builder.CreateRet(RetVal);
 | |
|     
 | |
|     // Validate the generated code, checking for consistency.
 | |
|     verifyFunction(*TheFunction);
 | |
|     return TheFunction;
 | |
|   }
 | |
|   
 | |
|   // Error reading body, remove function.
 | |
|   TheFunction->eraseFromParent();
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Top-Level parsing and Driver
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| static void HandleDefinition() {
 | |
|   if (FunctionAST *F = ParseDefinition()) {
 | |
|     if (Function *LF = F->Codegen()) {
 | |
|       fprintf(stderr, "Read function definition:");
 | |
|       LF->dump();
 | |
|     }
 | |
|   } else {
 | |
|     // Skip token for error recovery.
 | |
|     getNextToken();
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void HandleExtern() {
 | |
|   if (PrototypeAST *P = ParseExtern()) {
 | |
|     if (Function *F = P->Codegen()) {
 | |
|       fprintf(stderr, "Read extern: ");
 | |
|       F->dump();
 | |
|     }
 | |
|   } else {
 | |
|     // Skip token for error recovery.
 | |
|     getNextToken();
 | |
|   }
 | |
| }
 | |
| 
 | |
| static void HandleTopLevelExpression() {
 | |
|   // Evaluate a top level expression into an anonymous function.
 | |
|   if (FunctionAST *F = ParseTopLevelExpr()) {
 | |
|     if (Function *LF = F->Codegen()) {
 | |
|       fprintf(stderr, "Read top-level expression:");
 | |
|       LF->dump();
 | |
|     }
 | |
|   } else {
 | |
|     // Skip token for error recovery.
 | |
|     getNextToken();
 | |
|   }
 | |
| }
 | |
| 
 | |
| /// top ::= definition | external | expression | ';'
 | |
| static void MainLoop() {
 | |
|   while (1) {
 | |
|     fprintf(stderr, "ready> ");
 | |
|     switch (CurTok) {
 | |
|     case tok_eof:    return;
 | |
|     case ';':        getNextToken(); break;  // ignore top level semicolons.
 | |
|     case tok_def:    HandleDefinition(); break;
 | |
|     case tok_extern: HandleExtern(); break;
 | |
|     default:         HandleTopLevelExpression(); break;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // "Library" functions that can be "extern'd" from user code.
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| /// putchard - putchar that takes a double and returns 0.
 | |
| extern "C" 
 | |
| double putchard(double X) {
 | |
|   putchar((char)X);
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| //===----------------------------------------------------------------------===//
 | |
| // Main driver code.
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| int main() {
 | |
|   TheModule = new Module("my cool jit", getGlobalContext());
 | |
| 
 | |
|   // Install standard binary operators.
 | |
|   // 1 is lowest precedence.
 | |
|   BinopPrecedence['<'] = 10;
 | |
|   BinopPrecedence['+'] = 20;
 | |
|   BinopPrecedence['-'] = 20;
 | |
|   BinopPrecedence['*'] = 40;  // highest.
 | |
| 
 | |
|   // Prime the first token.
 | |
|   fprintf(stderr, "ready> ");
 | |
|   getNextToken();
 | |
| 
 | |
|   MainLoop();
 | |
|   TheModule->dump();
 | |
|   return 0;
 | |
| }
 | |
| </pre>
 | |
| </div>
 | |
| <a href="LangImpl4.html">Next: Adding JIT and Optimizer Support</a>
 | |
| </div>
 | |
| 
 | |
| <!-- *********************************************************************** -->
 | |
| <hr>
 | |
| <address>
 | |
|   <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
 | |
|   src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
 | |
|   <a href="http://validator.w3.org/check/referer"><img
 | |
|   src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
 | |
| 
 | |
|   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
 | |
|   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
 | |
|   Last modified: $Date: 2009-07-21 11:05:13 -0700 (Tue, 21 Jul 2009) $
 | |
| </address>
 | |
| </body>
 | |
| </html>
 |