From 2cf5f14f20ef0dc0d1ebddc45e11661df91f6ebf Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Mon, 22 Jun 2009 01:29:09 +0000
Subject: [PATCH] start implementing some simple operand parsing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73867 91177308-0d34-0410-b5e6-96231b3b80d8
---
 tools/llvm-mc/AsmParser.cpp | 132 ++++++++++++++++++++++++++++++++----
 tools/llvm-mc/AsmParser.h   |   7 +-
 2 files changed, 124 insertions(+), 15 deletions(-)

diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
index 1940d761894..1ab1af48ad0 100644
--- a/tools/llvm-mc/AsmParser.cpp
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -26,7 +26,6 @@ bool AsmParser::TokError(const char *Msg) {
   return true;
 }
 
-
 bool AsmParser::Run() {
   // Prime the lexer.
   Lexer.Lex();
@@ -38,10 +37,91 @@ bool AsmParser::Run() {
   return false;
 }
 
+/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::EatToEndOfStatement() {
+  while (Lexer.isNot(asmtok::EndOfStatement) &&
+         Lexer.isNot(asmtok::Eof))
+    Lexer.Lex();
+
+  // Eat EOL.
+  if (Lexer.is(asmtok::EndOfStatement))
+    Lexer.Lex();
+}
+
+/// X86Operand - A parsed instruction operand: either a register (Reg.RegNo) or
+/// an integer immediate (Imm.Val), discriminated by Kind.
+struct AsmParser::X86Operand {
+  enum {
+    Register,
+    Immediate
+  } Kind;
+
+  union {
+    struct {
+      unsigned RegNo;
+    } Reg;
+
+    struct {
+      // FIXME: Should be a general expression.
+      int64_t Val;
+    } Imm;
+  };
+
+  static X86Operand CreateReg(unsigned RegNo) {
+    X86Operand Res;
+    Res.Kind = Register;
+    Res.Reg.RegNo = RegNo;
+    return Res;
+  }
+  static X86Operand CreateImm(int64_t Val) {
+    X86Operand Res;
+    Res.Kind = Immediate;
+    Res.Imm.Val = Val;
+    return Res;
+  }
+};
+
+/// ParseX86Operand - Parse a single operand starting at the current token into
+/// Op.  Returns true if an error was reported, false on success (in which case
+/// the operand's tokens have been consumed).
+bool AsmParser::ParseX86Operand(X86Operand &Op) {
+  switch (Lexer.getKind()) {
+  default:
+    return TokError("unknown token at start of instruction operand");
+  case asmtok::Register:
+    // FIXME: Decode reg #.
+    Op = X86Operand::CreateReg(0);
+    Lexer.Lex(); // Eat register.
+    return false;
+  case asmtok::Dollar:
+    // $42 -> immediate.
+    Lexer.Lex();
+    // FIXME: Parse an arbitrary expression here, like $(4+5)
+    if (Lexer.isNot(asmtok::IntVal))
+      return TokError("expected integer constant");
+
+    Op = X86Operand::CreateImm(Lexer.getCurIntVal());
+    Lexer.Lex(); // Eat integer.
+    return false;
+  case asmtok::Identifier:
+    // This is a label, this should be parsed as part of an expression, to
+    // handle things like LFOO+4
+    Op = X86Operand::CreateImm(0); // FIXME.
+    Lexer.Lex(); // Eat identifier.
+    return false;
+
+  //case asmtok::Star:
+  // * %eax
+  // *
+  // Note that these are both "dereferenced".
+  }
+}
+
 
 /// ParseStatement:
 ///   ::= EndOfStatement
-///   ::= Label* Identifier Operands* EndOfStatement
+///   ::= Label* Directive ...Operands... EndOfStatement
+///   ::= Label* Identifier OperandList* EndOfStatement
 bool AsmParser::ParseStatement() {
   switch (Lexer.getKind()) {
   default:
@@ -55,7 +135,7 @@ bool AsmParser::ParseStatement() {
   }
 
   // If we have an identifier, handle it as the key symbol.
-  //SMLoc IDLoc = Lexer.getLoc();
+  SMLoc IDLoc = Lexer.getLoc();
   std::string IDVal = Lexer.getCurStrVal();
 
   // Consume the identifier, see what is after it.
@@ -66,18 +146,42 @@ bool AsmParser::ParseStatement() {
   }
 
   // Otherwise, we have a normal instruction or directive.
-  if (IDVal[0] == '.')
-    outs() << "Found directive: " << IDVal << "\n";
-  else
-    outs() << "Found instruction: " << IDVal << "\n";
+  if (IDVal[0] == '.') {
+    Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
+    EatToEndOfStatement();
+    return false;
+  }
 
-  // Skip to end of line for now.
-  while (Lexer.isNot(asmtok::EndOfStatement) &&
-         Lexer.isNot(asmtok::Eof))
-    Lexer.Lex();
+  // If it's an instruction, parse an operand list.
+  std::vector<X86Operand> Operands;
 
-  // Eat EOL.
-  if (Lexer.is(asmtok::EndOfStatement))
-    Lexer.Lex();
+  // Read the first operand, if present.  Note that we require a newline at the
+  // end of file, so we don't have to worry about Eof here.
+  if (Lexer.isNot(asmtok::EndOfStatement)) {
+    X86Operand Op;
+    if (ParseX86Operand(Op))
+      return true;
+    Operands.push_back(Op);
+  }
+
+  while (Lexer.is(asmtok::Comma)) {
+    Lexer.Lex(); // Eat the comma.
+
+    // Parse and remember the operand.
+    X86Operand Op;
+    if (ParseX86Operand(Op))
+      return true;
+    Operands.push_back(Op);
+  }
+
+  if (Lexer.isNot(asmtok::EndOfStatement))
+    return TokError("unexpected token in operand list");
+
+  // Eat the end of statement marker.
+  Lexer.Lex();
+
+  // Instruction is good, process it.
+  outs() << "Found instruction: " << IDVal << " with " << Operands.size()
+         << " operands.\n";
   return false;
 }
diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h
index 66e1a9b42f5..54e3741778e 100644
--- a/tools/llvm-mc/AsmParser.h
+++ b/tools/llvm-mc/AsmParser.h
@@ -17,9 +17,10 @@
 #include "AsmLexer.h"
 
 namespace llvm {
-  
+
 class AsmParser {
   AsmLexer Lexer;
+  struct X86Operand;
   
 public:
   AsmParser(SourceMgr &SM) : Lexer(SM) {}
@@ -32,6 +33,10 @@ private:
   
   bool Error(SMLoc L, const char *Msg);
   bool TokError(const char *Msg);
+
+  void EatToEndOfStatement();
+
+  bool ParseX86Operand(X86Operand &Op);
 };
 
 } // end namespace llvm