From 475839e9a97a0c0282e107d14fd1dc6e5f223435 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Mon, 29 Jun 2009 20:37:27 +0000 Subject: [PATCH] MC: Improve expression parsing and implement evaluation of absolute expressions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74448 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/MC/AsmParser/exprs.s | 39 ++++++++ tools/llvm-mc/AsmLexer.cpp | 39 +++++--- tools/llvm-mc/AsmLexer.h | 8 +- tools/llvm-mc/AsmParser.cpp | 163 ++++++++++++++++++++++++------- tools/llvm-mc/AsmParser.h | 23 ++++- tools/llvm-mc/MC-X86Specific.cpp | 7 +- 6 files changed, 216 insertions(+), 63 deletions(-) create mode 100644 test/MC/AsmParser/exprs.s diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s new file mode 100644 index 00000000000..472b4cfe8ed --- /dev/null +++ b/test/MC/AsmParser/exprs.s @@ -0,0 +1,39 @@ +# FIXME: For now this test just checks that llvm-mc works. Once we have .macro, +# .if, and .abort we can write a better test (without resorting to miles of +# greps). + +# RUN: llvm-mc %s > %t + + .byte !1 + 2 + .byte !0 + .byte ~0 + .byte -1 + .byte +1 + .byte 1 + 2 + .byte 1 & 3 + .byte 4 / 2 + .byte 4 / -2 + .byte 1 == 1 + .byte 1 == 0 + .byte 1 > 0 + .byte 1 >= 1 + .byte 1 < 2 + .byte 1 <= 1 + .byte 4 % 3 + .byte 2 * 2 + .byte 2 != 2 + .byte 2 <> 2 + .byte 1 | 2 + .byte 1 << 1 + .byte 2 >> 1 + .byte ~0 >> 1 + .byte 3 - 2 + .byte 1 ^ 3 + .byte 1 && 2 + .byte 3 && 0 + .byte 1 || 2 + .byte 0 || 0 + + .set c, 10 + .byte c + 1 + \ No newline at end of file diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp index db86825f3d2..f871d065c43 100644 --- a/tools/llvm-mc/AsmLexer.cpp +++ b/tools/llvm-mc/AsmLexer.cpp @@ -262,11 +262,23 @@ asmtok::TokKind AsmLexer::LexToken() { case '*': return asmtok::Star; case ',': return asmtok::Comma; case '$': return asmtok::Dollar; - case '=': return asmtok::Equal; - case '|': return asmtok::Pipe; + case '=': + if (*CurPtr == '=') + return ++CurPtr, asmtok::EqualEqual; + return asmtok::Equal; + case '|': + if (*CurPtr == '|') + return ++CurPtr, asmtok::PipePipe; + return asmtok::Pipe; case '^': return asmtok::Caret; - case '&': return asmtok::Amp; - case '!': return asmtok::Exclaim; + case '&': + if (*CurPtr == '&') + return ++CurPtr, asmtok::AmpAmp; + return asmtok::Amp; + case '!': + if (*CurPtr == '=') + return ++CurPtr, asmtok::ExclaimEqual; + return asmtok::Exclaim; case '%': return LexPercent(); case '/': return LexSlash(); case '#': return LexHash(); @@ -275,19 +287,18 @@ asmtok::TokKind AsmLexer::LexToken() { case '5': case '6': case '7': case '8': case '9': return LexDigit(); case '<': - if (*CurPtr == '<') { - ++CurPtr; - return asmtok::LessLess; + switch (*CurPtr) { + case '<': return ++CurPtr, asmtok::LessLess; + case '=': return ++CurPtr, asmtok::LessEqual; + case '>': return ++CurPtr, asmtok::LessGreater; + default: return asmtok::Less; } - // Don't have any use for bare '<' yet. - return ReturnError(TokStart, "invalid character in input"); case '>': - if (*CurPtr == '>') { - ++CurPtr; - return asmtok::GreaterGreater; + switch (*CurPtr) { + case '>': return ++CurPtr, asmtok::GreaterGreater; + case '=': return ++CurPtr, asmtok::GreaterEqual; + default: return asmtok::Greater; } - // Don't have any use for bare '>' yet. - return ReturnError(TokStart, "invalid character in input"); // TODO: Quoted identifiers (objc methods etc) // local labels: [0-9][:] diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h index 19a1b38af43..30ab2664d25 100644 --- a/tools/llvm-mc/AsmLexer.h +++ b/tools/llvm-mc/AsmLexer.h @@ -42,10 +42,12 @@ namespace asmtok { Plus, Minus, Tilde, Slash, // '/' LParen, RParen, - Star, Comma, Dollar, Equal, + Star, Comma, Dollar, Equal, EqualEqual, - Pipe, Caret, Amp, Exclaim, - Percent, LessLess, GreaterGreater + Pipe, PipePipe, Caret, + Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, + Less, LessEqual, LessLess, LessGreater, + Greater, GreaterEqual, GreaterGreater }; } diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp index 2b697a66ad2..2248b046c20 100644 --- a/tools/llvm-mc/AsmParser.cpp +++ b/tools/llvm-mc/AsmParser.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "AsmParser.h" + +#include "AsmExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -57,7 +59,7 @@ void AsmParser::EatToEndOfStatement() { /// /// parenexpr ::= expr) /// -bool AsmParser::ParseParenExpr(int64_t &Res) { +bool AsmParser::ParseParenExpr(AsmExpr *&Res) { if (ParseExpression(Res)) return true; if (Lexer.isNot(asmtok::RParen)) return TokError("expected ')' in parentheses expression"); @@ -70,28 +72,47 @@ bool AsmParser::ParseParenExpr(int64_t &Res) { /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= ~,+,- primaryexpr -bool AsmParser::ParsePrimaryExpr(int64_t &Res) { +bool AsmParser::ParsePrimaryExpr(AsmExpr *&Res) { switch (Lexer.getKind()) { default: return TokError("unknown token in expression"); + case asmtok::Exclaim: + Lexer.Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::LNot, Res); + return false; case asmtok::Identifier: // This is a label, this should be parsed as part of an expression, to - // handle things like LFOO+4 - Res = 0; // FIXME. + // handle things like LFOO+4. + Res = new AsmSymbolRefExpr(Ctx.GetOrCreateSymbol(Lexer.getCurStrVal())); Lexer.Lex(); // Eat identifier. return false; case asmtok::IntVal: - Res = Lexer.getCurIntVal(); + Res = new AsmConstantExpr(Lexer.getCurIntVal()); Lexer.Lex(); // Eat identifier. return false; case asmtok::LParen: Lexer.Lex(); // Eat the '('. return ParseParenExpr(Res); - case asmtok::Tilde: - case asmtok::Plus: case asmtok::Minus: Lexer.Lex(); // Eat the operator. - return ParsePrimaryExpr(Res); + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::Minus, Res); + return false; + case asmtok::Plus: + Lexer.Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::Plus, Res); + return false; + case asmtok::Tilde: + Lexer.Lex(); // Eat the operator. + if (ParsePrimaryExpr(Res)) + return true; + Res = new AsmUnaryExpr(AsmUnaryExpr::Not, Res); + return false; } } @@ -102,59 +123,125 @@ bool AsmParser::ParsePrimaryExpr(int64_t &Res) { /// expr ::= expr *,/,%,<<,>> expr -> highest. /// expr ::= primaryexpr /// -bool AsmParser::ParseExpression(int64_t &Res) { +bool AsmParser::ParseExpression(AsmExpr *&Res) { + Res = 0; return ParsePrimaryExpr(Res) || ParseBinOpRHS(1, Res); } -static unsigned getBinOpPrecedence(asmtok::TokKind K) { +bool AsmParser::ParseAbsoluteExpression(int64_t &Res) { + AsmExpr *Expr; + + if (ParseExpression(Expr)) + return true; + + if (!Expr->EvaluateAsAbsolute(Ctx, Res)) + return TokError("expected absolute expression"); + + return false; +} + +static unsigned getBinOpPrecedence(asmtok::TokKind K, + AsmBinaryExpr::Opcode &Kind) { switch (K) { default: return 0; // not a binop. - case asmtok::Plus: - case asmtok::Minus: + + // Lowest Precedence: &&, || + case asmtok::AmpAmp: + Kind = AsmBinaryExpr::LAnd; return 1; - case asmtok::Pipe: - case asmtok::Caret: - case asmtok::Amp: - case asmtok::Exclaim: + case asmtok::PipePipe: + Kind = AsmBinaryExpr::LOr; + return 1; + + // Low Precedence: +, -, ==, !=, <>, <, <=, >, >= + case asmtok::Plus: + Kind = AsmBinaryExpr::Add; return 2; - case asmtok::Star: - case asmtok::Slash: - case asmtok::Percent: - case asmtok::LessLess: - case asmtok::GreaterGreater: + case asmtok::Minus: + Kind = AsmBinaryExpr::Sub; + return 2; + case asmtok::EqualEqual: + Kind = AsmBinaryExpr::EQ; + return 2; + case asmtok::ExclaimEqual: + case asmtok::LessGreater: + Kind = AsmBinaryExpr::NE; + return 2; + case asmtok::Less: + Kind = AsmBinaryExpr::LT; + return 2; + case asmtok::LessEqual: + Kind = AsmBinaryExpr::LTE; + return 2; + case asmtok::Greater: + Kind = AsmBinaryExpr::GT; + return 2; + case asmtok::GreaterEqual: + Kind = AsmBinaryExpr::GTE; + return 2; + + // Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? + case asmtok::Pipe: + Kind = AsmBinaryExpr::Or; return 3; + case asmtok::Caret: + Kind = AsmBinaryExpr::Xor; + return 3; + case asmtok::Amp: + Kind = AsmBinaryExpr::And; + return 3; + + // Highest Precedence: *, /, %, <<, >> + case asmtok::Star: + Kind = AsmBinaryExpr::Mul; + return 4; + case asmtok::Slash: + Kind = AsmBinaryExpr::Div; + return 4; + case asmtok::Percent: + Kind = AsmBinaryExpr::Mod; + return 4; + case asmtok::LessLess: + Kind = AsmBinaryExpr::Shl; + return 4; + case asmtok::GreaterGreater: + Kind = AsmBinaryExpr::Shr; + return 4; } } /// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. /// Res contains the LHS of the expression on input. -bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) { +bool AsmParser::ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res) { while (1) { - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind()); + AsmBinaryExpr::Opcode Kind; + unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); // If the next token is lower precedence than we are allowed to eat, return // successfully with what we ate already. if (TokPrec < Precedence) return false; - //asmtok::TokKind BinOp = Lexer.getKind(); Lexer.Lex(); // Eat the next primary expression. - int64_t RHS; + AsmExpr *RHS; if (ParsePrimaryExpr(RHS)) return true; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. - unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind()); + AsmBinaryExpr::Opcode Dummy; + unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); if (TokPrec < NextTokPrec) { if (ParseBinOpRHS(Precedence+1, RHS)) return true; } - // Merge LHS/RHS: fixme use the right operator etc. - Res += RHS; + // Merge LHS and RHS according to operator. + Res = new AsmBinaryExpr(Kind, Res, RHS); } } @@ -354,7 +441,7 @@ bool AsmParser::ParseStatement() { bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) { int64_t Value; - if (ParseExpression(Value)) + if (ParseAbsoluteExpression(Value)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) @@ -433,7 +520,7 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Section, } /// ParseDirectiveAscii: -/// ::= ( .ascii | .asciiz ) [ "string" ( , "string" )* ] +/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { if (Lexer.isNot(asmtok::EndOfStatement)) { for (;;) { @@ -469,7 +556,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { if (Lexer.isNot(asmtok::EndOfStatement)) { for (;;) { int64_t Expr; - if (ParseExpression(Expr)) + if (ParseAbsoluteExpression(Expr)) return true; Out.EmitValue(MCValue::get(Expr), Size); @@ -492,7 +579,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { /// ::= .space expression [ , expression ] bool AsmParser::ParseDirectiveSpace() { int64_t NumBytes; - if (ParseExpression(NumBytes)) + if (ParseAbsoluteExpression(NumBytes)) return true; int64_t FillExpr = 0; @@ -502,7 +589,7 @@ bool AsmParser::ParseDirectiveSpace() { return TokError("unexpected token in '.space' directive"); Lexer.Lex(); - if (ParseExpression(FillExpr)) + if (ParseAbsoluteExpression(FillExpr)) return true; HasFillExpr = true; @@ -527,7 +614,7 @@ bool AsmParser::ParseDirectiveSpace() { /// ::= .fill expression , expression , expression bool AsmParser::ParseDirectiveFill() { int64_t NumValues; - if (ParseExpression(NumValues)) + if (ParseAbsoluteExpression(NumValues)) return true; if (Lexer.isNot(asmtok::Comma)) @@ -535,7 +622,7 @@ bool AsmParser::ParseDirectiveFill() { Lexer.Lex(); int64_t FillSize; - if (ParseExpression(FillSize)) + if (ParseAbsoluteExpression(FillSize)) return true; if (Lexer.isNot(asmtok::Comma)) @@ -543,7 +630,7 @@ bool AsmParser::ParseDirectiveFill() { Lexer.Lex(); int64_t FillExpr; - if (ParseExpression(FillExpr)) + if (ParseAbsoluteExpression(FillExpr)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) @@ -564,7 +651,7 @@ bool AsmParser::ParseDirectiveFill() { /// ::= .org expression [ , expression ] bool AsmParser::ParseDirectiveOrg() { int64_t Offset; - if (ParseExpression(Offset)) + if (ParseAbsoluteExpression(Offset)) return true; // Parse optional fill expression. @@ -574,7 +661,7 @@ bool AsmParser::ParseDirectiveOrg() { return TokError("unexpected token in '.org' directive"); Lexer.Lex(); - if (ParseExpression(FillExpr)) + if (ParseAbsoluteExpression(FillExpr)) return true; if (Lexer.isNot(asmtok::EndOfStatement)) diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h index da256c275ca..326d0e74b8d 100644 --- a/tools/llvm-mc/AsmParser.h +++ b/tools/llvm-mc/AsmParser.h @@ -17,6 +17,7 @@ #include "AsmLexer.h" namespace llvm { +class AsmExpr; class MCContext; class MCInst; class MCStreamer; @@ -44,10 +45,24 @@ private: void EatToEndOfStatement(); bool ParseAssignment(const char *Name, bool IsDotSet); - bool ParseExpression(int64_t &Res); - bool ParsePrimaryExpr(int64_t &Res); - bool ParseBinOpRHS(unsigned Precedence, int64_t &Res); - bool ParseParenExpr(int64_t &Res); + + /// ParseExpression - Parse a general assembly expression. + /// + /// @param Res - The resulting expression. The pointer value is null on error. + /// @result - False on success. + bool ParseExpression(AsmExpr *&Res); + + /// ParseAbsoluteExpr - Parse an expression which must evaluate to an absolute + /// value. + /// + /// @param Res - The value of the absolute expression. The result is undefined + /// on error. + /// @result - False on success. + bool ParseAbsoluteExpression(int64_t &Res); + + bool ParsePrimaryExpr(AsmExpr *&Res); + bool ParseBinOpRHS(unsigned Precedence, AsmExpr *&Res); + bool ParseParenExpr(AsmExpr *&Res); // X86 specific. bool ParseX86InstOperands(MCInst &Inst); diff --git a/tools/llvm-mc/MC-X86Specific.cpp b/tools/llvm-mc/MC-X86Specific.cpp index 45774cf48c8..4e1739b25da 100644 --- a/tools/llvm-mc/MC-X86Specific.cpp +++ b/tools/llvm-mc/MC-X86Specific.cpp @@ -87,7 +87,7 @@ bool AsmParser::ParseX86Operand(X86Operand &Op) { // $42 -> immediate. Lexer.Lex(); int64_t Val; - if (ParseExpression(Val)) + if (ParseAbsoluteExpression(Val)) return TokError("expected integer constant"); Op = X86Operand::CreateReg(Val); return false; @@ -118,7 +118,7 @@ bool AsmParser::ParseX86MemOperand(X86Operand &Op) { // it. int64_t Disp = 0; if (Lexer.isNot(asmtok::LParen)) { - if (ParseExpression(Disp)) return true; + if (ParseAbsoluteExpression(Disp)) return true; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -139,8 +139,7 @@ bool AsmParser::ParseX86MemOperand(X86Operand &Op) { // memory operand consumed. } else { // It must be an parenthesized expression, parse it now. - if (ParseParenExpr(Disp) || - ParseBinOpRHS(1, Disp)) + if (ParseAbsoluteExpression(Disp)) return true; // After parsing the base expression we could either have a parenthesized