From f1c21a8da6ed27a6ab4944e30bbeb4bd3ee08a71 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 13 Sep 2011 23:45:18 +0000 Subject: [PATCH] First step in supporting #line directives in assembler. This step parses the #line directives with the needed support in the lexer. Next will be to build a simple file/line# table mapping source SMLoc's for later use by diagnostics. And the last step will be to get the diagnostics to use the mapping for file and line numbers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139669 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCParser/AsmLexer.h | 1 + lib/MC/MCParser/AsmLexer.cpp | 23 ++++++++++++- lib/MC/MCParser/AsmParser.cpp | 51 ++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index ab78799fdcd..b1277ec6ef0 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -47,6 +47,7 @@ public: void setBuffer(const MemoryBuffer *buf, const char *ptr = NULL); virtual StringRef LexUntilEndOfStatement(); + StringRef LexUntilEndOfLine(); bool isAtStartOfComment(char Char); bool isAtStatementSeparator(const char *Ptr); diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index bbb0bbc8b24..9760e4676d8 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -334,6 +334,17 @@ StringRef AsmLexer::LexUntilEndOfStatement() { return StringRef(TokStart, CurPtr-TokStart); } +StringRef AsmLexer::LexUntilEndOfLine() { + TokStart = CurPtr; + + while (*CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); +} + bool AsmLexer::isAtStartOfComment(char Char) { // FIXME: This won't work for multi-character comment indicators like "//". return Char == *MAI.getCommentString(); @@ -345,17 +356,26 @@ bool AsmLexer::isAtStatementSeparator(const char *Ptr) { } AsmToken AsmLexer::LexToken() { + static bool isAtStartOfLine = true; TokStart = CurPtr; // This always consumes at least one character. int CurChar = getNextChar(); - if (isAtStartOfComment(CurChar)) + if (isAtStartOfComment(CurChar)) { + // If this comment starts with a '#', then return the Hash token and let + // the assembler parser see if it can be parsed as a cpp line filename + // comment. We do this only if we are at the start of a line. + if (CurChar == '#' && isAtStartOfLine) + return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + isAtStartOfLine = true; return LexLineComment(); + } if (isAtStatementSeparator(TokStart)) { CurPtr += strlen(MAI.getSeparatorString()) - 1; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, strlen(MAI.getSeparatorString()))); } + isAtStartOfLine = false; switch (CurChar) { default: @@ -373,6 +393,7 @@ AsmToken AsmLexer::LexToken() { return LexToken(); case '\n': // FALL THROUGH. case '\r': + isAtStartOfLine = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index cd8d2707a80..5ab7ca6e830 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -153,6 +153,8 @@ private: void CheckForValidSection(); bool ParseStatement(); + void EatToEndOfLine(); + bool ParseCppHashLineFilenameComment(const SMLoc &L); bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); bool expandMacro(SmallString<256> &Buf, StringRef Body, @@ -937,10 +939,8 @@ bool AsmParser::ParseStatement() { StringRef IDVal; int64_t LocalLabelVal = -1; // A full line comment is a '#' as the first token. - if (Lexer.is(AsmToken::Hash)) { - EatToEndOfStatement(); - return false; - } + if (Lexer.is(AsmToken::Hash)) + return ParseCppHashLineFilenameComment(IDLoc); // Allow an integer followed by a ':' as a directional local label. if (Lexer.is(AsmToken::Integer)) { @@ -1209,6 +1209,49 @@ bool AsmParser::ParseStatement() { return false; } +/// EatToEndOfLine uses the Lexer to eat the characters to the end of the line +/// since they may not be able to be tokenized to get to the end of line token. +void AsmParser::EatToEndOfLine() { + Lexer.LexUntilEndOfLine(); + // Eat EOL. + Lex(); +} + +/// ParseCppHashLineFilenameComment as this: +/// ::= # number "filename" +/// or just as a full line comment if it doesn't have a number and a string. +bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) { + Lex(); // Eat the hash token. + + if (getLexer().isNot(AsmToken::Integer)) { + // Consume the line since in cases it is not a well-formed line directive, + // as if were simply a full line comment. + EatToEndOfLine(); + return false; + } + + int64_t LineNumber = getTok().getIntVal(); + // FIXME: remember to remove this line that is silencing a warning for now. + (void) LineNumber; + Lex(); + + if (getLexer().isNot(AsmToken::String)) { + EatToEndOfLine(); + return false; + } + + StringRef Filename = getTok().getString(); + // Get rid of the enclosing quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // TODO: Now with the Filename, LineNumber set up a mapping to the SMLoc for + // later use by diagnostics. + + // Ignore any trailing characters, they're just comment. + EatToEndOfLine(); + return false; +} + bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body, const std::vector &Parameters, const std::vector > &A,