llvm-6502/tools/llvm-mc/AsmLexer.cpp
Kevin Enderby 9823ca971d Added the AsmToken::Hash enum constant to MCAsmLexer.h in preparation of
supporting other targets.  Changed the code to pass MCAsmInfo to the parser
and the lexer.  Then changed the lexer to use CommentString from MCAsmInfo
instead of a literal '#' character.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@81046 91177308-0d34-0410-b5e6-96231b3b80d8
2009-09-04 21:45:34 +00:00

332 lines
11 KiB
C++

//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
#include "AsmLexer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Config/config.h" // for strtoull.
#include "llvm/MC/MCAsmInfo.h"
#include <cerrno>
#include <cstdio>
#include <cstdlib>
using namespace llvm;
AsmLexer::AsmLexer(SourceMgr &SM, const MCAsmInfo &_MAI) : SrcMgr(SM),
MAI(_MAI) {
CurBuffer = 0;
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
TokStart = 0;
}
AsmLexer::~AsmLexer() {
}
SMLoc AsmLexer::getLoc() const {
return SMLoc::getFromPointer(TokStart);
}
void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
const char *Type) const {
SrcMgr.PrintMessage(Loc, Msg, Type);
}
/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return AsmToken::Error.
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
return AsmToken(AsmToken::Error, StringRef(Loc, 0));
}
/// EnterIncludeFile - Enter the specified file. This prints an error and
/// returns true on failure.
bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
if (NewBuf == -1)
return true;
// Save the line number and lex buffer of the includer.
CurBuffer = NewBuf;
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
return false;
}
int AsmLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
default:
return (unsigned char)CurChar;
case 0: {
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
if (CurPtr-1 != CurBuf->getBufferEnd())
return 0; // Just whitespace.
// If this is the end of an included file, pop the parent file off the
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = ParentIncludeLoc.getPointer();
// Reset the token start pointer to the start of the new file.
TokStart = CurPtr;
return getNextChar();
}
// Otherwise, return end of file.
--CurPtr; // Another call to lex will return EOF again.
return EOF;
}
}
}
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
AsmToken AsmLexer::LexIdentifier() {
while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
*CurPtr == '.' || *CurPtr == '@')
++CurPtr;
return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}
/// LexSlash: Slash: /
/// C-Style Comment: /* ... */
AsmToken AsmLexer::LexSlash() {
switch (*CurPtr) {
case '*': break; // C style comment.
case '/': return ++CurPtr, LexLineComment();
default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
}
// C Style comment.
++CurPtr; // skip the star.
while (1) {
int CurChar = getNextChar();
switch (CurChar) {
case EOF:
return ReturnError(TokStart, "unterminated comment");
case '*':
// End of the comment?
if (CurPtr[0] != '/') break;
++CurPtr; // End the */.
return LexToken();
}
}
}
/// LexLineComment: Comment: #[^\n]*
/// : //[^\n]*
AsmToken AsmLexer::LexLineComment() {
// FIXME: This is broken if we happen to a comment at the end of a file, which
// was .included, and which doesn't end with a newline.
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
CurChar = getNextChar();
if (CurChar == EOF)
return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
}
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
/// Forward/Backward Label: [0-9][fb]
/// Binary integer: 0b[01]+
/// Octal integer: 0[0-7]+
/// Hex integer: 0x[0-9a-fA-F]+
/// Decimal integer: [1-9][0-9]*
/// TODO: FP literal.
AsmToken AsmLexer::LexDigit() {
if (*CurPtr == ':')
return ReturnError(TokStart, "FIXME: local label not implemented");
if (*CurPtr == 'f' || *CurPtr == 'b')
return ReturnError(TokStart, "FIXME: directional label not implemented");
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0') {
while (isdigit(*CurPtr))
++CurPtr;
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
strtoll(TokStart, 0, 10));
}
if (*CurPtr == 'b') {
++CurPtr;
const char *NumStart = CurPtr;
while (CurPtr[0] == '0' || CurPtr[0] == '1')
++CurPtr;
// Requires at least one binary digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid binary number");
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
strtoll(NumStart, 0, 2));
}
if (*CurPtr == 'x') {
++CurPtr;
const char *NumStart = CurPtr;
while (isxdigit(CurPtr[0]))
++CurPtr;
// Requires at least one hex digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
errno = 0;
if (errno == EINVAL)
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
if (errno == ERANGE) {
errno = 0;
if (errno == EINVAL)
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
if (errno == ERANGE)
return ReturnError(CurPtr-2, "Hexadecimal number out of range");
}
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
(int64_t) strtoull(NumStart, 0, 16));
}
// Must be an octal number, it starts with 0.
while (*CurPtr >= '0' && *CurPtr <= '7')
++CurPtr;
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
strtoll(TokStart, 0, 8));
}
/// LexQuote: String: "..."
AsmToken AsmLexer::LexQuote() {
int CurChar = getNextChar();
// TODO: does gas allow multiline string constants?
while (CurChar != '"') {
if (CurChar == '\\') {
// Allow \", etc.
CurChar = getNextChar();
}
if (CurChar == EOF)
return ReturnError(TokStart, "unterminated string constant");
CurChar = getNextChar();
}
return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
}
StringRef AsmLexer::LexUntilEndOfStatement() {
TokStart = CurPtr;
while (*CurPtr != ';' && // End of statement marker.
*CurPtr != '\n' &&
*CurPtr != '\r' &&
(*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
// check for start of line comment.
for (const char *p = MAI.getCommentString(); *p != 0; ++p)
if (*CurPtr == *p)
break;
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
}
AsmToken AsmLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
int CurChar = getNextChar();
for (const char *p = MAI.getCommentString(); *p != 0; ++p)
if (CurChar == *p)
return LexLineComment();
switch (CurChar) {
default:
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
return LexIdentifier();
// Unknown character, emit an error.
return ReturnError(TokStart, "invalid character in input");
case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case 0:
case ' ':
case '\t':
// Ignore whitespace.
return LexToken();
case '\n': // FALL THROUGH.
case '\r': // FALL THROUGH.
case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
case '=':
if (*CurPtr == '=')
return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
case '|':
if (*CurPtr == '|')
return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
case '&':
if (*CurPtr == '&')
return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
case '!':
if (*CurPtr == '=')
return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
case '/': return LexSlash();
case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
case '"': return LexQuote();
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return LexDigit();
case '<':
switch (*CurPtr) {
case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
StringRef(TokStart, 2));
case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
StringRef(TokStart, 2));
case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
StringRef(TokStart, 2));
default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
}
case '>':
switch (*CurPtr) {
case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
StringRef(TokStart, 2));
case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
StringRef(TokStart, 2));
default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
}
// TODO: Quoted identifiers (objc methods etc)
// local labels: [0-9][:]
// Forward/backward labels: [0-9][fb]
// Integers, fp constants, character constants.
}
}