some baby steps.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73848 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-06-21 07:19:10 +00:00
parent d926e048c1
commit a59e877996
3 changed files with 201 additions and 2 deletions

View File

@ -0,0 +1,87 @@
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
#include "AsmLexer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
/// AsmLexer - Construct a lexer that starts reading at the beginning of
/// buffer index 0 of the given SourceMgr.
///
/// \param SM  Source manager that owns the buffers being lexed; a reference to
///            it is retained for the lifetime of the lexer.
AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
  CurBuffer = 0;
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
  CurPtr = CurBuf->getBufferStart();
  TokStart = 0;

  // No token has been lexed yet.  Give the per-token state a defined value so
  // that getKind() and the assertions in getCurStrVal()/getCurIntVal() do not
  // read indeterminate memory if they are reached before the first Lex().
  CurKind = asmtok::Error;
  CurIntVal = 0;
}
void AsmLexer::PrintError(const char *Loc, const std::string &Msg) const {
SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
}
/// PrintError - Report \p Msg at source location \p Loc by forwarding both
/// arguments unchanged to SourceMgr::PrintError.
void AsmLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
SrcMgr.PrintError(Loc, Msg);
}
/// getNextChar - Read the character at CurPtr and advance past it.
///
/// Returns the character as a non-negative value (converted through unsigned
/// char), 0 for a nul byte that is not at the end of the current buffer, or
/// EOF once the end of the outermost buffer is reached.  When the end of an
/// included buffer is reached, lexing resumes in the parent buffer at the
/// location reported by SourceMgr::getParentIncludeLoc.
int AsmLexer::getNextChar() {
  const char Ch = *CurPtr++;

  // Common case: any non-nul byte is returned as-is.
  if (Ch != 0)
    return static_cast<unsigned char>(Ch);

  // A nul byte that is not sitting at the buffer end is just a stray nul in
  // the input; callers treat it like whitespace.
  if (CurPtr - 1 != CurBuf->getBufferEnd())
    return 0;

  // End of the current buffer.  If it was entered through an include, switch
  // back to the including buffer and keep reading from there.
  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
  if (ParentIncludeLoc != SMLoc()) {
    CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
    CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
    CurPtr = ParentIncludeLoc.getPointer();
    return getNextChar();
  }

  // No parent: step back onto the terminating nul so that every later call
  // lands here again and keeps returning EOF.
  --CurPtr;
  return EOF;
}
/// LexToken - Read the next token and return its kind.
///
/// Whitespace (space, tab, newline, carriage return and stray nul bytes) is
/// skipped.  TokStart is left pointing at the first character of the token
/// that is returned.  Returns asmtok::Eof at end of input and asmtok::Error
/// for any character that is not recognized.
asmtok::TokKind AsmLexer::LexToken() {
  // Skip whitespace with a loop rather than by calling LexToken() again:
  // recursing once per whitespace character makes stack depth proportional to
  // the length of the whitespace run.
  for (;;) {
    TokStart = CurPtr;
    // Each iteration reads one character through getNextChar().
    int CurChar = getNextChar();

    switch (CurChar) {
    default:
      // Handle letters: [a-zA-Z_]
      // if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
      //   return LexIdentifier();

      // Unknown character: report it to the caller as an error token.
      return asmtok::Error;
    case EOF: return asmtok::Eof;
    case 0:
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      // Ignore whitespace and restart with the next character.
      continue;
    case ':': return asmtok::Colon;
    case '+': return asmtok::Plus;
    case '-': return asmtok::Minus;
    }
  }
}

92
tools/llvm-mc/AsmLexer.h Normal file
View File

@ -0,0 +1,92 @@
//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class declares the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
#ifndef ASMLEXER_H
#define ASMLEXER_H
#include "llvm/Support/DataTypes.h"
#include <string>
#include <cassert>
namespace llvm {
class MemoryBuffer;
class SourceMgr;
class SMLoc;
/// asmtok - Namespace holding the token kinds produced by AsmLexer.
namespace asmtok {
/// TokKind - Kind of a lexed token, as returned by AsmLexer::Lex() and
/// AsmLexer::getKind().
enum TokKind {
// Markers
// Eof is returned at end of input; Error is returned for a character the
// lexer does not recognize.
Eof, Error,
// Token with a string value (see AsmLexer::getCurStrVal).
Identifier,
// Token with an integer value (see AsmLexer::getCurIntVal).
IntVal,
// The ':' character.
Colon,
// The '+' character.
Plus,
// The '-' character.
Minus
};
}
/// AsmLexer - Lexer class for assembly files.
///
/// The lexer reads characters from buffers owned by a SourceMgr and hands out
/// one token at a time.  Call Lex() to advance to the next token, then use
/// getKind() and the getCur*Val() accessors to inspect it.
class AsmLexer {
  /// SrcMgr - Source manager that owns the buffers being lexed and is used to
  /// print diagnostics.
  SourceMgr &SrcMgr;

  /// CurPtr - Position of the next character to read.
  const char *CurPtr;

  /// CurBuf - The buffer CurPtr currently points into.
  const MemoryBuffer *CurBuf;

  // Information about the current token.

  /// TokStart - Position at which the lexer began reading the current token.
  const char *TokStart;

  /// CurKind - Kind of the token returned by the most recent Lex() call.
  asmtok::TokKind CurKind;

  /// CurStrVal - String value of the current token; only meaningful when
  /// CurKind is asmtok::Identifier.
  std::string CurStrVal;

  /// CurIntVal - Integer value of the current token; only meaningful when
  /// CurKind is asmtok::IntVal.
  int64_t CurIntVal;

  /// CurBuffer - This is the current buffer index we're lexing from as managed
  /// by the SourceMgr object.
  int CurBuffer;

public:
  /// Construct a lexer reading from \p SrcMgr.  Marked explicit so that a
  /// SourceMgr is never silently converted into a lexer.
  explicit AsmLexer(SourceMgr &SrcMgr);
  ~AsmLexer() {}

  /// Lex - Advance to the next token, remember its kind, and return it.
  asmtok::TokKind Lex() {
    return CurKind = LexToken();
  }

  /// getKind - Return the kind recorded by the most recent Lex() call.
  asmtok::TokKind getKind() const { return CurKind; }

  /// getCurStrVal - Return the string value of the current token.  Asserts
  /// that the current token is an Identifier.
  const std::string &getCurStrVal() const {
    assert(CurKind == asmtok::Identifier &&
           "This token doesn't have a string value");
    return CurStrVal;
  }

  /// getCurIntVal - Return the integer value of the current token.  Asserts
  /// that the current token is an IntVal.
  int64_t getCurIntVal() const {
    assert(CurKind == asmtok::IntVal && "This token isn't an integer");
    return CurIntVal;
  }

  /// getLoc - Return a source location for the lexer.
  /// NOTE(review): only the declaration is visible here; confirm which
  /// position it reports and that a definition exists before relying on it.
  SMLoc getLoc() const;

  /// PrintError - Emit the diagnostic \p Msg at the given location, which may
  /// be supplied either as a raw buffer pointer or as an SMLoc.
  void PrintError(const char *Loc, const std::string &Msg) const;
  void PrintError(SMLoc Loc, const std::string &Msg) const;

private:
  /// getNextChar - Read the next character from the input and advance.
  int getNextChar();

  /// LexToken - Read the next token and return its code.
  asmtok::TokKind LexToken();
};
} // end namespace llvm
#endif

View File

@ -19,6 +19,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include "AsmLexer.h"
using namespace llvm;
static cl::opt<std::string>
@ -63,10 +64,29 @@ static int AssembleInput(const char *ProgName) {
// Record the location of the include directories so that the lexer can find
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
//TGParser Parser(SrcMgr);
//return Parser.ParseFile();
AsmLexer Lexer(SrcMgr);
asmtok::TokKind Tok = Lexer.Lex();
while (Tok != asmtok::Eof) {
switch (Tok) {
default: outs() << "<<unknown token>>\n"; break;
case asmtok::Error: outs() << "<<error>>\n"; break;
case asmtok::Identifier:
outs() << "identifier: " << Lexer.getCurStrVal() << '\n';
break;
case asmtok::IntVal:
outs() << "int: " << Lexer.getCurIntVal() << '\n';
break;
case asmtok::Colon: outs() << "Colon\n"; break;
case asmtok::Plus: outs() << "Plus\n"; break;
case asmtok::Minus: outs() << "Minus\n"; break;
}
Tok = Lexer.Lex();
}
return 1;
}