llvm-6502/tools/llvm-mcmarkup/llvm-mcmarkup.cpp
Jim Grosbach 7dd4dc8892 MC: Simple example parser for MC assembly markup.
Nothing fancy, just a simple demonstration parser.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167181 91177308-0d34-0410-b5e6-96231b3b80d8
2012-10-31 23:24:13 +00:00

226 lines
7.1 KiB
C++

//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Example simple parser implementation for the MC assembly markup language.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
/// Positional command-line arguments naming the inputs to process. The list
/// may be empty, in which case main() substitutes "-".
static cl::list<std::string> InputFilenames(cl::Positional,
                                            cl::desc("<input files>"),
                                            cl::ZeroOrMore);

/// The "dump-tags" flag: when set, tags are listed instead of the input being
/// echoed back.
static cl::opt<bool> DumpTags(
    "dump-tags", cl::desc("List all tags encountered in input"));

/// Prefix for tool-level error messages; assigned from argv[0] in main().
static StringRef ToolName;
/// Trivial lexer for the markup parser. Input is always handled a character
/// at a time. The lexer just encapsulates EOF and lookahead handling.
///
/// Characters are returned as non-negative ints (converted through
/// unsigned char) so that no byte of the input, 0xFF in particular, can
/// compare equal to EOF.
class MarkupLexer {
  StringRef::const_iterator CurPtr; // Next character to be consumed.
  StringRef::const_iterator End;    // One past the last input character.

public:
  MarkupLexer(StringRef Source) : CurPtr(Source.begin()), End(Source.end()) {}

  /// Return true once every input character has been consumed.
  bool isEOF() const { return CurPtr == End; }

  /// Consume and return the next character, or EOF when the input is
  /// exhausted.
  int getNextChar() {
    if (CurPtr == End)
      return EOF;
    return static_cast<unsigned char>(*CurPtr++);
  }

  /// Return the next character without consuming it, or EOF when the input
  /// is exhausted.
  int peekNextChar() const {
    if (CurPtr == End)
      return EOF;
    return static_cast<unsigned char>(*CurPtr);
  }

  /// Return the position of the next character to be consumed.
  StringRef::const_iterator getPosition() const { return CurPtr; }
};
/// A markup tag is a name and a (usually empty) list of modifiers.
class MarkupTag {
StringRef Name;
StringRef Modifiers;
SMLoc StartLoc;
public:
MarkupTag(StringRef n, StringRef m, SMLoc Loc)
: Name(n), Modifiers(m), StartLoc(Loc) {}
StringRef getName() const { return Name; }
StringRef getModifiers() const { return Modifiers; }
SMLoc getLoc() const { return StartLoc; }
};
/// Minimal parser that turns lexer input into MarkupTag objects. It holds
/// references to the lexer it reads from and to the source manager it reports
/// diagnostics through; it owns neither.
class MarkupParser {
  MarkupLexer &Lex;
  SourceMgr &SM;

public:
  MarkupParser(MarkupLexer &Lexer, SourceMgr &Mgr) : Lex(Lexer), SM(Mgr) {}

  /// Build a MarkupTag starting at the lexer's current position.
  ///
  /// Call this once the lexer has consumed the opening '<'. Input is consumed
  /// up to and including the ':' that ends the tag open.
  MarkupTag parseTag();

  /// Emit an error diagnostic at \p Loc and terminate the program.
  void FatalError(SMLoc Loc, StringRef Msg);
};
/// Report \p Msg as an error at \p Loc through the source manager, then end
/// the process with exit status 1. This function does not return.
void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
  const SourceMgr::DiagKind Kind = SourceMgr::DK_Error;
  SM.PrintMessage(Loc, Kind, Msg);
  exit(1);
}
// Example handler invoked when the start of a tag has been recognized.
//
// In dump mode the tag is printed on its own line as "name" or
// "name modifiers". Otherwise, if the output stream supports colors, the
// output color is switched according to the tag name.
static void processStartTag(MarkupTag &Tag) {
  raw_ostream &OS = outs();
  const StringRef Name = Tag.getName();

  if (DumpTags) {
    const StringRef Mods = Tag.getModifiers();
    OS << Name;
    if (!Mods.empty())
      OS << " " << Mods;
    OS << "\n";
    return;
  }

  if (OS.has_colors()) {
    // Registers are shown in red and immediates in cyan. Neither has nested
    // tags, so no stack of colors to restore is kept.
    if (Name == "reg")
      OS.changeColor(raw_ostream::RED);
    else if (Name == "imm")
      OS.changeColor(raw_ostream::CYAN);
  }
}
// Example handler for when the end of a tag is recognized.
//
// Nothing is done in dump mode or when the output stream has no color
// support. Otherwise any color applied for the tag is undone.
static void processEndTag(MarkupTag &Tag) {
  // The tag itself is not inspected: every tag end is handled the same way.
  (void)Tag;

  // If we're printing the tags, there's nothing more to do here. Otherwise,
  // set the color back to normal.
  if (DumpTags)
    return;
  if (!outs().has_colors())
    return;

  // Restore the terminal's default attributes rather than forcing white, so
  // terminals whose default foreground is not white are left as we found
  // them.
  outs().resetColor();
}
/// Parse the tag that begins at the lexer's current position.
///
/// Expects the opening '<' to be the character immediately before the current
/// position. Consumes input up to and including the terminating ':'. The text
/// in between is split at the first space into the tag name and its
/// modifiers. Reaching end of input before a ':' is a fatal error.
MarkupTag MarkupParser::parseTag() {
  // First off, extract the tag into its own StringRef so we can look at it
  // outside of the context of consuming input.
  StringRef::const_iterator Start = Lex.getPosition();

  // The tag's location is the '<' that introduced it, one character back.
  SMLoc Loc = SMLoc::getFromPointer(Start - 1);

  while (Lex.getNextChar() != ':') {
    // EOF is an error. Report it at the opening '<', the same location used
    // for the tag everywhere else, rather than at the character after it.
    if (Lex.isEOF())
      FatalError(Loc, "unterminated markup tag");
  }

  // The lexer is now just past the ':'; leave the ':' out of the tag text.
  StringRef RawTag(Start, Lex.getPosition() - Start - 1);
  std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
  return MarkupTag(SplitTag.first, SplitTag.second, Loc);
}
/// Process one input file (or stdin, as resolved by
/// MemoryBuffer::getFileOrSTDIN) containing MC assembly markup.
///
/// Ordinary characters are echoed to outs() unless -dump-tags is set. Each
/// "<tag:" pushes a tag and each ">" pops one, calling the example start/end
/// handlers. "<<" and ">>" are escapes for a literal '<' and '>'. A '>' with
/// no open tag is a fatal error; tags still open at end of input are reported
/// as errors.
static void parseMCMarkup(StringRef Filename) {
  OwningPtr<MemoryBuffer> BufferPtr;
  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
    // Name the offending input so the failure is identifiable when several
    // files are given on the command line.
    errs() << ToolName << ": '" << Filename << "': " << ec.message() << '\n';
    return;
  }
  // Release the buffer from BufferPtr; the raw pointer is handed to SrcMgr.
  MemoryBuffer *Buffer = BufferPtr.take();

  SourceMgr SrcMgr;

  // Tell SrcMgr about this buffer, which is what the parser will pick up.
  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());

  StringRef InputSource = Buffer->getBuffer();
  MarkupLexer Lex(InputSource);
  MarkupParser Parser(Lex, SrcMgr);

  // Currently open tags, innermost last.
  SmallVector<MarkupTag, 4> TagStack;

  for (int CurChar = Lex.getNextChar();
       CurChar != EOF;
       CurChar = Lex.getNextChar()) {
    switch (CurChar) {
    case '<': {
      // A "<<" is output as a literal '<' and does not start a markup tag.
      if (Lex.peekNextChar() == '<') {
        (void)Lex.getNextChar();
        break;
      }
      // Parse the markup entry.
      TagStack.push_back(Parser.parseTag());

      // Do any special handling for the start of a tag.
      processStartTag(TagStack.back());
      // The tag text itself is never echoed.
      continue;
    }
    case '>': {
      SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
      // A ">>" is output as a literal '>' and does not end a markup tag.
      if (Lex.peekNextChar() == '>') {
        (void)Lex.getNextChar();
        break;
      }
      // Close out the innermost tag.
      if (TagStack.empty())
        Parser.FatalError(Loc, "'>' without matching '<'");

      // Do any special handling for the end of a tag.
      processEndTag(TagStack.back());

      TagStack.pop_back();
      // The closing '>' is never echoed.
      continue;
    }
    default:
      break;
    }
    // For anything else, just echo the character back out. The loop condition
    // already guarantees CurChar is not EOF here.
    if (!DumpTags)
      outs() << (char)CurChar;
  }

  // A tag that was never closed may have left its color active on the output
  // stream; put the terminal back to its default before reporting.
  if (!TagStack.empty() && !DumpTags && outs().has_colors())
    outs().resetColor();

  // If there are any unterminated markup tags, issue diagnostics for them.
  while (!TagStack.empty()) {
    MarkupTag &Tag = TagStack.back();
    SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
                        "unterminated markup tag");
    TagStack.pop_back();
  }
}
/// Tool entry point: parse the command line, then run the markup parser over
/// each input file in the order given (stdin when none is given).
int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram X(argc, argv);

  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");

  ToolName = argv[0];

  // If no input files specified, read from stdin.
  if (InputFilenames.empty())
    InputFilenames.push_back("-");

  // Use a plain loop rather than std::for_each: this file does not include
  // <algorithm>, so the algorithm was only reachable through other headers.
  for (unsigned i = 0, e = InputFilenames.size(); i != e; ++i)
    parseMCMarkup(InputFilenames[i]);

  return 0;
}