MC: Simple example parser for MC assembly markup.

Nothing fancy, just a simple demonstration parser.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167181 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jim Grosbach 2012-10-31 23:24:13 +00:00
parent a5526a9bff
commit 7dd4dc8892
9 changed files with 290 additions and 2 deletions

View File

@ -0,0 +1,16 @@
// RUN: llvm-mcmarkup -dump-tags %s | FileCheck %s
// The -dump-tags mode prints only the tag names, one per line, in the order
// the tags are opened. In the default echo mode these comment lines would be
// copied to the output and every check would match its own text.
push {<reg:r1>, <reg:r2>, <reg:r7>}
sub <reg:sp>, <imm:#132>
ldr <reg:r0>, <mem:[<reg:r0>, <imm:#4>]>
// CHECK: reg
// CHECK: reg
// CHECK: reg
// CHECK: reg
// CHECK: imm
// CHECK: reg
// CHECK: mem
// CHECK: reg
// CHECK: imm

View File

@ -0,0 +1,2 @@
# Set the test-file suffix list for this directory to '.mc' only.
config.suffixes = ['.mc']

View File

@ -36,6 +36,7 @@ add_subdirectory(bugpoint)
add_subdirectory(bugpoint-passes) add_subdirectory(bugpoint-passes)
add_subdirectory(llvm-bcanalyzer) add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stress) add_subdirectory(llvm-stress)
add_subdirectory(llvm-mcmarkup)
if( NOT WIN32 ) if( NOT WIN32 )
add_subdirectory(lto) add_subdirectory(lto)

View File

@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===; ;===------------------------------------------------------------------------===;
[common] [common]
subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
[component_0] [component_0]
type = Group type = Group

View File

@ -34,7 +34,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
bugpoint llvm-bcanalyzer \ bugpoint llvm-bcanalyzer \
llvm-diff macho-dump llvm-objdump llvm-readobj \ llvm-diff macho-dump llvm-objdump llvm-readobj \
llvm-rtdyld llvm-dwarfdump llvm-cov \ llvm-rtdyld llvm-dwarfdump llvm-cov \
llvm-size llvm-stress llvm-size llvm-stress llvm-mcmarkup
# Let users override the set of tools to build from the command line. # Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS ifdef ONLY_TOOLS

View File

@ -0,0 +1,5 @@
# llvm-mcmarkup is built from a single source file and lists only the
# "support" component in LLVM_LINK_COMPONENTS.
set(LLVM_LINK_COMPONENTS support)
add_llvm_tool(llvm-mcmarkup
llvm-mcmarkup.cpp
)

View File

@ -0,0 +1,22 @@
;===- ./tools/llvm-mcmarkup/LLVMBuild.txt ----------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Tool
name = llvm-mcmarkup
parent = Tools
required_libraries = Support

View File

@ -0,0 +1,17 @@
##===- tools/llvm-mcmarkup/Makefile ------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL := ../..
TOOLNAME := llvm-mcmarkup
LINK_COMPONENTS := support
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,225 @@
//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Example simple parser implementation for the MC assembly markup language.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
static cl::list<std::string>
InputFilenames(cl::Positional, cl::desc("<input files>"),
cl::ZeroOrMore);
static cl::opt<bool>
DumpTags("dump-tags", cl::desc("List all tags encountered in input"));
static StringRef ToolName;
/// Trivial lexer for the markup parser. Input is always handled a character
/// at a time. The lexer just encapsulates EOF and lookahead handling.
///
/// Characters are handed out as non-negative int values (each byte is
/// converted through unsigned char first), so no byte of the input, not even
/// 0xFF on targets where plain char is signed, can compare equal to EOF.
class MarkupLexer {
  /// The next character to be consumed.
  StringRef::const_iterator CurPtr;
  /// One past the last character of the input.
  StringRef::const_iterator End;

public:
  /// Start lexing at the first character of \p Source. Only iterators into
  /// \p Source are stored; the text itself is not copied.
  MarkupLexer(StringRef Source) : CurPtr(Source.begin()), End(Source.end()) {}

  /// Return true once every character of the input has been consumed.
  bool isEOF() const { return CurPtr == End; }

  /// Consume and return the next character, or EOF if the input is exhausted.
  int getNextChar() {
    if (CurPtr == End)
      return EOF;
    // Go through unsigned char so a high byte never turns into a negative
    // value that could be mistaken for EOF.
    return static_cast<unsigned char>(*CurPtr++);
  }

  /// Return the next character without consuming it, or EOF if the input is
  /// exhausted.
  int peekNextChar() const {
    if (CurPtr == End)
      return EOF;
    return static_cast<unsigned char>(*CurPtr);
  }

  /// Return the position of the next character to be consumed.
  StringRef::const_iterator getPosition() const { return CurPtr; }
};
/// A markup tag is a name and a (usually empty) list of modifiers.
class MarkupTag {
StringRef Name;
StringRef Modifiers;
SMLoc StartLoc;
public:
MarkupTag(StringRef n, StringRef m, SMLoc Loc)
: Name(n), Modifiers(m), StartLoc(Loc) {}
StringRef getName() const { return Name; }
StringRef getModifiers() const { return Modifiers; }
SMLoc getLoc() const { return StartLoc; }
};
/// Minimal parser that turns lexer input into MarkupTag objects. It borrows
/// both the lexer it reads from and the SourceMgr it reports errors through;
/// it owns neither.
class MarkupParser {
  MarkupLexer &Lex;
  SourceMgr &SM;

public:
  /// Bind the parser to the lexer \p Lexer and the source manager \p Mgr.
  MarkupParser(MarkupLexer &Lexer, SourceMgr &Mgr) : Lex(Lexer), SM(Mgr) {}

  /// Build a MarkupTag starting at the lexer's current position.
  ///
  /// Call this right after the lexer has consumed the opening '<'. Input is
  /// consumed up to and including the ':' that ends the tag open.
  MarkupTag parseTag();

  /// Print \p Msg as an error at \p Loc and terminate the program.
  void FatalError(SMLoc Loc, StringRef Msg);
};
/// Report \p Msg as an error diagnostic at \p Loc through the source manager,
/// then end the process with exit status 1. This function does not return.
void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
  // Exit status used for every fatal markup error.
  const int FailureStatus = 1;

  SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
  exit(FailureStatus);
}
// Example handler invoked each time an opening tag has been parsed.
//
// With -dump-tags, the tag name (plus its modifiers, if any) is printed on a
// line of its own. Otherwise, when the output stream supports colors, the
// text of "reg" tags is switched to red and that of "imm" tags to cyan; all
// other tags leave the color untouched.
static void processStartTag(MarkupTag &Tag) {
  raw_ostream &OS = outs();
  const StringRef TagName = Tag.getName();

  if (DumpTags) {
    const StringRef TagModifiers = Tag.getModifiers();
    OS << TagName;
    if (!TagModifiers.empty())
      OS << " " << TagModifiers;
    OS << "\n";
    return;
  }

  // Nothing to do for streams that cannot display colors.
  if (!OS.has_colors())
    return;

  // Registers and immediates never contain nested tags, so there is no need
  // to remember a stack of colors to return to.
  if (TagName == "reg") {
    OS.changeColor(raw_ostream::RED);
    return;
  }
  if (TagName == "imm")
    OS.changeColor(raw_ostream::CYAN);
}
// Example handler for when the end of a tag is recognized.
//
// With -dump-tags there is nothing to do. Otherwise, when the output stream
// supports colors, any color selected for the tag is dropped again.
// \p Tag is currently unused; it is kept so the start and end handlers have
// parallel signatures.
static void processEndTag(MarkupTag &Tag) {
  // If we're printing the tags, there's nothing more to do here. Otherwise,
  // set the color back to normal.
  if (DumpTags)
    return;
  if (!outs().has_colors())
    return;
  // Restore the terminal's default attributes instead of forcing white, which
  // would be wrong on terminals whose default foreground is not white.
  outs().resetColor();
}
/// Parse the tag open that starts at the lexer's current position (just past
/// the '<') and return it as a MarkupTag. Everything up to and including the
/// terminating ':' is consumed. Hitting the end of input first is a fatal
/// error.
MarkupTag MarkupParser::parseTag() {
  // Remember where the tag text begins so it can be examined as a whole once
  // the terminator has been found. The tag's location is the '<' itself,
  // one character before the current position.
  const StringRef::const_iterator TagBegin = Lex.getPosition();
  const SMLoc OpenLoc = SMLoc::getFromPointer(TagBegin - 1);

  // Consume characters until the ':' that closes the tag open.
  for (;;) {
    if (Lex.getNextChar() == ':')
      break;
    // Running out of input before the ':' is an error.
    if (Lex.isEOF())
      FatalError(SMLoc::getFromPointer(TagBegin), "unterminated markup tag");
  }

  // The lexer now sits just past the ':'; leave the ':' out of the tag text.
  const StringRef TagText(TagBegin, Lex.getPosition() - TagBegin - 1);

  // Text before the first space is the name; the rest is the modifier list.
  const std::pair<StringRef, StringRef> NameAndModifiers = TagText.split(' ');
  return MarkupTag(NameAndModifiers.first, NameAndModifiers.second, OpenLoc);
}
/// Process the markup in the file \p Filename (read via getFileOrSTDIN).
///
/// Each '<' opens a tag and each '>' closes the innermost open tag; the
/// doubled forms "<<" and ">>" stand for a literal '<' and '>'. Unless
/// -dump-tags is given, every character that is not part of the markup is
/// echoed to outs().
///
/// NOTE(review): a file that cannot be read, and tags still open at the end
/// of input, are only reported on the error stream. This function returns
/// void, so callers cannot observe those failures.
static void parseMCMarkup(StringRef Filename) {
  OwningPtr<MemoryBuffer> FileBuffer;
  error_code EC = MemoryBuffer::getFileOrSTDIN(Filename, FileBuffer);
  if (EC) {
    errs() << ToolName << ": " << EC.message() << '\n';
    return;
  }

  // Hand the buffer over to the source manager, which the parser uses when
  // it prints diagnostics.
  MemoryBuffer *Buffer = FileBuffer.take();
  SourceMgr SrcMgr;
  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());

  MarkupLexer Lex(Buffer->getBuffer());
  MarkupParser Parser(Lex, SrcMgr);

  // Tags that have been opened but not yet closed, innermost last.
  SmallVector<MarkupTag, 4> OpenTags;

  int Ch;
  while ((Ch = Lex.getNextChar()) != EOF) {
    if (Ch == '<') {
      if (Lex.peekNextChar() != '<') {
        // A real tag open: parse it and run the start-of-tag handler.
        OpenTags.push_back(Parser.parseTag());
        processStartTag(OpenTags.back());
        continue;
      }
      // "<<" is an escaped '<'. Swallow the second one; the first is echoed
      // below.
      (void)Lex.getNextChar();
    } else if (Ch == '>') {
      SMLoc CloseLoc = SMLoc::getFromPointer(Lex.getPosition() - 1);
      if (Lex.peekNextChar() != '>') {
        // A real tag close: it must match an open tag.
        if (OpenTags.empty())
          Parser.FatalError(CloseLoc, "'>' without matching '<'");
        processEndTag(OpenTags.back());
        OpenTags.pop_back();
        continue;
      }
      // ">>" is an escaped '>'. Swallow the second one; the first is echoed
      // below.
      (void)Lex.getNextChar();
    }

    // Everything that is not markup is copied through unchanged.
    if (!DumpTags)
      outs() << (char)Ch;
  }

  // Whatever is still open at the end of the input was never closed. Report
  // each such tag, innermost first.
  for (; !OpenTags.empty(); OpenTags.pop_back())
    SrcMgr.PrintMessage(OpenTags.back().getLoc(), SourceMgr::DK_Error,
                        "unterminated markup tag");
}
/// Tool entry point: parse the command line, then run the markup parser over
/// each input file in the order given. Standard input ("-") is used when no
/// file is named. Always returns 0.
int main(int argc, char **argv) {
  // Install the crash handlers first so a stack trace is printed if the tool
  // dies on a signal.
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram StackPrinter(argc, argv);
  llvm_shutdown_obj ShutdownOnExit; // Calls llvm_shutdown() on exit.

  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
  ToolName = argv[0];

  // With no positional arguments, fall back to reading standard input.
  if (InputFilenames.empty())
    InputFilenames.push_back("-");

  for (unsigned Idx = 0, NumFiles = InputFilenames.size(); Idx != NumFiles;
       ++Idx)
    parseMCMarkup(InputFilenames[Idx]);

  return 0;
}