mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-26 05:25:47 +00:00
MIR Serialization: Introduce a lexer for machine instructions.
This commit adds a function that tokenizes the string containing the machine instruction. This commit also adds a struct called 'MIToken' which is used to represent the lexer's tokens. Reviewers: Sean Silva Differential Revision: http://reviews.llvm.org/D10521 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240323 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
add_llvm_library(LLVMMIRParser
|
add_llvm_library(LLVMMIRParser
|
||||||
|
MILexer.cpp
|
||||||
MIParser.cpp
|
MIParser.cpp
|
||||||
MIRParser.cpp
|
MIRParser.cpp
|
||||||
)
|
)
|
||||||
|
87
lib/CodeGen/MIRParser/MILexer.cpp
Normal file
87
lib/CodeGen/MIRParser/MILexer.cpp
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file implements the lexing of machine instructions.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "MILexer.h"
|
||||||
|
#include "llvm/ADT/Twine.h"
|
||||||
|
#include <cctype>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
/// This class provides a way to iterate and get characters from the source
|
||||||
|
/// string.
|
||||||
|
class Cursor {
|
||||||
|
const char *Ptr;
|
||||||
|
const char *End;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit Cursor(StringRef Str) {
|
||||||
|
Ptr = Str.data();
|
||||||
|
End = Ptr + Str.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isEOF() const { return Ptr == End; }
|
||||||
|
|
||||||
|
char peek() const { return isEOF() ? 0 : *Ptr; }
|
||||||
|
|
||||||
|
void advance() { ++Ptr; }
|
||||||
|
|
||||||
|
StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
|
||||||
|
|
||||||
|
StringRef upto(Cursor C) const {
|
||||||
|
assert(C.Ptr >= Ptr && C.Ptr <= End);
|
||||||
|
return StringRef(Ptr, C.Ptr - Ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
StringRef::iterator location() const { return Ptr; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end anonymous namespace
|
||||||
|
|
||||||
|
/// Skip the leading whitespace characters and return the updated cursor.
|
||||||
|
static Cursor skipWhitespace(Cursor C) {
|
||||||
|
while (isspace(C.peek()))
|
||||||
|
C.advance();
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return true if \p C may appear inside an identifier token: any character
/// classified as alphanumeric by <cctype>, or one of '_', '-' and '.'.
static bool isIdentifierChar(char C) {
  // The <cctype> predicates have undefined behaviour for negative values other
  // than EOF, so convert the (possibly signed) char to unsigned char first.
  const unsigned char UC = static_cast<unsigned char>(C);
  return isalnum(UC) || C == '_' || C == '-' || C == '.';
}
|
||||||
|
|
||||||
|
/// Lex an identifier that begins at \p C.
///
/// Consumes the longest run of identifier characters, stores the resulting
/// Identifier token in \p Token and returns the cursor positioned just past
/// the identifier.
static Cursor lexIdentifier(Cursor C, MIToken &Token) {
  const Cursor Start = C;
  for (; isIdentifierChar(C.peek()); C.advance())
    ;
  Token = MIToken(MIToken::Identifier, Start.upto(C));
  return C;
}
|
||||||
|
|
||||||
|
/// Lex a single token from the beginning of \p Source.
///
/// Leading whitespace is skipped first. Then:
///  - at the end of the input, \p Token becomes an Eof token;
///  - if the next character is a letter or '_', an identifier is lexed and
///    \p Token becomes an Identifier token;
///  - otherwise \p Token becomes an Error token and \p ErrorCallback is
///    invoked with the location of the offending character and a message.
///
/// \returns the source text that follows the lexed token. In the error case
/// the offending character is not consumed, so the returned string still
/// starts at it.
StringRef llvm::lexMIToken(
    StringRef Source, MIToken &Token,
    function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
  auto C = skipWhitespace(Cursor(Source));
  if (C.isEOF()) {
    Token = MIToken(MIToken::Eof, C.remaining());
    return C.remaining();
  }

  auto Char = C.peek();
  // The <cctype> predicates have undefined behaviour for negative values other
  // than EOF, so convert the (possibly signed) char to unsigned char first.
  if (isalpha(static_cast<unsigned char>(Char)) || Char == '_')
    return lexIdentifier(C, Token).remaining();
  Token = MIToken(MIToken::Error, C.remaining());
  ErrorCallback(C.location(),
                Twine("unexpected character '") + Twine(Char) + "'");
  return C.remaining();
}
|
65
lib/CodeGen/MIRParser/MILexer.h
Normal file
65
lib/CodeGen/MIRParser/MILexer.h
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
//===- MILexer.h - Lexer for machine instructions -------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file declares the function that lexes the machine instruction source
|
||||||
|
// string.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
|
||||||
|
#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
|
||||||
|
|
||||||
|
#include "llvm/ADT/StringRef.h"
|
||||||
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class Twine;
|
||||||
|
|
||||||
|
/// A token produced by the machine instruction lexer.
|
||||||
|
struct MIToken {
|
||||||
|
enum TokenKind {
|
||||||
|
// Markers
|
||||||
|
Eof,
|
||||||
|
Error,
|
||||||
|
|
||||||
|
// Identifier tokens
|
||||||
|
Identifier
|
||||||
|
};
|
||||||
|
|
||||||
|
private:
|
||||||
|
TokenKind Kind;
|
||||||
|
StringRef Range;
|
||||||
|
|
||||||
|
public:
|
||||||
|
MIToken(TokenKind Kind, StringRef Range) : Kind(Kind), Range(Range) {}
|
||||||
|
|
||||||
|
TokenKind kind() const { return Kind; }
|
||||||
|
|
||||||
|
bool isError() const { return Kind == Error; }
|
||||||
|
|
||||||
|
bool is(TokenKind K) const { return Kind == K; }
|
||||||
|
|
||||||
|
bool isNot(TokenKind K) const { return Kind != K; }
|
||||||
|
|
||||||
|
StringRef::iterator location() const { return Range.begin(); }
|
||||||
|
|
||||||
|
StringRef stringValue() const { return Range; }
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Consume a single machine instruction token in the given source and return
|
||||||
|
/// the remaining source string.
///
/// The lexed token is stored in \p Token. \p ErrorCallback receives a location
/// inside \p Source and a message when an unexpected character is found.
|
||||||
|
StringRef lexMIToken(
|
||||||
|
StringRef Source, MIToken &Token,
|
||||||
|
function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
|
||||||
|
|
||||||
|
} // end namespace llvm
|
||||||
|
|
||||||
|
#endif
|
@@ -12,6 +12,7 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "MIParser.h"
|
#include "MIParser.h"
|
||||||
|
#include "MILexer.h"
|
||||||
#include "llvm/ADT/StringMap.h"
|
#include "llvm/ADT/StringMap.h"
|
||||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
@@ -29,7 +30,8 @@ class MIParser {
|
|||||||
SourceMgr &SM;
|
SourceMgr &SM;
|
||||||
MachineFunction &MF;
|
MachineFunction &MF;
|
||||||
SMDiagnostic &Error;
|
SMDiagnostic &Error;
|
||||||
StringRef Source;
|
StringRef Source, CurrentSource;
|
||||||
|
MIToken Token;
|
||||||
/// Maps from instruction names to op codes.
|
/// Maps from instruction names to op codes.
|
||||||
StringMap<unsigned> Names2InstrOpCodes;
|
StringMap<unsigned> Names2InstrOpCodes;
|
||||||
|
|
||||||
@@ -37,11 +39,18 @@ public:
|
|||||||
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
|
MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
|
||||||
StringRef Source);
|
StringRef Source);
|
||||||
|
|
||||||
|
void lex();
|
||||||
|
|
||||||
/// Report an error at the current location with the given message.
|
/// Report an error at the current location with the given message.
|
||||||
///
|
///
|
||||||
/// This function always returns true.
|
/// This function always returns true.
|
||||||
bool error(const Twine &Msg);
|
bool error(const Twine &Msg);
|
||||||
|
|
||||||
|
/// Report an error at the given location with the given message.
|
||||||
|
///
|
||||||
|
/// This function always returns true.
|
||||||
|
bool error(StringRef::iterator Loc, const Twine &Msg);
|
||||||
|
|
||||||
MachineInstr *parse();
|
MachineInstr *parse();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -50,31 +59,42 @@ private:
|
|||||||
/// Try to convert an instruction name to an opcode. Return true if the
|
/// Try to convert an instruction name to an opcode. Return true if the
|
||||||
/// instruction name is invalid.
|
/// instruction name is invalid.
|
||||||
bool parseInstrName(StringRef InstrName, unsigned &OpCode);
|
bool parseInstrName(StringRef InstrName, unsigned &OpCode);
|
||||||
|
|
||||||
|
bool parseInstruction(unsigned &OpCode);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end anonymous namespace
|
} // end anonymous namespace
|
||||||
|
|
||||||
MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
|
MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
|
||||||
StringRef Source)
|
StringRef Source)
|
||||||
: SM(SM), MF(MF), Error(Error), Source(Source) {}
|
: SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
|
||||||
|
Token(MIToken::Error, StringRef()) {}
|
||||||
|
|
||||||
bool MIParser::error(const Twine &Msg) {
|
void MIParser::lex() {
|
||||||
|
CurrentSource = lexMIToken(
|
||||||
|
CurrentSource, Token,
|
||||||
|
[this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
|
||||||
|
|
||||||
|
bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
|
||||||
// TODO: Get the proper location in the MIR file, not just a location inside
|
// TODO: Get the proper location in the MIR file, not just a location inside
|
||||||
// the string.
|
// the string.
|
||||||
Error =
|
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
|
||||||
SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID())
|
Error = SMDiagnostic(
|
||||||
->getBufferIdentifier(),
|
SM, SMLoc(),
|
||||||
1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None);
|
SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1,
|
||||||
|
Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
MachineInstr *MIParser::parse() {
|
MachineInstr *MIParser::parse() {
|
||||||
StringRef InstrName = Source;
|
lex();
|
||||||
|
|
||||||
unsigned OpCode;
|
unsigned OpCode;
|
||||||
if (parseInstrName(InstrName, OpCode)) {
|
if (Token.isError() || parseInstruction(OpCode))
|
||||||
error(Twine("unknown machine instruction name '") + InstrName + "'");
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Parse the rest of instruction - machine operands, etc.
|
// TODO: Parse the rest of instruction - machine operands, etc.
|
||||||
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
|
const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
|
||||||
@@ -82,6 +102,15 @@ MachineInstr *MIParser::parse() {
|
|||||||
return MI;
|
return MI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MIParser::parseInstruction(unsigned &OpCode) {
|
||||||
|
if (Token.isNot(MIToken::Identifier))
|
||||||
|
return error("expected a machine instruction");
|
||||||
|
StringRef InstrName = Token.stringValue();
|
||||||
|
if (parseInstrName(InstrName, OpCode))
|
||||||
|
return error(Twine("unknown machine instruction name '") + InstrName + "'");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void MIParser::initNames2InstrOpCodes() {
|
void MIParser::initNames2InstrOpCodes() {
|
||||||
if (!Names2InstrOpCodes.empty())
|
if (!Names2InstrOpCodes.empty())
|
||||||
return;
|
return;
|
||||||
|
@@ -20,5 +20,5 @@ body:
|
|||||||
# CHECK: - IMUL32rri8
|
# CHECK: - IMUL32rri8
|
||||||
# CHECK-NEXT: - RETQ
|
# CHECK-NEXT: - RETQ
|
||||||
- IMUL32rri8
|
- IMUL32rri8
|
||||||
- RETQ
|
- ' RETQ '
|
||||||
...
|
...
|
||||||
|
18
test/CodeGen/MIR/X86/missing-instruction.mir
Normal file
18
test/CodeGen/MIR/X86/missing-instruction.mir
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
|
||||||
|
|
||||||
|
--- |
|
||||||
|
|
||||||
|
define void @foo() {
|
||||||
|
entry:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: foo
|
||||||
|
body:
|
||||||
|
- name: entry
|
||||||
|
instructions:
|
||||||
|
# CHECK: 1:1: expected a machine instruction
|
||||||
|
- ''
|
||||||
|
...
|
18
test/CodeGen/MIR/X86/unrecognized-character.mir
Normal file
18
test/CodeGen/MIR/X86/unrecognized-character.mir
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
|
||||||
|
|
||||||
|
--- |
|
||||||
|
|
||||||
|
define void @foo() {
|
||||||
|
entry:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: foo
|
||||||
|
body:
|
||||||
|
- name: entry
|
||||||
|
instructions:
|
||||||
|
# CHECK: 1:1: unexpected character '`'
|
||||||
|
- '` RETQ'
|
||||||
|
...
|
Reference in New Issue
Block a user