mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-31 08:16:47 +00:00 
			
		
		
		
	MIR Serialization: Introduce a lexer for machine instructions.
This commit adds a function that tokenizes the string containing the machine instruction. This commit also adds a struct called 'MIToken' which is used to represent the lexer's tokens. Reviewers: Sean Silva Differential Revision: http://reviews.llvm.org/D10521 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240323 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -1,4 +1,5 @@ | ||||
| add_llvm_library(LLVMMIRParser | ||||
|   MILexer.cpp | ||||
|   MIParser.cpp | ||||
|   MIRParser.cpp | ||||
|   ) | ||||
|   | ||||
							
								
								
									
										87
									
								
								lib/CodeGen/MIRParser/MILexer.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								lib/CodeGen/MIRParser/MILexer.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| //===- MILexer.cpp - Machine instructions lexer implementation ----------===// | ||||
| // | ||||
| //                     The LLVM Compiler Infrastructure | ||||
| // | ||||
| // This file is distributed under the University of Illinois Open Source | ||||
| // License. See LICENSE.TXT for details. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
| // | ||||
| // This file implements the lexing of machine instructions. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #include "MILexer.h" | ||||
| #include "llvm/ADT/Twine.h" | ||||
| #include <cctype> | ||||
|  | ||||
| using namespace llvm; | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| /// This class provides a way to iterate and get characters from the source | ||||
| /// string. | ||||
| class Cursor { | ||||
|   const char *Ptr; | ||||
|   const char *End; | ||||
|  | ||||
| public: | ||||
|   explicit Cursor(StringRef Str) { | ||||
|     Ptr = Str.data(); | ||||
|     End = Ptr + Str.size(); | ||||
|   } | ||||
|  | ||||
|   bool isEOF() const { return Ptr == End; } | ||||
|  | ||||
|   char peek() const { return isEOF() ? 0 : *Ptr; } | ||||
|  | ||||
|   void advance() { ++Ptr; } | ||||
|  | ||||
|   StringRef remaining() const { return StringRef(Ptr, End - Ptr); } | ||||
|  | ||||
|   StringRef upto(Cursor C) const { | ||||
|     assert(C.Ptr >= Ptr && C.Ptr <= End); | ||||
|     return StringRef(Ptr, C.Ptr - Ptr); | ||||
|   } | ||||
|  | ||||
|   StringRef::iterator location() const { return Ptr; } | ||||
| }; | ||||
|  | ||||
| } // end anonymous namespace | ||||
|  | ||||
| /// Skip the leading whitespace characters and return the updated cursor. | ||||
| static Cursor skipWhitespace(Cursor C) { | ||||
|   while (isspace(C.peek())) | ||||
|     C.advance(); | ||||
|   return C; | ||||
| } | ||||
|  | ||||
/// Return true if \p C may appear inside an identifier token: a letter, a
/// digit, '_', '-' or '.'.
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behaviour.
  const auto UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.';
}
|  | ||||
| static Cursor lexIdentifier(Cursor C, MIToken &Token) { | ||||
|   auto Range = C; | ||||
|   while (isIdentifierChar(C.peek())) | ||||
|     C.advance(); | ||||
|   Token = MIToken(MIToken::Identifier, Range.upto(C)); | ||||
|   return C; | ||||
| } | ||||
|  | ||||
| StringRef llvm::lexMIToken( | ||||
|     StringRef Source, MIToken &Token, | ||||
|     function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { | ||||
|   auto C = skipWhitespace(Cursor(Source)); | ||||
|   if (C.isEOF()) { | ||||
|     Token = MIToken(MIToken::Eof, C.remaining()); | ||||
|     return C.remaining(); | ||||
|   } | ||||
|  | ||||
|   auto Char = C.peek(); | ||||
|   if (isalpha(Char) || Char == '_') | ||||
|     return lexIdentifier(C, Token).remaining(); | ||||
|   Token = MIToken(MIToken::Error, C.remaining()); | ||||
|   ErrorCallback(C.location(), | ||||
|                 Twine("unexpected character '") + Twine(Char) + "'"); | ||||
|   return C.remaining(); | ||||
| } | ||||
							
								
								
									
										65
									
								
								lib/CodeGen/MIRParser/MILexer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								lib/CodeGen/MIRParser/MILexer.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| //===- MILexer.h - Lexer for machine instructions -------------------------===// | ||||
| // | ||||
| //                     The LLVM Compiler Infrastructure | ||||
| // | ||||
| // This file is distributed under the University of Illinois Open Source | ||||
| // License. See LICENSE.TXT for details. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
| // | ||||
| // This file declares the function that lexes the machine instruction source | ||||
| // string. | ||||
| // | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H | ||||
| #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H | ||||
|  | ||||
| #include "llvm/ADT/StringRef.h" | ||||
| #include "llvm/ADT/STLExtras.h" | ||||
| #include <functional> | ||||
|  | ||||
| namespace llvm { | ||||
|  | ||||
| class Twine; | ||||
|  | ||||
| /// A token produced by the machine instruction lexer. | ||||
| struct MIToken { | ||||
|   enum TokenKind { | ||||
|     // Markers | ||||
|     Eof, | ||||
|     Error, | ||||
|  | ||||
|     // Identifier tokens | ||||
|     Identifier | ||||
|   }; | ||||
|  | ||||
| private: | ||||
|   TokenKind Kind; | ||||
|   StringRef Range; | ||||
|  | ||||
| public: | ||||
|   MIToken(TokenKind Kind, StringRef Range) : Kind(Kind), Range(Range) {} | ||||
|  | ||||
|   TokenKind kind() const { return Kind; } | ||||
|  | ||||
|   bool isError() const { return Kind == Error; } | ||||
|  | ||||
|   bool is(TokenKind K) const { return Kind == K; } | ||||
|  | ||||
|   bool isNot(TokenKind K) const { return Kind != K; } | ||||
|  | ||||
|   StringRef::iterator location() const { return Range.begin(); } | ||||
|  | ||||
|   StringRef stringValue() const { return Range; } | ||||
| }; | ||||
|  | ||||
| /// Consume a single machine instruction token in the given source and return | ||||
| /// the remaining source string. | ||||
| StringRef lexMIToken( | ||||
|     StringRef Source, MIToken &Token, | ||||
|     function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback); | ||||
|  | ||||
| } // end namespace llvm | ||||
|  | ||||
| #endif | ||||
| @@ -12,6 +12,7 @@ | ||||
| //===----------------------------------------------------------------------===// | ||||
|  | ||||
| #include "MIParser.h" | ||||
| #include "MILexer.h" | ||||
| #include "llvm/ADT/StringMap.h" | ||||
| #include "llvm/CodeGen/MachineBasicBlock.h" | ||||
| #include "llvm/CodeGen/MachineFunction.h" | ||||
| @@ -29,7 +30,8 @@ class MIParser { | ||||
|   SourceMgr &SM; | ||||
|   MachineFunction &MF; | ||||
|   SMDiagnostic &Error; | ||||
|   StringRef Source; | ||||
|   StringRef Source, CurrentSource; | ||||
|   MIToken Token; | ||||
|   /// Maps from instruction names to op codes. | ||||
|   StringMap<unsigned> Names2InstrOpCodes; | ||||
|  | ||||
| @@ -37,11 +39,18 @@ public: | ||||
|   MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, | ||||
|            StringRef Source); | ||||
|  | ||||
|   void lex(); | ||||
|  | ||||
|   /// Report an error at the current location with the given message. | ||||
|   /// | ||||
|   /// This function always returns true. | ||||
|   bool error(const Twine &Msg); | ||||
|  | ||||
|   /// Report an error at the given location with the given message. | ||||
|   /// | ||||
|   /// This function always returns true. | ||||
|   bool error(StringRef::iterator Loc, const Twine &Msg); | ||||
|  | ||||
|   MachineInstr *parse(); | ||||
|  | ||||
| private: | ||||
| @@ -50,31 +59,42 @@ private: | ||||
|   /// Try to convert an instruction name to an opcode. Return true if the | ||||
|   /// instruction name is invalid. | ||||
|   bool parseInstrName(StringRef InstrName, unsigned &OpCode); | ||||
|  | ||||
|   bool parseInstruction(unsigned &OpCode); | ||||
| }; | ||||
|  | ||||
| } // end anonymous namespace | ||||
|  | ||||
| MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, | ||||
|                    StringRef Source) | ||||
|     : SM(SM), MF(MF), Error(Error), Source(Source) {} | ||||
|     : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), | ||||
|       Token(MIToken::Error, StringRef()) {} | ||||
|  | ||||
| bool MIParser::error(const Twine &Msg) { | ||||
| void MIParser::lex() { | ||||
|   CurrentSource = lexMIToken( | ||||
|       CurrentSource, Token, | ||||
|       [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); | ||||
| } | ||||
|  | ||||
| bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } | ||||
|  | ||||
| bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { | ||||
|   // TODO: Get the proper location in the MIR file, not just a location inside | ||||
|   // the string. | ||||
|   Error = | ||||
|       SMDiagnostic(SM, SMLoc(), SM.getMemoryBuffer(SM.getMainFileID()) | ||||
|                                     ->getBufferIdentifier(), | ||||
|                    1, 0, SourceMgr::DK_Error, Msg.str(), Source, None, None); | ||||
|   assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); | ||||
|   Error = SMDiagnostic( | ||||
|       SM, SMLoc(), | ||||
|       SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, | ||||
|       Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| MachineInstr *MIParser::parse() { | ||||
|   StringRef InstrName = Source; | ||||
|   lex(); | ||||
|  | ||||
|   unsigned OpCode; | ||||
|   if (parseInstrName(InstrName, OpCode)) { | ||||
|     error(Twine("unknown machine instruction name '") + InstrName + "'"); | ||||
|   if (Token.isError() || parseInstruction(OpCode)) | ||||
|     return nullptr; | ||||
|   } | ||||
|  | ||||
|   // TODO: Parse the rest of instruction - machine operands, etc. | ||||
|   const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); | ||||
| @@ -82,6 +102,15 @@ MachineInstr *MIParser::parse() { | ||||
|   return MI; | ||||
| } | ||||
|  | ||||
| bool MIParser::parseInstruction(unsigned &OpCode) { | ||||
|   if (Token.isNot(MIToken::Identifier)) | ||||
|     return error("expected a machine instruction"); | ||||
|   StringRef InstrName = Token.stringValue(); | ||||
|   if (parseInstrName(InstrName, OpCode)) | ||||
|     return error(Twine("unknown machine instruction name '") + InstrName + "'"); | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| void MIParser::initNames2InstrOpCodes() { | ||||
|   if (!Names2InstrOpCodes.empty()) | ||||
|     return; | ||||
|   | ||||
| @@ -20,5 +20,5 @@ body: | ||||
|      # CHECK:      - IMUL32rri8 | ||||
|      # CHECK-NEXT: - RETQ | ||||
|      - IMUL32rri8 | ||||
|      - RETQ | ||||
|      - ' RETQ ' | ||||
| ... | ||||
|   | ||||
							
								
								
									
										18
									
								
								test/CodeGen/MIR/X86/missing-instruction.mir
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								test/CodeGen/MIR/X86/missing-instruction.mir
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| # RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s | ||||
|  | ||||
| --- | | ||||
|  | ||||
|   define void @foo() { | ||||
|   entry: | ||||
|     ret void | ||||
|   } | ||||
|  | ||||
| ... | ||||
| --- | ||||
| name:            foo | ||||
| body: | ||||
|  - name:         entry | ||||
|    instructions: | ||||
|      # CHECK: 1:1: expected a machine instruction | ||||
|      - '' | ||||
| ... | ||||
							
								
								
									
										18
									
								
								test/CodeGen/MIR/X86/unrecognized-character.mir
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								test/CodeGen/MIR/X86/unrecognized-character.mir
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| # RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s | ||||
|  | ||||
| --- | | ||||
|  | ||||
|   define void @foo() { | ||||
|   entry: | ||||
|     ret void | ||||
|   } | ||||
|  | ||||
| ... | ||||
| --- | ||||
| name:            foo | ||||
| body: | ||||
|  - name:         entry | ||||
|    instructions: | ||||
|      # CHECK: 1:1: unexpected character '`' | ||||
|      - '` RETQ' | ||||
| ... | ||||
		Reference in New Issue
	
	Block a user