From a027d222e18ea9028e9e12ae2f5cd566889b599a Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 31 Jul 2009 02:32:59 +0000 Subject: [PATCH] llvm-mc: Match a few X86 instructions. - This is "experimental" code, I am feeling my way around and working out the best way to do things (and learning tblgen in the process). Comments welcome, but keep in mind this stuff will change radically. - This is enough to match "subb" and friends, but not much else. The next step is to automatically generate the matchers for individual operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77657 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 79 ++++++- test/MC/AsmParser/x86_instructions.s | 5 + test/MC/AsmParser/x86_operands.s | 2 +- utils/TableGen/AsmMatcherEmitter.cpp | 239 ++++++++++++++++++++++ 4 files changed, 316 insertions(+), 9 deletions(-) create mode 100644 test/MC/AsmParser/x86_instructions.s diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 08db8a1f1c6..7f4df0008bd 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmLexer.h" #include "llvm/MC/MCAsmParser.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetRegistry.h" @@ -291,16 +292,10 @@ bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) { return false; } -bool -X86ATTAsmParser::MatchInstruction(const StringRef &Name, - SmallVectorImpl &Operands, - MCInst &Inst) { - return false; -} - bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { SmallVector Operands; + SMLoc Loc = getLexer().getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. 
Operands.push_back(X86Operand()); @@ -317,7 +312,15 @@ bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { } } - return MatchInstruction(Name, Operands, Inst); + if (!MatchInstruction(Name, Operands, Inst)) + return false; + + // FIXME: We should give nicer diagnostics about the exact failure. + + // FIXME: For now we just treat unrecognized instructions as "warnings". + Warning(Loc, "unrecognized instruction"); + + return false; } // Force static initialization. @@ -326,4 +329,64 @@ extern "C" void LLVMInitializeX86AsmParser() { RegisterAsmParser Y(TheX86_64Target); } +// FIXME: These should come from tblgen. + +// Match_X86_Op_GR8 +static bool +Match_X86_Op_GR8(const X86Operand &Op, MCOperand *MCOps, unsigned NumOps) { + assert(NumOps == 1 && "Invalid number of ops!"); + + // FIXME: Match correct registers. + if (Op.Kind != X86Operand::Register) + return true; + + MCOps[0].MakeReg(Op.getReg()); + return false; +} + +#define DUMMY(name) \ + static bool Match_X86_Op_##name(const X86Operand &Op, \ + MCOperand *MCOps, \ + unsigned NumMCOps) { \ + return true; \ + } + +DUMMY(FR32) +DUMMY(FR64) +DUMMY(GR16) +DUMMY(GR32) +DUMMY(GR32_NOREX) +DUMMY(GR64) +DUMMY(GR8_NOREX) +DUMMY(RST) +DUMMY(VR128) +DUMMY(VR64) +DUMMY(brtarget) +DUMMY(brtarget8) +DUMMY(f128mem) +DUMMY(f32mem) +DUMMY(f64mem) +DUMMY(f80mem) +DUMMY(i128mem) +DUMMY(i16i8imm) +DUMMY(i16imm) +DUMMY(i16mem) +DUMMY(i32i8imm) +DUMMY(i32imm_pcrel) +DUMMY(i32imm) +DUMMY(i32mem) +DUMMY(i64i32imm_pcrel) +DUMMY(i64i32imm) +DUMMY(i64i8imm) +DUMMY(i64imm) +DUMMY(i64mem) +DUMMY(i8imm) +DUMMY(i8mem_NOREX) +DUMMY(i8mem) +DUMMY(lea32mem) +DUMMY(lea64_32mem) +DUMMY(lea64mem) +DUMMY(sdmem) +DUMMY(ssmem) + #include "X86GenAsmMatcher.inc" diff --git a/test/MC/AsmParser/x86_instructions.s b/test/MC/AsmParser/x86_instructions.s new file mode 100644 index 00000000000..71f98f0d847 --- /dev/null +++ b/test/MC/AsmParser/x86_instructions.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple i386-unknown-unknown %s | 
FileCheck %s + +# Simple instructions + subb %al, %al +# CHECK: MCInst(opcode=1831, operands=[reg:2, reg:0, reg:2]) diff --git a/test/MC/AsmParser/x86_operands.s b/test/MC/AsmParser/x86_operands.s index 1d31097f160..c6f886b9502 100644 --- a/test/MC/AsmParser/x86_operands.s +++ b/test/MC/AsmParser/x86_operands.s @@ -1,6 +1,6 @@ // FIXME: Actually test that we get the expected results. -// RUN: llvm-mc -triple i386-unknown-unknown %s > %t +// RUN: llvm-mc -triple i386-unknown-unknown %s > %t 2> %t2 # Immediates push $1 diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index d00ff202ad0..611d470a46d 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -15,8 +15,71 @@ #include "AsmMatcherEmitter.h" #include "CodeGenTarget.h" #include "Record.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include +#include using namespace llvm; +/// FlattenVariants - Flatten an .td file assembly string by selecting the +/// variant at index \arg N. +static std::string FlattenVariants(const std::string &AsmString, + unsigned N) { + StringRef Cur = AsmString; + std::string Res = ""; + + for (;;) { + // Add the prefix until the next '{', and split out the contents in the + // braces. + std::pair Inner, Split = Cur.split('{'); + + Res += Split.first; + if (Split.second.empty()) + break; + + Inner = Split.second.split('}'); + + // Select the Nth variant (or empty). + StringRef Selection = Inner.first; + for (unsigned i = 0; i != N; ++i) + Selection = Selection.split('|').second; + Res += Selection.split('|').first; + + Cur = Inner.second; + } + + return Res; +} + +/// TokenizeAsmString - Tokenize a simplified assembly string. 
+static void TokenizeAsmString(const std::string &AsmString, + SmallVectorImpl &Tokens) { + unsigned Prev = 0; + bool InTok = true; + for (unsigned i = 0, e = AsmString.size(); i != e; ++i) { + switch (AsmString[i]) { + case '*': + case '!': + case ' ': + case '\t': + case ',': + if (InTok) { + Tokens.push_back(StringRef(&AsmString[Prev], i - Prev)); + InTok = false; + } + if (AsmString[i] == '*' || AsmString[i] == '!') + Tokens.push_back(StringRef(&AsmString[i], 1)); + Prev = i + 1; + break; + + default: + InTok = true; + } + } + if (InTok && Prev != AsmString.size()) + Tokens.push_back(StringRef(&AsmString[Prev], AsmString.size() - Prev)); +} + void AsmMatcherEmitter::run(raw_ostream &OS) { CodeGenTarget Target; const std::vector &Registers = Target.getRegisters(); @@ -44,4 +107,180 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { } OS << " return true;\n"; OS << "}\n"; + + // Emit the function to match instructions. + std::vector NumberedInstructions; + Target.getInstructionsByEnumValue(NumberedInstructions); + + std::list MatchFns; + + OS << "\n"; + const std::map &Instructions = + Target.getInstructions(); + for (std::map::const_iterator + it = Instructions.begin(), ie = Instructions.end(); it != ie; ++it) { + const CodeGenInstruction &CGI = it->second; + + // Ignore pseudo ops. + // + // FIXME: This is a hack. + if (const RecordVal *Form = CGI.TheDef->getValue("Form")) + if (Form->getValue()->getAsString() == "Pseudo") + continue; + + // Ignore instructions with no .s string. + // + // FIXME: What are these? + if (CGI.AsmString.empty()) + continue; + + // FIXME: Hack; ignore "lock". + if (StringRef(CGI.AsmString).startswith("lock")) + continue; + + // FIXME: Hack. 
+#if 0 + if (1 && it->first != "SUB8mr") + continue; +#endif + + std::string Flattened = FlattenVariants(CGI.AsmString, 0); + SmallVector Tokens; + + TokenizeAsmString(Flattened, Tokens); + + DEBUG({ + outs() << it->first << " -- flattened:\"" + << Flattened << "\", tokens:["; + for (unsigned i = 0, e = Tokens.size(); i != e; ++i) { + outs() << Tokens[i]; + if (i + 1 != e) + outs() << ", "; + } + outs() << "]\n"; + + for (unsigned i = 0, e = CGI.OperandList.size(); i != e; ++i) { + const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[i]; + outs() << " op[" << i << "] = " << OI.Name + << " " << OI.Rec->getName() + << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n"; + } + }); + + // FIXME: Ignore non-literal tokens. + if (std::find(Tokens[0].begin(), Tokens[0].end(), '$') != Tokens[0].end()) + continue; + + std::string FnName = "Match_" + Target.getName() + "_Inst_" + it->first; + MatchFns.push_back(FnName); + + OS << "static bool " << FnName + << "(const StringRef &Name," + << " SmallVectorImpl &Operands," + << " MCInst &Inst) {\n\n"; + + OS << " // Match name.\n"; + OS << " if (Name != \"" << Tokens[0] << "\")\n"; + OS << " return true;\n\n"; + + OS << " // Match number of operands.\n"; + OS << " if (Operands.size() != " << Tokens.size() - 1 << ")\n"; + OS << " return true;\n\n"; + + // Compute the total number of MCOperands. + // + // FIXME: Isn't this somewhere else? + unsigned NumMIOperands = 0; + for (unsigned i = 0, e = CGI.OperandList.size(); i != e; ++i) { + const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[i]; + NumMIOperands = std::max(NumMIOperands, + OI.MIOperandNo + OI.MINumOperands); + } + + std::set MatchedOperands; + // This is the list of operands we need to fill in. + if (NumMIOperands) + OS << " MCOperand Ops[" << NumMIOperands << "];\n\n"; + + unsigned ParsedOpIdx = 0; + for (unsigned i = 1, e = Tokens.size(); i < e; ++i) { + // FIXME: Can only match simple operands. 
+ if (Tokens[i][0] != '$') { + OS << " // FIXME: unable to match token: '" << Tokens[i] << "'!\n"; + OS << " return true;\n\n"; + continue; + } + + // Map this token to an operand. FIXME: Move elsewhere. + + unsigned Idx; + try { + Idx = CGI.getOperandNamed(Tokens[i].substr(1)); + } catch(...) { + OS << " // FIXME: unable to find operand: '" << Tokens[i] << "'!\n"; + OS << " return true;\n\n"; + continue; + } + + // FIXME: Each match routine should always end up filling the same number + // of operands, we should just check that the number matches what the + // match routine expects here instead of passing it. We can do this once + // we start generating the class match functions. + const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx]; + + // Track that we have matched these operands. + // + // FIXME: Verify that we don't parse something to the same operand twice. + for (unsigned j = 0; j != OI.MINumOperands; ++j) + MatchedOperands.insert(OI.MIOperandNo + j); + + OS << " // Match '" << Tokens[i] << "' (parsed operand " << ParsedOpIdx + << ") to machine operands [" << OI.MIOperandNo << ", " + << OI.MIOperandNo + OI.MINumOperands << ").\n"; + OS << " if (Match_" << Target.getName() + << "_Op_" << OI.Rec->getName() << "(" + << "Operands[" << ParsedOpIdx << "], " + << "&Ops[" << OI.MIOperandNo << "], " + << OI.MINumOperands << "))\n"; + OS << " return true;\n\n"; + + ++ParsedOpIdx; + } + + // Generate code to construct the MCInst. + + OS << " // Construct MCInst.\n"; + OS << " Inst.setOpcode(" << Target.getName() << "::" + << it->first << ");\n"; + for (unsigned i = 0, e = NumMIOperands; i != e; ++i) { + // FIXME: Oops! Ignore this for now, the instruction should print ok. If + // we need to evaluate the constraints. 
+ if (!MatchedOperands.count(i)) { + OS << "\n"; + OS << " // FIXME: Nothing matched Ops[" << i << "]!\n"; + OS << " Ops[" << i << "].MakeReg(0);\n"; + OS << "\n"; + } + + OS << " Inst.addOperand(Ops[" << i << "]);\n"; + } + OS << "\n"; + OS << " return false;\n"; + OS << "}\n\n"; + } + + // Generate the top level match function. + + OS << "bool " << Target.getName() << ClassName + << "::MatchInstruction(const StringRef &Name, " + << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, " + << "MCInst &Inst) {\n"; + for (std::list::iterator it = MatchFns.begin(), + ie = MatchFns.end(); it != ie; ++it) { + OS << " if (!" << *it << "(Name, Operands, Inst))\n"; + OS << " return false;\n\n"; + } + + OS << " return true;\n"; + OS << "}\n\n"; }