From 3c92309f0d95b7e247de682446ac47774f29af29 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 26 Aug 2014 20:32:34 +0000 Subject: [PATCH] MC: Split the x86 asm matcher implementations by dialect The existing matcher has lots of AT&T assembly dialect assumptions baked into it. In particular, the hack for resolving the size of a memory operand by appending the four most common suffixes doesn't work at all. The Intel assembly dialect mnemonic table has ambiguous entries, so we need to try matching multiple times with different operand sizes, since that's the only way to choose different instruction variants. This makes us more compatible with gas's implementation of Intel assembly syntax. MSVC assumes you want byte-sized operations for the instructions that we reject as ambiguous. Reviewed By: grosbach Differential Revision: http://reviews.llvm.org/D4747 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 228 ++++++++++++++++++---- lib/Target/X86/AsmParser/X86Operand.h | 3 + test/MC/X86/intel-syntax-ambiguous.s | 44 +++++ test/MC/X86/intel-syntax-ptr-sized.s | 20 ++ test/MC/X86/intel-syntax.s | 34 ++++ 5 files changed, 296 insertions(+), 33 deletions(-) create mode 100644 test/MC/X86/intel-syntax-ambiguous.s create mode 100644 test/MC/X86/intel-syntax-ptr-sized.s diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 3a46d7c0551..fde41aa14cd 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -697,6 +697,29 @@ private: uint64_t &ErrorInfo, bool MatchingInlineAsm) override; + void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands, + MCStreamer &Out, bool MatchingInlineAsm); + + bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, + bool MatchingInlineAsm); + + bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm); + + bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm); + + unsigned getPointerSize() { + if (is16BitMode()) return 16; + if (is32BitMode()) return 32; + if (is64BitMode()) return 64; + llvm_unreachable("invalid mode"); + } + virtual bool OmitRegisterFromClobberLists(unsigned RegNo) override; /// doSrcDstMatch - Returns true if operands are matching in their @@ -2309,12 +2332,16 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { - assert(!Operands.empty() && "Unexpect empty operand list!"); - X86Operand &Op = static_cast(*Operands[0]); - assert(Op.isToken() && "Leading operand should always be a mnemonic!"); - ArrayRef EmptyRanges = None; + if (isParsingIntelSyntax()) + return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, + MatchingInlineAsm); + return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, + MatchingInlineAsm); +} - // First, handle aliases that expand to multiple instructions. +void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, + OperandVector &Operands, MCStreamer &Out, + bool MatchingInlineAsm) { // FIXME: This should be replaced with a real .td file alias mechanism. // Also, MatchInstructionImpl should actually *do* the EmitInstruction // call. @@ -2336,6 +2363,36 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, EmitInstruction(Inst, Operands, Out); Operands[0] = X86Operand::CreateToken(Repl, IDLoc); } +} + +bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, + bool MatchingInlineAsm) { + assert(ErrorInfo && "Unknown missing feature!"); + ArrayRef EmptyRanges = None; + SmallString<126> Msg; + raw_svector_ostream OS(Msg); + OS << "instruction requires:"; + uint64_t Mask = 1; + for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { + if (ErrorInfo & Mask) + OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask); + Mask <<= 1; + } + return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); +} + +bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + assert(!Operands.empty() && "Unexpect empty operand list!"); + X86Operand &Op = static_cast(*Operands[0]); + assert(Op.isToken() && "Leading operand should always be a mnemonic!"); + ArrayRef EmptyRanges = None; + + // First, handle aliases that expand to multiple instructions. + MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); bool WasOriginallyInvalidOperand = false; MCInst Inst; @@ -2358,21 +2415,8 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, EmitInstruction(Inst, Operands, Out); Opcode = Inst.getOpcode(); return false; - case Match_MissingFeature: { - assert(ErrorInfo && "Unknown missing feature!"); - // Special case the error message for the very common case where only - // a single subtarget feature is missing. - std::string Msg = "instruction requires:"; - uint64_t Mask = 1; - for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { - if (ErrorInfo & Mask) { - Msg += " "; - Msg += getSubtargetFeatureName(ErrorInfo & Mask); - } - Mask <<= 1; - } - return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); - } + case Match_MissingFeature: + return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -2490,25 +2534,17 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // missing feature. if (std::count(std::begin(Match), std::end(Match), Match_MissingFeature) == 1) { - std::string Msg = "instruction requires:"; - uint64_t Mask = 1; - for (unsigned i = 0; i < (sizeof(ErrorInfoMissingFeature)*8-1); ++i) { - if (ErrorInfoMissingFeature & Mask) { - Msg += " "; - Msg += getSubtargetFeatureName(ErrorInfoMissingFeature & Mask); - } - Mask <<= 1; - } - return Error(IDLoc, Msg, EmptyRanges, MatchingInlineAsm); + ErrorInfo = ErrorInfoMissingFeature; + return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature, + MatchingInlineAsm); } // If one instruction matched with an invalid operand, report this as an // operand failure. if (std::count(std::begin(Match), std::end(Match), Match_InvalidOperand) == 1) { - Error(IDLoc, "invalid operand for instruction", EmptyRanges, - MatchingInlineAsm); - return true; + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + MatchingInlineAsm); } // If all of these were an outright failure, report it in a useless way. @@ -2517,6 +2553,132 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } +bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + assert(!Operands.empty() && "Unexpect empty operand list!"); + X86Operand &Op = static_cast(*Operands[0]); + assert(Op.isToken() && "Leading operand should always be a mnemonic!"); + StringRef Mnemonic = Op.getToken(); + ArrayRef EmptyRanges = None; + + // First, handle aliases that expand to multiple instructions. + MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); + + MCInst Inst; + + // Find one unsized memory operand, if present. + X86Operand *UnsizedMemOp = nullptr; + for (const auto &Op : Operands) { + X86Operand *X86Op = static_cast(Op.get()); + // FIXME: Remove this exception for absolute memory references. Currently it + // allows us to assemble 'call foo', because foo is represented as a memory + // operand. + if (X86Op->isMemUnsized() && !X86Op->isAbsMem()) + UnsizedMemOp = X86Op; + } + + // Allow some instructions to have implicitly pointer-sized operands. This is + // compatible with gas. + if (UnsizedMemOp) { + static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"}; + for (const char *Instr : PtrSizedInstrs) { + if (Mnemonic == Instr) { + UnsizedMemOp->Mem.Size = getPointerSize(); + break; + } + } + } + + // If an unsized memory operand is present, try to match with each memory + // operand size. In Intel assembly, the size is not part of the instruction + // mnemonic. + SmallVector Match; + uint64_t ErrorInfoMissingFeature = 0; + if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) { + static const unsigned MopSizes[] = {8, 16, 32, 64, 80}; + for (unsigned Size : MopSizes) { + UnsizedMemOp->Mem.Size = Size; + uint64_t ErrorInfoIgnore; + Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, + MatchingInlineAsm, + isParsingIntelSyntax())); + // If this returned as a missing feature failure, remember that. + if (Match.back() == Match_MissingFeature) + ErrorInfoMissingFeature = ErrorInfoIgnore; + } + } else { + Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm, + isParsingIntelSyntax())); + // If this returned as a missing feature failure, remember that. + if (Match.back() == Match_MissingFeature) + ErrorInfoMissingFeature = ErrorInfo; + } + + // Restore the size of the unsized memory operand if we modified it. + if (UnsizedMemOp) + UnsizedMemOp->Mem.Size = 0; + + // If it's a bad mnemonic, all results will be the same. + if (Match.back() == Match_MnemonicFail) { + ArrayRef Ranges = + MatchingInlineAsm ? EmptyRanges : Op.getLocRange(); + return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'", + Ranges, MatchingInlineAsm); + } + + // If exactly one matched, then we treat that as a successful match (and the + // instruction will already have been filled in correctly, since the failing + // matches won't have modified it). + unsigned NumSuccessfulMatches = + std::count(std::begin(Match), std::end(Match), Match_Success); + if (NumSuccessfulMatches == 1) { + // Some instructions need post-processing to, for example, tweak which + // encoding is selected. Loop on it while changes happen so the individual + // transformations can chain off each other. + if (!MatchingInlineAsm) + while (processInstruction(Inst, Operands)) + ; + Inst.setLoc(IDLoc); + if (!MatchingInlineAsm) + EmitInstruction(Inst, Operands, Out); + Opcode = Inst.getOpcode(); + return false; + } else if (NumSuccessfulMatches > 1) { + assert(UnsizedMemOp && + "multiple matches only possible with unsized memory operands"); + ArrayRef Ranges = + MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange(); + return Error(UnsizedMemOp->getStartLoc(), + "ambiguous operand size for instruction '" + Mnemonic + "\'", + Ranges, MatchingInlineAsm); + } + + // If one instruction matched with a missing feature, report this as a + // missing feature. + if (std::count(std::begin(Match), std::end(Match), + Match_MissingFeature) == 1) { + ErrorInfo = ErrorInfoMissingFeature; + return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature, + MatchingInlineAsm); + } + + // If one instruction matched with an invalid operand, report this as an + // operand failure. + if (std::count(std::begin(Match), std::end(Match), + Match_InvalidOperand) == 1) { + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + MatchingInlineAsm); + } + + // If all of these were an outright failure, report it in a useless way. + return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges, + MatchingInlineAsm); +} + bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) { return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo); } diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index a04ab6c89c2..11a84157f58 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -205,6 +205,9 @@ struct X86Operand : public MCParsedAsmOperand { } bool isMem() const override { return Kind == Memory; } + bool isMemUnsized() const { + return Kind == Memory && Mem.Size == 0; + } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); } diff --git a/test/MC/X86/intel-syntax-ambiguous.s b/test/MC/X86/intel-syntax-ambiguous.s new file mode 100644 index 00000000000..1acfb5ba70f --- /dev/null +++ b/test/MC/X86/intel-syntax-ambiguous.s @@ -0,0 +1,44 @@ +// RUN: not llvm-mc -triple i686-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s + +.intel_syntax + +// Basic case of ambiguity for inc. + +inc [eax] +// CHECK: error: ambiguous operand size for instruction 'inc' +inc dword ptr [eax] +inc word ptr [eax] +inc byte ptr [eax] +// CHECK-NOT: error: + +// Other ambiguous instructions. Anything that doesn't take a register, +// basically. + +dec [eax] +// CHECK: error: ambiguous operand size for instruction 'dec' +mov [eax], 1 +// CHECK: error: ambiguous operand size for instruction 'mov' +and [eax], 0 +// CHECK: error: ambiguous operand size for instruction 'and' +or [eax], 1 +// CHECK: error: ambiguous operand size for instruction 'or' +add [eax], 1 +// CHECK: error: ambiguous operand size for instruction 'add' +sub [eax], 1 +// CHECK: error: ambiguous operand size for instruction 'sub' + +// gas assumes these instructions are pointer-sized by default, and we follow +// suit. +push [eax] +call [eax] +jmp [eax] +// CHECK-NOT: error: + +add byte ptr [eax], eax +// CHECK: error: invalid operand for instruction + +add byte ptr [eax], eax +// CHECK: error: invalid operand for instruction + +add rax, 3 +// CHECK: error: register %rax is only available in 64-bit mode diff --git a/test/MC/X86/intel-syntax-ptr-sized.s b/test/MC/X86/intel-syntax-ptr-sized.s new file mode 100644 index 00000000000..c052c322b80 --- /dev/null +++ b/test/MC/X86/intel-syntax-ptr-sized.s @@ -0,0 +1,20 @@ +// RUN: llvm-mc %s -triple=i686-pc-windows | FileCheck %s + +.intel_syntax + +push [eax] +// CHECK: pushl (%eax) +call [eax] +// CHECK: calll *(%eax) +jmp [eax] +// CHECK: jmpl *(%eax) + +// mode switch +.code16 + +push [eax] +// CHECK: pushw (%eax) +call [eax] +// CHECK: callw *(%eax) +jmp [eax] +// CHECK: jmpw *(%eax) diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 796891880b1..bb1762e4727 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -607,3 +607,37 @@ fadd "?half@?0??bar@@YAXXZ@4NA" fadd "?half@?0??bar@@YAXXZ@4NA"@IMGREL // CHECK: fadds "?half@?0??bar@@YAXXZ@4NA" // CHECK: fadds "?half@?0??bar@@YAXXZ@4NA"@IMGREL32 + +inc qword ptr [rax] +inc dword ptr [rax] +inc word ptr [rax] +inc byte ptr [rax] +// CHECK: incq (%rax) +// CHECK: incl (%rax) +// CHECK: incw (%rax) +// CHECK: incb (%rax) + +dec qword ptr [rax] +dec dword ptr [rax] +dec word ptr [rax] +dec byte ptr [rax] +// CHECK: decq (%rax) +// CHECK: decl (%rax) +// CHECK: decw (%rax) +// CHECK: decb (%rax) + +add qword ptr [rax], 1 +add dword ptr [rax], 1 +add word ptr [rax], 1 +add byte ptr [rax], 1 +// CHECK: addq $1, (%rax) +// CHECK: addl $1, (%rax) +// CHECK: addw $1, (%rax) +// CHECK: addb $1, (%rax) + +fstp xword ptr [rax] +fstp qword ptr [rax] +fstp dword ptr [rax] +// CHECK: fstpt (%rax) +// CHECK: fstpl (%rax) +// CHECK: fstps (%rax)