From 674140fc3e47271f39a0e25cd41d7afa507b8f25 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 22 Jan 2014 15:08:36 +0000 Subject: [PATCH] [x86] Allow segment and address-size overrides for CMPS[BWLQ] (PR9385) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199806 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmParser/X86AsmParser.cpp | 47 +++++++++++++++++++ lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 6 +++ .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 18 +++++++ lib/Target/X86/X86InstrFormats.td | 1 + lib/Target/X86/X86InstrInfo.td | 13 +++-- test/MC/X86/index-operations.s | 25 ++++++++++ utils/TableGen/X86RecognizableInstr.cpp | 5 ++ 7 files changed, 110 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 345c57c8b23..5f3498c7c3c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -588,6 +588,11 @@ private: MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm); + /// doSrcDstMatch - Returns true if operands are matching in their + /// word size (%si and %di, %esi and %edi, etc.). Order depends on + /// the parsing mode (Intel vs. AT&T). + bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); @@ -1150,6 +1155,27 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. +bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2) +{ + // Return true and let a normal complaint about bogus operands happen. + if (!Op1.isMem() || !Op2.isMem()) + return true; + + // Actually these might be the other way round if Intel syntax is + // being used. It doesn't matter. 
+ unsigned diReg = Op1.Mem.BaseReg; + unsigned siReg = Op2.Mem.BaseReg; + + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg)) + return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg); + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg)) + return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg); + if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg)) + return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg); + // Again, return true and let another error happen. + return true; +} + bool X86AsmParser::isSrcOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); @@ -2369,6 +2395,27 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Name == "scasl" || Name == "scasd" || Name == "scasq")) Operands.push_back(DefaultMemDIOperand(NameLoc)); + // Add default SI and DI operands to "cmps[bwlq]". + if (Name.startswith("cmps") && + (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" || + Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) { + if (Operands.size() == 1) { + if (isParsingIntelSyntax()) { + Operands.push_back(DefaultMemSIOperand(NameLoc)); + Operands.push_back(DefaultMemDIOperand(NameLoc)); + } else { + Operands.push_back(DefaultMemDIOperand(NameLoc)); + Operands.push_back(DefaultMemSIOperand(NameLoc)); + } + } else if (Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (!doSrcDstMatch(Op, Op2)) + return Error(Op.getStartLoc(), + "mismatching source and destination index registers"); + } + } + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, . Canonicalize to // "shift ". 
if ((Name.startswith("shr") || Name.startswith("sar") || diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 69e74b86591..49d8b116581 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -267,6 +267,11 @@ namespace X86II { /// register DI/EDI/ESI. RawFrmDst = 9, + /// RawFrmDstSrc - This form is for instructions that use the source index + /// register SI/ESI/RSI with a possible segment override, and also the + /// destination index register DI/EDI/RDI. + RawFrmDstSrc = 10, + /// MRM[0-7][rm] - These forms are used to represent instructions that use /// a Mod/RM byte, and use the middle field to hold extended opcode /// information. In the intel manual these are represented as /0, /1, ... @@ -622,6 +627,7 @@ namespace X86II { case X86II::RawFrmMemOffs: case X86II::RawFrmSrc: case X86II::RawFrmDst: + case X86II::RawFrmDstSrc: return -1; case X86II::MRMDestMem: return 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 0f1ab6d473a..e8c9107f662 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1317,6 +1317,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: llvm_unreachable("Pseudo instruction shouldn't be emitted"); + case X86II::RawFrmDstSrc: { + unsigned diReg = MI.getOperand(0).getReg(); + unsigned siReg = MI.getOperand(1).getReg(); + assert(((siReg == X86::SI && diReg == X86::DI) || + (siReg == X86::ESI && diReg == X86::EDI) || + (siReg == X86::RSI && diReg == X86::RDI)) && + "SI and DI register sizes do not match"); + // Emit segment override opcode prefix as needed (not for %ds). + if (MI.getOperand(2).getReg() != X86::DS) + EmitSegmentOverridePrefix(CurByte, 2, MI, OS); + // Emit address-size prefix as needed. 
+ if ((!is32BitMode() && siReg == X86::ESI) || + (is32BitMode() && siReg == X86::SI)) + EmitByte(0x67, CurByte, OS); + CurOp += 3; // Consume operands. + EmitByte(BaseOpcode, CurByte, OS); + break; + } case X86II::RawFrmSrc: { unsigned siReg = MI.getOperand(0).getReg(); // Emit segment override opcode prefix as needed (not for %ds). diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 0ad8f57e0be..adc24e23182 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -23,6 +23,7 @@ def AddRegFrm : Format<2>; def MRMDestReg : Format<3>; def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>; def MRMSrcMem : Format<6>; def RawFrmMemOffs : Format<7>; def RawFrmSrc : Format<8>; def RawFrmDst : Format<9>; +def RawFrmDstSrc: Format<10>; def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>; def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>; def MRM6r : Format<22>; def MRM7r : Format<23>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 296da6cf184..0fe93cc4fea 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1166,11 +1166,14 @@ def SCAS32 : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst), def SCAS64 : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst), "scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>; -def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>; -def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize; -def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>, - OpSize16; -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; +def CMPS8 : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src), + "cmpsb\t{$dst, $src|$src, $dst}", [], IIC_CMPS>; +def CMPS16 : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), + "cmpsw\t{$dst, $src|$src, $dst}", [], IIC_CMPS>, OpSize; +def CMPS32 : I<0xA7, RawFrmDstSrc, (outs), (ins 
dstidx32:$dst, srcidx32:$src), + "cmps{l|d}\t{$dst, $src|$src, $dst}", [], IIC_CMPS>, OpSize16; +def CMPS64 : RI<0xA7, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src), + "cmpsq\t{$dst, $src|$src, $dst}", [], IIC_CMPS>; } // SchedRW //===----------------------------------------------------------------------===// diff --git a/test/MC/X86/index-operations.s b/test/MC/X86/index-operations.s index f1ccfe817cd..7bd29aca2cd 100644 --- a/test/MC/X86/index-operations.s +++ b/test/MC/X86/index-operations.s @@ -94,3 +94,28 @@ scas %es:(%di), %ax // ERR64: invalid 16-bit base register // 16: scasw %es:(%di), %ax # encoding: [0xaf] // 32: scasw %es:(%di), %ax # encoding: [0x66,0x67,0xaf] + +cmpsb +// 64: cmpsb %es:(%rdi), (%rsi) # encoding: [0xa6] +// 32: cmpsb %es:(%edi), (%esi) # encoding: [0xa6] +// 16: cmpsb %es:(%di), (%si) # encoding: [0xa6] + +cmpsw (%edi), (%esi) +// 64: cmpsw %es:(%edi), (%esi) # encoding: [0x66,0x67,0xa7] +// 32: cmpsw %es:(%edi), (%esi) # encoding: [0x66,0xa7] +// 16: cmpsw %es:(%edi), (%esi) # encoding: [0x67,0xa7] + +cmpsb (%di), (%esi) +// ERR64: invalid 16-bit base register +// ERR32: mismatching source and destination +// ERR16: mismatching source and destination + +cmpsl %es:(%edi), %ss:(%esi) +// 64: cmpsl %es:(%edi), %ss:(%esi) # encoding: [0x36,0x67,0xa7] +// 32: cmpsl %es:(%edi), %ss:(%esi) # encoding: [0x36,0xa7] +// 16: cmpsl %es:(%edi), %ss:(%esi) # encoding: [0x66,0x36,0x67,0xa7] + +cmpsq (%rdi), (%rsi) +// 64: cmpsq %es:(%rdi), (%rsi) # encoding: [0x48,0xa7] +// ERR32: 64-bit +// ERR16: 64-bit diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 12950518444..e0b4be2eb39 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -62,6 +62,7 @@ namespace X86Local { RawFrmMemOffs = 7, RawFrmSrc = 8, RawFrmDst = 9, + RawFrmDstSrc = 10, MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, MRM0m = 24, 
MRM1m = 25, MRM2m = 26, MRM3m = 27, @@ -638,6 +639,10 @@ void RecognizableInstr::emitInstructionSpecifier() { case X86Local::RawFrmDst: HANDLE_OPERAND(relocation); return; + case X86Local::RawFrmDstSrc: + HANDLE_OPERAND(relocation); + HANDLE_OPERAND(relocation); + return; case X86Local::RawFrm: // Operand 1 (optional) is an address or immediate. // Operand 2 (optional) is an immediate.