From e86b01c153ba52307ecb6e7513ec33f57caedfdd Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 9 Jul 2010 18:27:43 +0000 Subject: [PATCH] Start the support for AVX instructions with 256-bit %ymm registers. A couple of notes: - The instructions are being added with dummy placeholder patterns using some 256 specifiers, this is not meant to work now, but since there are some multiclasses generic enough to accept them, when we go for codegen, the stuff will be already there. - Add VEX encoding bits to support YMM - Add MOVUPS and MOVAPS in the first round - Use "Y" as suffix for those Instructions: MOVUPSYrr, ... - All AVX instructions in X86InstrSSE.td will move soon to a new X86InstrAVX file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 3 ++ .../X86/AsmPrinter/X86IntelInstPrinter.h | 4 ++ lib/Target/X86/X86InstrInfo.cpp | 2 + lib/Target/X86/X86InstrInfo.td | 2 +- lib/Target/X86/X86InstrSSE.td | 37 ++++++++++++++ lib/Target/X86/X86MCCodeEmitter.cpp | 12 ++++- lib/Target/X86/X86RegisterInfo.cpp | 24 ++++++--- lib/Target/X86/X86RegisterInfo.td | 33 ++++++++++--- test/MC/AsmParser/X86/x86_32-encoding.s | 49 +++++++++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 49 +++++++++++++++++++ utils/TableGen/EDEmitter.cpp | 2 + 11 files changed, 199 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index b29bc0c67bd..3be4bae5bec 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -68,6 +68,9 @@ public: void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } }; } diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h 
b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 8a93fd1acf7..4d680744dd6 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -80,6 +80,10 @@ public: O << "XMMWORD PTR "; printMemReference(MI, OpNo, O); } + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); + } }; } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9f7cddcb23c..bd3759d0db0 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3147,6 +3147,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: return true; } return false; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1fe373e4b9d..1efef5a80b1 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -227,7 +227,7 @@ def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; -//def f256mem : X86MemOperand<"printf256mem">; +def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 20446cec376..e8736524f30 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -106,6 +106,12 @@ def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; +// FIXME: move this to a more appropriate place after all AVX is done. +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; +def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; + // Like 'store', but always requires vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ @@ -130,6 +136,16 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr), def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; +// FIXME: move this to a more appropriate place after all AVX is done. +def alignedloadv8f32 : PatFrag<(ops node:$ptr), + (v8f32 (alignedload node:$ptr))>; +def alignedloadv4f64 : PatFrag<(ops node:$ptr), + (v4f64 (alignedload node:$ptr))>; +def alignedloadv8i32 : PatFrag<(ops node:$ptr), + (v8i32 (alignedload node:$ptr))>; +def alignedloadv4i64 : PatFrag<(ops node:$ptr), + (v4i64 (alignedload node:$ptr))>; + // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to // be naturally aligned on some targets but not on others. 
If the subtarget @@ -583,6 +599,15 @@ defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups", SSEPackedSingle>, VEX; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, 0>, OpSize, VEX; + +defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; } defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, TB; @@ -606,6 +631,18 @@ def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; } def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 943284a47ee..633ddd49d74 100644 --- 
a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -75,7 +75,8 @@ public: unsigned OpNum) { unsigned SrcReg = MI.getOperand(OpNum).getReg(); unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum)); - if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) + if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) || + (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15)) SrcRegNum += 8; // The registers represented through VEX_VVVV should @@ -454,6 +455,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, break; // No prefix! } + // Set the vector length to 256-bit if YMM0-YMM15 is used + for (unsigned i = 0; i != MI.getNumOperands(); ++i) { + if (!MI.getOperand(i).isReg()) + continue; + unsigned SrcReg = MI.getOperand(i).getReg(); + if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) + VEX_L = 1; + } + unsigned NumOps = MI.getNumOperands(); unsigned CurOp = 0; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a2607649239..5f31e00ebab 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: return RegNo-X86::ST0; - case X86::XMM0: case X86::XMM8: case X86::MM0: + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: return 0; - case X86::XMM1: case X86::XMM9: case X86::MM1: + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: return 1; - case X86::XMM2: case X86::XMM10: case X86::MM2: + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: return 2; - case X86::XMM3: case X86::XMM11: case X86::MM3: + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: return 3; - case X86::XMM4: case X86::XMM12: case X86::MM4: + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: return 
4; - case X86::XMM5: case X86::XMM13: case X86::MM5: + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: return 5; - case X86::XMM6: case X86::XMM14: case X86::MM6: + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: return 6; - case X86::XMM7: case X86::XMM15: case X86::MM7: + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: return 7; case X86::ES: diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index d0e0db16a2f..7c79efb4978 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -147,7 +147,7 @@ let Namespace = "X86" in { def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>; def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>; def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>; - + // Pseudo Floating Point registers def FP0 : Register<"fp0">; def FP1 : Register<"fp1">; @@ -155,7 +155,7 @@ let Namespace = "X86" in { def FP3 : Register<"fp3">; def FP4 : Register<"fp4">; def FP5 : Register<"fp5">; - def FP6 : Register<"fp6">; + def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. // The sub_ss and sub_sd subregs are the same registers with another regclass. @@ -181,7 +181,8 @@ let Namespace = "X86" in { } // YMM Registers, used by AVX instructions - let SubRegIndices = [sub_xmm] in { + // The sub_ss and sub_sd subregs are the same registers with another regclass. 
+ let CompositeIndices = [(sub_ss), (sub_sd)], SubRegIndices = [sub_xmm] in { def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; @@ -357,7 +358,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, }]; } -def GR32 : RegisterClass<"X86", [i32], 32, +def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; @@ -412,7 +413,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. -def GR64 : RegisterClass<"X86", [i64], 64, +def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), @@ -446,7 +447,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { } // Debug registers. 
-def DEBUG_REG : RegisterClass<"X86", [i32], 32, +def DEBUG_REG : RegisterClass<"X86", [i32], 32, [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { } @@ -787,7 +788,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; - + let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -803,11 +804,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } -def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + +def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; + + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VR256Class::iterator + VR256Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode. + else + return end(); + } + }]; } // Status flags registers. 
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index a6d10c63c43..e4bf29e0b24 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -12305,3 +12305,52 @@ // CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3 // CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f] vcmptrue_usps %xmm1, %xmm2, %xmm3 + +// CHECK: vmovaps (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0x10] + vmovaps (%eax), %ymm2 + +// CHECK: vmovaps %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x28,0xd1] + vmovaps %ymm1, %ymm2 + +// CHECK: vmovaps %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x29,0x08] + vmovaps %ymm1, (%eax) + +// CHECK: vmovapd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0x10] + vmovapd (%eax), %ymm2 + +// CHECK: vmovapd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x28,0xd1] + vmovapd %ymm1, %ymm2 + +// CHECK: vmovapd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x29,0x08] + vmovapd %ymm1, (%eax) + +// CHECK: vmovups (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0x10] + vmovups (%eax), %ymm2 + +// CHECK: vmovups %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfc,0x10,0xd1] + vmovups %ymm1, %ymm2 + +// CHECK: vmovups %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfc,0x11,0x08] + vmovups %ymm1, (%eax) + +// CHECK: vmovupd (%eax), %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0x10] + vmovupd (%eax), %ymm2 + +// CHECK: vmovupd %ymm1, %ymm2 +// CHECK: encoding: [0xc5,0xfd,0x10,0xd1] + vmovupd %ymm1, %ymm2 + +// CHECK: vmovupd %ymm1, (%eax) +// CHECK: encoding: [0xc5,0xfd,0x11,0x08] + vmovupd %ymm1, (%eax) + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 185df8f2e00..452aa532a81 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -2379,3 +2379,52 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13 // CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f] vcmptrue_usps %xmm11, %xmm12, %xmm13 
+ +// CHECK: vmovaps (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x28,0x20] + vmovaps (%rax), %ymm12 + +// CHECK: vmovaps %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3] + vmovaps %ymm11, %ymm12 + +// CHECK: vmovaps %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x29,0x18] + vmovaps %ymm11, (%rax) + +// CHECK: vmovapd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x28,0x20] + vmovapd (%rax), %ymm12 + +// CHECK: vmovapd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3] + vmovapd %ymm11, %ymm12 + +// CHECK: vmovapd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x29,0x18] + vmovapd %ymm11, (%rax) + +// CHECK: vmovups (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7c,0x10,0x20] + vmovups (%rax), %ymm12 + +// CHECK: vmovups %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3] + vmovups %ymm11, %ymm12 + +// CHECK: vmovups %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7c,0x11,0x18] + vmovups %ymm11, (%rax) + +// CHECK: vmovupd (%rax), %ymm12 +// CHECK: encoding: [0xc5,0x7d,0x10,0x20] + vmovupd (%rax), %ymm12 + +// CHECK: vmovupd %ymm11, %ymm12 +// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3] + vmovupd %ymm11, %ymm12 + +// CHECK: vmovupd %ymm11, (%rax) +// CHECK: encoding: [0xc5,0x7d,0x11,0x18] + vmovupd %ymm11, (%rax) + diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index 1c94ebfa108..c5ee82850d1 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -306,6 +306,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type, REG("RFP64"); REG("RFP80"); REG("VR128"); + REG("VR256"); REG("RST"); REG("SEGMENT_REG"); REG("DEBUG_REG"); @@ -339,6 +340,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type, MEM("opaque80mem"); MEM("i128mem"); MEM("f128mem"); + MEM("f256mem"); MEM("opaque512mem"); // all R, I, R, I