From 544a95d7161a0b333ec889d77941aff828e627ac Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Fri, 25 Jun 2010 22:27:51 +0000
Subject: [PATCH] Add several AVX MOV flavors

Support VEX encoding for MRMDestReg

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106896 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td           | 105 +++++++++++++++++++++---
 lib/Target/X86/X86MCCodeEmitter.cpp     |  25 ++++--
 test/MC/AsmParser/X86/x86_32-encoding.s |  88 ++++++++++++++++++++
 test/MC/AsmParser/X86/x86_64-encoding.s |  88 ++++++++++++++++++++
 4 files changed, 287 insertions(+), 19 deletions(-)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 821b817f03d..a666efe6268 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -566,25 +566,47 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                             X86MemOperand x86memop, PatFrag ld_frag,
                             string asm, Domain d,
                             bit IsReMaterializable = 1> {
 let neverHasSideEffects = 1 in
-  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm, [], d>;
+  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
-  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
-              [(set RC:$dst, (ld_frag addr:$src))], d>;
+  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+              [(set RC:$dst, (ld_frag addr:$src))], d>;
 }
+let isAsmParserOnly = 1 in {
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+                                "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+                                "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+                                "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+                                "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+}
 defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
-                               "movaps\t{$src, $dst|$dst, $src}",
-                               SSEPackedSingle>, TB;
+                               "movaps", SSEPackedSingle>, TB;
 defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
-                               "movapd\t{$src, $dst|$dst, $src}",
-                               SSEPackedDouble>, TB, OpSize;
+                               "movapd", SSEPackedDouble>, TB, OpSize;
 defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
-                               "movups\t{$src, $dst|$dst, $src}",
-                               SSEPackedSingle>, TB;
+                               "movups", SSEPackedSingle>, TB;
 defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
-                               "movupd\t{$src, $dst|$dst, $src}",
-                               SSEPackedDouble, 0>, TB, OpSize;
+                               "movupd", SSEPackedDouble, 0>, TB, OpSize;
 
+let isAsmParserOnly = 1 in {
+def VMOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "vmovaps\t{$src, $dst|$dst, $src}",
+                    [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "vmovapd\t{$src, $dst|$dst, $src}",
+                    [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "vmovups\t{$src, $dst|$dst, $src}",
+                    [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "vmovupd\t{$src, $dst|$dst, $src}",
+                    [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+}
 def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -599,6 +621,25 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    [(store (v2f64 VR128:$src), addr:$dst)]>;
 
 // Intrinsic forms of MOVUPS/D load and store
+let isAsmParserOnly = 1 in {
+  let canFoldAsLoad = 1, isReMaterializable = 1 in
+  def VMOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst),
+                          (ins f128mem:$src),
+                          "vmovups\t{$src, $dst|$dst, $src}",
+                          [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
+  def VMOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst),
+                          (ins f128mem:$src),
+                          "vmovupd\t{$src, $dst|$dst, $src}",
+                          [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
+  def VMOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs),
+                          (ins f128mem:$dst, VR128:$src),
+                          "vmovups\t{$src, $dst|$dst, $src}",
+                          [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+  def VMOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs),
+                          (ins f128mem:$dst, VR128:$src),
+                          "vmovupd\t{$src, $dst|$dst, $src}",
+                          [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
+}
 let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "movups\t{$src, $dst|$dst, $src}",
@@ -634,6 +675,12 @@ multiclass sse12_mov_hilo_packed<bits<8> opc, RegisterClass RC,
                      SSEPackedDouble>, TB, OpSize;
 }
 
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+  defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+  defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
   defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
                      "\t{$src2, $dst|$dst, $src2}">;
@@ -641,6 +688,16 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
                      "\t{$src2, $dst|$dst, $src2}">;
 }
 
+let isAsmParserOnly = 1 in {
+def VMOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                    "vmovlps\t{$src, $dst|$dst, $src}",
+                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+                                  (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                    "vmovlpd\t{$src, $dst|$dst, $src}",
+                    [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                  (iPTR 0))), addr:$dst)]>, VEX;
+}
 def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -652,6 +709,20 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract (v2f64 VR128:$src),
                                  (iPTR 0))), addr:$dst)]>;
 
 // v2f64 extract element 1 is always custom lowered to unpack high to low
 // and extract element 0 so the non-store version isn't too horrible.
+let isAsmParserOnly = 1 in {
+def VMOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                    "vmovhps\t{$src, $dst|$dst, $src}",
+                    [(store (f64 (vector_extract
+                                  (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+                                          (undef)), (iPTR 0))), addr:$dst)]>,
+                    VEX;
+def VMOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                    "vmovhpd\t{$src, $dst|$dst, $src}",
+                    [(store (f64 (vector_extract
+                                  (v2f64 (unpckh VR128:$src, (undef))),
+                                  (iPTR 0))), addr:$dst)]>,
+                    VEX;
+}
 def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    "movhps\t{$src, $dst|$dst, $src}",
                    [(store (f64 (vector_extract
@@ -663,6 +734,20 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                    (v2f64 (unpckh VR128:$src, (undef))),
                    (iPTR 0))), addr:$dst)]>;
 
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+  def VMOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+                       (ins VR128:$src1, VR128:$src2),
+                       "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       [(set VR128:$dst,
+                         (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+                       VEX_4V;
+  def VMOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+                       (ins VR128:$src1, VR128:$src2),
+                       "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       [(set VR128:$dst,
+                         (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+                       VEX_4V;
+}
 let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
   def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2),
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 2223d549b49..3b9b20a6651 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -431,7 +431,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
 
   switch (TSFlags & X86II::Op0Mask) {
   default: assert(0 && "Invalid prefix!");
-  case 0: break;  // No prefix!
   case X86II::T8:  // 0F 38
     VEX_5M = 0x2;
     break;
@@ -448,21 +447,29 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
   case X86II::XD:  // F2 0F
     VEX_PP = 0x3;
     break;
+  case X86II::TB:  // Bypass: Not used by VEX
+  case 0:
+    break;  // No prefix!
   }
 
   unsigned NumOps = MI.getNumOperands();
-  unsigned i = 0, CurOp = 0;
-  bool IsSrcMem = false;
+  unsigned CurOp = 0;
+
+  if ((TSFlags & X86II::FormMask) == X86II::MRMDestMem)
+    NumOps = CurOp = X86AddrNumOperands;
 
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
   case X86II::MRMSrcMem:
-    IsSrcMem = true;
+  case X86II::MRMDestMem:
   case X86II::MRMSrcReg:
     if (MI.getOperand(CurOp).isReg() &&
         X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_R = 0x0;
-    CurOp++;
+
+    // If the memory destination has been checked first,
+    // go back to the first operand.
+    CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
 
     // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
     // range 0-7 and the difference between the 2 groups is given by the
@@ -486,12 +493,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
       CurOp++;
   }
 
-  i = CurOp;
-  for (; i != NumOps; ++i) {
-    const MCOperand &MO = MI.getOperand(i);
+  for (; CurOp != NumOps; ++CurOp) {
+    const MCOperand &MO = MI.getOperand(CurOp);
     if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
       VEX_B = 0x0;
-    if (!VEX_B && MO.isReg() && IsSrcMem &&
+    if (!VEX_B && MO.isReg() &&
+        ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
         X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
       VEX_X = 0x0;
   }
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index 5cfbaf43015..78debd965fb 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -10718,3 +10718,91 @@
 // CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
           vcvtsi2sd (%eax), %xmm1, %xmm2
 
+// CHECK: vmovaps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
+          vmovaps (%eax), %xmm2
+
+// CHECK: vmovaps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
+          vmovaps %xmm1, %xmm2
+
+// CHECK: vmovaps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
+          vmovaps %xmm1, (%eax)
+
+// CHECK: vmovapd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
+          vmovapd (%eax), %xmm2
+
+// CHECK: vmovapd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
+          vmovapd %xmm1, %xmm2
+
+// CHECK: vmovapd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
+          vmovapd %xmm1, (%eax)
+
+// CHECK: vmovups (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
+          vmovups (%eax), %xmm2
+
+// CHECK: vmovups %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
+          vmovups %xmm1, %xmm2
+
+// CHECK: vmovups %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
+          vmovups %xmm1, (%eax)
+
+// CHECK: vmovupd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
+          vmovupd (%eax), %xmm2
+
+// CHECK: vmovupd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
+          vmovupd %xmm1, %xmm2
+
+// CHECK: vmovupd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
+          vmovupd %xmm1, (%eax)
+
+// CHECK: vmovlps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
+          vmovlps %xmm1, (%eax)
+
+// CHECK: vmovlps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
+          vmovlps (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
+          vmovlpd %xmm1, (%eax)
+
+// CHECK: vmovlpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
+          vmovlpd (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
+          vmovhps %xmm1, (%eax)
+
+// CHECK: vmovhps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
+          vmovhps (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
+          vmovhpd %xmm1, (%eax)
+
+// CHECK: vmovhpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
+          vmovhpd (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlhps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
+          vmovlhps %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovhlps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
+          vmovhlps %xmm1, %xmm2, %xmm3
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 543a0a6921b..6df43c8753e 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -766,4 +766,92 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc5,0x23,0x2a,0x20]
           vcvtsi2sd (%rax), %xmm11, %xmm12
 
+// CHECK: vmovaps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x28,0x20]
+          vmovaps (%rax), %xmm12
+
+// CHECK: vmovaps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
+          vmovaps %xmm11, %xmm12
+
+// CHECK: vmovaps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x29,0x18]
+          vmovaps %xmm11, (%rax)
+
+// CHECK: vmovapd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x28,0x20]
+          vmovapd (%rax), %xmm12
+
+// CHECK: vmovapd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
+          vmovapd %xmm11, %xmm12
+
+// CHECK: vmovapd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x29,0x18]
+          vmovapd %xmm11, (%rax)
+
+// CHECK: vmovups (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x10,0x20]
+          vmovups (%rax), %xmm12
+
+// CHECK: vmovups %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
+          vmovups %xmm11, %xmm12
+
+// CHECK: vmovups %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x11,0x18]
+          vmovups %xmm11, (%rax)
+
+// CHECK: vmovupd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x10,0x20]
+          vmovupd (%rax), %xmm12
+
+// CHECK: vmovupd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
+          vmovupd %xmm11, %xmm12
+
+// CHECK: vmovupd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x11,0x18]
+          vmovupd %xmm11, (%rax)
+
+// CHECK: vmovlps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x13,0x18]
+          vmovlps %xmm11, (%rax)
+
+// CHECK: vmovlps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x12,0x28]
+          vmovlps (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x13,0x18]
+          vmovlpd %xmm11, (%rax)
+
+// CHECK: vmovlpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x12,0x28]
+          vmovlpd (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x17,0x18]
+          vmovhps %xmm11, (%rax)
+
+// CHECK: vmovhps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x16,0x28]
+          vmovhps (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x17,0x18]
+          vmovhpd %xmm11, (%rax)
+
+// CHECK: vmovhpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x16,0x28]
+          vmovhpd (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlhps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
+          vmovlhps %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovhlps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
+          vmovhlps %xmm11, %xmm12, %xmm13
+
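--
Reviewer note (not part of the patch): every new CHECK line above follows mechanically from the two-byte VEX prefix layout, so the expectations can be sanity-checked by hand. The sketch below illustrates that packing; it is a minimal example under stated assumptions, not LLVM code, and the helper name vex2ByteTwo is invented for this note only.

// Illustrative only: how the second byte of a 2-byte VEX prefix (0xC5 xx ...)
// is packed, matching the encodings in the tests above.
//
//   bit 7    : ~R    (inverted ModRM.reg extension, like REX.R)
//   bits 6-3 : ~vvvv (inverted extra source register, 1111 when unused)
//   bit 2    : L     (vector length, 0 = 128-bit)
//   bits 1-0 : pp    (implied prefix: 00=none, 01=66, 10=F3, 11=F2)

#include <cassert>
#include <cstdint>

// Hypothetical helper, named for this sketch only (not an LLVM API).
static uint8_t vex2ByteTwo(bool R, uint8_t VVVV, bool L, uint8_t PP) {
  return static_cast<uint8_t>(((R ? 0 : 1) << 7) |
                              ((~VVVV & 0xF) << 3) |
                              ((L ? 1 : 0) << 2) |
                              (PP & 0x3));
}

int main() {
  // vmovaps %xmm1, %xmm2 --> c5 f8 28 d1:
  // no extended registers (R=0), vvvv unused (~0000 = 1111),
  // 128-bit (L=0), no implied prefix (pp=00) ==> 0xf8.
  assert(vex2ByteTwo(false, 0x0, false, 0x0) == 0xf8);

  // vmovapd is the 0x66-prefixed flavor (pp=01) ==> 0xf9.
  assert(vex2ByteTwo(false, 0x0, false, 0x1) == 0xf9);

  // vmovlps (%eax), %xmm2, %xmm3 --> c5 e8 12 18:
  // VEX.vvvv carries the extra source %xmm2 (register 2) ==> 0xe8.
  assert(vex2ByteTwo(false, 0x2, false, 0x0) == 0xe8);

  // vmovaps (%rax), %xmm12 --> c5 78 28 20: the destination is an
  // extended register, so R=1 and bit 7 clears ==> 0x78.
  assert(vex2ByteTwo(true, 0x0, false, 0x0) == 0x78);
  return 0;
}

The 2-byte form cannot express an extended register in ModRM.rm (REX.B) or in a memory index (REX.X); that is why tests such as vmovaps %xmm11, %xmm12 expect the 3-byte 0xc4 escape instead, with those inverted bits carried in its second byte (hence the 0x41).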