Support all 128-bit AVX vector intrinsics. Most of them were already
declared during the addition of the assembler support; the additional
changes are:
- Add missing intrinsics
- Move all SSE conversion instructions in X86InstInfo64.td to the SSE.td file.
- Duplicate some patterns to AVX mode.
- Extend the PCMPESTRM/PCMPISTRM custom inserter to handle the AVX versions.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109878 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2010-07-30 19:54:33 +00:00
parent 5b7dab83e9
commit 98f985607b
3 changed files with 240 additions and 232 deletions

View File

@ -8543,19 +8543,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// all of this code can be replaced with that in the .td file.
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
unsigned Opc;
if (memArg)
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
else
Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
if (!Subtarget->hasAVX()) {
if (memArg)
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
else
Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
} else {
if (memArg)
Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
else
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
}
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
@ -8902,12 +8914,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
// Atomic Lowering.

View File

@ -1540,116 +1540,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
//===----------------------------------------------------------------------===//
// Conversion Instructions...
//
// f64 -> signed i64
def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
"cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
"cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(int_x86_sse2_cvtsd2si64 VR128:$src))]>;
def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst),
(ins f128mem:$src),
"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (int_x86_sse2_cvtsd2si64
(load addr:$src)))]>;
def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (fp_to_sint FR64:$src))]>;
def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(int_x86_sse2_cvttsd2si64 VR128:$src))]>;
def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst),
(ins f128mem:$src),
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(int_x86_sse2_cvttsd2si64
(load addr:$src)))]>;
// Signed i64 -> f64
def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp GR64:$src))]>;
def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsi642sd VR128:$src1,
GR64:$src2))]>;
def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsi642sd VR128:$src1,
(loadi64 addr:$src2)))]>;
} // Constraints = "$src1 = $dst"
// Signed i64 -> f32
def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (sint_to_fp GR64:$src))]>;
def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse_cvtsi642ss VR128:$src1,
GR64:$src2))]>;
def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
(outs VR128:$dst),
(ins VR128:$src1, i64mem:$src2),
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse_cvtsi642ss VR128:$src1,
(loadi64 addr:$src2)))]>;
} // Constraints = "$src1 = $dst"
// f32 -> signed i64
def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
"cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
"cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"cvtss2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(int_x86_sse_cvtss2si64 VR128:$src))]>;
def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
"cvtss2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (int_x86_sse_cvtss2si64
(load addr:$src)))]>;
def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (fp_to_sint FR32:$src))]>;
def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(int_x86_sse_cvttss2si64 VR128:$src))]>;
def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst),
(ins f32mem:$src),
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(int_x86_sse_cvttss2si64 (load addr:$src)))]>;
// Descriptor-table support instructions
// LLDT is not interpreted specially in 64-bit mode because there is no sign

View File

@ -543,41 +543,49 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
let isAsmParserOnly = 1 in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTTSS2SIr64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
VEX_W;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
defm VCVTTSD2SIr64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
VEX_W;
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
VEX_W;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
VEX, VEX_W;
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
VEX_4V;
defm VCVTSI2SSQ : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ssq">, XS,
VEX_4V, VEX_W;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
VEX_4V;
defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sdl">, XD,
VEX_4V;
defm VCVTSI2SDQ : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sdq">, XD,
VEX_4V, VEX_W;
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
VEX_4V;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
VEX_4V, VEX_W;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
VEX_4V;
defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
VEX_4V;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
VEX_4V, VEX_W;
}
// Scalar conversions without the VEX modifier, instantiated via sse12_cvt_s.
//  - Opcode 0x2C entries use fp_to_sint with an FR32/FR64 operand and a
//    GR32/GR64 operand (truncating float -> signed integer).
//  - Opcode 0x2A entries use sint_to_fp with a GR32/GR64 operand and an
//    FR32/FR64 operand (signed integer -> float).
// XS marks the single-precision encodings and XD the double-precision ones.
// Every *64 variant uses GR64, adds REX_W, and spells the mnemonic with a
// "{q}" suffix.
// NOTE(review): argument order is presumably <opc, SrcRC, DstRC, OpNode,
// memop, ld_frag, asm>, mirroring sse12_cvt_sint -- confirm against the
// sse12_cvt_s multiclass, which is outside this hunk.
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
"cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
"cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
"cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
"cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@ -593,10 +601,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (Int SrcRC:$src))]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (Int SrcRC:$src))]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@ -611,21 +621,32 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm> {
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
let isAsmParserOnly = 1 in {
defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
VEX;
f32mem, load, "cvtss2si">, XS, VEX;
defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
XS, VEX, VEX_W;
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
VEX;
f128mem, load, "cvtsd2si">, XD, VEX;
defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
XD, VEX, VEX_W;
// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
// Get rid of this hack or rename the intrinsics, there are several
// instructions that only match with the intrinsic form, why create duplicates
@ -636,18 +657,43 @@ let isAsmParserOnly = 1 in {
"cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
}
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
f32mem, load, "cvtss2si{q}">, XS, REX_W;
defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
f128mem, load, "cvtsd2si">, XD;
defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
f128mem, load, "cvtsd2si">, XD, REX_W;
defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
REX_W;
// AVX (VEX_4V) intrinsic forms of cvtsi2ss / cvtsi2sd. The trailing 0 is the
// Is2Addr argument of sse12_cvt_sint_3addr and selects the three-operand
// "$src2, $src1, $dst" asm string.
//
// The GR64/i64mem variants spell the mnemonic with an explicit "{q}" suffix:
// with a memory source there is no integer register from which the assembler
// can infer the operand size, so an unsuffixed mnemonic would be read as the
// 32-bit form. This matches the non-intrinsic VCVTSI2SS64 / VCVTSI2SD64
// definitions.
let isAsmParserOnly = 1 in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", 0>, XD,
VEX_4V, VEX_W;
}
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
"cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
"cvtsi2ss">, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
"cvtsi2ss{q}">, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
"cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
"cvtsi2sd">, XD;
// 64-bit integer -> scalar double, intrinsic form (REX_W).
// The "{q}" suffix is required for the i64mem variant: with a memory source
// the assembler cannot infer a 64-bit operand from a register name, so an
// unsuffixed "cvtsi2sd" would be read as the 32-bit form. This is consistent
// with Int_CVTSI2SS64 and CVTSI2SD64, which also use "{q}".
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
"cvtsi2sd{q}">, XD, REX_W;
}
// Instructions below don't have an AVX form.
@ -676,20 +722,28 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
let isAsmParserOnly = 1, Pattern = []<dag> in {
defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
int_x86_sse_cvttss2si, f32mem, load,
"cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
int_x86_sse2_cvttsd2si, f128mem, load,
"cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
// AVX (VEX) intrinsic forms of the truncating scalar float -> integer
// conversions (opcode 0x2C).
//  - XS entries take the int_x86_sse_cvttss2si* intrinsics (scalar single).
//  - XD entries take the int_x86_sse2_cvttsd2si* intrinsics (scalar double)
//    and therefore must print "cvttsd2si", not "cvttss2si"; otherwise the
//    emitted assembly names a different instruction than the one encoded.
// The *64 variants produce GR64 and add VEX_W.
let isAsmParserOnly = 1 in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
"cvttss2si">, XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
f128mem, load, "cvttsd2si">, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si">, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
XS;
f32mem, load, "cvttss2si">, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
"cvttss2si{q}">, XS, REX_W;
// Truncating scalar double -> 32-bit integer, intrinsic form (opcode 0x2C,
// XD prefix, int_x86_sse2_cvttsd2si). The mnemonic must be "cvttsd2si";
// printing "cvttss2si" would name the single-precision instruction.
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
f128mem, load, "cvttsd2si\t{$src, $dst|$dst, $src}">,
XD;
f128mem, load, "cvttsd2si">, XD;
// Truncating scalar double -> 64-bit integer, intrinsic form (opcode 0x2C,
// XD prefix, REX_W, int_x86_sse2_cvttsd2si64). The mnemonic must be
// "cvttsd2si{q}"; "cvttss2si{q}" would name the single-precision instruction.
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si{q}">, XD, REX_W;
let isAsmParserOnly = 1, Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
@ -707,6 +761,8 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
"cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB; /* PD SSE3 form is available */
@ -735,13 +791,11 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
XS, VEX_4V;
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load,
"cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
@ -998,11 +1052,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
@ -1618,7 +1672,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
bit Is2Addr = 1> {
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
@ -1626,7 +1680,7 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
bit Is2Addr = 1> {
bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
SSEPackedSingle, Is2Addr>, TB;
@ -1639,24 +1693,32 @@ multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
// Binary Arithmetic instructions
// AVX (VEX_4V) forms of the basic floating-point binary operations.
// Each defm stacks several multiclasses for the same opcode:
//   basic_sse12_fp_binop_s      - scalar forms using the SDNode (fadd, ...)
//   basic_sse12_fp_binop_s_int  - scalar intrinsic forms
//   basic_sse12_fp_binop_p      - packed forms using the SDNode
//   basic_sse12_fp_binop_p_int  - packed intrinsic forms (VMAX / VMIN only)
//   basic_sse12_fp_binop_p_y    - presumably the 256-bit packed forms
//                                 (TODO confirm; multiclass not in this hunk)
// The trailing 0 passed to the *_int multiclasses is their Is2Addr parameter
// (default 1); it is presumably the same parameter for the non-intrinsic
// multiclasses -- confirm against their definitions.
let isAsmParserOnly = 1 in {
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_s_int<0x58, "add", 0>,
basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
// Everything below is explicitly marked non-commutable.
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
}
@ -1721,20 +1783,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(!strconcat("v", OpcodeStr),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
!strconcat(!strconcat("v", OpcodeStr),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, XS, Requires<[HasAVX, OptForSize]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(!strconcat("v", OpcodeStr),
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat(!strconcat("v", OpcodeStr),
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr,
"ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
[(set VR128:$dst, (F32Int VR128:$src))]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr,
"ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
[(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@ -1791,21 +1853,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int> {
def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>;
def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>;
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
[(set VR128:$dst, (F64Int VR128:$src))]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
[(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@ -1842,27 +1902,31 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Square root.
defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
VEX_4V;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
int_x86_sse_rsqrt_ss>, VEX_4V;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
VEX_4V;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
// Square root.
@ -2442,6 +2506,25 @@ let ExeDomain = SSEPackedInt in {
}
} // Constraints = "$src1 = $dst"
// AVX selection patterns for the whole-register shifts: every pattern in this
// block selects VPSLLDQri / VPSRLDQri and is guarded by HasAVX.
//  - psll_dq / psrl_dq pass the immediate through BYTE_imm, while the *_bs
//    intrinsic variants pass the immediate unchanged.
//    NOTE(review): BYTE_imm is defined outside this hunk; presumably it
//    converts a bit count to a byte count -- confirm.
//  - X86fsrl, X86vshl and X86vshr are mapped the same way, via BYTE_imm.
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
(v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
(v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
(v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
(v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
// Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
(v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
(v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
@ -4257,7 +4340,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
(VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
Requires<[HasAVX]>;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
Requires<[HasSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
@ -4682,27 +4769,39 @@ let Constraints = "$src1 = $dst" in {
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop> {
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
def rr : I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
def rm : I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
}
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem>;
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem>;
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
memopv16i8, int_x86_sse41_blendvpd>;
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
memopv16i8, int_x86_sse41_blendvps>;
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
memopv16i8, int_x86_sse41_pblendvb>;
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem>;
// VR256 / i256mem forms of vblendvpd / vblendvps. Pattern is forced to the
// empty list here, so the 128-bit sse41 intrinsics passed below only satisfy
// the multiclass signature and are not used for selection.
let Pattern = []<dag> in { // FIXME: implement 256 intrinsics here.
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
memopv32i8, int_x86_sse41_blendvpd>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
memopv32i8, int_x86_sse41_blendvps>;
}
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@ -4778,17 +4877,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
//===----------------------------------------------------------------------===//
// Packed Compare Implicit Length Strings, Return Mask
let Defs = [EFLAGS], usesCustomInserter = 1 in {
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"#PCMPISTRM128rr PSEUDO!",
multiclass pseudo_pcmpistrm<string asm> {
def REG : Ii8<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>, OpSize;
def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"#PCMPISTRM128rm PSEUDO!",
imm:$src3))]>;
def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
VR128:$src1, (load addr:$src2), imm:$src3))]>;
}
// Pseudo instructions for the implicit-length string compare returning a mask.
// pseudo_pcmpistrm emits a REG and a MEM def per defm, producing
// PCMPISTRM128REG/MEM (requires SSE4.2) and VPCMPISTRM128REG/MEM (requires
// AVX). usesCustomInserter = 1 marks them for expansion by the target's
// custom inserter; EFLAGS is listed as an implicit def.
let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
@ -4811,20 +4913,20 @@ let Defs = [XMM0, EFLAGS] in {
}
// Packed Compare Explicit Length Strings, Return Mask
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
[(set VR128:$dst,
(int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"#PCMPESTRM128rm PSEUDO!",
multiclass pseudo_pcmpestrm<string asm> {
def REG : Ii8<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
OpSize;
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
}
// Pseudo instructions for the explicit-length string compare returning a mask.
// pseudo_pcmpestrm emits a REG and a MEM def per defm, producing
// PCMPESTRM128REG/MEM (requires SSE4.2) and VPCMPESTRM128REG/MEM (requires
// AVX). EAX and EDX are implicit uses (they appear as operands of the
// intrinsic in the patterns), EFLAGS is an implicit def, and
// usesCustomInserter = 1 marks the pseudos for expansion by the target's
// custom inserter.
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],