mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Support all 128-bit AVX vector intrinsics. Most of them were already
declared when assembler support was added; the additional changes are: - Add missing intrinsics - Move all SSE conversion instructions in X86InstInfo64.td to the SSE.td file. - Duplicate some patterns to AVX mode. - Step into the PCMPEST/PCMPIST custom inserter and add AVX versions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109878 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5b7dab83e9
commit
98f985607b
@ -8543,19 +8543,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
|
||||
}
|
||||
|
||||
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
|
||||
// all of this code can be replaced with that in the .td file.
|
||||
// or XMM0_V32I8 in AVX all of this code can be replaced with that
|
||||
// in the .td file.
|
||||
MachineBasicBlock *
|
||||
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
|
||||
unsigned numArgs, bool memArg) const {
|
||||
|
||||
assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
|
||||
"Target must have SSE4.2 or AVX features enabled");
|
||||
|
||||
DebugLoc dl = MI->getDebugLoc();
|
||||
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
||||
|
||||
unsigned Opc;
|
||||
if (memArg)
|
||||
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
|
||||
else
|
||||
Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
|
||||
|
||||
if (!Subtarget->hasAVX()) {
|
||||
if (memArg)
|
||||
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
|
||||
else
|
||||
Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
|
||||
} else {
|
||||
if (memArg)
|
||||
Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
|
||||
else
|
||||
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
|
||||
|
||||
@ -8902,12 +8914,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
}
|
||||
// String/text processing lowering.
|
||||
case X86::PCMPISTRM128REG:
|
||||
case X86::VPCMPISTRM128REG:
|
||||
return EmitPCMP(MI, BB, 3, false /* in-mem */);
|
||||
case X86::PCMPISTRM128MEM:
|
||||
case X86::VPCMPISTRM128MEM:
|
||||
return EmitPCMP(MI, BB, 3, true /* in-mem */);
|
||||
case X86::PCMPESTRM128REG:
|
||||
case X86::VPCMPESTRM128REG:
|
||||
return EmitPCMP(MI, BB, 5, false /* in mem */);
|
||||
case X86::PCMPESTRM128MEM:
|
||||
case X86::VPCMPESTRM128MEM:
|
||||
return EmitPCMP(MI, BB, 5, true /* in mem */);
|
||||
|
||||
// Atomic Lowering.
|
||||
|
@ -1540,116 +1540,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C64r)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Conversion Instructions...
|
||||
//
|
||||
|
||||
// f64 -> signed i64
|
||||
def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
|
||||
"cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
|
||||
def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
|
||||
"cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
|
||||
def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
|
||||
"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst,
|
||||
(int_x86_sse2_cvtsd2si64 VR128:$src))]>;
|
||||
def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst),
|
||||
(ins f128mem:$src),
|
||||
"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (int_x86_sse2_cvtsd2si64
|
||||
(load addr:$src)))]>;
|
||||
def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
|
||||
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (fp_to_sint FR64:$src))]>;
|
||||
def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
|
||||
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
|
||||
def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
|
||||
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst,
|
||||
(int_x86_sse2_cvttsd2si64 VR128:$src))]>;
|
||||
def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst),
|
||||
(ins f128mem:$src),
|
||||
"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst,
|
||||
(int_x86_sse2_cvttsd2si64
|
||||
(load addr:$src)))]>;
|
||||
|
||||
// Signed i64 -> f64
|
||||
def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
|
||||
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (sint_to_fp GR64:$src))]>;
|
||||
def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
|
||||
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
|
||||
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtsi642sd VR128:$src1,
|
||||
GR64:$src2))]>;
|
||||
def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
|
||||
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse2_cvtsi642sd VR128:$src1,
|
||||
(loadi64 addr:$src2)))]>;
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
// Signed i64 -> f32
|
||||
def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
|
||||
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (sint_to_fp GR64:$src))]>;
|
||||
def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
|
||||
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
|
||||
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
|
||||
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse_cvtsi642ss VR128:$src1,
|
||||
GR64:$src2))]>;
|
||||
def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
|
||||
(outs VR128:$dst),
|
||||
(ins VR128:$src1, i64mem:$src2),
|
||||
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse_cvtsi642ss VR128:$src1,
|
||||
(loadi64 addr:$src2)))]>;
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
// f32 -> signed i64
|
||||
def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
|
||||
"cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
|
||||
def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
|
||||
"cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
|
||||
def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
|
||||
"cvtss2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst,
|
||||
(int_x86_sse_cvtss2si64 VR128:$src))]>;
|
||||
def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
|
||||
"cvtss2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (int_x86_sse_cvtss2si64
|
||||
(load addr:$src)))]>;
|
||||
def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
|
||||
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (fp_to_sint FR32:$src))]>;
|
||||
def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
|
||||
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
|
||||
def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
|
||||
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst,
|
||||
(int_x86_sse_cvttss2si64 VR128:$src))]>;
|
||||
def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst),
|
||||
(ins f32mem:$src),
|
||||
"cvttss2si{q}\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst,
|
||||
(int_x86_sse_cvttss2si64 (load addr:$src)))]>;
|
||||
|
||||
// Descriptor-table support instructions
|
||||
|
||||
// LLDT is not interpreted specially in 64-bit mode because there is no sign
|
||||
|
@ -543,41 +543,49 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1 in {
|
||||
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
|
||||
defm VCVTTSS2SIr64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
|
||||
VEX_W;
|
||||
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
|
||||
defm VCVTTSD2SIr64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
|
||||
VEX_W;
|
||||
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
|
||||
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
|
||||
VEX_W;
|
||||
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
|
||||
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
|
||||
VEX, VEX_W;
|
||||
|
||||
// The assembler can recognize rr 64-bit instructions by seeing a rxx
|
||||
// register, but the same isn't true when only using memory operands,
|
||||
// provide other assembly "l" and "q" forms to address this explicitly
|
||||
// where appropriate to do so.
|
||||
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
|
||||
VEX_4V;
|
||||
defm VCVTSI2SSQ : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ssq">, XS,
|
||||
VEX_4V, VEX_W;
|
||||
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
|
||||
VEX_4V;
|
||||
defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sdl">, XD,
|
||||
VEX_4V;
|
||||
defm VCVTSI2SDQ : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sdq">, XD,
|
||||
VEX_4V, VEX_W;
|
||||
defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
|
||||
VEX_4V;
|
||||
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
|
||||
VEX_4V, VEX_W;
|
||||
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
|
||||
VEX_4V;
|
||||
defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
|
||||
VEX_4V;
|
||||
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
|
||||
VEX_4V, VEX_W;
|
||||
}
|
||||
|
||||
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
|
||||
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
|
||||
"cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
|
||||
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
|
||||
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
|
||||
"cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
|
||||
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
|
||||
"cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
|
||||
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
|
||||
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
|
||||
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
|
||||
"cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
|
||||
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
|
||||
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
|
||||
|
||||
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
|
||||
// and/or XMM operand(s).
|
||||
@ -593,10 +601,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
|
||||
string asm> {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
|
||||
[(set DstRC:$dst, (Int SrcRC:$src))]>;
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
|
||||
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
|
||||
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set DstRC:$dst, (Int SrcRC:$src))]>;
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
|
||||
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
|
||||
}
|
||||
|
||||
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
|
||||
@ -611,21 +621,32 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
|
||||
|
||||
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
|
||||
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
|
||||
PatFrag ld_frag, string asm> {
|
||||
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
|
||||
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
|
||||
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
|
||||
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||
(ins DstRC:$src1, x86memop:$src2), asm,
|
||||
(ins DstRC:$src1, x86memop:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1 in {
|
||||
defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
|
||||
f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
|
||||
VEX;
|
||||
f32mem, load, "cvtss2si">, XS, VEX;
|
||||
defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
|
||||
int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
|
||||
XS, VEX, VEX_W;
|
||||
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
|
||||
f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
|
||||
VEX;
|
||||
f128mem, load, "cvtsd2si">, XD, VEX;
|
||||
defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
|
||||
int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
|
||||
XD, VEX, VEX_W;
|
||||
|
||||
// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
|
||||
// Get rid of this hack or rename the intrinsics, there are several
|
||||
// instructions that only match with the intrinsic form, why create duplicates
|
||||
@ -636,18 +657,43 @@ let isAsmParserOnly = 1 in {
|
||||
"cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
|
||||
}
|
||||
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
|
||||
f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
|
||||
f32mem, load, "cvtss2si">, XS;
|
||||
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
|
||||
f32mem, load, "cvtss2si{q}">, XS, REX_W;
|
||||
defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
|
||||
f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
|
||||
f128mem, load, "cvtsd2si">, XD;
|
||||
defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
|
||||
f128mem, load, "cvtsd2si">, XD, REX_W;
|
||||
|
||||
defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
|
||||
REX_W;
|
||||
|
||||
let isAsmParserOnly = 1 in {
|
||||
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
|
||||
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
|
||||
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
|
||||
int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
|
||||
VEX_W;
|
||||
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
|
||||
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
|
||||
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
|
||||
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
|
||||
VEX_4V, VEX_W;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
|
||||
int_x86_sse_cvtsi2ss, i32mem, loadi32,
|
||||
"cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
|
||||
"cvtsi2ss">, XS;
|
||||
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
|
||||
int_x86_sse_cvtsi642ss, i64mem, loadi64,
|
||||
"cvtsi2ss{q}">, XS, REX_W;
|
||||
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
|
||||
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
|
||||
"cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
|
||||
"cvtsi2sd">, XD;
|
||||
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
|
||||
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
|
||||
"cvtsi2sd">, XD, REX_W;
|
||||
}
|
||||
|
||||
// Instructions below don't have an AVX form.
|
||||
@ -676,20 +722,28 @@ let Constraints = "$src1 = $dst" in {
|
||||
/// SSE 1 Only
|
||||
|
||||
// Aliases for intrinsics
|
||||
let isAsmParserOnly = 1, Pattern = []<dag> in {
|
||||
defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
|
||||
int_x86_sse_cvttss2si, f32mem, load,
|
||||
"cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
|
||||
defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
|
||||
int_x86_sse2_cvttsd2si, f128mem, load,
|
||||
"cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
|
||||
let isAsmParserOnly = 1 in {
|
||||
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
|
||||
f32mem, load, "cvttss2si">, XS, VEX;
|
||||
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
|
||||
int_x86_sse_cvttss2si64, f32mem, load,
|
||||
"cvttss2si">, XS, VEX, VEX_W;
|
||||
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
|
||||
f128mem, load, "cvttss2si">, XD, VEX;
|
||||
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
|
||||
int_x86_sse2_cvttsd2si64, f128mem, load,
|
||||
"cvttss2si">, XD, VEX, VEX_W;
|
||||
}
|
||||
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
|
||||
f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
|
||||
XS;
|
||||
f32mem, load, "cvttss2si">, XS;
|
||||
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
|
||||
int_x86_sse_cvttss2si64, f32mem, load,
|
||||
"cvttss2si{q}">, XS, REX_W;
|
||||
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
|
||||
f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
|
||||
XD;
|
||||
f128mem, load, "cvttss2si">, XD;
|
||||
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
|
||||
int_x86_sse2_cvttsd2si64, f128mem, load,
|
||||
"cvttss2si{q}">, XD, REX_W;
|
||||
|
||||
let isAsmParserOnly = 1, Pattern = []<dag> in {
|
||||
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
|
||||
@ -707,6 +761,8 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
|
||||
let Pattern = []<dag> in {
|
||||
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
|
||||
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
|
||||
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
|
||||
"cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
|
||||
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
|
||||
"cvtdq2ps\t{$src, $dst|$dst, $src}",
|
||||
SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
|
||||
@ -735,13 +791,11 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
|
||||
|
||||
let isAsmParserOnly = 1 in
|
||||
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
|
||||
int_x86_sse2_cvtsd2ss, f64mem, load,
|
||||
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
|
||||
XS, VEX_4V;
|
||||
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
|
||||
XS, VEX_4V;
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
|
||||
int_x86_sse2_cvtsd2ss, f64mem, load,
|
||||
"cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
|
||||
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
|
||||
|
||||
// Convert scalar single to scalar double
|
||||
let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
|
||||
@ -998,11 +1052,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||
|
||||
let isAsmParserOnly = 1 in {
|
||||
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"cvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
"vcvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
|
||||
VEX, Requires<[HasAVX]>;
|
||||
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||
"cvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
"vcvtps2pd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
|
||||
(load addr:$src)))]>,
|
||||
VEX, Requires<[HasAVX]>;
|
||||
@ -1618,7 +1672,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
|
||||
bit Is2Addr = 1> {
|
||||
bit Is2Addr = 1> {
|
||||
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
|
||||
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
@ -1626,7 +1680,7 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
|
||||
}
|
||||
|
||||
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
|
||||
bit Is2Addr = 1> {
|
||||
bit Is2Addr = 1> {
|
||||
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
|
||||
SSEPackedSingle, Is2Addr>, TB;
|
||||
@ -1639,24 +1693,32 @@ multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
|
||||
// Binary Arithmetic instructions
|
||||
let isAsmParserOnly = 1 in {
|
||||
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
|
||||
basic_sse12_fp_binop_s_int<0x58, "add", 0>,
|
||||
basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
|
||||
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
|
||||
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
|
||||
basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
|
||||
basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
|
||||
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
|
||||
|
||||
let isCommutable = 0 in {
|
||||
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
|
||||
basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
|
||||
basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
|
||||
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
|
||||
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
|
||||
basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
|
||||
basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
|
||||
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
|
||||
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
|
||||
basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
|
||||
basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
|
||||
basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
|
||||
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
|
||||
basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
|
||||
basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
|
||||
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
|
||||
}
|
||||
}
|
||||
@ -1721,20 +1783,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
|
||||
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F32Int> {
|
||||
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>, XS, Requires<[HasAVX, OptForSize]>;
|
||||
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, ssmem:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
|
||||
[(set VR128:$dst, (F32Int VR128:$src))]>;
|
||||
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
|
||||
[(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
|
||||
}
|
||||
|
||||
/// sse1_fp_unop_p - SSE1 unops in packed form.
|
||||
@ -1791,21 +1853,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
|
||||
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
|
||||
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F64Int> {
|
||||
def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
|
||||
(ins FR64:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>;
|
||||
def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>;
|
||||
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
|
||||
(ins FR64:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
|
||||
[(set VR128:$dst, (F64Int VR128:$src))]>;
|
||||
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
|
||||
!strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
|
||||
[(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
|
||||
}
|
||||
|
||||
/// sse2_fp_unop_p - SSE2 unops in vector forms.
|
||||
@ -1842,27 +1902,31 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
// Square root.
|
||||
defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
|
||||
sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
|
||||
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
|
||||
sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
|
||||
VEX_4V;
|
||||
|
||||
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
|
||||
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
|
||||
sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
|
||||
sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
|
||||
sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
|
||||
sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
|
||||
VEX;
|
||||
|
||||
// Reciprocal approximations. Note that these typically require refinement
|
||||
// in order to obtain suitable precision.
|
||||
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
|
||||
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
|
||||
int_x86_sse_rsqrt_ss>, VEX_4V;
|
||||
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
|
||||
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
|
||||
sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
|
||||
sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
|
||||
|
||||
defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
|
||||
defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
|
||||
VEX_4V;
|
||||
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
|
||||
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
|
||||
sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
|
||||
sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
|
||||
}
|
||||
|
||||
// Square root.
|
||||
@ -2442,6 +2506,25 @@ let ExeDomain = SSEPackedInt in {
|
||||
}
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
|
||||
(v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
|
||||
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
|
||||
(v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
|
||||
// Shift up / down and insert zeros.
|
||||
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
|
||||
(v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
|
||||
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
|
||||
(v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
|
||||
@ -4257,7 +4340,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in
|
||||
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
|
||||
|
||||
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
|
||||
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
|
||||
(VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
|
||||
Requires<[HasAVX]>;
|
||||
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
|
||||
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
|
||||
Requires<[HasSSE41]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE4.1 - Round Instructions
|
||||
@ -4682,27 +4769,39 @@ let Constraints = "$src1 = $dst" in {
|
||||
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
|
||||
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
|
||||
RegisterClass RC, X86MemOperand x86memop> {
|
||||
RegisterClass RC, X86MemOperand x86memop,
|
||||
PatFrag mem_frag, Intrinsic IntId> {
|
||||
def rr : I<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, RC:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
|
||||
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
|
||||
def rm : I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, RC:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
|
||||
RC:$src3))],
|
||||
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
|
||||
}
|
||||
}
|
||||
|
||||
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem>;
|
||||
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem>;
|
||||
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem>;
|
||||
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem>;
|
||||
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
|
||||
memopv16i8, int_x86_sse41_blendvpd>;
|
||||
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
|
||||
memopv16i8, int_x86_sse41_blendvps>;
|
||||
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
|
||||
memopv16i8, int_x86_sse41_pblendvb>;
|
||||
|
||||
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem>;
|
||||
let Pattern = []<dag> in { // FIXME: implement 256 intrinsics here.
|
||||
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
|
||||
memopv32i8, int_x86_sse41_blendvpd>;
|
||||
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
||||
memopv32i8, int_x86_sse41_blendvps>;
|
||||
}
|
||||
|
||||
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
||||
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
||||
@ -4778,17 +4877,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Packed Compare Implicit Length Strings, Return Mask
|
||||
let Defs = [EFLAGS], usesCustomInserter = 1 in {
|
||||
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||
"#PCMPISTRM128rr PSEUDO!",
|
||||
multiclass pseudo_pcmpistrm<string asm> {
|
||||
def REG : Ii8<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
|
||||
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
|
||||
imm:$src3))]>, OpSize;
|
||||
def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||
"#PCMPISTRM128rm PSEUDO!",
|
||||
imm:$src3))]>;
|
||||
def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
|
||||
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
|
||||
VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
|
||||
VR128:$src1, (load addr:$src2), imm:$src3))]>;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS], usesCustomInserter = 1 in {
|
||||
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
|
||||
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
|
||||
}
|
||||
|
||||
let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
|
||||
@ -4811,20 +4913,20 @@ let Defs = [XMM0, EFLAGS] in {
|
||||
}
|
||||
|
||||
// Packed Compare Explicit Length Strings, Return Mask
|
||||
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
|
||||
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||
"#PCMPESTRM128rr PSEUDO!",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse42_pcmpestrm128
|
||||
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
|
||||
|
||||
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||
"#PCMPESTRM128rm PSEUDO!",
|
||||
multiclass pseudo_pcmpestrm<string asm> {
|
||||
def REG : Ii8<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
|
||||
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
|
||||
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
|
||||
OpSize;
|
||||
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
|
||||
def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
|
||||
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
|
||||
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
|
||||
}
|
||||
|
||||
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
|
||||
defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
|
||||
defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
|
||||
}
|
||||
|
||||
let isAsmParserOnly = 1, Predicates = [HasAVX],
|
||||
|
Loading…
Reference in New Issue
Block a user