Add more 256-bit forms for a bunch of regular AVX instructions
Add 64-bit (GR64) versions of some instructions (which are not described in
their SSE forms, but are described in AVX)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109063 91177308-0d34-0410-b5e6-96231b3b80d8

parent b937549e51
commit 2b69143083
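In short: the first hunks parameterize the TableGen multiclasses so the same patterns can instantiate both VR128 (xmm) and VR256 (ymm) forms, and the trailing hunks add the corresponding cases to the 32-bit and 64-bit assembler encoding tests. A minimal sketch of what the new definitions let the assembler accept, using only instructions taken from the tests below (the def names are the ones introduced in this diff):

// 256-bit (ymm) forms:
vmovdqa (%eax), %ymm2            // new VMOVDQAYrm
vshufps $7, %ymm2, %ymm5, %ymm1  // new VSHUFPSY
vptest %ymm2, %ymm5              // new VPTESTYrr
// GR64 form described only in AVX:
vmovd %xmm4, %rcx                // VMOVQd64rr_alt: parsed as movd, encoded as movq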
@@ -262,6 +262,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
 def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
 def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
 
+// FIXME: move this to a more appropriate place after all AVX is done.
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+
 def vzmovl_v2i64 : PatFrag<(ops node:$src),
                            (bitconvert (v2i64 (X86vzmovl
                              (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
@@ -1266,24 +1266,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
 multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                          ValueType vt, string asm, PatFrag mem_frag,
                          Domain d, bit IsConvertibleToThreeAddress = 0> {
-  def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
-                   (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
-                   [(set VR128:$dst, (vt (shufp:$src3
-                            VR128:$src1, (mem_frag addr:$src2))))], d>;
+  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+                   (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+                   [(set RC:$dst, (vt (shufp:$src3
+                            RC:$src1, (mem_frag addr:$src2))))], d>;
   let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
-    def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
-                     (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
-                     [(set VR128:$dst,
-                            (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+    def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+                     (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+                     [(set RC:$dst,
+                            (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
 }
 
 let isAsmParserOnly = 1 in {
-  defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
-            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-            memopv4f32, SSEPackedSingle>, VEX_4V;
-  defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
-            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
-            memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+  defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+            memopv4f32, SSEPackedSingle>, VEX_4V;
+  defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+            "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+            memopv8f32, SSEPackedSingle>, VEX_4V;
+  defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+            memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+            "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+            memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -2037,35 +2043,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
 //===---------------------------------------------------------------------===//
 // SSE2 - Move Aligned/Unaligned Packed Integer Instructions
 //===---------------------------------------------------------------------===//
 
 let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
 let isAsmParserOnly = 1 in {
-let neverHasSideEffects = 1 in
-def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
-def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+let neverHasSideEffects = 1 in {
+def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                    "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                    "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
 let canFoldAsLoad = 1, mayLoad = 1 in {
-def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                   "movdqa\t{$src, $dst|$dst, $src}",
-                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
-                   VEX;
-def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                  "vmovdqu\t{$src, $dst|$dst, $src}",
-                  [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
-                  XS, VEX, Requires<[HasAVX]>;
+def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                    "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+                    "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
 }
 
 let mayStore = 1 in {
-def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
-                     (ins i128mem:$dst, VR128:$src),
-                     "movdqa\t{$src, $dst|$dst, $src}",
-                     [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
-def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                  "vmovdqu\t{$src, $dst|$dst, $src}",
-                  [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
-                  XS, VEX, Requires<[HasAVX]>;
+def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
+                      (ins i128mem:$dst, VR128:$src),
+                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+                      (ins i256mem:$dst, VR256:$src),
+                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
+}
 }
 }
 
@@ -2973,11 +2991,13 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
 
 // Instructions to match in the assembler
 let isAsmParserOnly = 1 in {
-// This instructions is in fact an alias to movd with 64 bit dst
 def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
 def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                       "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with GR64 destination, but encode as a "movq"
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                          "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
 }
 
 // Instructions for the disassembler
@@ -3091,9 +3111,20 @@ def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                             (memopv4f32 addr:$src), (undef)))]>;
 }
 
+multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
+                                string OpcodeStr> {
+  def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+  def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+}
+
 let isAsmParserOnly = 1, Predicates = [HasAVX] in {
-  defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
-  defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+  // FIXME: Merge above classes when we have patterns for the ymm version
+  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+  defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
+  defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
 }
 defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
 defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
@@ -3110,15 +3141,30 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                                       (undef))))]>;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
-  defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+  def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                []>;
+  def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                []>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+  // FIXME: Merge above classes when we have patterns for the ymm version
+  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
+  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+}
 defm MOVDDUP : sse3_replicate_dfp<"movddup">;
 
 // Move Unaligned Integer
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
   def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "vlddqu\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+                    "vlddqu\t{$src, $dst|$dst, $src}", []>, VEX;
+}
 def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                  "lddqu\t{$src, $dst|$dst, $src}",
                  [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
@@ -4061,8 +4107,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
                  addr:$dst)]>, OpSize;
 }
 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
   defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+  def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+                               (ins VR128:$src1, i32i8imm:$src2),
+                               "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                               []>, OpSize, VEX;
+}
 defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
 
 // Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -4232,33 +4283,33 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
                     OpSize;
 }
 
-multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
-                                string OpcodeStr> {
-  // Intrinsic operation, reg.
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+                RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
+  // Vector intrinsic operation, reg
   def PSr : SS4AIi8<opcps, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                                "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
 
   // Vector intrinsic operation, mem
   def PSm : Ii8<opcps, MRMSrcMem,
-                (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+                (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                            "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, TA, OpSize, Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
   def PDr : SS4AIi8<opcpd, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                                "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
 
   // Vector intrinsic operation, mem
   def PDm : SS4AIi8<opcpd, MRMSrcMem,
-                    (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                     !strconcat(OpcodeStr,
                                "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     []>, OpSize;
@@ -4315,8 +4366,8 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
         OpSize;
 }
 
-multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
-                                 string OpcodeStr> {
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+                                   string OpcodeStr> {
   // Intrinsic operation, reg.
   def SSr : SS4AIi8<opcss, MRMSrcReg,
         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -4356,8 +4407,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
                                  int_x86_sse41_round_ss, int_x86_sse41_round_sd,
                                  0>, VEX_4V;
   // Instructions for the assembler
-  defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
-  defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+  defm VROUND  : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+                 VEX;
+  defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+                 VEX;
+  defm VROUND  : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
 }
 
 defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
@@ -4366,6 +4420,57 @@ let Constraints = "$src1 = $dst" in
 defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
                                int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
 
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction we'll lower to this in X86ISelLowering primarily from
+// the intel intrinsic that corresponds to this.
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+               "vptest\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+               OpSize, VEX;
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+                "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+               "vptest\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+               OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+                "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+              "ptest \t{$src2, $src1|$src1, $src2}",
+              [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+              OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+              "ptest \t{$src2, $src1|$src1, $src2}",
+              [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+              OpSize;
+}
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                       X86MemOperand x86memop> {
+  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+             []>, OpSize, VEX;
+  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+             []>, OpSize, VEX;
+}
+
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>;
+defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Misc Instructions
 //===----------------------------------------------------------------------===//
@@ -4602,30 +4707,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
 defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
 defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
 
-// ptest instruction we'll lower to this in X86ISelLowering primarily from
-// the intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
-def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-               "vptest\t{$src2, $src1|$src1, $src2}",
-               [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
-               OpSize, VEX;
-def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
-               "vptest\t{$src2, $src1|$src1, $src2}",
-               [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
-               OpSize, VEX;
-}
-
-let Defs = [EFLAGS] in {
-def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-              "ptest \t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
-              OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
-              "ptest \t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
-              OpSize;
-}
-
 let isAsmParserOnly = 1, Predicates = [HasAVX] in
 def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}",

@@ -13158,3 +13158,127 @@
 // CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
 vcvtsi2sdl (%esp), %xmm0, %xmm7
 
+// CHECK: vlddqu (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xff,0xf0,0x10]
+vlddqu (%eax), %ymm2
+
+// CHECK: vmovddup %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xff,0x12,0xea]
+vmovddup %ymm2, %ymm5
+
+// CHECK: vmovddup (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xff,0x12,0x10]
+vmovddup (%eax), %ymm2
+
+// CHECK: vmovdqa %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x6f,0xea]
+vmovdqa %ymm2, %ymm5
+
+// CHECK: vmovdqa %ymm2, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x7f,0x10]
+vmovdqa %ymm2, (%eax)
+
+// CHECK: vmovdqa (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x6f,0x10]
+vmovdqa (%eax), %ymm2
+
+// CHECK: vmovdqu %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x6f,0xea]
+vmovdqu %ymm2, %ymm5
+
+// CHECK: vmovdqu %ymm2, (%eax)
+// CHECK: encoding: [0xc5,0xfe,0x7f,0x10]
+vmovdqu %ymm2, (%eax)
+
+// CHECK: vmovdqu (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x6f,0x10]
+vmovdqu (%eax), %ymm2
+
+// CHECK: vmovshdup %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x16,0xea]
+vmovshdup %ymm2, %ymm5
+
+// CHECK: vmovshdup (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x16,0x10]
+vmovshdup (%eax), %ymm2
+
+// CHECK: vmovsldup %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x12,0xea]
+vmovsldup %ymm2, %ymm5
+
+// CHECK: vmovsldup (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x12,0x10]
+vmovsldup (%eax), %ymm2
+
+// CHECK: vptest %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea]
+vptest %ymm2, %ymm5
+
+// CHECK: vptest (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10]
+vptest (%eax), %ymm2
+
+// CHECK: vroundpd $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07]
+vroundpd $7, %ymm5, %ymm1
+
+// CHECK: vroundpd $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07]
+vroundpd $7, (%eax), %ymm5
+
+// CHECK: vroundps $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07]
+vroundps $7, %ymm5, %ymm1
+
+// CHECK: vroundps $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07]
+vroundps $7, (%eax), %ymm5
+
+// CHECK: vshufpd $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07]
+vshufpd $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vshufpd $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07]
+vshufpd $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vshufps $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07]
+vshufps $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vshufps $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07]
+vshufps $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vtestpd %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea]
+vtestpd %xmm2, %xmm5
+
+// CHECK: vtestpd %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea]
+vtestpd %ymm2, %ymm5
+
+// CHECK: vtestpd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10]
+vtestpd (%eax), %xmm2
+
+// CHECK: vtestpd (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10]
+vtestpd (%eax), %ymm2
+
+// CHECK: vtestps %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea]
+vtestps %xmm2, %xmm5
+
+// CHECK: vtestps %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea]
+vtestps %ymm2, %ymm5
+
+// CHECK: vtestps (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10]
+vtestps (%eax), %xmm2
+
+// CHECK: vtestps (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10]
+vtestps (%eax), %ymm2
+
@@ -3264,3 +3264,139 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
 vcvttss2si (%rcx), %rcx
 
+// CHECK: vlddqu (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7f,0xf0,0x20]
+vlddqu (%rax), %ymm12
+
+// CHECK: vmovddup %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4]
+vmovddup %ymm12, %ymm10
+
+// CHECK: vmovddup (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7f,0x12,0x20]
+vmovddup (%rax), %ymm12
+
+// CHECK: vmovdqa %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4]
+vmovdqa %ymm12, %ymm10
+
+// CHECK: vmovdqa %ymm12, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x7f,0x20]
+vmovdqa %ymm12, (%rax)
+
+// CHECK: vmovdqa (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x6f,0x20]
+vmovdqa (%rax), %ymm12
+
+// CHECK: vmovdqu %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4]
+vmovdqu %ymm12, %ymm10
+
+// CHECK: vmovdqu %ymm12, (%rax)
+// CHECK: encoding: [0xc5,0x7e,0x7f,0x20]
+vmovdqu %ymm12, (%rax)
+
+// CHECK: vmovdqu (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x6f,0x20]
+vmovdqu (%rax), %ymm12
+
+// CHECK: vmovshdup %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4]
+vmovshdup %ymm12, %ymm10
+
+// CHECK: vmovshdup (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x16,0x20]
+vmovshdup (%rax), %ymm12
+
+// CHECK: vmovsldup %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4]
+vmovsldup %ymm12, %ymm10
+
+// CHECK: vmovsldup (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x12,0x20]
+vmovsldup (%rax), %ymm12
+
+// CHECK: vptest %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4]
+vptest %ymm12, %ymm10
+
+// CHECK: vptest (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20]
+vptest (%rax), %ymm12
+
+// CHECK: vroundpd $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07]
+vroundpd $7, %ymm10, %ymm11
+
+// CHECK: vroundpd $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07]
+vroundpd $7, (%rax), %ymm10
+
+// CHECK: vroundps $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07]
+vroundps $7, %ymm10, %ymm11
+
+// CHECK: vroundps $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07]
+vroundps $7, (%rax), %ymm10
+
+// CHECK: vshufpd $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07]
+vshufpd $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vshufpd $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07]
+vshufpd $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vshufps $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07]
+vshufps $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vshufps $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07]
+vshufps $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vtestpd %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4]
+vtestpd %xmm12, %xmm10
+
+// CHECK: vtestpd %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4]
+vtestpd %ymm12, %ymm10
+
+// CHECK: vtestpd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20]
+vtestpd (%rax), %xmm12
+
+// CHECK: vtestpd (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20]
+vtestpd (%rax), %ymm12
+
+// CHECK: vtestps %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4]
+vtestps %xmm12, %xmm10
+
+// CHECK: vtestps %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4]
+vtestps %ymm12, %ymm10
+
+// CHECK: vtestps (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20]
+vtestps (%rax), %xmm12
+
+// CHECK: vtestps (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20]
+vtestps (%rax), %ymm12
+
+// CHECK: vextractps $10, %xmm8, %r8
+// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a]
+vextractps $10, %xmm8, %r8
+
+// CHECK: vextractps $7, %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07]
+vextractps $7, %xmm4, %rcx
+
+// CHECK: vmovd %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1]
+vmovd %xmm4, %rcx
+
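Each test above pairs an instruction with its expected byte encoding: the file is assembled with llvm-mc and the output is matched by FileCheck. A sketch of the usual driver RUN line for such a file (the exact triple used by these tests may differ):

// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s

With that in place, covering a new 256-bit form is just a matter of appending the instruction plus its CHECK lines, as done here.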