mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
AVX-512: masked load/store + intrinsics for them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203790 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ab35f3fd0c
commit
3d1ae71813
@ -1321,6 +1321,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
|
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
|
||||||
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
|
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
|
||||||
[IntrReadArgMem]>;
|
[IntrReadArgMem]>;
|
||||||
|
def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">,
|
||||||
|
Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||||
|
[IntrReadArgMem]>;
|
||||||
|
def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">,
|
||||||
|
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||||
|
[IntrReadArgMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conditional store ops
|
// Conditional store ops
|
||||||
@ -1339,6 +1345,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
GCCBuiltin<"__builtin_ia32_maskstoreps256">,
|
GCCBuiltin<"__builtin_ia32_maskstoreps256">,
|
||||||
Intrinsic<[], [llvm_ptr_ty,
|
Intrinsic<[], [llvm_ptr_ty,
|
||||||
llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
|
llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
|
||||||
|
def int_x86_avx512_mask_storeu_ps_512 :
|
||||||
|
GCCBuiltin<"__builtin_ia32_storeups512_mask">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||||
|
[IntrReadWriteArgMem]>;
|
||||||
|
def int_x86_avx512_mask_storeu_pd_512 :
|
||||||
|
GCCBuiltin<"__builtin_ia32_storeupd512_mask">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||||
|
[IntrReadWriteArgMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -1753,6 +1767,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
|
def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
|
||||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
|
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
|
||||||
[IntrReadArgMem]>;
|
[IntrReadArgMem]>;
|
||||||
|
def int_x86_avx512_mask_loadu_d_512 : GCCBuiltin<"__builtin_ia32_loaddqusi512_mask">,
|
||||||
|
Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
|
||||||
|
[IntrReadArgMem]>;
|
||||||
|
def int_x86_avx512_mask_loadu_q_512 : GCCBuiltin<"__builtin_ia32_loaddqudi512_mask">,
|
||||||
|
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||||
|
[IntrReadArgMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conditional store ops
|
// Conditional store ops
|
||||||
@ -1771,6 +1791,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
GCCBuiltin<"__builtin_ia32_maskstoreq256">,
|
GCCBuiltin<"__builtin_ia32_maskstoreq256">,
|
||||||
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
|
Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
|
||||||
[IntrReadWriteArgMem]>;
|
[IntrReadWriteArgMem]>;
|
||||||
|
def int_x86_avx512_mask_storeu_d_512 :
|
||||||
|
GCCBuiltin<"__builtin_ia32_storedqusi512_mask">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
|
||||||
|
[IntrReadWriteArgMem]>;
|
||||||
|
def int_x86_avx512_mask_storeu_q_512 :
|
||||||
|
GCCBuiltin<"__builtin_ia32_storedqudi512_mask">,
|
||||||
|
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||||
|
[IntrReadWriteArgMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Variable bit shift ops
|
// Variable bit shift ops
|
||||||
|
@ -1222,152 +1222,139 @@ def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
|
|||||||
// AVX-512 - Aligned and unaligned load and store
|
// AVX-512 - Aligned and unaligned load and store
|
||||||
//
|
//
|
||||||
|
|
||||||
multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
|
multiclass avx512_load<bits<8> opc, RegisterClass RC, RegisterClass KRC,
|
||||||
X86MemOperand x86memop, PatFrag ld_frag,
|
X86MemOperand x86memop, PatFrag ld_frag,
|
||||||
string asm, Domain d, bit IsReMaterializable = 1> {
|
string asm, Domain d,
|
||||||
let hasSideEffects = 0 in
|
ValueType vt, bit IsReMaterializable = 1> {
|
||||||
|
let hasSideEffects = 0 in {
|
||||||
def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
|
def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
|
||||||
!strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
|
!strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
|
||||||
EVEX;
|
EVEX;
|
||||||
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
|
def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
|
||||||
|
[], d>, EVEX, EVEX_KZ;
|
||||||
|
}
|
||||||
|
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
|
||||||
def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
||||||
!strconcat(asm, " \t{$src, $dst|$dst, $src}"),
|
!strconcat(asm, " \t{$src, $dst|$dst, $src}"),
|
||||||
[(set RC:$dst, (ld_frag addr:$src))], d>, EVEX;
|
[(set (vt RC:$dst), (ld_frag addr:$src))], d>, EVEX;
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
|
||||||
def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
|
def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
|
||||||
(ins RC:$src1, KRC:$mask, RC:$src2),
|
(ins RC:$src1, KRC:$mask, RC:$src2),
|
||||||
!strconcat(asm,
|
!strconcat(asm,
|
||||||
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
|
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
|
||||||
EVEX, EVEX_K;
|
EVEX, EVEX_K;
|
||||||
|
let mayLoad = 1 in
|
||||||
def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
|
def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
|
||||||
(ins RC:$src1, KRC:$mask, x86memop:$src2),
|
(ins RC:$src1, KRC:$mask, x86memop:$src2),
|
||||||
!strconcat(asm,
|
!strconcat(asm,
|
||||||
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
|
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
|
||||||
[], d>, EVEX, EVEX_K;
|
[], d>, EVEX, EVEX_K;
|
||||||
}
|
}
|
||||||
|
let mayLoad = 1 in
|
||||||
|
def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
|
||||||
|
(ins KRC:$mask, x86memop:$src2),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src2, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src2}"),
|
||||||
|
[], d>, EVEX, EVEX_KZ;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
|
multiclass avx512_store<bits<8> opc, RegisterClass RC, RegisterClass KRC,
|
||||||
"vmovaps", SSEPackedSingle>,
|
X86MemOperand x86memop, PatFrag store_frag,
|
||||||
|
string asm, Domain d, ValueType vt> {
|
||||||
|
let isAsmParserOnly = 1, hasSideEffects = 0 in {
|
||||||
|
def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
|
||||||
|
!strconcat(asm, " \t{$src, $dst|$dst, $src}"), [], d>,
|
||||||
|
EVEX;
|
||||||
|
let Constraints = "$src1 = $dst" in
|
||||||
|
def alt_rrk : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
|
||||||
|
(ins RC:$src1, KRC:$mask, RC:$src2),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
|
||||||
|
EVEX, EVEX_K;
|
||||||
|
def alt_rrkz : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
|
||||||
|
(ins KRC:$mask, RC:$src),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
|
||||||
|
[], d>, EVEX, EVEX_KZ;
|
||||||
|
}
|
||||||
|
let mayStore = 1 in {
|
||||||
|
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
|
||||||
|
!strconcat(asm, " \t{$src, $dst|$dst, $src}"),
|
||||||
|
[(store_frag (vt RC:$src), addr:$dst)], d>, EVEX;
|
||||||
|
def mrk : AVX512PI<opc, MRMDestMem, (outs),
|
||||||
|
(ins x86memop:$dst, KRC:$mask, RC:$src),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
|
||||||
|
[], d>, EVEX, EVEX_K;
|
||||||
|
def mrkz : AVX512PI<opc, MRMDestMem, (outs),
|
||||||
|
(ins x86memop:$dst, KRC:$mask, RC:$src),
|
||||||
|
!strconcat(asm,
|
||||||
|
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
|
||||||
|
[], d>, EVEX, EVEX_KZ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defm VMOVAPSZ : avx512_load<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
|
||||||
|
"vmovaps", SSEPackedSingle, v16f32>,
|
||||||
|
avx512_store<0x29, VR512, VK16WM, f512mem, alignedstore512,
|
||||||
|
"vmovaps", SSEPackedSingle, v16f32>,
|
||||||
PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
|
defm VMOVAPDZ : avx512_load<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
|
||||||
"vmovapd", SSEPackedDouble>,
|
"vmovapd", SSEPackedDouble, v8f64>,
|
||||||
|
avx512_store<0x29, VR512, VK8WM, f512mem, alignedstore512,
|
||||||
|
"vmovapd", SSEPackedDouble, v8f64>,
|
||||||
PD, EVEX_V512, VEX_W,
|
PD, EVEX_V512, VEX_W,
|
||||||
EVEX_CD8<64, CD8VF>;
|
EVEX_CD8<64, CD8VF>;
|
||||||
defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
|
defm VMOVUPSZ : avx512_load<0x10, VR512, VK16WM, f512mem, loadv16f32,
|
||||||
"vmovups", SSEPackedSingle>,
|
"vmovups", SSEPackedSingle, v16f32>,
|
||||||
|
avx512_store<0x11, VR512, VK16WM, f512mem, store,
|
||||||
|
"vmovups", SSEPackedSingle, v16f32>,
|
||||||
PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
|
defm VMOVUPDZ : avx512_load<0x10, VR512, VK8WM, f512mem, loadv8f64,
|
||||||
"vmovupd", SSEPackedDouble, 0>,
|
"vmovupd", SSEPackedDouble, v8f64, 0>,
|
||||||
|
avx512_store<0x11, VR512, VK8WM, f512mem, store,
|
||||||
|
"vmovupd", SSEPackedDouble, v8f64>,
|
||||||
PD, EVEX_V512, VEX_W,
|
PD, EVEX_V512, VEX_W,
|
||||||
EVEX_CD8<64, CD8VF>;
|
EVEX_CD8<64, CD8VF>;
|
||||||
def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
|
def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
|
||||||
"vmovaps\t{$src, $dst|$dst, $src}",
|
(bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
|
||||||
[(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
|
(VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
|
||||||
SSEPackedSingle>, EVEX, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
|
||||||
def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
|
|
||||||
"vmovapd\t{$src, $dst|$dst, $src}",
|
|
||||||
[(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
|
|
||||||
SSEPackedDouble>, EVEX, EVEX_V512,
|
|
||||||
PD, VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
|
|
||||||
"vmovups\t{$src, $dst|$dst, $src}",
|
|
||||||
[(store (v16f32 VR512:$src), addr:$dst)],
|
|
||||||
SSEPackedSingle>, EVEX, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
|
|
||||||
def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
|
|
||||||
"vmovupd\t{$src, $dst|$dst, $src}",
|
|
||||||
[(store (v8f64 VR512:$src), addr:$dst)],
|
|
||||||
SSEPackedDouble>, EVEX, EVEX_V512,
|
|
||||||
PD, VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
|
|
||||||
let hasSideEffects = 0 in {
|
def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
|
||||||
def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
|
(bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
|
||||||
(ins VR512:$src),
|
(VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
|
||||||
"vmovdqa32\t{$src, $dst|$dst, $src}", []>,
|
|
||||||
EVEX, EVEX_V512;
|
|
||||||
def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
|
|
||||||
(ins VR512:$src),
|
|
||||||
"vmovdqa64\t{$src, $dst|$dst, $src}", []>,
|
|
||||||
EVEX, EVEX_V512, VEX_W;
|
|
||||||
let mayStore = 1 in {
|
|
||||||
def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs),
|
|
||||||
(ins i512mem:$dst, VR512:$src),
|
|
||||||
"vmovdqa32\t{$src, $dst|$dst, $src}", []>,
|
|
||||||
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
|
||||||
def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs),
|
|
||||||
(ins i512mem:$dst, VR512:$src),
|
|
||||||
"vmovdqa64\t{$src, $dst|$dst, $src}", []>,
|
|
||||||
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
}
|
|
||||||
let mayLoad = 1 in {
|
|
||||||
def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
|
|
||||||
(ins i512mem:$src),
|
|
||||||
"vmovdqa32\t{$src, $dst|$dst, $src}", []>,
|
|
||||||
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
|
||||||
def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
|
|
||||||
(ins i512mem:$src),
|
|
||||||
"vmovdqa64\t{$src, $dst|$dst, $src}", []>,
|
|
||||||
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 512-bit aligned load/store
|
def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
|
||||||
def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>;
|
GR16:$mask),
|
||||||
def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
|
(VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
|
||||||
|
VR512:$src)>;
|
||||||
|
def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
|
||||||
|
GR8:$mask),
|
||||||
|
(VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
|
||||||
|
VR512:$src)>;
|
||||||
|
|
||||||
def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
|
defm VMOVDQA32: avx512_load<0x6F, VR512, VK16WM, i512mem, alignedloadv16i32,
|
||||||
(VMOVDQA64mr addr:$dst, VR512:$src)>;
|
"vmovdqa32", SSEPackedInt, v16i32>,
|
||||||
def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
|
avx512_store<0x7F, VR512, VK16WM, i512mem, alignedstore512,
|
||||||
(VMOVDQA32mr addr:$dst, VR512:$src)>;
|
"vmovdqa32", SSEPackedInt, v16i32>,
|
||||||
|
PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm,
|
defm VMOVDQA64: avx512_load<0x6F, VR512, VK8WM, i512mem, alignedloadv8i64,
|
||||||
RegisterClass RC, RegisterClass KRC,
|
"vmovdqa64", SSEPackedInt, v8i64>,
|
||||||
PatFrag ld_frag, X86MemOperand x86memop> {
|
avx512_store<0x7F, VR512, VK8WM, i512mem, alignedstore512,
|
||||||
let hasSideEffects = 0 in
|
"vmovdqa64", SSEPackedInt, v8i64>,
|
||||||
def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
|
PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||||
!strconcat(asm, " \t{$src, $dst|$dst, $src}"), []>, EVEX;
|
defm VMOVDQU32: avx512_load<0x6F, VR512, VK16WM, i512mem, load,
|
||||||
let canFoldAsLoad = 1 in
|
"vmovdqu32", SSEPackedInt, v16i32>,
|
||||||
def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
|
avx512_store<0x7F, VR512, VK16WM, i512mem, store,
|
||||||
!strconcat(asm, " \t{$src, $dst|$dst, $src}"),
|
"vmovdqu32", SSEPackedInt, v16i32>,
|
||||||
[(set RC:$dst, (ld_frag addr:$src))]>, EVEX;
|
XS, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||||
let mayStore = 1 in
|
defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load,
|
||||||
def mr : AVX512XSI<store_opc, MRMDestMem, (outs),
|
"vmovdqu64", SSEPackedInt, v8i64>,
|
||||||
(ins x86memop:$dst, VR512:$src),
|
avx512_store<0x7F, VR512, VK8WM, i512mem, store,
|
||||||
!strconcat(asm, " \t{$src, $dst|$dst, $src}"), []>, EVEX;
|
"vmovdqu64", SSEPackedInt, v8i64>,
|
||||||
let Constraints = "$src1 = $dst" in {
|
XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||||
def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
|
|
||||||
(ins RC:$src1, KRC:$mask, RC:$src2),
|
|
||||||
!strconcat(asm,
|
|
||||||
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>,
|
|
||||||
EVEX, EVEX_K;
|
|
||||||
def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst),
|
|
||||||
(ins RC:$src1, KRC:$mask, x86memop:$src2),
|
|
||||||
!strconcat(asm,
|
|
||||||
" \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
|
|
||||||
[]>, EVEX, EVEX_K;
|
|
||||||
}
|
|
||||||
def rrkz : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
|
|
||||||
(ins KRC:$mask, RC:$src),
|
|
||||||
!strconcat(asm,
|
|
||||||
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), []>,
|
|
||||||
EVEX, EVEX_KZ;
|
|
||||||
}
|
|
||||||
|
|
||||||
defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
|
|
||||||
memopv16i32, i512mem>,
|
|
||||||
EVEX_V512, EVEX_CD8<32, CD8VF>;
|
|
||||||
defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM,
|
|
||||||
memopv8i64, i512mem>,
|
|
||||||
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
|
|
||||||
// 512-bit unaligned load/store
|
|
||||||
def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>;
|
|
||||||
def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>;
|
|
||||||
|
|
||||||
def : Pat<(store (v8i64 VR512:$src), addr:$dst),
|
|
||||||
(VMOVDQU64mr addr:$dst, VR512:$src)>;
|
|
||||||
def : Pat<(store (v16i32 VR512:$src), addr:$dst),
|
|
||||||
(VMOVDQU32mr addr:$dst, VR512:$src)>;
|
|
||||||
|
|
||||||
let AddedComplexity = 20 in {
|
let AddedComplexity = 20 in {
|
||||||
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
|
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
|
||||||
|
@ -521,3 +521,19 @@ define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
|
|||||||
ret i16 %res
|
ret i16 %res
|
||||||
}
|
}
|
||||||
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
|
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
|
||||||
|
|
||||||
|
define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
|
||||||
|
; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
|
||||||
|
call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
|
||||||
|
|
||||||
|
define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
|
||||||
|
; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
|
||||||
|
call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
|
@ -713,8 +713,12 @@ vpsrad 512(%rdi, %rsi, 4), %zmm12, %zmm25
|
|||||||
vpbroadcastd %xmm0, %zmm1 {%k1} {z}
|
vpbroadcastd %xmm0, %zmm1 {%k1} {z}
|
||||||
|
|
||||||
// CHECK: vmovdqu64 {{.*}} {%k3}
|
// CHECK: vmovdqu64 {{.*}} {%k3}
|
||||||
// CHECK: encoding: [0x62,0xf1,0xfe,0x4b,0x6f,0xc8]
|
// CHECK: encoding: [0x62,0xf1,0xfe,0x4b,0x7f,0x07]
|
||||||
vmovdqu64 %zmm0, %zmm1 {%k3}
|
vmovdqu64 %zmm0, (%rdi) {%k3}
|
||||||
|
|
||||||
|
// CHECK: vmovdqa32 {{.*}} {%k4}
|
||||||
|
// CHECK: encoding: [0x62,0x61,0x7d,0x4c,0x6f,0x1e]
|
||||||
|
vmovdqa32 (%rsi), %zmm27 {%k4}
|
||||||
|
|
||||||
// CHECK: vmovd
|
// CHECK: vmovd
|
||||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x74,0x24,0xeb]
|
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x74,0x24,0xeb]
|
||||||
|
Loading…
Reference in New Issue
Block a user