AVX-512: aligned / unaligned load and store for 512-bit integer vectors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193156 91177308-0d34-0410-b5e6-96231b3b80d8
parent 3ebe47ee13
commit ea79feb1a8
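In effect, 512-bit integer vector memory accesses now select the integer move forms (vmovdqa32/vmovdqa64 when aligned, vmovdqu32/vmovdqu64 otherwise) instead of being routed through the floating-point vmovaps/vmovups/vmovupd encodings. A minimal IR sketch of the new behavior (illustrative only, not part of the commit; the function name is made up):

; Illustrative sketch: an under-aligned <8 x i64> store should now select
; vmovdqu64 instead of the old vmovupd mapping removed below.
define void @store_unaligned_v8i64(i8* %addr, <8 x i64> %data) {
  %p = bitcast i8* %addr to <8 x i64>*
  store <8 x i64> %data, <8 x i64>* %p, align 8
  ret void
}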
lib/Target/X86/X86InstrAVX512.td

@@ -1067,23 +1067,6 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr
                               SSEPackedDouble>, EVEX, EVEX_V512,
                               OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
 
-// Use vmovaps/vmovups for AVX-512 integer load/store.
-// 512-bit load/store
-def : Pat<(alignedloadv8i64 addr:$src),
-          (VMOVAPSZrm addr:$src)>;
-def : Pat<(loadv8i64 addr:$src),
-          (VMOVUPSZrm addr:$src)>;
-
-def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
-          (VMOVAPSZmr addr:$dst, VR512:$src)>;
-def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
-          (VMOVAPSZmr addr:$dst, VR512:$src)>;
-
-def : Pat<(store (v8i64 VR512:$src), addr:$dst),
-          (VMOVUPDZmr addr:$dst, VR512:$src)>;
-def : Pat<(store (v16i32 VR512:$src), addr:$dst),
-          (VMOVUPSZmr addr:$dst, VR512:$src)>;
-
 let neverHasSideEffects = 1 in {
 def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
                            (ins VR512:$src),
@@ -1115,25 +1098,36 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
 }
 }
 
-multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
-                          RegisterClass KRC,
+// 512-bit aligned load/store
+def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>;
+def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
+
+def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
+          (VMOVDQA64mr addr:$dst, VR512:$src)>;
+def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
+          (VMOVDQA32mr addr:$dst, VR512:$src)>;
+
+multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm,
+                          RegisterClass RC, RegisterClass KRC,
                           PatFrag ld_frag, X86MemOperand x86memop> {
 let neverHasSideEffects = 1 in
-  def rr : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-             !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>,
-             EVEX;
+  def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
 let canFoldAsLoad = 1 in
-  def rm : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set RC:$dst, (ld_frag addr:$src))]>,
-             EVEX;
+  def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+             [(set RC:$dst, (ld_frag addr:$src))]>, EVEX;
+let mayStore = 1 in
+  def mr : AVX512XSI<store_opc, MRMDestMem, (outs),
+             (ins x86memop:$dst, VR512:$src),
+             !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
 let Constraints = "$src1 = $dst" in {
-  def rrk : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst),
+  def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
                       (ins RC:$src1, KRC:$mask, RC:$src2),
                       !strconcat(asm,
                       "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>,
                       EVEX, EVEX_K;
-  def rmk : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst),
+  def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst),
                       (ins RC:$src1, KRC:$mask, x86memop:$src2),
                       !strconcat(asm,
                       "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
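The new store_opc parameter and mr variant give avx512_mov_int a memory-destination form, so the VMOVDQU32/VMOVDQU64 families defined from it now include stores; the Pat definitions above route fully aligned integer accesses to the VMOVDQA forms instead. A hedged IR sketch of what those aligned patterns catch (function name is hypothetical, not from the commit):

; With 64-byte alignment both sides match the aligned patterns:
; alignedloadv16i32 -> VMOVDQA32rm, alignedstore512 -> VMOVDQA32mr.
define void @copy_aligned_v16i32(<16 x i32>* %src, <16 x i32>* %dst) {
  %v = load <16 x i32>* %src, align 64
  store <16 x i32> %v, <16 x i32>* %dst, align 64
  ret void
}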
@@ -1141,11 +1135,22 @@ let Constraints = "$src1 = $dst" in {
 }
 }
 
-defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
+defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
+                                memopv16i32, i512mem>,
                                 EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
+defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM,
+                                memopv8i64, i512mem>,
                                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
 
+// 512-bit unaligned load/store
+def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>;
+def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>;
+
+def : Pat<(store (v8i64 VR512:$src), addr:$dst),
+          (VMOVDQU64mr addr:$dst, VR512:$src)>;
+def : Pat<(store (v16i32 VR512:$src), addr:$dst),
+          (VMOVDQU32mr addr:$dst, VR512:$src)>;
+
 let AddedComplexity = 20 in {
 def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
                            (v16f32 VR512:$src2))),
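These unaligned patterns are the fallback: any v8i64 or v16i32 load/store that does not satisfy the alignment requirement of the aligned patterns selects the vmovdqu forms. A small illustrative sketch (hypothetical function name):

; Alignment below 64 bytes only matches loadv8i64, so VMOVDQU64rm is chosen.
define <8 x i64> @load_unaligned_v8i64(<8 x i64>* %p) {
  %v = load <8 x i64>* %p, align 1
  ret <8 x i64> %v
}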
lib/Target/X86/X86InstrFragmentsSIMD.td

@@ -289,6 +289,7 @@ def loadv4i64    : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
 // 512-bit load pattern fragments
 def loadv16f32   : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
 def loadv8f64    : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv16i32   : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
 def loadv8i64    : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
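The new loadv16i32 fragment matches a v16i32 load node regardless of its alignment; the aligned patterns go through alignedloadv16i32 instead, which adds an alignment check. An illustrative contrast (assumed names; the alignment split mirrors the tests below):

define <16 x i32> @any_align(<16 x i32>* %p) {
  %v = load <16 x i32>* %p, align 4    ; matches loadv16i32 only -> vmovdqu32
  ret <16 x i32> %v
}
define <16 x i32> @full_align(<16 x i32>* %p) {
  %v = load <16 x i32>* %p, align 64   ; also satisfies the aligned pattern -> vmovdqa32
  ret <16 x i32> %v
}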
test/CodeGen/X86/avx512-mov.ll

@@ -125,3 +125,31 @@ define <4 x i32> @test15(i32* %x) {
   %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
   ret <4 x i32>%res
 }
+
+; CHECK-LABEL: test16
+; CHECK: vmovdqu32
+; CHECK: ret
+define <16 x i32> @test16(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %res = load <16 x i32>* %vaddr, align 1
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test17
+; CHECK: vmovdqa32
+; CHECK: ret
+define <16 x i32> @test17(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %res = load <16 x i32>* %vaddr, align 64
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test18
+; CHECK: vmovdqa64
+; CHECK: ret
+define void @test18(i8 * %addr, <8 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
+  ret void
+}
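The hunk does not show the file's RUN line; presumably the new FileCheck tests are driven by something along these lines (an assumption, not visible in the diff):

; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s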