mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 05:32:25 +00:00
[AArch64 NEON] Support poly128_t and implement relevant intrinsic.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196887 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2b14080035
commit
3171b8df48
@ -312,6 +312,9 @@ def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
|
||||
// Signed Saturating Doubling Multiply-Subtract Long
|
||||
def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
|
||||
|
||||
def int_aarch64_neon_vmull_p64 :
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
|
||||
|
||||
class Neon_2Arg_ShiftImm_Intrinsic
|
||||
: Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
|
@ -3039,19 +3039,19 @@ defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
|
||||
int_arm_neon_vqsubs>;
|
||||
|
||||
multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
|
||||
SDPatternOperator opnode, bit Commutable = 0> {
|
||||
SDPatternOperator opnode_8h8b,
|
||||
SDPatternOperator opnode_1q1d, bit Commutable = 0> {
|
||||
let isCommutable = Commutable in {
|
||||
def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
|
||||
opnode, VPR128, VPR64, v8i16, v8i8>;
|
||||
opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;
|
||||
|
||||
def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
|
||||
asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d",
|
||||
[], NoItinerary>;
|
||||
def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
|
||||
opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
|
||||
}
|
||||
}
|
||||
|
||||
defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
|
||||
defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
|
||||
int_aarch64_neon_vmull_p64, 1>;
|
||||
|
||||
multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
|
||||
string opnode, bit Commutable = 0> {
|
||||
@ -3060,10 +3060,17 @@ multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
|
||||
!cast<PatFrag>(opnode # "_16B"),
|
||||
v8i16, v16i8>;
|
||||
|
||||
def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
|
||||
asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
|
||||
[], NoItinerary>;
|
||||
def _1q2d :
|
||||
NeonI_3VDiff<0b1, u, 0b11, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
|
||||
asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
|
||||
[(set (v16i8 VPR128:$Rd),
|
||||
(v16i8 (int_aarch64_neon_vmull_p64
|
||||
(v1i64 (scalar_to_vector
|
||||
(i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
|
||||
(v1i64 (scalar_to_vector
|
||||
(i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
|
||||
NoItinerary>;
|
||||
}
|
||||
}
|
||||
|
||||
|
53
test/CodeGen/AArch64/128bit_load_store.ll
Normal file
53
test/CodeGen/AArch64/128bit_load_store.ll
Normal file
@ -0,0 +1,53 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s
|
||||
|
||||
define void @test_store_f128(fp128* %ptr, fp128 %val) #0 {
|
||||
; CHECK: test_store_f128
|
||||
; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}]
|
||||
entry:
|
||||
store fp128 %val, fp128* %ptr, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
define fp128 @test_load_f128(fp128* readonly %ptr) #2 {
|
||||
; CHECK: test_load_f128
|
||||
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
|
||||
entry:
|
||||
%0 = load fp128* %ptr, align 16
|
||||
ret fp128 %0
|
||||
}
|
||||
|
||||
define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 {
|
||||
; CHECK: test_vstrq_p128
|
||||
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
|
||||
; CHECK-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
|
||||
entry:
|
||||
%0 = bitcast i128* %ptr to fp128*
|
||||
%1 = bitcast i128 %val to fp128
|
||||
store fp128 %1, fp128* %0, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
define i128 @test_vldrq_p128(i128* readonly %ptr) #2 {
|
||||
; CHECK: test_vldrq_p128
|
||||
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
|
||||
; CHECK-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
|
||||
entry:
|
||||
%0 = bitcast i128* %ptr to fp128*
|
||||
%1 = load fp128* %0, align 16
|
||||
%2 = bitcast fp128 %1 to i128
|
||||
ret i128 %2
|
||||
}
|
||||
|
||||
define void @test_ld_st_p128(i128* nocapture %ptr) #0 {
|
||||
; CHECK: test_ld_st_p128
|
||||
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
|
||||
; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
|
||||
entry:
|
||||
%0 = bitcast i128* %ptr to fp128*
|
||||
%1 = load fp128* %0, align 16
|
||||
%add.ptr = getelementptr inbounds i128* %ptr, i64 1
|
||||
%2 = bitcast i128* %add.ptr to fp128*
|
||||
store fp128 %1, fp128* %2, align 16
|
||||
ret void
|
||||
}
|
||||
|
@ -1804,3 +1804,30 @@ entry:
|
||||
ret <8 x i16> %vmull.i.i
|
||||
}
|
||||
|
||||
define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
|
||||
; CHECK: test_vmull_p64
|
||||
; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
|
||||
entry:
|
||||
%vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0
|
||||
%vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0
|
||||
%vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1
|
||||
%vmull3.i = bitcast <16 x i8> %vmull2.i to i128
|
||||
ret i128 %vmull3.i
|
||||
}
|
||||
|
||||
define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
|
||||
; CHECK: test_vmull_high_p64
|
||||
; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
|
||||
entry:
|
||||
%0 = extractelement <2 x i64> %a, i32 1
|
||||
%1 = extractelement <2 x i64> %b, i32 1
|
||||
%vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0
|
||||
%vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0
|
||||
%vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1
|
||||
%vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
|
||||
ret i128 %vmull3.i.i
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user