mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
ARM64: add intrinsic for pmull (p64 x p64 = p128) operations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205302 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
103683c4cb
commit
ed839120c4
@ -74,8 +74,7 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Long_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMTruncatedType<0>,
|
||||
LLVMTruncatedType<0>],
|
||||
[LLVMTruncatedType<0>, LLVMTruncatedType<0>],
|
||||
[IntrNoMem]>;
|
||||
class AdvSIMD_2VectorArg_Wide_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
@ -178,6 +177,11 @@ let Properties = [IntrNoMem] in {
|
||||
def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
|
||||
def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
|
||||
|
||||
// 64-bit polynomial multiply really returns an i128, which is not legal. Fake
|
||||
// it with a v16i8.
|
||||
// Declares the pmull64 intrinsic: two i64 operands, a single v16i8 result, and
// no memory access (IntrNoMem).
def int_arm64_neon_pmull64 :
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
|
||||
// Vector Extending Multiply
|
||||
def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic;
|
||||
|
||||
|
@ -2726,6 +2726,13 @@ defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
|
||||
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
|
||||
BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
|
||||
|
||||
// Patterns for 64-bit pmull
|
||||
// Both intrinsic operands are plain V64 register values: select PMULLv1i64
// with the same two operands.
def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm),
|
||||
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
|
||||
// Both intrinsic operands are lane 1 extracted from a v2i64 value held in a
// V128 register: select PMULLv2i64 on the full V128 registers, so the matched
// extracts do not appear in the selected instruction.
def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
|
||||
(vector_extract (v2i64 V128:$Rm), (i64 1))),
|
||||
(PMULLv2i64 V128:$Rn, V128:$Rm)>;
|
||||
|
||||
// CodeGen patterns for addhn and subhn instructions, which can actually be
|
||||
// written in LLVM IR without too much difficulty.
|
||||
|
||||
|
@ -1983,3 +1983,21 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
|
||||
%tmp5 = call i64 @llvm.arm64.neon.sqsub.i64(i64 %C, i64 %tmp4)
|
||||
ret i64 %tmp5
|
||||
}
|
||||
|
||||
; Scalar case: both i64 arguments are passed directly to the pmull64 intrinsic,
; and the generated code is expected to contain pmull.1q.
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
|
||||
; CHECK-LABEL: test_pmull_64:
|
||||
; CHECK: pmull.1q
|
||||
%val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l, i64 %r)
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
; High-element case: element 1 of each <2 x i64> argument is extracted and
; passed to the pmull64 intrinsic, and the generated code is expected to
; contain pmull2.1q.
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
|
||||
; CHECK-LABEL: test_pmull_high_64:
|
||||
; CHECK: pmull2.1q
|
||||
%l_hi = extractelement <2 x i64> %l, i32 1
|
||||
%r_hi = extractelement <2 x i64> %r, i32 1
|
||||
%val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l_hi, i64 %r_hi)
|
||||
ret <16 x i8> %val
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64)
|
||||
|
Loading…
Reference in New Issue
Block a user