mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 05:32:25 +00:00
AArch64 & ARM: refactor crypto intrinsics to take scalars
Some of the SHA instructions take a scalar i32 as one argument (largely because they work on 160-bit hash fragments). This wasn't reflected in the IR previously, with ARM and AArch64 choosing different types (<4 x i32> and <1 x i32> respectively), which was ugly. This makes all the affected intrinsics take a uniform "i32", allowing them to become non-polymorphic at the same time.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200706 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e6c04bff3c
commit
07786c2f09
@ -404,11 +404,4 @@ def int_aarch64_neon_vcvtfp2fxs_n :
|
||||
def int_aarch64_neon_vcvtfp2fxu_n :
|
||||
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
class Neon_SHA_Intrinsic
|
||||
: Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v1i32_ty, llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_sha1c : Neon_SHA_Intrinsic;
|
||||
def int_aarch64_neon_sha1m : Neon_SHA_Intrinsic;
|
||||
def int_aarch64_neon_sha1p : Neon_SHA_Intrinsic;
|
||||
}
|
||||
|
@ -472,19 +472,37 @@ def int_arm_neon_vbsl : Intrinsic<[llvm_anyvector_ty],
|
||||
|
||||
|
||||
// Crypto instructions
|
||||
def int_arm_neon_aesd : Neon_2Arg_Intrinsic;
|
||||
def int_arm_neon_aese : Neon_2Arg_Intrinsic;
|
||||
def int_arm_neon_aesimc : Neon_1Arg_Intrinsic;
|
||||
def int_arm_neon_aesmc : Neon_1Arg_Intrinsic;
|
||||
def int_arm_neon_sha1h : Neon_1Arg_Intrinsic;
|
||||
def int_arm_neon_sha1su1 : Neon_2Arg_Intrinsic;
|
||||
def int_arm_neon_sha256su0 : Neon_2Arg_Intrinsic;
|
||||
def int_arm_neon_sha1c : Neon_3Arg_Intrinsic;
|
||||
def int_arm_neon_sha1m : Neon_3Arg_Intrinsic;
|
||||
def int_arm_neon_sha1p : Neon_3Arg_Intrinsic;
|
||||
def int_arm_neon_sha1su0: Neon_3Arg_Intrinsic;
|
||||
def int_arm_neon_sha256h: Neon_3Arg_Intrinsic;
|
||||
def int_arm_neon_sha256h2: Neon_3Arg_Intrinsic;
|
||||
def int_arm_neon_sha256su1: Neon_3Arg_Intrinsic;
|
||||
class AES_1Arg_Intrinsic : Intrinsic<[llvm_v16i8_ty],
|
||||
[llvm_v16i8_ty], [IntrNoMem]>;
|
||||
class AES_2Arg_Intrinsic : Intrinsic<[llvm_v16i8_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
class SHA_1Arg_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class SHA_2Arg_Intrinsic : Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class SHA_3Arg_i32_Intrinsic : Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
class SHA_3Arg_v4i32_Intrinsic : Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_v4i32_ty,llvm_v4i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_arm_neon_aesd : AES_2Arg_Intrinsic;
|
||||
def int_arm_neon_aese : AES_2Arg_Intrinsic;
|
||||
def int_arm_neon_aesimc : AES_1Arg_Intrinsic;
|
||||
def int_arm_neon_aesmc : AES_1Arg_Intrinsic;
|
||||
def int_arm_neon_sha1h : SHA_1Arg_Intrinsic;
|
||||
def int_arm_neon_sha1su1 : SHA_2Arg_Intrinsic;
|
||||
def int_arm_neon_sha256su0 : SHA_2Arg_Intrinsic;
|
||||
def int_arm_neon_sha1c : SHA_3Arg_i32_Intrinsic;
|
||||
def int_arm_neon_sha1m : SHA_3Arg_i32_Intrinsic;
|
||||
def int_arm_neon_sha1p : SHA_3Arg_i32_Intrinsic;
|
||||
def int_arm_neon_sha1su0: SHA_3Arg_v4i32_Intrinsic;
|
||||
def int_arm_neon_sha256h: SHA_3Arg_v4i32_Intrinsic;
|
||||
def int_arm_neon_sha256h2: SHA_3Arg_v4i32_Intrinsic;
|
||||
def int_arm_neon_sha256su1: SHA_3Arg_v4i32_Intrinsic;
|
||||
|
||||
} // end TargetPrefix
|
||||
|
@ -8863,13 +8863,15 @@ class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
|
||||
: NeonI_Crypto_SHA<size, opcode,
|
||||
(outs FPR32:$Rd), (ins FPR32:$Rn),
|
||||
asmop # "\t$Rd, $Rn",
|
||||
[(set (v1i32 FPR32:$Rd),
|
||||
(v1i32 (opnode (v1i32 FPR32:$Rn))))],
|
||||
NoItinerary> {
|
||||
[], NoItinerary> {
|
||||
let Predicates = [HasNEON, HasCrypto];
|
||||
let hasSideEffects = 0;
|
||||
}
|
||||
|
||||
def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
|
||||
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
|
||||
(COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>;
|
||||
|
||||
|
||||
class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
|
||||
SDPatternOperator opnode>
|
||||
@ -8911,24 +8913,30 @@ def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
|
||||
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
|
||||
int_arm_neon_sha256h2>;
|
||||
|
||||
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
|
||||
SDPatternOperator opnode>
|
||||
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop>
|
||||
: NeonI_Crypto_3VSHA<size, opcode,
|
||||
(outs FPR128:$Rd),
|
||||
(ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
|
||||
asmop # "\t$Rd, $Rn, $Rm.4s",
|
||||
[(set (v4i32 FPR128:$Rd),
|
||||
(v4i32 (opnode (v4i32 FPR128:$src),
|
||||
(v1i32 FPR32:$Rn),
|
||||
(v4i32 VPR128:$Rm))))],
|
||||
NoItinerary> {
|
||||
[], NoItinerary> {
|
||||
let Constraints = "$src = $Rd";
|
||||
let hasSideEffects = 0;
|
||||
let Predicates = [HasNEON, HasCrypto];
|
||||
}
|
||||
|
||||
def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
|
||||
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
|
||||
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
|
||||
def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">;
|
||||
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">;
|
||||
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">;
|
||||
|
||||
def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
|
||||
(SHA1C v4i32:$hash_abcd,
|
||||
(COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
|
||||
def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
|
||||
(SHA1M v4i32:$hash_abcd,
|
||||
(COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
|
||||
def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
|
||||
(SHA1P v4i32:$hash_abcd,
|
||||
(COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
|
||||
|
||||
// Additional patterns to match shl to USHL.
|
||||
def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
|
||||
|
@ -5876,7 +5876,7 @@ defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
|
||||
|
||||
// Cryptography instructions
|
||||
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
|
||||
DecoderNamespace = "v8Crypto" in {
|
||||
DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
|
||||
class AES<string op, bit op7, bit op6, SDPatternOperator Int>
|
||||
: N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
|
||||
!strconcat("aes", op), "8", v16i8, v16i8, Int>,
|
||||
@ -5906,17 +5906,45 @@ def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
|
||||
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
|
||||
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
|
||||
|
||||
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
|
||||
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
|
||||
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
|
||||
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
|
||||
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
|
||||
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
|
||||
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
|
||||
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
|
||||
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
|
||||
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
|
||||
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
|
||||
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
|
||||
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
|
||||
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
|
||||
|
||||
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
|
||||
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
|
||||
(SHA1H (SUBREG_TO_REG (i64 0),
|
||||
(f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
|
||||
ssub_0)),
|
||||
ssub_0)), GPR)>;
|
||||
|
||||
def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
|
||||
(SHA1C v4i32:$hash_abcd,
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
|
||||
ssub_0),
|
||||
v4i32:$wk)>;
|
||||
|
||||
def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
|
||||
(SHA1M v4i32:$hash_abcd,
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
|
||||
ssub_0),
|
||||
v4i32:$wk)>;
|
||||
|
||||
def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
|
||||
(SHA1P v4i32:$hash_abcd,
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
|
||||
ssub_0),
|
||||
v4i32:$wk)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NEON instructions for single-precision FP math
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1,40 +1,40 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
|
||||
; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32>, <1 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32>, <1 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32>, <1 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1
|
||||
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) #1
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1
|
||||
|
||||
declare <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32>) #1
|
||||
declare i32 @llvm.arm.neon.sha1h(i32) #1
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) #1
|
||||
declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) #1
|
||||
declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) #1
|
||||
declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) #1
|
||||
declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1
|
||||
|
||||
define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) {
|
||||
; CHECK: test_vaeseq_u8:
|
||||
; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese
|
||||
entry:
|
||||
%aese.i = tail call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %data, <16 x i8> %key)
|
||||
%aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
|
||||
ret <16 x i8> %aese.i
|
||||
}
|
||||
|
||||
@ -42,7 +42,7 @@ define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) {
|
||||
; CHECK: test_vaesdq_u8:
|
||||
; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %data, <16 x i8> %key)
|
||||
%aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
|
||||
ret <16 x i8> %aesd.i
|
||||
}
|
||||
|
||||
@ -50,7 +50,7 @@ define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) {
|
||||
; CHECK: test_vaesmcq_u8:
|
||||
; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %data)
|
||||
%aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data)
|
||||
ret <16 x i8> %aesmc.i
|
||||
}
|
||||
|
||||
@ -58,7 +58,7 @@ define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) {
|
||||
; CHECK: test_vaesimcq_u8:
|
||||
; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %data)
|
||||
%aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data)
|
||||
ret <16 x i8> %aesimc.i
|
||||
}
|
||||
|
||||
@ -66,17 +66,15 @@ define i32 @test_vsha1h_u32(i32 %hash_e) {
|
||||
; CHECK: test_vsha1h_u32:
|
||||
; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
|
||||
entry:
|
||||
%sha1h.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
|
||||
%sha1h1.i = tail call <1 x i32> @llvm.arm.neon.sha1h.v1i32(<1 x i32> %sha1h.i)
|
||||
%0 = extractelement <1 x i32> %sha1h1.i, i32 0
|
||||
ret i32 %0
|
||||
%sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e)
|
||||
ret i32 %sha1h1.i
|
||||
}
|
||||
|
||||
define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) {
|
||||
; CHECK: test_vsha1su1q_u32:
|
||||
; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w12_15)
|
||||
%sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15)
|
||||
ret <4 x i32> %sha1su12.i
|
||||
}
|
||||
|
||||
@ -84,7 +82,7 @@ define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) {
|
||||
; CHECK: test_vsha256su0q_u32:
|
||||
; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7)
|
||||
%sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7)
|
||||
ret <4 x i32> %sha256su02.i
|
||||
}
|
||||
|
||||
@ -92,8 +90,7 @@ define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32>
|
||||
; CHECK: test_vsha1cq_u32:
|
||||
; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha1c.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
|
||||
%sha1c1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1c(<4 x i32> %hash_abcd, <1 x i32> %sha1c.i, <4 x i32> %wk)
|
||||
%sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
|
||||
ret <4 x i32> %sha1c1.i
|
||||
}
|
||||
|
||||
@ -101,8 +98,7 @@ define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32>
|
||||
; CHECK: test_vsha1pq_u32:
|
||||
; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha1p.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
|
||||
%sha1p1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1p(<4 x i32> %hash_abcd, <1 x i32> %sha1p.i, <4 x i32> %wk)
|
||||
%sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
|
||||
ret <4 x i32> %sha1p1.i
|
||||
}
|
||||
|
||||
@ -110,8 +106,7 @@ define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32>
|
||||
; CHECK: test_vsha1mq_u32:
|
||||
; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha1m.i = insertelement <1 x i32> undef, i32 %hash_e, i32 0
|
||||
%sha1m1.i = tail call <4 x i32> @llvm.aarch64.neon.sha1m(<4 x i32> %hash_abcd, <1 x i32> %sha1m.i, <4 x i32> %wk)
|
||||
%sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk)
|
||||
ret <4 x i32> %sha1m1.i
|
||||
}
|
||||
|
||||
@ -119,7 +114,7 @@ define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32>
|
||||
; CHECK: test_vsha1su0q_u32:
|
||||
; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
|
||||
%sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11)
|
||||
ret <4 x i32> %sha1su03.i
|
||||
}
|
||||
|
||||
@ -127,7 +122,7 @@ define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh,
|
||||
; CHECK: test_vsha256hq_u32:
|
||||
; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
|
||||
%sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk)
|
||||
ret <4 x i32> %sha256h3.i
|
||||
}
|
||||
|
||||
@ -135,7 +130,7 @@ define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd
|
||||
; CHECK: test_vsha256h2q_u32:
|
||||
; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
|
||||
%sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk)
|
||||
ret <4 x i32> %sha256h23.i
|
||||
}
|
||||
|
||||
@ -143,7 +138,7 @@ define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x
|
||||
; CHECK: test_vsha256su1q_u32:
|
||||
; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
|
||||
entry:
|
||||
%sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
|
||||
%sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15)
|
||||
ret <4 x i32> %sha256su13.i
|
||||
}
|
||||
|
||||
|
@ -3,13 +3,13 @@
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
|
||||
%tmp = load <16 x i8>* %a
|
||||
%tmp2 = load <16 x i8>* %b
|
||||
%tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2)
|
||||
%tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2)
|
||||
; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2)
|
||||
%tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2)
|
||||
; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4)
|
||||
%tmp5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %tmp4)
|
||||
; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%tmp6 = call <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5)
|
||||
%tmp6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %tmp5)
|
||||
; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
ret <16 x i8> %tmp6
|
||||
}
|
||||
@ -18,40 +18,42 @@ define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i3
|
||||
%tmp = load <4 x i32>* %a
|
||||
%tmp2 = load <4 x i32>* %b
|
||||
%tmp3 = load <4 x i32>* %c
|
||||
%res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp)
|
||||
%scalar = extractelement <4 x i32> %tmp, i32 0
|
||||
%resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar)
|
||||
%res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0
|
||||
; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res2 = call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %tmp2, i32 %scalar, <4 x i32> %res1)
|
||||
; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res3 = call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %res2, i32 %scalar, <4 x i32> %res1)
|
||||
; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res4 = call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %res3, i32 %scalar, <4 x i32> %res1)
|
||||
; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res5 = call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1)
|
||||
%res6 = call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %res5, <4 x i32> %res1)
|
||||
; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res7 = call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res8 = call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
%res9 = call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1)
|
||||
; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
|
||||
%res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3)
|
||||
%res10 = call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %res9, <4 x i32> %tmp3)
|
||||
; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}}
|
||||
ret <4 x i32> %res10
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
|
||||
declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
|
||||
declare i32 @llvm.arm.neon.sha1h(i32)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>)
|
||||
|
Loading…
Reference in New Issue
Block a user