mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-03 14:08:57 +00:00
[AArch64] Add nvcast patterns for v4f16 and v8f16
Summary: Constant stores of f16 vectors can create NvCast nodes from various operand types to v4f16 or v8f16, depending on patterns in the stored constants. This patch adds nvcast patterns that produce v4f16 and v8f16 values. AArch64TargetLowering::LowerBUILD_VECTOR (in AArch64ISelLowering.cpp) has the details on which constant patterns generate the nvcast nodes. Reviewers: jmolloy, srhines, ab Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9201 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235610 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b7db5f28c5
commit
dab5145cb3
@ -5128,22 +5128,26 @@ def : Pat<(trap), (BRK 1)>;
|
|||||||
// Natural vector casts (64 bit)
|
// Natural vector casts (64 bit)
|
||||||
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
|
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
|
||||||
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
|
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
|
||||||
|
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
|
||||||
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
|
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
|
||||||
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
|
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
|
||||||
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
|
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
|
||||||
|
|
||||||
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
|
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
|
||||||
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
|
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
|
||||||
|
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
|
||||||
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
|
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
|
||||||
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
|
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
|
||||||
|
|
||||||
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
|
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
|
||||||
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
|
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
|
||||||
|
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
|
||||||
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
|
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
|
||||||
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
|
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
|
||||||
|
|
||||||
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
|
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
|
||||||
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
|
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
|
||||||
|
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
|
||||||
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
|
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
|
||||||
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
|
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
|
||||||
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
|
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
|
||||||
@ -5158,22 +5162,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
|
|||||||
// Natural vector casts (128 bit)
|
// Natural vector casts (128 bit)
|
||||||
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
|
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
|
||||||
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
|
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
|
||||||
|
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
|
||||||
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
|
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
|
||||||
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
|
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
|
||||||
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
|
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
|
||||||
|
|
||||||
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
|
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
|
||||||
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
|
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
|
||||||
|
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
|
||||||
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
|
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
|
||||||
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
|
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
|
||||||
|
|
||||||
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
|
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
|
||||||
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
|
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
|
||||||
|
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
|
||||||
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
|
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
|
||||||
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
|
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
|
||||||
|
|
||||||
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
|
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
|
||||||
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
|
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
|
||||||
|
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
|
||||||
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
|
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
|
||||||
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
|
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
|
||||||
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
|
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
|
||||||
|
89
test/CodeGen/AArch64/fp16-vector-nvcast.ll
Normal file
89
test/CodeGen/AArch64/fp16-vector-nvcast.ll
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
|
||||||
|
|
||||||
|
; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src))).
; The stored <4 x half> constant alternates 0xH0000 and 0xH00AB. The expected
; output is a single 2s-lane movi of #0xab shifted left by 16, followed by a
; d-register store to the pointer argument. The store is volatile.
|
||||||
|
define void @nvcast_v2i32(<4 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v2i32:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
|
||||||
|
; CHECK-NEXT: str d[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src))).
; Every element of the stored <4 x half> constant is 0xH00AB. The expected
; output is a single 4h-lane movi of #0xab, followed by a d-register store to
; the pointer argument. The store is volatile.
|
||||||
|
define void @nvcast_v4i16(<4 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v4i16:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
|
||||||
|
; CHECK-NEXT: str d[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src))).
; Every element of the stored <4 x half> constant is 0xHABAB, so each byte of
; the 64-bit value is 0xab. The expected output is a single 8b-lane movi of
; #0xab, followed by a d-register store to the pointer argument. The store is
; volatile.
|
||||||
|
define void @nvcast_v8i8(<4 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v8i8:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
|
||||||
|
; CHECK-NEXT: str d[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src))).
; The stored <4 x half> constant is zeroinitializer. The expected output is a
; scalar movi that zeroes a d register, followed by a d-register store to the
; pointer argument. The store is volatile.
|
||||||
|
define void @nvcast_f64(<4 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_f64:
|
||||||
|
; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
|
||||||
|
; CHECK-NEXT: str d[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <4 x half> zeroinitializer, <4 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src))).
; The stored <8 x half> constant alternates 0xH0000 and 0xH00AB. The expected
; output is a single 4s-lane movi of #0xab shifted left by 16, followed by a
; q-register store to the pointer argument. The store is volatile.
|
||||||
|
define void @nvcast_v4i32(<8 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v4i32:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
|
||||||
|
; CHECK-NEXT: str q[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src))).
; Every element of the stored <8 x half> constant is 0xH00AB. The expected
; output is a single 8h-lane movi of #0xab, followed by a q-register store to
; the pointer argument. The store is volatile.
|
||||||
|
define void @nvcast_v8i16(<8 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v8i16:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
|
||||||
|
; CHECK-NEXT: str q[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src))).
; Every element of the stored <8 x half> constant is 0xHABAB, so each byte of
; the 128-bit value is 0xab. The expected output is a single 16b-lane movi of
; #0xab, followed by a q-register store to the pointer argument. The store is
; volatile.
|
||||||
|
define void @nvcast_v16i8(<8 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v16i8:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
|
||||||
|
; CHECK-NEXT: str q[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src))).
; The stored <8 x half> constant is zeroinitializer. The expected output is a
; 2d-lane movi that zeroes the vector register, followed by a q-register store
; to the pointer argument. The store is volatile.
|
||||||
|
define void @nvcast_v2i64(<8 x half>* %a) #0 {
|
||||||
|
; CHECK-LABEL: nvcast_v2i64:
|
||||||
|
; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
|
||||||
|
; CHECK-NEXT: str q[[REG]], [x0]
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
store volatile <8 x half> zeroinitializer, <8 x half>* %a
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue
Block a user