[AArch64] Add nvcast patterns for v4f16 and v8f16

Summary:
Constant stores of f16 vectors can create NvCast nodes from various
operand types to v4f16 or v8f16 depending on patterns in the stored
constants.  This patch adds nvcast rules with v4f16 and v8f16 values.

AArchISelLowering::LowerBUILD_VECTOR has the details on which constant
patterns generate the nvcast nodes.

Reviewers: jmolloy, srhines, ab

Subscribers: rengolin, aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D9201

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235610 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Pirama Arumuga Nainar 2015-04-23 17:32:25 +00:00
parent b7db5f28c5
commit dab5145cb3
2 changed files with 97 additions and 0 deletions

View File

@ -5128,22 +5128,26 @@ def : Pat<(trap), (BRK 1)>;
// Natural vector casts (64 bit) // Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>; def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>; def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@ -5158,22 +5162,26 @@ def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
// Natural vector casts (128 bit) // Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;

View File

@ -0,0 +1,89 @@
; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
define void @nvcast_v2i32(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v2i32:
; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
ret void
}
; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
define void @nvcast_v4i16(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v4i16:
; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
ret void
}
; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
define void @nvcast_v8i8(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v8i8:
; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
ret void
}
; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src)))
define void @nvcast_f64(<4 x half>* %a) #0 {
; CHECK-LABEL: nvcast_f64:
; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
; CHECK-NEXT: str d[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <4 x half> zeroinitializer, <4 x half>* %a
ret void
}
; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
define void @nvcast_v4i32(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v4i32:
; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
ret void
}
; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
define void @nvcast_v8i16(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v8i16:
; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
ret void
}
; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
define void @nvcast_v16i8(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v16i8:
; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
ret void
}
; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src)))
define void @nvcast_v2i64(<8 x half>* %a) #0 {
; CHECK-LABEL: nvcast_v2i64:
; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
; CHECK-NEXT: str q[[REG]], [x0]
; CHECK-NEXT: ret
store volatile <8 x half> zeroinitializer, <8 x half>* %a
ret void
}
attributes #0 = { nounwind }