From 3cd73a5dedc7e48b3cde182cd5a33e23d9749365 Mon Sep 17 00:00:00 2001 From: Jiangning Liu Date: Fri, 29 Aug 2014 01:31:42 +0000 Subject: [PATCH] [AArch64] Fix some failures exposed by value type v4f16 and v8f16. 1) Add some missing bitcast patterns for v8f16. 2) Add type promotion for operand of ld/st operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +- lib/Target/AArch64/AArch64InstrInfo.td | 9 +++ test/CodeGen/AArch64/aarch64_f16_be.ll | 67 ++++++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/AArch64/aarch64_f16_be.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 60cd78442c0..d3ec172c9d4 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -576,13 +576,13 @@ AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM) } void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { - if (VT == MVT::v2f32) { + if (VT == MVT::v2f32 || VT == MVT::v4f16) { setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32); - } else if (VT == MVT::v2f64 || VT == MVT::v4f32) { + } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) { setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64); diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 024624f9476..48f1fc739bf 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -5233,6 +5233,9 @@ def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), @@ -5247,6 +5250,7 @@ let Predicates = [IsLE] in { def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; } @@ -5258,6 +5262,8 @@ def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 (REV64v4i32 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 (REV64v16i8 FPR128:$src))>; def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), @@ -5268,6 +5274,7 @@ def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; @@ -5278,6 +5285,8 @@ def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (REV64v4i32 FPR128:$src), (i32 8)))>; def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 (REV32v16i8 FPR128:$src))>; def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), diff --git a/test/CodeGen/AArch64/aarch64_f16_be.ll b/test/CodeGen/AArch64/aarch64_f16_be.ll new file mode 100644 index 00000000000..7504439bab8 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64_f16_be.ll @@ -0,0 +1,67 @@ +; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @test_bitcast_v8f16_to_v4f32(<8 x half> %a) { +; CHECK-LABEL: test_bitcast_v8f16_to_v4f32: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v8f16_to_v4f32: +; CHECK-BE: st1 + + %x = alloca <4 x float>, align 16 + %y = bitcast <8 x half> %a to <4 x float> + store <4 x float> %y, <4 x float>* %x, align 16 + ret void +} + +define void @test_bitcast_v8f16_to_v2f64(<8 x half> %a) { +; CHECK-LABEL: test_bitcast_v8f16_to_v2f64: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v8f16_to_v2f64: +; CHECK-BE: st1 + + %x = alloca <2 x double>, align 16 + %y = bitcast <8 x half> %a to <2 x double> + store <2 x double> %y, <2 x double>* %x, align 16 + ret void +} + +define void @test_bitcast_v8f16_to_fp128(<8 x half> %a) { +; CHECK-LABEL: test_bitcast_v8f16_to_fp128: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v8f16_to_fp128: +; CHECK-BE: st1 + + %x = alloca fp128, align 16 + %y = bitcast <8 x half> %a to fp128 + store fp128 %y, fp128* %x, align 16 + ret void +} + +define void @test_bitcast_v4f16_to_v2f32(<4 x half> %a) { +; CHECK-LABEL: test_bitcast_v4f16_to_v2f32: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v4f16_to_v2f32: +; CHECK-BE: st1 + + %x = alloca <2 x float>, align 8 + %y = bitcast <4 x half> %a to <2 x float> + store <2 x float> %y, <2 x float>* %x, align 8 + ret void +} + +define void @test_bitcast_v4f16_to_v1f64(<4 x half> %a) { +; CHECK-LABEL: test_bitcast_v4f16_to_v1f64: +; CHECK-NOT: st1 + +; CHECK-BE-LABEL: test_bitcast_v4f16_to_v1f64: +; CHECK-BE: st1 + + %x = alloca <1 x double>, align 8 + %y = bitcast <4 x half> %a to <1 x double> + store <1 x double> %y, <1 x double>* %x, align 8 + ret void +}