From e95ed2b7afbe37f1831cb6d8d46d09ccb5cd6b7f Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella
Date: Thu, 15 Nov 2012 20:56:03 +0000
Subject: [PATCH] PowerPC: Lowering floor intrinsic for Altivec

This patch lowers the llvm.floor, llvm.ceil, llvm.trunc, and
llvm.nearbyint intrinsics to Altivec instructions when operating on
vectors of four single-precision floats.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168086 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelLowering.cpp |   4 +
 lib/Target/PowerPC/PPCInstrAltivec.td  |  10 ++
 test/CodeGen/PowerPC/vec_rounding.ll   | 172 +++++++++++++++++++++++++
 3 files changed, 186 insertions(+)
 create mode 100644 test/CodeGen/PowerPC/vec_rounding.ll

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d51baa6a576..7d97450676e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -402,6 +402,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 
     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index ba58c3e4ac8..87758e90fbd 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -721,3 +721,13 @@ def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))),
           (VCFSX_0 VRRC:$vA)>;
 def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))),
           (VCFUX_0 VRRC:$vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))),
+          (VRFIM VRRC:$vA)>;
+def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))),
+          (VRFIP VRRC:$vA)>;
+def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
+          (VRFIZ VRRC:$vA)>;
+def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
+          (VRFIN VRRC:$vA)>;
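As a minimal sketch of the combined effect of the two changes above (not part
of the patch; the function name is illustrative): marking ISD::FFLOOR Legal
for v4f32 keeps the node intact through legalization, and the VRFIM pattern
then selects it, so IR like the following, fed to
llc -mcpu=pwr6 -mattr=+altivec, should produce a single vrfim instead of four
floorf libcalls.

    ; Illustrative sketch only; run: llc -mcpu=pwr6 -mattr=+altivec
    declare <4 x float> @llvm.floor.v4f32(<4 x float>)

    define <4 x float> @demo_floor(<4 x float> %x) {
      ; expected to select one vrfim (round toward -infinity)
      %r = call <4 x float> @llvm.floor.v4f32(<4 x float> %x)
      ret <4 x float> %r
    }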
diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll
new file mode 100644
index 00000000000..f41faa0339a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_rounding.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+; Check that v4f32 rounding (floor, ceil, trunc, nearbyint) lowers to the
+; Altivec vrfim/vrfip/vrfiz/vrfin instructions; doubles expand to libcalls.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+define <2 x double> @floor_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: floor_v2f64:
+; CHECK: bl floor
+; CHECK: bl floor
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+define <4 x double> @floor_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: floor_v4f64:
+; CHECK: bl floor
+; CHECK: bl floor
+; CHECK: bl floor
+; CHECK: bl floor
+
+declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+define <2 x double> @ceil_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: ceil_v2f64:
+; CHECK: bl ceil
+; CHECK: bl ceil
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+define <4 x double> @ceil_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: ceil_v4f64:
+; CHECK: bl ceil
+; CHECK: bl ceil
+; CHECK: bl ceil
+; CHECK: bl ceil
+
+declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+define <2 x double> @trunc_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: trunc_v2f64:
+; CHECK: bl trunc
+; CHECK: bl trunc
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+define <4 x double> @trunc_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: trunc_v4f64:
+; CHECK: bl trunc
+; CHECK: bl trunc
+; CHECK: bl trunc
+; CHECK: bl trunc
+
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+define <2 x double> @nearbyint_v2f64(<2 x double> %p)
+{
+  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
+  ret <2 x double> %t
+}
+; CHECK: nearbyint_v2f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+define <4 x double> @nearbyint_v4f64(<4 x double> %p)
+{
+  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
+  ret <4 x double> %t
+}
+; CHECK: nearbyint_v4f64:
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+; CHECK: bl nearbyint
+
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+define <4 x float> @floor_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: floor_v4f32:
+; CHECK: vrfim
+
+declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+define <8 x float> @floor_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: floor_v8f32:
+; CHECK: vrfim
+; CHECK: vrfim
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+define <4 x float> @ceil_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: ceil_v4f32:
+; CHECK: vrfip
+
+declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+define <8 x float> @ceil_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: ceil_v8f32:
+; CHECK: vrfip
+; CHECK: vrfip
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+define <4 x float> @trunc_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: trunc_v4f32:
+; CHECK: vrfiz
+
+declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+define <8 x float> @trunc_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: trunc_v8f32:
+; CHECK: vrfiz
+; CHECK: vrfiz
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+define <4 x float> @nearbyint_v4f32(<4 x float> %p)
+{
+  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
+  ret <4 x float> %t
+}
+; CHECK: nearbyint_v4f32:
+; CHECK: vrfin
+
+declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+define <8 x float> @nearbyint_v8f32(<8 x float> %p)
+{
+  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+  ret <8 x float> %t
+}
+; CHECK: nearbyint_v8f32:
+; CHECK: vrfin
+; CHECK: vrfin
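
A note on the test expectations: v8f32 is not a legal type on this target, so
type legalization splits each <8 x float> operation into two <4 x float>
halves before selection, which is why the v8f32 functions check for the
Altivec instruction twice. The double-precision variants stay as scalar
libcalls (bl floor, bl ceil, and so on), since Altivec has no v2f64 rounding
instructions for the new patterns to match.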