Use roundps/pd for llvm.ceil, llvm.trunc, llvm.rint, and llvm.nearbyint of vector types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168141 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2012-11-16 06:37:56 +00:00
parent 06be8b8a69
commit d577552c66
3 changed files with 212 additions and 0 deletions

View File

@ -978,7 +978,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@ -1058,6 +1066,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
@ -1067,6 +1079,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);

View File

@ -6378,12 +6378,47 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x1))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x2))>;
def : Pat<(v4f32 (frint VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x3))>;
def : Pat<(v2f64 (ffloor VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x1))>;
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x2))>;
def : Pat<(v2f64 (frint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x3))>;
def : Pat<(v8f32 (ffloor VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x1))>;
def : Pat<(v8f32 (fnearbyint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0xC))>;
def : Pat<(v8f32 (fceil VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x2))>;
def : Pat<(v8f32 (frint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x4))>;
def : Pat<(v8f32 (ftrunc VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x3))>;
def : Pat<(v4f64 (ffloor VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x1))>;
def : Pat<(v4f64 (fnearbyint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0xC))>;
def : Pat<(v4f64 (fceil VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x2))>;
def : Pat<(v4f64 (frint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x4))>;
def : Pat<(v4f64 (ftrunc VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@ -6417,8 +6452,25 @@ let Predicates = [UseSSE41] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x1))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x2))>;
def : Pat<(v4f32 (frint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x3))>;
def : Pat<(v2f64 (ffloor VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x1))>;
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x2))>;
def : Pat<(v2f64 (frint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x3))>;
}
//===----------------------------------------------------------------------===//

View File

@ -36,3 +36,147 @@ define <8 x float> @floor_v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
define <2 x double> @ceil_v2f64(<2 x double> %p)
{
; CHECK: ceil_v2f64
; CHECK: vroundpd
%t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
ret <2 x double> %t
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
define <4 x float> @ceil_v4f32(<4 x float> %p)
{
; CHECK: ceil_v4f32
; CHECK: vroundps
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
ret <4 x float> %t
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
define <4 x double> @ceil_v4f64(<4 x double> %p)
{
; CHECK: ceil_v4f64
; CHECK: vroundpd
%t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
ret <4 x double> %t
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
define <8 x float> @ceil_v8f32(<8 x float> %p)
{
; CHECK: ceil_v8f32
; CHECK: vroundps
%t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
define <2 x double> @trunc_v2f64(<2 x double> %p)
{
; CHECK: trunc_v2f64
; CHECK: vroundpd
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
ret <2 x double> %t
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
define <4 x float> @trunc_v4f32(<4 x float> %p)
{
; CHECK: trunc_v4f32
; CHECK: vroundps
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
ret <4 x float> %t
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
define <4 x double> @trunc_v4f64(<4 x double> %p)
{
; CHECK: trunc_v4f64
; CHECK: vroundpd
%t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
ret <4 x double> %t
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
define <8 x float> @trunc_v8f32(<8 x float> %p)
{
; CHECK: trunc_v8f32
; CHECK: vroundps
%t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
define <2 x double> @rint_v2f64(<2 x double> %p)
{
; CHECK: rint_v2f64
; CHECK: vroundpd
%t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
ret <2 x double> %t
}
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)
define <4 x float> @rint_v4f32(<4 x float> %p)
{
; CHECK: rint_v4f32
; CHECK: vroundps
%t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
ret <4 x float> %t
}
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)
define <4 x double> @rint_v4f64(<4 x double> %p)
{
; CHECK: rint_v4f64
; CHECK: vroundpd
%t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
ret <4 x double> %t
}
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)
define <8 x float> @rint_v8f32(<8 x float> %p)
{
; CHECK: rint_v8f32
; CHECK: vroundps
%t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)
define <2 x double> @nearbyint_v2f64(<2 x double> %p)
{
; CHECK: nearbyint_v2f64
; CHECK: vroundpd
%t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
ret <2 x double> %t
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
define <4 x float> @nearbyint_v4f32(<4 x float> %p)
{
; CHECK: nearbyint_v4f32
; CHECK: vroundps
%t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
ret <4 x float> %t
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
define <4 x double> @nearbyint_v4f64(<4 x double> %p)
{
; CHECK: nearbyint_v4f64
; CHECK: vroundpd
%t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
ret <4 x double> %t
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
define <8 x float> @nearbyint_v8f32(<8 x float> %p)
{
; CHECK: nearbyint_v8f32
; CHECK: vroundps
%t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)