Add support for FP_ROUND from v2f64 to v2f32

- Due to the constraint in ISD::FP_ROUND that the source and destination
  vectors must have the same number of elements, rounding from v2f64 to
  v4f32 (after v2f32 is widened during legalization) is scalarized. Add
  custom widening for v2f32 that converts the node into the
  target-specific X86ISD::VFPROUND to work around this constraint.
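
For illustration, a minimal IR reproducer of the affected case (the same
shape as test2 in the updated fptrunc test below; the function name is
chosen for this example):

; Before this change, <2 x float> is widened to <4 x float> during type
; legalization, the element counts of operand and result then disagree,
; and the FP_ROUND is scalarized into two cvtsd2ss. With the custom
; widening, a single (v)cvtpd2ps is selected instead.
define <2 x float> @round_v2f64(<2 x double> %x) nounwind {
  %y = fptrunc <2 x double> %x to <2 x float>
  ret <2 x float> %y
}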



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165631 91177308-0d34-0410-b5e6-96231b3b80d8
Michael Liao 2012-10-10 16:53:28 +00:00
parent 9d796db3e7
commit 44c2d61b67
6 changed files with 121 additions and 15 deletions


@@ -940,6 +940,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+      setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
       setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
     }
@@ -11468,6 +11469,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     }
     return;
   }
+  case ISD::FP_ROUND: {
+    SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+    Results.push_back(V);
+    return;
+  }
   case ISD::READCYCLECOUNTER: {
     SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue TheChain = N->getOperand(0);
@@ -11662,6 +11668,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
   case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
   case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+  case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
   case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
   case X86ISD::VSHL: return "X86ISD::VSHL";


@@ -233,6 +233,9 @@ namespace llvm {
       // VFPEXT - Vector FP extend.
       VFPEXT,
+      // VFPROUND - Vector FP round.
+      VFPROUND,
       // VSHL, VSRL - 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,


@@ -93,6 +93,9 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
 def X86vfpext : SDNode<"X86ISD::VFPEXT",
                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+def X86vfpround: SDNode<"X86ISD::VFPROUND",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
 def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
 def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;


@@ -2125,6 +2125,10 @@ let Predicates = [HasAVX] in {
             (VCVTDQ2PSYrm addr:$src)>;
   // Match fround and fextend for 128/256-bit conversions
+  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+            (VCVTPD2PSrr VR128:$src)>;
+  def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+            (VCVTPD2PSXrm addr:$src)>;
   def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
             (VCVTPD2PSYrr VR256:$src)>;
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
@@ -2139,7 +2143,12 @@ let Predicates = [HasAVX] in {
 }
 let Predicates = [UseSSE2] in {
-  // Match fextend for 128 conversions
+  // Match fround and fextend for 128 conversions
+  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+            (CVTPD2PSrr VR128:$src)>;
+  def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+            (CVTPD2PSrm addr:$src)>;
   def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
             (CVTPS2PDrr VR128:$src)>;
 }
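
For illustration, the memory-operand patterns above let instruction
selection fold the load into the conversion; a minimal IR sketch (the
same shape as test2 in the new load-and-truncate test below, with an
example function name):

; With the patterns above, a widened VFPROUND whose operand is a
; 128-bit load selects to a single cvtpd2ps (SSE2) or vcvtpd2psx (AVX)
; with the load folded, instead of two scalar cvtsd2ss conversions.
define <2 x float> @round_mem(<2 x double>* %p) nounwind {
  %x = load <2 x double>* %p
  %y = fptrunc <2 x double> %x to <2 x float>
  ret <2 x float> %y
}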


@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=AVX
+
+define <1 x float> @test1(<1 x double>* %p) nounwind {
+; CHECK: test1
+; CHECK: cvtsd2ss
+; CHECK: ret
+; AVX: test1
+; AVX: vcvtsd2ss
+; AVX: ret
+  %x = load <1 x double>* %p
+  %y = fptrunc <1 x double> %x to <1 x float>
+  ret <1 x float> %y
+}
+
+define <2 x float> @test2(<2 x double>* %p) nounwind {
+; CHECK: test2
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: ret
+; AVX: test2
+; AVX: vcvtpd2psx {{[0-9]*}}(%{{.*}})
+; AVX: ret
+  %x = load <2 x double>* %p
+  %y = fptrunc <2 x double> %x to <2 x float>
+  ret <2 x float> %y
+}
+
+define <4 x float> @test3(<4 x double>* %p) nounwind {
+; CHECK: test3
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: movlhps
+; CHECK: ret
+; AVX: test3
+; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: ret
+  %x = load <4 x double>* %p
+  %y = fptrunc <4 x double> %x to <4 x float>
+  ret <4 x float> %y
+}
+
+define <8 x float> @test4(<8 x double>* %p) nounwind {
+; CHECK: test4
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: movlhps
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
+; CHECK: movlhps
+; CHECK: ret
+; AVX: test4
+; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: vcvtpd2psy {{[0-9]*}}(%{{.*}})
+; AVX: vinsertf128
+; AVX: ret
+  %x = load <8 x double>* %p
+  %y = fptrunc <8 x double> %x to <8 x float>
+  ret <8 x float> %y
+}


@@ -1,33 +1,56 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=AVX
 
 define <1 x float> @test1(<1 x double> %x) nounwind {
+; CHECK: test1
 ; CHECK: cvtsd2ss
 ; CHECK: ret
+; AVX: test1
+; AVX: vcvtsd2ss
+; AVX: ret
   %y = fptrunc <1 x double> %x to <1 x float>
   ret <1 x float> %y
 }
 
 define <2 x float> @test2(<2 x double> %x) nounwind {
-; FIXME: It would be nice if this compiled down to a cvtpd2ps
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
+; CHECK: test2
+; CHECK: cvtpd2ps
 ; CHECK: ret
+; AVX: test2
+; AVX-NOT: vcvtpd2psy
+; AVX: vcvtpd2ps
+; AVX: ret
   %y = fptrunc <2 x double> %x to <2 x float>
   ret <2 x float> %y
 }
 
-define <8 x float> @test3(<8 x double> %x) nounwind {
-; FIXME: It would be nice if this compiled down to a series of cvtpd2ps
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
-; CHECK: cvtsd2ss
+define <4 x float> @test3(<4 x double> %x) nounwind {
+; CHECK: test3
+; CHECK: cvtpd2ps
+; CHECK: cvtpd2ps
+; CHECK: movlhps
 ; CHECK: ret
+; AVX: test3
+; AVX: vcvtpd2psy
+; AVX: ret
+  %y = fptrunc <4 x double> %x to <4 x float>
+  ret <4 x float> %y
+}
+
+define <8 x float> @test4(<8 x double> %x) nounwind {
+; CHECK: test4
+; CHECK: cvtpd2ps
+; CHECK: cvtpd2ps
+; CHECK: movlhps
+; CHECK: cvtpd2ps
+; CHECK: cvtpd2ps
+; CHECK: movlhps
+; CHECK: ret
+; AVX: test4
+; AVX: vcvtpd2psy
+; AVX: vcvtpd2psy
+; AVX: vinsertf128
+; AVX: ret
   %y = fptrunc <8 x double> %x to <8 x float>
   ret <8 x float> %y
 }