fix PR11334
- FP_EXTEND only supports extending from vectors with matching element counts.
  As a result, extending v2f32 to v2f64 is scalarized, because v2f32 is
  legalized to v4f32, which no longer matches v2f64.
- Add an X86-specific VFPEXT node supporting the v4f32 -> v2f64 extend.
- Add a BUILD_VECTOR lowering helper to recover the original v4f32 -> v2f64
  extend from the scalarized form.
- Enhance the test case to cover different vector widths.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161894 91177308-0d34-0410-b5e6-96231b3b80d8
parent fc1a161d76
commit 7091b2451d
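For context, the failing shape from PR11334, as a minimal LLVM IR sketch (the function name @f is illustrative; the new test at the end of this commit exercises the same pattern across several widths):

; Extending v2f32 to v2f64 on x86-64.
define <2 x double> @f(<2 x float> %v) nounwind {
entry:
  ; Type legalization widens <2 x float> to <4 x float>; since the generic
  ; ISD::FP_EXTEND requires matching source/result element counts, v4f32
  ; cannot feed v2f64 directly, and the extend is scalarized instead.
  %e = fpext <2 x float> %v to <2 x double>
  ret <2 x double> %e
}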
lib/Target/X86/X86ISelLowering.cpp

@@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check the patterns could be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Fill the remaining mask as undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -11343,6 +11423,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
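For intuition, the DAG rewrite performed by LowerVectorFpExtend above can be sketched as follows (schematic node notation in IR-style comments, not verbatim -view-dag output; V stands for the common v4f32 source vector):

; Scalarized form produced by type legalization:
;   v2f64 = build_vector (fp_extend (extract_vector_elt V, 0)),
;                        (fp_extend (extract_vector_elt V, 1))
; Recovered form:
;   v2f64 = X86ISD::VFPEXT (vector_shuffle<0,1,u,u> V, undef)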
lib/Target/X86/X86ISelLowering.h

@@ -227,6 +227,9 @@ namespace llvm {
       // VSEXT_MOVL - Vector move low and sign extend.
      VSEXT_MOVL,
 
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
       // VSHL, VSRL - 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
 
@@ -828,6 +831,8 @@ namespace llvm {
     SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
     SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
     virtual SDValue
     LowerFormalArguments(SDValue Chain,
                          CallingConv::ID CallConv, bool isVarArg,
lib/Target/X86/X86InstrFragmentsSIMD.td

@@ -81,6 +81,11 @@ def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
 
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
lib/Target/X86/X86InstrSSE.td

@@ -2101,12 +2101,20 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
 
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
             (VCVTPS2PDYrm addr:$src)>;
 }
 
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128 conversions
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (CVTPS2PDrr VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//
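Combined with the patterns above, the v2f32 case from the new test should now select to a single packed conversion rather than a pair of scalar ones (a hedged sketch; exact register assignment depends on the surrounding code):

; %f1 = fpext <2 x float> %v1 to <2 x double>
;   SSE2 (HasSSE2 pattern): cvtps2pd  %xmm0, %xmm0
;   AVX  (HasAVX pattern):  vcvtps2pd %xmm0, %xmm0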
test/CodeGen/X86/pr11334.ll (new file, 56 lines)

@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
+
+define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind {
+entry:
+; CHECK: v2f2d_ext_vec
+; CHECK: cvtps2pd
+; AVX: v2f2d_ext_vec
+; AVX: vcvtps2pd
+  %f1 = fpext <2 x float> %v1 to <2 x double>
+  ret <2 x double> %f1
+}
+
+define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
+entry:
+; CHECK: v3f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX: v3f2d_ext_vec
+; AVX: vcvtps2pd
+; AVX: ret
+  %f1 = fpext <3 x float> %v1 to <3 x double>
+  ret <3 x double> %f1
+}
+
+define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
+entry:
+; CHECK: v4f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX: v4f2d_ext_vec
+; AVX: vcvtps2pd
+; AVX: ret
+  %f1 = fpext <4 x float> %v1 to <4 x double>
+  ret <4 x double> %f1
+}
+
+define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind {
+entry:
+; CHECK: v8f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX: v8f2d_ext_vec
+; AVX: vcvtps2pd
+; AVX: vextractf128
+; AVX: vcvtps2pd
+; AVX: ret
+  %f1 = fpext <8 x float> %v1 to <8 x double>
+  ret <8 x double> %f1
+}