Change handling of illegal vector types to widen when possible instead of
expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats.

This affects two places in the code: handling cross block values and handling
function return and arguments. Since vectors are already widened by
legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU
abi work.

For example, this (which is a silly example of a cross-block value):

    define <4 x float> @test2(<4 x float> %A) nounwind {
      %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
      %C = fadd <2 x float> %B, %B
      br label %BB
    BB:
      %D = fadd <2 x float> %C, %C
      %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
      ret <4 x float> %E
    }

Now compiles into:

    _test2:                                 ## @test2
    ## BB#0:
            addps   %xmm0, %xmm0
            addps   %xmm0, %xmm0
            ret

previously it compiled into:

    _test2:                                 ## @test2
    ## BB#0:
            addps   %xmm0, %xmm0
            pshufd  $1, %xmm0, %xmm1
            ## kill: XMM0<def> XMM0<kill> XMM0<def>
            insertps $0, %xmm0, %xmm0
            insertps $16, %xmm1, %xmm0
            addps   %xmm0, %xmm0
            ret

This implements rdar://8230384

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112101 91177308-0d34-0410-b5e6-96231b3b80d8
2025-07-02 19:24:25 +00:00 · 2010-08-25 22:49:25 +00:00
parent e6e0018d3e
commit e6f7c267df
5 changed files with 193 additions and 74 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -214,24 +214,59 @@ public:
    /// ValueTypeActions - For each value type, keep a LegalizeAction enum
    /// that indicates how instruction selection should deal with the type.
    uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
    /// getExtendedTypeAction - Choose the legalize action for a value type
    /// that has no entry in the ValueTypeActions table (getTypeAction(EVT)
    /// routes here when VT.isExtended()).  Returns Promote or Expand; it never
    /// returns Legal.
    LegalizeAction getExtendedTypeAction(EVT VT) const {
      // Handle non-vector integers.
      if (!VT.isVector()) {
        assert(VT.isInteger() && "Unsupported extended type!");
        unsigned BitSize = VT.getSizeInBits();
        // First promote to a power-of-two size, then expand if necessary.
        // Widths below 8 bits are also promoted, even when they are a power
        // of two.
        if (BitSize < 8 || !isPowerOf2_32(BitSize))
          return Promote;
        return Expand;
      }
      // If this is a type smaller than a legal vector type, promote to that
      // type, e.g. <2 x float> -> <4 x float>.  Single-element vectors and
      // vectors whose element type is itself extended skip this search.
      if (VT.getVectorElementType().isSimple() &&
          VT.getVectorNumElements() != 1) {
        MVT EltType = VT.getVectorElementType().getSimpleVT();
        unsigned NumElts = VT.getVectorNumElements();
        while (1) {
          // Round up to the nearest power of 2.
          // NOTE(review): termination relies on NextPowerOf2 returning a value
          // strictly greater than its argument -- confirm in MathExtras.h.
          NumElts = (unsigned)NextPowerOf2(NumElts);
          MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
          // Presumably a default-constructed MVT means "no simple vector type
          // with this element type and count"; stop searching in that case.
          if (LargerVector == MVT()) break;
          // If the larger type is legal, promote to it.
          if (getTypeAction(LargerVector) == Legal) return Promote;
        }
      }
      // No legal wider vector was found: expand vectors whose shape is already
      // a power of two, and promote the rest.
      return VT.isPow2VectorType() ? Expand : Promote;
    }      
  public:
    ValueTypeActionImpl() {
      std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
    }
    /// FIXME: This Context argument is now dead, zap it.
    LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
-      if (VT.isExtended()) {
+      return getTypeAction(VT);
        if (VT.isVector()) {
          return VT.isPow2VectorType() ? Expand : Promote;
        }
        if (VT.isInteger())
          // First promote to a power-of-two size, then expand if necessary.
          return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
        assert(0 && "Unsupported extended type!");
        return Legal;
      }
      unsigned I = VT.getSimpleVT().SimpleTy;
      return (LegalizeAction)ValueTypeActions[I];
    }
    /// getTypeAction - Return the legalize action for VT.  Non-extended types
    /// are forwarded to the MVT overload; extended types, which have no slot
    /// in the table, are classified by getExtendedTypeAction.
    LegalizeAction getTypeAction(EVT VT) const {
      if (!VT.isExtended())
        return getTypeAction(VT.getSimpleVT());
      return getExtendedTypeAction(VT);
    }
    /// getTypeAction - Return the action recorded for a simple value type by
    /// indexing ValueTypeActions with VT.SimpleTy; the stored uint8_t is cast
    /// back to LegalizeAction.
    LegalizeAction getTypeAction(MVT VT) const {
      return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
    }
    void setTypeAction(EVT VT, LegalizeAction Action) {
      unsigned I = VT.getSimpleVT().SimpleTy;
      ValueTypeActions[I] = Action;
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -252,8 +252,21 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
  if (PartVT == ValueVT)
    return Val;
-  if (PartVT.isVector())  // Vector/Vector bitcast.
+  if (PartVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
    // elements we want.
    if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                         DAG.getIntPtrConstant(0));
    }                                      
    // Vector/Vector bitcast.
    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
  }
  assert(ValueVT.getVectorElementType() == PartVT &&
         ValueVT.getVectorNumElements() == 1 &&
@ -392,16 +405,39 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (NumParts == 1) {
-    if (PartVT != ValueVT) {
+    if (PartVT == ValueVT) {
-      if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+      // Nothing to do.
-        Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
-      } else {
+      // Bitconvert vector->vector case.
-        assert(ValueVT.getVectorElementType() == PartVT &&
+      Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
-               ValueVT.getVectorNumElements() == 1 &&
+    } else if (PartVT.isVector() &&
-               "Only trivial vector-to-scalar conversions should get here!");
+               PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
-        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+               PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
-                          PartVT, Val, DAG.getIntPtrConstant(0));
+      EVT ElementVT = PartVT.getVectorElementType();
-      }
+      // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
      // undef elements.
      SmallVector<SDValue, 16> Ops;
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                  ElementVT, Val, DAG.getIntPtrConstant(i)));
      for (unsigned i = ValueVT.getVectorNumElements(),
           e = PartVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getUNDEF(ElementVT));
      Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
      // FIXME: Use CONCAT for 2x -> 4x.
      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    } else {
      // Vector -> scalar conversion.
      assert(ValueVT.getVectorElementType() == PartVT &&
             ValueVT.getVectorNumElements() == 1 &&
             "Only trivial vector-to-scalar conversions should get here!");
      Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                        PartVT, Val, DAG.getIntPtrConstant(0));
    }
    Parts[0] = Val;
@ -428,8 +464,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
                   DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
    else
      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-                           IntermediateVT, Val,
+                           IntermediateVT, Val, DAG.getIntPtrConstant(i));
                           DAG.getIntPtrConstant(i));
  }
  // Split the intermediate operands into legal parts.
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -697,6 +697,7 @@ TargetLowering::findRepresentativeClass(EVT VT) const {
  return std::make_pair(BestRC, 1);
 }
 /// computeRegisterProperties - Once all of the register classes are added,
 /// this allows us to compute derived properties we expose.
 void TargetLowering::computeRegisterProperties() {
@ -782,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
    MVT VT = (MVT::SimpleValueType)i;
    if (isTypeLegal(VT)) continue;
    // Determine if there is a legal wider type.  If so, we should promote to
    // that wider vector type.
    EVT EltVT = VT.getVectorElementType();
    unsigned NElts = VT.getVectorNumElements();
    if (NElts != 1) {
      bool IsLegalWiderType = false;
      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        EVT SVT = (MVT::SimpleValueType)nVT;
        if (SVT.getVectorElementType() == EltVT &&
            SVT.getVectorNumElements() > NElts && 
            isTypeSynthesizable(SVT)) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
          ValueTypeActions.setTypeAction(VT, Promote);
          IsLegalWiderType = true;
          break;
        }
      }
      if (IsLegalWiderType) continue;
    }
    MVT IntermediateVT;
    EVT RegisterVT;
    unsigned NumIntermediates;
@ -790,30 +813,14 @@ void TargetLowering::computeRegisterProperties() {
                                RegisterVT, this);
    RegisterTypeForVT[i] = RegisterVT;
-    // Determine if there is a legal wider type.
+    EVT NVT = VT.getPow2VectorType();
-    bool IsLegalWiderType = false;
+    if (NVT == VT) {
-    EVT EltVT = VT.getVectorElementType();
+      // Type is already a power of 2.  The default action is to split.
-    unsigned NElts = VT.getVectorNumElements();
+      TransformToType[i] = MVT::Other;
-    for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+      ValueTypeActions.setTypeAction(VT, Expand);
-      EVT SVT = (MVT::SimpleValueType)nVT;
+    } else {
-      if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
+      TransformToType[i] = NVT;
-          SVT.getVectorNumElements() > NElts && NElts != 1) {
+      ValueTypeActions.setTypeAction(VT, Promote);
        TransformToType[i] = SVT;
        ValueTypeActions.setTypeAction(VT, Promote);
        IsLegalWiderType = true;
        break;
      }
    }
    if (!IsLegalWiderType) {
      EVT NVT = VT.getPow2VectorType();
      if (NVT == VT) {
        // Type is already a power of 2.  The default action is to split.
        TransformToType[i] = MVT::Other;
        ValueTypeActions.setTypeAction(VT, Expand);
      } else {
        TransformToType[i] = NVT;
        ValueTypeActions.setTypeAction(VT, Promote);
      }
    }
  }
@ -857,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                EVT &RegisterVT) const {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  // If there is a wider vector type with the same element type as this one,
  // we should widen to that legal vector type.  This handles things like
  // <2 x float> -> <4 x float>.
  if (NumElts != 1 && getTypeAction(Context, VT) == Promote) {
    RegisterVT = getTypeToTransformTo(Context, VT);
    if (isTypeLegal(RegisterVT)) {
      IntermediateVT = RegisterVT;
      NumIntermediates = 1;
      return 1;
    }
  }
  // Figure out the right, legal destination reg to copy into.
  EVT EltTy = VT.getVectorElementType();
  unsigned NumVectorRegs = 1;
@ -887,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
  EVT DestVT = getRegisterType(Context, NewVT);
  RegisterVT = DestVT;
-  if (DestVT.bitsLT(NewVT)) {
+  if (DestVT.bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
  } else {
    // Otherwise, promotion or legal types use the same number of registers as
    // the vector decimated to the appropriate level.
    return NumVectorRegs;
  }
-  return 1;
+  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
 }
 /// Get the EVTs and ArgFlags collections that represent the legalized return 
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
  store float %c, float* %P2
  ret void
 ; X64: test1:
-; X64-NEXT: addss	%xmm1, %xmm0
+; X64-NEXT: pshufd	$1, %xmm0, %xmm1
-; X64-NEXT: movss	%xmm0, (%rdi)
+; X64-NEXT: addss	%xmm0, %xmm1
 ; X64-NEXT: movss	%xmm1, (%rdi)
 ; X64-NEXT: ret
 ; X32: test1:
-; X32-NEXT: movss	4(%esp), %xmm0
+; X32-NEXT: pshufd	$1, %xmm0, %xmm1
-; X32-NEXT: addss	8(%esp), %xmm0
+; X32-NEXT: addss	%xmm0, %xmm1
-; X32-NEXT: movl	12(%esp), %eax
+; X32-NEXT: movl	4(%esp), %eax
-; X32-NEXT: movss	%xmm0, (%eax)
+; X32-NEXT: movss	%xmm1, (%eax)
 ; X32-NEXT: ret
 }
@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
  ret <2 x float> %Z
 ; X64: test2:
-; X64-NEXT: insertps $0
+; X64-NEXT: addps	%xmm1, %xmm0
 ; X64-NEXT: insertps $16
 ; X64-NEXT: insertps $0
 ; X64-NEXT: insertps $16
 ; X64-NEXT: addps
 ; X64-NEXT: movaps
 ; X64-NEXT: pshufd
 ; X64-NEXT: ret
 }
 define <2 x float> @test3(<4 x float> %A) nounwind {
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
 	ret <2 x float> %C
 ; CHECK: test3:
 ; CHECK-NEXT: 	addps	%xmm0, %xmm0
 ; CHECK-NEXT: 	ret
 }
 define <2 x float> @test4(<2 x float> %A) nounwind {
 	%C = fadd <2 x float> %A, %A
 	ret <2 x float> %C
 ; CHECK: test4:
 ; CHECK-NEXT: 	addps	%xmm0, %xmm0
 ; CHECK-NEXT: 	ret
 }
 define <4 x float> @test5(<4 x float> %A) nounwind {
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
        br label %BB
 BB:
        %D = fadd <2 x float> %C, %C
 	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 	ret <4 x float> %E
 ; CHECK: _test5:
 ; CHECK-NEXT: 	addps	%xmm0, %xmm0
 ; CHECK-NEXT: 	addps	%xmm0, %xmm0
 ; CHECK-NEXT: 	ret
 }
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@ -3,7 +3,8 @@
 ; widening shuffle v3float and then a add
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
-; CHECK: insertps
+; CHECK: shuf:
 ; CHECK: extractps
 ; CHECK: extractps
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
 	%val = fadd <3 x float> %x, %src2
@ -15,7 +16,8 @@ entry:
 ; widening shuffle v3float with a different mask and then a add
 define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
 entry:
-; CHECK: insertps
+; CHECK: shuf2:
 ; CHECK: extractps
 ; CHECK: extractps
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
 	%val = fadd <3 x float> %x, %src2
@ -26,7 +28,7 @@ entry:
 ; Example of when widening a v3float operation causes the DAG to replace a node
 ; with the operation that we are currently widening, i.e. when replacing
 ; opA with opB, the DAG will produce new operations with opA.
-define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
+define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
 entry:
 ; CHECK: pshufd
  %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>