Added support for the following definition of shufflevector

<result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@58964 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 17:39:16 +00:00 · 2008-11-10 04:46:22 +00:00 · 2008-11-10 04:46:22 +00:00 · aeb06d2462
commit aeb06d2462
parent a64f463fb9
12 changed files with 315 additions and 77 deletions
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@ -2932,23 +2932,25 @@ exceeds the length of <tt>val</tt>, the results are undefined.
 <h5>Syntax:</h5>

 <pre>
-  &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;n x i32&gt; &lt;mask&gt;    <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
+  &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;m x i32&gt; &lt;mask&gt;    <i>; yields &lt;m x &lt;ty&gt;&gt;</i>
 </pre>

 <h5>Overview:</h5>

 <p>
 The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
-from two input vectors, returning a vector of the same type.
+from two input vectors, returning a vector with the same element type as
+the input and length that is the same as the shuffle mask.
 </p>

 <h5>Arguments:</h5>

 <p>
-The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
-with types that match each other and types that match the result of the
-instruction.  The third argument is a shuffle mask, which has the same number
-of elements as the other vector type, but whose element type is always 'i32'.
+The first two operands of a '<tt>shufflevector</tt>' instruction are vectors 
+with types that match each other. The third argument is a shuffle mask whose
+element type is always 'i32'.  The result of the instruction is a vector whose
+length is the same as the shuffle mask and whose element type is the same as
+the element type of the first two operands.
 </p>

 <p>
@ -2961,7 +2963,7 @@ constant integer or undef values.
 <p>
 The elements of the two input vectors are numbered from left to right across
 both of the vectors.  The shuffle mask operand specifies, for each element of
-the result vector, which element of the two input registers the result element
+the result vector, which element of the two input vectors the result element
 gets.  The element selector may be undef (meaning "don't care") and the second
 operand may be undef if performing a shuffle from only one vector.
 </p>
@ -2973,6 +2975,10 @@ operand may be undef if performing a shuffle from only one vector.
                          &lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt;  <i>; yields &lt;4 x i32&gt;</i>
  %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef, 
                          &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
+  %result = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef, 
+                          &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i>
+  %result = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2, 
+                          &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt;  <i>; yields &lt;8 x i32&gt;</i>
 </pre>
 </div>

--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@ -1592,16 +1592,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
          getValue(Record, OpNum, Vec1->getType(), Vec2))
        return Error("Invalid SHUFFLEVEC record");

-      const Type *MaskTy =
-        VectorType::get(Type::Int32Ty, 
-                        cast<VectorType>(Vec1->getType())->getNumElements());
-
-      if (getValue(Record, OpNum, MaskTy, Mask))
+      if (getValueTypePair(Record, OpNum, NextValueNo, Mask))
        return Error("Invalid SHUFFLEVEC record");
      I = new ShuffleVectorInst(Vec1, Vec2, Mask);
      break;
    }
-      
+
    case bitc::FUNC_CODE_INST_CMP: { // CMP: [opty, opval, opval, pred]
      // VFCmp/VICmp
      // or old form of ICmp/FCmp returning bool
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -4997,6 +4997,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  SDValue ShufMask = N->getOperand(2);
  unsigned NumElts = ShufMask.getNumOperands();

+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  assert(N0.getValueType().getVectorNumElements() == NumElts &&
+        "Vector shuffle must be normalized in DAG");
+
  // If the shuffle mask is an identity operation on the LHS, return the LHS.
  bool isIdentity = true;
  for (unsigned i = 0; i != NumElts; ++i) {
@ -5043,8 +5049,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
      }
    }

-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
  // Normalize unary shuffle so the RHS is undef.
  if (isUnary && VecNum == 1)
    std::swap(N0, N1);
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -3099,7 +3099,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
    }

    Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
-    
+
    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
    default: assert(0 && "BinOp legalize operation not supported");
    case TargetLowering::Legal: break;
@ -7210,16 +7210,34 @@ void SelectionDAGLegalize::SplitVectorOp(SDValue Op, SDValue &Lo,
      Lo = Node->getOperand(0);
      Hi = Node->getOperand(1);
    } else {
-      SmallVector<SDValue, 8> LoOps(Node->op_begin(), 
-                                      Node->op_begin()+NewNumSubvectors);
+      SmallVector<SDValue, 8> LoOps(Node->op_begin(),
+                                    Node->op_begin()+NewNumSubvectors);
      Lo = DAG.getNode(ISD::CONCAT_VECTORS, NewVT_Lo, &LoOps[0], LoOps.size());

-      SmallVector<SDValue, 8> HiOps(Node->op_begin()+NewNumSubvectors, 
+      SmallVector<SDValue, 8> HiOps(Node->op_begin()+NewNumSubvectors,
                                      Node->op_end());
      Hi = DAG.getNode(ISD::CONCAT_VECTORS, NewVT_Hi, &HiOps[0], HiOps.size());
    }
    break;
  }
+  case ISD::EXTRACT_SUBVECTOR: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue Idx = Op.getOperand(1);
+    MVT     IdxVT = Idx.getValueType();
+
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, NewVT_Lo, Vec, Idx);
+    ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+    if (CIdx) {
+      Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, NewVT_Hi, Vec, 
+                       DAG.getConstant(CIdx->getZExtValue() + NewNumElts_Lo,
+                                       IdxVT));
+    } else {
+      Idx = DAG.getNode(ISD::ADD, IdxVT, Idx,
+                        DAG.getConstant(NewNumElts_Lo, IdxVT));
+      Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, NewVT_Hi, Vec, Idx);
+    }
+    break;
+  }
  case ISD::SELECT: {
    SDValue Cond = Node->getOperand(0);

@ -7517,7 +7535,7 @@ SDValue SelectionDAGLegalize::ScalarizeVectorOp(SDValue Op) {
  }
  case ISD::EXTRACT_SUBVECTOR:
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, NewVT, Node->getOperand(0),
-                          Node->getOperand(1));
+                         Node->getOperand(1));
    break;
  case ISD::BIT_CONVERT: {
    SDValue Op0 = Op.getOperand(0);
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@ -476,6 +476,7 @@ private:
  void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
  void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -336,10 +336,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;

-  case ISD::BIT_CONVERT:    SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
-  case ISD::BUILD_VECTOR:   SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
-  case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
-  case ISD::FPOWI:          SplitVecRes_FPOWI(N, Lo, Hi); break;
+  case ISD::BIT_CONVERT:       SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+  case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+  case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+  case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
  case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
  case ISD::LOAD:           SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break;
  case ISD::VECTOR_SHUFFLE: SplitVecRes_VECTOR_SHUFFLE(N, Lo, Hi); break;
@ -486,6 +487,32 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
  Hi = DAG.getNode(ISD::CONCAT_VECTORS, HiVT, &HiOps[0], HiOps.size());
 }

+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  MVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  unsigned LoNumElts = LoVT.getVectorNumElements();
+
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  MVT     IdxVT = Idx.getValueType();
+  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, LoVT, Vec, Idx);
+
+  ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
+  if (CIdx) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    assert (IdxVal % LoVT.getVectorNumElements() == 0 &&
+           (IdxVal+LoNumElts) % HiVT.getVectorNumElements()==0 &&
+            "Index must be a multiple of the result type");
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, HiVT, Vec,
+                     DAG.getConstant(IdxVal + LoNumElts, IdxVT));
+  } else {
+    assert(LoVT == HiVT && "Low and High value type should be the same");
+    Idx = DAG.getNode(ISD::ADD, IdxVT, Idx, DAG.getConstant(LoNumElts, IdxVT));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, HiVT, Vec, Idx);
+  }
+}
+
 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
                                         SDValue &Hi) {
  GetSplitVector(N->getOperand(0), Lo, Hi);
@ -631,14 +658,19 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
  Ops.clear();

  for (unsigned i = LoNumElts; i != NumElements; ++i) {
-    unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
-    SDValue InVec = N->getOperand(0);
-    if (Idx >= NumElements) {
-      InVec = N->getOperand(1);
-      Idx -= NumElements;
+    SDValue Arg = Mask.getOperand(i);
+    if (Arg.getOpcode() == ISD::UNDEF) {
+      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+    } else {
+      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
+      SDValue InVec = N->getOperand(0);
+      if (Idx >= NumElements) {
+        InVec = N->getOperand(1);
+        Idx -= NumElements;
+      }
+      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InVec,
+                                DAG.getIntPtrConstant(Idx)));
    }
-    Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, InVec,
-                              DAG.getIntPtrConstant(Idx)));
  }
  Hi = DAG.getNode(ISD::BUILD_VECTOR, HiVT, &Ops[0], Ops.size());
 }
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -2697,7 +2697,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, MVT VT,
    }
    break;
  case ISD::VECTOR_SHUFFLE:
-    assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType().isVector() &&
           VT.isVector() && N3.getValueType().isVector() &&
           N3.getOpcode() == ISD::BUILD_VECTOR &&
           VT.getVectorNumElements() == N3.getNumOperands() &&
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@ -2288,14 +2288,180 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
                           TLI.getValueType(I.getType()), InVec, InIdx));
 }

+
+// Utility for visitShuffleVector - Returns true if the mask is mask starting
+// from SIndx and increasing to the element length (undefs are allowed).
+static bool SequentialMask(SDValue Mask, unsigned SIndx) {
+  unsigned NumElems = Mask.getNumOperands();
+  for (unsigned i = 0; i != NumElems; ++i) {
+    if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
+      unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
+      if (Idx != i + SIndx)
+        return false;
+    }
+  }
+  return true;
+}
+
 void SelectionDAGLowering::visitShuffleVector(User &I) {
  SDValue V1   = getValue(I.getOperand(0));
  SDValue V2   = getValue(I.getOperand(1));
  SDValue Mask = getValue(I.getOperand(2));

-  setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE,
-                           TLI.getValueType(I.getType()),
-                           V1, V2, Mask));
+  MVT VT = TLI.getValueType(I.getType());
+  MVT VT1 = V1.getValueType();
+  unsigned MaskNumElts = Mask.getNumOperands();
+  unsigned Src1NumElts = VT1.getVectorNumElements();
+
+  if (Src1NumElts == MaskNumElts) {
+    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
+    return;
+  }
+
+  // Normalize the shuffle vector since mask and vector length don't match.
+  if (Src1NumElts < MaskNumElts && MaskNumElts % Src1NumElts == 0) {
+    // We can concat vectors to make the mask and input vector match.
+    if (Src1NumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+      // The shuffle is concatenating two vectors.
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, VT, V1, V2));
+      return;
+    }
+
+    // Pad both vectors with undefs to the same size as the mask.
+    unsigned NumConcat = MaskNumElts / Src1NumElts;
+    std::vector<SDValue> UnOps(Src1NumElts,
+                               DAG.getNode(ISD::UNDEF, 
+                                           VT1.getVectorElementType()));
+    SDValue UndefVal = DAG.getNode(ISD::BUILD_VECTOR, VT1,
+                                   &UnOps[0], UnOps.size());
+
+    SmallVector<SDValue, 8> MOps1, MOps2;
+    MOps1.push_back(V1);
+    MOps2.push_back(V2);
+    for (unsigned i = 1; i != NumConcat; ++i) {
+      MOps1.push_back(UndefVal);
+      MOps2.push_back(UndefVal);
+    }
+    V1 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps1[0], MOps1.size());
+    V2 = DAG.getNode(ISD::CONCAT_VECTORS, VT, &MOps2[0], MOps2.size());
+    
+    // Readjust mask for new input vector length.
+    SmallVector<SDValue, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
+        MappedOps.push_back(Mask.getOperand(i));
+      } else {
+        unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
+        if (Idx < Src1NumElts) {
+          MappedOps.push_back(DAG.getConstant(Idx,
+                                           Mask.getOperand(i).getValueType()));
+        } else {
+          MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - Src1NumElts,
+                                           Mask.getOperand(i).getValueType()));
+        } 
+      }
+    }
+    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
+                       &MappedOps[0], MappedOps.size());
+
+    setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
+    return;
+  }
+
+  if (Src1NumElts > MaskNumElts) {
+    // Resulting vector is shorter than the incoming vector.
+    if (Src1NumElts == MaskNumElts && SequentialMask(Mask,0)) {
+      // Shuffle extracts 1st vector.
+      setValue(&I, V1);
+      return;
+    }
+
+    if (Src1NumElts == MaskNumElts && SequentialMask(Mask,MaskNumElts)) {
+      // Shuffle extracts 2nd vector.
+      setValue(&I, V2);
+      return;
+    }
+
+    // Analyze the access pattern of the vector to see if we can extract each
+    // subvector and then do the shuffle. The analysis is done by calculating
+    // the range of elements the mask access on both vectors. If it is useful,
+    // we could do better by considering separate what elements are accessed
+    // in each vector (i.e., have min/max for each vector).
+    int MinRange = Src1NumElts+1;
+    int MaxRange = -1;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      SDValue Arg = Mask.getOperand(i);
+      if (Arg.getOpcode() != ISD::UNDEF) {
+        assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+        int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
+        if (Idx > (int) Src1NumElts)
+          Idx -= Src1NumElts;
+        if (Idx > MaxRange)
+          MaxRange = Idx;
+        if (Idx < MinRange)
+          MinRange = Idx;
+      }
+    }
+    // Adjust MinRange to start at an even boundary since this give us
+    // better quality splits later.
+    if ((unsigned) MinRange < Src1NumElts && MinRange%2 != 0)
+      MinRange = MinRange - 1;
+    if (MaxRange - MinRange < (int) MaskNumElts) {
+      // Extract subvector because the range is less than the new vector length
+      unsigned StartIdx = (MinRange/MaskNumElts)*MaskNumElts;
+      if (MaxRange - StartIdx < MaskNumElts) {
+        V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V1,
+                         DAG.getIntPtrConstant(MinRange));
+        V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, VT, V2,
+                         DAG.getIntPtrConstant(MinRange));
+        // Readjust mask for new input vector length.
+        SmallVector<SDValue, 8> MappedOps;
+        for (unsigned i = 0; i != MaskNumElts; ++i) {
+          if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
+            MappedOps.push_back(Mask.getOperand(i));
+          } else {
+            unsigned Idx =
+              cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
+            if (Idx < Src1NumElts) {
+              MappedOps.push_back(DAG.getConstant(Idx - StartIdx,
+                                         Mask.getOperand(i).getValueType()));
+            } else {
+              Idx = Idx - Src1NumElts - StartIdx + MaskNumElts;
+              MappedOps.push_back(DAG.getConstant(Idx,
+                                        Mask.getOperand(i).getValueType()));
+            } 
+          }
+        }
+        Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
+                           &MappedOps[0], MappedOps.size());
+
+        setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask));
+        return;
+      }
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors so we fall back
+  // to insert and extracts.
+  MVT EltVT = VT.getVectorElementType();
+  MVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue,8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    SDValue Arg = Mask.getOperand(i);
+    if (Arg.getOpcode() == ISD::UNDEF) {
+      Ops.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+    } else {
+      assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+      unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
+      if (Idx < Src1NumElts)
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V1,
+                                  DAG.getConstant(Idx, PtrVT)));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, V2,
+                                  DAG.getConstant(Idx - Src1NumElts, PtrVT)));
+    }
+  }
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, VT, &Ops[0], Ops.size()));
 }

 void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) {
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@ -1362,7 +1362,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
 }


-/// SimplifyDemandedVectorElts - The specified value producecs a vector with
+/// SimplifyDemandedVectorElts - The specified value produces a vector with
 /// 64 or fewer elements.  DemandedElts contains the set of elements that are
 /// actually used by the caller.  This method analyzes which elements of the
 /// operand are undef and returns that information in UndefElts.
@ -1386,7 +1386,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
    UndefElts = EltMask;
    return UndefValue::get(V->getType());
  }
-  
+
  UndefElts = 0;
  if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {
    const Type *EltTy = cast<VectorType>(V->getType())->getElementType();
@ -1403,7 +1403,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
      } else {                               // Otherwise, defined.
        Elts.push_back(CP->getOperand(i));
      }
-        
+
    // If we changed the constant, return it.
    Constant *NewCP = ConstantVector::get(Elts);
    return NewCP != CP ? NewCP : 0;
@ -1486,17 +1486,19 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+    uint64_t LHSVWidth =
+      cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
    uint64_t LeftDemanded = 0, RightDemanded = 0;
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts & (1ULL << i)) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
-          assert(MaskVal < VWidth * 2 &&
+          assert(MaskVal < LHSVWidth * 2 &&
                 "shufflevector mask index out of range!");
-          if (MaskVal < VWidth)
+          if (MaskVal < LHSVWidth)
            LeftDemanded |= 1ULL << MaskVal;
          else
-            RightDemanded |= 1ULL << (MaskVal - VWidth);
+            RightDemanded |= 1ULL << (MaskVal - LHSVWidth);
        }
      }
    }
@ -1516,12 +1518,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
      if (MaskVal == -1u) {
        uint64_t NewBit = 1ULL << i;
        UndefElts |= NewBit;
-      } else if (MaskVal < VWidth) {
+      } else if (MaskVal < LHSVWidth) {
        uint64_t NewBit = ((UndefElts2 >> MaskVal) & 1) << i;
        NewUndefElts |= NewBit;
        UndefElts |= NewBit;
      } else {
-        uint64_t NewBit = ((UndefElts3 >> (MaskVal - VWidth)) & 1) << i;
+        uint64_t NewBit = ((UndefElts3 >> (MaskVal - LHSVWidth)) & 1) << i;
        NewUndefElts |= NewBit;
        UndefElts |= NewBit;
      }
@ -8398,8 +8400,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      // Okay, we have (bitconvert (shuffle ..)).  Check to see if this is
      // a bitconvert to a vector with the same # elts.
      if (isa<VectorType>(DestTy) && 
-          cast<VectorType>(DestTy)->getNumElements() == 
-                SVI->getType()->getNumElements()) {
+          cast<VectorType>(DestTy)->getNumElements() ==
+                SVI->getType()->getNumElements() &&
+          SVI->getType()->getNumElements() ==
+            cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
        CastInst *Tmp;
        // If either of the operands is a cast from CI.getType(), then
        // evaluating the shuffle in the casted destination's type will allow
@ -11456,11 +11460,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
    // vector input.
    return FindScalarElement(III->getOperand(0), EltNo);
  } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+    unsigned LHSWidth =
+      cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
    unsigned InEl = getShuffleMask(SVI)[EltNo];
-    if (InEl < Width)
+    if (InEl < LHSWidth)
      return FindScalarElement(SVI->getOperand(0), InEl);
-    else if (InEl < Width*2)
-      return FindScalarElement(SVI->getOperand(1), InEl - Width);
+    else if (InEl < LHSWidth*2)
+      return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
    else
      return UndefValue::get(PTy->getElementType());
  }
@ -11578,10 +11584,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
      if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
        unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
        Value *Src;
-        if (SrcIdx < SVI->getType()->getNumElements())
+        unsigned LHSWidth =
+          cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+        if (SrcIdx < LHSWidth)
          Src = SVI->getOperand(0);
-        else if (SrcIdx < SVI->getType()->getNumElements()*2) {
-          SrcIdx -= SVI->getType()->getNumElements();
+        else if (SrcIdx < LHSWidth*2) {
+          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        } else {
          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@ -11802,13 +11811,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::vector<unsigned> Mask = getShuffleMask(&SVI);

  bool MadeChange = false;
-  
+
  // Undefined shuffle mask -> undefined value.
  if (isa<UndefValue>(SVI.getOperand(2)))
    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));

  uint64_t UndefElts;
  unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+
+  if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
+    return 0;
+
  uint64_t AllOnesEltMask = ~0ULL >> (64-VWidth);
  if (VWidth <= 64 &&
      SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@ -424,24 +424,25 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
                                                     const Constant *Mask) {
  // Undefined shuffle mask -> undefined value.
  if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
-  
-  unsigned NumElts = cast<VectorType>(V1->getType())->getNumElements();
+
+  unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
+  unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
  const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
-  
+
  // Loop over the shuffle mask, evaluating each element.
  SmallVector<Constant*, 32> Result;
-  for (unsigned i = 0; i != NumElts; ++i) {
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
    Constant *InElt = GetVectorElement(Mask, i);
    if (InElt == 0) return 0;
-    
+
    if (isa<UndefValue>(InElt))
      InElt = UndefValue::get(EltTy);
    else if (ConstantInt *CI = dyn_cast<ConstantInt>(InElt)) {
      unsigned Elt = CI->getZExtValue();
-      if (Elt >= NumElts*2)
+      if (Elt >= SrcNumElts*2)
        InElt = UndefValue::get(EltTy);
-      else if (Elt >= NumElts)
-        InElt = GetVectorElement(V2, Elt-NumElts);
+      else if (Elt >= SrcNumElts)
+        InElt = GetVectorElement(V2, Elt - SrcNumElts);
      else
        InElt = GetVectorElement(V1, Elt);
      if (InElt == 0) return 0;
@ -451,7 +452,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(const Constant *V1,
    }
    Result.push_back(InElt);
  }
-  
+
  return ConstantVector::get(&Result[0], Result.size());
 }

--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@ -1287,10 +1287,12 @@ ShuffleVectorInst::ShuffleVectorInst(const ShuffleVectorInst &SV)
 ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
                                     const std::string &Name,
                                     Instruction *InsertBefore)
-  : Instruction(V1->getType(), ShuffleVector,
-                OperandTraits<ShuffleVectorInst>::op_begin(this),
-                OperandTraits<ShuffleVectorInst>::operands(this),
-                InsertBefore) {
+: Instruction(VectorType::get(cast<VectorType>(V1->getType())->getElementType(),
+                cast<VectorType>(Mask->getType())->getNumElements()),
+              ShuffleVector,
+              OperandTraits<ShuffleVectorInst>::op_begin(this),
+              OperandTraits<ShuffleVectorInst>::operands(this),
+              InsertBefore) {
  assert(isValidOperands(V1, V2, Mask) &&
         "Invalid shuffle vector instruction operands!");
  Op<0>() = V1;
@ -1300,7 +1302,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
 }

 ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
-                                     const std::string &Name, 
+                                     const std::string &Name,
                                     BasicBlock *InsertAtEnd)
  : Instruction(V1->getType(), ShuffleVector,
                OperandTraits<ShuffleVectorInst>::op_begin(this),
@ -1315,17 +1317,14 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
  setName(Name);
 }

-bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, 
+bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
                                        const Value *Mask) {
-  if (!isa<VectorType>(V1->getType()) || 
-      V1->getType() != V2->getType()) 
+  if (!isa<VectorType>(V1->getType()) || V1->getType() != V2->getType())
    return false;
  
  const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
  if (!isa<Constant>(Mask) || MaskTy == 0 ||
-      MaskTy->getElementType() != Type::Int32Ty ||
-      MaskTy->getNumElements() != 
-      cast<VectorType>(V1->getType())->getNumElements())
+      MaskTy->getElementType() != Type::Int32Ty)
    return false;
  return true;
 }
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@ -1098,14 +1098,15 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
  Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
                                             SV.getOperand(2)),
          "Invalid shufflevector operands!", &SV);
-  Assert1(SV.getType() == SV.getOperand(0)->getType(),
-          "Result of shufflevector must match first operand type!", &SV);
-  
+
+  const VectorType *VTy = dyn_cast<VectorType>(SV.getOperand(0)->getType());
+  Assert1(VTy, "Operands are not a vector type", &SV);
+
  // Check to see if Mask is valid.
  if (const ConstantVector *MV = dyn_cast<ConstantVector>(SV.getOperand(2))) {
    for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
      if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
-        Assert1(!CI->uge(MV->getNumOperands()*2),
+        Assert1(!CI->uge(VTy->getNumElements()*2),
                "Invalid shufflevector shuffle mask!", &SV);
      } else {
        Assert1(isa<UndefValue>(MV->getOperand(i)),
@ -1117,7 +1118,7 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
            isa<ConstantAggregateZero>(SV.getOperand(2)),
            "Invalid shufflevector shuffle mask!", &SV);
  }
-  
+
  visitInstruction(SV);
 }