diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 2884ab72040..e6499d87ef0 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -164,7 +164,10 @@ public:
     
     LegalizeAction getTypeAction(MVT VT) const {
       if (VT.isExtended()) {
-        if (VT.isVector()) return Expand;
+        if (VT.isVector()) {
+          // First try vector widening
+          return Promote;
+        }
         if (VT.isInteger())
           // First promote to a power-of-two size, then expand if necessary.
           return VT == VT.getRoundIntegerType() ? Expand : Promote;
@@ -261,6 +264,13 @@ public:
                                   unsigned &NumIntermediates,
                                   MVT &RegisterVT) const;
   
+  /// getWidenVectorType: given a vector type, returns the type to widen to
+  /// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+  /// If there is no vector type that we want to widen to, returns MVT::Other
+  /// When and were to widen is target dependent based on the cost of
+  /// scalarizing vs using the wider vector type.
+  virtual MVT getWidenVectorType(MVT VT);
+
   typedef std::vector<APFloat>::const_iterator legal_fpimm_iterator;
   legal_fpimm_iterator legal_fpimm_begin() const {
     return LegalFPImmediates.begin();
@@ -1455,7 +1465,7 @@ private:
   MVT TransformToType[MVT::LAST_VALUETYPE];
 
   // Defines the capacity of the TargetLowering::OpActions table
-  static const int OpActionsCapacity = 212;
+  static const int OpActionsCapacity = 218;
 
   /// OpActions - For each operation and each value type, keep a LegalizeAction
   /// that indicates how instruction selection should deal with the operation.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 96b427bd806..f1915917836 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -86,12 +86,12 @@ class VISIBILITY_HIDDEN SelectionDAGLegalize {
   DenseMap<SDValue, SDValue> PromotedNodes;
 
   /// ExpandedNodes - For nodes that need to be expanded this map indicates
-  /// which which operands are the expanded version of the input.  This allows
+  /// which operands are the expanded version of the input.  This allows
   /// us to avoid expanding the same node more than once.
   DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedNodes;
 
   /// SplitNodes - For vector nodes that need to be split, this map indicates
-  /// which which operands are the split version of the input.  This allows us
+  /// which operands are the split version of the input.  This allows us
   /// to avoid splitting the same node more than once.
   std::map<SDValue, std::pair<SDValue, SDValue> > SplitNodes;
   
@@ -100,6 +100,11 @@ class VISIBILITY_HIDDEN SelectionDAGLegalize {
   /// processed to the result.
   std::map<SDValue, SDValue> ScalarizedNodes;
   
+  /// WidenNodes - For nodes that need to be widen from one vector type to
+  /// another, this contains the mapping of ones we have already widen.  This
+  /// allows us to avoid widening more than once.
+  std::map<SDValue, SDValue> WidenNodes;
+
   void AddLegalizedOperand(SDValue From, SDValue To) {
     LegalizedNodes.insert(std::make_pair(From, To));
     // If someone requests legalization of the new node, return itself.
@@ -112,6 +117,12 @@ class VISIBILITY_HIDDEN SelectionDAGLegalize {
     // If someone requests legalization of the new node, return itself.
     LegalizedNodes.insert(std::make_pair(To, To));
   }
+  void AddWidenOperand(SDValue From, SDValue To) {
+    bool isNew = WidenNodes.insert(std::make_pair(From, To)).second;
+    assert(isNew && "Got into the map somehow?");
+    // If someone requests legalization of the new node, return itself.
+    LegalizedNodes.insert(std::make_pair(To, To));
+  }
 
 public:
   explicit SelectionDAGLegalize(SelectionDAG &DAG);
@@ -169,6 +180,15 @@ private:
   /// types.
   void ExpandOp(SDValue O, SDValue &Lo, SDValue &Hi);
 
+  /// WidenVectorOp - Widen a vector operation in order to do the computation
+  /// in a wider type given to a wider type given by WidenVT (e.g., v3i32 to
+  /// v4i32).  The produced value will have the correct value for the existing
+  /// elements but no guarantee is made about the new elements at the end of
+  /// the vector: it may be zero, sign-extended, or garbage.  This is useful
+  /// when we have instruction operating on an illegal vector type and we want
+  /// to widen it to do the computation on a legal wider vector type.
+  SDValue WidenVectorOp(SDValue Op, MVT WidenVT);
+
   /// SplitVectorOp - Given an operand of vector type, break it down into
   /// two smaller values.
   void SplitVectorOp(SDValue O, SDValue &Lo, SDValue &Hi);
@@ -178,6 +198,62 @@ private:
   /// scalar (e.g. f32) value.
   SDValue ScalarizeVectorOp(SDValue O);
   
+  /// Useful 16 element vector used to pass operands for widening
+  typedef SmallVector<SDValue, 16> SDValueVector;  
+  
+  /// LoadWidenVectorOp - Load a vector for a wider type. Returns true if
+  /// the LdChain contains a single load and false if it contains a token
+  /// factor for multiple loads. It takes
+  ///   Result:  location to return the result
+  ///   LdChain: location to return the load chain
+  ///   Op:      load operation to widen
+  ///   NVT:     widen vector result type we want for the load
+  bool LoadWidenVectorOp(SDValue& Result, SDValue& LdChain, 
+                         SDValue Op, MVT NVT);
+                        
+  /// Helper genWidenVectorLoads - Helper function to generate a set of
+  /// loads to load a vector with a resulting wider type. It takes
+  ///   LdChain: list of chains for the load we have generated
+  ///   Chain:   incoming chain for the ld vector
+  ///   BasePtr: base pointer to load from
+  ///   SV:      memory disambiguation source value
+  ///   SVOffset:  memory disambiugation offset
+  ///   Alignment: alignment of the memory
+  ///   isVolatile: volatile load
+  ///   LdWidth:    width of memory that we want to load 
+  ///   ResType:    the wider result result type for the resulting loaded vector
+  SDValue genWidenVectorLoads(SDValueVector& LdChain, SDValue Chain,
+                                SDValue BasePtr, const Value *SV,
+                                int SVOffset, unsigned Alignment,
+                                bool isVolatile, unsigned LdWidth,
+                                MVT ResType);
+  
+  /// StoreWidenVectorOp - Stores a widen vector into non widen memory
+  /// location. It takes
+  ///     ST:      store node that we want to replace
+  ///     Chain:   incoming store chain
+  ///     BasePtr: base address of where we want to store into
+  SDValue StoreWidenVectorOp(StoreSDNode *ST, SDValue Chain, 
+                               SDValue BasePtr);
+  
+  /// Helper genWidenVectorStores - Helper function to generate a set of
+  /// stores to store a widen vector into non widen memory
+  // It takes
+  //   StChain: list of chains for the stores we have generated
+  //   Chain:   incoming chain for the ld vector
+  //   BasePtr: base pointer to load from
+  //   SV:      memory disambiguation source value
+  //   SVOffset:   memory disambiugation offset
+  //   Alignment:  alignment of the memory
+  //   isVolatile: volatile lod
+  //   ValOp:   value to store  
+  //   StWidth: width of memory that we want to store 
+  void genWidenVectorStores(SDValueVector& StChain, SDValue Chain,
+                            SDValue BasePtr, const Value *SV,
+                            int SVOffset, unsigned Alignment,
+                            bool isVolatile, SDValue ValOp,
+                            unsigned StWidth);
+ 
   /// isShuffleLegal - Return non-null if a vector shuffle is legal with the
   /// specified mask and type.  Targets can specify exactly which masks they
   /// support and the code generator is tasked with not creating illegal masks.
@@ -300,6 +376,7 @@ void SelectionDAGLegalize::LegalizeDAG() {
   PromotedNodes.clear();
   SplitNodes.clear();
   ScalarizedNodes.clear();
+  WidenNodes.clear();
 
   // Remove dead nodes now.
   DAG.RemoveDeadNodes();
@@ -403,14 +480,27 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
   return false;
 }
 
-/// HandleOp - Legalize, Promote, or Expand the specified operand as
+/// HandleOp - Legalize, Promote, Widen, or Expand the specified operand as
 /// appropriate for its type.
 void SelectionDAGLegalize::HandleOp(SDValue Op) {
   MVT VT = Op.getValueType();
   switch (getTypeAction(VT)) {
   default: assert(0 && "Bad type action!");
   case Legal:   (void)LegalizeOp(Op); break;
-  case Promote: (void)PromoteOp(Op); break;
+  case Promote:
+    if (!VT.isVector()) {
+      (void)PromoteOp(Op);
+      break;
+    }
+    else  {
+      // See if we can widen otherwise use Expand to either scalarize or split
+      MVT WidenVT = TLI.getWidenVectorType(VT);
+      if (WidenVT != MVT::Other) {
+        (void) WidenVectorOp(Op, WidenVT);
+        break;
+      }
+      // else fall thru to expand since we can't widen the vector
+    }
   case Expand:
     if (!VT.isVector()) {
       // If this is an illegal scalar, expand it into its two component
@@ -424,7 +514,7 @@ void SelectionDAGLegalize::HandleOp(SDValue Op) {
       // scalar operation.
       (void)ScalarizeVectorOp(Op);
     } else {
-      // Otherwise, this is an illegal multiple element vector.
+      // This is an illegal multiple element vector.
       // Split it in half and legalize both parts.
       SDValue X, Y;
       SplitVectorOp(Op, X, Y);
@@ -742,7 +832,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx) {
 
   // Store the vector.
   SDValue Ch = DAG.getStore(DAG.getEntryNode(), Tmp1, StackPtr,
-                              PseudoSourceValue::getFixedStack(SPFI), 0);
+                            PseudoSourceValue::getFixedStack(SPFI), 0);
 
   // Truncate or zero extend offset to target pointer type.
   unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND;
@@ -1417,6 +1507,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     default: assert(0 && "Cannot expand insert element operand");
     case Legal:   Tmp2 = LegalizeOp(Node->getOperand(1)); break;
     case Promote: Tmp2 = PromoteOp(Node->getOperand(1));  break;
+    case Expand:
+      // FIXME: An alternative would be to check to see if the target is not
+      // going to custom lower this operation, we could bitcast to half elt 
+      // width and perform two inserts at that width, if that is legal.
+      Tmp2 = Node->getOperand(1);
+      break;
     }
     Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3);
     
@@ -1432,6 +1528,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         break;
       }
       // FALLTHROUGH
+    case TargetLowering::Promote:
+      // Fall thru for vector case
     case TargetLowering::Expand: {
       // If the insert index is a constant, codegen this as a scalar_to_vector,
       // then a shuffle that inserts it into the right position in the vector.
@@ -1574,6 +1672,26 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     Result = ExpandEXTRACT_SUBVECTOR(Result);
     break;
     
+  case ISD::CONCAT_VECTORS: {
+    // Use extract/insert/build vector for now. We might try to be
+    // more clever later.
+    MVT PtrVT = TLI.getPointerTy();
+    SmallVector<SDValue, 8> Ops;
+    unsigned NumOperands = Node->getNumOperands();
+    for (unsigned i=0; i < NumOperands; ++i) {
+      SDValue SubOp = Node->getOperand(i);
+      MVT VVT = SubOp.getNode()->getValueType(0);
+      MVT EltVT = VVT.getVectorElementType();
+      unsigned NumSubElem = VVT.getVectorNumElements();
+      for (unsigned j=0; j < NumSubElem; ++j) {
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EltVT, SubOp,
+                                  DAG.getConstant(j, PtrVT)));
+      }
+    }
+    return LegalizeOp(DAG.getNode(ISD::BUILD_VECTOR, Node->getValueType(0),
+                      &Ops[0], Ops.size()));
+  }
+
   case ISD::CALLSEQ_START: {
     SDNode *CallEnd = FindCallEndFromCallStart(Node);
     
@@ -2429,14 +2547,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         break;
       }
       case Promote:
-        // Truncate the value and store the result.
-        Tmp3 = PromoteOp(ST->getValue());
-        Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
-                                   SVOffset, ST->getMemoryVT(),
-                                   isVolatile, Alignment);
-        break;
-
-      case Expand:
+        if (!ST->getMemoryVT().isVector()) {
+          // Truncate the value and store the result.
+          Tmp3 = PromoteOp(ST->getValue());
+          Result = DAG.getTruncStore(Tmp1, Tmp3, Tmp2, ST->getSrcValue(),
+                                     SVOffset, ST->getMemoryVT(),
+                                     isVolatile, Alignment);
+          break;
+        }
+        // Fall thru to expand for vector
+      case Expand: {
         unsigned IncrementSize = 0;
         SDValue Lo, Hi;
       
@@ -2470,9 +2590,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
             Result = LegalizeOp(Result);
             break;
           } else {
-            SplitVectorOp(ST->getValue(), Lo, Hi);
-            IncrementSize = Lo.getNode()->getValueType(0).getVectorNumElements() *
-                            EVT.getSizeInBits()/8;
+            // Check if we have widen this node with another value
+            std::map<SDValue, SDValue>::iterator I =
+              WidenNodes.find(ST->getValue());
+            if (I != WidenNodes.end()) {
+              Result = StoreWidenVectorOp(ST, Tmp1, Tmp2);
+              break;
+            }
+            else {
+              SplitVectorOp(ST->getValue(), Lo, Hi);
+              IncrementSize = Lo.getNode()->getValueType(0).getVectorNumElements() *
+                              EVT.getSizeInBits()/8;
+            }
           }
         } else {
           ExpandOp(ST->getValue(), Lo, Hi);
@@ -2501,6 +2630,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                           SVOffset, isVolatile, Alignment);
         Result = DAG.getNode(ISD::TokenFactor, MVT::Other, Lo, Hi);
         break;
+      }  // case Expand
       }
     } else {
       switch (getTypeAction(ST->getValue().getValueType())) {
@@ -2508,9 +2638,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         Tmp3 = LegalizeOp(ST->getValue());
         break;
       case Promote:
-        // We can promote the value, the truncstore will still take care of it.
-        Tmp3 = PromoteOp(ST->getValue());
-        break;
+        if (!ST->getValue().getValueType().isVector()) {
+          // We can promote the value, the truncstore will still take care of it.
+          Tmp3 = PromoteOp(ST->getValue());
+          break;
+        }
+        // Vector case falls through to expand
       case Expand:
         // Just store the low part.  This may become a non-trunc store, so make
         // sure to use getTruncStore, not UpdateNodeOperands below.
@@ -2709,6 +2842,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the condition.
       break;
     case Promote: {
+      assert(!Node->getOperand(0).getValueType().isVector() && "not possible");
       Tmp1 = PromoteOp(Node->getOperand(0));  // Promote the condition.
       // Make sure the condition is either zero or one.
       unsigned BitWidth = Tmp1.getValueSizeInBits();
@@ -2966,7 +3100,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     }
 
     Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
-      
+    
     switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
     default: assert(0 && "BinOp legalize operation not supported");
     case TargetLowering::Legal: break;
@@ -2979,7 +3113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       // Fall through if the custom lower can't deal with the operation
     case TargetLowering::Expand: {
       MVT VT = Op.getValueType();
- 
+      
       // See if multiply or divide can be lowered using two-result operations.
       SDVTList VTs = DAG.getVTList(VT, VT);
       if (Node->getOpcode() == ISD::MUL) {
@@ -3030,7 +3164,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                          0);
         break;
       }
-
+      
       // Check to see if we have a libcall for this operator.
       RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
       bool isSigned = false;
@@ -3039,7 +3173,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       case ISD::SDIV:
         if (VT == MVT::i32) {
           LC = Node->getOpcode() == ISD::UDIV
-            ? RTLIB::UDIV_I32 : RTLIB::SDIV_I32;
+               ? RTLIB::UDIV_I32 : RTLIB::SDIV_I32;
           isSigned = Node->getOpcode() == ISD::SDIV;
         }
         break;
@@ -3058,7 +3192,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         Result = ExpandLibCall(LC, Node, isSigned, Dummy);
         break;
       }
-
+      
       assert(Node->getValueType(0).isVector() &&
              "Cannot expand this binary operator!");
       // Expand the operation into a bunch of nasty scalar code.
@@ -4548,6 +4682,9 @@ SDValue SelectionDAGLegalize::ExpandEXTRACT_VECTOR_ELT(SDValue Op) {
       return Op;
     }
     break;
+  case TargetLowering::Promote:
+    assert(TVT.isVector() && "not vector type");
+    // fall thru to expand since vectors are by default are promote
   case TargetLowering::Expand:
     break;
   }
@@ -5348,7 +5485,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
     Args.push_back(Entry);
   }
   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
-                                           TLI.getPointerTy());
+                                         TLI.getPointerTy());
 
   // Splice the libcall in wherever FindInputOutputChains tells us to.
   const Type *RetTy = Node->getValueType(0).getTypeForMVT();
@@ -5958,7 +6095,6 @@ void SelectionDAGLegalize::ExpandOp(SDValue Op, SDValue &Lo, SDValue &Hi){
       return ExpandOp(Hi, Lo, Hi);
     return ExpandOp(Lo, Lo, Hi);
   case ISD::EXTRACT_VECTOR_ELT:
-    assert(VT==MVT::i64 && "Do not know how to expand this operator!");
     // ExpandEXTRACT_VECTOR_ELT tolerates invalid result types.
     Lo  = ExpandEXTRACT_VECTOR_ELT(Op);
     return ExpandOp(Lo, Lo, Hi);
@@ -7423,6 +7559,690 @@ SDValue SelectionDAGLegalize::ScalarizeVectorOp(SDValue Op) {
 }
 
 
+SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
+  std::map<SDValue, SDValue>::iterator I = WidenNodes.find(Op);
+  if (I != WidenNodes.end()) return I->second;
+  
+  MVT VT = Op.getValueType();
+  assert(VT.isVector() && "Cannot widen non-vector type!");
+
+  SDValue Result;
+  SDNode *Node = Op.getNode();
+  MVT EVT = VT.getVectorElementType();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NewNumElts = WidenVT.getVectorNumElements();
+  assert(NewNumElts > NumElts  && "Cannot widen to smaller type!");
+  assert(NewNumElts < 17);
+
+  // When widen is called, it is assumed that it is more efficient to use a
+  // wide type.  The default action is to widen to operation to a wider legal
+  // vector type and then do the operation if it is legal by calling LegalizeOp
+  // again.  If there is no vector equivalent, we will unroll the operation, do
+  // it, and rebuild the vector.  If most of the operations are vectorizible to
+  // the legal type, the resulting code will be more efficient.  If this is not
+  // the case, the resulting code will preform badly as we end up generating
+  // code to pack/unpack the results. It is the function that calls widen
+  // that is responsible for seeing this doesn't happen.  For some cases, we
+  // have decided that it is not worth widening so we just split the operation.
+  switch (Node->getOpcode()) {
+  default: 
+#ifndef NDEBUG
+      Node->dump(&DAG);
+#endif
+      assert(0 && "Unexpected operation in WidenVectorOp!");
+      break;
+  case ISD::CopyFromReg:
+    assert(0 && "CopyFromReg must be legal!");
+  case ISD::UNDEF:
+  case ISD::Constant:
+  case ISD::ConstantFP:
+    // To build a vector of these elements, clients should call BuildVector
+    // and with each element instead of creating a node with a vector type
+    assert(0 && "Unexpected operation in WidenVectorOp!");
+  case ISD::VAARG:
+    // Variable Arguments with vector types doesn't make any sense to me
+    assert(0 && "Unexpected operation in WidenVectorOp!");
+    break;
+  case ISD::BUILD_VECTOR: {
+    // Build a vector with undefined for the new nodes
+    SDValueVector NewOps(Node->op_begin(), Node->op_end());
+    for (unsigned i = NumElts; i < NewNumElts; ++i) {
+      NewOps.push_back(DAG.getNode(ISD::UNDEF,EVT));
+    }
+    Result = DAG.getNode(ISD::BUILD_VECTOR, WidenVT, &NewOps[0], NewOps.size());    
+    break;
+  }
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, WidenVT, Tmp1,
+                         Node->getOperand(1), Node->getOperand(2));
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT);
+    // VECTOR_SHUFFLE 3rd operand must be a constant build vector that is
+    // used as permutation array. We build the vector here instead of widening
+    // because we don't want to legalize and have it turned to something else.
+    SDValue PermOp = Node->getOperand(2);
+    SDValueVector NewOps;
+    MVT PVT = PermOp.getValueType().getVectorElementType();
+    for (unsigned i = 0; i < NumElts; ++i) {
+      if (PermOp.getOperand(i).getOpcode() == ISD::UNDEF) {
+        NewOps.push_back(PermOp.getOperand(i));
+      } else {
+        unsigned Idx =
+        cast<ConstantSDNode>(PermOp.getOperand(i))->getZExtValue();
+        if (Idx < NumElts) {
+          NewOps.push_back(PermOp.getOperand(i));
+        }
+        else {
+          NewOps.push_back(DAG.getConstant(Idx + NewNumElts - NumElts,
+                                           PermOp.getOperand(i).getValueType()));
+        } 
+      }
+    }
+    for (unsigned i = NumElts; i < NewNumElts; ++i) {
+      NewOps.push_back(DAG.getNode(ISD::UNDEF,PVT));
+    }
+    
+    SDValue Tmp3 = DAG.getNode(ISD::BUILD_VECTOR, 
+                               MVT::getVectorVT(PVT, NewOps.size()),
+                               &NewOps[0], NewOps.size()); 
+    
+    Result = DAG.getNode(ISD::VECTOR_SHUFFLE, WidenVT, Tmp1, Tmp2, Tmp3);    
+    break;
+  }
+  case ISD::LOAD: {
+    // If the load widen returns true, we can use a single load for the
+    // vector.  Otherwise, it is returning a token factor for multiple
+    // loads.
+    SDValue TFOp;
+    if (LoadWidenVectorOp(Result, TFOp, Op, WidenVT))
+      AddLegalizedOperand(Op.getValue(1), LegalizeOp(TFOp.getValue(1)));
+    else
+      AddLegalizedOperand(Op.getValue(1), LegalizeOp(TFOp.getValue(0)));
+    break;
+  }
+
+  case ISD::BIT_CONVERT: {
+    SDValue Tmp1 = Node->getOperand(0);
+    // Converts between two different types so we need to determine
+    // the correct widen type for the input operand.
+    MVT TVT = Tmp1.getValueType();
+    assert(TVT.isVector() && "can not widen non vector type");
+    MVT TEVT = TVT.getVectorElementType();
+    assert(WidenVT.getSizeInBits() % EVT.getSizeInBits() == 0 &&
+         "can not widen bit bit convert that are not multiple of element type");
+    MVT TWidenVT =  MVT::getVectorVT(TEVT,
+                                   WidenVT.getSizeInBits()/EVT.getSizeInBits());
+    Tmp1 = WidenVectorOp(Tmp1, TWidenVT);
+    assert(Tmp1.getValueType().getSizeInBits() == WidenVT.getSizeInBits());
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
+
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+        break;
+    case TargetLowering::Promote:
+        // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }
+    break;
+  }
+
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT: {
+    SDValue Tmp1 = Node->getOperand(0);
+    // Converts between two different types so we need to determine
+    // the correct widen type for the input operand.
+    MVT TVT = Tmp1.getValueType();
+    assert(TVT.isVector() && "can not widen non vector type");
+    MVT TEVT = TVT.getVectorElementType();
+    MVT TWidenVT =  MVT::getVectorVT(TEVT, NewNumElts);
+    Tmp1 = WidenVectorOp(Tmp1, TWidenVT);
+    assert(Tmp1.getValueType().getVectorNumElements() == NewNumElts);
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
+
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+        break;
+    case TargetLowering::Promote:
+        // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }
+    break;
+  }
+
+  case ISD::FP_EXTEND:
+    assert(0 && "Case not implemented.  Dynamically dead with 2 FP types!");
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::SIGN_EXTEND_INREG:
+  case ISD::FABS:
+  case ISD::FNEG:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS: {
+    // Unary op widening
+    SDValue Tmp1;    
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+
+    Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    assert(Tmp1.getValueType() == WidenVT);
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1);
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+        break;
+    case TargetLowering::Promote:
+        // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }
+    break;
+  }
+  case ISD::FPOW:
+  case ISD::FPOWI: 
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FCOPYSIGN:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::BSWAP: {
+    // Binary op widening
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+    
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT);
+    assert(Tmp1.getValueType() == WidenVT && Tmp2.getValueType() == WidenVT);
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Tmp2);
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Promote:
+      // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code by first 
+      // Widening to the right type and then unroll the beast.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }
+    break;
+  }
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: {
+    // Binary op with one non vector operand
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+    
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    assert(Tmp1.getValueType() == WidenVT);
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Node->getOperand(1));
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Promote:
+       // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT: {
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    assert(Tmp1.getValueType() == WidenVT);
+    Result = DAG.getNode(Node->getOpcode(), EVT, Tmp1, Node->getOperand(1));
+    break;
+  }
+  case ISD::CONCAT_VECTORS: {
+    // We concurrently support only widen on a multiple of the incoming vector.
+    // We could widen on a multiple of the incoming operand if necessary.
+    unsigned NumConcat = NewNumElts / NumElts;
+    assert(NewNumElts % NumElts == 0 && "Can widen only a multiple of vector");
+    std::vector<SDValue> UnOps(NumElts, DAG.getNode(ISD::UNDEF, 
+                               VT.getVectorElementType()));
+    SDValue UndefVal = DAG.getNode(ISD::BUILD_VECTOR, VT,
+                                   &UnOps[0], UnOps.size());
+    SmallVector<SDValue, 8> MOps;
+    MOps.push_back(Op);
+    for (unsigned i = 1; i != NumConcat; ++i) {
+      MOps.push_back(UndefVal);
+    }
+    Result = LegalizeOp(DAG.getNode(ISD::CONCAT_VECTORS, WidenVT,
+                                    &MOps[0], MOps.size()));
+    break;
+  }
+  case ISD::EXTRACT_SUBVECTOR: {
+    SDValue Tmp1;
+
+    // The incoming vector might already be the proper type
+    if (Node->getOperand(0).getValueType() != WidenVT)
+      Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
+    else
+      Tmp1 = Node->getOperand(0);
+    assert(Tmp1.getValueType() == WidenVT);
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Tmp1, Node->getOperand(1));
+    break;
+  }
+
+  case ISD::SELECT: {
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+
+    // Determine new condition widen type and widen
+    SDValue Cond1 = Node->getOperand(0);
+    MVT CondVT = Cond1.getValueType();
+    assert(CondVT.isVector() && "can not widen non vector type");
+    MVT CondEVT = CondVT.getVectorElementType();
+    MVT CondWidenVT =  MVT::getVectorVT(CondEVT, NewNumElts);
+    Cond1 = WidenVectorOp(Cond1, CondWidenVT);
+    assert(Cond1.getValueType() == CondWidenVT && "Condition not widen");
+
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(1), WidenVT);
+    SDValue Tmp2 = WidenVectorOp(Node->getOperand(2), WidenVT);
+    assert(Tmp1.getValueType() == WidenVT && Tmp2.getValueType() == WidenVT);
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Cond1, Tmp1, Tmp2);
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Promote:
+      // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code by first 
+      // Widening to the right type and then unroll the beast.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }  
+    break;
+  }
+  
+  case ISD::SELECT_CC: {
+    TargetLowering::LegalizeAction action =
+      TLI.getOperationAction(Node->getOpcode(), WidenVT);
+
+    // Determine new condition widen type and widen
+    SDValue Cond1 = Node->getOperand(0);
+    SDValue Cond2 = Node->getOperand(1);
+    MVT CondVT = Cond1.getValueType();
+    assert(CondVT.isVector() && "can not widen non vector type");
+    assert(CondVT == Cond2.getValueType() && "mismatch lhs/rhs");
+    MVT CondEVT = CondVT.getVectorElementType();
+    MVT CondWidenVT =  MVT::getVectorVT(CondEVT, NewNumElts);
+    Cond1 = WidenVectorOp(Cond1, CondWidenVT);
+    Cond2 = WidenVectorOp(Cond2, CondWidenVT);
+    assert(Cond1.getValueType() == CondWidenVT &&
+           Cond2.getValueType() == CondWidenVT && "condition not widen");
+
+    SDValue Tmp1 = WidenVectorOp(Node->getOperand(2), WidenVT);
+    SDValue Tmp2 = WidenVectorOp(Node->getOperand(3), WidenVT);
+    assert(Tmp1.getValueType() == WidenVT && Tmp2.getValueType() == WidenVT &&
+           "operands not widen");
+    Result = DAG.getNode(Node->getOpcode(), WidenVT, Cond1, Cond2, Tmp1,
+                         Tmp2, Node->getOperand(4));
+    switch (action)  {
+    default: assert(0 && "action not supported");
+    case TargetLowering::Legal:
+      break;
+    case TargetLowering::Promote:
+      // We defer the promotion to when we legalize the op
+      break;
+    case TargetLowering::Expand:
+      // Expand the operation into a bunch of nasty scalar code by first 
+      // Widening to the right type and then unroll the beast.
+      Result = LegalizeOp(UnrollVectorOp(Result));
+      break;
+    }  
+    break;
+    break;
+  }
+
+  case ISD::ATOMIC_CMP_SWAP_8:
+  case ISD::ATOMIC_CMP_SWAP_16:
+  case ISD::ATOMIC_CMP_SWAP_32:
+  case ISD::ATOMIC_CMP_SWAP_64:
+  case ISD::ATOMIC_LOAD_ADD_8:
+  case ISD::ATOMIC_LOAD_SUB_8:
+  case ISD::ATOMIC_LOAD_AND_8:
+  case ISD::ATOMIC_LOAD_OR_8:
+  case ISD::ATOMIC_LOAD_XOR_8:
+  case ISD::ATOMIC_LOAD_NAND_8:
+  case ISD::ATOMIC_LOAD_MIN_8:
+  case ISD::ATOMIC_LOAD_MAX_8:
+  case ISD::ATOMIC_LOAD_UMIN_8:
+  case ISD::ATOMIC_LOAD_UMAX_8:
+  case ISD::ATOMIC_SWAP_8: 
+  case ISD::ATOMIC_LOAD_ADD_16:
+  case ISD::ATOMIC_LOAD_SUB_16:
+  case ISD::ATOMIC_LOAD_AND_16:
+  case ISD::ATOMIC_LOAD_OR_16:
+  case ISD::ATOMIC_LOAD_XOR_16:
+  case ISD::ATOMIC_LOAD_NAND_16:
+  case ISD::ATOMIC_LOAD_MIN_16:
+  case ISD::ATOMIC_LOAD_MAX_16:
+  case ISD::ATOMIC_LOAD_UMIN_16:
+  case ISD::ATOMIC_LOAD_UMAX_16:
+  case ISD::ATOMIC_SWAP_16:
+  case ISD::ATOMIC_LOAD_ADD_32:
+  case ISD::ATOMIC_LOAD_SUB_32:
+  case ISD::ATOMIC_LOAD_AND_32:
+  case ISD::ATOMIC_LOAD_OR_32:
+  case ISD::ATOMIC_LOAD_XOR_32:
+  case ISD::ATOMIC_LOAD_NAND_32:
+  case ISD::ATOMIC_LOAD_MIN_32:
+  case ISD::ATOMIC_LOAD_MAX_32:
+  case ISD::ATOMIC_LOAD_UMIN_32:
+  case ISD::ATOMIC_LOAD_UMAX_32:
+  case ISD::ATOMIC_SWAP_32:
+  case ISD::ATOMIC_LOAD_ADD_64:
+  case ISD::ATOMIC_LOAD_SUB_64:
+  case ISD::ATOMIC_LOAD_AND_64:
+  case ISD::ATOMIC_LOAD_OR_64:
+  case ISD::ATOMIC_LOAD_XOR_64:
+  case ISD::ATOMIC_LOAD_NAND_64:
+  case ISD::ATOMIC_LOAD_MIN_64:
+  case ISD::ATOMIC_LOAD_MAX_64:
+  case ISD::ATOMIC_LOAD_UMIN_64:
+  case ISD::ATOMIC_LOAD_UMAX_64:
+  case ISD::ATOMIC_SWAP_64: {
+    // For now, we assume that using vectors for these operations don't make
+    // much sense so we just split it.  We return an empty result
+    SDValue X, Y;
+    SplitVectorOp(Op, X, Y);
+    return Result;
+    break;
+  }
+
+  } // end switch (Node->getOpcode())
+
+  assert(Result.getNode() && "Didn't set a result!");  
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  AddWidenOperand(Op, Result);
+  return Result;
+}
+
+// Utility function to find a legal vector type and its associated element
+// type from a preferred width and whose vector type must be the same size
+// as the VVT.
+//  TLI:   Target lowering used to determine legal types
+//  Width: Preferred width of element type
+//  VVT:   Vector value type whose size we must match.
+// Returns VecEVT and EVT - the vector type and its associated element type
+static void FindWidenVecType(TargetLowering &TLI, unsigned Width, MVT VVT,
+                             MVT& EVT, MVT& VecEVT) {
+  // We start with the preferred width, make it a power of 2 and see if
+  // we can find a vector type of that width. If not, we reduce it by
+  // another power of 2.  If we have widen the type, a vector of bytes should
+  // always be legal.
+  assert(TLI.isTypeLegal(VVT));
+  unsigned EWidth = Width + 1;
+  do {
+    assert(EWidth > 0);
+    EWidth =  (1 << Log2_32(EWidth-1));
+    EVT = MVT::getIntegerVT(EWidth);
+    unsigned NumEVT = VVT.getSizeInBits()/EWidth;
+    VecEVT = MVT::getVectorVT(EVT, NumEVT);
+  } while (!TLI.isTypeLegal(VecEVT) ||
+           VVT.getSizeInBits() != VecEVT.getSizeInBits());
+}
+
+SDValue SelectionDAGLegalize::genWidenVectorLoads(SDValueVector& LdChain,
+                                                    SDValue   Chain,
+                                                    SDValue   BasePtr,
+                                                    const Value *SV,
+                                                    int         SVOffset,
+                                                    unsigned    Alignment,
+                                                    bool        isVolatile,
+                                                    unsigned    LdWidth,
+                                                    MVT         ResType) {
+  // We assume that we have good rules to handle loading power of two loads so
+  // we break down the operations to power of 2 loads.  The strategy is to
+  // load the largest power of 2 that we can easily transform to a legal vector
+  // and then insert into that vector, and the cast the result into the legal
+  // vector that we want.  This avoids unnecessary stack converts.
+  // TODO: If the Ldwidth is legal, alignment is the same as the LdWidth, and
+  //       the load is nonvolatile, we an use a wider load for the value.
+  // Find a vector length we can load a large chunk
+  MVT EVT, VecEVT;
+  unsigned EVTWidth;
+  FindWidenVecType(TLI, LdWidth, ResType, EVT, VecEVT);
+  EVTWidth = EVT.getSizeInBits();
+
+  SDValue LdOp = DAG.getLoad(EVT, Chain, BasePtr, SV, SVOffset,
+                               isVolatile, Alignment);
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecEVT, LdOp);
+  LdChain.push_back(LdOp.getValue(1));
+  
+  // Check if we can load the element with one instruction
+  if (LdWidth == EVTWidth) {
+    return DAG.getNode(ISD::BIT_CONVERT, ResType, VecOp);
+  }
+
+  // The vector element order is endianness dependent.
+  unsigned Idx = 1;
+  LdWidth -= EVTWidth;
+  unsigned Offset = 0;
+    
+  while (LdWidth > 0) {
+    unsigned Increment = EVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+
+    if (LdWidth < EVTWidth) {
+      // Our current type we are using is too large, use a smaller size by
+      // using a smaller power of 2
+      unsigned oEVTWidth = EVTWidth;
+      FindWidenVecType(TLI, LdWidth, ResType, EVT, VecEVT);
+      EVTWidth = EVT.getSizeInBits();
+      // Readjust position and vector position based on new load type
+      Idx = Idx * (oEVTWidth/EVTWidth)+1;
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, VecEVT, VecOp);
+    }
+      
+    SDValue LdOp = DAG.getLoad(EVT, Chain, BasePtr, SV,
+                                 SVOffset+Offset, isVolatile,
+                                 MinAlign(Alignment, Offset));
+    LdChain.push_back(LdOp.getValue(1));
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, VecEVT, VecOp, LdOp,
+                        DAG.getIntPtrConstant(Idx++));
+    
+    LdWidth -= EVTWidth;
+  }
+
+  return DAG.getNode(ISD::BIT_CONVERT, ResType, VecOp);
+}
+
+bool SelectionDAGLegalize::LoadWidenVectorOp(SDValue& Result,
+                                             SDValue& TFOp,
+                                             SDValue Op,
+                                             MVT NVT) {
+  // TODO: Add support for ConcatVec and the ability to load many vector
+  //       types (e.g., v4i8).  This will not work when a vector register
+  //       to memory mapping is strange (e.g., vector elements are not
+  //       stored in some sequential order).
+
+  // It must be true that the widen vector type is bigger than where 
+  // we need to load from.
+  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+  MVT LdVT = LD->getMemoryVT();
+  assert(LdVT.isVector() && NVT.isVector());
+  assert(LdVT.getVectorElementType() == NVT.getVectorElementType());
+  
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  int       SVOffset = LD->getSrcValueOffset();
+  unsigned  Alignment = LD->getAlignment();
+  bool      isVolatile = LD->isVolatile();
+  const Value *SV = LD->getSrcValue();
+  unsigned int LdWidth = LdVT.getSizeInBits();
+  
+  // Load value as a large register
+  SDValueVector LdChain;
+  Result = genWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
+                               Alignment, isVolatile, LdWidth, NVT);
+
+  if (LdChain.size() == 1) {
+    TFOp = LdChain[0];
+    return true;
+  }
+  else {
+    TFOp=DAG.getNode(ISD::TokenFactor, MVT::Other, &LdChain[0], LdChain.size());
+    return false;
+  }
+}
+
+
+void SelectionDAGLegalize::genWidenVectorStores(SDValueVector& StChain,
+                                                SDValue   Chain,
+                                                SDValue   BasePtr,
+                                                const Value *SV,
+                                                int         SVOffset,
+                                                unsigned    Alignment,
+                                                bool        isVolatile,
+                                                SDValue   ValOp,
+                                                unsigned    StWidth) {
+  // Breaks the stores into a series of power of 2 width stores.  For any
+  // width, we convert the vector to the vector of element size that we
+  // want to store.  This avoids requiring a stack convert.
+  
+  // Find a width of the element type we can store with
+  MVT VVT = ValOp.getValueType();
+  MVT EVT, VecEVT;
+  unsigned EVTWidth;
+  FindWidenVecType(TLI, StWidth, VVT, EVT, VecEVT);
+  EVTWidth = EVT.getSizeInBits();
+
+  SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, VecEVT, ValOp);
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EVT, VecOp,
+                                  DAG.getIntPtrConstant(0));
+  SDValue StOp = DAG.getStore(Chain, EOp, BasePtr, SV, SVOffset,
+                               isVolatile, Alignment);
+  StChain.push_back(StOp);
+
+  // Check if we are done
+  if (StWidth == EVTWidth) {
+    return;
+  }
+  
+  unsigned Idx = 1;
+  StWidth -= EVTWidth;
+  unsigned Offset = 0;
+    
+  while (StWidth > 0) {
+    unsigned Increment = EVTWidth / 8;
+    Offset += Increment;
+    BasePtr = DAG.getNode(ISD::ADD, BasePtr.getValueType(), BasePtr,
+                          DAG.getIntPtrConstant(Increment));
+                          
+    if (StWidth < EVTWidth) {
+      // Our current type we are using is too large, use a smaller size by
+      // using a smaller power of 2
+      unsigned oEVTWidth = EVTWidth;
+      FindWidenVecType(TLI, StWidth, VVT, EVT, VecEVT);
+      EVTWidth = EVT.getSizeInBits();
+      // Readjust position and vector position based on new load type
+      Idx = Idx * (oEVTWidth/EVTWidth)+1;
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, VecEVT, VecOp);
+    }
+    
+    EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, EVT, VecOp,
+                      DAG.getIntPtrConstant(Idx));
+    StChain.push_back(DAG.getStore(Chain, EOp, BasePtr, SV,
+                                   SVOffset + Offset, isVolatile,
+                                   MinAlign(Alignment, Offset)));
+    StWidth -= EVTWidth;
+  }
+}
+
+
+SDValue SelectionDAGLegalize::StoreWidenVectorOp(StoreSDNode *ST,
+                                                   SDValue Chain,
+                                                   SDValue BasePtr) {
+  // TODO: It might be cleaner if we can use SplitVector and have more legal
+  //        vector types that can be stored into memory (e.g., v4xi8 can
+  //        be stored as a word). This will not work when a vector register
+  //        to memory mapping is strange (e.g., vector elements are not
+  //        stored in some sequential order).
+  
+  MVT StVT = ST->getMemoryVT();
+  SDValue ValOp = ST->getValue();
+
+  // Check if we have widen this node with another value
+  std::map<SDValue, SDValue>::iterator I = WidenNodes.find(ValOp);
+  if (I != WidenNodes.end())
+    ValOp = I->second;
+    
+  MVT VVT = ValOp.getValueType();
+
+  // It must be true that we the widen vector type is bigger than where
+  // we need to store.
+  assert(StVT.isVector() && VVT.isVector());
+  assert(StVT.getSizeInBits() < VVT.getSizeInBits());
+  assert(StVT.getVectorElementType() == VVT.getVectorElementType());
+
+  // Store value
+  SDValueVector StChain;
+  genWidenVectorStores(StChain, Chain, BasePtr, ST->getSrcValue(),
+                       ST->getSrcValueOffset(), ST->getAlignment(),
+                       ST->isVolatile(), ValOp, StVT.getSizeInBits());
+  if (StChain.size() == 1)
+    return StChain[0];
+  else 
+    return DAG.getNode(ISD::TokenFactor, MVT::Other,&StChain[0],StChain.size());
+}
+
+
 // SelectionDAG::Legalize - This is the entry point for the file.
 //
 void SelectionDAG::Legalize() {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d2ae587ba01..c2d0fd4f1f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -79,7 +79,20 @@ private:
     case TargetLowering::Legal:
       return Legal;
     case TargetLowering::Promote:
-      return PromoteInteger;
+      // Promote can mean
+      //   1) On integers, it means to promote type (e.g., i8 to i32)
+      //   2) For vectors, it means try to widen (e.g., v3i32 to v4i32)
+      if (!VT.isVector()) {
+        return PromoteInteger;
+      }
+      else {
+        // TODO: move widen code to LegalizeType
+        if (VT.getVectorNumElements() == 1) {
+          return ScalarizeVector;
+        } else {
+          return SplitVector;
+        }        
+      }
     case TargetLowering::Expand:
       // Expand can mean
       // 1) split scalar in half, 2) convert a float to an integer,
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1e7e847b8ad..9410271d06c 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -573,7 +573,7 @@ void TargetLowering::computeRegisterProperties() {
                                RegisterVT);
       RegisterTypeForVT[i] = RegisterVT;
       TransformToType[i] = MVT::Other; // this isn't actually used
-      ValueTypeActions.setTypeAction(VT, Expand);
+      ValueTypeActions.setTypeAction(VT, Promote);
     }
   }
 }
@@ -642,6 +642,20 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT,
   return 1;
 }
 
+/// getWidenVectorType: given a vector type, returns the type to widen to
+/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other
+/// When and were to widen is target dependent based on the cost of
+/// scalarizing vs using the wider vector type.
+MVT TargetLowering::getWidenVectorType(MVT VT) {
+  assert(VT.isVector());
+  if (isTypeLegal(VT))
+    return VT;
+ 
+  // Default is not to widen until moved to LegalizeTypes
+  return MVT::Other;
+}
+
 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
 /// function arguments in the caller parameter area.  This is the actual
 /// alignment, not its logarithm.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a10f8a8daaf..7e1b7a0c76d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -523,8 +523,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::FEXP, MVT::f80, Expand);
   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
 
-  // First set operation action for all vector types to expand. Then we
-  // will selectively turn on ones that can be effectively codegen'd.
+  // First set operation action for all vector types to either to promote
+  // (for widening) or expand (for scalarization). Then we will selectively
+  // turn on ones that can be effectively codegen'd.
   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
     setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
@@ -543,6 +544,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
     setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS,(MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
@@ -7837,3 +7840,41 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 
   return Res;
 }
+
+//===----------------------------------------------------------------------===//
+//                           X86 Widen vector type
+//===----------------------------------------------------------------------===//
+
+/// getWidenVectorType: given a vector type, returns the type to widen
+/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other
+/// When and were to widen is target dependent based on the cost of
+/// scalarizing vs using the wider vector type.
+
+MVT X86TargetLowering::getWidenVectorType(MVT VT) {
+  assert(VT.isVector());
+  if (isTypeLegal(VT))
+    return VT;
+  
+  // TODO: In computeRegisterProperty, we can compute the list of legal vector
+  //       type based on element type.  This would speed up our search (though
+  //       it may not be worth it since the size of the list is relatively
+  //       small).
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NElts = VT.getVectorNumElements();
+  
+  // On X86, it make sense to widen any vector wider than 1
+  if (NElts <= 1)
+    return MVT::Other;
+  
+  for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE; 
+       nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+    MVT SVT = (MVT::SimpleValueType)nVT;
+    
+    if (isTypeLegal(SVT) && 
+        SVT.getVectorElementType() == EltVT && 
+        SVT.getVectorNumElements() > NElts)
+      return SVT;
+  }
+  return MVT::Other;
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0a2f4aa8922..2358889fda8 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -486,6 +486,13 @@ namespace llvm {
       (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
     }
 
+    /// getWidenVectorType: given a vector type, returns the type to widen
+    /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+    /// If there is no vector type that we want to widen to, returns MVT::Other
+    /// When and were to widen is target dependent based on the cost of
+    /// scalarizing vs using the wider vector type.
+    virtual MVT getWidenVectorType(MVT VT);
+
     /// createFastISel - This method returns a target specific FastISel object,
     /// or null if the target does not support "fast" ISel.
     virtual FastISel *