diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7a9de93268c..366e2fa4552 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2760,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case ARM::VLD4q8oddPseudo_UPD:
     case ARM::VLD4q16oddPseudo_UPD:
     case ARM::VLD4q32oddPseudo_UPD:
-    case ARM::VLD1DUPq8Pseudo:
-    case ARM::VLD1DUPq16Pseudo:
-    case ARM::VLD1DUPq32Pseudo:
-    case ARM::VLD1DUPq8PseudoWB_fixed:
-    case ARM::VLD1DUPq16PseudoWB_fixed:
-    case ARM::VLD1DUPq32PseudoWB_fixed:
-    case ARM::VLD1DUPq8PseudoWB_register:
-    case ARM::VLD1DUPq16PseudoWB_register:
-    case ARM::VLD1DUPq32PseudoWB_register:
-    case ARM::VLD2DUPd8Pseudo:
-    case ARM::VLD2DUPd16Pseudo:
-    case ARM::VLD2DUPd32Pseudo:
-    case ARM::VLD2DUPd8PseudoWB_fixed:
-    case ARM::VLD2DUPd16PseudoWB_fixed:
-    case ARM::VLD2DUPd32PseudoWB_fixed:
-    case ARM::VLD2DUPd8PseudoWB_register:
-    case ARM::VLD2DUPd16PseudoWB_register:
-    case ARM::VLD2DUPd32PseudoWB_register:
+    case ARM::VLD1DUPq8:
+    case ARM::VLD1DUPq16:
+    case ARM::VLD1DUPq32:
+    case ARM::VLD1DUPq8wb_fixed:
+    case ARM::VLD1DUPq16wb_fixed:
+    case ARM::VLD1DUPq32wb_fixed:
+    case ARM::VLD1DUPq8wb_register:
+    case ARM::VLD1DUPq16wb_register:
+    case ARM::VLD1DUPq32wb_register:
+    case ARM::VLD2DUPd8:
+    case ARM::VLD2DUPd16:
+    case ARM::VLD2DUPd32:
+    case ARM::VLD2DUPd8wb_fixed:
+    case ARM::VLD2DUPd16wb_fixed:
+    case ARM::VLD2DUPd32wb_fixed:
+    case ARM::VLD2DUPd8wb_register:
+    case ARM::VLD2DUPd16wb_register:
+    case ARM::VLD2DUPd32wb_register:
     case ARM::VLD4DUPd8Pseudo:
     case ARM::VLD4DUPd16Pseudo:
     case ARM::VLD4DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c4ab99d4542..3c14d74f786 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -129,16 +129,6 @@ namespace {
 }
 
 static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo,     ARM::VLD1DUPq16,     true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true,  SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true,  SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq32Pseudo,     ARM::VLD1DUPq32,     true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false,  SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true,  SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq8Pseudo,      ARM::VLD1DUPq8,      true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_fixed,  ARM::VLD1DUPq8wb_fixed, true, true, false,  SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_register,  ARM::VLD1DUPq8wb_register, true, true, true,  SingleSpc, 2, 8,false},
-
 { ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, false, EvenDblSpc, 1, 4 ,true},
 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true,  EvenDblSpc, 1, 4 ,true},
 { ARM::VLD1LNq32Pseudo,     ARM::VLD1LNd32,     true, false, false, EvenDblSpc, 1, 2 ,true},
@@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 { ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, false, SingleSpc,  4, 1 ,false},
 { ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, false, SingleSpc,  3, 1 ,false},
 
-{ ARM::VLD2DUPd16Pseudo,     ARM::VLD2DUPd16,     true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false,  SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true,  SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd32Pseudo,     ARM::VLD2DUPd32,     true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false,  SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true,  SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd8Pseudo,      ARM::VLD2DUPd8,      true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_fixed,  ARM::VLD2DUPd8wb_fixed, true, true, false,  SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_register,  ARM::VLD2DUPd8wb_register, true, true, true,  SingleSpc, 2, 8,false},
-
 { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},
 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true,  SingleSpc,  2, 4 ,true},
 { ARM::VLD2LNd32Pseudo,     ARM::VLD2LNd32,     true, false, false, SingleSpc,  2, 2 ,true},
@@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::VLD4q8oddPseudo_UPD:
     case ARM::VLD4q16oddPseudo_UPD:
     case ARM::VLD4q32oddPseudo_UPD:
-    case ARM::VLD1DUPq8Pseudo:
-    case ARM::VLD1DUPq16Pseudo:
-    case ARM::VLD1DUPq32Pseudo:
-    case ARM::VLD1DUPq8PseudoWB_fixed:
-    case ARM::VLD1DUPq16PseudoWB_fixed:
-    case ARM::VLD1DUPq32PseudoWB_fixed:
-    case ARM::VLD1DUPq8PseudoWB_register:
-    case ARM::VLD1DUPq16PseudoWB_register:
-    case ARM::VLD1DUPq32PseudoWB_register:
-    case ARM::VLD2DUPd8Pseudo:
-    case ARM::VLD2DUPd16Pseudo:
-    case ARM::VLD2DUPd32Pseudo:
-    case ARM::VLD2DUPd8PseudoWB_fixed:
-    case ARM::VLD2DUPd16PseudoWB_fixed:
-    case ARM::VLD2DUPd32PseudoWB_fixed:
-    case ARM::VLD2DUPd8PseudoWB_register:
-    case ARM::VLD2DUPd16PseudoWB_register:
-    case ARM::VLD2DUPd32PseudoWB_register:
     case ARM::VLD3DUPd8Pseudo:
     case ARM::VLD3DUPd16Pseudo:
     case ARM::VLD3DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c99db98fbcc..ffb9acb2f2c 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
 
-  case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register;
-  case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register;
-  case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register;
+  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
   }
   return Opc; // If not one we handle, return it unchanged.
 }
@@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VLD2DUP: {
-    unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
-                           ARM::VLD2DUPd32Pseudo };
+    unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+                           ARM::VLD2DUPd32 };
     return SelectVLDDup(N, false, 2, Opcodes);
   }
 
@@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VLD2DUP_UPD: {
-    unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed,
-                           ARM::VLD2DUPd16PseudoWB_fixed,
-                           ARM::VLD2DUPd32PseudoWB_fixed };
+    unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+                           ARM::VLD2DUPd32wb_fixed };
     return SelectVLDDup(N, true, 2, Opcodes);
   }
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8684ce10d3c..8a706b1e4f0 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass {
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> {
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
   let ParserMatchClass = VecListDPairAsmOperand;
 }
 // Register list of three sequential D registers.
@@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass {
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> {
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
   let ParserMatchClass = VecListDPairSpacedAsmOperand;
 }
 // Register list of three D registers spaced by 2 (three Q registers).
@@ -153,13 +153,14 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
   let ParserMatchClass = VecListOneDAllLanesAsmOperand;
 }
 // Register list of two D registers, with "all lanes" subscripting.
-def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoDAllLanes";
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairAllLanes";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
-  let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+def VecListDPairAllLanes : RegisterOperand<DPair,
+                                           "printVectorListTwoAllLanes"> {
+  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
 }
 // Register list of two D registers spaced by 2 (two sequential Q registers).
 def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
@@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
   let Inst{4} = Rn{4};
   let DecoderMethod = "DecodeVLD1DupInstruction";
 }
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
-  let Pattern = [(set QPR:$dst,
-                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
 def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
 
-def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
           (VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
-          (VLD1DUPq32Pseudo addrmode6:$addr)>;
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-
-class VLD1QDUP<bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
           (ins addrmode6dup:$Rn), IIC_VLD1dup,
-          "vld1", Dt, "$Vd, $Rn", "", []> {
+          "vld1", Dt, "$Vd, $Rn", "",
+          [(set VecListDPairAllLanes:$Vd,
+                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
   let DecoderMethod = "DecodeVLD1DupInstruction";
 }
 
-def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
 
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 // ...with address register writeback:
 multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
 }
 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
-                     (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                      (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                      "vld1", Dt, "$Vd, $Rn!",
                      "$Rn.addr = $wb", []> {
@@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
     let AsmMatchConverter = "cvtVLDwbFixed";
   }
   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
-                        (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                         (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                         "vld1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
@@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
 
-def VLD1DUPq8PseudoWB_fixed     : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_fixed    : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_fixed    : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-
 //   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
   : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
@@ -1378,13 +1365,9 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
   let DecoderMethod = "DecodeVLD2DupInstruction";
 }
 
-def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListTwoDAllLanes>;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>;
-
-def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
 
 // ...with double-spaced registers (not used for codegen):
 def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListTwoQAllLanes>;
@@ -1414,21 +1397,14 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
   }
 }
 
-defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListTwoDAllLanes>;
-defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>;
+defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
+defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
 
 defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListTwoQAllLanes>;
 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>;
 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>;
 
-def VLD2DUPd8PseudoWB_fixed     : VLDQWBfixedPseudo   <IIC_VLD2dupu>;
-def VLD2DUPd8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_fixed    : VLDQWBfixedPseudo   <IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_fixed    : VLDQWBfixedPseudo   <IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-
 //   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
 class VLD3DUP<bits<4> op7_4, string Dt>
   : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 20454821569..fcb85449e63 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1133,9 +1133,10 @@ public:
     return VectorList.Count == 1;
   }
 
-  bool isVecListTwoDAllLanes() const {
+  bool isVecListDPairAllLanes() const {
     if (!isSingleSpacedVectorAllLanes()) return false;
-    return VectorList.Count == 2;
+    return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+              .contains(VectorList.RegNum));
   }
 
   bool isVecListTwoQAllLanes() const {
@@ -2981,12 +2982,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
       case NoLanes:
         E = Parser.getTok().getLoc();
         Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
-                                      &ARMMCRegisterClasses[ARM::DPairRegClassID]);
-
+                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
         Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
         break;
       case AllLanes:
         E = Parser.getTok().getLoc();
+        Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
         Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
                                                                 S, E));
         break;
@@ -3152,7 +3154,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   switch (LaneKind) {
   case NoLanes:
-    // Non-lane two-register operands have been converted to the
+    // Two-register operands have been converted to the
     // composite register classes.
     if (Count == 2) {
       const MCRegisterClass *RC = (Spacing == 1) ?
@@ -3165,6 +3167,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
                                                     (Spacing == 2), S, E));
     break;
   case AllLanes:
+    // Two-register operands have been converted to the
+    // composite register classes.
+    if (Count == 2 && Spacing == 1) {
+      const MCRegisterClass *RC = &ARMMCRegisterClasses[ARM::DPairRegClassID];
+      FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+    }
     Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
                                                             (Spacing == 2),
                                                             S, E));
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 2d408e77aa4..6fdc49d143b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2001,27 +2001,21 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
 
   // First output register
   switch (Inst.getOpcode()) {
-  case ARM::VLD1q16:
-  case ARM::VLD1q32:
-  case ARM::VLD1q64:
-  case ARM::VLD1q8:
-  case ARM::VLD1q16wb_fixed:
-  case ARM::VLD1q16wb_register:
-  case ARM::VLD1q32wb_fixed:
-  case ARM::VLD1q32wb_register:
-  case ARM::VLD1q64wb_fixed:
-  case ARM::VLD1q64wb_register:
-  case ARM::VLD1q8wb_fixed:
-  case ARM::VLD1q8wb_register:
-  case ARM::VLD2d16:
-  case ARM::VLD2d32:
-  case ARM::VLD2d8:
-  case ARM::VLD2d16wb_fixed:
-  case ARM::VLD2d16wb_register:
-  case ARM::VLD2d32wb_fixed:
-  case ARM::VLD2d32wb_register:
-  case ARM::VLD2d8wb_fixed:
-  case ARM::VLD2d8wb_register:
+  case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+  case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+  case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+  case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+  case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+  case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+  case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+  case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+  case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
+
+    // FIXME: These go in the VLDnDup* functions, not here.
+  case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+  case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+  case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+  case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
     if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
       return MCDisassembler::Fail;
     break;
@@ -2525,8 +2519,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
 
   align *= (1 << size);
 
-  if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
-    return MCDisassembler::Fail;
+  switch (Inst.getOpcode()) {
+  case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+  case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+  case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+  case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+    if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  default:
+    if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  }
   if (Rm != 0xF) {
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
       return MCDisassembler::Fail;
@@ -2559,8 +2564,19 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
   unsigned pred = fieldFromInstruction32(Insn, 22, 4);
   align *= 2*size;
 
-  if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
-    return MCDisassembler::Fail;
+  switch (Inst.getOpcode()) {
+  case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+  case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+  case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+  case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+    if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  default:
+    if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  }
 
   if (Rm != 0xF)
     Inst.addOperand(MCOperand::CreateImm(0));
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 590cf0e720b..8754053d62a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1025,7 +1025,7 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
   O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
 }
 
-void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
                                           raw_ostream &O) {
   unsigned Reg = MI->getOperand(OpNum).getReg();
   unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1033,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
   O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
 }
 
-void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI,
-                                                unsigned OpNum,
-                                                raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+                                              unsigned OpNum,
+                                              raw_ostream &O) {
   unsigned Reg = MI->getOperand(OpNum).getReg();
   unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
   unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1072,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
 void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
                                                 unsigned OpNum,
                                                 raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+  O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
 }
 
 void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@@ -1102,15 +1101,6 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
     << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
 }
 
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                              raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
-}
-
 void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
                                                       unsigned OpNum,
                                                       raw_ostream &O) {
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index dc2dd453a9d..e9cd4075f11 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -133,9 +133,9 @@ public:
   void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum,
-                                  raw_ostream &O);
+  void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+                               raw_ostream &O);
   void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
@@ -146,8 +146,6 @@ public:
                                     raw_ostream &O);
   void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
-  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                raw_ostream &O);
   void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
                                         raw_ostream &O);
   void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 70d07bc9911..b2912d0bb8b 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -579,7 +579,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   REG("VecListThreeD");
   REG("VecListFourD");
   REG("VecListOneDAllLanes");
-  REG("VecListTwoDAllLanes");
+  REG("VecListDPairAllLanes");
   REG("VecListTwoQAllLanes");
 
   IMM("i32imm");