diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index dc4b67a8324..f8e554fcbfd 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -3479,9 +3479,6 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       DstReg = MI->getOperand(0).getReg();
       SrcReg = MI->getOperand(1).getReg();
 
-      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
-        MI->RemoveOperand(i-1);
-
       DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
 
       // If we insert both a novel <def> and an <undef> on the DReg, we break
@@ -3491,6 +3488,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI))
           break;
 
+      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+        MI->RemoveOperand(i-1);
+
       // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
       // Again DDst may be undefined at the beginning of this instruction.
       MI->setDesc(get(ARM::VSETLNi32));
@@ -3512,9 +3512,6 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       DstReg = MI->getOperand(0).getReg();
       SrcReg = MI->getOperand(1).getReg();
 
-      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
-        MI->RemoveOperand(i-1);
-
       unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
@@ -3526,6 +3523,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
       if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI))
           break;
 
+      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+        MI->RemoveOperand(i-1);
+
       if (DSrc == DDst) {
         // Destination can be:
         //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll
index f1cd9f58404..e19185b0eb2 100644
--- a/test/CodeGen/ARM/domain-conv-vmovs.ll
+++ b/test/CodeGen/ARM/domain-conv-vmovs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
 
 define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
 ; CHECK: test_vmovs_via_vext_lane0to0:
@@ -62,3 +62,23 @@ define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
   ret float %res
 }
 
+declare float @llvm.sqrt.f32(float)
+
+declare void @bar()
+
+; This is a compile-test: this function used to trigger an internal fault.
+define float @test_ineligible(float, float %in) {
+; CHECK: test_ineligible:
+
+  %sqrt = call float @llvm.sqrt.f32(float %in)
+  %val = fadd float %sqrt, %sqrt
+
+  ; This call forces a move from a callee-saved register to the return-reg. That
+  ; move is not eligible for conversion to a d-register instruction because the
+  ; use-def chains would be messed up. Primarily a compile-test (we used to
+  ; hit an internal fault).
+  call void @bar()
+; CHECK: bl bar
+; CHECK: vmov.f32 {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %val
+}
\ No newline at end of file