diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0952350d07e..9727de82036 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13001,8 +13001,8 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, MachineBasicBlock *origMainMBB = mainMBB; // Add a PHI. - BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); + MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -13105,6 +13105,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, .addReg(SrcReg).addReg(t4) .addImm(CC); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // Replace the original PHI node as mainMBB is changed after CMOV + // lowering. + BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); + Phi->eraseFromParent(); } break; } @@ -13298,10 +13303,10 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, MachineBasicBlock *origMainMBB = mainMBB; // Add PHIs. - BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) - .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); - BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) - .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); + MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) + .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); + MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) + .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -13375,10 +13380,21 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, .addReg(SrcLoReg).addReg(t4L) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the + // 2nd CMOV lowering. 
+ mainMBB->addLiveIn(X86::EFLAGS); MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H) .addReg(SrcHiReg).addReg(t4H) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // Replace the original PHI nodes, as mainMBB has changed after CMOV + // lowering. + BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L) + .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); + BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H) + .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); + PhiL->eraseFromParent(); + PhiH->eraseFromParent(); } break; } diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll index 1a6db7781f7..62f784f6960 100644 --- a/test/CodeGen/X86/atomic-minmax-i6432.ll +++ b/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX +; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV ; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC @sc64 = external global i64 @@ -16,6 +17,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: setl +; NOCMOV: cmpl +; NOCMOV: setl +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] %2 = atomicrmw min i64* @sc64, i64 6 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl @@ -27,6 +38,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: setg +; NOCMOV: cmpl +; NOCMOV: setg +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] %3 = atomicrmw umax i64* @sc64, i64 7 acquire ; LINUX: 
[[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl @@ -38,6 +59,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: setb +; NOCMOV: cmpl +; NOCMOV: setb +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] %4 = atomicrmw umin i64* @sc64, i64 8 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl @@ -49,6 +80,16 @@ define void @atomic_maxmin_i6432() { ; LINUX: lock ; LINUX-NEXT: cmpxchg8b ; LINUX: jne [[LABEL]] +; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]] +; NOCMOV: cmpl +; NOCMOV: seta +; NOCMOV: cmpl +; NOCMOV: seta +; NOCMOV: jne +; NOCMOV: jne +; NOCMOV: lock +; NOCMOV-NEXT: cmpxchg8b +; NOCMOV: jne [[LABEL]] ret void } diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll index 50c5751f018..3cb9ca1c76c 100644 --- a/test/CodeGen/X86/atomic32.ll +++ b/test/CodeGen/X86/atomic32.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 ; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefix NOCMOV @sc32 = external global i32 @@ -164,9 +165,15 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: jl +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_min32(i32 %x) nounwind { @@ -180,9 +187,15 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: jg +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_umax32(i32 %x) nounwind { @@ -196,9 +209,15 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X32: cmov ; X32: lock ; X32: 
cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: jb +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_umin32(i32 %x) nounwind { @@ -207,13 +226,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64: cmov ; X64: lock ; X64: cmpxchgl + ; X32: cmpl ; X32: cmov ; X32: lock ; X32: cmpxchgl + +; NOCMOV: cmpl +; NOCMOV: ja +; NOCMOV: lock +; NOCMOV: cmpxchgl ret void ; X64: ret ; X32: ret +; NOCMOV: ret } define void @atomic_fetch_cmpxchg32() nounwind {