Fix PR15355

- Clear the 'mayStore' flag on the load from the atomic variable emitted
  before the spin loop.
- Clear the kill flag on the registers forming the address of that atomic
  variable, as they go from having a single use to multiple uses.
- Don't use a physical register as a live-in register of a BB that is neither
  an entry block nor a landing pad; copy it into a virtual register instead
  (see the sketch below).

(patch by Cameron Zwarich)
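
The sketch below is not part of the patch; it is a minimal illustration of the
three fixes above, assuming the names used inside EmitAtomicLoadArith (MF, MIB,
MI, MemOpndSlot, MMOBegin/MMOEnd, MRI, RC, mainMBB, DL, TII). The virtual
register name ResVirt is hypothetical.

  // (1) The initial load must not claim it may store: rebuild each memory
  //     operand with MOStore cleared and MOLoad set before attaching it.
  for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
    unsigned Flags = (*MMOI)->getFlags();
    Flags = (Flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
    MIB.addMemOperand(
        MF->getMachineMemOperand((*MMOI)->getPointerInfo(), Flags,
                                 (*MMOI)->getSize(),
                                 (*MMOI)->getBaseAlignment(),
                                 (*MMOI)->getTBAAInfo(),
                                 (*MMOI)->getRanges()));
  }

  // (2) The address registers now feed both the load and the CMPXCHG, so the
  //     copied operands must not carry a kill flag now that they have more
  //     than one use.
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
    if (NewMO.isReg())
      NewMO.setIsKill(false);
    MIB.addOperand(NewMO);
  }

  // (3) Instead of marking EAX live-in to the loop/sink blocks, copy the
  //     physical result into a virtual register right after the CMPXCHG and
  //     refer only to that virtual register afterwards.
  unsigned ResVirt = MRI.createVirtualRegister(RC);
  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), ResVirt).addReg(X86::EAX);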



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176538 91177308-0d34-0410-b5e6-96231b3b80d8
Michael Liao, 2013-03-06 00:17:04 +00:00
commit c537f79dcd (parent 603e874c64)

14 changed files with 201 additions and 121 deletions

@@ -12893,13 +12893,16 @@ static unsigned getPseudoCMOVOpc(EVT VT) {
 // to
 //
 //    ...
-//    EAX = LOAD MI.addr
+//    t1 = LOAD MI.addr
 //   loop:
-//    t1 = OP MI.val, EAX
-//    LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+//    t4 = phi(t1, t3 / loop)
+//    t2 = OP MI.val, t4
+//    EAX = t4
+//    LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
+//    t3 = EAX
 //    JNE loop
 //   sink:
-//    dst = EAX
+//    dst = t3
 //    ...
 MachineBasicBlock *
 X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
@@ -12936,7 +12939,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
   MVT::SimpleValueType VT = *RC->vt_begin();

-  unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+  unsigned t1 = MRI.createVirtualRegister(RC);
+  unsigned t2 = MRI.createVirtualRegister(RC);
+  unsigned t3 = MRI.createVirtualRegister(RC);
+  unsigned t4 = MRI.createVirtualRegister(RC);
+  unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);

   unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
   unsigned LOADOpc = getLoadOpcode(VT);
@@ -12944,12 +12951,16 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
   // For the atomic load-arith operator, we generate
   //
   //  thisMBB:
-  //    EAX = LOAD [MI.addr]
+  //    t1 = LOAD [MI.addr]
   //  mainMBB:
+  //    t4 = phi(t1 / thisMBB, t3 / mainMBB)
   //    t1 = OP MI.val, EAX
+  //    EAX = t4
   //    LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+  //    t3 = EAX
   //    JNE mainMBB
   //  sinkMBB:
+  //    dst = t3

   MachineBasicBlock *thisMBB = MBB;
   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -12965,23 +12976,34 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

   // thisMBB:
-  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
-  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
-  MIB.setMemRefs(MMOBegin, MMOEnd);
+  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+    if (NewMO.isReg())
+      NewMO.setIsKill(false);
+    MIB.addOperand(NewMO);
+  }
+  for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+    unsigned flags = (*MMOI)->getFlags();
+    flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+    MachineMemOperand *MMO =
+      MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+                               (*MMOI)->getSize(),
+                               (*MMOI)->getBaseAlignment(),
+                               (*MMOI)->getTBAAInfo(),
+                               (*MMOI)->getRanges());
+    MIB.addMemOperand(MMO);
+  }

   thisMBB->addSuccessor(mainMBB);

   // mainMBB:
   MachineBasicBlock *origMainMBB = mainMBB;
-  mainMBB->addLiveIn(AccPhyReg);

-  // Copy AccPhyReg as it is used more than once.
-  unsigned AccReg = MRI.createVirtualRegister(RC);
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
-    .addReg(AccPhyReg);
+  // Add a PHI.
+  BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
+    .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);

-  unsigned t1 = MRI.createVirtualRegister(RC);
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
   default:
@@ -12999,20 +13021,20 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
   case X86::ATOMXOR32:
   case X86::ATOMXOR64: {
     unsigned ARITHOpc = getNonAtomicOpcode(Opc);
-    BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
-      .addReg(AccReg);
+    BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
+      .addReg(t4);
     break;
   }
   case X86::ATOMNAND8:
   case X86::ATOMNAND16:
   case X86::ATOMNAND32:
   case X86::ATOMNAND64: {
-    unsigned t2 = MRI.createVirtualRegister(RC);
+    unsigned Tmp = MRI.createVirtualRegister(RC);
     unsigned NOTOpc;
     unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
-    BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
-      .addReg(AccReg);
-    BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+    BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
+      .addReg(t4);
+    BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
     break;
   }
   case X86::ATOMMAX8:
@@ -13036,20 +13058,22 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,

     BuildMI(mainMBB, DL, TII->get(CMPOpc))
       .addReg(SrcReg)
-      .addReg(AccReg);
+      .addReg(t4);

     if (Subtarget->hasCMov()) {
       if (VT != MVT::i8) {
         // Native support
-        BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+        BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
           .addReg(SrcReg)
-          .addReg(AccReg);
+          .addReg(t4);
       } else {
         // Promote i8 to i32 to use CMOV32
-        const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+        const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+        const TargetRegisterClass *RC32 =
+          TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
         unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
         unsigned AccReg32 = MRI.createVirtualRegister(RC32);
-        unsigned t2 = MRI.createVirtualRegister(RC32);
+        unsigned Tmp = MRI.createVirtualRegister(RC32);
         unsigned Undef = MRI.createVirtualRegister(RC32);
         BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
@@ -13060,15 +13084,15 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
           .addImm(X86::sub_8bit);
         BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
           .addReg(Undef)
-          .addReg(AccReg)
+          .addReg(t4)
           .addImm(X86::sub_8bit);
-        BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+        BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
           .addReg(SrcReg32)
           .addReg(AccReg32);
-        BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
-          .addReg(t2, 0, X86::sub_8bit);
+        BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
+          .addReg(Tmp, 0, X86::sub_8bit);
       }
     } else {
       // Use pseudo select and lower them.
@@ -13077,8 +13101,8 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
       unsigned SelOpc = getPseudoCMOVOpc(VT);
       X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
       assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
-      MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
-              .addReg(SrcReg).addReg(AccReg)
+      MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
+              .addReg(SrcReg).addReg(t4)
               .addImm(CC);
       mainMBB = EmitLoweredSelect(MIB, mainMBB);
     }
@@ -13086,27 +13110,33 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
     }
   }

-  // Copy AccPhyReg back from virtual register.
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
-    .addReg(AccReg);
+  // Copy PhyReg back from virtual register.
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
+    .addReg(t4);

   MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
-  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
-  MIB.addReg(t1);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+    if (NewMO.isReg())
+      NewMO.setIsKill(false);
+    MIB.addOperand(NewMO);
+  }
+  MIB.addReg(t2);
   MIB.setMemRefs(MMOBegin, MMOEnd);
+  // Copy PhyReg back to virtual register.
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
+    .addReg(PhyReg);

   BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);

   mainMBB->addSuccessor(origMainMBB);
   mainMBB->addSuccessor(sinkMBB);

   // sinkMBB:
-  sinkMBB->addLiveIn(AccPhyReg);
   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
           TII->get(TargetOpcode::COPY), DstReg)
-    .addReg(AccPhyReg);
+    .addReg(t3);

   MI->eraseFromParent();

   return sinkMBB;
@@ -13123,15 +13153,24 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
 // to
 //
 //    ...
-//    EAX = LOAD [MI.addr + 0]
-//    EDX = LOAD [MI.addr + 4]
+//    t1L = LOAD [MI.addr + 0]
+//    t1H = LOAD [MI.addr + 4]
 //   loop:
-//    EBX = OP MI.val.lo, EAX
-//    ECX = OP MI.val.hi, EDX
+//    t4L = phi(t1L, t3L / loop)
+//    t4H = phi(t1H, t3H / loop)
+//    t2L = OP MI.val.lo, t4L
+//    t2H = OP MI.val.hi, t4H
+//    EAX = t4L
+//    EDX = t4H
+//    EBX = t2L
+//    ECX = t2H
 //    LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+//    t3L = EAX
+//    t3H = EDX
 //    JNE loop
 //   sink:
-//    dst = EDX:EAX
+//    dstL = t3L
+//    dstH = t3H
 //    ...
 MachineBasicBlock *
 X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
@@ -13172,20 +13211,37 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
   const TargetRegisterClass *RC = &X86::GR32RegClass;
   const TargetRegisterClass *RC8 = &X86::GR8RegClass;

+  unsigned t1L = MRI.createVirtualRegister(RC);
+  unsigned t1H = MRI.createVirtualRegister(RC);
+  unsigned t2L = MRI.createVirtualRegister(RC);
+  unsigned t2H = MRI.createVirtualRegister(RC);
+  unsigned t3L = MRI.createVirtualRegister(RC);
+  unsigned t3H = MRI.createVirtualRegister(RC);
+  unsigned t4L = MRI.createVirtualRegister(RC);
+  unsigned t4H = MRI.createVirtualRegister(RC);
+
   unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
   unsigned LOADOpc = X86::MOV32rm;

   // For the atomic load-arith operator, we generate
   //
   //  thisMBB:
-  //    EAX = LOAD [MI.addr + 0]
-  //    EDX = LOAD [MI.addr + 4]
+  //    t1L = LOAD [MI.addr + 0]
+  //    t1H = LOAD [MI.addr + 4]
   //  mainMBB:
-  //    EBX = OP MI.vallo, EAX
-  //    ECX = OP MI.valhi, EDX
+  //    t4L = phi(t1L / thisMBB, t3L / mainMBB)
+  //    t4H = phi(t1H / thisMBB, t3H / mainMBB)
+  //    t2L = OP MI.val.lo, t4L
+  //    t2H = OP MI.val.hi, t4H
+  //    EBX = t2L
+  //    ECX = t2H
   //    LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
-  //    JNE mainMBB
+  //    t3L = EAX
+  //    t3H = EDX
+  //    JNE loop
   //  sinkMBB:
+  //    dstL = t3L
+  //    dstH = t3H

   MachineBasicBlock *thisMBB = MBB;
   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
@@ -13202,35 +13258,50 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
   // thisMBB:
   // Lo
-  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
-  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
-  MIB.setMemRefs(MMOBegin, MMOEnd);
-  // Hi
-  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
-  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
-    if (i == X86::AddrDisp)
-      MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
-    else
-      MIB.addOperand(MI->getOperand(MemOpndSlot + i));
-  }
-  MIB.setMemRefs(MMOBegin, MMOEnd);
+  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+    if (NewMO.isReg())
+      NewMO.setIsKill(false);
+    MIB.addOperand(NewMO);
+  }
+  for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
+    unsigned flags = (*MMOI)->getFlags();
+    flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
+    MachineMemOperand *MMO =
+      MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
+                               (*MMOI)->getSize(),
+                               (*MMOI)->getBaseAlignment(),
+                               (*MMOI)->getTBAAInfo(),
+                               (*MMOI)->getRanges());
+    MIB.addMemOperand(MMO);
+  };
+  MachineInstr *LowMI = MIB;
+
+  // Hi
+  MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    if (i == X86::AddrDisp) {
+      MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
+    } else {
+      MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+      if (NewMO.isReg())
+        NewMO.setIsKill(false);
+      MIB.addOperand(NewMO);
+    }
+  }
+  MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());

   thisMBB->addSuccessor(mainMBB);

   // mainMBB:
   MachineBasicBlock *origMainMBB = mainMBB;
-  mainMBB->addLiveIn(X86::EAX);
-  mainMBB->addLiveIn(X86::EDX);

-  // Copy EDX:EAX as they are used more than once.
-  unsigned LoReg = MRI.createVirtualRegister(RC);
-  unsigned HiReg = MRI.createVirtualRegister(RC);
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
+  // Add PHIs.
+  BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
+    .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
+  BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
+    .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);

-  unsigned t1L = MRI.createVirtualRegister(RC);
-  unsigned t1H = MRI.createVirtualRegister(RC);
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
@@ -13243,19 +13314,23 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
   case X86::ATOMSUB6432: {
     unsigned HiOpc;
     unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
-    BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg);
-    BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg);
+    BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
+      .addReg(SrcLoReg);
+    BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
+      .addReg(SrcHiReg);
     break;
   }
   case X86::ATOMNAND6432: {
     unsigned HiOpc, NOTOpc;
     unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
-    unsigned t2L = MRI.createVirtualRegister(RC);
-    unsigned t2H = MRI.createVirtualRegister(RC);
-    BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
-    BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
-    BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
-    BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+    unsigned TmpL = MRI.createVirtualRegister(RC);
+    unsigned TmpH = MRI.createVirtualRegister(RC);
+    BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
+      .addReg(t4L);
+    BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
+      .addReg(t4H);
+    BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
+    BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
     break;
   }
   case X86::ATOMMAX6432:
@@ -13271,12 +13346,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
     unsigned cc = MRI.createVirtualRegister(RC);
     // cl := cmp src_lo, lo
     BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
-      .addReg(SrcLoReg).addReg(LoReg);
+      .addReg(SrcLoReg).addReg(t4L);
     BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
     BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
     // ch := cmp src_hi, hi
     BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
-      .addReg(SrcHiReg).addReg(HiReg);
+      .addReg(SrcHiReg).addReg(t4H);
     BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
     BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
     // cc := if (src_hi == hi) ? cl : ch;
@@ -13291,17 +13366,17 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
     }
     BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
     if (Subtarget->hasCMov()) {
-      BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
-        .addReg(SrcLoReg).addReg(LoReg);
-      BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
-        .addReg(SrcHiReg).addReg(HiReg);
+      BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
+        .addReg(SrcLoReg).addReg(t4L);
+      BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
+        .addReg(SrcHiReg).addReg(t4H);
     } else {
-      MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
-              .addReg(SrcLoReg).addReg(LoReg)
+      MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
+              .addReg(SrcLoReg).addReg(t4L)
               .addImm(X86::COND_NE);
       mainMBB = EmitLoweredSelect(MIB, mainMBB);
-      MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
-              .addReg(SrcHiReg).addReg(HiReg)
+      MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
+              .addReg(SrcHiReg).addReg(t4H)
               .addImm(X86::COND_NE);
       mainMBB = EmitLoweredSelect(MIB, mainMBB);
     }
@@ -13310,39 +13385,44 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
   case X86::ATOMSWAP6432: {
     unsigned HiOpc;
     unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
-    BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
-    BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+    BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
+    BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
     break;
   }
   }

   // Copy EDX:EAX back from HiReg:LoReg
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
   // Copy ECX:EBX from t1H:t1L
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
-  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);

   MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
-  for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
-    MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+    MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
+    if (NewMO.isReg())
+      NewMO.setIsKill(false);
+    MIB.addOperand(NewMO);
+  }
   MIB.setMemRefs(MMOBegin, MMOEnd);
+  // Copy EDX:EAX back to t3H:t3L
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
+  BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);

   BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);

   mainMBB->addSuccessor(origMainMBB);
   mainMBB->addSuccessor(sinkMBB);

   // sinkMBB:
-  sinkMBB->addLiveIn(X86::EAX);
-  sinkMBB->addLiveIn(X86::EDX);
   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
           TII->get(TargetOpcode::COPY), DstLoReg)
-    .addReg(X86::EAX);
+    .addReg(t3L);
   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
           TII->get(TargetOpcode::COPY), DstHiReg)
-    .addReg(X86::EDX);
+    .addReg(t3H);

   MI->eraseFromParent();

   return sinkMBB;

@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s

 define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
 entry:

@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s
 ; 64-bit load/store on x86-32
 ; FIXME: The generated code can be substantially improved.

@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s

 define void @test1(i32* %ptr, i32 %val1) {
 ; CHECK: test1

View File

@@ -1,5 +1,5 @@
-; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux < %s | FileCheck %s -check-prefix=LINUX
-; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
+; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC

 @sc64 = external global i64

@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
 ; rdar://9692967

@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-none-linux | FileCheck %s
+; RUN: llc < %s -mtriple=i686-none-linux -verify-machineinstrs | FileCheck %s

 define i32* @test_atomic_ptr_load(i32** %a0) {
 ; CHECK: test_atomic_ptr_load

@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -show-mc-encoding | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs -show-mc-encoding | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32

 @sc16 = external global i16

@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32

 @sc32 = external global i32

@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64

 @sc64 = external global i64

@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32

 @sc64 = external global i64

@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32

 @sc8 = external global i8

@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
 ; rdar://7103704

@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"