From 0d7d0b5cb7e41173b6fff2f0c2fbdcbebc9693fe Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 18 Jul 2011 18:52:12 +0000 Subject: [PATCH] Set mayLoad or mayStore flags for SC and LL in order to prevent LICM from moving them out of the loop. Previously, stores and loads to a stack frame object were inserted to accomplish this. Remove the code that was needed to do this. Patch by Sasa Stankovic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 95 ++++++--------------------- lib/Target/Mips/MipsInstrInfo.td | 4 +- lib/Target/Mips/MipsMachineFunction.h | 9 +-- test/CodeGen/Mips/atomic.ll | 42 +++++------- 4 files changed, 41 insertions(+), 109 deletions(-) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 873c99a1f4a..dfde8c97180 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -733,11 +733,10 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - unsigned Dest = MI->getOperand(0).getReg(); + unsigned Oldval = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); unsigned Incr = MI->getOperand(2).getReg(); - unsigned Oldval = RegInfo.createVirtualRegister(RC); unsigned Tmp1 = RegInfo.createVirtualRegister(RC); unsigned Tmp2 = RegInfo.createVirtualRegister(RC); unsigned Tmp3 = RegInfo.createVirtualRegister(RC); @@ -759,38 +758,16 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // thisMBB: // ... - // sw incr, fi(sp) // store incr to stack (when BinOpcode == 0) // fallthrough --> loopMBB - - // Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. - - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr).addFrameIndex(fi).addImm(0); - } BB->addSuccessor(loopMBB); // loopMBB: // ll oldval, 0(ptr) - // or dest, $0, oldval // tmp1, oldval, incr // sc tmp1, 0(ptr) // beq tmp1, $0, loopMBB BB = loopMBB; BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval); if (Nand) { // and tmp2, oldval, incr // nor tmp1, $0, tmp2 @@ -800,10 +777,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // tmp1, oldval, incr BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr); } else { - // lw tmp2, fi(sp) // load incr from stack - // or tmp1, $zero, tmp2 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + Tmp1 = Incr; } BuildMI(BB, dl, TII->get(Mips::SC), Tmp3).addReg(Tmp1).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) @@ -880,12 +854,6 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // nor mask2,$0,mask // andi tmp4,incr,255 // sll incr2,tmp4,shift - // sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0) - - // Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before - // the loop and then loading it from stack in block loopMBB is necessary to - // prevent MachineLICM pass to hoist "or" instruction out of the block - // loopMBB. int64_t MaskImm = (Size == 1) ? 255 : 65535; BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); @@ -904,21 +872,9 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift); } - int fi = 0; - if (BinOpcode == 0 && !Nand) { - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo(); - fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Incr2).addFrameIndex(fi).addImm(0); - } BB->addSuccessor(loopMBB); + // atomic.load.binop // loopMBB: // ll oldval,0(addr) // binop tmp7,oldval,incr2 @@ -927,6 +883,15 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // or tmp9,tmp8,newval // sc tmp9,0(addr) // beq tmp9,$0,loopMBB + + // atomic.swap + // loopMBB: + // ll oldval,0(addr) + // and tmp8,oldval,mask2 + // or tmp9,tmp8,incr2 + // sc tmp9,0(addr) + // beq tmp9,$0,loopMBB + BB = loopMBB; BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0); if (Nand) { @@ -940,15 +905,14 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, } else if (BinOpcode) { // tmp7, oldval, incr2 BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2); - } else { - // lw tmp6, fi(sp) // load incr2 from stack - // or tmp7, $zero, tmp6 - BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6); } - BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); + if (BinOpcode != 0 || Nand) + BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); + if (BinOpcode != 0 || Nand) + BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); + else + BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Incr2); BuildMI(BB, dl, TII->get(Mips::SC), Tmp13) .addReg(Tmp9).addReg(Addr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) @@ -996,7 +960,6 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned Newval = MI->getOperand(3).getReg(); unsigned Tmp1 = RegInfo.createVirtualRegister(RC); - unsigned Tmp2 = RegInfo.createVirtualRegister(RC); unsigned Tmp3 = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block @@ -1016,25 +979,9 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - // Get or create a temporary stack location. - MipsFunctionInfo *MipsFI = MF->getInfo(); - int fi = MipsFI->getAtomicFrameIndex(); - if (fi == -1) { - fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false); - MipsFI->setAtomicFrameIndex(fi); - } - // thisMBB: // ... - // sw newval, fi(sp) // store newval to stack // fallthrough --> loop1MBB - - // Note: storing newval to stack before the loop and then loading it from - // stack in block loop2MBB is necessary to prevent MachineLICM pass to - // hoist "or" instruction out of the block loop2MBB. - - BuildMI(BB, dl, TII->get(Mips::SW)) - .addReg(Newval).addFrameIndex(fi).addImm(0); BB->addSuccessor(loop1MBB); // loop1MBB: @@ -1048,13 +995,11 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, BB->addSuccessor(loop2MBB); // loop2MBB: - // lw tmp2, fi(sp) // load newval from stack - // or tmp1, $0, tmp2 + // or tmp1, $0, newval // sc tmp1, 0(ptr) // beq tmp1, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); - BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2); + BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Newval); BuildMI(BB, dl, TII->get(Mips::SC), Tmp3).addReg(Tmp1).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Tmp3).addReg(Mips::ZERO).addMBB(loop1MBB); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index d1a05871245..72265d0f224 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -590,10 +590,10 @@ def SH : StoreM<0x29, "sh", truncstorei16>; def SW : StoreM<0x2b, "sw", store>; /// Load-linked, Store-conditional -let hasDelaySlot = 1 in +let mayLoad = 1, hasDelaySlot = 1 in def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr), "ll\t$dst, $addr", [], IILoad>; -let Constraints = "$src = $dst" in +let mayStore = 1, Constraints = "$src = $dst" in def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr), "sc\t$src, $addr", [], IIStore>; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index dbb7a674422..bc30b6b2425 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -51,16 +51,12 @@ private: mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; - /// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap - /// intrinsics, it is necessary to use a temporary stack location. - /// This field holds the frame index of this location. - int AtomicFrameIndex; public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0), AtomicFrameIndex(-1) + MaxCallFrameSize(0) {} bool isInArgFI(int FI) const { @@ -104,9 +100,6 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } - - int getAtomicFrameIndex() const { return AtomicFrameIndex; } - void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; } }; } // end of namespace llvm diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll index 2d5555bd2cb..71e39285dba 100644 --- a/test/CodeGen/Mips/atomic.ll +++ b/test/CodeGen/Mips/atomic.ll @@ -24,7 +24,6 @@ entry: ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) ; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) -; CHECK: or $2, $zero, $[[R1]] ; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4 ; CHECK: sc $[[R2]], 0($[[R0]]) ; CHECK: beq $[[R2]], $zero, $[[BB0]] @@ -39,43 +38,42 @@ entry: ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) ; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) -; CHECK: or $2, $zero, $[[R1]] -; CHECK: and $[[R1]], $[[R1]], $4 -; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R1]] +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $4 +; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R3]] ; CHECK: sc $[[R2]], 0($[[R0]]) ; CHECK: beq $[[R2]], $zero, $[[BB0]] } -define i32 @AtomicSwap32(i32 %oldval) nounwind { +define i32 @AtomicSwap32(i32 %newval) nounwind { entry: - %0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %oldval) + %newval.addr = alloca i32, align 4 + store i32 %newval, i32* %newval.addr, align 4 + %tmp = load i32* %newval.addr, align 4 + %0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %tmp) ret i32 %0 ; CHECK: AtomicSwap32: ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) -; CHECK: sw $4, [[OFFSET:[0-9]+]]($sp) ; CHECK: $[[BB0:[A-Z_0-9]+]]: -; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) -; CHECK: or $2, $zero, $[[R1]] -; CHECK: lw $[[R2:[0-9]+]], [[OFFSET]]($sp) -; CHECK: or $[[R3:[0-9]+]], $zero, $[[R2]] -; CHECK: sc $[[R3]], 0($[[R0]]) -; CHECK: beq $[[R3]], $zero, $[[BB0]] +; CHECK: ll ${{[0-9]+}}, 0($[[R0]]) +; CHECK: sc $[[R2:[0-9]+]], 0($[[R0]]) +; CHECK: beq $[[R2]], $zero, $[[BB0]] } define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind { entry: - %0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %newval) + %newval.addr = alloca i32, align 4 + store i32 %newval, i32* %newval.addr, align 4 + %tmp = load i32* %newval.addr, align 4 + %0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %tmp) ret i32 %0 ; CHECK: AtomicCmpSwap32: ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) -; CHECK: sw $5, [[OFFSET:[0-9]+]]($sp) ; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: ll $2, 0($[[R0]]) ; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]] -; CHECK: lw $[[R1:[0-9]+]], [[OFFSET]]($sp) -; CHECK: or $[[R2:[0-9]+]], $zero, $[[R1]] +; CHECK: or $[[R2:[0-9]+]], $zero, $5 ; CHECK: sc $[[R2]], 0($[[R0]]) ; CHECK: beq $[[R2]], $zero, $[[BB0]] ; CHECK: $[[BB1]]: @@ -183,9 +181,9 @@ entry: ; CHECK: sra $2, $[[R17]], 24 } -define signext i8 @AtomicSwap8(i8 signext %oldval) nounwind { +define signext i8 @AtomicSwap8(i8 signext %newval) nounwind { entry: - %0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %oldval) + %0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %newval) ret i8 %0 ; CHECK: AtomicSwap8: @@ -199,15 +197,11 @@ entry: ; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] ; CHECK: andi $[[R8:[0-9]+]], $4, 255 ; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]] -; CHECK: sw $[[R9]], [[OFFSET:[0-9]+]]($sp) ; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) -; CHECK: lw $[[R18:[0-9]+]], [[OFFSET]]($sp) -; CHECK: or $[[R11:[0-9]+]], $zero, $[[R18]] -; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]] ; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] -; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]] +; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R9]] ; CHECK: sc $[[R14]], 0($[[R2]]) ; CHECK: beq $[[R14]], $zero, $[[BB0]]