From 65ca7aa57d5e9b391f02a5686e7622deaac146f9 Mon Sep 17 00:00:00 2001 From: Venkatraman Govindaraju Date: Sun, 2 Jun 2013 21:48:17 +0000 Subject: [PATCH] Sparc: Combine add/or/sethi instruction with restore if possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183088 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Sparc/DelaySlotFiller.cpp | 197 ++++++++++++++++++--- test/CodeGen/SPARC/2011-01-11-FrameAddr.ll | 8 +- test/CodeGen/SPARC/2011-01-19-DelaySlot.ll | 76 ++++++++ test/CodeGen/SPARC/64bit.ll | 41 ++++- test/CodeGen/SPARC/64cond.ll | 8 +- 5 files changed, 301 insertions(+), 29 deletions(-) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 9961232cbcc..271c630e393 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -82,6 +82,9 @@ namespace { bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); + bool tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + }; char Filler::ID = 0; } // end of anonymous namespace @@ -100,29 +103,44 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->hasDelaySlot()) { - MachineBasicBlock::iterator D = MBB.end(); - MachineBasicBlock::iterator J = I; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + MachineBasicBlock::iterator MI = I; + ++I; - if (!DisableDelaySlotFiller) - D = findDelayInstr(MBB, I); - - ++FilledSlots; - Changed = true; - - if (D == MBB.end()) - BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); - else - MBB.splice(++J, &MBB, D); - unsigned structSize = 0; - if (needsUnimp(I, structSize)) { - MachineBasicBlock::iterator J = I; - ++J; //skip the delay filler. 
- BuildMI(MBB, ++J, I->getDebugLoc(), - TII->get(SP::UNIMP)).addImm(structSize); - } + //If MI is restore, try combining it with previous inst. + if (!DisableDelaySlotFiller && + (MI->getOpcode() == SP::RESTORErr + || MI->getOpcode() == SP::RESTOREri)) { + Changed |= tryCombineRestoreWithPrevInst(MBB, MI); + continue; } + + //If MI has no delay slot, skip + if (!MI->hasDelaySlot()) + continue; + + MachineBasicBlock::iterator D = MBB.end(); + + if (!DisableDelaySlotFiller) + D = findDelayInstr(MBB, MI); + + ++FilledSlots; + Changed = true; + + if (D == MBB.end()) + BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP)); + else + MBB.splice(I, &MBB, D); + + unsigned structSize = 0; + if (needsUnimp(MI, structSize)) { + MachineBasicBlock::iterator J = MI; + ++J; //skip the delay filler. + assert (J != MBB.end() && "MI needs a delay instruction."); + BuildMI(MBB, ++J, I->getDebugLoc(), + TII->get(SP::UNIMP)).addImm(structSize); + } + } return Changed; } @@ -332,3 +350,140 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) StructSize = MO.getImm(); return true; } + +static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator AddMI, + const TargetInstrInfo *TII) +{ + //Before: add <op0>, <op1>, %i[0-7] + // restore %g0, %g0, %i[0-7] + // + //After : restore <op0>, <op1>, %o[0-7] + + unsigned reg = AddMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + //Erase RESTORE + RestoreMI->eraseFromParent(); + + //Change ADD to RESTORE + AddMI->setDesc(TII->get((AddMI->getOpcode() == SP::ADDrr) + ? 
SP::RESTORErr + : SP::RESTOREri)); + + //map the destination register + AddMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + + return true; +} + +static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator OrMI, + const TargetInstrInfo *TII) +{ + //Before: or <op0>, <op1>, %i[0-7] + // restore %g0, %g0, %i[0-7] + // and <op0> or <op1> is zero, + // + //After : restore <op0>, <op1>, %o[0-7] + + unsigned reg = OrMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + //check whether it is a copy + if (OrMI->getOpcode() == SP::ORrr + && OrMI->getOperand(1).getReg() != SP::G0 + && OrMI->getOperand(2).getReg() != SP::G0) + return false; + + if (OrMI->getOpcode() == SP::ORri + && OrMI->getOperand(1).getReg() != SP::G0 + && (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0)) + return false; + + //Erase RESTORE + RestoreMI->eraseFromParent(); + + //Change OR to RESTORE + OrMI->setDesc(TII->get((OrMI->getOpcode() == SP::ORrr) + ? SP::RESTORErr + : SP::RESTOREri)); + + //map the destination register + OrMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + + return true; +} + +static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator SetHiMI, + const TargetInstrInfo *TII) +{ + //Before: sethi imm3, %i[0-7] + // restore %g0, %g0, %g0 + // + //After : restore %g0, (imm3<<10), %o[0-7] + + unsigned reg = SetHiMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + if (!SetHiMI->getOperand(1).isImm()) + return false; + + int64_t imm = SetHiMI->getOperand(1).getImm(); + + //is it a 3 bit immediate? 
+ if (!isInt<3>(imm)) + return false; + + //make it a 13 bit immediate + imm = (imm << 10) & 0x1FFF; + + assert(RestoreMI->getOpcode() == SP::RESTORErr); + + RestoreMI->setDesc(TII->get(SP::RESTOREri)); + + RestoreMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + RestoreMI->getOperand(1).setReg(SP::G0); + RestoreMI->getOperand(2).ChangeToImmediate(imm); + + + //Erase the original SETHI + SetHiMI->eraseFromParent(); + + return true; +} + +bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) +{ + //No previous instruction + if (MBBI == MBB.begin()) + return false; + + //assert that MBBI is "restore %g0, %g0, %g0" + assert(MBBI->getOpcode() == SP::RESTORErr + && MBBI->getOperand(0).getReg() == SP::G0 + && MBBI->getOperand(1).getReg() == SP::G0 + && MBBI->getOperand(2).getReg() == SP::G0); + + MachineBasicBlock::iterator PrevInst = MBBI; --PrevInst; + + //Cannot combine with a delay filler + if (isDelayFiller(MBB, PrevInst)) + return false; + + switch (PrevInst->getOpcode()) { + default: break; + case SP::ADDrr: + case SP::ADDri: return combineRestoreADD(MBBI, PrevInst, TII); break; + case SP::ORrr: + case SP::ORri: return combineRestoreOR(MBBI, PrevInst, TII); break; + case SP::SETHIi: return combineRestoreSETHIi(MBBI, PrevInst, TII); break; + } + //Cannot combine with the previous instruction + return false; +} diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll index 683d3026671..5fd5687ba5b 100644 --- a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll +++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll @@ -7,10 +7,14 @@ define i8* @frameaddr() nounwind readnone { entry: ;V8: frameaddr -;V8: or %g0, %fp, {{.+}} +;V8: save %sp, -96, %sp +;V8: jmp %i7+8 +;V8: restore %g0, %fp, %o0 ;V9: frameaddr -;V9: or %g0, %fp, {{.+}} +;V9: save %sp, -96, %sp +;V9: jmp %i7+8 +;V9: restore %g0, %fp, %o0 %0 = tail call i8* @llvm.frameaddress(i32 0) ret i8* %0 } diff --git 
a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll index 4fd2e7beb1c..89981a8d8e1 100644 --- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll +++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll @@ -105,3 +105,79 @@ entry: declare i32 @func(i32*) + + +define i32 @restore_add(i32 %a, i32 %b) { +entry: +;CHECK: restore_add: +;CHECK: jmp %i7+8 +;CHECK: restore %o0, %i1, %o0 + %0 = tail call i32 @bar(i32 %a) nounwind + %1 = add nsw i32 %0, %b + ret i32 %1 +} + +define i32 @restore_add_imm(i32 %a) { +entry: +;CHECK: restore_add_imm: +;CHECK: jmp %i7+8 +;CHECK: restore %o0, 20, %o0 + %0 = tail call i32 @bar(i32 %a) nounwind + %1 = add nsw i32 %0, 20 + ret i32 %1 +} + +define i32 @restore_or(i32 %a) { +entry: +;CHECK: restore_or: +;CHECK: jmp %i7+8 +;CHECK: restore %g0, %o0, %o0 + %0 = tail call i32 @bar(i32 %a) nounwind + ret i32 %0 +} + +define i32 @restore_or_imm(i32 %a) { +entry: +;CHECK: restore_or_imm: +;CHECK: or %o0, 20, %i0 +;CHECK: jmp %i7+8 +;CHECK: restore %g0, %g0, %g0 + %0 = tail call i32 @bar(i32 %a) nounwind + %1 = or i32 %0, 20 + ret i32 %1 +} + + +define i32 @restore_sethi(i32 %a) { +entry: +;CHECK: restore_sethi +;CHECK-NOT: sethi 3 +;CHECK: restore %g0, 3072, %o0 + %0 = tail call i32 @bar(i32 %a) nounwind + %1 = icmp ne i32 %0, 0 + %2 = select i1 %1, i32 3072, i32 0 + ret i32 %2 +} + +define i32 @restore_sethi_3bit(i32 %a) { +entry: +;CHECK: restore_sethi +;CHECK: sethi 6 +;CHECK-NOT: restore %g0, 6144, %o0 + %0 = tail call i32 @bar(i32 %a) nounwind + %1 = icmp ne i32 %0, 0 + %2 = select i1 %1, i32 6144, i32 0 + ret i32 %2 +} + +define i32 @restore_sethi_large(i32 %a) { +entry: +;CHECK: restore_sethi +;CHECK: sethi 4000, %i0 +;CHECK: restore %g0, %g0, %g0 + %0 = tail call i32 @bar(i32 %a) nounwind + %1 = icmp ne i32 %0, 0 + %2 = select i1 %1, i32 4096000, i32 0 + ret i32 %2 +} + diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll index 106781420fb..f881ddfbc06 100644 --- a/test/CodeGen/SPARC/64bit.ll 
+++ b/test/CodeGen/SPARC/64bit.ll @@ -1,13 +1,22 @@ -; RUN: llc < %s -march=sparcv9 -disable-sparc-leaf-proc | FileCheck %s +; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s +; RUN: llc < %s -march=sparcv9 | FileCheck %s -check-prefix=OPT ; CHECK: ret2: ; CHECK: or %g0, %i1, %i0 + +; OPT: ret2: +; OPT: jmp %o7+8 +; OPT: or %g0, %o1, %o0 define i64 @ret2(i64 %a, i64 %b) { ret i64 %b } ; CHECK: shl_imm ; CHECK: sllx %i0, 7, %i0 + +; OPT: shl_imm: +; OPT: jmp %o7+8 +; OPT: sllx %o0, 7, %o0 define i64 @shl_imm(i64 %a) { %x = shl i64 %a, 7 ret i64 %x @@ -15,6 +24,10 @@ define i64 @shl_imm(i64 %a) { ; CHECK: sra_reg ; CHECK: srax %i0, %i1, %i0 + +; OPT: sra_reg: +; OPT: jmp %o7+8 +; OPT: srax %o0, %o1, %o0 define i64 @sra_reg(i64 %a, i64 %b) { %x = ashr i64 %a, %b ret i64 %x @@ -27,12 +40,20 @@ define i64 @sra_reg(i64 %a, i64 %b) { ; ; CHECK: ret_imm0 ; CHECK: or %g0, 0, %i0 + +; OPT: ret_imm0 +; OPT: jmp %o7+8 +; OPT: or %g0, 0, %o0 define i64 @ret_imm0() { ret i64 0 } ; CHECK: ret_simm13 ; CHECK: or %g0, -4096, %i0 + +; OPT: ret_simm13 +; OPT: jmp %o7+8 +; OPT: or %g0, -4096, %o0 define i64 @ret_simm13() { ret i64 -4096 } @@ -41,13 +62,23 @@ define i64 @ret_simm13() { ; CHECK: sethi 4, %i0 ; CHECK-NOT: or ; CHECK: restore + +; OPT: ret_sethi +; OPT: jmp %o7+8 +; OPT: sethi 4, %o0 define i64 @ret_sethi() { ret i64 4096 } -; CHECK: ret_sethi +; CHECK: ret_sethi_or ; CHECK: sethi 4, [[R:%[goli][0-7]]] ; CHECK: or [[R]], 1, %i0 + +; OPT: ret_sethi_or +; OPT: sethi 4, [[R:%[go][0-7]]] +; OPT: jmp %o7+8 +; OPT: or [[R]], 1, %o0 + define i64 @ret_sethi_or() { ret i64 4097 } @@ -55,6 +86,12 @@ define i64 @ret_sethi_or() { ; CHECK: ret_nimm33 ; CHECK: sethi 4, [[R:%[goli][0-7]]] ; CHECK: xor [[R]], -4, %i0 + +; OPT: ret_nimm33 +; OPT: sethi 4, [[R:%[go][0-7]]] +; OPT: jmp %o7+8 +; OPT: xor [[R]], -4, %o0 + define i64 @ret_nimm33() { ret i64 -4100 } diff --git a/test/CodeGen/SPARC/64cond.ll b/test/CodeGen/SPARC/64cond.ll index 
1404a51741c..88b476d1a24 100644 --- a/test/CodeGen/SPARC/64cond.ll +++ b/test/CodeGen/SPARC/64cond.ll @@ -36,7 +36,7 @@ if.end: ; CHECK: selecti32_xcc ; CHECK: subcc %i0, %i1 ; CHECK: movg %xcc, %i2, %i3 -; CHECK: or %g0, %i3, %i0 +; CHECK: restore %g0, %i3, %o0 define i32 @selecti32_xcc(i64 %x, i64 %y, i32 %a, i32 %b) { entry: %tobool = icmp sgt i64 %x, %y @@ -47,7 +47,7 @@ entry: ; CHECK: selecti64_xcc ; CHECK: subcc %i0, %i1 ; CHECK: movg %xcc, %i2, %i3 -; CHECK: or %g0, %i3, %i0 +; CHECK: restore %g0, %i3, %o0 define i64 @selecti64_xcc(i64 %x, i64 %y, i64 %a, i64 %b) { entry: %tobool = icmp sgt i64 %x, %y @@ -58,7 +58,7 @@ entry: ; CHECK: selecti64_icc ; CHECK: subcc %i0, %i1 ; CHECK: movg %icc, %i2, %i3 -; CHECK: or %g0, %i3, %i0 +; CHECK: restore %g0, %i3, %o0 define i64 @selecti64_icc(i32 %x, i32 %y, i64 %a, i64 %b) { entry: %tobool = icmp sgt i32 %x, %y @@ -69,7 +69,7 @@ entry: ; CHECK: selecti64_fcc ; CHECK: fcmps %f1, %f3 ; CHECK: movul %fcc0, %i2, %i3 -; CHECK: or %g0, %i3, %i0 +; CHECK: restore %g0, %i3, %o0 define i64 @selecti64_fcc(float %x, float %y, i64 %a, i64 %b) { entry: %tobool = fcmp ult float %x, %y