From 40ae57cc0a187424c8db723a51506b7192b77f71 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Fri, 16 May 2014 14:14:30 +0000 Subject: [PATCH] Fix the Load/Store optimization pass to work with Thumb1. Patch by Moritz Roth! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208992 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 270 ++++++++++++++++++++--- lib/Target/ARM/README-Thumb.txt | 4 - test/CodeGen/Thumb/dyn-stackalloc.ll | 7 +- test/CodeGen/Thumb/thumb-ldm.ll | 42 ++++ test/CodeGen/Thumb2/thumb2-ldm.ll | 2 + 5 files changed, 285 insertions(+), 40 deletions(-) create mode 100644 test/CodeGen/Thumb/thumb-ldm.ll diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5b678bd2799..ee7df5476c7 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -15,9 +15,11 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb1RegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -66,6 +68,7 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + const TargetLowering *TL; ARMFunctionInfo *AFI; RegScavenger *RS; bool isThumb1, isThumb2; @@ -94,7 +97,10 @@ namespace { void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps, unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd); - + void UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg); bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -120,7 +126,6 @@ namespace { ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVectorImpl &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); @@ -160,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } + case ARM::tLDRi: + // tLDMIA is writeback-only - unless the base register is in the input + // reglist. + ++NumLDMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tLDMIA; + } + case ARM::tSTRi: + // There is no non-writeback tSTMIA either. + ++NumSTMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tSTMIA_UPD; + } case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -218,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::LDMIA_UPD: case ARM::STMIA: case ARM::STMIA_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA_RET: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: @@ -264,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace ARM_AM } // end namespace llvm +static bool isT1i32Load(unsigned Opc) { + return Opc == ARM::tLDRi; +} + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDRi12 || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; +} + +static bool isT1i32Store(unsigned Opc) { + return Opc == ARM::tSTRi; } static bool isT2i32Store(unsigned Opc) { @@ -277,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STRi12 || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); +} + +static unsigned getImmScale(unsigned Opc) { + switch (Opc) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::tLDRi: + case ARM::tSTRi: + return 1; + case ARM::tLDRHi: + case ARM::tSTRHi: + return 2; + case ARM::tLDRBi: + case ARM::tSTRBi: + return 4; + } +} + +/// Update future uses of the base register with the offset introduced +/// due to writeback. This function only works on Thumb1. +void +ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg) { + assert(isThumb1 && "Can only update base register uses for Thumb1!"); + + // Start updating any instructions with immediate offsets. Insert a sub before + // the first non-updateable instruction (if any). + for (; MBBI != MBB.end(); ++MBBI) { + if (MBBI->readsRegister(Base)) { + unsigned Opc = MBBI->getOpcode(); + int Offset; + bool InsertSub = false; + + if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || + Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || + Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + // Loads and stores with immediate offsets can be updated, but only if + // the new offset isn't negative. + // The MachineOperand containing the offset immediate is the last one + // before predicates. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + // The offsets are scaled by 1, 2 or 4 depending on the Opcode + Offset = MO.getImm() - WordOffset * getImmScale(Opc); + if (Offset >= 0) + MO.setImm(Offset); + else + InsertSub = true; + + } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { + // SUB/ADD using this register. Merge it with the update. + // If the merged offset is too large, insert a new sub instead. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + Offset = (Opc == ARM::tSUBi8) ? + MO.getImm() + WordOffset * 4 : + MO.getImm() - WordOffset * 4 ; + if (TL->isLegalAddImmediate(Offset)) { + MO.setImm(Offset); + // The base register has now been reset, so exit early. + return; + } else { + InsertSub = true; + } + + } else { + // Can't update the instruction. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; + } + } + + if (MBBI->killsRegister(Base)) + // Register got killed. Stop updating. + return; + } + + // The end of the block was reached. This means register liveness escapes the + // block, and it's necessary to insert a sub before the last instruction. + if (MBB.succ_size() > 0) + // But only insert the SUB if there is actually a successor block. + // FIXME: Check more carefully if register is live at this point, e.g. by + // also examining the successor block's register liveness information. + AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -297,14 +423,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - // VFP and Thumb2 do not support IB or DA modes. + // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - bool haveIBAndDA = isNotVFP && !isThumb2; + bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; + if (Offset == 4 && haveIBAndDA) { Mode = ARM_AM::ib; } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { Mode = ARM_AM::da; - } else if (Offset == -4 * (int)NumRegs && isNotVFP) { + } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; } else if (Offset != 0) { @@ -329,31 +456,88 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; + + int BaseOpc = + isThumb2 ? ARM::t2ADDri : + isThumb1 ? ARM::tADDi8 : ARM::ADDri; + if (Offset < 0) { - BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; + BaseOpc = + isThumb2 ? ARM::t2SUBri : + isThumb1 ? ARM::tSUBi8 : ARM::SUBri; Offset = - Offset; } - int ImmedOffset = isThumb2 - ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); - if (ImmedOffset == -1) - // FIXME: Try t2ADDri12 or t2SUBri12? - return false; // Probably not worth it then. - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + if (!TL->isLegalAddImmediate(Offset)) + // FIXME: Try add with register operand? + return false; // Probably not worth it then. + + if (isThumb1) { + if (Base != NewBase) { + // Need to insert a MOV to the new base first. + // FIXME: If the immediate fits in 3 bits, use ADD instead. + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + } + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) + .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addImm(Pred).addReg(PredReg); + } else { + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg).addReg(0); + } + Base = NewBase; BaseKill = true; // New base is always killed straight away. } bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); + + // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with + // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. Check for this. + if (Opcode == ARM::tLDRi && isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + Writeback = false; + break; + } + + MachineInstrBuilder MIB; + + if (Writeback) { + if (Opcode == ARM::tLDMIA) + // Update tLDMIA with writeback if necessary. + Opcode = ARM::tLDMIA_UPD; + + // The base isn't dead after a merged instruction with writeback. Update + // future uses of the base with the added offset (if possible), or reset + // the base register as necessary. + if (!BaseKill) + UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + + // Thumb1: we might need to set base writeback when building the MI. + MIB.addReg(Base, getDefRegState(true)) + .addReg(Base, getKillRegState(BaseKill)); + } else { + // No writeback, simply build the MachineInstr. + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB.addReg(Base, getKillRegState(BaseKill)); + } + + MIB.addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tSUBi8: case ARM::t2SUBri: case ARM::SUBri: CheckCPSRDef = true; @@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || + MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tADDi8: case ARM::t2ADDri: case ARM::ADDri: CheckCPSRDef = true; @@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; - unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tADDspi || + MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STMDA: case ARM::STMDB: case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA: case ARM::t2LDMDB: case ARM::t2STMIA: @@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 is already using updating loads/stores. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); bool BaseKill = MI->getOperand(0).isKill(); @@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, const TargetInstrInfo *TII, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 doesn't have updating LDR/STR. + // FIXME: Use LDM/STM with single register instead. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); @@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return MI->getOperand(1).isReg(); case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) { Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; if (isAM3) { @@ -1417,8 +1623,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); + // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); + unsigned Scratch = + RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); + // Process the load / store instructions. RS->forward(std::prev(MBBI)); @@ -1484,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// => /// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // Thumb1 LDM doesn't allow high registers. + if (isThumb1) return false; if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -1514,6 +1725,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); + TL = TM.getTargetLowering(); AFI = Fn.getInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); @@ -1522,9 +1734,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - // Don't do anything in this pass with Thumb1 for now. - if (isThumb1) return false; - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { @@ -1585,11 +1794,6 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); MF = &Fn; - ARMFunctionInfo *AFI = Fn.getInfo(); - bool isThumb1 = AFI->isThumbFunction() && !AFI->isThumb2Function(); - // Don't do anything in this pass with Thumb1 for now. - if (isThumb1) return false; - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index a64707e6f34..f4d9be3beb6 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code. //===---------------------------------------------------------------------===// -Add ldmia, stmia support. - -//===---------------------------------------------------------------------===// - Thumb load / store address mode offsets are scaled. The values kept in the instruction operands are pre-scale values. This probably ought to be changed to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions. diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll index 6c6de55347a..6bc39afe214 100644 --- a/test/CodeGen/Thumb/dyn-stackalloc.ll +++ b/test/CodeGen/Thumb/dyn-stackalloc.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY +; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } @@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) { ; CHECK: sub sp, # ; CHECK: mov r[[R0:[0-9]+]], sp ; CHECK: str r{{[0-9+]}}, [r[[R0]] -; CHECK: str r{{[0-9+]}}, [r[[R0]] +; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]] +; RA_BASIC: stm r[[R0]]! ; CHECK-NOT: ldr r0, [sp ; CHECK: mov r[[R1:[0-9]+]], sp ; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}} diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll new file mode 100644 index 00000000000..dd98e6f300e --- /dev/null +++ b/test/CodeGen/Thumb/thumb-ldm.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s + +@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] + +define i32 @t1() { +; CHECK-LABEL: t1: +; CHECK: push {r7, lr} +; CHECK: ldm +; CHECK: pop {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] + %tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; [#uses=1] + ret i32 %tmp4 +} + +define i32 @t2() { +; CHECK-LABEL: t2: +; CHECK: push {r7, lr} +; CHECK: ldm +; CHECK: pop {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; [#uses=1] + %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1] + ret i32 %tmp6 +} + +define i32 @t3() { +; CHECK-LABEL: t3: +; CHECK: push {r7, lr} +; CHECK: ldm +; CHECK: pop {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] + %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1] + ret i32 %tmp6 +} + +declare i32 @f1(i32, i32) + +declare i32 @f2(i32, i32, i32) diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll index 8716d80a2c8..adfcf2b6aaf 100644 --- a/test/CodeGen/Thumb2/thumb2-ldm.ll +++ b/test/CodeGen/Thumb2/thumb2-ldm.ll @@ -5,6 +5,7 @@ define i32 @t1() { ; CHECK-LABEL: t1: ; CHECK: push {r7, lr} +; CHECK: ldrd ; CHECK: pop {r7, pc} %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] @@ -27,6 +28,7 @@ define i32 @t2() { define i32 @t3() { ; CHECK-LABEL: t3: ; CHECK: push {r7, lr} +; CHECK: ldm ; CHECK: pop {r7, pc} %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1]