Fix the Load/Store optimization pass to work with Thumb1.

Patch by Moritz Roth!



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208992 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
James Molloy 2014-05-16 14:14:30 +00:00
parent 2c9f8a715e
commit 40ae57cc0a
5 changed files with 285 additions and 40 deletions

View File

@ -15,9 +15,11 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@ -66,6 +68,7 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
RegScavenger *RS;
bool isThumb1, isThumb2;
@ -94,7 +97,10 @@ namespace {
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc dl, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@ -120,7 +126,6 @@ namespace {
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
@ -160,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
case ARM::tLDRi:
// tLDMIA is writeback-only - unless the base register is in the input
// reglist.
++NumLDMGened;
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::tLDMIA;
}
case ARM::tSTRi:
// There is no non-writeback tSTMIA either.
++NumSTMGened;
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::tSTMIA_UPD;
}
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
@ -218,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::LDMIA_UPD:
case ARM::STMIA:
case ARM::STMIA_UPD:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
@ -264,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace ARM_AM
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
return Opc == ARM::tLDRi;
}
static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}
static bool isi32Load(unsigned Opc) {
return Opc == ARM::LDRi12 || isT2i32Load(Opc);
return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
}
static bool isT1i32Store(unsigned Opc) {
return Opc == ARM::tSTRi;
}
static bool isT2i32Store(unsigned Opc) {
@ -277,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) {
}
static bool isi32Store(unsigned Opc) {
return Opc == ARM::STRi12 || isT2i32Store(Opc);
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
static unsigned getImmScale(unsigned Opc) {
switch (Opc) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::tLDRi:
case ARM::tSTRi:
return 1;
case ARM::tLDRHi:
case ARM::tSTRHi:
return 2;
case ARM::tLDRBi:
case ARM::tSTRBi:
return 4;
}
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc dl, unsigned Base,
unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
// Start updating any instructions with immediate offsets. Insert a sub before
// the first non-updateable instruction (if any).
for (; MBBI != MBB.end(); ++MBBI) {
if (MBBI->readsRegister(Base)) {
unsigned Opc = MBBI->getOpcode();
int Offset;
bool InsertSub = false;
if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
// Loads and stores with immediate offsets can be updated, but only if
// the new offset isn't negative.
// The MachineOperand containing the offset immediate is the last one
// before predicates.
MachineOperand &MO =
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
// The offsets are scaled by 1, 2 or 4 depending on the Opcode
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
if (Offset >= 0)
MO.setImm(Offset);
else
InsertSub = true;
} else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
// SUB/ADD using this register. Merge it with the update.
// If the merged offset is too large, insert a new sub instead.
MachineOperand &MO =
MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
Offset = (Opc == ARM::tSUBi8) ?
MO.getImm() + WordOffset * 4 :
MO.getImm() - WordOffset * 4 ;
if (TL->isLegalAddImmediate(Offset)) {
MO.setImm(Offset);
// The base register has now been reset, so exit early.
return;
} else {
InsertSub = true;
}
} else {
// Can't update the instruction.
InsertSub = true;
}
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
.addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
.addImm(Pred).addReg(PredReg);
return;
}
}
if (MBBI->killsRegister(Base))
// Register got killed. Stop updating.
return;
}
// The end of the block was reached. This means register liveness escapes the
// block, and it's necessary to insert a sub before the last instruction.
if (MBB.succ_size() > 0)
// But only insert the SUB if there is actually a successor block.
// FIXME: Check more carefully if register is live at this point, e.g. by
// also examining the successor block's register liveness information.
AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
.addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
.addImm(Pred).addReg(PredReg);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
@ -297,14 +423,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes.
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
bool haveIBAndDA = isNotVFP && !isThumb2;
bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
if (Offset == 4 && haveIBAndDA) {
Mode = ARM_AM::ib;
} else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
Mode = ARM_AM::da;
} else if (Offset == -4 * (int)NumRegs && isNotVFP) {
} else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
} else if (Offset != 0) {
@ -329,31 +456,88 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NewBase == 0)
return false;
}
int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
if (Offset < 0) {
BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
BaseOpc =
isThumb2 ? ARM::t2SUBri :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
Offset = - Offset;
}
int ImmedOffset = isThumb2
? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
if (ImmedOffset == -1)
// FIXME: Try t2ADDri12 or t2SUBri12?
return false; // Probably not worth it then.
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
if (!TL->isLegalAddImmediate(Offset))
// FIXME: Try add with register operand?
return false; // Probably not worth it then.
if (isThumb1) {
if (Base != NewBase) {
// Need to insert a MOV to the new base first.
// FIXME: If the immediate fits in 3 bits, use ADD instead.
BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
}
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
.addReg(NewBase, getKillRegState(true)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
}
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
// base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback. Check for this.
if (Opcode == ARM::tLDRi && isThumb1)
for (unsigned I = 0; I < NumRegs; ++I)
if (Base == Regs[I].first) {
Writeback = false;
break;
}
MachineInstrBuilder MIB;
if (Writeback) {
if (Opcode == ARM::tLDMIA)
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
// The base isn't dead after a merged instruction with writeback. Update
// future uses of the base with the added offset (if possible), or reset
// the base register as necessary.
if (!BaseKill)
UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
MIB.addReg(Base, getKillRegState(BaseKill));
}
MIB.addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm()*Scale) == Bytes &&
(MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
// Make sure the offset fits in 8 bits.
return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm()*Scale) == Bytes &&
(MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
default: return 0;
case ARM::LDRi12:
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
case ARM::tLDMIA:
case ARM::tLDMIA_UPD:
case ARM::tSTMIA_UPD:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2STMIA:
@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
bool BaseKill = MI->getOperand(0).isKill();
@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
const TargetInstrInfo *TII,
bool &Advance,
MachineBasicBlock::iterator &I) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
return OffField;
// Thumb1 immediate offsets are scaled by 4
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
return OffField * 4;
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
: ARM_AM::getAM5Offset(OffField) * 4;
if (isAM3) {
@ -1417,8 +1623,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
// Find a scratch register.
unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
unsigned Scratch =
RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
// Process the load / store instructions.
RS->forward(std::prev(MBBI));
@ -1484,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
// Thumb1 LDM doesn't allow high registers.
if (isThumb1) return false;
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@ -1514,6 +1725,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
TL = TM.getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
@ -1522,9 +1734,6 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
// Don't do anything in this pass with Thumb1 for now.
if (isThumb1) return false;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
@ -1585,11 +1794,6 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
MF = &Fn;
ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
bool isThumb1 = AFI->isThumbFunction() && !AFI->isThumb2Function();
// Don't do anything in this pass with Thumb1 for now.
if (isThumb1) return false;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI)

View File

@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code.
//===---------------------------------------------------------------------===//
Add ldmia, stmia support.
//===---------------------------------------------------------------------===//
Thumb load / store address mode offsets are scaled. The values kept in the
instruction operands are pre-scale values. This probably ought to be changed
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
; CHECK: sub sp, #
; CHECK: mov r[[R0:[0-9]+]], sp
; CHECK: str r{{[0-9+]}}, [r[[R0]]
; CHECK: str r{{[0-9+]}}, [r[[R0]]
; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
; RA_BASIC: stm r[[R0]]!
; CHECK-NOT: ldr r0, [sp
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}

View File

@ -0,0 +1,42 @@
; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
define i32 @t1() {
; CHECK-LABEL: t1:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
ret i32 %tmp4
}
define i32 @t2() {
; CHECK-LABEL: t2:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
define i32 @t3() {
; CHECK-LABEL: t3:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
%tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
declare i32 @f1(i32, i32)
declare i32 @f2(i32, i32, i32)

View File

@ -5,6 +5,7 @@
define i32 @t1() {
; CHECK-LABEL: t1:
; CHECK: push {r7, lr}
; CHECK: ldrd
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
@ -27,6 +28,7 @@ define i32 @t2() {
define i32 @t3() {
; CHECK-LABEL: t3:
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
%tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]