From 9e7a312391cb955bfc148d15a69adcaf7cc3ae50 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 4 Aug 2009 21:12:13 +0000 Subject: [PATCH] Enable load / store multiple pass for Thumb2. It's not using ldrd / strd yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 15 ++++---- lib/Target/ARM/ARMTargetMachine.cpp | 5 +-- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 1 + test/CodeGen/Thumb2/thumb2-ldm.ll | 40 +++++++++++++++++++++ 4 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/Thumb2/thumb2-ldm.ll diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c6afbe93360..a81b790f9dc 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -615,12 +615,15 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; - unsigned Offset = isAM5 - ? ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, - true, isDPR ? 2 : 1) - : (isAM2 - ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) - : Bytes); + unsigned Offset = 0; + if (isAM5) + Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) + ? ARM_AM::db + : ARM_AM::ia, true, (isDPR ? 2 : 1)); + else if (isAM2) + Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + else + Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isLd) { if (isAM5) // FLDMS, FLDMD diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 918ca7806b4..a6e987b086b 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -101,8 +101,9 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb mode. - if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) + // FIXME: temporarily disabling load / store optimization pass for Thumb1 mode. + if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && + !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); if (OptLevel != CodeGenOpt::None && diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4438a8c74c9..a326185c88a 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -599,6 +599,7 @@ void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, // FIXME bool isLDM = (MI->getOpcode() == ARM::LDM || MI->getOpcode() == ARM::LDM_RET || + MI->getOpcode() == ARM::t2LDM || MI->getOpcode() == ARM::t2LDM_RET); O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); } else diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll new file mode 100644 index 00000000000..c90722ccfd6 --- /dev/null +++ b/test/CodeGen/Thumb2/thumb2-ldm.ll @@ -0,0 +1,40 @@ +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s + +@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] + +define i32 @t1() { +; CHECK: t1: +; CHECK: stmfd sp!, {r7, lr} +; CHECK: ldmfd sp!, {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] + %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; [#uses=1] + ret i32 %tmp4 +} + +define i32 @t2() { +; CHECK: t2: +; CHECK: stmfd sp!, {r7, lr} +; CHECK: ldmia +; CHECK: ldmfd sp!, {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; [#uses=1] + %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1] + ret i32 %tmp6 +} + +define i32 @t3() { +; CHECK: t3: +; CHECK: stmfd sp!, {r7, lr} +; CHECK: ldmfd sp!, {r7, pc} + %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] + %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] + %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] + %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1] + ret i32 %tmp6 +} + +declare i32 @f1(i32, i32) + +declare i32 @f2(i32, i32, i32)