Turn on if-conversion for thumb2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79084 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2009-08-15 07:59:10 +00:00
parent ad27d77fc0
commit bc9b754091
12 changed files with 245 additions and 50 deletions

View File

@ -272,6 +272,11 @@ const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
}
// Appends a register operand with value 0 to MIB and returns the builder.
// In the Thumb2SizeReduce hunks of this change it is called when the narrow
// instruction descriptor has an optional def but the original instruction
// carried no CC def (the HasCC == false branch).
// NOTE(review): register 0 presumably denotes "no register" here -- confirm.
static inline
const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
return MIB.addReg(0);
}
static inline
bool isUncondBranchOpcode(int Opc) {
return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;

View File

@ -391,26 +391,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
setSchedulingPreference(SchedulingForRegPressure);
setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
if (!Subtarget->isThumb()) {
// Use branch latency information to determine if-conversion limits.
// FIXME: If-converter should use instruction latency of the branch being
// eliminated to compute the threshold. For ARMv6, the branch "latency"
// varies depending on whether it's dynamically or statically predicted
// and on whether the destination is in the prefetch buffer.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
if (Latency > 1) {
setIfCvtBlockSizeLimit(Latency-1);
if (Latency > 2)
setIfCvtDupBlockSizeLimit(Latency-2);
} else {
setIfCvtBlockSizeLimit(10);
setIfCvtDupBlockSizeLimit(2);
}
// FIXME: If-converter should use instruction latency to determine
// profitability rather than relying on fixed limits.
if (Subtarget->getCPUString() == "generic") {
// Generic (and overly aggressive) if-conversion limits.
setIfCvtBlockSizeLimit(10);
setIfCvtDupBlockSizeLimit(2);
} else if (Subtarget->hasV6Ops()) {
setIfCvtBlockSizeLimit(2);
setIfCvtDupBlockSizeLimit(1);
} else {
setIfCvtBlockSizeLimit(3);
setIfCvtDupBlockSizeLimit(2);
}
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type

View File

@ -39,12 +39,14 @@ def GenericItineraries : ProcessorItineraries<[
InstrItinData<IIC_iMPYh , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iMPYw , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iMPYl , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;

View File

@ -18,11 +18,13 @@ def V6Itineraries : ProcessorItineraries<[
InstrItinData<IIC_iMPYh , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iMPYw , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iMPYl , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;

View File

@ -55,11 +55,13 @@ def CortexA9Itineraries : ProcessorItineraries<[
InstrItinData<IIC_iMPYh , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iMPYw , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iMPYl , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>,
InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;

View File

@ -75,17 +75,15 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
// Constructs the Thumb target machine. The instruction-info object is a
// Thumb2InstrInfo when the subtarget has Thumb2 and a Thumb1InstrInfo
// otherwise. The data layout string is selected by Subtarget.isAPCS_ABI();
// the two candidate strings differ only in their f64/i64 fields
// (32:32 vs. 64:64).
// NOTE(review): this listing is a flattened diff view, so InstrInfo appears
// both in the member-initializer list and as an assignment in the body.
// Presumably only one of the two forms exists in any real revision of the
// file -- confirm against the repository before relying on this text.
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, true),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
"i16:16:32-i8:8:32-i1:8:32-a:0:32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-a:0:32")),
TLInfo(*this) {
// Create the appropriate type of Thumb InstrInfo
if (Subtarget.hasThumb2())
InstrInfo = new Thumb2InstrInfo(Subtarget);
else
InstrInfo = new Thumb1InstrInfo(Subtarget);
}
@ -116,7 +114,7 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
PM.add(createARMLoadStoreOptimizationPass());
if (OptLevel != CodeGenOpt::None &&
!DisableIfConversion && !Subtarget.isThumb())
!DisableIfConversion && !Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
if (Subtarget.isThumb2()) {

View File

@ -702,7 +702,7 @@ ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) {
unsigned NumTZ = CountTrailingZeros_32(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
bool T = (Mask & (1 << Pos)) != 0;
bool T = (Mask & (1 << Pos)) == 0;
if (T)
O << 't';
else

View File

@ -66,23 +66,19 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
.addImm(CC);
++MBBI;
// Finalize IT mask. If the following instruction is not predicated or it's
// predicated on a condition that's not the same or the opposite of CC, then
// the mask is 0x8.
// Finalize IT mask.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0x8;
while (MBBI != E || (Mask & 1)) {
unsigned Mask = 0, Pos = 3;
while (MBBI != E && Pos) {
ARMCC::CondCodes NCC = getPredicate(&*MBBI, TII);
if (NCC == CC) {
Mask >>= 1;
Mask |= 0x8;
} else if (NCC == OCC) {
Mask >>= 1;
} else {
if (NCC == OCC) {
Mask |= (1 << Pos);
} else if (NCC != CC)
break;
}
--Pos;
++MBBI;
}
Mask |= (1 << Pos);
MIB.addImm(Mask);
Modified = true;
++NumITs;

View File

@ -129,7 +129,7 @@ namespace {
static char ID;
Thumb2SizeReduce();
const TargetInstrInfo *TII;
const Thumb2InstrInfo *TII;
virtual bool runOnMachineFunction(MachineFunction &MF);
@ -454,8 +454,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
MIB.addOperand(MI->getOperand(0));
if (HasCC && NewTID.hasOptionalDef())
AddDefaultT1CC(MIB, CCDead);
if (NewTID.hasOptionalDef()) {
if (HasCC)
AddDefaultT1CC(MIB, CCDead);
else
AddNoT1CC(MIB);
}
// Transfer the rest of operands.
unsigned NumOps = TID.getNumOperands();
@ -534,8 +538,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
MIB.addOperand(MI->getOperand(0));
if (HasCC && NewTID.hasOptionalDef())
AddDefaultT1CC(MIB, CCDead);
if (NewTID.hasOptionalDef()) {
if (HasCC)
AddDefaultT1CC(MIB, CCDead);
else
AddNoT1CC(MIB);
}
// Transfer the rest of operands.
unsigned NumOps = TID.getNumOperands();
@ -659,7 +667,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
bool Modified = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)

View File

@ -0,0 +1,12 @@
; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep r7
%struct.noise3 = type { [3 x [17 x i32]] }
%struct.noiseguard = type { i32, i32, i32 }
; Regression input: loads field 2 of %guard[%block], stores the value through
; an undef pointer, then reaches unreachable. The test's run line compiles
; this for thumbv7-apple-darwin with -disable-fp-elim and requires that the
; string "r7" does not appear anywhere in the llc output.
define arm_apcscc void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
entry:
%0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1]
%1 = load i32* %0, align 4 ; <i32> [#uses=1]
store i32 %1, i32* undef, align 4
unreachable
}

View File

@ -0,0 +1,84 @@
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s
; Returns %a + %b + 1 when %c is 1 or 7 (both switch cases share cond_true),
; otherwise returns %a + %b. The FileCheck directives inside the body expect
; an "it ne" instruction followed by a predicated "cmpne" in the output.
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK: t1:
; CHECK: it ne
; CHECK: cmpne
switch i32 %c, label %cond_next [
i32 1, label %cond_true
i32 7, label %cond_true
]
cond_true:
%tmp12 = add i32 %a, 1
%tmp1518 = add i32 %tmp12, %b
ret i32 %tmp1518
cond_next:
%tmp15 = add i32 %b, %a
ret i32 %tmp15
}
; FIXME: Check the number of unconditional branches once post-ifcvt branch folding has been added.
; Loop that repeatedly subtracts one value from the other until the two are
; equal, then returns the surviving value through bb17. If %a == %b on entry
; it returns %a immediately.
; NOTE(review): this looks like a subtraction-based GCD -- confirm.
; Block bb compares the current a against the current b (signed greater-than):
; cond_true computes a - b, cond_false computes b - a. The FileCheck
; directives expect both subtractions to be emitted predicated under a single
; "ite le" instruction ("suble" then "subgt").
define i32 @t2(i32 %a, i32 %b) {
entry:
; CHECK: t2:
; CHECK: ite le
; CHECK: suble
; CHECK: subgt
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
bb.outer: ; preds = %cond_false, %entry
%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
br label %bb
bb: ; preds = %cond_true, %bb.outer
%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
%tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
br i1 %tmp3, label %cond_true, label %cond_false
cond_true: ; preds = %bb
%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp1437, label %bb17, label %bb
cond_false: ; preds = %bb
%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; <i1> [#uses=1]
br i1 %tmp14, label %bb17, label %bb.outer
bb17: ; preds = %cond_false, %cond_true, %entry
%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
ret i32 %a_addr.026.1
}
@x = external global i32* ; <i32**> [#uses=1]
; Helper for @t3: loads the pointer held in the external global @x and stores
; %a through it. It carries no FileCheck directives of its own.
define void @foo(i32 %a) {
entry:
%tmp = load i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
; Tail-calls @foo(%b) when %a > 10 (signed); otherwise returns immediately.
; The FileCheck directives expect the early return to be emitted as a
; predicated pop: "it lt" followed by "poplt {r7, pc}".
define void @t3(i32 %a, i32 %b) {
entry:
; CHECK: t3:
; CHECK: it lt
; CHECK: poplt {r7, pc}
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
tail call void @foo( i32 %b )
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
}

View File

@ -0,0 +1,93 @@
; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s
; Tail-calls @bar when %X < 4 (unsigned) or %Y == 0; otherwise returns
; immediately. The FileCheck directives expect the two comparisons to be
; chained with a predicated compare ("it ne" / "cmpne") and the early return
; to be a predicated pop ("it hi" / "pophi {r7, pc}").
define void @foo(i32 %X, i32 %Y) {
entry:
; CHECK: foo:
; CHECK: it ne
; CHECK: cmpne
; CHECK: it hi
; CHECK: pophi {r7, pc}
%tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
%tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
%tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
%tmp10 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
}
declare i32 @bar(...)
; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
; Reduced test case: each trip through tailrecurse loads three pointers from
; null and ANDs four "is null" comparisons together. When all four hold, the
; function returns 0; otherwise it calls @CountTree on the second loaded
; pointer (%tmp9) and loops back to tailrecurse.
; The FileCheck directives expect a predicated compare ("it eq" / "cmpeq"),
; a "beq.n" branch, and a two-instruction predicated return sequence
; ("itt eq" / "moveq" / "popeq").
define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
entry:
; CHECK: CountTree:
; CHECK: it eq
; CHECK: cmpeq
; CHECK: beq.n
; CHECK: itt eq
; CHECK: moveq
; CHECK: popeq
br label %tailrecurse
tailrecurse: ; preds = %bb, %entry
%tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
%tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
%tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
%tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
%tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
%tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
%tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1]
%bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1]
%bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1]
%bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1]
br i1 %bothcond2, label %return, label %bb
bb: ; preds = %tailrecurse
%tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0]
br label %tailrecurse
return: ; preds = %tailrecurse
ret i32 0
}
%struct.SString = type { i8*, i32, i32 }
declare void @abort()
; Calls @abort (followed by unreachable) when %word is null; otherwise
; returns. The FileCheck directives expect the non-null return path to be a
; predicated pop: "it ne" followed by "popne {r7, pc}".
define fastcc void @t1(%struct.SString* %word, i8 signext %c) {
entry:
; CHECK: t1:
; CHECK: it ne
; CHECK: popne {r7, pc}
%tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %cond_false
cond_true: ; preds = %entry
tail call void @abort( )
unreachable
cond_false: ; preds = %entry
ret void
}
; Branches on an undef condition to two blocks that each contain only
; unreachable. The FileCheck directives expect an ordinary "cmp r0, #0"
; followed by a "bne.n" branch in the output for this function.
define fastcc void @t2() nounwind {
entry:
; CHECK: t2:
; CHECK: cmp r0, #0
; CHECK: bne.n
br i1 undef, label %bb.i.i3, label %growMapping.exit
bb.i.i3: ; preds = %entry
unreachable
growMapping.exit: ; preds = %entry
unreachable
}