Change Thumb2 jumptable codegen to one that uses two level jumps:

Before:
      adr r12, #LJTI3_0_0
      ldr pc, [r12, +r0, lsl #2]
LJTI3_0_0:
      .long    LBB3_24
      .long    LBB3_30
      .long    LBB3_31
      .long    LBB3_32

After:
      adr r12, #LJTI3_0_0
      add pc, r12, +r0, lsl #2
LJTI3_0_0:
      b.w    LBB3_24
      b.w    LBB3_30
      b.w    LBB3_31
      b.w    LBB3_32

This has several advantages.
1. This will make it easier to optimize this to a TBB / TBH instruction +
   (smaller) table.
2. This eliminate the need for ugly asm printer hack to force the address
   into thumb addresses (bit 0 is one).
3. Same codegen for pic and non-pic.
4. This eliminate the need to align the table so constantpool island pass
   won't have to over-estimate the size.

Based on my calculation, the later is probably slightly faster as well since
ldr pc with shifter address is very slow. That is, it should be a win as long
as the HW implementation can do a reasonable job of branch predict the second
branch.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77024 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng
2009-07-25 00:33:29 +00:00
parent 4711326d60
commit 66ac53165e
13 changed files with 77 additions and 72 deletions

View File

@@ -402,6 +402,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::tCALL: return "ARMISD::tCALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
@@ -1704,15 +1705,27 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
if (Subtarget->isThumb2()) {
// Thumb2 uses a two-level jump. That is, it jumps into the jump table
// which does another jump to the destination. This also makes it easier
// to translate it to TBB / TBH later.
// FIXME: This might not work if the function is extremely large.
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Table, Index,
JTI, UId);
}
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
Chain, Addr, NULL, 0);
Chain = Addr.getValue(1);
if (isPIC)
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((MVT)MVT::i32, dl, Chain, Addr, NULL, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
}
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {