It turns out most of the thumb2 instructions are not allowed to touch SP. The semantics of such instructions are unpredictable. We have just been lucky that tests have been passing.

This patch takes pain to ensure all the PEI lowering code does the right thing when lowering frame indices, insert code to manipulate stack pointers, etc. It's also custom lowering dynamic stack alloc into pseudo instructions so we can insert the right instructions at scheduling time.

This fixes PR4659 and PR4682.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78361 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2009-08-07 00:34:42 +00:00
parent 3a3cce59c4
commit 861986401e
12 changed files with 373 additions and 73 deletions

View File

@ -688,7 +688,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum = Ops[0];
unsigned Opc = MI->getOpcode();
MachineInstr *NewMI = NULL;
if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { // FIXME: tMOVgpr2gpr etc.?
// If it is updating CPSR, then it cannot be folded.
if (MI->getOperand(4).getReg() != ARM::CPSR || MI->getOperand(4).isDead()) {
unsigned Pred = MI->getOperand(2).getImm();

View File

@ -1289,11 +1289,12 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getDPRCalleeSavedAreaOffset()||
hasFP(MF)) {
if (NumBytes) {
unsigned SUBriOpc = isARM ? ARM::SUBri : ARM::t2SUBri;
BuildMI(MBB, MBBI, dl, TII.get(SUBriOpc), ARM::SP)
.addReg(FramePtr)
.addImm(NumBytes)
.addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
else
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
} else {
// Thumb2 or ARM.
unsigned MOVrOpc = isARM ? ARM::MOVr : ARM::t2MOVr;

View File

@ -122,6 +122,8 @@ private:
SDNode *SelectARMIndexedLoad(SDValue Op);
SDNode *SelectT2IndexedLoad(SDValue Op);
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
SDNode *SelectDYN_ALLOC(SDValue Op);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@ -868,6 +870,59 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
return NULL;
}
SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) {
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
MVT VT = Op.getValueType();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDValue Align = Op.getOperand(2);
SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
if (AlignVal < 0)
// We need to align the stack. Use Thumb1 tAND which is the only thumb
// instruction that can read and write SP. This matches to a pseudo
// instruction that has a chain to ensure the result is written back to
// the stack pointer.
SP = SDValue(CurDAG->getTargetNode(ARM::tANDsp, dl, VT, SP, Align), 0);
bool isC = isa<ConstantSDNode>(Size);
uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
// Handle the most common case for both Thumb1 and Thumb2:
// tSUBspi - immediate is between 0 ... 508 inclusive.
if (C <= 508 && ((C & 3) == 0))
// FIXME: tSUBspi encode scale 4 implicitly.
return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
CurDAG->getTargetConstant(C/4, MVT::i32),
Chain);
if (Subtarget->isThumb1Only()) {
// Use tADDrSPr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
// should have negated the size operand already. FIXME: We can't insert
// new target independent node at this stage so we are forced to negate
// it earlier. Is there a better solution?
return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
Chain);
} else if (Subtarget->isThumb2()) {
if (isC && Predicate_t2_so_imm(Size.getNode())) {
// t2SUBrSPi
SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
} else if (isC && Predicate_imm0_4095(Size.getNode())) {
// t2SUBrSPi12
SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
} else {
// t2SUBrSPs
SDValue Ops[] = { SP, Size,
getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
}
}
// FIXME: Add ADD / SUB sp instructions for ARM.
return 0;
}
SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
@ -941,25 +996,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
case ISD::ADD: {
if (!Subtarget->isThumb1Only())
break;
// Select add sp, c to tADDhirr.
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
if (LHSR && LHSR->getReg() == ARM::SP) {
std::swap(N0, N1);
std::swap(LHSR, RHSR);
}
if (RHSR && RHSR->getReg() == ARM::SP) {
SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVtgpr2gpr, dl,
Op.getValueType(), N0, N0),0);
return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1);
}
break;
}
case ARMISD::DYN_ALLOC:
return SelectDYN_ALLOC(Op);
case ISD::MUL:
if (Subtarget->isThumb1Only())
break;

View File

@ -307,7 +307,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
if (Subtarget->isThumb())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
@ -435,6 +438,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@ -1398,6 +1403,53 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
}
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
MVT VT = Node->getValueType(0);
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDValue Align = Op.getOperand(2);
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
if (AlignVal > StackAlign)
// Do this now since selection pass cannot introduce new target
// independent node.
Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
// In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
// using a "add r, sp, r" instead. Negate the size now so we don't have to
// do even more horrible hack later.
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (AFI->isThumb1OnlyFunction()) {
bool Negate = true;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
if (C) {
uint32_t Val = C->getZExtValue();
if (Val <= 508 && ((Val & 3) == 0))
Negate = false;
}
if (Negate)
Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
}
SDVTList VTList = DAG.getVTList(VT, MVT::Other);
SDValue Ops1[] = { Chain, Size, Align };
SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
Chain = Res.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
DAG.getIntPtrConstant(0, true), SDValue());
SDValue Ops2[] = { Res, Chain };
return DAG.getMergeValues(Ops2, 2, dl);
}
SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
@ -2396,6 +2448,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
@ -2454,7 +2507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
default:
llvm_unreachable("Unexpected instr type to insert");
case ARM::tMOVCCr: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@ -2509,6 +2563,78 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
case ARM::tANDsp:
case ARM::tADDspr_:
case ARM::tSUBspi_:
case ARM::t2SUBrSPi_:
case ARM::t2SUBrSPi12_:
case ARM::t2SUBrSPs_: {
MachineFunction *MF = BB->getParent();
unsigned DstReg = MI->getOperand(0).getReg();
unsigned SrcReg = MI->getOperand(1).getReg();
bool DstIsDead = MI->getOperand(0).isDead();
bool SrcIsKill = MI->getOperand(1).isKill();
if (SrcReg != ARM::SP) {
// Copy the source to SP from virtual register.
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
unsigned OpOpc = 0;
bool NeedPred = false, NeedCC = false, NeedOp3 = false;
switch (MI->getOpcode()) {
default:
llvm_unreachable("Unexpected pseudo instruction!");
case ARM::tANDsp:
OpOpc = ARM::tAND;
NeedPred = true;
break;
case ARM::tADDspr_:
OpOpc = ARM::tADDspr;
break;
case ARM::tSUBspi_:
OpOpc = ARM::tSUBspi;
break;
case ARM::t2SUBrSPi_:
OpOpc = ARM::t2SUBrSPi;
NeedPred = true; NeedCC = true;
break;
case ARM::t2SUBrSPi12_:
OpOpc = ARM::t2SUBrSPi12;
NeedPred = true;
break;
case ARM::t2SUBrSPs_:
OpOpc = ARM::t2SUBrSPs;
NeedPred = true; NeedCC = true; NeedOp3 = true;
break;
}
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
if (OpOpc == ARM::tAND)
AddDefaultT1CC(MIB);
MIB.addReg(ARM::SP);
MIB.addOperand(MI->getOperand(2));
if (NeedOp3)
MIB.addOperand(MI->getOperand(3));
if (NeedPred)
AddDefaultPred(MIB);
if (NeedCC)
AddDefaultCC(MIB);
// Copy the result from SP to virtual register.
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
BuildMI(BB, dl, TII->get(CopyOpc))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(ARM::SP);
MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
}
}

View File

@ -65,11 +65,13 @@ namespace llvm {
FMRRD, // double to two gprs.
FMDRR, // Two gprs to double.
EH_SJLJ_SETJMP, // SjLj exception handling setjmp
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
THREAD_POINTER,
DYN_ALLOC, // Dynamic allocation on the stack.
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@ -255,6 +257,7 @@ namespace llvm {
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,

View File

@ -138,18 +138,37 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALU,
"add $dst, pc, $rhs * 4", []>;
// ADD rd, sp, #imm8
// FIXME: hard code sp?
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALU,
"add $dst, $sp, $rhs * 4 @ addrspi", []>;
// ADD sp, sp, #imm7
// FIXME: hard code sp?
def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
"add $dst, $rhs * 4", []>;
// FIXME: Make use of the following?
// SUB sp, sp, #imm7
def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
"sub $dst, $rhs * 4", []>;
// ADD rm, sp, rm
def tADDrSPr : TI<(outs GPR:$dst), (ins GPR:$sp, GPR:$rhs), IIC_iALU,
"add $dst, $sp, $rhs", []>;
// ADD sp, rm
def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALU,
"add $dst, $rhs", []>;
// Pseudo instruction that will expand into a tSUBspi + a copy.
let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
NoItinerary, "@ sub $dst, $rhs * 4", []>;
def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
NoItinerary, "@ add $dst, $rhs", []>;
let Defs = [CPSR] in
def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
NoItinerary, "@ and $dst, $rhs", []>;
} // usesCustomDAGSchedInserter
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@ -549,9 +568,6 @@ def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU,
// TODO: A7-96: STMIA - store multiple.
def tSUBspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
"sub $dst, $rhs * 4", []>;
// sign-extend byte
def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALU,
"sxtb", " $dst, $src",

View File

@ -75,7 +75,8 @@ def imm1_31 : PatLeaf<(i32 imm), [{
}]>;
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
def imm0_4095 : PatLeaf<(i32 imm), [{
def imm0_4095 : Operand<i32>,
PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 4096;
}]>;
@ -239,7 +240,7 @@ multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// 12-bit imm
def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALU,
!strconcat(opc, "w"), " $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
// register
@ -431,6 +432,39 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
(ins i32imm:$label, i32imm:$id, pred:$p), IIC_iALU,
"adr$p.w $dst, #${label}_${id:no_hash}", []>;
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU,
"add", ".w $dst, $sp, $imm", []>;
def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU,
"addw", " $dst, $sp, $imm", []>;
// ADD r, sp, so_reg
def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU,
"add", ".w $dst, $sp, $rhs", []>;
// SUB r, sp, {so_imm|i12}
def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU,
"sub", ".w $dst, $sp, $imm", []>;
def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU,
"subw", " $dst, $sp, $imm", []>;
// SUB r, sp, so_reg
def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU,
"sub", " $dst, $sp, $rhs", []>;
// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
NoItinerary, "@ sub.w $dst, $sp, $imm", []>;
def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
NoItinerary, "@ subw $dst, $sp, $imm", []>;
def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
NoItinerary, "@ sub $dst, $sp, $rhs", []>;
} // usesCustomDAGSchedInserter
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//

View File

@ -206,9 +206,13 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NewBase == 0)
return false;
}
int BaseOpc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
int BaseOpc = !isThumb2
? ARM::ADDri
: ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
if (Offset < 0) {
BaseOpc = isThumb2 ? ARM::t2SUBri : ARM::SUBri;
BaseOpc = !isThumb2
? ARM::SUBri
: ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
Offset = - Offset;
}
int ImmedOffset = isThumb2
@ -329,6 +333,9 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2SUBri &&
MI->getOpcode() != ARM::t2SUBrSPi &&
MI->getOpcode() != ARM::t2SUBrSPi12 &&
MI->getOpcode() != ARM::tSUBspi &&
MI->getOpcode() != ARM::SUBri)
return false;
@ -336,9 +343,10 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (Bytes <= 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
MI->getOperand(2).getImm() == Bytes &&
(MI->getOperand(2).getImm()*Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
@ -350,6 +358,9 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2ADDri &&
MI->getOpcode() != ARM::t2ADDrSPi &&
MI->getOpcode() != ARM::t2ADDrSPi12 &&
MI->getOpcode() != ARM::tADDspi &&
MI->getOpcode() != ARM::ADDri)
return false;
@ -357,9 +368,10 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
// Make sure the offset fits in 8 bits.
return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
MI->getOperand(2).getImm() == Bytes &&
(MI->getOperand(2).getImm()*Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}

View File

@ -1,3 +1,7 @@
//===---------------------------------------------------------------------===//
// Random ideas for the ARM backend (Thumb2 specific).
//===---------------------------------------------------------------------===//
We should be using ADD / SUB rd, sp, rm <shift> instructions.
copyRegToReg should use tMOVgpr2gpr instead of t2MOVr?

View File

@ -65,11 +65,15 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (DestRC == ARM::GPRRegisterClass &&
SrcRC == ARM::GPRRegisterClass) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr),
DestReg).addReg(SrcReg)));
// FIXME: Just use tMOVgpr2gpr since it's shorter?
if (SrcReg == ARM::SP || DestReg == ARM::SP)
BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
else
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr),
DestReg).addReg(SrcReg)));
return true;
} else if (DestRC == ARM::GPRRegisterClass &&
SrcRC == ARM::tGPRRegisterClass) {
SrcRC == ARM::tGPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
return true;
} else if (DestRC == ARM::tGPRRegisterClass &&
@ -162,26 +166,62 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
}
while (NumBytes) {
unsigned Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
unsigned ThisVal = NumBytes;
if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
NumBytes = 0;
} else if (ThisVal < 4096) {
Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
NumBytes = 0;
unsigned Opc = 0;
if (DestReg == ARM::SP && BaseReg != ARM::SP) {
// mov sp, rn. Note t2MOVr cannot be used.
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
BaseReg = ARM::SP;
continue;
}
if (BaseReg == ARM::SP) {
// sub sp, sp, #imm7
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
// FIXME: Fix Thumb1 immediate encoding.
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg).addImm(ThisVal/4);
NumBytes = 0;
continue;
}
// sub rd, sp, so_imm
Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
unsigned RotAmt = CountLeadingZeros_32(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
}
} else {
// FIXME: Move this to ARMAddressingModes.h?
unsigned RotAmt = CountLeadingZeros_32(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
assert(DestReg != ARM::SP && BaseReg != ARM::SP);
Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
NumBytes = 0;
} else if (ThisVal < 4096) {
Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
unsigned RotAmt = CountLeadingZeros_32(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
}
}
// Build the new ADD / SUB.
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg, RegState::Kill).addImm(ThisVal)
.addImm((unsigned)Pred).addReg(PredReg).addReg(0);
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg, RegState::Kill)
.addImm(ThisVal)));
BaseReg = DestReg;
}
}
@ -288,7 +328,6 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
unsigned NewOpc = Opcode;
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
@ -299,9 +338,12 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
bool isSP = FrameReg == ARM::SP;
if (Offset == 0) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::t2MOVr));
unsigned NewOpc = isSP ? ARM::tMOVgpr2gpr : ARM::t2MOVr;
MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.RemoveOperand(FrameRegIdx+1);
return 0;
@ -310,23 +352,23 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if (Offset < 0) {
Offset = -Offset;
isSub = true;
MI.setDesc(TII.get(ARM::t2SUBri));
MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
} else {
MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
}
// Common case: small offset, fits into instruction.
if (ARM_AM::getT2SOImmVal(Offset) != -1) {
NewOpc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
if (NewOpc != Opcode)
MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
return 0;
}
// Another common case: imm12.
if (Offset < 4096) {
NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
if (NewOpc != Opcode)
MI.setDesc(TII.get(NewOpc));
unsigned NewOpc = isSP
? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
: (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12);
MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
return 0;
@ -346,7 +388,7 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
} else {
// AddrModeT2_so cannot handle any offset. If there is no offset
// register then we change to an immediate version.
NewOpc = Opcode;
unsigned NewOpc = Opcode;
if (AddrMode == ARMII::AddrModeT2_so) {
unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
if (OffsetReg != 0) {

View File

@ -0,0 +1,24 @@
; RUN: llvm-as < %s | llc -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s
; PR4659
; PR4682
define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind {
entry:
; CHECK: __gcov_execlp:
; CHECK: mov sp, r7
; CHECK: sub sp, #1 * 4
call arm_aapcscc void @__gcov_flush() nounwind
br i1 undef, label %bb5, label %bb
bb: ; preds = %bb, %entry
br i1 undef, label %bb5, label %bb
bb5: ; preds = %bb, %entry
%0 = alloca i8*, i32 undef, align 4 ; <i8**> [#uses=1]
%1 = call arm_aapcscc i32 @execvp(i8* %path, i8** %0) nounwind ; <i32> [#uses=1]
ret i32 %1
}
declare hidden arm_aapcscc void @__gcov_flush()
declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind

View File

@ -2,7 +2,7 @@
define void @test1() {
; CHECK: test1:
; CHECK: sub.w sp, sp, #256
; CHECK: sub sp, #64 * 4
%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
@ -10,7 +10,7 @@ define void @test1() {
define void @test2() {
; CHECK: test2:
; CHECK: sub.w sp, sp, #4160
; CHECK: sub.w sp, sp, #8
; CHECK: sub sp, #2 * 4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
@ -18,7 +18,7 @@ define void @test2() {
define i32 @test3() {
; CHECK: test3:
; CHECK: sub.w sp, sp, #805306368
; CHECK: sub.w sp, sp, #16
; CHECK: sub sp, #4 * 4
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16