mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-27 13:30:05 +00:00
Improve handling of stack accesses in Thumb-1
Thumb-1 only allows SP-based LDR and STR to be word-sized, and SP-based LDR, STR, and ADD only allow offsets that are a multiple of 4. Make some changes to better make use of these instructions: * Use word loads for anyext byte and halfword loads from the stack. * Enforce 4-byte alignment on objects accessed in this way, to ensure that the offset is valid. * Do the same for objects whose frame index is used, in order to avoid having to use more than one ADD to generate the frame index. * Correct how many bits of offset we think AddrModeT1_s has. Patch by John Brawn. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230496 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3cecbeccf2
commit
b451f4e376
@ -546,12 +546,13 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
|
||||
// and pick a real one.
|
||||
Offset += 128; // 128 bytes of spill slots
|
||||
|
||||
// If there is a frame pointer, try using it.
|
||||
// If there's a frame pointer and the addressing mode allows it, try using it.
|
||||
// The FP is only available if there is no dynamic realignment. We
|
||||
// don't know for sure yet whether we'll need that, so we guess based
|
||||
// on whether there are any local variables that would trigger it.
|
||||
unsigned StackAlign = TFI->getStackAlignment();
|
||||
if (TFI->hasFP(MF) &&
|
||||
if (TFI->hasFP(MF) &&
|
||||
(MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s &&
|
||||
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
|
||||
if (isFrameOffsetLegal(MI, FPOffset))
|
||||
return false;
|
||||
@ -668,7 +669,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
|
||||
NumBits = 8;
|
||||
break;
|
||||
case ARMII::AddrModeT1_s:
|
||||
NumBits = 5;
|
||||
NumBits = 8;
|
||||
Scale = 4;
|
||||
isSigned = false;
|
||||
break;
|
||||
|
@ -1197,6 +1197,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
|
||||
SDValue &Base, SDValue &OffImm) {
|
||||
if (N.getOpcode() == ISD::FrameIndex) {
|
||||
int FI = cast<FrameIndexSDNode>(N)->getIndex();
|
||||
// Only multiples of 4 are allowed for the offset, so the frame object
|
||||
// alignment must be at least 4.
|
||||
MachineFrameInfo *MFI = MF->getFrameInfo();
|
||||
if (MFI->getObjectAlignment(FI) < 4)
|
||||
MFI->setObjectAlignment(FI, 4);
|
||||
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
|
||||
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
return true;
|
||||
@ -1214,6 +1219,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
|
||||
Base = N.getOperand(0);
|
||||
if (Base.getOpcode() == ISD::FrameIndex) {
|
||||
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
|
||||
// For LHS+RHS to result in an offset that's a multiple of 4 the object
|
||||
// indexed by the LHS must be 4-byte aligned.
|
||||
MachineFrameInfo *MFI = MF->getFrameInfo();
|
||||
if (MFI->getObjectAlignment(FI) < 4)
|
||||
MFI->setObjectAlignment(FI, 4);
|
||||
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
|
||||
}
|
||||
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
|
||||
@ -2502,6 +2512,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
int FI = cast<FrameIndexSDNode>(N)->getIndex();
|
||||
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
|
||||
if (Subtarget->isThumb1Only()) {
|
||||
// Set the alignment of the frame object to 4, to avoid having to generate
|
||||
// more than one ADD
|
||||
MachineFrameInfo *MFI = MF->getFrameInfo();
|
||||
if (MFI->getObjectAlignment(FI) < 4)
|
||||
MFI->setObjectAlignment(FI, 4);
|
||||
return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
|
||||
CurDAG->getTargetConstant(0, MVT::i32));
|
||||
} else {
|
||||
|
@ -1375,6 +1375,17 @@ def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
|
||||
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
|
||||
(tLDRBi t_addrmode_is1:$addr)>;
|
||||
|
||||
// extload from the stack -> word load from the stack, as it avoids having to
|
||||
// materialize the base in a separate register. This only works when a word
|
||||
// load puts the byte/halfword value in the same place in the register that the
|
||||
// byte/halfword load would, i.e. when little-endian.
|
||||
def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
|
||||
Requires<[IsThumb, IsThumb1Only, IsLE]>;
|
||||
def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
|
||||
Requires<[IsThumb, IsThumb1Only, IsLE]>;
|
||||
def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
|
||||
Requires<[IsThumb, IsThumb1Only, IsLE]>;
|
||||
|
||||
// extload -> zextload
|
||||
def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
|
||||
def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
|
||||
|
@ -170,7 +170,8 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
|
||||
return OffField;
|
||||
|
||||
// Thumb1 immediate offsets are scaled by 4
|
||||
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
|
||||
if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
|
||||
Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
|
||||
return OffField * 4;
|
||||
|
||||
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
|
||||
@ -206,6 +207,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
|
||||
case ARM_AM::ib: return ARM::STMIB;
|
||||
}
|
||||
case ARM::tLDRi:
|
||||
case ARM::tLDRspi:
|
||||
// tLDMIA is writeback-only - unless the base register is in the input
|
||||
// reglist.
|
||||
++NumLDMGened;
|
||||
@ -214,6 +216,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
|
||||
case ARM_AM::ia: return ARM::tLDMIA;
|
||||
}
|
||||
case ARM::tSTRi:
|
||||
case ARM::tSTRspi:
|
||||
// There is no non-writeback tSTMIA either.
|
||||
++NumSTMGened;
|
||||
switch (Mode) {
|
||||
@ -328,7 +331,7 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
|
||||
} // end namespace llvm
|
||||
|
||||
static bool isT1i32Load(unsigned Opc) {
|
||||
return Opc == ARM::tLDRi;
|
||||
return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
|
||||
}
|
||||
|
||||
static bool isT2i32Load(unsigned Opc) {
|
||||
@ -340,7 +343,7 @@ static bool isi32Load(unsigned Opc) {
|
||||
}
|
||||
|
||||
static bool isT1i32Store(unsigned Opc) {
|
||||
return Opc == ARM::tSTRi;
|
||||
return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
|
||||
}
|
||||
|
||||
static bool isT2i32Store(unsigned Opc) {
|
||||
@ -356,6 +359,8 @@ static unsigned getImmScale(unsigned Opc) {
|
||||
default: llvm_unreachable("Unhandled opcode!");
|
||||
case ARM::tLDRi:
|
||||
case ARM::tSTRi:
|
||||
case ARM::tLDRspi:
|
||||
case ARM::tSTRspi:
|
||||
return 1;
|
||||
case ARM::tLDRHi:
|
||||
case ARM::tSTRHi:
|
||||
@ -495,6 +500,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
if (isThumb1)
|
||||
for (unsigned I = 0; I < NumRegs; ++I)
|
||||
if (Base == Regs[I].first) {
|
||||
assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
|
||||
if (Opcode == ARM::tLDRi) {
|
||||
Writeback = false;
|
||||
break;
|
||||
@ -515,7 +521,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
} else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
|
||||
// VLDM/VSTM do not support DB mode without also updating the base reg.
|
||||
Mode = ARM_AM::db;
|
||||
} else if (Offset != 0) {
|
||||
} else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
|
||||
// Check if this is a supported opcode before inserting instructions to
|
||||
// calculate a new base register.
|
||||
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
|
||||
@ -545,6 +551,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
|
||||
int BaseOpc =
|
||||
isThumb2 ? ARM::t2ADDri :
|
||||
(isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
|
||||
(isThumb1 && Offset < 8) ? ARM::tADDi3 :
|
||||
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
|
||||
|
||||
@ -552,7 +559,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
Offset = - Offset;
|
||||
BaseOpc =
|
||||
isThumb2 ? ARM::t2SUBri :
|
||||
(isThumb1 && Offset < 8) ? ARM::tSUBi3 :
|
||||
(isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
|
||||
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
|
||||
}
|
||||
|
||||
@ -566,7 +573,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
// or
|
||||
// MOV NewBase, Base
|
||||
// ADDS NewBase, #imm8.
|
||||
if (Base != NewBase && Offset >= 8) {
|
||||
if (Base != NewBase &&
|
||||
(BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
|
||||
// Need to insert a MOV to the new base first.
|
||||
if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
|
||||
!STI->hasV6Ops()) {
|
||||
@ -584,9 +592,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
|
||||
Base = NewBase;
|
||||
BaseKill = false;
|
||||
}
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
|
||||
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
if (BaseOpc == ARM::tADDrSPi) {
|
||||
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
|
||||
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
|
||||
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
} else
|
||||
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
|
||||
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
|
||||
.addImm(Pred).addReg(PredReg);
|
||||
} else {
|
||||
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
|
||||
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
|
||||
@ -967,6 +981,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
|
||||
case ARM::STRi12:
|
||||
case ARM::tLDRi:
|
||||
case ARM::tSTRi:
|
||||
case ARM::tLDRspi:
|
||||
case ARM::tSTRspi:
|
||||
case ARM::t2LDRi8:
|
||||
case ARM::t2LDRi12:
|
||||
case ARM::t2STRi8:
|
||||
@ -1402,6 +1418,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
|
||||
case ARM::STRi12:
|
||||
case ARM::tLDRi:
|
||||
case ARM::tSTRi:
|
||||
case ARM::tLDRspi:
|
||||
case ARM::tSTRspi:
|
||||
case ARM::t2LDRi8:
|
||||
case ARM::t2LDRi12:
|
||||
case ARM::t2STRi8:
|
||||
|
@ -1,55 +1,48 @@
|
||||
; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T
|
||||
; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M
|
||||
|
||||
; CHECK-LABEL: foo
|
||||
define i32 @foo(i32 %z, ...) #0 {
|
||||
entry:
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%c = alloca i32, align 4
|
||||
%d = alloca i32, align 4
|
||||
%e = alloca i32, align 4
|
||||
%f = alloca i32, align 4
|
||||
%g = alloca i32, align 4
|
||||
%h = alloca i32, align 4
|
||||
; CHECK-LABEL: test1
|
||||
define i32 @test1(i32* %p) {
|
||||
|
||||
store i32 1, i32* %a, align 4
|
||||
store i32 2, i32* %b, align 4
|
||||
store i32 3, i32* %c, align 4
|
||||
store i32 4, i32* %d, align 4
|
||||
store i32 5, i32* %e, align 4
|
||||
store i32 6, i32* %f, align 4
|
||||
store i32 7, i32* %g, align 4
|
||||
store i32 8, i32* %h, align 4
|
||||
; Offsets less than 8 can be generated in a single add
|
||||
; CHECK: adds [[NEWBASE:r[0-9]]], r0, #4
|
||||
%1 = getelementptr inbounds i32* %p, i32 1
|
||||
%2 = getelementptr inbounds i32* %p, i32 2
|
||||
%3 = getelementptr inbounds i32* %p, i32 3
|
||||
%4 = getelementptr inbounds i32* %p, i32 4
|
||||
|
||||
%0 = load i32* %a, align 4
|
||||
%1 = load i32* %b, align 4
|
||||
%2 = load i32* %c, align 4
|
||||
%3 = load i32* %d, align 4
|
||||
%4 = load i32* %e, align 4
|
||||
%5 = load i32* %f, align 4
|
||||
%6 = load i32* %g, align 4
|
||||
%7 = load i32* %h, align 4
|
||||
|
||||
%add = add nsw i32 %0, %1
|
||||
%add4 = add nsw i32 %add, %2
|
||||
%add5 = add nsw i32 %add4, %3
|
||||
%add6 = add nsw i32 %add5, %4
|
||||
%add7 = add nsw i32 %add6, %5
|
||||
%add8 = add nsw i32 %add7, %6
|
||||
%add9 = add nsw i32 %add8, %7
|
||||
|
||||
%addz = add nsw i32 %add9, %z
|
||||
call void @llvm.va_start(i8* null)
|
||||
ret i32 %addz
|
||||
|
||||
; CHECK: sub sp, #40
|
||||
; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8
|
||||
|
||||
; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]]
|
||||
; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]]
|
||||
; CHECK-NEXT: adds [[NEWBASE]], #8
|
||||
; CHECK-NEXT: ldm [[NEWBASE]],
|
||||
%5 = load i32* %1, align 4
|
||||
%6 = load i32* %2, align 4
|
||||
%7 = load i32* %3, align 4
|
||||
%8 = load i32* %4, align 4
|
||||
|
||||
%9 = add nsw i32 %5, %6
|
||||
%10 = add nsw i32 %9, %7
|
||||
%11 = add nsw i32 %10, %8
|
||||
ret i32 %11
|
||||
}
|
||||
|
||||
declare void @llvm.va_start(i8*) nounwind
|
||||
; CHECK-LABEL: test2
|
||||
define i32 @test2(i32* %p) {
|
||||
|
||||
; Offsets >=8 require a mov and an add
|
||||
; CHECK-V4T: movs [[NEWBASE:r[0-9]]], r0
|
||||
; CHECK-V6M: mov [[NEWBASE:r[0-9]]], r0
|
||||
; CHECK-NEXT: adds [[NEWBASE]], #8
|
||||
%1 = getelementptr inbounds i32* %p, i32 2
|
||||
%2 = getelementptr inbounds i32* %p, i32 3
|
||||
%3 = getelementptr inbounds i32* %p, i32 4
|
||||
%4 = getelementptr inbounds i32* %p, i32 5
|
||||
|
||||
; CHECK-NEXT: ldm [[NEWBASE]],
|
||||
%5 = load i32* %1, align 4
|
||||
%6 = load i32* %2, align 4
|
||||
%7 = load i32* %3, align 4
|
||||
%8 = load i32* %4, align 4
|
||||
|
||||
%9 = add nsw i32 %5, %6
|
||||
%10 = add nsw i32 %9, %7
|
||||
%11 = add nsw i32 %10, %8
|
||||
ret i32 %11
|
||||
}
|
||||
|
@ -1296,7 +1296,7 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
|
||||
%addr = inttoptr i64 %addr_int to i8*
|
||||
|
||||
store atomic i8 %val, i8* %addr monotonic, align 1
|
||||
; CHECK-LE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
|
||||
; CHECK-LE: ldr{{b?(\.w)?}} [[VAL:r[0-9]+]], [sp]
|
||||
; CHECK-LE: strb [[VAL]], [r0, r2]
|
||||
; CHECK-BE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp, #3]
|
||||
; CHECK-BE: strb [[VAL]], [r1, r3]
|
||||
|
@ -88,24 +88,22 @@
|
||||
; CHECK-THUMB-FP: .cfi_startproc
|
||||
; CHECK-THUMB-FP: sub sp, #16
|
||||
; CHECK-THUMB-FP: .cfi_def_cfa_offset 16
|
||||
; CHECK-THUMB-FP: push {r4, r5, r7, lr}
|
||||
; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
|
||||
; CHECK-THUMB-FP: push {r4, lr}
|
||||
; CHECK-THUMB-FP: .cfi_def_cfa_offset 24
|
||||
; CHECK-THUMB-FP: .cfi_offset lr, -20
|
||||
; CHECK-THUMB-FP: .cfi_offset r7, -24
|
||||
; CHECK-THUMB-FP: .cfi_offset r5, -28
|
||||
; CHECK-THUMB-FP: .cfi_offset r4, -32
|
||||
; CHECK-THUMB-FP: .cfi_offset r4, -24
|
||||
; CHECK-THUMB-FP: sub sp, #8
|
||||
; CHECK-THUMB-FP: .cfi_def_cfa_offset 40
|
||||
; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
|
||||
|
||||
; CHECK-THUMB-FP-ELIM-LABEL: sum
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_startproc
|
||||
; CHECK-THUMB-FP-ELIM: sub sp, #16
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16
|
||||
; CHECK-THUMB-FP-ELIM: push {r4, r5, r7, lr}
|
||||
; CHECK-THUMB-FP-ELIM: push {r4, r6, r7, lr}
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -28
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -28
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32
|
||||
; CHECK-THUMB-FP-ELIM: add r7, sp, #8
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24
|
||||
|
@ -30,9 +30,9 @@ entry:
|
||||
; CHECK-ARM: push {r11, lr}
|
||||
; CHECK-ARM: mov r11, sp
|
||||
|
||||
; CHECK-THUMB: push {r4, r6, r7, lr}
|
||||
; CHECK-THUMB: add r7, sp, #8
|
||||
; CHECK-THUMB: push {r7, lr}
|
||||
; CHECK-THUMB: add r7, sp, #0
|
||||
|
||||
; CHECK-DARWIN-ARM: push {r7, lr}
|
||||
; CHECK-DARWIN-THUMB: push {r4, r7, lr}
|
||||
; CHECK-DARWIN-THUMB: push {r7, lr}
|
||||
|
||||
|
@ -43,26 +43,6 @@ bb3:
|
||||
declare noalias i8* @strdup(i8* nocapture) nounwind
|
||||
declare i32 @_called_func(i8*, i32*) nounwind
|
||||
|
||||
; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4)
|
||||
define void @test_local_var_addr() {
|
||||
; CHECK-LABEL: test_local_var_addr:
|
||||
|
||||
%addr1 = alloca i8
|
||||
%addr2 = alloca i8
|
||||
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: adds r0, #{{[0-9]+}}
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: adds r0, #{{[0-9]+}}
|
||||
; CHECK: blx
|
||||
call void @take_ptr(i8* %addr2)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Simple variable ending up *at* sp.
|
||||
define void @test_simple_var() {
|
||||
; CHECK-LABEL: test_simple_var:
|
||||
@ -126,14 +106,16 @@ define void @test_local_var_offset_1020() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Max range addressable with tADDrSPi + tADDi8
|
||||
define void @test_local_var_offset_1275() {
|
||||
; CHECK-LABEL: test_local_var_offset_1275
|
||||
; Max range addressable with tADDrSPi + tADDi8 is 1275, however the automatic
|
||||
; 4-byte aligning of objects on the stack combined with 8-byte stack alignment
|
||||
; means that 1268 is the max offset we can use.
|
||||
define void @test_local_var_offset_1268() {
|
||||
; CHECK-LABEL: test_local_var_offset_1268
|
||||
%addr1 = alloca i8, i32 1
|
||||
%addr2 = alloca i8, i32 1275
|
||||
%addr2 = alloca i8, i32 1268
|
||||
|
||||
; CHECK: add r0, sp, #1020
|
||||
; CHECK: adds r0, #255
|
||||
; CHECK: adds r0, #248
|
||||
; CHECK-NEXT: blx
|
||||
call void @take_ptr(i8* %addr1)
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
; CHECK-V4T-LABEL: clobberframe
|
||||
; CHECK-V5T-LABEL: clobberframe
|
||||
define <4 x i32> @clobberframe() #0 {
|
||||
define <4 x i32> @clobberframe(<6 x i32>* %p) #0 {
|
||||
entry:
|
||||
; Prologue
|
||||
; --------
|
||||
@ -11,9 +11,10 @@ entry:
|
||||
; CHECK-V4T: sub sp,
|
||||
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
|
||||
|
||||
%b = alloca <4 x i32>, align 16
|
||||
%b = alloca <6 x i32>, align 16
|
||||
%a = alloca <4 x i32>, align 16
|
||||
store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
|
||||
%stuff = load <6 x i32>* %p, align 16
|
||||
store <6 x i32> %stuff, <6 x i32>* %b, align 16
|
||||
store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
|
||||
%0 = load <4 x i32>* %a, align 16
|
||||
ret <4 x i32> %0
|
||||
@ -70,40 +71,25 @@ entry:
|
||||
|
||||
; CHECK-V4T-LABEL: simpleframe
|
||||
; CHECK-V5T-LABEL: simpleframe
|
||||
define i32 @simpleframe() #0 {
|
||||
define i32 @simpleframe(<6 x i32>* %p) #0 {
|
||||
entry:
|
||||
; Prologue
|
||||
; --------
|
||||
; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr}
|
||||
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
|
||||
|
||||
%a = alloca i32, align 4
|
||||
%b = alloca i32, align 4
|
||||
%c = alloca i32, align 4
|
||||
%d = alloca i32, align 4
|
||||
store i32 1, i32* %a, align 4
|
||||
store i32 2, i32* %b, align 4
|
||||
store i32 3, i32* %c, align 4
|
||||
store i32 4, i32* %d, align 4
|
||||
%0 = load i32* %a, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* %a, align 4
|
||||
%1 = load i32* %b, align 4
|
||||
%inc1 = add nsw i32 %1, 1
|
||||
store i32 %inc1, i32* %b, align 4
|
||||
%2 = load i32* %c, align 4
|
||||
%inc2 = add nsw i32 %2, 1
|
||||
store i32 %inc2, i32* %c, align 4
|
||||
%3 = load i32* %d, align 4
|
||||
%inc3 = add nsw i32 %3, 1
|
||||
store i32 %inc3, i32* %d, align 4
|
||||
%4 = load i32* %a, align 4
|
||||
%5 = load i32* %b, align 4
|
||||
%add = add nsw i32 %4, %5
|
||||
%6 = load i32* %c, align 4
|
||||
%add4 = add nsw i32 %add, %6
|
||||
%7 = load i32* %d, align 4
|
||||
%add5 = add nsw i32 %add4, %7
|
||||
%0 = load <6 x i32>* %p, align 16
|
||||
%1 = extractelement <6 x i32> %0, i32 0
|
||||
%2 = extractelement <6 x i32> %0, i32 1
|
||||
%3 = extractelement <6 x i32> %0, i32 2
|
||||
%4 = extractelement <6 x i32> %0, i32 3
|
||||
%5 = extractelement <6 x i32> %0, i32 4
|
||||
%6 = extractelement <6 x i32> %0, i32 5
|
||||
%add1 = add nsw i32 %1, %2
|
||||
%add2 = add nsw i32 %add1, %3
|
||||
%add3 = add nsw i32 %add2, %4
|
||||
%add4 = add nsw i32 %add3, %5
|
||||
%add5 = add nsw i32 %add4, %6
|
||||
ret i32 %add5
|
||||
|
||||
; Epilogue
|
||||
|
74
test/CodeGen/Thumb/stack-access.ll
Normal file
74
test/CodeGen/Thumb/stack-access.ll
Normal file
@ -0,0 +1,74 @@
|
||||
; RUN: llc -mtriple=thumb-eabi < %s -o - | FileCheck %s
|
||||
|
||||
; Check that stack addresses are generated using a single ADD
|
||||
define void @test1(i8** %p) {
|
||||
%x = alloca i8, align 1
|
||||
%y = alloca i8, align 1
|
||||
%z = alloca i8, align 1
|
||||
; CHECK: add r1, sp, #8
|
||||
; CHECK: str r1, [r0]
|
||||
store i8* %x, i8** %p, align 4
|
||||
; CHECK: add r1, sp, #4
|
||||
; CHECK: str r1, [r0]
|
||||
store i8* %y, i8** %p, align 4
|
||||
; CHECK: mov r1, sp
|
||||
; CHECK: str r1, [r0]
|
||||
store i8* %z, i8** %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stack offsets larger than 1020 still need two ADDs
|
||||
define void @test2([1024 x i8]** %p) {
|
||||
%arr1 = alloca [1024 x i8], align 1
|
||||
%arr2 = alloca [1024 x i8], align 1
|
||||
; CHECK: add r1, sp, #1020
|
||||
; CHECK: adds r1, #4
|
||||
; CHECK: str r1, [r0]
|
||||
store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
|
||||
; CHECK: mov r1, sp
|
||||
; CHECK: str r1, [r0]
|
||||
store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; If possible stack-based ldrb/ldrh are widened to use SP-based addressing
|
||||
define i32 @test3() #0 {
|
||||
%x = alloca i8, align 1
|
||||
%y = alloca i8, align 1
|
||||
; CHECK: ldr r0, [sp]
|
||||
%1 = load i8* %x, align 1
|
||||
; CHECK: ldr r1, [sp, #4]
|
||||
%2 = load i8* %y, align 1
|
||||
%3 = add nsw i8 %1, %2
|
||||
%4 = zext i8 %3 to i32
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
define i32 @test4() #0 {
|
||||
%x = alloca i16, align 2
|
||||
%y = alloca i16, align 2
|
||||
; CHECK: ldr r0, [sp]
|
||||
%1 = load i16* %x, align 2
|
||||
; CHECK: ldr r1, [sp, #4]
|
||||
%2 = load i16* %y, align 2
|
||||
%3 = add nsw i16 %1, %2
|
||||
%4 = zext i16 %3 to i32
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
; Don't widen if the value needs to be zero-extended
|
||||
define zeroext i8 @test5() {
|
||||
%x = alloca i8, align 1
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: ldrb r0, [r0]
|
||||
%1 = load i8* %x, align 1
|
||||
ret i8 %1
|
||||
}
|
||||
|
||||
define zeroext i16 @test6() {
|
||||
%x = alloca i16, align 2
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: ldrh r0, [r0]
|
||||
%1 = load i16* %x, align 2
|
||||
ret i16 %1
|
||||
}
|
@ -7,16 +7,17 @@ target triple = "thumbv6m--linux-gnueabi"
|
||||
@e = internal unnamed_addr global i32* null, align 4
|
||||
|
||||
; Function Attrs: nounwind optsize
|
||||
define void @fn1() #0 {
|
||||
define void @fn1(i32 %x, i32 %y, i32 %z) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: fn1:
|
||||
; CHECK: stm r[[BASE:[0-9]]]!, {{.*}}
|
||||
; CHECK-NOT: {{.*}} r[[BASE]]
|
||||
; CHECK: ldr r[[BASE]], {{.*}}
|
||||
%g = alloca i32, align 4
|
||||
%h = alloca i32, align 4
|
||||
store i32 1, i32* %g, align 4
|
||||
store i32 0, i32* %h, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %x, i32* %i, align 4
|
||||
store i32 %y, i32* %h, align 4
|
||||
store i32 %z, i32* %g, align 4
|
||||
%.pr = load i32* @d, align 4
|
||||
%cmp11 = icmp slt i32 %.pr, 1
|
||||
br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
|
||||
|
@ -6,6 +6,10 @@
|
||||
|
||||
define void @f(i32 %a, ...) {
|
||||
entry:
|
||||
; Check that space is reserved above the pushed lr for variadic argument
|
||||
; registers to be stored in.
|
||||
; CHECK: sub sp, #[[IMM:[0-9]+]]
|
||||
; CHECK: push
|
||||
%va = alloca i8*, align 4 ; <i8**> [#uses=4]
|
||||
%va.upgrd.1 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.va_start( i8* %va.upgrd.1 )
|
||||
@ -27,6 +31,13 @@ bb7: ; preds = %bb
|
||||
%va.upgrd.4 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
|
||||
call void @llvm.va_end( i8* %va.upgrd.4 )
|
||||
ret void
|
||||
|
||||
; The return sequence should pop the lr to r3, recover the stack space used to
|
||||
; store variadic argument registers, then return via r3. Possibly there is a pop
|
||||
; before this, but only if the function happened to use callee-saved registers.
|
||||
; CHECK: pop {r3}
|
||||
; CHECK: add sp, #[[IMM]]
|
||||
; CHECK: bx r3
|
||||
}
|
||||
|
||||
declare void @llvm.va_start(i8*)
|
||||
@ -34,8 +45,3 @@ declare void @llvm.va_start(i8*)
|
||||
declare i32 @printf(i8*, ...)
|
||||
|
||||
declare void @llvm.va_end(i8*)
|
||||
|
||||
; CHECK: pop
|
||||
; CHECK: pop
|
||||
; CHECK-NOT: pop
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user