Let t2LDRBi8 and t2LDRBi12 have same Base Pointer

When determining if two different loads are from the same base address,
this patch allows one load to use a t2LDRi8 address mode and another to
use a t2LDRi12 address mode. The current implementation is very
conservative and this allows the case of differing Thumb2 byte loads to
be considered. Allowing these differing modes instead of forcing the exact
same opcode is useful for situations where one opcodes loads from a base
address+1 and a second opcode loads for a base address-1.

Patch by Daniel Stewart.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188385 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Renato Golin 2013-08-14 16:35:29 +00:00
parent 17a5457ffe
commit dd34dc99fd
2 changed files with 78 additions and 1 deletions

View File

@ -1404,9 +1404,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@ -1423,8 +1425,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@ -1471,7 +1475,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
if ((Offset2 - Offset1) / 8 > 64)
return false;
if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
// Check if the machine opcodes are different. If they are different
// then we consider them to not be of the same base address,
// EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12.
// In this case, they are considered to be the same because they are different
// encoding forms of the same basic instruction.
if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
!((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
(Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.

View File

@ -0,0 +1,64 @@
; RUN: llc < %s -march=thumb -mattr=+v7,+thumb2 | FileCheck %s
define i8 @f1(i8* %call1, i8* %call3, i32 %h, i32 %w, i32 %Width) {
; CHECK: f1:
entry:
%mul17 = mul nsw i32 %Width, %h
%add = add nsw i32 %mul17, %w
%sub19 = sub i32 %add, %Width
%sub20 = add i32 %sub19, -1
%arrayidx21 = getelementptr inbounds i8* %call1, i32 %sub20
%0 = load i8* %arrayidx21, align 1
%conv22 = zext i8 %0 to i32
%arrayidx25 = getelementptr inbounds i8* %call1, i32 %sub19
%1 = load i8* %arrayidx25, align 1
%conv26 = zext i8 %1 to i32
%mul23189 = add i32 %conv26, %conv22
%add30 = add i32 %sub19, 1
%arrayidx31 = getelementptr inbounds i8* %call1, i32 %add30
%2 = load i8* %arrayidx31, align 1
%conv32 = zext i8 %2 to i32
; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
%add28190 = add i32 %mul23189, %conv32
%sub35 = add i32 %add, -1
%arrayidx36 = getelementptr inbounds i8* %call1, i32 %sub35
%3 = load i8* %arrayidx36, align 1
%conv37 = zext i8 %3 to i32
%add34191 = add i32 %add28190, %conv37
%arrayidx40 = getelementptr inbounds i8* %call1, i32 %add
%4 = load i8* %arrayidx40, align 1
%conv41 = zext i8 %4 to i32
%mul42 = mul nsw i32 %conv41, 255
%add44 = add i32 %add, 1
%arrayidx45 = getelementptr inbounds i8* %call1, i32 %add44
%5 = load i8* %arrayidx45, align 1
%conv46 = zext i8 %5 to i32
; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
%add49 = add i32 %add, %Width
%sub50 = add i32 %add49, -1
%arrayidx51 = getelementptr inbounds i8* %call1, i32 %sub50
%6 = load i8* %arrayidx51, align 1
%conv52 = zext i8 %6 to i32
%arrayidx56 = getelementptr inbounds i8* %call1, i32 %add49
%7 = load i8* %arrayidx56, align 1
%conv57 = zext i8 %7 to i32
%add61 = add i32 %add49, 1
%arrayidx62 = getelementptr inbounds i8* %call1, i32 %add61
%8 = load i8* %arrayidx62, align 1
%conv63 = zext i8 %8 to i32
; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1]
%tmp = add i32 %add34191, %conv46
%tmp193 = add i32 %tmp, %conv52
%tmp194 = add i32 %tmp193, %conv57
%tmp195 = add i32 %tmp194, %conv63
%tmp196 = mul i32 %tmp195, -28
%add65 = add i32 %tmp196, %mul42
%9 = lshr i32 %add65, 8
%conv68 = trunc i32 %9 to i8
%arrayidx69 = getelementptr inbounds i8* %call3, i32 %add
store i8 %conv68, i8* %arrayidx69, align 1
ret i8 %conv68
}