mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
[AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
Patch by Sanjin Sijaric <ssijaric@codeaurora.org>! Phabricator Review: http://reviews.llvm.org/D5103 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217371 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1ef487d463
commit
b30d031de4
@ -1192,6 +1192,20 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the target
|
||||
// to tell, even without aliasing information, that two MIs access different
|
||||
// memory addresses. This function returns true if two MIs access different
|
||||
// memory addresses, and false otherwise.
|
||||
virtual bool
|
||||
areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
|
||||
AliasAnalysis *AA = nullptr) const {
|
||||
assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
|
||||
"MIa must load from or modify a memory location");
|
||||
assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
|
||||
"MIb must load from or modify a memory location");
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
int CallFrameSetupOpcode, CallFrameDestroyOpcode;
|
||||
};
|
||||
|
@ -511,10 +511,19 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
|
||||
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
|
||||
MachineInstr *MIa,
|
||||
MachineInstr *MIb) {
|
||||
const MachineFunction *MF = MIa->getParent()->getParent();
|
||||
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||
|
||||
// Cover a trivial case - no edge is need to itself.
|
||||
if (MIa == MIb)
|
||||
return false;
|
||||
|
||||
// Let the target decide if memory accesses cannot possibly overlap.
|
||||
if ((MIa->mayLoad() || MIa->mayStore()) &&
|
||||
(MIb->mayLoad() || MIb->mayStore()))
|
||||
if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
|
||||
return false;
|
||||
|
||||
// FIXME: Need to handle multiple memory operands to support all targets.
|
||||
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
|
||||
return true;
|
||||
|
@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
|
||||
MachineInstr *MIb,
|
||||
AliasAnalysis *AA) const {
|
||||
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||
unsigned BaseRegA = 0, BaseRegB = 0;
|
||||
int OffsetA = 0, OffsetB = 0;
|
||||
int WidthA = 0, WidthB = 0;
|
||||
|
||||
assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
|
||||
"MIa must be a store or a load");
|
||||
assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
|
||||
"MIb must be a store or a load");
|
||||
|
||||
if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
|
||||
MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
|
||||
return false;
|
||||
|
||||
// Retrieve the base register, offset from the base register and width. Width
|
||||
// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
|
||||
// base registers are identical, and the offset of a lower memory access +
|
||||
// the width doesn't overlap the offset of a higher memory access,
|
||||
// then the memory accesses are different.
|
||||
if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
|
||||
getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
|
||||
if (BaseRegA == BaseRegB) {
|
||||
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
|
||||
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
|
||||
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
|
||||
if (LowOffset + LowWidth <= HighOffset)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// analyzeCompare - For a comparison instruction, return the source registers
|
||||
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
|
||||
/// Return true if the comparison instruction can be analyzed.
|
||||
@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
|
||||
};
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
|
||||
MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
// Handle only loads/stores with base register followed by immediate offset.
|
||||
if (LdSt->getNumOperands() != 3)
|
||||
return false;
|
||||
if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
|
||||
return false;
|
||||
|
||||
// Offset is calculated as the immediate operand multiplied by the scaling factor.
|
||||
// Unscaled instructions have scaling factor set to 1.
|
||||
int Scale = 0;
|
||||
switch (LdSt->getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case AArch64::LDURQi:
|
||||
case AArch64::STURQi:
|
||||
Width = 16;
|
||||
Scale = 1;
|
||||
break;
|
||||
case AArch64::LDURXi:
|
||||
case AArch64::LDURDi:
|
||||
case AArch64::STURXi:
|
||||
case AArch64::STURDi:
|
||||
Width = 8;
|
||||
Scale = 1;
|
||||
break;
|
||||
case AArch64::LDURWi:
|
||||
case AArch64::LDURSi:
|
||||
case AArch64::LDURSWi:
|
||||
case AArch64::STURWi:
|
||||
case AArch64::STURSi:
|
||||
Width = 4;
|
||||
Scale = 1;
|
||||
break;
|
||||
case AArch64::LDURHi:
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDURSHXi:
|
||||
case AArch64::LDURSHWi:
|
||||
case AArch64::STURHi:
|
||||
case AArch64::STURHHi:
|
||||
Width = 2;
|
||||
Scale = 1;
|
||||
break;
|
||||
case AArch64::LDURBi:
|
||||
case AArch64::LDURBBi:
|
||||
case AArch64::LDURSBXi:
|
||||
case AArch64::LDURSBWi:
|
||||
case AArch64::STURBi:
|
||||
case AArch64::STURBBi:
|
||||
Width = 1;
|
||||
Scale = 1;
|
||||
break;
|
||||
case AArch64::LDRXui:
|
||||
case AArch64::STRXui:
|
||||
Scale = Width = 8;
|
||||
break;
|
||||
case AArch64::LDRWui:
|
||||
case AArch64::STRWui:
|
||||
Scale = Width = 4;
|
||||
break;
|
||||
case AArch64::LDRBui:
|
||||
case AArch64::STRBui:
|
||||
Scale = Width = 1;
|
||||
break;
|
||||
case AArch64::LDRHui:
|
||||
case AArch64::STRHui:
|
||||
Scale = Width = 2;
|
||||
break;
|
||||
case AArch64::LDRSui:
|
||||
case AArch64::STRSui:
|
||||
Scale = Width = 4;
|
||||
break;
|
||||
case AArch64::LDRDui:
|
||||
case AArch64::STRDui:
|
||||
Scale = Width = 8;
|
||||
break;
|
||||
case AArch64::LDRQui:
|
||||
case AArch64::STRQui:
|
||||
Scale = Width = 16;
|
||||
break;
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::STRBBui:
|
||||
Scale = Width = 1;
|
||||
break;
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::STRHHui:
|
||||
Scale = Width = 2;
|
||||
break;
|
||||
};
|
||||
|
||||
BaseReg = LdSt->getOperand(1).getReg();
|
||||
Offset = LdSt->getOperand(2).getImm() * Scale;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Detect opportunities for ldp/stp formation.
|
||||
///
|
||||
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
|
||||
|
@ -52,6 +52,10 @@ public:
|
||||
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DstReg, unsigned &SubIdx) const override;
|
||||
|
||||
bool
|
||||
areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
|
||||
AliasAnalysis *AA = nullptr) const override;
|
||||
|
||||
unsigned isLoadFromStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const override;
|
||||
unsigned isStoreToStackSlot(const MachineInstr *MI,
|
||||
@ -90,6 +94,10 @@ public:
|
||||
unsigned &Offset,
|
||||
const TargetRegisterInfo *TRI) const override;
|
||||
|
||||
bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
|
||||
int &Offset, int &Width,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
bool enableClusterLoads() const override { return true; }
|
||||
|
||||
bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
|
||||
|
31
test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
Normal file
31
test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
|
||||
; Check that the scheduler moves the load from a[1] past the store into a[2].
|
||||
@a = common global i32* null, align 8
|
||||
@m = common global i32 0, align 4
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
|
||||
entry:
|
||||
; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
|
||||
; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
|
||||
%0 = load i32** @a, align 8, !tbaa !1
|
||||
%arrayidx = getelementptr inbounds i32* %0, i64 2
|
||||
store i32 %i, i32* %arrayidx, align 4, !tbaa !5
|
||||
%arrayidx1 = getelementptr inbounds i32* %0, i64 1
|
||||
%1 = load i32* %arrayidx1, align 4, !tbaa !5
|
||||
%add = add nsw i32 %k, %i
|
||||
store i32 %add, i32* @m, align 4, !tbaa !5
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = metadata !{metadata !"clang version 3.6.0 "}
|
||||
!1 = metadata !{metadata !2, metadata !2, i64 0}
|
||||
!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
|
||||
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
|
||||
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||
!5 = metadata !{metadata !6, metadata !6, i64 0}
|
||||
!6 = metadata !{metadata !"int", metadata !3, i64 0}
|
Loading…
Reference in New Issue
Block a user