mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-20 12:31:40 +00:00
[AArch64] Improve AA to remove unneeded edges in the AA MI scheduling graph.
Patch by Sanjin Sijaric <ssijaric@codeaurora.org>! Phabricator Review: http://reviews.llvm.org/D5103 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217371 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1ef487d463
commit
b30d031de4
@ -1192,6 +1192,20 @@ public:
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// areMemAccessesTriviallyDisjoint - Sometimes, it is possible for the target
|
||||||
|
// to tell, even without aliasing information, that two MIs access different
|
||||||
|
// memory addresses. This function returns true if two MIs access different
|
||||||
|
// memory addresses, and false otherwise.
|
||||||
|
virtual bool
|
||||||
|
areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
|
||||||
|
AliasAnalysis *AA = nullptr) const {
|
||||||
|
assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
|
||||||
|
"MIa must load from or modify a memory location");
|
||||||
|
assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
|
||||||
|
"MIb must load from or modify a memory location");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int CallFrameSetupOpcode, CallFrameDestroyOpcode;
|
int CallFrameSetupOpcode, CallFrameDestroyOpcode;
|
||||||
};
|
};
|
||||||
|
@ -511,10 +511,19 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
|
|||||||
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
|
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
|
||||||
MachineInstr *MIa,
|
MachineInstr *MIa,
|
||||||
MachineInstr *MIb) {
|
MachineInstr *MIb) {
|
||||||
|
const MachineFunction *MF = MIa->getParent()->getParent();
|
||||||
|
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||||
|
|
||||||
// Cover a trivial case - no edge is need to itself.
|
// Cover a trivial case - no edge is need to itself.
|
||||||
if (MIa == MIb)
|
if (MIa == MIb)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// Let the target decide if memory accesses cannot possibly overlap.
|
||||||
|
if ((MIa->mayLoad() || MIa->mayStore()) &&
|
||||||
|
(MIb->mayLoad() || MIb->mayStore()))
|
||||||
|
if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
|
||||||
|
return false;
|
||||||
|
|
||||||
// FIXME: Need to handle multiple memory operands to support all targets.
|
// FIXME: Need to handle multiple memory operands to support all targets.
|
||||||
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
|
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
|
||||||
return true;
|
return true;
|
||||||
|
@ -607,6 +607,42 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
AArch64InstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
|
||||||
|
MachineInstr *MIb,
|
||||||
|
AliasAnalysis *AA) const {
|
||||||
|
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||||
|
unsigned BaseRegA = 0, BaseRegB = 0;
|
||||||
|
int OffsetA = 0, OffsetB = 0;
|
||||||
|
int WidthA = 0, WidthB = 0;
|
||||||
|
|
||||||
|
assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
|
||||||
|
"MIa must be a store or a load");
|
||||||
|
assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
|
||||||
|
"MIb must be a store or a load");
|
||||||
|
|
||||||
|
if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
|
||||||
|
MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Retrieve the base register, offset from the base register and width. Width
|
||||||
|
// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
|
||||||
|
// base registers are identical, and the offset of a lower memory access +
|
||||||
|
// the width doesn't overlap the offset of a higher memory access,
|
||||||
|
// then the memory accesses are different.
|
||||||
|
if (getLdStBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
|
||||||
|
getLdStBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
|
||||||
|
if (BaseRegA == BaseRegB) {
|
||||||
|
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
|
||||||
|
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
|
||||||
|
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
|
||||||
|
if (LowOffset + LowWidth <= HighOffset)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// analyzeCompare - For a comparison instruction, return the source registers
|
/// analyzeCompare - For a comparison instruction, return the source registers
|
||||||
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
|
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
|
||||||
/// Return true if the comparison instruction can be analyzed.
|
/// Return true if the comparison instruction can be analyzed.
|
||||||
@ -1270,6 +1306,102 @@ AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decode a load/store of the form "base register + immediate".
///
/// On success, \p BaseReg receives operand 1's register, \p Width the size of
/// the memory access (1, 2, 4, 8 or 16 for the opcodes handled here), and
/// \p Offset the immediate from operand 2 multiplied by the opcode's scaling
/// factor. The unscaled LDUR*/STUR* forms use a factor of 1; the scaled
/// LDR*ui/STR*ui forms use the access size.
///
/// Returns false, leaving all outputs untouched, when the instruction does
/// not have exactly three operands, when operand 1 is not a register or
/// operand 2 is not an immediate, or when the opcode is not listed below.
/// \p TRI is part of the signature but is not consulted here.
bool AArch64InstrInfo::getLdStBaseRegImmOfsWidth(
    MachineInstr *LdSt, unsigned &BaseReg, int &Offset, int &Width,
    const TargetRegisterInfo *TRI) const {
  // Only the plain "register, immediate" addressing shape is understood.
  const bool IsBasePlusImm = LdSt->getNumOperands() == 3 &&
                             LdSt->getOperand(1).isReg() &&
                             LdSt->getOperand(2).isImm();
  if (!IsBasePlusImm)
    return false;

  int AccessSize = 0;
  bool ImmIsScaled = false;
  switch (LdSt->getOpcode()) {
  default:
    return false;

  // Unscaled forms: the immediate is used as-is.
  case AArch64::LDURQi:
  case AArch64::STURQi:
    AccessSize = 16;
    break;
  case AArch64::LDURXi:
  case AArch64::LDURDi:
  case AArch64::STURXi:
  case AArch64::STURDi:
    AccessSize = 8;
    break;
  case AArch64::LDURWi:
  case AArch64::LDURSi:
  case AArch64::LDURSWi:
  case AArch64::STURWi:
  case AArch64::STURSi:
    AccessSize = 4;
    break;
  case AArch64::LDURHi:
  case AArch64::LDURHHi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSHWi:
  case AArch64::STURHi:
  case AArch64::STURHHi:
    AccessSize = 2;
    break;
  case AArch64::LDURBi:
  case AArch64::LDURBBi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSBWi:
  case AArch64::STURBi:
  case AArch64::STURBBi:
    AccessSize = 1;
    break;

  // Scaled forms: the immediate counts units of the access size.
  case AArch64::LDRQui:
  case AArch64::STRQui:
    AccessSize = 16;
    ImmIsScaled = true;
    break;
  case AArch64::LDRXui:
  case AArch64::STRXui:
  case AArch64::LDRDui:
  case AArch64::STRDui:
    AccessSize = 8;
    ImmIsScaled = true;
    break;
  case AArch64::LDRWui:
  case AArch64::STRWui:
  case AArch64::LDRSui:
  case AArch64::STRSui:
    AccessSize = 4;
    ImmIsScaled = true;
    break;
  case AArch64::LDRHui:
  case AArch64::STRHui:
  case AArch64::LDRHHui:
  case AArch64::STRHHui:
    AccessSize = 2;
    ImmIsScaled = true;
    break;
  case AArch64::LDRBui:
  case AArch64::STRBui:
  case AArch64::LDRBBui:
  case AArch64::STRBBui:
    AccessSize = 1;
    ImmIsScaled = true;
    break;
  }

  const int ScaleFactor = ImmIsScaled ? AccessSize : 1;
  Width = AccessSize;
  BaseReg = LdSt->getOperand(1).getReg();
  Offset = LdSt->getOperand(2).getImm() * ScaleFactor;
  return true;
}
|
||||||
|
|
||||||
/// Detect opportunities for ldp/stp formation.
|
/// Detect opportunities for ldp/stp formation.
|
||||||
///
|
///
|
||||||
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
|
/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
|
||||||
|
@ -52,6 +52,10 @@ public:
|
|||||||
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||||
unsigned &DstReg, unsigned &SubIdx) const override;
|
unsigned &DstReg, unsigned &SubIdx) const override;
|
||||||
|
|
||||||
|
bool
|
||||||
|
areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
|
||||||
|
AliasAnalysis *AA = nullptr) const override;
|
||||||
|
|
||||||
unsigned isLoadFromStackSlot(const MachineInstr *MI,
|
unsigned isLoadFromStackSlot(const MachineInstr *MI,
|
||||||
int &FrameIndex) const override;
|
int &FrameIndex) const override;
|
||||||
unsigned isStoreToStackSlot(const MachineInstr *MI,
|
unsigned isStoreToStackSlot(const MachineInstr *MI,
|
||||||
@ -90,6 +94,10 @@ public:
|
|||||||
unsigned &Offset,
|
unsigned &Offset,
|
||||||
const TargetRegisterInfo *TRI) const override;
|
const TargetRegisterInfo *TRI) const override;
|
||||||
|
|
||||||
|
bool getLdStBaseRegImmOfsWidth(MachineInstr *LdSt, unsigned &BaseReg,
|
||||||
|
int &Offset, int &Width,
|
||||||
|
const TargetRegisterInfo *TRI) const;
|
||||||
|
|
||||||
bool enableClusterLoads() const override { return true; }
|
bool enableClusterLoads() const override { return true; }
|
||||||
|
|
||||||
bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
|
bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
|
||||||
|
31
test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
Normal file
31
test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
|
||||||
|
; Check that the scheduler moves the load from a[1] past the store into a[2].
|
||||||
|
@a = common global i32* null, align 8
|
||||||
|
@m = common global i32 0, align 4
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
; @func stores %i through the pointer loaded from @a at element index 2, then
; loads element index 1 through the same pointer and returns the loaded value.
; It also stores %k + %i to @m. The CHECK lines below expect the ldr at
; offset #4 to appear before the str at offset #8 in the output, both using
; the same base register (captured as REG).
define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
|
||||||
|
entry:
|
||||||
|
; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
|
||||||
|
; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
|
||||||
|
; %0 is the shared base pointer for both array accesses below.
%0 = load i32** @a, align 8, !tbaa !1
|
||||||
|
; Store to element 2 comes first in program order ...
%arrayidx = getelementptr inbounds i32* %0, i64 2
|
||||||
|
store i32 %i, i32* %arrayidx, align 4, !tbaa !5
|
||||||
|
; ... followed by the load from element 1 off the same base.
%arrayidx1 = getelementptr inbounds i32* %0, i64 1
|
||||||
|
%1 = load i32* %arrayidx1, align 4, !tbaa !5
|
||||||
|
%add = add nsw i32 %k, %i
|
||||||
|
store i32 %add, i32* @m, align 4, !tbaa !5
|
||||||
|
ret i32 %1
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
||||||
|
|
||||||
|
!llvm.ident = !{!0}
|
||||||
|
|
||||||
|
!0 = metadata !{metadata !"clang version 3.6.0 "}
|
||||||
|
!1 = metadata !{metadata !2, metadata !2, i64 0}
|
||||||
|
!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
|
||||||
|
!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
|
||||||
|
!4 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||||
|
!5 = metadata !{metadata !6, metadata !6, i64 0}
|
||||||
|
!6 = metadata !{metadata !"int", metadata !3, i64 0}
|
Loading…
x
Reference in New Issue
Block a user