ARM: correct liveness flags during ARMLoadStoreOpt
When we had a sequence like:

    s1 = VLDRS [r0, 1], Q0<imp-def>
    s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
    s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
    s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>

we were gathering the {s0, s1} loads below the s3 load. This is fine, but
confused the verifier since now the s3 load had Q0<imp-use> with no definition
above it.

This should mark such uses <undef> as well. The liveness structure at the
beginning and end of the block is unaffected, and the true sN definitions
should prevent any dodgy reorderings being introduced elsewhere.

rdar://problem/15124449

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192344 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: parent 15de63cfde, commit acd79ce0ad
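The new findUsesOfImpDef helper added in the diff below works in two passes: it first gathers every use of the implicitly-defined register between the defining instruction and the planned LDM/STM position while tracking the earliest point at which the value is redefined or killed, and then keeps only the uses that fall before that cutoff. The following is a minimal standalone sketch of that cutoff logic, using plain structs and made-up field names in place of LLVM's MachineInstr/MachineOperand; it is an illustration of the idea, not the pass itself.

// Standalone sketch of the two-pass cutoff idea behind findUsesOfImpDef.
// The struct and field names here are illustrative, not LLVM's.
#include <algorithm>
#include <cstdio>
#include <map>
#include <vector>

struct MemOp {
  unsigned Position;   // linear position of the instruction in the block
  bool DefinesReg;     // instruction (re)defines the register in question
  bool UsesReg;        // instruction carries an imp-use of the register
  bool Kills;          // that use carries a kill flag
};

// Collect the positions of uses that really read the definition made at
// RangeBegin, given that the merged LDM/STM will sit at RangeEnd.
static std::vector<unsigned> usesOfImpDef(const std::vector<MemOp> &MemOps,
                                          unsigned RangeBegin,
                                          unsigned RangeEnd) {
  std::map<unsigned, const MemOp *> Uses; // sorted by position
  unsigned LastLivePos = RangeEnd;

  // Pass 1: gather candidate uses and find where the value stops being live
  // (a later redefinition or a killing use ends the range).
  for (const MemOp &MO : MemOps) {
    if (MO.Position <= RangeBegin || MO.Position > RangeEnd)
      continue;
    if (MO.DefinesReg || (MO.UsesReg && MO.Kills))
      LastLivePos = std::min(LastLivePos, MO.Position);
    if (MO.UsesReg)
      Uses[MO.Position] = &MO;
  }

  // Pass 2: keep only the uses inside the live range; these correspond to the
  // operands the real pass marks <undef> once the merge happens.
  std::vector<unsigned> Result;
  for (const auto &U : Uses) {
    if (U.first > LastLivePos)
      break; // map is position-sorted, so nothing later can qualify
    Result.push_back(U.first);
  }
  return Result;
}

int main() {
  // Mirror of the commit-message example: the Q0 imp-def on the s1 load
  // (position 1) is read by the later loads until each redefines Q0.
  std::vector<MemOp> MemOps = {
      {1, true, false, false}, // s1 = VLDRS ..., Q0<imp-def>
      {2, true, true, true},   // s3 = VLDRS ..., Q0<imp-use,kill>, Q0<imp-def>
      {3, true, true, true},   // s0 = VLDRS ..., Q0<imp-use,kill>, Q0<imp-def>
      {4, true, true, true},   // s2 = VLDRS ..., Q0<imp-use,kill>, Q0<imp-def>
  };
  for (unsigned Pos : usesOfImpDef(MemOps, /*RangeBegin=*/1, /*RangeEnd=*/3))
    std::printf("use at position %u would be marked undef\n", Pos);
  return 0;
}

Run on the example data, the sketch reports only the use at position 2 (the imp-use on the s3 load), which corresponds to the operand the real pass marks <undef> after the merge.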
@@ -90,6 +90,10 @@ namespace {
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
 
+    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+                          const MemOpQueue &MemOps, unsigned DefReg,
+                          unsigned RangeBegin, unsigned RangeEnd);
+
     bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                   int Offset, unsigned Base, bool BaseKill, int Opcode,
                   ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
   return true;
 }
 
+/// \brief Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between RangeStart
+/// and RangeEnd must be modified to use an undefined value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+  std::map<unsigned, MachineOperand *> Uses;
+  unsigned LastLivePos = RangeEnd;
+
+  // First we find all uses of this register with Position between RangeBegin
+  // and RangeEnd, any or all of these could be uses of a definition at
+  // RangeBegin. We also record the latest position a definition at RangeBegin
+  // would be considered live.
+  for (unsigned i = 0; i < MemOps.size(); ++i) {
+    MachineInstr &MI = *MemOps[i].MBBI;
+    unsigned MIPosition = MemOps[i].Position;
+    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+      continue;
+
+    // If this instruction defines the register, then any later use will be of
+    // that definition rather than ours.
+    if (MI.definesRegister(DefReg))
+      LastLivePos = std::min(LastLivePos, MIPosition);
+
+    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+    if (!UseOp)
+      continue;
+
+    // If this instruction kills the register then (assuming liveness is
+    // correct when we start) we don't need to think about anything after here.
+    if (UseOp->isKill())
+      LastLivePos = std::min(LastLivePos, MIPosition);
+
+    Uses[MIPosition] = UseOp;
+  }
+
+  // Now we traverse the list of all uses, and append the ones that actually use
+  // our definition to the requested list.
+  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+                                                      E = Uses.end();
+       I != E; ++I) {
+    // List is sorted by position so once we've found one out of range there
+    // will be no more to consider.
+    if (I->first > LastLivePos)
+      break;
+    UsesOfImpDefs.push_back(I->second);
+  }
+}
+
 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
 // success.
 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
 
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
   SmallVector<unsigned, 8> ImpDefs;
+  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
     unsigned Reg = memOps[i].Reg;
     // If we are inserting the merged operation after an operation that
@@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
       unsigned DefReg = MO->getReg();
       if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
         ImpDefs.push_back(DefReg);
+
+      // There may be other uses of the definition between this instruction and
+      // the eventual LDM/STM position. These should be marked undef if the
+      // merge takes place.
+      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+                       insertPos);
     }
   }
 
@@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
 
   // Merge succeeded, update records.
   Merges.push_back(prior(Loc));
+
+  // In gathering loads together, we may have moved the imp-def of a register
+  // past one of its uses. This is OK, since we know better than the rest of
+  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
+  // affected uses.
+  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+                                                   E = UsesOfImpDefs.end();
+       I != E; ++I)
+    (*I)->setIsUndef();
+
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
     // Remove kill flags from any memops that come before insertPos.
     if (Regs[i-memOpsBegin].second) {
test/CodeGen/ARM/vldm-liveness.ll (new file, 40 lines)
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
+
+; ARM load store optimizer was dealing with a sequence like:
+;     s1 = VLDRS [r0, 1], Q0<imp-def>
+;     s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
+;     s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
+;     s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
+;
+; It decided to combine the {s0, s1} loads into a single instruction in the
+; third position. However, this leaves the instruction defining s3 with a stray
+; imp-use of Q0, which is undefined.
+;
+; The verifier catches this, so this test just makes sure that appropriate
+; liveness flags are added.
+;
+; I believe the change will be tested as long as the vldmia is not the first of
+; the loads. Earlier optimisations may perturb the output over time, but
+; fiddling the indices should be sufficient to restore the test.
+
+define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
+; CHECK-LABEL: foo:
+; CHECK: vldr s3, [r0, #8]
+; CHECK: vldmia r0, {s0, s1}
+; CHECK: vldr s2, [r0, #16]
+   %off0 = getelementptr float* %ptr, i32 0
+   %val0 = load float* %off0
+   %off1 = getelementptr float* %ptr, i32 1
+   %val1 = load float* %off1
+   %off4 = getelementptr float* %ptr, i32 4
+   %val4 = load float* %off4
+   %off2 = getelementptr float* %ptr, i32 2
+   %val2 = load float* %off2
+
+   %vec1 = insertelement <4 x float> undef, float %val0, i32 0
+   %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
+   %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2
+   %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3
+
+   ret <4 x float> %vec4
+}