ARM: correct liveness flags during ARMLoadStoreOpt

When we had a sequence like:

    s1 = VLDRS [r0, 1], Q0<imp-def>
    s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
    s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
    s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>

we were gathering the {s0, s1} loads below the s3 load. This is fine,
but confused the verifier since now the s3 load had Q0<imp-use> with
no definition above it.

This should mark such uses <undef> as well. The liveness structure at
the beginning and end of the block is unaffected, and the true sN
definitions should prevent any dodgy reorderings being introduced
elsewhere.

rdar://problem/15124449

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192344 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-10-10 09:28:20 +00:00
parent 15de63cfde
commit acd79ce0ad
2 changed files with 117 additions and 0 deletions

View File

@ -90,6 +90,10 @@ namespace {
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeStart
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
std::map<unsigned, MachineOperand *> Uses;
unsigned LastLivePos = RangeEnd;
// First we find all uses of this register with Position between RangeBegin
// and RangeEnd, any or all of these could be uses of a definition at
// RangeBegin. We also record the latest position a definition at RangeBegin
// would be considered live.
for (unsigned i = 0; i < MemOps.size(); ++i) {
MachineInstr &MI = *MemOps[i].MBBI;
unsigned MIPosition = MemOps[i].Position;
if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
continue;
// If this instruction defines the register, then any later use will be of
// that definition rather than ours.
if (MI.definesRegister(DefReg))
LastLivePos = std::min(LastLivePos, MIPosition);
MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
if (!UseOp)
continue;
// If this instruction kills the register then (assuming liveness is
// correct when we start) we don't need to think about anything after here.
if (UseOp->isKill())
LastLivePos = std::min(LastLivePos, MIPosition);
Uses[MIPosition] = UseOp;
}
// Now we traverse the list of all uses, and append the ones that actually use
// our definition to the requested list.
for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
E = Uses.end();
I != E; ++I) {
// List is sorted by position so once we've found one out of range there
// will be no more to consider.
if (I->first > LastLivePos)
break;
UsesOfImpDefs.push_back(I->second);
}
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 8> ImpDefs;
SmallVector<MachineOperand *, 8> UsesOfImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
unsigned DefReg = MO->getReg();
if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
ImpDefs.push_back(DefReg);
// There may be other uses of the definition between this instruction and
// the eventual LDM/STM position. These should be marked undef if the
// merge takes place.
findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
insertPos);
}
}
@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
// In gathering loads together, we may have moved the imp-def of a register
// past one of its uses. This is OK, since we know better than the rest of
// LLVM what's OK with ARM loads and stores; but we still have to adjust the
// affected uses.
for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
E = UsesOfImpDefs.end();
I != E; ++I)
(*I)->setIsUndef();
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {

View File

@ -0,0 +1,40 @@
; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s
; ARM load store optimizer was dealing with a sequence like:
; s1 = VLDRS [r0, 1], Q0<imp-def>
; s3 = VLDRS [r0, 2], Q0<imp-use,kill>, Q0<imp-def>
; s0 = VLDRS [r0, 0], Q0<imp-use,kill>, Q0<imp-def>
; s2 = VLDRS [r0, 4], Q0<imp-use,kill>, Q0<imp-def>
;
; It decided to combine the {s0, s1} loads into a single instruction in the
; third position. However, this leaves the instruction defining s3 with a stray
; imp-use of Q0, which is undefined.
;
; The verifier catches this, so this test just makes sure that appropriate
; liveness flags are added.
;
; I believe the change will be tested as long as the vldmia is not the first of
; the loads. Earlier optimisations may perturb the output over time, but
; fiddling the indices should be sufficient to restore the test.
define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
; CHECK-LABEL: foo:
; CHECK: vldr s3, [r0, #8]
; CHECK: vldmia r0, {s0, s1}
; CHECK: vldr s2, [r0, #16]
%off0 = getelementptr float* %ptr, i32 0
%val0 = load float* %off0
%off1 = getelementptr float* %ptr, i32 1
%val1 = load float* %off1
%off4 = getelementptr float* %ptr, i32 4
%val4 = load float* %off4
%off2 = getelementptr float* %ptr, i32 2
%val2 = load float* %off2
%vec1 = insertelement <4 x float> undef, float %val0, i32 0
%vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
%vec3 = insertelement <4 x float> %vec2, float %val4, i32 2
%vec4 = insertelement <4 x float> %vec3, float %val2, i32 3
ret <4 x float> %vec4
}