From acd79ce0ada50c7a437757d188f410d67030fbb9 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 10 Oct 2013 09:28:20 +0000 Subject: [PATCH] ARM: correct liveness flags during ARMLoadStoreOpt When we had a sequence like: s1 = VLDRS [r0, 1], Q0 s3 = VLDRS [r0, 2], Q0, Q0 s0 = VLDRS [r0, 0], Q0, Q0 s2 = VLDRS [r0, 4], Q0, Q0 we were gathering the {s0, s1} loads below the s3 load. This is fine, but confused the verifier since now the s3 load had Q0 with no definition above it. This should mark such uses as well. The liveness structure at the beginning and end of the block is unaffected, and the true sN definitions should prevent any dodgy reorderings being introduced elsewhere. rdar://problem/15124449 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192344 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 77 ++++++++++++++++++++++++ test/CodeGen/ARM/vldm-liveness.ll | 40 ++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 test/CodeGen/ARM/vldm-liveness.ll diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 237adb9b22e..61596d54b69 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -90,6 +90,10 @@ namespace { typedef SmallVector MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; + void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs, + const MemOpQueue &MemOps, unsigned DefReg, + unsigned RangeBegin, unsigned RangeEnd); + bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return true; } +/// \brief Find all instructions using a given imp-def within a range. +/// +/// We are trying to combine a range of instructions, one of which (located at +/// position RangeBegin) implicitly defines a register. The final LDM/STM will +/// be placed at RangeEnd, and so any uses of this definition between RangeStart +/// and RangeEnd must be modified to use an undefined value. +/// +/// The live range continues until we find a second definition or one of the +/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so +/// we must consider all uses and decide which are relevant in a second pass. +void ARMLoadStoreOpt::findUsesOfImpDef( + SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps, + unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { + std::map Uses; + unsigned LastLivePos = RangeEnd; + + // First we find all uses of this register with Position between RangeBegin + // and RangeEnd, any or all of these could be uses of a definition at + // RangeBegin. We also record the latest position a definition at RangeBegin + // would be considered live. + for (unsigned i = 0; i < MemOps.size(); ++i) { + MachineInstr &MI = *MemOps[i].MBBI; + unsigned MIPosition = MemOps[i].Position; + if (MIPosition <= RangeBegin || MIPosition > RangeEnd) + continue; + + // If this instruction defines the register, then any later use will be of + // that definition rather than ours. + if (MI.definesRegister(DefReg)) + LastLivePos = std::min(LastLivePos, MIPosition); + + MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); + if (!UseOp) + continue; + + // If this instruction kills the register then (assuming liveness is + // correct when we start) we don't need to think about anything after here. + if (UseOp->isKill()) + LastLivePos = std::min(LastLivePos, MIPosition); + + Uses[MIPosition] = UseOp; + } + + // Now we traverse the list of all uses, and append the ones that actually use + // our definition to the requested list. + for (std::map::iterator I = Uses.begin(), + E = Uses.end(); + I != E; ++I) { + // List is sorted by position so once we've found one out of range there + // will be no more to consider. + if (I->first > LastLivePos) + break; + UsesOfImpDefs.push_back(I->second); + } +} + // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on // success. void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, @@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, SmallVector, 8> Regs; SmallVector ImpDefs; + SmallVector UsesOfImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that @@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, unsigned DefReg = MO->getReg(); if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) ImpDefs.push_back(DefReg); + + // There may be other uses of the definition between this instruction and + // the eventual LDM/STM position. These should be marked undef if the + // merge takes place. + findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, + insertPos); } } @@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // Merge succeeded, update records. Merges.push_back(prior(Loc)); + + // In gathering loads together, we may have moved the imp-def of a register + // past one of its uses. This is OK, since we know better than the rest of + // LLVM what's OK with ARM loads and stores; but we still have to adjust the + // affected uses. + for (SmallVectorImpl::iterator I = UsesOfImpDefs.begin(), + E = UsesOfImpDefs.end(); + I != E; ++I) + (*I)->setIsUndef(); + for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any memops that come before insertPos. if (Regs[i-memOpsBegin].second) { diff --git a/test/CodeGen/ARM/vldm-liveness.ll b/test/CodeGen/ARM/vldm-liveness.ll new file mode 100644 index 00000000000..751f447077b --- /dev/null +++ b/test/CodeGen/ARM/vldm-liveness.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple thumbv7-apple-ios -verify-machineinstrs -o - %s | FileCheck %s + +; ARM load store optimizer was dealing with a sequence like: +; s1 = VLDRS [r0, 1], Q0 +; s3 = VLDRS [r0, 2], Q0, Q0 +; s0 = VLDRS [r0, 0], Q0, Q0 +; s2 = VLDRS [r0, 4], Q0, Q0 +; +; It decided to combine the {s0, s1} loads into a single instruction in the +; third position. However, this leaves the instruction defining s3 with a stray +; imp-use of Q0, which is undefined. +; +; The verifier catches this, so this test just makes sure that appropriate +; liveness flags are added. +; +; I believe the change will be tested as long as the vldmia is not the first of +; the loads. Earlier optimisations may perturb the output over time, but +; fiddling the indices should be sufficient to restore the test. + +define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) { +; CHECK-LABEL: foo: +; CHECK: vldr s3, [r0, #8] +; CHECK: vldmia r0, {s0, s1} +; CHECK: vldr s2, [r0, #16] + %off0 = getelementptr float* %ptr, i32 0 + %val0 = load float* %off0 + %off1 = getelementptr float* %ptr, i32 1 + %val1 = load float* %off1 + %off4 = getelementptr float* %ptr, i32 4 + %val4 = load float* %off4 + %off2 = getelementptr float* %ptr, i32 2 + %val2 = load float* %off2 + + %vec1 = insertelement <4 x float> undef, float %val0, i32 0 + %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1 + %vec3 = insertelement <4 x float> %vec2, float %val4, i32 2 + %vec4 = insertelement <4 x float> %vec3, float %val2, i32 3 + + ret <4 x float> %vec4 +}