Look through PHIs to find additional register sources

- Teaches the ValueTracker in the PeepholeOptimizer to look through PHI
instructions.
- Add findNextSourceAndRewritePHI method to lookup into multiple sources
returnted by the ValueTracker and rewrite PHIs with new sources.

With these changes we can find more register sources and rewrite more
copies to allow coaslescing of bitcast instructions. Hence, we eliminate
unnecessary VR64 <-> GR64 copies in x86, but it could be extended to
other archs by marking "isBitcast" on target specific instructions. The
x86 example follows:

A:
  psllq %mm1, %mm0
  movd  %mm0, %r9
  jmp C

B:
  por %mm1, %mm0
  movd  %mm0, %r9
  jmp C

C:
  movd  %r9, %mm0
  pshufw  $238, %mm0, %mm0

Becomes:

A:
  psllq %mm1, %mm0
  jmp C

B:
  por %mm1, %mm0
  jmp C

C:
  pshufw  $238, %mm0, %mm0

Differential Revision: http://reviews.llvm.org/D11197

rdar://problem/20404526

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2015-07-15 15:35:23 +00:00
parent 7c72a2254c
commit b11d8102cf
4 changed files with 377 additions and 82 deletions

View File

@ -1270,6 +1270,33 @@ private:
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
}; };
/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair.
template<>
struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> {
typedef DenseMapInfo<unsigned> RegInfo;
static inline TargetInstrInfo::RegSubRegPair getEmptyKey() {
return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(),
RegInfo::getEmptyKey());
}
static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() {
return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(),
RegInfo::getTombstoneKey());
}
/// \brief Reuse getHashValue implementation from
/// std::pair<unsigned, unsigned>.
static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) {
std::pair<unsigned, unsigned> PairVal =
std::make_pair(Val.Reg, Val.SubReg);
return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
}
static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS,
const TargetInstrInfo::RegSubRegPair &RHS) {
return RegInfo::isEqual(LHS.Reg, RHS.Reg) &&
RegInfo::isEqual(LHS.SubReg, RHS.SubReg);
}
};
} // End llvm namespace } // End llvm namespace
#endif #endif

View File

@ -98,6 +98,12 @@ static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
cl::desc("Disable advanced copy optimization")); cl::desc("Disable advanced copy optimization"));
// Limit the number of PHI instructions to process
// in PeepholeOptimizer::getNextSource.
static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumImmFold, "Number of move immediate folded");
@ -132,6 +138,10 @@ namespace {
} }
} }
/// \brief Track Def -> Use info used for rewriting copies.
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
private: private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@ -143,7 +153,8 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI, bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs); SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool findNextSource(unsigned &Reg, unsigned &SubReg); bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI, bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs, SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs); DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@ -220,6 +231,20 @@ namespace {
assert(Idx < getNumSources() && "SubReg source out of index"); assert(Idx < getNumSources() && "SubReg source out of index");
return RegSrcs[Idx].SubReg; return RegSrcs[Idx].SubReg;
} }
bool operator==(const ValueTrackerResult &Other) {
if (Other.getInst() != getInst())
return false;
if (Other.getNumSources() != getNumSources())
return false;
for (int i = 0, e = Other.getNumSources(); i != e; ++i)
if (Other.getSrcReg(i) != getSrcReg(i) ||
Other.getSrcSubReg(i) != getSrcSubReg(i))
return false;
return true;
}
}; };
/// \brief Helper class to track the possible sources of a value defined by /// \brief Helper class to track the possible sources of a value defined by
@ -281,6 +306,8 @@ namespace {
/// \brief Specialized version of getNextSource for SubregToReg /// \brief Specialized version of getNextSource for SubregToReg
/// instructions. /// instructions.
ValueTrackerResult getNextSourceFromSubregToReg(); ValueTrackerResult getNextSourceFromSubregToReg();
/// \brief Specialized version of getNextSource for PHI instructions.
ValueTrackerResult getNextSourceFromPHI();
public: public:
/// \brief Create a ValueTracker instance for the value defined by \p Reg. /// \brief Create a ValueTracker instance for the value defined by \p Reg.
@ -583,58 +610,133 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
/// \brief Try to find the next source that share the same register file /// \brief Try to find the next source that share the same register file
/// for the value defined by \p Reg and \p SubReg. /// for the value defined by \p Reg and \p SubReg.
/// When true is returned, \p Reg and \p SubReg are updated with the /// When true is returned, the \p RewriteMap can be used by the client to
/// register number and sub-register index of the new source. /// retrieve all Def -> Use along the way up to the next source. Any found
/// Use that is not itself a key for another entry, is the next source to
/// use. During the search for the next source, multiple sources can be found
/// given multiple incoming sources of a PHI instruction. In this case, we
/// look in each PHI source for the next source; all found next sources must
/// share the same register file as \p Reg and \p SubReg. The client should
/// then be capable to rewrite all intermediate PHIs to get the next source.
/// \return False if no alternative sources are available. True otherwise. /// \return False if no alternative sources are available. True otherwise.
bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap) {
// Do not try to find a new source for a physical register. // Do not try to find a new source for a physical register.
// So far we do not have any motivating example for doing that. // So far we do not have any motivating example for doing that.
// Thus, instead of maintaining untested code, we will revisit that if // Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point. // that changes at some point.
if (TargetRegisterInfo::isPhysicalRegister(Reg)) if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false; return false;
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
unsigned DefSubReg = SubReg;
unsigned Src; SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
unsigned SrcSubReg; TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
SrcToLook.push_back(CurSrcPair);
bool ShouldRewrite = false; bool ShouldRewrite = false;
unsigned PHILimit = RewritePHILimit;
while (!SrcToLook.empty() && PHILimit) {
TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
CurSrcPair = Pair;
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
!DisableAdvCopyOpt, TII);
ValueTrackerResult Res;
do {
// Follow the chain of copies until we reach the top of the use-def chain // Follow the chain of copies until we reach the top of the use-def chain
// or find a more suitable source. // or find a more suitable source.
ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); Res = ValTracker.getNextSource();
do {
ValueTrackerResult Res = ValTracker.getNextSource();
if (!Res.isValid()) if (!Res.isValid())
break; break;
Src = Res.getSrcReg(0);
SrcSubReg = Res.getSrcSubReg(0);
// Do not extend the live-ranges of physical registers as they add // Insert the Def -> Use entry for the recently found source.
// constraints to the register allocator. ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
// Moreover, if we want to extend the live-range of a physical register, if (CurSrcRes.isValid()) {
// unlike SSA virtual register, we will have to check that they are not assert(CurSrcRes == Res && "ValueTrackerResult found must match");
// redefine before the related use.
if (TargetRegisterInfo::isPhysicalRegister(Src))
break; break;
}
RewriteMap.insert(std::make_pair(CurSrcPair, Res));
const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); // ValueTrackerResult usually have one source unless it's the result from
// a PHI instruction. Add the found PHI edges to be looked up further.
unsigned NumSrcs = Res.getNumSources();
if (NumSrcs > 1) {
PHILimit--;
for (unsigned i = 0; i < NumSrcs; ++i)
SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
Res.getSrcReg(i), Res.getSrcSubReg(i)));
break;
}
// If this source does not incur a cross register bank copy, use it. CurSrcPair.Reg = Res.getSrcReg(0);
ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, CurSrcPair.SubReg = Res.getSrcSubReg(0);
SrcSubReg); // Do not extend the live-ranges of physical registers as they add
} while (!ShouldRewrite); // constraints to the register allocator. Moreover, if we want to extend
// the live-range of a physical register, unlike SSA virtual register,
// If we did not find a more suitable source, there is nothing to optimize. // we will have to check that they aren't redefine before the related use.
if (!ShouldRewrite || Src == Reg) if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
return false;
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
// If this source does not incur a cross register bank copy, use it.
ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, SubReg, SrcRC,
CurSrcPair.SubReg);
} while (!ShouldRewrite);
// Continue looking for new sources...
if (Res.isValid())
continue;
if (!PHILimit) {
DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
return false;
}
// Do not continue searching for a new source if the there's at least
// one use-def which cannot be rewritten.
if (!ShouldRewrite)
return false;
}
// If we did not find a more suitable source, there is nothing to optimize.
if (CurSrcPair.Reg == Reg)
return false; return false;
Reg = Src;
SubReg = SrcSubReg;
return true; return true;
} }
/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
/// guaranteed to have the same register class. This is necessary whenever we
/// successfully traverse a PHI instruction and find suitable sources coming
/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
/// suitable to be used in a new COPY instruction.
MachineInstr *
insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
MachineInstr *OrigPHI) {
assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
unsigned NewVR = MRI->createVirtualRegister(NewRC);
MachineBasicBlock *MBB = OrigPHI->getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
TII->get(TargetOpcode::PHI), NewVR);
unsigned MBBOpIdx = 2;
for (auto RegPair : SrcRegs) {
MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB());
// Since we're extended the lifetime of RegPair.Reg, clear the
// kill flags to account for that and make RegPair.Reg reaches
// the new PHI.
MRI->clearKillFlags(RegPair.Reg);
MBBOpIdx += 2;
}
return MIB;
}
namespace { namespace {
/// \brief Helper class to rewrite the arguments of a copy-like instruction. /// \brief Helper class to rewrite the arguments of a copy-like instruction.
class CopyRewriter { class CopyRewriter {
@ -708,16 +810,65 @@ public:
return true; return true;
} }
/// \brief Rewrite the current source with \p NewSrcReg and \p NewSecSubReg /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
/// by creating a new COPY instruction. \p DefReg and \p DefSubReg contain the /// the new source to use for rewrite. If \p HandleMultipleSources is true and
/// definition to be rewritten from the original copylike instruction. /// multiple sources for a given \p Def are found along the way, we found a
/// \return The new COPY if the rewriting was possible, nullptr otherwise. /// PHI instructions that needs to be rewritten.
/// This is needed to handle Uncoalescable copies, since they are copy /// TODO: HandleMultipleSources should be removed once we test PHI handling
/// like instructions that aren't recognized by the register allocator. /// with coalescable copies.
virtual MachineInstr *RewriteCurrentSource(unsigned DefReg, TargetInstrInfo::RegSubRegPair
unsigned DefSubReg, getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
unsigned NewSrcReg, TargetInstrInfo::RegSubRegPair Def,
unsigned NewSrcSubReg) { PeepholeOptimizer::RewriteMapTy &RewriteMap,
bool HandleMultipleSources = true) {
TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
do {
ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
// If there are no entries on the map, LookupSrc is the new source.
if (!Res.isValid())
return LookupSrc;
// There's only one source for this definition, keep searching...
unsigned NumSrcs = Res.getNumSources();
if (NumSrcs == 1) {
LookupSrc.Reg = Res.getSrcReg(0);
LookupSrc.SubReg = Res.getSrcSubReg(0);
continue;
}
if (!HandleMultipleSources)
break;
// Multiple sources, recurse into each source to find a new source
// for it. Then, rewrite the PHI accordingly to its new edges.
SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs;
for (unsigned i = 0; i < NumSrcs; ++i) {
TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i),
Res.getSrcSubReg(i));
NewPHISrcs.push_back(
getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
}
// Build the new PHI node and return its def register as the new source.
MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
const MachineOperand &MODef = NewPHI->getOperand(0);
return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
} while (1);
return TargetInstrInfo::RegSubRegPair(0, 0);
}
/// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
/// and create a new COPY instruction. More info about RewriteMap in
/// PeepholeOptimizer::findNextSource. Right now this is only used to handle
/// Uncoalescable copies, since they are copy like instructions that aren't
/// recognized by the register allocator.
virtual MachineInstr *
RewriteSource(TargetInstrInfo::RegSubRegPair Def,
PeepholeOptimizer::RewriteMapTy &RewriteMap) {
return nullptr; return nullptr;
} }
}; };
@ -765,31 +916,41 @@ public:
return true; return true;
} }
/// \brief Rewrite the current source with \p NewSrcReg and \p NewSrcSubReg /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
/// by creating a new COPY instruction. \p DefReg and \p DefSubReg contain the /// and create a new COPY instruction. More info about RewriteMap in
/// definition to be rewritten from the original copylike instruction. /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
/// \return The new COPY if the rewriting was possible, nullptr otherwise. /// Uncoalescable copies, since they are copy like instructions that aren't
MachineInstr *RewriteCurrentSource(unsigned DefReg, unsigned DefSubReg, /// recognized by the register allocator.
unsigned NewSrcReg, MachineInstr *
unsigned NewSrcSubReg) override { RewriteSource(TargetInstrInfo::RegSubRegPair Def,
assert(!TargetRegisterInfo::isPhysicalRegister(DefReg) && PeepholeOptimizer::RewriteMapTy &RewriteMap) override {
assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
"We do not rewrite physical registers"); "We do not rewrite physical registers");
const TargetRegisterClass *DefRC = MRI.getRegClass(DefReg); // Find the new source to use in the COPY rewrite.
TargetInstrInfo::RegSubRegPair NewSrc =
getNewSource(&MRI, &TII, Def, RewriteMap);
// Insert the COPY.
const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg);
unsigned NewVR = MRI.createVirtualRegister(DefRC); unsigned NewVR = MRI.createVirtualRegister(DefRC);
MachineInstr *NewCopy = MachineInstr *NewCopy =
BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
TII.get(TargetOpcode::COPY), NewVR) TII.get(TargetOpcode::COPY), NewVR)
.addReg(NewSrcReg, 0, NewSrcSubReg); .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
NewCopy->getOperand(0).setSubReg(DefSubReg); NewCopy->getOperand(0).setSubReg(Def.SubReg);
if (DefSubReg) if (Def.SubReg)
NewCopy->getOperand(0).setIsUndef(); NewCopy->getOperand(0).setIsUndef();
MRI.replaceRegWith(DefReg, NewVR); MRI.replaceRegWith(Def.Reg, NewVR);
MRI.clearKillFlags(NewVR); MRI.clearKillFlags(NewVR);
// We extended the lifetime of NewSrc.Reg, clear the kill flags to
// account for that.
MRI.clearKillFlags(NewSrc.Reg);
return NewCopy; return NewCopy;
} }
}; };
@ -1033,16 +1194,25 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
TrackSubReg)) { TrackSubReg)) {
unsigned NewSrc = TrackReg; // Keep track of PHI nodes and its incoming edges when looking for sources.
unsigned NewSubReg = TrackSubReg; RewriteMapTy RewriteMap;
// Try to find a more suitable source. If we failed to do so, or get the // Try to find a more suitable source. If we failed to do so, or get the
// actual source, move to the next source. // actual source, move to the next source.
if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) if (!findNextSource(TrackReg, TrackSubReg, RewriteMap))
continue; continue;
// Get the new source to rewrite. TODO: Only enable handling of multiple
// sources (PHIs) once we have a motivating example and testcases for it.
TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
if (SrcReg == NewSrc.Reg)
continue;
// Rewrite source. // Rewrite source.
if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
// We may have extended the live-range of NewSrc, account for that. // We may have extended the live-range of NewSrc, account for that.
MRI->clearKillFlags(NewSrc); MRI->clearKillFlags(NewSrc.Reg);
Changed = true; Changed = true;
} }
} }
@ -1071,9 +1241,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
// Check if we can rewrite all the values defined by this instruction. // Check if we can rewrite all the values defined by this instruction.
SmallVector< SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
4> RewritePairs;
// Get the right rewriter for the current copy. // Get the right rewriter for the current copy.
std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
// If none exists, bails out. // If none exists, bails out.
@ -1083,39 +1251,32 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
// Rewrite each rewritable source by generating new COPYs. This works // Rewrite each rewritable source by generating new COPYs. This works
// differently from optimizeCoalescableCopy since it first makes sure that all // differently from optimizeCoalescableCopy since it first makes sure that all
// definitions can be rewritten. // definitions can be rewritten.
unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; RewriteMapTy RewriteMap;
while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
TrackSubReg)) { while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
CopyDefSubReg)) {
// If a physical register is here, this is probably for a good reason. // If a physical register is here, this is probably for a good reason.
// Do not rewrite that. // Do not rewrite that.
if (TargetRegisterInfo::isPhysicalRegister(TrackReg)) if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
return false; return false;
// If we do not know how to rewrite this definition, there is no point // If we do not know how to rewrite this definition, there is no point
// in trying to kill this instruction. // in trying to kill this instruction.
TargetInstrInfo::RegSubRegPair Def(TrackReg, TrackSubReg); TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
TargetInstrInfo::RegSubRegPair Src = Def; if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
if (!findNextSource(Src.Reg, Src.SubReg))
return false; return false;
RewritePairs.push_back(std::make_pair(Def, Src)); RewritePairs.push_back(Def);
} }
// The change is possible for all defs, do it. // The change is possible for all defs, do it.
for (const auto &PairDefSrc : RewritePairs) { for (const auto &Def : RewritePairs) {
const auto &Def = PairDefSrc.first;
const auto &Src = PairDefSrc.second;
// Rewrite the "copy" in a way the register coalescer understands. // Rewrite the "copy" in a way the register coalescer understands.
MachineInstr *NewCopy = CpyRewriter->RewriteCurrentSource( MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
Def.Reg, Def.SubReg, Src.Reg, Src.SubReg);
assert(NewCopy && "Should be able to always generate a new copy"); assert(NewCopy && "Should be able to always generate a new copy");
// We extended the lifetime of Src and clear the kill flags to
// account for that.
MRI->clearKillFlags(Src.Reg);
LocalMIs.insert(NewCopy); LocalMIs.insert(NewCopy);
} }
// MI is now dead. // MI is now dead.
MI->eraseFromParent(); MI->eraseFromParent();
++NumUncoalescableCopies; ++NumUncoalescableCopies;
@ -1523,6 +1684,26 @@ ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
Def->getOperand(3).getImm()); Def->getOperand(3).getImm());
} }
/// \brief Explore each PHI incoming operand and return its sources
ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
assert(Def->isPHI() && "Invalid definition");
ValueTrackerResult Res;
// If we look for a different subreg, bails as we do not
// support composing subreg yet.
if (Def->getOperand(0).getSubReg() != DefSubReg)
return ValueTrackerResult();
// Return all register sources for PHI instructions.
for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
auto &MO = Def->getOperand(i);
assert(MO.isReg() && "Invalid PHI instruction");
Res.addSource(MO.getReg(), MO.getSubReg());
}
return Res;
}
ValueTrackerResult ValueTracker::getNextSourceImpl() { ValueTrackerResult ValueTracker::getNextSourceImpl() {
assert(Def && "This method needs a valid definition"); assert(Def && "This method needs a valid definition");
@ -1545,6 +1726,8 @@ ValueTrackerResult ValueTracker::getNextSourceImpl() {
return getNextSourceFromExtractSubreg(); return getNextSourceFromExtractSubreg();
if (Def->isSubregToReg()) if (Def->isSubregToReg())
return getNextSourceFromSubregToReg(); return getNextSourceFromSubregToReg();
if (Def->isPHI())
return getNextSourceFromPHI();
return ValueTrackerResult(); return ValueTrackerResult();
} }

View File

@ -249,6 +249,7 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
(MMX_X86movd2w (x86mmx VR64:$src)))], (MMX_X86movd2w (x86mmx VR64:$src)))],
IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}", "movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))], [(set VR64:$dst, (bitconvert GR64:$src))],
@ -262,7 +263,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
// These are 64 bit moves, but since the OS X assembler doesn't // These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as // recognize a register-register movq, we write them as
// movd. // movd.
let SchedRW = [WriteMove] in { let SchedRW = [WriteMove], isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src), (outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", "movd\t{$src, $dst|$dst, $src}",

View File

@ -0,0 +1,84 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s
%SA = type <{ %union.anon, i32, [4 x i8], i8*, i8*, i8*, i32, [4 x i8] }>
%union.anon = type { <1 x i64> }
; Check that extra movd (copy) instructions aren't generated.
define i32 @test(%SA* %pSA, i16* %A, i32 %B, i32 %C, i32 %D, i8* %E) {
entry:
; CHECK-LABEL: test
; CHECK: # BB#0:
; CHECK-NEXT: pshufw
; CHECK-NEXT: movd
; CHECK-NOT: movd
; CHECK-NEXT: testl
%shl = shl i32 1, %B
%shl1 = shl i32 %C, %B
%shl2 = shl i32 1, %D
%v = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 0, i32 0
%v0 = load <1 x i64>, <1 x i64>* %v, align 8
%SA0 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 1
%v1 = load i32, i32* %SA0, align 4
%SA1 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 3
%v2 = load i8*, i8** %SA1, align 8
%SA2 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 4
%v3 = load i8*, i8** %SA2, align 8
%v4 = bitcast <1 x i64> %v0 to <4 x i16>
%v5 = bitcast <4 x i16> %v4 to x86_mmx
%v6 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v5, i8 -18)
%v7 = bitcast x86_mmx %v6 to <4 x i16>
%v8 = bitcast <4 x i16> %v7 to <1 x i64>
%v9 = extractelement <1 x i64> %v8, i32 0
%v10 = bitcast i64 %v9 to <2 x i32>
%v11 = extractelement <2 x i32> %v10, i32 0
%cmp = icmp eq i32 %v11, 0
br i1 %cmp, label %if.A, label %if.B
if.A:
; CHECK: %if.A
; CHECK-NEXT: movd
; CHECK-NEXT: psllq
%pa = phi <1 x i64> [ %v8, %entry ], [ %vx, %if.C ]
%v17 = extractelement <1 x i64> %pa, i32 0
%v18 = bitcast i64 %v17 to x86_mmx
%v19 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %v18, i32 %B) #2
%v20 = bitcast x86_mmx %v19 to i64
%v21 = insertelement <1 x i64> undef, i64 %v20, i32 0
%cmp3 = icmp eq i64 %v20, 0
br i1 %cmp3, label %if.C, label %merge
if.B:
%v34 = bitcast <1 x i64> %v8 to <4 x i16>
%v35 = bitcast <4 x i16> %v34 to x86_mmx
%v36 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v35, i8 -18)
%v37 = bitcast x86_mmx %v36 to <4 x i16>
%v38 = bitcast <4 x i16> %v37 to <1 x i64>
br label %if.C
if.C:
%vx = phi <1 x i64> [ %v21, %if.A ], [ %v38, %if.B ]
%cvt = bitcast <1 x i64> %vx to <2 x i32>
%ex = extractelement <2 x i32> %cvt, i32 0
%cmp2 = icmp eq i32 %ex, 0
br i1 %cmp2, label %if.A, label %merge
merge:
; CHECK: %merge
; CHECK-NOT: movd
; CHECK-NEXT: pshufw
%vy = phi <1 x i64> [ %v21, %if.A ], [ %vx, %if.C ]
%v130 = bitcast <1 x i64> %vy to <4 x i16>
%v131 = bitcast <4 x i16> %v130 to x86_mmx
%v132 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v131, i8 -18)
%v133 = bitcast x86_mmx %v132 to <4 x i16>
%v134 = bitcast <4 x i16> %v133 to <1 x i64>
%v135 = extractelement <1 x i64> %v134, i32 0
%v136 = bitcast i64 %v135 to <2 x i32>
%v137 = extractelement <2 x i32> %v136, i32 0
ret i32 %v137
}
declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)