From 6fed9cabfd131090d338df2b91ae1821607aaa3d Mon Sep 17 00:00:00 2001
From: Matthias Braun
Date: Wed, 10 Dec 2014 01:12:00 +0000
Subject: [PATCH] Add function that translates subregister lane masks to other subregs.

This works like the composeSubRegIndices() function but transforms
a subregister lane mask instead of a subregister index.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223874 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetRegisterInfo.h | 15 ++++
 utils/TableGen/CodeGenRegisters.cpp      | 64 ++++++++++++++++
 utils/TableGen/CodeGenRegisters.h        | 15 ++++
 utils/TableGen/RegisterInfoEmitter.cpp   | 97 +++++++++++++++++++++++-
 4 files changed, 190 insertions(+), 1 deletion(-)

diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index ad4fdbe3958..d5c048e010e 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -510,6 +510,15 @@ public:
     return composeSubRegIndicesImpl(a, b);
   }
 
+  /// Transforms a LaneMask computed for one subregister to the lanemask that
+  /// would have been computed when composing the subsubregisters with IdxA
+  /// first; IdxA == 0 returns LaneMask as is. @sa composeSubRegIndices()
+  unsigned composeSubRegIndexLaneMask(unsigned IdxA, unsigned LaneMask) const {
+    if (!IdxA)
+      return LaneMask;
+    return composeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
+  }
+
   /// Debugging helper: dump register in human readable form to dbgs() stream.
   static void dumpReg(unsigned Reg, unsigned SubRegIndex = 0,
                       const TargetRegisterInfo* TRI = nullptr);
@@ -520,6 +529,12 @@ protected:
     llvm_unreachable("Target has no sub-registers");
   }
 
+  /// Overridden by TableGen in targets that have sub-registers.
+  virtual unsigned
+  composeSubRegIndexLaneMaskImpl(unsigned, unsigned) const {
+    llvm_unreachable("Target has no sub-registers");
+  }
+
 public:
   /// getCommonSuperRegClass - Find a common super-register class if it exists.
   ///
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index fdfdba4f202..f37c3aec1ca 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -1192,6 +1192,70 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
     }
   }
 
+  // Compute transformation sequences for composeSubRegIndexLaneMask. The idea
+  // here is that for each possible target subregister we look at the leaves
+  // in the subregister graph that compose for this target and create
+  // transformation sequences for the lanemasks. Each step in the sequence
+  // consists of a bitmask and a bitrotate operation. As the rotation amounts
+  // are usually the same for many subregisters we can easily combine the steps
+  // by combining the masks.
+  for (const auto &Idx : SubRegIndices) {
+    const auto &Composites = Idx.getComposites();
+    auto &LaneTransforms = Idx.CompositionLaneMaskTransform;
+    // Go through all leaf subregisters and find the ones that compose with Idx.
+    // These make up all possible valid bits in the lane mask we want to
+    // transform. Looking only at the leaves ensures that only a single bit in
+    // the mask is set.
+    unsigned NextBit = 0;
+    for (auto &Idx2 : SubRegIndices) {
+      // Skip non-leaf subregisters.
+      if (!Idx2.getComposites().empty())
+        continue;
+      // Replicate the behaviour from the lane mask generation loop above.
+      unsigned SrcBit = NextBit;
+      unsigned SrcMask = 1u << SrcBit;
+      if (NextBit < 31)
+        ++NextBit;
+      assert(Idx2.LaneMask == SrcMask);
+
+      // Get the composed subregister if there is any.
+      auto C = Composites.find(&Idx2);
+      if (C == Composites.end())
+        continue;
+      const CodeGenSubRegIndex *Composite = C->second;
+      // The composed subreg should be a leaf subreg too.
+      assert(Composite->getComposites().empty());
+
+      // Create Mask+Rotate operation and merge with existing ops if possible.
+      unsigned DstBit = Log2_32(Composite->LaneMask);
+      int Shift = DstBit - SrcBit;
+      uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift : 32+Shift;
+      for (auto &I : LaneTransforms) {
+        if (I.RotateLeft == RotateLeft) {
+          I.Mask |= SrcMask;
+          SrcMask = 0;
+        }
+      }
+      if (SrcMask != 0) {
+        MaskRolPair MaskRol = { SrcMask, RotateLeft };
+        LaneTransforms.push_back(MaskRol);
+      }
+    }
+    // Optimize if the transformation consists of one step only: Set mask to
+    // 0xffffffff (including some irrelevant invalid bits) so that it should
+    // merge with more entries later while compressing the table.
+    if (LaneTransforms.size() == 1)
+      LaneTransforms[0].Mask = ~0u;
+
+    // Further compression optimization: For invalid compositions resulting
+    // in a sequence with 0 entries we can just pick any other. Choose
+    // Mask 0xffffffff with Rotation 0.
+    if (LaneTransforms.empty()) {
+      MaskRolPair P = { ~0u, 0 };
+      LaneTransforms.push_back(P);
+    }
+  }
+
   // FIXME: What if ad-hoc aliasing introduces overlaps that aren't represented
   // by the sub-register graph? This doesn't occur in any known targets.
 
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 6baaa28d95d..d4668a05f33 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -34,6 +34,20 @@
 namespace llvm {
   class CodeGenRegBank;
 
+  /// Used to encode a step in a register lane mask transformation.
+  /// Mask the bits specified in Mask, then rotate them RotateLeft bits to the
+  /// left assuming a wraparound at 32bits.
+  struct MaskRolPair {
+    unsigned Mask;
+    uint8_t RotateLeft;
+    bool operator==(const MaskRolPair &Other) const {
+      return Mask == Other.Mask && RotateLeft == Other.RotateLeft;
+    }
+    bool operator!=(const MaskRolPair &Other) const {
+      return Mask != Other.Mask || RotateLeft != Other.RotateLeft;
+    }
+  };
+
   /// CodeGenSubRegIndex - Represents a sub-register index.
   class CodeGenSubRegIndex {
     Record *const TheDef;
@@ -45,6 +59,7 @@ namespace llvm {
     uint16_t Offset;
     const unsigned EnumValue;
     mutable unsigned LaneMask;
+    mutable SmallVector<MaskRolPair,1> CompositionLaneMaskTransform;
 
     // Are all super-registers containing this SubRegIndex covered by their
     // sub-registers?
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index b7bd37b30a2..bde6a174dd1 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -62,6 +62,8 @@ private:
                        const std::string &ClassName);
   void emitComposeSubRegIndices(raw_ostream &OS, CodeGenRegBank &RegBank,
                                 const std::string &ClassName);
+  void emitComposeSubRegIndexLaneMask(raw_ostream &OS, CodeGenRegBank &RegBank,
+                                      const std::string &ClassName);
 };
 } // End anonymous namespace
 
@@ -687,6 +689,95 @@ RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
   OS << "}\n\n";
 }
 
+void
+RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
+                                                    CodeGenRegBank &RegBank,
+                                                    const std::string &ClName) {
+  // See the comments in computeSubRegLaneMasks() for our goal here.
+  const auto &SubRegIndices = RegBank.getSubRegIndices();
+
+  // Create a list of Mask+Rotate operations, with equivalent entries merged.
+  SmallVector<unsigned, 4> SubReg2SequenceIndexMap;
+  SmallVector<SmallVector<MaskRolPair, 1>, 4> Sequences;
+  for (const auto &Idx : SubRegIndices) {
+    const SmallVector<MaskRolPair, 1> &IdxSequence
+      = Idx.CompositionLaneMaskTransform;
+
+    unsigned Found = ~0u;
+    unsigned SIdx = 0;
+    unsigned NextSIdx;
+    for (size_t s = 0, se = Sequences.size(); s != se; ++s, SIdx = NextSIdx) {
+      SmallVectorImpl<MaskRolPair> &Sequence = Sequences[s];
+      NextSIdx = SIdx + Sequence.size() + 1;
+      if (Sequence.size() != IdxSequence.size())
+        continue;
+      bool Identical = true;
+      for (size_t o = 0, oe = Sequence.size(); o != oe; ++o) {
+        if (Sequence[o] != IdxSequence[o]) {
+          Identical = false;
+          break;
+        }
+      }
+      if (Identical) {
+        Found = SIdx;
+        break;
+      }
+    }
+    if (Found == ~0u) {
+      Sequences.push_back(IdxSequence);
+      Found = SIdx;
+    }
+    SubReg2SequenceIndexMap.push_back(Found);
+  }
+
+  OS << "unsigned " << ClName
+     << "::composeSubRegIndexLaneMaskImpl(unsigned IdxA, unsigned LaneMask)"
+        " const {\n";
+
+  OS << " struct MaskRolOp {\n"
+        " unsigned Mask;\n"
+        " uint8_t RotateLeft;\n"
+        " };\n"
+        " static const MaskRolOp Seqs[] = {\n";
+  unsigned Idx = 0;
+  for (size_t s = 0, se = Sequences.size(); s != se; ++s) {
+    OS << " ";
+    const SmallVectorImpl<MaskRolPair> &Sequence = Sequences[s];
+    for (size_t p = 0, pe = Sequence.size(); p != pe; ++p) {
+      const MaskRolPair &P = Sequence[p];
+      OS << format("{ 0x%08X, %2u }, ", P.Mask, P.RotateLeft);
+    }
+    OS << "{ 0, 0 }";
+    if (s+1 != se)
+      OS << ", ";
+    OS << " // Sequence " << Idx << "\n";
+    Idx += Sequence.size() + 1;
+  }
+  OS << " };\n"
+        " static const MaskRolOp *CompositeSequences[] = {\n";
+  for (size_t i = 0, e = SubRegIndices.size(); i != e; ++i) {
+    OS << " ";
+    unsigned SeqIdx = SubReg2SequenceIndexMap[i];
+    OS << format("&Seqs[%u]", SeqIdx);
+    if (i+1 != e)
+      OS << ",";
+    OS << " // to " << SubRegIndices[i].getName() << "\n";
+  }
+  OS << " };\n\n";
+
+  OS << " --IdxA; assert(IdxA < " << SubRegIndices.size()
+     << " && \"Subregister index out of bounds\");\n"
+        " unsigned Result = 0;\n"
+        " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask != 0; ++Ops)"
+        " {\n"
+        " unsigned Masked = LaneMask & Ops->Mask;\n"
+        " Result |= (Masked << Ops->RotateLeft) & 0xFFFFFFFF;\n"
+        " Result |= (Masked >> ((32 - Ops->RotateLeft) & 0x1F));\n"
+        " }\n"
+        " return Result;\n"
+        "}\n";
+}
+
 //
 // runMCDesc - Print out MC register descriptions.
 //
@@ -966,6 +1057,8 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
      << " { return false; }\n";
   if (!RegBank.getSubRegIndices().empty()) {
     OS << " unsigned composeSubRegIndicesImpl"
+       << "(unsigned, unsigned) const override;\n"
+       << " unsigned composeSubRegIndexLaneMaskImpl"
        << "(unsigned, unsigned) const override;\n"
        << " const TargetRegisterClass *getSubClassWithSubReg"
        << "(const TargetRegisterClass*, unsigned) const override;\n";
@@ -1214,8 +1307,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
   auto SubRegIndicesSize =
       std::distance(SubRegIndices.begin(), SubRegIndices.end());
 
-  if (!SubRegIndices.empty())
+  if (!SubRegIndices.empty()) {
     emitComposeSubRegIndices(OS, RegBank, ClassName);
+    emitComposeSubRegIndexLaneMask(OS, RegBank, ClassName);
+  }
 
   // Emit getSubClassWithSubReg.
   if (!SubRegIndices.empty()) {