//===- StratifiedSets.h - Abstract stratified sets implementation. --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_STRATIFIEDSETS_H #define LLVM_ADT_STRATIFIEDSETS_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include #include #include #include #include #include #include namespace llvm { // \brief An index into Stratified Sets. typedef unsigned StratifiedIndex; // NOTE: ^ This can't be a short -- bootstrapping clang has a case where // ~1M sets exist. // \brief Container of information related to a value in a StratifiedSet. struct StratifiedInfo { StratifiedIndex Index; // For field sensitivity, etc. we can tack attributes on to this struct. }; // The number of attributes that StratifiedAttrs should contain. Attributes are // described below, and 32 was an arbitrary choice because it fits nicely in 32 // bits (because we use a bitset for StratifiedAttrs). static const unsigned NumStratifiedAttrs = 32; // These are attributes that the users of StratifiedSets/StratifiedSetBuilders // may use for various purposes. These also have the special property of that // they are merged down. So, if set A is above set B, and one decides to set an // attribute in set A, then the attribute will automatically be set in set B. typedef std::bitset StratifiedAttrs; // \brief A "link" between two StratifiedSets. struct StratifiedLink { // \brief This is a value used to signify "does not exist" where // the StratifiedIndex type is used. This is used instead of // Optional because Optional would // eat up a considerable amount of extra memory, after struct // padding/alignment is taken into account. static const StratifiedIndex SetSentinel; // \brief The index for the set "above" current StratifiedIndex Above; // \brief The link for the set "below" current StratifiedIndex Below; // \brief Attributes for these StratifiedSets. StratifiedAttrs Attrs; StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {} bool hasBelow() const { return Below != SetSentinel; } bool hasAbove() const { return Above != SetSentinel; } void clearBelow() { Below = SetSentinel; } void clearAbove() { Above = SetSentinel; } }; // \brief These are stratified sets, as described in "Fast algorithms for // Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M // R, Yuan H, and Su Z. -- in short, this is meant to represent different sets // of Value*s. If two Value*s are in the same set, or if both sets have // overlapping attributes, then the Value*s are said to alias. // // Sets may be related by position, meaning that one set may be considered as // above or below another. In CFL Alias Analysis, this gives us an indication // of how two variables are related; if the set of variable A is below a set // containing variable B, then at some point, a variable that has interacted // with B (or B itself) was either used in order to extract the variable A, or // was used as storage of variable A. // // Sets may also have attributes (as noted above). These attributes are // generally used for noting whether a variable in the set has interacted with // a variable whose origins we don't quite know (i.e. globals/arguments), or if // the variable may have had operations performed on it (modified in a function // call). All attributes that exist in a set A must exist in all sets marked as // below set A. template class StratifiedSets { public: StratifiedSets() {} StratifiedSets(DenseMap Map, std::vector Links) : Values(std::move(Map)), Links(std::move(Links)) {} StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); } StratifiedSets &operator=(StratifiedSets &&Other) { Values = std::move(Other.Values); Links = std::move(Other.Links); return *this; } Optional find(const T &Elem) const { auto Iter = Values.find(Elem); if (Iter == Values.end()) { return NoneType(); } return Iter->second; } const StratifiedLink &getLink(StratifiedIndex Index) const { assert(inbounds(Index)); return Links[Index]; } private: DenseMap Values; std::vector Links; bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); } }; // \brief Generic Builder class that produces StratifiedSets instances. // // The goal of this builder is to efficiently produce correct StratifiedSets // instances. To this end, we use a few tricks: // > Set chains (A method for linking sets together) // > Set remaps (A method for marking a set as an alias [irony?] of another) // // ==== Set chains ==== // This builder has a notion of some value A being above, below, or with some // other value B: // > The `A above B` relationship implies that there is a reference edge going // from A to B. Namely, it notes that A can store anything in B's set. // > The `A below B` relationship is the opposite of `A above B`. It implies // that there's a dereference edge going from A to B. // > The `A with B` relationship states that there's an assignment edge going // from A to B, and that A and B should be treated as equals. // // As an example, take the following code snippet: // // %a = alloca i32, align 4 // %ap = alloca i32*, align 8 // %app = alloca i32**, align 8 // store %a, %ap // store %ap, %app // %aw = getelementptr %ap, 0 // // Given this, the follow relations exist: // - %a below %ap & %ap above %a // - %ap below %app & %app above %ap // - %aw with %ap & %ap with %aw // // These relations produce the following sets: // [{%a}, {%ap, %aw}, {%app}] // // ...Which states that the only MayAlias relationship in the above program is // between %ap and %aw. // // Life gets more complicated when we actually have logic in our programs. So, // we either must remove this logic from our programs, or make consessions for // it in our AA algorithms. In this case, we have decided to select the latter // option. // // First complication: Conditionals // Motivation: // %ad = alloca int, align 4 // %a = alloca int*, align 8 // %b = alloca int*, align 8 // %bp = alloca int**, align 8 // %c = call i1 @SomeFunc() // %k = select %c, %ad, %bp // store %ad, %a // store %b, %bp // // %k has 'with' edges to both %a and %b, which ordinarily would not be linked // together. So, we merge the set that contains %a with the set that contains // %b. We then recursively merge the set above %a with the set above %b, and // the set below %a with the set below %b, etc. Ultimately, the sets for this // program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below // {%a, %b, %c} is below {%ad}. // // Second complication: Arbitrary casts // Motivation: // %ip = alloca int*, align 8 // %ipp = alloca int**, align 8 // %i = bitcast ipp to int // store %ip, %ipp // store %i, %ip // // This is impossible to construct with any of the rules above, because a set // containing both {%i, %ipp} is supposed to exist, the set with %i is supposed // to be below the set with %ip, and the set with %ip is supposed to be below // the set with %ipp. Because we don't allow circular relationships like this, // we merge all concerned sets into one. So, the above code would generate a // single StratifiedSet: {%ip, %ipp, %i}. // // ==== Set remaps ==== // More of an implementation detail than anything -- when merging sets, we need // to update the numbers of all of the elements mapped to those sets. Rather // than doing this at each merge, we note in the BuilderLink structure that a // remap has occurred, and use this information so we can defer renumbering set // elements until build time. template class StratifiedSetsBuilder { // \brief Represents a Stratified Set, with information about the Stratified // Set above it, the set below it, and whether the current set has been // remapped to another. struct BuilderLink { const StratifiedIndex Number; BuilderLink(StratifiedIndex N) : Number(N) { Remap = StratifiedLink::SetSentinel; } bool hasAbove() const { assert(!isRemapped()); return Link.hasAbove(); } bool hasBelow() const { assert(!isRemapped()); return Link.hasBelow(); } void setBelow(StratifiedIndex I) { assert(!isRemapped()); Link.Below = I; } void setAbove(StratifiedIndex I) { assert(!isRemapped()); Link.Above = I; } void clearBelow() { assert(!isRemapped()); Link.clearBelow(); } void clearAbove() { assert(!isRemapped()); Link.clearAbove(); } StratifiedIndex getBelow() const { assert(!isRemapped()); assert(hasBelow()); return Link.Below; } StratifiedIndex getAbove() const { assert(!isRemapped()); assert(hasAbove()); return Link.Above; } StratifiedAttrs &getAttrs() { assert(!isRemapped()); return Link.Attrs; } void setAttr(unsigned index) { assert(!isRemapped()); assert(index < NumStratifiedAttrs); Link.Attrs.set(index); } void setAttrs(const StratifiedAttrs &other) { assert(!isRemapped()); Link.Attrs |= other; } bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; } // \brief For initial remapping to another set void remapTo(StratifiedIndex Other) { assert(!isRemapped()); Remap = Other; } StratifiedIndex getRemapIndex() const { assert(isRemapped()); return Remap; } // \brief Should only be called when we're already remapped. void updateRemap(StratifiedIndex Other) { assert(isRemapped()); Remap = Other; } // \brief Prefer the above functions to calling things directly on what's // returned from this -- they guard against unexpected calls when the // current BuilderLink is remapped. const StratifiedLink &getLink() const { return Link; } private: StratifiedLink Link; StratifiedIndex Remap; }; // \brief This function performs all of the set unioning/value renumbering // that we've been putting off, and generates a vector that // may be placed in a StratifiedSets instance. void finalizeSets(std::vector &StratLinks) { DenseMap Remaps; for (auto &Link : Links) { if (Link.isRemapped()) { continue; } StratifiedIndex Number = StratLinks.size(); Remaps.insert(std::make_pair(Link.Number, Number)); StratLinks.push_back(Link.getLink()); } for (auto &Link : StratLinks) { if (Link.hasAbove()) { auto &Above = linksAt(Link.Above); auto Iter = Remaps.find(Above.Number); assert(Iter != Remaps.end()); Link.Above = Iter->second; } if (Link.hasBelow()) { auto &Below = linksAt(Link.Below); auto Iter = Remaps.find(Below.Number); assert(Iter != Remaps.end()); Link.Below = Iter->second; } } for (auto &Pair : Values) { auto &Info = Pair.second; auto &Link = linksAt(Info.Index); auto Iter = Remaps.find(Link.Number); assert(Iter != Remaps.end()); Info.Index = Iter->second; } } // \brief There's a guarantee in StratifiedLink where all bits set in a // Link.externals will be set in all Link.externals "below" it. static void propagateAttrs(std::vector &Links) { const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) { const auto *Link = &Links[Idx]; while (Link->hasAbove()) { Idx = Link->Above; Link = &Links[Idx]; } return Idx; }; SmallSet Visited; for (unsigned I = 0, E = Links.size(); I < E; ++I) { auto CurrentIndex = getHighestParentAbove(I); if (!Visited.insert(CurrentIndex)) { continue; } while (Links[CurrentIndex].hasBelow()) { auto &CurrentBits = Links[CurrentIndex].Attrs; auto NextIndex = Links[CurrentIndex].Below; auto &NextBits = Links[NextIndex].Attrs; NextBits |= CurrentBits; CurrentIndex = NextIndex; } } } public: // \brief Builds a StratifiedSet from the information we've been given since // either construction or the prior build() call. StratifiedSets build() { std::vector StratLinks; finalizeSets(StratLinks); propagateAttrs(StratLinks); Links.clear(); return StratifiedSets(std::move(Values), std::move(StratLinks)); } std::size_t size() const { return Values.size(); } std::size_t numSets() const { return Links.size(); } bool has(const T &Elem) const { return get(Elem).hasValue(); } bool add(const T &Main) { if (get(Main).hasValue()) return false; auto NewIndex = getNewUnlinkedIndex(); return addAtMerging(Main, NewIndex); } // \brief Restructures the stratified sets as necessary to make "ToAdd" in a // set above "Main". There are some cases where this is not possible (see // above), so we merge them such that ToAdd and Main are in the same set. bool addAbove(const T &Main, const T &ToAdd) { assert(has(Main)); auto Index = *indexOf(Main); if (!linksAt(Index).hasAbove()) addLinkAbove(Index); auto Above = linksAt(Index).getAbove(); return addAtMerging(ToAdd, Above); } // \brief Restructures the stratified sets as necessary to make "ToAdd" in a // set below "Main". There are some cases where this is not possible (see // above), so we merge them such that ToAdd and Main are in the same set. bool addBelow(const T &Main, const T &ToAdd) { assert(has(Main)); auto Index = *indexOf(Main); if (!linksAt(Index).hasBelow()) addLinkBelow(Index); auto Below = linksAt(Index).getBelow(); return addAtMerging(ToAdd, Below); } bool addWith(const T &Main, const T &ToAdd) { assert(has(Main)); auto MainIndex = *indexOf(Main); return addAtMerging(ToAdd, MainIndex); } void noteAttribute(const T &Main, unsigned AttrNum) { assert(has(Main)); assert(AttrNum < StratifiedLink::SetSentinel); auto *Info = *get(Main); auto &Link = linksAt(Info->Index); Link.setAttr(AttrNum); } void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) { assert(has(Main)); auto *Info = *get(Main); auto &Link = linksAt(Info->Index); Link.setAttrs(NewAttrs); } StratifiedAttrs getAttributes(const T &Main) { assert(has(Main)); auto *Info = *get(Main); auto *Link = &linksAt(Info->Index); auto Attrs = Link->getAttrs(); while (Link->hasAbove()) { Link = &linksAt(Link->getAbove()); Attrs |= Link->getAttrs(); } return Attrs; } bool getAttribute(const T &Main, unsigned AttrNum) { assert(AttrNum < StratifiedLink::SetSentinel); auto Attrs = getAttributes(Main); return Attrs[AttrNum]; } // \brief Gets the attributes that have been applied to the set that Main // belongs to. It ignores attributes in any sets above the one that Main // resides in. StratifiedAttrs getRawAttributes(const T &Main) { assert(has(Main)); auto *Info = *get(Main); auto &Link = linksAt(Info->Index); return Link.getAttrs(); } // \brief Gets an attribute from the attributes that have been applied to the // set that Main belongs to. It ignores attributes in any sets above the one // that Main resides in. bool getRawAttribute(const T &Main, unsigned AttrNum) { assert(AttrNum < StratifiedLink::SetSentinel); auto Attrs = getRawAttributes(Main); return Attrs[AttrNum]; } private: DenseMap Values; std::vector Links; // \brief Adds the given element at the given index, merging sets if // necessary. bool addAtMerging(const T &ToAdd, StratifiedIndex Index) { StratifiedInfo Info = {Index}; auto Pair = Values.insert(std::make_pair(ToAdd, Info)); if (Pair.second) return true; auto &Iter = Pair.first; auto &IterSet = linksAt(Iter->second.Index); auto &ReqSet = linksAt(Index); // Failed to add where we wanted to. Merge the sets. if (&IterSet != &ReqSet) merge(IterSet.Number, ReqSet.Number); return false; } // \brief Gets the BuilderLink at the given index, taking set remapping into // account. BuilderLink &linksAt(StratifiedIndex Index) { auto *Start = &Links[Index]; if (!Start->isRemapped()) return *Start; auto *Current = Start; while (Current->isRemapped()) Current = &Links[Current->getRemapIndex()]; auto NewRemap = Current->Number; // Run through everything that has yet to be updated, and update them to // remap to NewRemap Current = Start; while (Current->isRemapped()) { auto *Next = &Links[Current->getRemapIndex()]; Current->updateRemap(NewRemap); Current = Next; } return *Current; } // \brief Merges two sets into one another. Assumes that these sets are not // already one in the same void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { assert(inbounds(Idx1) && inbounds(Idx2)); assert(&linksAt(Idx1) != &linksAt(Idx2) && "Merging a set into itself is not allowed"); // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge // both the // given sets, and all sets between them, into one. if (tryMergeUpwards(Idx1, Idx2)) return; if (tryMergeUpwards(Idx2, Idx1)) return; // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`. // We therefore need to merge the two chains together. mergeDirect(Idx1, Idx2); } // \brief Merges two sets assuming that the set at `Idx1` is unreachable from // traversing above or below the set at `Idx2`. void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { assert(inbounds(Idx1) && inbounds(Idx2)); auto *LinksInto = &linksAt(Idx1); auto *LinksFrom = &linksAt(Idx2); // Merging everything above LinksInto then proceeding to merge everything // below LinksInto becomes problematic, so we go as far "up" as possible! while (LinksInto->hasAbove() && LinksFrom->hasAbove()) { LinksInto = &linksAt(LinksInto->getAbove()); LinksFrom = &linksAt(LinksFrom->getAbove()); } if (LinksFrom->hasAbove()) { LinksInto->setAbove(LinksFrom->getAbove()); auto &NewAbove = linksAt(LinksInto->getAbove()); NewAbove.setBelow(LinksInto->Number); } // Merging strategy: // > If neither has links below, stop. // > If only `LinksInto` has links below, stop. // > If only `LinksFrom` has links below, reset `LinksInto.Below` to // match `LinksFrom.Below` // > If both have links above, deal with those next. while (LinksInto->hasBelow() && LinksFrom->hasBelow()) { auto &FromAttrs = LinksFrom->getAttrs(); LinksInto->setAttrs(FromAttrs); // Remap needs to happen after getBelow(), but before // assignment of LinksFrom auto *NewLinksFrom = &linksAt(LinksFrom->getBelow()); LinksFrom->remapTo(LinksInto->Number); LinksFrom = NewLinksFrom; LinksInto = &linksAt(LinksInto->getBelow()); } if (LinksFrom->hasBelow()) { LinksInto->setBelow(LinksFrom->getBelow()); auto &NewBelow = linksAt(LinksInto->getBelow()); NewBelow.setAbove(LinksInto->Number); } LinksFrom->remapTo(LinksInto->Number); } // \brief Checks to see if lowerIndex is at a level lower than upperIndex. // If so, it will merge lowerIndex with upperIndex (and all of the sets // between) and return true. Otherwise, it will return false. bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) { assert(inbounds(LowerIndex) && inbounds(UpperIndex)); auto *Lower = &linksAt(LowerIndex); auto *Upper = &linksAt(UpperIndex); if (Lower == Upper) return true; SmallVector Found; auto *Current = Lower; auto Attrs = Current->getAttrs(); while (Current->hasAbove() && Current != Upper) { Found.push_back(Current); Attrs |= Current->getAttrs(); Current = &linksAt(Current->getAbove()); } if (Current != Upper) return false; Upper->setAttrs(Attrs); if (Lower->hasBelow()) { auto NewBelowIndex = Lower->getBelow(); Upper->setBelow(NewBelowIndex); auto &NewBelow = linksAt(NewBelowIndex); NewBelow.setAbove(UpperIndex); } else { Upper->clearBelow(); } for (const auto &Ptr : Found) Ptr->remapTo(Upper->Number); return true; } Optional get(const T &Val) const { auto Result = Values.find(Val); if (Result == Values.end()) return NoneType(); return &Result->second; } Optional get(const T &Val) { auto Result = Values.find(Val); if (Result == Values.end()) return NoneType(); return &Result->second; } Optional indexOf(const T &Val) { auto MaybeVal = get(Val); if (!MaybeVal.hasValue()) return NoneType(); auto *Info = *MaybeVal; auto &Link = linksAt(Info->Index); return Link.Number; } StratifiedIndex addLinkBelow(StratifiedIndex Set) { auto At = addLinks(); Links[Set].setBelow(At); Links[At].setAbove(Set); return At; } StratifiedIndex addLinkAbove(StratifiedIndex Set) { auto At = addLinks(); Links[At].setBelow(Set); Links[Set].setAbove(At); return At; } StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } StratifiedIndex addLinks() { auto Link = Links.size(); Links.push_back(BuilderLink(Link)); return Link; } bool inbounds(StratifiedIndex N) const { return N < Links.size(); } }; } #endif // LLVM_ADT_STRATIFIEDSETS_H