From 11af4b49b2d816a17cd12e2d071ad8bae4aac351 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 18 Jul 2014 15:51:28 +0000 Subject: [PATCH] Add a dereferenceable attribute This attribute indicates that the parameter or return pointer is dereferenceable. Practically speaking, loads from such a pointer within the associated byte range are safe to speculatively execute. Such pointer parameters are common in source languages (C++ references, for example). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213385 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 11 +++ include/llvm-c/Core.h | 1 + include/llvm/Bitcode/LLVMBitCodes.h | 3 +- include/llvm/IR/Argument.h | 8 ++- include/llvm/IR/Attributes.h | 26 ++++++- include/llvm/IR/CallSite.h | 19 ++++++ include/llvm/IR/Function.h | 6 ++ include/llvm/IR/Instructions.h | 12 ++++ lib/Analysis/ValueTracking.cpp | 2 +- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 45 +++++++++++++ lib/AsmParser/LLParser.h | 6 ++ lib/AsmParser/LLToken.h | 1 + lib/Bitcode/Reader/BitcodeReader.cpp | 6 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 2 + lib/IR/AttributeImpl.h | 4 +- lib/IR/Attributes.cpp | 78 +++++++++++++++++++-- lib/IR/Function.cpp | 18 ++++- lib/IR/Value.cpp | 27 +++++++- test/Bitcode/attributes.ll | 10 +++ test/Transforms/InstSimplify/compare.ll | 34 ++++++++++ test/Transforms/LICM/hoist-deref-load.ll | 86 ++++++++++++++++++++++++ 22 files changed, 388 insertions(+), 18 deletions(-) create mode 100644 test/Transforms/LICM/hoist-deref-load.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 27fe1be116e..497f658a4d9 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -969,6 +969,17 @@ Currently, only the following parameter attributes are defined: passed in is non-null, or the callee must ensure that the returned pointer is non-null. +``dereferenceable()`` + This indicates that the parameter or return pointer is dereferenceable. This + attribute may only be applied to pointer typed parameters. A pointer that + is dereferenceable can be loaded from speculatively without a risk of + trapping. The number of bytes known to be dereferenceable must be provided + in parentheses. It is legal for the number of bytes to be less than the + size of the pointee type. The ``nonnull`` attribute does not imply + dereferenceability (consider a pointer to one element past the end of an + array), however ``dereferenceable()`` does imply ``nonnull`` in + ``addrspace(0)`` (which is the default address space). + .. _gc: Garbage Collector Names diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 8693a3020ab..fdff77bc5e5 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -168,6 +168,7 @@ typedef enum { LLVMInAllocaAttribute = 1ULL << 36, LLVMNonNullAttribute = 1ULL << 37, LLVMJumpTableAttribute = 1ULL << 38, + LLVMDereferenceableAttribute = 1ULL << 39, */ } LLVMAttribute; diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 75b26a94c9b..ee2efa2257b 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -374,7 +374,8 @@ namespace bitc { ATTR_KIND_OPTIMIZE_NONE = 37, ATTR_KIND_IN_ALLOCA = 38, ATTR_KIND_NON_NULL = 39, - ATTR_KIND_JUMP_TABLE = 40 + ATTR_KIND_JUMP_TABLE = 40, + ATTR_KIND_DEREFERENCEABLE = 41 }; enum ComdatSelectionKindCodes { diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h index 3a63e1a1eaa..7c398a5e553 100644 --- a/include/llvm/IR/Argument.h +++ b/include/llvm/IR/Argument.h @@ -56,9 +56,15 @@ public: unsigned getArgNo() const; /// \brief Return true if this argument has the nonnull attribute on it in - /// its containing function. + /// its containing function. Also returns true if at least one byte is known + /// to be dereferenceable and the pointer is in addrspace(0). bool hasNonNullAttr() const; + /// \brief If this argument has the dereferenceable attribute on it in its + /// containing function, return the number of bytes known to be + /// dereferenceable. Otherwise, zero is returned. + uint64_t getDereferenceableBytes() const; + /// \brief Return true if this argument has the byval attribute on it in its /// containing function. bool hasByValAttr() const; diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 9cdadb3c805..5ff48d68891 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -88,6 +88,7 @@ public: NonLazyBind, ///< Function is called early and/or ///< often, so lazy binding isn't worthwhile NonNull, ///< Pointer is known to be not null + Dereferenceable, ///< Pointer is known to be dereferenceable NoRedZone, ///< Disable redzone NoReturn, ///< Mark the function as not returning NoUnwind, ///< Function doesn't unwind stack @@ -133,6 +134,8 @@ public: /// alignment set. static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align); static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align); + static Attribute getWithDereferenceableBytes(LLVMContext &Context, + uint64_t Bytes); //===--------------------------------------------------------------------===// // Attribute Accessors @@ -178,6 +181,10 @@ public: /// alignment value. unsigned getStackAlignment() const; + /// \brief Returns the number of dereferenceable bytes from the + /// dereferenceable attribute (or zero if unknown). + uint64_t getDereferenceableBytes() const; + /// \brief The Attribute is converted to a string of equivalent mnemonic. This /// is, presumably, for writing out the mnemonics for the assembly writer. std::string getAsString(bool InAttrGrp = false) const; @@ -316,6 +323,9 @@ public: /// \brief Get the stack alignment. unsigned getStackAlignment(unsigned Index) const; + /// \brief Get the number of dereferenceable bytes (or zero if unknown). + uint64_t getDereferenceableBytes(unsigned Index) const; + /// \brief Return the attributes at the index as a string. std::string getAsString(unsigned Index, bool InAttrGrp = false) const; @@ -395,13 +405,15 @@ class AttrBuilder { std::map TargetDepAttrs; uint64_t Alignment; uint64_t StackAlignment; + uint64_t DerefBytes; public: - AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0) {} + AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) {} explicit AttrBuilder(uint64_t Val) - : Attrs(0), Alignment(0), StackAlignment(0) { + : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) { addRawValue(Val); } - AttrBuilder(const Attribute &A) : Attrs(0), Alignment(0), StackAlignment(0) { + AttrBuilder(const Attribute &A) + : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) { addAttribute(A); } AttrBuilder(AttributeSet AS, unsigned Idx); @@ -455,6 +467,10 @@ public: /// \brief Retrieve the stack alignment attribute, if it exists. uint64_t getStackAlignment() const { return StackAlignment; } + /// \brief Retrieve the number of dereferenceable bytes, if the dereferenceable + /// attribute exists (zero is returned otherwise). + uint64_t getDereferenceableBytes() const { return DerefBytes; } + /// \brief This turns an int alignment (which must be a power of 2) into the /// form used internally in Attribute. AttrBuilder &addAlignmentAttr(unsigned Align); @@ -463,6 +479,10 @@ public: /// the form used internally in Attribute. AttrBuilder &addStackAlignmentAttr(unsigned Align); + /// \brief This turns the number of dereferenceable bytes into the form used + /// internally in Attribute. + AttrBuilder &addDereferenceableAttr(uint64_t Bytes); + /// \brief Return true if the builder contains no target-independent /// attributes. bool empty() const { return Attrs.none(); } diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index deea4151ddd..df082577a0e 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -217,6 +217,12 @@ public: CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); } + /// @brief Extract the number of dereferenceable bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableBytes(uint16_t i) const { + CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { @@ -302,6 +308,19 @@ public: paramHasAttr(ArgNo + 1, Attribute::ReadNone); } + /// @brief Return true if the return value is known to be not null. + /// This may be because it has the nonnull attribute, or because at least + /// one byte is dereferenceable and the pointer is in addrspace(0). + bool isReturnNonNull() const { + if (paramHasAttr(0, Attribute::NonNull)) + return true; + else if (getDereferenceableBytes(0) > 0 && + getType()->getPointerAddressSpace() == 0) + return true; + + return false; + } + /// hasArgument - Returns true if this CallSite passes the given Value* as an /// argument to the called function. bool hasArgument(const Value *Arg) const { diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 22444bd3000..ad4b1395f0c 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -233,6 +233,12 @@ public: return AttributeSets.getParamAlignment(i); } + /// @brief Extract the number of dereferenceable bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableBytes(unsigned i) const { + return AttributeSets.getDereferenceableBytes(i); + } + /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index a590f5ad7b2..308467f7aa1 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1376,6 +1376,12 @@ public: return AttributeList.getParamAlignment(i); } + /// \brief Extract the number of dereferenceable bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableBytes(unsigned i) const { + return AttributeList.getDereferenceableBytes(i); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { @@ -3051,6 +3057,12 @@ public: return AttributeList.getParamAlignment(i); } + /// \brief Extract the number of dereferenceable bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableBytes(unsigned i) const { + return AttributeList.getDereferenceableBytes(i); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index cedf076497e..e6d09f4e31f 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -2086,7 +2086,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { return !GV->hasExternalWeakLinkage(); if (ImmutableCallSite CS = V) - if (CS.paramHasAttr(0, Attribute::NonNull)) + if (CS.isReturnNonNull()) return true; // operator new never returns null. diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 1e5bcdd930c..962298fcd54 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -612,6 +612,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(byval); KEYWORD(inalloca); KEYWORD(cold); + KEYWORD(dereferenceable); KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(jumptable); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index be55ac6481f..ac6e0e512fe 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1052,6 +1052,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, "invalid use of attribute on a function"); break; case lltok::kw_byval: + case lltok::kw_dereferenceable: case lltok::kw_inalloca: case lltok::kw_nest: case lltok::kw_noalias: @@ -1212,6 +1213,16 @@ bool LLParser::ParseUInt32(unsigned &Val) { return false; } +/// ParseUInt64 +/// ::= uint64 +bool LLParser::ParseUInt64(uint64_t &Val) { + if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned()) + return TokError("expected integer"); + Val = Lex.getAPSIntVal().getLimitedValue(); + Lex.Lex(); + return false; +} + /// ParseTLSModel /// := 'localdynamic' /// := 'initialexec' @@ -1284,6 +1295,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { continue; } case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; + case lltok::kw_dereferenceable: { + uint64_t Bytes; + if (ParseOptionalDereferenceableBytes(Bytes)) + return true; + B.addDereferenceableAttr(Bytes); + continue; + } case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; @@ -1341,6 +1359,13 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { switch (Token) { default: // End of attributes. return HaveError; + case lltok::kw_dereferenceable: { + uint64_t Bytes; + if (ParseOptionalDereferenceableBytes(Bytes)) + return true; + B.addDereferenceableAttr(Bytes); + continue; + } case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; @@ -1606,6 +1631,26 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { return false; } +/// ParseOptionalDereferenceableBytes +/// ::= /* empty */ +/// ::= 'dereferenceable' '(' 4 ')' +bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) { + Bytes = 0; + if (!EatIfPresent(lltok::kw_dereferenceable)) + return false; + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(ParenLoc, "expected '('"); + LocTy DerefLoc = Lex.getLoc(); + if (ParseUInt64(Bytes)) return true; + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(ParenLoc, "expected ')'"); + if (!Bytes) + return Error(DerefLoc, "dereferenceable bytes must be non-zero"); + return false; +} + /// ParseOptionalCommaAlign /// ::= /// ::= ',' align 4 diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 2efb260d66b..7203bb245d0 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -202,6 +202,11 @@ namespace llvm { Loc = Lex.getLoc(); return ParseUInt32(Val); } + bool ParseUInt64(uint64_t &Val); + bool ParseUInt64(uint64_t &Val, LocTy &Loc) { + Loc = Lex.getLoc(); + return ParseUInt64(Val); + } bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); @@ -219,6 +224,7 @@ namespace llvm { bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass); bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalDereferenceableBytes(uint64_t &Bytes); bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, AtomicOrdering &Ordering); bool ParseOrdering(AtomicOrdering &Ordering); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 534d82415fd..2f02606f93e 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -106,6 +106,7 @@ namespace lltok { kw_byval, kw_inalloca, kw_cold, + kw_dereferenceable, kw_inlinehint, kw_inreg, kw_jumptable, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4a0484777cf..47a39539e20 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -588,6 +588,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: return Attribute::NonNull; + case bitc::ATTR_KIND_DEREFERENCEABLE: + return Attribute::Dereferenceable; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: @@ -689,8 +691,10 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { return EC; if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); - else + else if (Kind == Attribute::StackAlignment) B.addStackAlignmentAttr(Record[++i]); + else if (Kind == Attribute::Dereferenceable) + B.addDereferenceableAttr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 67710472a15..b2e49486d70 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -201,6 +201,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: return bitc::ATTR_KIND_NON_NULL; + case Attribute::Dereferenceable: + return bitc::ATTR_KIND_DEREFERENCEABLE; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 999a803c037..cc6d557ab4c 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -116,7 +116,8 @@ public: IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val) : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) { assert( - (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) && + (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment || + Kind == Attribute::Dereferenceable) && "Wrong kind for int attribute!"); } @@ -164,6 +165,7 @@ public: unsigned getAlignment() const; unsigned getStackAlignment() const; + uint64_t getDereferenceableBytes() const; std::string getAsString(bool InAttrGrp) const; typedef const Attribute *iterator; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 3ee622b31ba..04545ea919a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -88,6 +88,12 @@ Attribute Attribute::getWithStackAlignment(LLVMContext &Context, return get(Context, StackAlignment, Align); } +Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context, + uint64_t Bytes) { + assert(Bytes && "Bytes must be non-zero."); + return get(Context, Dereferenceable, Bytes); +} + //===----------------------------------------------------------------------===// // Attribute Accessor Methods //===----------------------------------------------------------------------===// @@ -156,6 +162,14 @@ unsigned Attribute::getStackAlignment() const { return pImpl->getValueAsInt(); } +/// This returns the number of dereferenceable bytes. +uint64_t Attribute::getDereferenceableBytes() const { + assert(hasAttribute(Attribute::Dereferenceable) && + "Trying to get dereferenceable bytes from " + "non-dereferenceable attribute!"); + return pImpl->getValueAsInt(); +} + std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return ""; @@ -263,6 +277,20 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return Result; } + if (hasAttribute(Attribute::Dereferenceable)) { + std::string Result; + Result += "dereferenceable"; + if (InAttrGrp) { + Result += "="; + Result += utostr(getValueAsInt()); + } else { + Result += "("; + Result += utostr(getValueAsInt()); + Result += ")"; + } + return Result; + } + // Convert target-dependent attributes to strings of the form: // // "kind" @@ -398,6 +426,8 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::InAlloca: return 1ULL << 43; case Attribute::NonNull: return 1ULL << 44; case Attribute::JumpTable: return 1ULL << 45; + case Attribute::Dereferenceable: + llvm_unreachable("dereferenceable attribute not supported in raw format"); } llvm_unreachable("Unsupported attribute type"); } @@ -482,6 +512,13 @@ unsigned AttributeSetNode::getStackAlignment() const { return 0; } +uint64_t AttributeSetNode::getDereferenceableBytes() const { + for (iterator I = begin(), E = end(); I != E; ++I) + if (I->hasAttribute(Attribute::Dereferenceable)) + return I->getDereferenceableBytes(); + return 0; +} + std::string AttributeSetNode::getAsString(bool InAttrGrp) const { std::string Str; for (iterator I = begin(), E = end(); I != E; ++I) { @@ -515,6 +552,8 @@ uint64_t AttributeSetImpl::Raw(unsigned Index) const { Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16; else if (Kind == Attribute::StackAlignment) Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26; + else if (Kind == Attribute::Dereferenceable) + llvm_unreachable("dereferenceable not supported in bit mask"); else Mask |= AttributeImpl::getAttrMask(Kind); } @@ -620,6 +659,10 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, else if (Kind == Attribute::StackAlignment) Attrs.push_back(std::make_pair(Index, Attribute:: getWithStackAlignment(C, B.getStackAlignment()))); + else if (Kind == Attribute::Dereferenceable) + Attrs.push_back(std::make_pair(Index, + Attribute::getWithDereferenceableBytes(C, + B.getDereferenceableBytes()))); else Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind))); } @@ -877,6 +920,11 @@ unsigned AttributeSet::getStackAlignment(unsigned Index) const { return ASN ? ASN->getStackAlignment() : 0; } +uint64_t AttributeSet::getDereferenceableBytes(unsigned Index) const { + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->getDereferenceableBytes() : 0; +} + std::string AttributeSet::getAsString(unsigned Index, bool InAttrGrp) const { AttributeSetNode *ASN = getAttributes(Index); @@ -956,7 +1004,7 @@ void AttributeSet::dump() const { //===----------------------------------------------------------------------===// AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) - : Attrs(0), Alignment(0), StackAlignment(0) { + : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) { AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; @@ -973,13 +1021,14 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) void AttrBuilder::clear() { Attrs.reset(); - Alignment = StackAlignment = 0; + Alignment = StackAlignment = DerefBytes = 0; } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!"); assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment && - "Adding alignment attribute without adding alignment value!"); + Val != Attribute::Dereferenceable && + "Adding integer attribute without adding a value!"); Attrs[Val] = true; return *this; } @@ -997,6 +1046,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { Alignment = Attr.getAlignment(); else if (Kind == Attribute::StackAlignment) StackAlignment = Attr.getStackAlignment(); + else if (Kind == Attribute::Dereferenceable) + DerefBytes = Attr.getDereferenceableBytes(); return *this; } @@ -1013,6 +1064,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { Alignment = 0; else if (Val == Attribute::StackAlignment) StackAlignment = 0; + else if (Val == Attribute::Dereferenceable) + DerefBytes = 0; return *this; } @@ -1037,6 +1090,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { Alignment = 0; else if (Kind == Attribute::StackAlignment) StackAlignment = 0; + else if (Kind == Attribute::Dereferenceable) + DerefBytes = 0; } else { assert(Attr.isStringAttribute() && "Invalid attribute type!"); std::map::iterator @@ -1079,6 +1134,14 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(unsigned Align) { return *this; } +AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) { + if (Bytes == 0) return *this; + + Attrs[Attribute::Dereferenceable] = true; + DerefBytes = Bytes; + return *this; +} + AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { // FIXME: What if both have alignments, but they don't match?! if (!Alignment) @@ -1087,6 +1150,9 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { if (!StackAlignment) StackAlignment = B.StackAlignment; + if (!DerefBytes) + DerefBytes = B.DerefBytes; + Attrs |= B.Attrs; for (td_const_iterator I = B.TargetDepAttrs.begin(), @@ -1142,7 +1208,8 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { if (B.TargetDepAttrs.find(I->first) == B.TargetDepAttrs.end()) return false; - return Alignment == B.Alignment && StackAlignment == B.StackAlignment; + return Alignment == B.Alignment && StackAlignment == B.StackAlignment && + DerefBytes == B.DerefBytes; } AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { @@ -1151,6 +1218,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { + if (I == Attribute::Dereferenceable) + continue; if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { Attrs[I] = true; @@ -1184,6 +1253,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { .addAttribute(Attribute::NoAlias) .addAttribute(Attribute::NoCapture) .addAttribute(Attribute::NonNull) + .addDereferenceableAttr(1) // the int here is ignored .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::StructRet) diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 14435711bb6..de59b26ec92 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -77,11 +77,17 @@ unsigned Argument::getArgNo() const { } /// hasNonNullAttr - Return true if this argument has the nonnull attribute on -/// it in its containing function. +/// it in its containing function. Also returns true if at least one byte is +/// known to be dereferenceable and the pointer is in addrspace(0). bool Argument::hasNonNullAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->getAttributes(). - hasAttribute(getArgNo()+1, Attribute::NonNull); + if (getParent()->getAttributes(). + hasAttribute(getArgNo()+1, Attribute::NonNull)) + return true; + else if (getDereferenceableBytes() > 0 && + getType()->getPointerAddressSpace() == 0) + return true; + return false; } /// hasByValAttr - Return true if this argument has the byval attribute on it @@ -113,6 +119,12 @@ unsigned Argument::getParamAlignment() const { } +uint64_t Argument::getDereferenceableBytes() const { + assert(getType()->isPointerTy() && + "Only pointers have dereferenceable bytes"); + return getParent()->getDereferenceableBytes(getArgNo()+1); +} + /// hasNestAttr - Return true if this argument has the nest attribute on /// it in its containing function. bool Argument::hasNestAttr() const { diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 999685e15eb..efdd6a7ed6e 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -15,6 +15,7 @@ #include "LLVMContextImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -504,9 +505,29 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, if (const GlobalVariable *GV = dyn_cast(V)) return !GV->hasExternalWeakLinkage(); - // byval arguments are ok. - if (const Argument *A = dyn_cast(V)) - return A->hasByValAttr(); + // byval arguments are okay. Arguments specifically marked as + // dereferenceable are okay too. + if (const Argument *A = dyn_cast(V)) { + if (A->hasByValAttr()) + return true; + else if (uint64_t Bytes = A->getDereferenceableBytes()) { + Type *Ty = V->getType()->getPointerElementType(); + if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes) + return true; + } + + return false; + } + + // Return values from call sites specifically marked as dereferenceable are + // also okay. + if (ImmutableCallSite CS = V) { + if (uint64_t Bytes = CS.getDereferenceableBytes(0)) { + Type *Ty = V->getType()->getPointerElementType(); + if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes) + return true; + } + } // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast(V)) { diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll index 49366de9836..2490e592072 100644 --- a/test/Bitcode/attributes.ll +++ b/test/Bitcode/attributes.ll @@ -229,6 +229,16 @@ define void @f38() unnamed_addr jumptable { unreachable } +define dereferenceable(2) i8* @f39(i8* dereferenceable(1) %a) { +; CHECK: define dereferenceable(2) i8* @f39(i8* dereferenceable(1) %a) { + ret i8* %a +} + +define dereferenceable(18446744073709551606) i8* @f40(i8* dereferenceable(18446744073709551615) %a) { +; CHECK: define dereferenceable(18446744073709551606) i8* @f40(i8* dereferenceable(18446744073709551615) %a) { + ret i8* %a +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index 42084c9eeed..94872d35bb9 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -874,6 +874,21 @@ define i1 @nonnull_arg(i32* nonnull %i) { ; CHECK: ret i1 false } +define i1 @nonnull_deref_arg(i32* dereferenceable(4) %i) { + %cmp = icmp eq i32* %i, null + ret i1 %cmp +; CHECK-LABEL: @nonnull_deref_arg +; CHECK: ret i1 false +} + +define i1 @nonnull_deref_as_arg(i32 addrspace(1)* dereferenceable(4) %i) { + %cmp = icmp eq i32 addrspace(1)* %i, null + ret i1 %cmp +; CHECK-LABEL: @nonnull_deref_as_arg +; CHECK: icmp +; CHECK ret +} + declare nonnull i32* @returns_nonnull_helper() define i1 @returns_nonnull() { %call = call nonnull i32* @returns_nonnull_helper() @@ -883,6 +898,25 @@ define i1 @returns_nonnull() { ; CHECK: ret i1 false } +declare dereferenceable(4) i32* @returns_nonnull_deref_helper() +define i1 @returns_nonnull_deref() { + %call = call dereferenceable(4) i32* @returns_nonnull_deref_helper() + %cmp = icmp eq i32* %call, null + ret i1 %cmp +; CHECK-LABEL: @returns_nonnull_deref +; CHECK: ret i1 false +} + +declare dereferenceable(4) i32 addrspace(1)* @returns_nonnull_deref_as_helper() +define i1 @returns_nonnull_as_deref() { + %call = call dereferenceable(4) i32 addrspace(1)* @returns_nonnull_deref_as_helper() + %cmp = icmp eq i32 addrspace(1)* %call, null + ret i1 %cmp +; CHECK-LABEL: @returns_nonnull_as_deref +; CHECK: icmp +; CHECK: ret +} + ; If a bit is known to be zero for A and known to be one for B, ; then A and B cannot be equal. define i1 @icmp_eq_const(i32 %a) nounwind { diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll new file mode 100644 index 00000000000..972d75fff35 --- /dev/null +++ b/test/Transforms/LICM/hoist-deref-load.ll @@ -0,0 +1,86 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This test represents the following function: +; void test1(int * __restrict__ a, int * __restrict__ b, int &c, int n) { +; for (int i = 0; i < n; ++i) +; if (a[i] > 0) +; a[i] = c*b[i]; +; } +; and we want to hoist the load of %c out of the loop. This can be done only +; because the dereferenceable attribute is on %c. + +; CHECK-LABEL: @test1 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull dereferenceable(4) %c, i32 %n) #0 { +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; This is the same as @test1, but without the dereferenceable attribute on %c. +; Without this attribute, we should not hoist the load of %c. + +; CHECK-LABEL: @test2 +; CHECK: if.then: +; CHECK: load i32* %c, align 4 + +define void @test2(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull %c, i32 %n) #0 { +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { nounwind uwtable } +