From cf8b959e8d938d39c0899365d98426cdbd96ed93 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 1 Aug 2014 21:11:34 +0000 Subject: [PATCH] BitcodeReader: Fix some BlockAddress forward reference corner cases `BlockAddress`es are interesting in that they can reference basic blocks from *outside* the block's function. Since basic blocks are not global values, this presents particular challenges for lazy parsing. One corner case was found in PR11677 and fixed in r147425. In that case, a global variable references a block address. It's necessary to load the relevant function to resolve the forward reference before doing anything with the module. By inspection, I found (and have fixed here) two other cases: - An instruction from one function references a block address from another function, and only the first function is lazily loaded. I fixed this the same way as PR11677: by eagerly loading the referenced function. - A function whose block address is taken is dematerialized, leaving invalid references to it. I fixed this by refusing to dematerialize functions whose block addresses are taken (if you have to load it, you can't unload it). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214559 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Bitcode/ReaderWriter.h | 1 + lib/Bitcode/Reader/BitcodeReader.cpp | 59 +++++++++++++++++++++--- lib/Bitcode/Reader/BitcodeReader.h | 15 ++++-- unittests/Bitcode/BitReaderTest.cpp | 68 ++++++++++++++++++++++++++++ 4 files changed, 134 insertions(+), 9 deletions(-) diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index b32009d38e9..6fb77565979 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -160,6 +160,7 @@ namespace llvm { InvalidMultipleBlocks, // We found multiple blocks of a kind that should // have only one NeverResolvedValueFoundInFunction, + NeverResolvedFunctionFromBlockAddress, InvalidValue // Invalid version, inst number, attr number, etc }; inline std::error_code make_error_code(BitcodeError E) { diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 9dd209df124..8537354a5b3 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -31,11 +31,31 @@ enum { SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; -void BitcodeReader::materializeForwardReferencedFunctions() { +std::error_code BitcodeReader::materializeForwardReferencedFunctions() { + if (WillMaterializeAllForwardRefs) + return std::error_code(); + + // Prevent recursion. + WillMaterializeAllForwardRefs = true; + while (!BlockAddrFwdRefs.empty()) { Function *F = BlockAddrFwdRefs.begin()->first; - F->Materialize(); + assert(F && "Expected valid function"); + // Check for a function that isn't materializable to prevent an infinite + // loop. When parsing a blockaddress stored in a global variable, there + // isn't a trivial way to check if a function will have a body without a + // linear search through FunctionsWithBodies, so just check it here. + if (!F->isMaterializable()) + return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress); + + // Try to materialize F. + if (std::error_code EC = Materialize(F)) + return EC; } + + // Reset state. + WillMaterializeAllForwardRefs = false; + return std::error_code(); } void BitcodeReader::FreeState() { @@ -1587,6 +1607,9 @@ std::error_code BitcodeReader::ParseConstants() { if (!Fn) return Error(BitcodeError::InvalidRecord); + // Don't let Fn get dematerialized. + BlockAddressesTaken.insert(Fn); + // If the function is already parsed we can insert the block address right // away. if (!Fn->empty()) { @@ -3274,13 +3297,21 @@ std::error_code BitcodeReader::Materialize(GlobalValue *GV) { } } - return std::error_code(); + // Bring in any functions that this function forward-referenced via + // blockaddresses. + return materializeForwardReferencedFunctions(); } bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { const Function *F = dyn_cast(GV); if (!F || F->isDeclaration()) return false; + + // Dematerializing F would leave dangling references that wouldn't be + // reconnected on re-materialization. + if (BlockAddressesTaken.count(F)) + return false; + return DeferredFunctionInfo.count(const_cast(F)); } @@ -3299,6 +3330,10 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) { std::error_code BitcodeReader::MaterializeModule(Module *M) { assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); + + // Promise to materialize all forward references. + WillMaterializeAllForwardRefs = true; + // Iterate over the module, deserializing any functions that are still on // disk. for (Module::iterator F = TheModule->begin(), E = TheModule->end(); @@ -3314,6 +3349,11 @@ std::error_code BitcodeReader::MaterializeModule(Module *M) { if (NextUnreadBit) ParseModule(true); + // Check that all block address forward references got resolved (as we + // promised above). + if (!BlockAddrFwdRefs.empty()) + return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress); + // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire // module is materialized because there could always be another function body @@ -3431,6 +3471,8 @@ class BitcodeErrorCategoryType : public std::error_category { return "Invalid multiple blocks"; case BitcodeError::NeverResolvedValueFoundInFunction: return "Never resolved value found in function"; + case BitcodeError::NeverResolvedFunctionFromBlockAddress: + return "Never resolved function from blockaddress"; case BitcodeError::InvalidValue: return "Invalid value"; } @@ -3455,13 +3497,18 @@ ErrorOr llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, Module *M = new Module(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer, Context); M->setMaterializer(R); - if (std::error_code EC = R->ParseBitcodeInto(M)) { + + auto cleanupOnError = [&](std::error_code EC) { R->releaseBuffer(); // Never take ownership on error. delete M; // Also deletes R. return EC; - } + }; - R->materializeForwardReferencedFunctions(); + if (std::error_code EC = R->ParseBitcodeInto(M)) + return cleanupOnError(EC); + + if (std::error_code EC = R->materializeForwardReferencedFunctions()) + return cleanupOnError(EC); return M; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 40f8d134f6a..341e134302d 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -193,20 +193,29 @@ class BitcodeReader : public GVMaterializer { /// not need this flag. bool UseRelativeIDs; + /// True if all functions will be materialized, negating the need to process + /// (e.g.) blockaddress forward references. + bool WillMaterializeAllForwardRefs; + + /// Functions that have block addresses taken. This is usually empty. + SmallPtrSet BlockAddressesTaken; + public: std::error_code Error(BitcodeError E) { return make_error_code(E); } explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), + WillMaterializeAllForwardRefs(false) {} explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) : Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), + WillMaterializeAllForwardRefs(false) {} ~BitcodeReader() { FreeState(); } - void materializeForwardReferencedFunctions(); + std::error_code materializeForwardReferencedFunctions(); void FreeState(); diff --git a/unittests/Bitcode/BitReaderTest.cpp b/unittests/Bitcode/BitReaderTest.cpp index 144853d4d78..8331527437b 100644 --- a/unittests/Bitcode/BitReaderTest.cpp +++ b/unittests/Bitcode/BitReaderTest.cpp @@ -70,6 +70,74 @@ TEST(BitReaderTest, MaterializeFunctionsForBlockAddr) { // PR11677 " unreachable\n" "}\n"); EXPECT_FALSE(verifyModule(*M, &dbgs())); + + // Try (and fail) to dematerialize @func. + M->getFunction("func")->Dematerialize(); + EXPECT_FALSE(M->getFunction("func")->empty()); +} + +TEST(BitReaderTest, MaterializeFunctionsForBlockAddrInFunctionBefore) { + SmallString<1024> Mem; + + LLVMContext Context; + std::unique_ptr M = getLazyModuleFromAssembly( + Context, Mem, "define i8* @before() {\n" + " ret i8* blockaddress(@func, %bb)\n" + "}\n" + "define void @other() {\n" + " unreachable\n" + "}\n" + "define void @func() {\n" + " unreachable\n" + "bb:\n" + " unreachable\n" + "}\n"); + EXPECT_TRUE(M->getFunction("before")->empty()); + EXPECT_TRUE(M->getFunction("func")->empty()); + EXPECT_FALSE(verifyModule(*M, &dbgs())); + + // Materialize @before, pulling in @func. + EXPECT_FALSE(M->getFunction("before")->Materialize()); + EXPECT_FALSE(M->getFunction("func")->empty()); + EXPECT_TRUE(M->getFunction("other")->empty()); + EXPECT_FALSE(verifyModule(*M, &dbgs())); + + // Try (and fail) to dematerialize @func. + M->getFunction("func")->Dematerialize(); + EXPECT_FALSE(M->getFunction("func")->empty()); + EXPECT_FALSE(verifyModule(*M, &dbgs())); +} + +TEST(BitReaderTest, MaterializeFunctionsForBlockAddrInFunctionAfter) { + SmallString<1024> Mem; + + LLVMContext Context; + std::unique_ptr M = getLazyModuleFromAssembly( + Context, Mem, "define void @func() {\n" + " unreachable\n" + "bb:\n" + " unreachable\n" + "}\n" + "define void @other() {\n" + " unreachable\n" + "}\n" + "define i8* @after() {\n" + " ret i8* blockaddress(@func, %bb)\n" + "}\n"); + EXPECT_TRUE(M->getFunction("after")->empty()); + EXPECT_TRUE(M->getFunction("func")->empty()); + EXPECT_FALSE(verifyModule(*M, &dbgs())); + + // Materialize @after, pulling in @func. + EXPECT_FALSE(M->getFunction("after")->Materialize()); + EXPECT_FALSE(M->getFunction("func")->empty()); + EXPECT_TRUE(M->getFunction("other")->empty()); + EXPECT_FALSE(verifyModule(*M, &dbgs())); + + // Try (and fail) to dematerialize @func. + M->getFunction("func")->Dematerialize(); + EXPECT_FALSE(M->getFunction("func")->empty()); + EXPECT_FALSE(verifyModule(*M, &dbgs())); } } // end namespace