diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt new file mode 100644 index 00000000000..f6d65c947a5 --- /dev/null +++ b/tools/llvm-diff/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS support asmparser bitreader) + +add_llvm_tool(llvm-diff + llvm-diff.cpp + DifferenceEngine.cpp + ) diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp new file mode 100644 index 00000000000..400ab9fcb5a --- /dev/null +++ b/tools/llvm-diff/DifferenceEngine.cpp @@ -0,0 +1,600 @@ +//===-- DifferenceEngine.cpp - Structural function/module comparison ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the interface to the LLVM difference engine, +// which structurally compares functions within a module. +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "DifferenceEngine.h" + +using namespace llvm; + +namespace { + +/// A priority queue, implemented as a heap. +template +class PriorityQueue { + Sorter Precedes; + llvm::SmallVector Storage; + +public: + PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {} + + /// Checks whether the heap is empty. + bool empty() const { return Storage.empty(); } + + /// Insert a new value on the heap. + void insert(const T &V) { + unsigned Index = Storage.size(); + Storage.push_back(V); + if (Index == 0) return; + + T *data = Storage.data(); + while (true) { + unsigned Target = (Index + 1) / 2 - 1; + if (!Precedes(data[Index], data[Target])) return; + std::swap(data[Index], data[Target]); + if (Target == 0) return; + Index = Target; + } + } + + /// Remove the minimum value in the heap. Only valid on a non-empty heap. + T remove_min() { + assert(!empty()); + T tmp = Storage[0]; + + unsigned NewSize = Storage.size() - 1; + if (NewSize) { + // Move the slot at the end to the beginning. + if (isPodLike::value) + Storage[0] = Storage[NewSize]; + else + std::swap(Storage[0], Storage[NewSize]); + + // Bubble the root up as necessary. + unsigned Index = 0; + while (true) { + // With a 1-based index, the children would be Index*2 and Index*2+1. + unsigned R = (Index + 1) * 2; + unsigned L = R - 1; + + // If R is out of bounds, we're done after this in any case. + if (R >= NewSize) { + // If L is also out of bounds, we're done immediately. + if (L >= NewSize) break; + + // Otherwise, test whether we should swap L and Index. + if (Precedes(Storage[L], Storage[Index])) + std::swap(Storage[L], Storage[Index]); + break; + } + + // Otherwise, we need to compare with the smaller of L and R. + // Prefer R because it's closer to the end of the array. + unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R); + + // If Index is >= the min of L and R, then heap ordering is restored. + if (!Precedes(Storage[IndexToTest], Storage[Index])) + break; + + // Otherwise, keep bubbling up. + std::swap(Storage[IndexToTest], Storage[Index]); + Index = IndexToTest; + } + } + Storage.pop_back(); + + return tmp; + } +}; + +/// A function-scope difference engine. +class FunctionDifferenceEngine { + DifferenceEngine &Engine; + + /// The current mapping from old local values to new local values. + DenseMap Values; + + /// The current mapping from old blocks to new blocks. + DenseMap Blocks; + + DenseSet > TentativeValuePairs; + + unsigned getUnprocPredCount(BasicBlock *Block) const { + unsigned Count = 0; + for (pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; ++I) + if (!Blocks.count(*I)) Count++; + return Count; + } + + typedef std::pair BlockPair; + + /// A type which sorts a priority queue by the number of unprocessed + /// predecessor blocks it has remaining. + /// + /// This is actually really expensive to calculate. + struct QueueSorter { + const FunctionDifferenceEngine &fde; + explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {} + + bool operator()(const BlockPair &Old, const BlockPair &New) { + return fde.getUnprocPredCount(Old.first) + < fde.getUnprocPredCount(New.first); + } + }; + + /// A queue of unified blocks to process. + PriorityQueue Queue; + + /// Try to unify the given two blocks. Enqueues them for processing + /// if they haven't already been processed. + /// + /// Returns true if there was a problem unifying them. + bool tryUnify(BasicBlock *L, BasicBlock *R) { + BasicBlock *&Ref = Blocks[L]; + + if (Ref) { + if (Ref == R) return false; + + Engine.logf("successor %s cannot be equivalent to %s; " + "it's already equivalent to %s") + << L << R << Ref; + return true; + } + + Ref = R; + Queue.insert(BlockPair(L, R)); + return false; + } + + void processQueue() { + while (!Queue.empty()) { + BlockPair Pair = Queue.remove_min(); + diff(Pair.first, Pair.second); + } + } + + void diff(BasicBlock *L, BasicBlock *R) { + DifferenceEngine::Context C(Engine, L, R); + + BasicBlock::iterator LI = L->begin(), LE = L->end(); + BasicBlock::iterator RI = R->begin(), RE = R->end(); + + do { + assert(LI != LE && RI != RE); + Instruction *LeftI = &*LI, *RightI = &*RI; + + // If the instructions differ, start the more sophisticated diff + // algorithm here. + if (diff(LeftI, RightI, false, true)) + return runBlockDiff(LI, RI); + + // Otherwise, unify them. + if (!LeftI->use_empty()) + Values[LeftI] = RightI; + + ++LI, ++RI; + } while (LI != LE); // This is sufficient: we can't get equality of + // terminators if there are residual instructions. + } + + bool matchForBlockDiff(Instruction *L, Instruction *R); + void runBlockDiff(BasicBlock::iterator LI, BasicBlock::iterator RI); + + bool diffCallSites(CallSite L, CallSite R, bool Complain) { + // FIXME: call attributes + if (!equivalentAsOperands(L.getCalledValue(), R.getCalledValue())) { + if (Complain) Engine.log("called functions differ"); + return true; + } + if (L.arg_size() != R.arg_size()) { + if (Complain) Engine.log("argument counts differ"); + return true; + } + for (unsigned I = 0, E = L.arg_size(); I != E; ++I) + if (!equivalentAsOperands(L.getArgument(I), R.getArgument(I))) { + if (Complain) + Engine.logf("arguments %s and %s differ") + << L.getArgument(I) << R.getArgument(I); + return true; + } + return false; + } + + bool diff(Instruction *L, Instruction *R, bool Complain, bool TryUnify) { + // FIXME: metadata (if Complain is set) + + // Different opcodes always imply different operations. + if (L->getOpcode() != R->getOpcode()) { + if (Complain) Engine.log("different instruction types"); + return true; + } + + if (isa(L)) { + if (cast(L)->getPredicate() + != cast(R)->getPredicate()) { + if (Complain) Engine.log("different predicates"); + return true; + } + } else if (isa(L)) { + return diffCallSites(CallSite(L), CallSite(R), Complain); + } else if (isa(L)) { + // FIXME: implement. + + // This is really wierd; type uniquing is broken? + if (L->getType() != R->getType()) { + if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) { + if (Complain) Engine.log("different phi types"); + return true; + } + } + return false; + + // Terminators. + } else if (isa(L)) { + InvokeInst *LI = cast(L); + InvokeInst *RI = cast(R); + if (diffCallSites(CallSite(LI), CallSite(RI), Complain)) + return true; + + if (TryUnify) { + tryUnify(LI->getNormalDest(), RI->getNormalDest()); + tryUnify(LI->getUnwindDest(), RI->getUnwindDest()); + } + return false; + + } else if (isa(L)) { + BranchInst *LI = cast(L); + BranchInst *RI = cast(R); + if (LI->isConditional() != RI->isConditional()) { + if (Complain) Engine.log("branch conditionality differs"); + return true; + } + + if (LI->isConditional()) { + if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) { + if (Complain) Engine.log("branch conditions differ"); + return true; + } + if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1)); + } + if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0)); + return false; + + } else if (isa(L)) { + SwitchInst *LI = cast(L); + SwitchInst *RI = cast(R); + if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) { + if (Complain) Engine.log("switch conditions differ"); + return true; + } + if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest()); + + bool Difference = false; + + DenseMap LCases; + for (unsigned I = 1, E = LI->getNumCases(); I != E; ++I) + LCases[LI->getCaseValue(I)] = LI->getSuccessor(I); + for (unsigned I = 1, E = RI->getNumCases(); I != E; ++I) { + ConstantInt *CaseValue = RI->getCaseValue(I); + BasicBlock *LCase = LCases[CaseValue]; + if (LCase) { + if (TryUnify) tryUnify(LCase, RI->getSuccessor(I)); + LCases.erase(CaseValue); + } else if (!Difference) { + if (Complain) + Engine.logf("right switch has extra case %s") << CaseValue; + Difference = true; + } + } + if (!Difference) + for (DenseMap::iterator + I = LCases.begin(), E = LCases.end(); I != E; ++I) { + if (Complain) + Engine.logf("left switch has extra case %s") << I->first; + Difference = true; + } + return Difference; + } else if (isa(L)) { + return false; + } + + if (L->getNumOperands() != R->getNumOperands()) { + if (Complain) Engine.log("instructions have different operand counts"); + return true; + } + + for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) { + Value *LO = L->getOperand(I), *RO = R->getOperand(I); + if (!equivalentAsOperands(LO, RO)) { + if (Complain) Engine.logf("operands %s and %s differ") << LO << RO; + return true; + } + } + + return false; + } + + bool equivalentAsOperands(Constant *L, Constant *R) { + // Use equality as a preliminary filter. + if (L == R) + return true; + + if (L->getValueID() != R->getValueID()) + return false; + + // Ask the engine about global values. + if (isa(L)) + return Engine.equivalentAsOperands(cast(L), + cast(R)); + + // Compare constant expressions structurally. + if (isa(L)) + return equivalentAsOperands(cast(L), + cast(R)); + + // Nulls of the "same type" don't always actually have the same + // type; I don't know why. Just white-list them. + if (isa(L)) + return true; + + // Block addresses only match if we've already encountered the + // block. FIXME: tentative matches? + if (isa(L)) + return Blocks[cast(L)->getBasicBlock()] + == cast(R)->getBasicBlock(); + + return false; + } + + bool equivalentAsOperands(ConstantExpr *L, ConstantExpr *R) { + if (L == R) + return true; + if (L->getOpcode() != R->getOpcode()) + return false; + + switch (L->getOpcode()) { + case Instruction::ICmp: + case Instruction::FCmp: + if (L->getPredicate() != R->getPredicate()) + return false; + break; + + case Instruction::GetElementPtr: + // FIXME: inbounds? + break; + + default: + break; + } + + if (L->getNumOperands() != R->getNumOperands()) + return false; + + for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) + if (!equivalentAsOperands(L->getOperand(I), R->getOperand(I))) + return false; + + return true; + } + + bool equivalentAsOperands(Value *L, Value *R) { + // Fall out if the values have different kind. + // This possibly shouldn't take priority over oracles. + if (L->getValueID() != R->getValueID()) + return false; + + // Value subtypes: Argument, Constant, Instruction, BasicBlock, + // InlineAsm, MDNode, MDString, PseudoSourceValue + + if (isa(L)) + return equivalentAsOperands(cast(L), cast(R)); + + if (isa(L)) + return Values[L] == R || TentativeValuePairs.count(std::make_pair(L, R)); + + if (isa(L)) + return Values[L] == R; + + if (isa(L)) + return Blocks[cast(L)] != R; + + // Pretend everything else is identical. + return true; + } + + // Avoid a gcc warning about accessing 'this' in an initializer. + FunctionDifferenceEngine *this_() { return this; } + +public: + FunctionDifferenceEngine(DifferenceEngine &Engine) : + Engine(Engine), Queue(QueueSorter(*this_())) {} + + void diff(Function *L, Function *R) { + if (L->arg_size() != R->arg_size()) + Engine.log("different argument counts"); + + // Map the arguments. + for (Function::arg_iterator + LI = L->arg_begin(), LE = L->arg_end(), + RI = R->arg_begin(), RE = R->arg_end(); + LI != LE && RI != RE; ++LI, ++RI) + Values[&*LI] = &*RI; + + tryUnify(&*L->begin(), &*R->begin()); + processQueue(); + } +}; + +struct DiffEntry { + DiffEntry() : Cost(0) {} + + unsigned Cost; + llvm::SmallVector Path; // actually of DifferenceEngine::DiffChange +}; + +bool FunctionDifferenceEngine::matchForBlockDiff(Instruction *L, + Instruction *R) { + return !diff(L, R, false, false); +} + +void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart, + BasicBlock::iterator RStart) { + BasicBlock::iterator LE = LStart->getParent()->end(); + BasicBlock::iterator RE = RStart->getParent()->end(); + + unsigned NL = std::distance(LStart, LE); + + SmallVector Paths1(NL+1); + SmallVector Paths2(NL+1); + + DiffEntry *Cur = Paths1.data(); + DiffEntry *Next = Paths2.data(); + + assert(TentativeValuePairs.empty()); + + // Initialize the first column. + for (unsigned I = 0; I != NL+1; ++I) { + Cur[I].Cost = I; + for (unsigned J = 0; J != I; ++J) + Cur[I].Path.push_back(DifferenceEngine::DC_left); + } + + for (BasicBlock::iterator RI = RStart; RI != RE; ++RI) { + // Initialize the first row. + Next[0] = Cur[0]; + Next[0].Path.push_back(DifferenceEngine::DC_right); + + unsigned Index = 1; + for (BasicBlock::iterator LI = LStart; LI != LE; ++LI, ++Index) { + if (matchForBlockDiff(&*LI, &*RI)) { + Next[Index] = Cur[Index-1]; + Next[Index].Path.push_back(DifferenceEngine::DC_match); + TentativeValuePairs.insert(std::make_pair(&*LI, &*RI)); + } else if (Next[Index-1].Cost <= Cur[Index].Cost) { + Next[Index] = Next[Index-1]; + Next[Index].Path.push_back(DifferenceEngine::DC_left); + } else { + Next[Index] = Cur[Index]; + Next[Index].Path.push_back(DifferenceEngine::DC_right); + } + } + + std::swap(Cur, Next); + } + + SmallVectorImpl &Path = Cur[NL].Path; + BasicBlock::iterator LI = LStart, RI = RStart; + + DifferenceEngine::DiffLogBuilder Diff(Engine); + + // Drop trailing matches. + while (Path.back() == DifferenceEngine::DC_match) + Path.pop_back(); + + for (SmallVectorImpl::iterator + PI = Path.begin(), PE = Path.end(); PI != PE; ++PI) { + switch (static_cast(*PI)) { + case DifferenceEngine::DC_match: + assert(LI != LE && RI != RE); + { + Instruction *L = &*LI, *R = &*RI; + DifferenceEngine::Context C(Engine, L, R); + diff(L, R, false, true); + Diff.addMatch(L, R); + } + ++LI; ++RI; + break; + + case DifferenceEngine::DC_left: + assert(LI != LE); + Diff.addLeft(&*LI); + ++LI; + break; + + case DifferenceEngine::DC_right: + assert(RI != RE); + Diff.addRight(&*RI); + ++RI; + break; + } + } + + TentativeValuePairs.clear(); +} + +} + +void DifferenceEngine::diff(Function *L, Function *R) { + Context C(*this, L, R); + + // FIXME: types + // FIXME: attributes and CC + // FIXME: parameter attributes + + // If both are declarations, we're done. + if (L->empty() && R->empty()) + return; + else if (L->empty()) + log("left function is declaration, right function is definition"); + else if (R->empty()) + log("right function is declaration, left function is definition"); + else + FunctionDifferenceEngine(*this).diff(L, R); +} + +void DifferenceEngine::diff(Module *L, Module *R) { + StringSet<> LNames; + SmallVector, 20> Queue; + + for (Module::iterator I = L->begin(), E = L->end(); I != E; ++I) { + Function *LFn = &*I; + LNames.insert(LFn->getName()); + + if (Function *RFn = R->getFunction(LFn->getName())) + Queue.push_back(std::make_pair(LFn, RFn)); + else + logf("function %s exists only in left module") << LFn; + } + + for (Module::iterator I = R->begin(), E = R->end(); I != E; ++I) { + Function *RFn = &*I; + if (!LNames.count(RFn->getName())) + logf("function %s exists only in right module") << RFn; + } + + for (SmallVectorImpl >::iterator + I = Queue.begin(), E = Queue.end(); I != E; ++I) + diff(I->first, I->second); +} + +bool DifferenceEngine::equivalentAsOperands(GlobalValue *L, GlobalValue *R) { + if (globalValueOracle) return (*globalValueOracle)(L, R); + return L->getName() == R->getName(); +} diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h new file mode 100644 index 00000000000..9d0ebecc53d --- /dev/null +++ b/tools/llvm-diff/DifferenceEngine.h @@ -0,0 +1,171 @@ +//===-- DifferenceEngine.h - Module comparator ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the interface to the LLVM difference engine, +// which structurally compares functions within a module. +// +//===----------------------------------------------------------------------===// + +#ifndef _LLVM_DIFFERENCE_ENGINE_H_ +#define _LLVM_DIFFERENCE_ENGINE_H_ + +#include +#include +#include + +namespace llvm { + class LLVMContext; + class Module; + class Function; + class Twine; + class Value; + class GlobalValue; + + /// A class for performing structural comparisons of LLVM assembly. + class DifferenceEngine { + public: + /// A temporary-object class for building up log messages. + class LogBuilder { + DifferenceEngine &Engine; + + /// The use of a stored StringRef here is okay because + /// LogBuilder should be used only as a temporary, and as a + /// temporary it will be destructed before whatever temporary + /// might be initializing this format. + StringRef Format; + + SmallVector Arguments; + + public: + LogBuilder(DifferenceEngine &Engine, StringRef Format) + : Engine(Engine), Format(Format) {} + + LogBuilder &operator<<(Value *V) { + Arguments.push_back(V); + return *this; + } + + ~LogBuilder() { + Engine.consumer.logf(*this); + } + + StringRef getFormat() const { return Format; } + + unsigned getNumArguments() const { return Arguments.size(); } + Value *getArgument(unsigned I) const { return Arguments[I]; } + }; + + enum DiffChange { DC_match, DC_left, DC_right }; + + /// A temporary-object class for building up diff messages. + class DiffLogBuilder { + typedef std::pair DiffRecord; + SmallVector Diff; + + DifferenceEngine &Engine; + + public: + DiffLogBuilder(DifferenceEngine &Engine) : Engine(Engine) {} + ~DiffLogBuilder() { Engine.consumer.logd(*this); } + + void addMatch(Instruction *L, Instruction *R) { + Diff.push_back(DiffRecord(L, R)); + } + void addLeft(Instruction *L) { Diff.push_back(DiffRecord(L, 0)); } + void addRight(Instruction *R) { Diff.push_back(DiffRecord(0, R)); } + + unsigned getNumLines() const { return Diff.size(); } + DiffChange getLineKind(unsigned I) const { + return (Diff[I].first ? (Diff[I].second ? DC_match : DC_left) + : DC_right); + } + Instruction *getLeft(unsigned I) const { return Diff[I].first; } + Instruction *getRight(unsigned I) const { return Diff[I].second; } + }; + + /// The interface for consumers of difference data. + struct Consumer { + /// Record that a local context has been entered. Left and + /// Right are IR "containers" of some sort which are being + /// considered for structural equivalence: global variables, + /// functions, blocks, instructions, etc. + virtual void enterContext(Value *Left, Value *Right) = 0; + + /// Record that a local context has been exited. + virtual void exitContext() = 0; + + /// Record a difference within the current context. + virtual void log(StringRef Text) = 0; + + /// Record a formatted difference within the current context. + virtual void logf(const LogBuilder &Log) = 0; + + /// Record a line-by-line instruction diff. + virtual void logd(const DiffLogBuilder &Log) = 0; + + protected: + ~Consumer() {} + }; + + /// A RAII object for recording the current context. + struct Context { + Context(DifferenceEngine &Engine, Value *L, Value *R) : Engine(Engine) { + Engine.consumer.enterContext(L, R); + } + + ~Context() { + Engine.consumer.exitContext(); + } + + private: + DifferenceEngine &Engine; + }; + + /// An oracle for answering whether two values are equivalent as + /// operands. + struct Oracle { + virtual bool operator()(Value *L, Value *R) = 0; + + protected: + ~Oracle() {} + }; + + DifferenceEngine(LLVMContext &context, Consumer &consumer) + : context(context), consumer(consumer), globalValueOracle(0) {} + + void diff(Module *L, Module *R); + void diff(Function *L, Function *R); + + void log(StringRef text) { + consumer.log(text); + } + + LogBuilder logf(StringRef text) { + return LogBuilder(*this, text); + } + + /// Installs an oracle to decide whether two global values are + /// equivalent as operands. Without an oracle, global values are + /// considered equivalent as operands precisely when they have the + /// same name. + void setGlobalValueOracle(Oracle *oracle) { + globalValueOracle = oracle; + } + + /// Determines whether two global values are equivalent. + bool equivalentAsOperands(GlobalValue *L, GlobalValue *R); + + private: + LLVMContext &context; + Consumer &consumer; + Oracle *globalValueOracle; + }; +} + +#endif diff --git a/tools/llvm-diff/Makefile b/tools/llvm-diff/Makefile new file mode 100644 index 00000000000..58e49fa9596 --- /dev/null +++ b/tools/llvm-diff/Makefile @@ -0,0 +1,17 @@ +##===- tools/llvm-diff/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = llvm-diff +LINK_COMPONENTS := asmparser bitreader + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp new file mode 100644 index 00000000000..7c2a5d66c3b --- /dev/null +++ b/tools/llvm-diff/llvm-diff.cpp @@ -0,0 +1,301 @@ +#include +#include + +#include +#include +#include + +// Required to parse .ll files. +#include +#include + +// Required to parse .bc files. +#include +#include + +#include +#include +#include +#include +#include + +#include "DifferenceEngine.h" + +using namespace llvm; + +/// Reads a module from a file. If the filename ends in .ll, it is +/// interpreted as an assembly file; otherwise, it is interpreted as +/// bitcode. On error, messages are written to stderr and null is +/// returned. +static Module *ReadModule(LLVMContext &Context, StringRef Name) { + // LLVM assembly path. + if (Name.endswith(".ll")) { + SMDiagnostic Diag; + Module *M = ParseAssemblyFile(Name, Diag, Context); + if (M) return M; + + Diag.Print("llvmdiff", errs()); + return 0; + } + + // Bitcode path. + MemoryBuffer *Buffer = MemoryBuffer::getFile(Name); + + // ParseBitcodeFile takes ownership of the buffer if it succeeds. + std::string Error; + Module *M = ParseBitcodeFile(Buffer, Context, &Error); + if (M) return M; + + errs() << "error parsing " << Name << ": " << Error; + delete Buffer; + return 0; +} + +static int usage() { + errs() << "expected usage:\n"; + errs() << " llvm-diff oldmodule.ll newmodule.ll [function list]\n"; + errs() << "Assembly or bitcode modules may be used interchangeably.\n"; + errs() << "If no functions are provided, all functions will be compared.\n"; + return 1; +} + +namespace { +struct DiffContext { + DiffContext(Value *L, Value *R) + : L(L), R(R), Differences(false), IsFunction(isa(L)) {} + Value *L; + Value *R; + bool Differences; + bool IsFunction; + DenseMap LNumbering; + DenseMap RNumbering; +}; + +void ComputeNumbering(Function *F, DenseMap &Numbering) { + unsigned BBN = 0; + unsigned IN = 0; + + // Arguments get the first numbers. + for (Function::arg_iterator + AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) + if (!AI->hasName()) + Numbering[&*AI] = IN++; + + // Walk the basic blocks in order. + for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) { + // Basic blocks have their own 'namespace'. + if (!FI->hasName()) + Numbering[&*FI] = BBN++; + + // Walk the instructions in order. + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) + // void instructions don't get numbers. + if (!BI->hasName() && !BI->getType()->isVoidTy()) + Numbering[&*BI] = IN++; + } + + assert(!Numbering.empty() && "asked for numbering but numbering was no-op"); +} + +class DiffConsumer : public DifferenceEngine::Consumer { +private: + Module *LModule; + Module *RModule; + SmallVector contexts; + bool Differences; + unsigned Indent; + + void printValue(Value *V, bool isL) { + if (V->hasName()) { + errs() << (isa(V) ? '@' : '%') << V->getName(); + return; + } + if (V->getType()->isVoidTy()) { + if (isa(V)) { + errs() << "store to "; + printValue(cast(V)->getPointerOperand(), isL); + } else if (isa(V)) { + errs() << "call to "; + printValue(cast(V)->getCalledValue(), isL); + } else if (isa(V)) { + errs() << "invoke to "; + printValue(cast(V)->getCalledValue(), isL); + } else { + errs() << *V; + } + return; + } + + unsigned N = contexts.size(); + while (N > 0) { + --N; + DiffContext &ctxt = contexts[N]; + if (!ctxt.IsFunction) continue; + if (isL) { + if (ctxt.LNumbering.empty()) + ComputeNumbering(cast(ctxt.L), ctxt.LNumbering); + errs() << '%' << ctxt.LNumbering[V]; + return; + } else { + if (ctxt.RNumbering.empty()) + ComputeNumbering(cast(ctxt.R), ctxt.RNumbering); + errs() << '%' << ctxt.RNumbering[V]; + return; + } + } + + errs() << ""; + } + + void header() { + if (contexts.empty()) return; + for (SmallVectorImpl::iterator + I = contexts.begin(), E = contexts.end(); I != E; ++I) { + if (I->Differences) continue; + if (isa(I->L)) { + // Extra newline between functions. + if (Differences) errs() << "\n"; + + Function *L = cast(I->L); + Function *R = cast(I->R); + if (L->getName() != R->getName()) + errs() << "in function " << L->getName() << " / " << R->getName() << ":\n"; + else + errs() << "in function " << L->getName() << ":\n"; + } else if (isa(I->L)) { + BasicBlock *L = cast(I->L); + BasicBlock *R = cast(I->R); + errs() << " in block "; + printValue(L, true); + errs() << " / "; + printValue(R, false); + errs() << ":\n"; + } else if (isa(I->L)) { + errs() << " in instruction "; + printValue(I->L, true); + errs() << " / "; + printValue(I->R, false); + errs() << ":\n"; + } + + I->Differences = true; + } + } + + void indent() { + unsigned N = Indent; + while (N--) errs() << ' '; + } + +public: + DiffConsumer(Module *L, Module *R) + : LModule(L), RModule(R), Differences(false), Indent(0) {} + + bool hadDifferences() const { return Differences; } + + void enterContext(Value *L, Value *R) { + contexts.push_back(DiffContext(L, R)); + Indent += 2; + } + void exitContext() { + Differences |= contexts.back().Differences; + contexts.pop_back(); + Indent -= 2; + } + + void log(StringRef text) { + header(); + indent(); + errs() << text << "\n"; + } + + void logf(const DifferenceEngine::LogBuilder &Log) { + header(); + indent(); + + // FIXME: we don't know whether these are l-values or r-values (ha!) + // Print them in some saner way! + errs() << Log.getFormat() << "\n"; + for (unsigned I = 0, E = Log.getNumArguments(); I != E; ++I) + Log.getArgument(I)->dump(); + } + + void logd(const DifferenceEngine::DiffLogBuilder &Log) { + header(); + + for (unsigned I = 0, E = Log.getNumLines(); I != E; ++I) { + indent(); + switch (Log.getLineKind(I)) { + case DifferenceEngine::DC_match: + errs() << " "; + Log.getLeft(I)->dump(); + //printValue(Log.getLeft(I), true); + break; + case DifferenceEngine::DC_left: + errs() << "< "; + Log.getLeft(I)->dump(); + //printValue(Log.getLeft(I), true); + break; + case DifferenceEngine::DC_right: + errs() << "> "; + Log.getRight(I)->dump(); + //printValue(Log.getRight(I), false); + break; + } + //errs() << "\n"; + } + } + +}; +} + +int main(int argc, const char **argv) { + if (argc < 3) return usage(); + + // Don't make StringRef locals like this at home. + StringRef LModuleFile = argv[1]; + StringRef RModuleFile = argv[2]; + + LLVMContext Context; + + // Load both modules. Die if that fails. + Module *LModule = ReadModule(Context, LModuleFile); + Module *RModule = ReadModule(Context, RModuleFile); + if (!LModule || !RModule) return 1; + + DiffConsumer Consumer(LModule, RModule); + DifferenceEngine Engine(Context, Consumer); + + // If any function names were given, just diff those. + const char **FnNames = argv + 3; + unsigned NumFnNames = argc - 3; + if (NumFnNames) { + for (unsigned I = 0; I != NumFnNames; ++I) { + StringRef FnName = FnNames[I]; + + // Drop leading sigils from the function name. + if (FnName.startswith("@")) FnName = FnName.substr(1); + + Function *LFn = LModule->getFunction(FnName); + Function *RFn = RModule->getFunction(FnName); + if (LFn && RFn) + Engine.diff(LFn, RFn); + else { + if (!LFn && !RFn) + errs() << "No function named @" << FnName << " in either module\n"; + else if (!LFn) + errs() << "No function named @" << FnName << " in left module\n"; + else + errs() << "No function named @" << FnName << " in right module\n"; + } + } + } else { + // Otherwise, diff all functions in the modules. + Engine.diff(LModule, RModule); + } + + delete LModule; + delete RModule; + + return Consumer.hadDifferences(); +}