2008-11-02 05:52:50 +00:00
//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass looks for equivalent functions that are mergeable and folds them.
//
// A hash is computed from the function, based on its type and number of
// basic blocks.
//
// Once all hashes are computed, we perform an expensive equality comparison
// on each function pair. This takes n^2/2 comparisons per bucket, so it's
// important that the hash function be high quality. The equality comparison
// iterates through each instruction in each basic block.
//
2010-05-13 05:48:45 +00:00
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
// leave two overridable thunks to it.
2008-11-02 05:52:50 +00:00
//
//===----------------------------------------------------------------------===//
//
// Future work:
//
// * virtual functions.
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
2010-08-08 05:04:23 +00:00
// and call, this is irrelevant, and we'd like to fold such functions.
2008-11-02 05:52:50 +00:00
//
2010-08-02 05:23:03 +00:00
// * switch from n^2 pair-wise comparisons to an n-way comparison for each
// bucket.
2010-05-13 05:48:45 +00:00
//
2010-08-08 05:04:23 +00:00
// * be smarter about bitcasts.
2010-05-13 05:48:45 +00:00
//
// In order to fold functions, we will sometimes add either bitcast instructions
// or bitcast constant expressions. Unfortunately, this can confound further
// analysis since the two functions differ where one has a bitcast and the
2010-08-08 05:04:23 +00:00
// other doesn't. We should learn to look through bitcasts.
2010-05-13 05:48:45 +00:00
//
2008-11-02 05:52:50 +00:00
//===----------------------------------------------------------------------===//
# define DEBUG_TYPE "mergefunc"
# include "llvm/Transforms/IPO.h"
2010-08-31 05:53:05 +00:00
# include "llvm/ADT/DenseSet.h"
2009-06-12 08:04:51 +00:00
# include "llvm/ADT/FoldingSet.h"
2010-05-13 05:48:45 +00:00
# include "llvm/ADT/SmallSet.h"
2008-11-02 05:52:50 +00:00
# include "llvm/ADT/Statistic.h"
2010-08-31 05:53:05 +00:00
# include "llvm/ADT/STLExtras.h"
2008-11-02 05:52:50 +00:00
# include "llvm/Constants.h"
# include "llvm/InlineAsm.h"
# include "llvm/Instructions.h"
2009-07-06 01:34:54 +00:00
# include "llvm/LLVMContext.h"
2008-11-02 05:52:50 +00:00
# include "llvm/Module.h"
# include "llvm/Pass.h"
2008-11-02 16:46:26 +00:00
# include "llvm/Support/CallSite.h"
2008-11-02 05:52:50 +00:00
# include "llvm/Support/Debug.h"
2009-07-11 20:10:48 +00:00
# include "llvm/Support/ErrorHandling.h"
2010-08-08 05:04:23 +00:00
# include "llvm/Support/IRBuilder.h"
2010-08-31 05:53:05 +00:00
# include "llvm/Support/ValueHandle.h"
2009-07-25 00:23:56 +00:00
# include "llvm/Support/raw_ostream.h"
2010-05-13 05:48:45 +00:00
# include "llvm/Target/TargetData.h"
2010-08-31 08:29:37 +00:00
# include <vector>
2008-11-02 05:52:50 +00:00
using namespace llvm;

// Counters maintained by this pass:
//   NumFunctionsMerged - bumped once per MergeTwoFunctions call.
//   NumThunksWritten   - bumped each time WriteThunk emits a thunk.
//   NumDoubleWeak      - bumped when two overridable functions are both
//                        turned into thunks to a shared internal function.
STATISTIC(NumFunctionsMerged, " Number of functions merged ");
STATISTIC(NumThunksWritten, " Number of thunks generated ");
STATISTIC(NumDoubleWeak, " Number of new functions created ");
2008-11-02 05:52:50 +00:00
2010-09-07 01:42:10 +00:00
/// ProfileFunction - Creates a hash-code for the function which is the same
/// for any two functions that will compare equal, without looking at the
/// instructions inside the function.
2010-09-05 08:22:49 +00:00
static unsigned ProfileFunction ( const Function * F ) {
const FunctionType * FTy = F - > getFunctionType ( ) ;
FoldingSetNodeID ID ;
ID . AddInteger ( F - > size ( ) ) ;
ID . AddInteger ( F - > getCallingConv ( ) ) ;
ID . AddBoolean ( F - > hasGC ( ) ) ;
ID . AddBoolean ( FTy - > isVarArg ( ) ) ;
ID . AddInteger ( FTy - > getReturnType ( ) - > getTypeID ( ) ) ;
for ( unsigned i = 0 , e = FTy - > getNumParams ( ) ; i ! = e ; + + i )
ID . AddInteger ( FTy - > getParamType ( i ) - > getTypeID ( ) ) ;
return ID . ComputeHash ( ) ;
2008-11-02 05:52:50 +00:00
}
2010-09-07 01:42:10 +00:00
namespace {
2010-09-05 08:22:49 +00:00
class ComparableFunction {
public :
2010-09-05 09:00:32 +00:00
static const ComparableFunction EmptyKey ;
static const ComparableFunction TombstoneKey ;
2010-09-05 08:22:49 +00:00
ComparableFunction ( Function * Func , TargetData * TD )
: Func ( Func ) , Hash ( ProfileFunction ( Func ) ) , TD ( TD ) { }
2010-09-05 09:00:32 +00:00
Function * getFunc ( ) const { return Func ; }
unsigned getHash ( ) const { return Hash ; }
TargetData * getTD ( ) const { return TD ; }
2010-09-05 08:22:49 +00:00
2010-09-05 09:00:32 +00:00
// Drops AssertingVH reference to the function. Outside of debug mode, this
// does nothing.
void release ( ) {
assert ( Func & &
" Attempted to release function twice, or release empty/tombstone! " ) ;
Func = NULL ;
2010-09-05 08:22:49 +00:00
}
2010-09-05 09:00:32 +00:00
private :
explicit ComparableFunction ( unsigned Hash )
: Func ( NULL ) , Hash ( Hash ) , TD ( NULL ) { }
AssertingVH < Function > Func ;
unsigned Hash ;
TargetData * TD ;
2010-09-05 08:22:49 +00:00
} ;
2010-09-05 09:00:32 +00:00
const ComparableFunction ComparableFunction : : EmptyKey = ComparableFunction ( 0 ) ;
const ComparableFunction ComparableFunction : : TombstoneKey =
ComparableFunction ( 1 ) ;
2010-09-07 01:42:10 +00:00
}
2010-09-05 09:00:32 +00:00
namespace llvm {
  /// DenseMapInfo specialization that allows ComparableFunction to be used as
  /// a DenseSet/DenseMap key.
  template <>
  struct DenseMapInfo<ComparableFunction> {
    /// Sentinel for a never-used bucket.
    static ComparableFunction getEmptyKey() {
      return ComparableFunction::EmptyKey;
    }

    /// Sentinel for an erased bucket.
    static ComparableFunction getTombstoneKey() {
      return ComparableFunction::TombstoneKey;
    }

    /// The hash is the profile hash cached inside the key.
    static unsigned getHashValue(const ComparableFunction &CF) {
      return CF.getHash();
    }

    /// Key equality; declared here and defined out of line.
    static bool isEqual(const ComparableFunction &LHS,
                        const ComparableFunction &RHS);
  };
}
namespace {
2010-09-05 08:22:49 +00:00
/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
/// MergeFunctions will fold them by replacing a call to one to a call to a
/// bitcast of the other.
///
class MergeFunctions : public ModulePass {
public :
static char ID ;
2010-10-19 17:21:58 +00:00
MergeFunctions ( ) : ModulePass ( ID ) {
initializeMergeFunctionsPass ( * PassRegistry : : getPassRegistry ( ) ) ;
}
2010-09-05 08:22:49 +00:00
bool runOnModule ( Module & M ) ;
private :
2010-09-05 09:00:32 +00:00
typedef DenseSet < ComparableFunction > FnSetType ;
2010-09-05 08:22:49 +00:00
/// Insert a ComparableFunction into the FnSet, or merge it away if it's
/// equal to one that's already present.
2010-09-05 09:00:32 +00:00
bool Insert ( FnSetType & FnSet , ComparableFunction & NewF ) ;
2010-09-05 08:22:49 +00:00
/// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
/// may be deleted, or may be converted into a thunk. In either case, it
/// should never be visited again.
void MergeTwoFunctions ( Function * F , Function * G ) const ;
/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
/// replace direct uses of G with bitcast(F). Deletes G.
void WriteThunk ( Function * F , Function * G ) const ;
TargetData * TD ;
} ;
} // end anonymous namespace
2008-11-02 05:52:50 +00:00
char MergeFunctions : : ID = 0 ;
2010-10-07 22:25:06 +00:00
INITIALIZE_PASS ( MergeFunctions , " mergefunc " , " Merge Functions " , false , false )
2008-11-02 05:52:50 +00:00
ModulePass * llvm : : createMergeFunctionsPass ( ) {
return new MergeFunctions ( ) ;
}
2010-08-02 05:23:03 +00:00
namespace {
2010-08-08 05:04:23 +00:00
/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. TargetData is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
2010-08-02 05:23:03 +00:00
class FunctionComparator {
public :
2010-08-31 05:53:05 +00:00
FunctionComparator ( const TargetData * TD , const Function * F1 ,
const Function * F2 )
2010-08-06 07:21:30 +00:00
: F1 ( F1 ) , F2 ( F2 ) , TD ( TD ) , IDMap1Count ( 0 ) , IDMap2Count ( 0 ) { }
2010-08-02 05:23:03 +00:00
2010-08-08 05:04:23 +00:00
/// Compare - test whether the two functions have equivalent behaviour.
2010-08-02 05:23:03 +00:00
bool Compare ( ) ;
private :
2010-08-08 05:04:23 +00:00
/// Compare - test whether two basic blocks have equivalent behaviour.
2010-08-02 05:23:03 +00:00
bool Compare ( const BasicBlock * BB1 , const BasicBlock * BB2 ) ;
2010-08-08 05:04:23 +00:00
/// Enumerate - Assign or look up previously assigned numbers for the two
/// values, and return whether the numbers are equal. Numbers are assigned in
/// the order visited.
2010-08-02 05:23:03 +00:00
bool Enumerate ( const Value * V1 , const Value * V2 ) ;
2010-08-08 05:04:23 +00:00
/// isEquivalentOperation - Compare two Instructions for equivalence, similar
/// to Instruction::isSameOperationAs but with modifications to the type
/// comparison.
2010-08-02 05:23:03 +00:00
bool isEquivalentOperation ( const Instruction * I1 ,
const Instruction * I2 ) const ;
2010-08-08 05:04:23 +00:00
/// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
2010-08-02 05:23:03 +00:00
bool isEquivalentGEP ( const GEPOperator * GEP1 , const GEPOperator * GEP2 ) ;
bool isEquivalentGEP ( const GetElementPtrInst * GEP1 ,
2010-08-08 05:04:23 +00:00
const GetElementPtrInst * GEP2 ) {
2010-08-02 05:23:03 +00:00
return isEquivalentGEP ( cast < GEPOperator > ( GEP1 ) , cast < GEPOperator > ( GEP2 ) ) ;
}
2009-06-12 08:04:51 +00:00
2010-08-08 05:04:23 +00:00
/// isEquivalentType - Compare two Types, treating all pointer types as equal.
2010-08-02 05:23:03 +00:00
bool isEquivalentType ( const Type * Ty1 , const Type * Ty2 ) const ;
// The two functions undergoing comparison.
2010-08-31 05:53:05 +00:00
const Function * F1 , * F2 ;
2010-08-02 05:23:03 +00:00
2010-08-31 05:53:05 +00:00
const TargetData * TD ;
2010-08-02 05:23:03 +00:00
typedef DenseMap < const Value * , unsigned long > IDMap ;
2010-08-06 07:21:30 +00:00
IDMap Map1 , Map2 ;
unsigned long IDMap1Count , IDMap2Count ;
2010-08-02 05:23:03 +00:00
} ;
}
2010-08-06 07:21:30 +00:00
/// isEquivalentType - any two pointers in the same address space are
/// equivalent. Otherwise, standard type equivalence rules apply.
2010-08-02 05:23:03 +00:00
bool FunctionComparator : : isEquivalentType ( const Type * Ty1 ,
const Type * Ty2 ) const {
2009-06-12 08:04:51 +00:00
if ( Ty1 = = Ty2 )
return true ;
if ( Ty1 - > getTypeID ( ) ! = Ty2 - > getTypeID ( ) )
return false ;
switch ( Ty1 - > getTypeID ( ) ) {
2010-05-13 05:48:45 +00:00
default :
llvm_unreachable ( " Unknown type! " ) ;
2010-07-07 07:48:00 +00:00
// Fall through in Release mode.
2010-05-13 05:48:45 +00:00
case Type : : IntegerTyID :
case Type : : OpaqueTyID :
// Ty1 == Ty2 would have returned true earlier.
return false ;
2009-06-12 08:04:51 +00:00
case Type : : VoidTyID :
case Type : : FloatTyID :
case Type : : DoubleTyID :
case Type : : X86_FP80TyID :
case Type : : FP128TyID :
case Type : : PPC_FP128TyID :
case Type : : LabelTyID :
case Type : : MetadataTyID :
return true ;
case Type : : PointerTyID : {
const PointerType * PTy1 = cast < PointerType > ( Ty1 ) ;
const PointerType * PTy2 = cast < PointerType > ( Ty2 ) ;
return PTy1 - > getAddressSpace ( ) = = PTy2 - > getAddressSpace ( ) ;
}
case Type : : StructTyID : {
const StructType * STy1 = cast < StructType > ( Ty1 ) ;
const StructType * STy2 = cast < StructType > ( Ty2 ) ;
if ( STy1 - > getNumElements ( ) ! = STy2 - > getNumElements ( ) )
return false ;
if ( STy1 - > isPacked ( ) ! = STy2 - > isPacked ( ) )
return false ;
for ( unsigned i = 0 , e = STy1 - > getNumElements ( ) ; i ! = e ; + + i ) {
if ( ! isEquivalentType ( STy1 - > getElementType ( i ) , STy2 - > getElementType ( i ) ) )
return false ;
}
return true ;
}
case Type : : FunctionTyID : {
const FunctionType * FTy1 = cast < FunctionType > ( Ty1 ) ;
const FunctionType * FTy2 = cast < FunctionType > ( Ty2 ) ;
if ( FTy1 - > getNumParams ( ) ! = FTy2 - > getNumParams ( ) | |
FTy1 - > isVarArg ( ) ! = FTy2 - > isVarArg ( ) )
return false ;
if ( ! isEquivalentType ( FTy1 - > getReturnType ( ) , FTy2 - > getReturnType ( ) ) )
return false ;
for ( unsigned i = 0 , e = FTy1 - > getNumParams ( ) ; i ! = e ; + + i ) {
if ( ! isEquivalentType ( FTy1 - > getParamType ( i ) , FTy2 - > getParamType ( i ) ) )
return false ;
}
return true ;
}
2010-07-16 06:31:12 +00:00
case Type : : ArrayTyID : {
const ArrayType * ATy1 = cast < ArrayType > ( Ty1 ) ;
const ArrayType * ATy2 = cast < ArrayType > ( Ty2 ) ;
return ATy1 - > getNumElements ( ) = = ATy2 - > getNumElements ( ) & &
isEquivalentType ( ATy1 - > getElementType ( ) , ATy2 - > getElementType ( ) ) ;
}
2010-08-06 07:21:30 +00:00
2009-06-12 08:04:51 +00:00
case Type : : VectorTyID : {
2010-07-16 06:31:12 +00:00
const VectorType * VTy1 = cast < VectorType > ( Ty1 ) ;
const VectorType * VTy2 = cast < VectorType > ( Ty2 ) ;
return VTy1 - > getNumElements ( ) = = VTy2 - > getNumElements ( ) & &
isEquivalentType ( VTy1 - > getElementType ( ) , VTy2 - > getElementType ( ) ) ;
2009-06-12 08:04:51 +00:00
}
}
}
/// isEquivalentOperation - determine whether the two operations are the same
/// except that pointer-to-A and pointer-to-B are equivalent. This should be
2009-06-12 19:03:05 +00:00
/// kept in sync with Instruction::isSameOperationAs.
2010-08-02 05:23:03 +00:00
bool FunctionComparator : : isEquivalentOperation ( const Instruction * I1 ,
const Instruction * I2 ) const {
2009-06-12 08:04:51 +00:00
if ( I1 - > getOpcode ( ) ! = I2 - > getOpcode ( ) | |
I1 - > getNumOperands ( ) ! = I2 - > getNumOperands ( ) | |
2009-08-25 22:11:20 +00:00
! isEquivalentType ( I1 - > getType ( ) , I2 - > getType ( ) ) | |
! I1 - > hasSameSubclassOptionalData ( I2 ) )
2009-06-12 08:04:51 +00:00
return false ;
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
for ( unsigned i = 0 , e = I1 - > getNumOperands ( ) ; i ! = e ; + + i )
if ( ! isEquivalentType ( I1 - > getOperand ( i ) - > getType ( ) ,
I2 - > getOperand ( i ) - > getType ( ) ) )
return false ;
// Check special state that is a part of some instructions.
if ( const LoadInst * LI = dyn_cast < LoadInst > ( I1 ) )
return LI - > isVolatile ( ) = = cast < LoadInst > ( I2 ) - > isVolatile ( ) & &
LI - > getAlignment ( ) = = cast < LoadInst > ( I2 ) - > getAlignment ( ) ;
if ( const StoreInst * SI = dyn_cast < StoreInst > ( I1 ) )
return SI - > isVolatile ( ) = = cast < StoreInst > ( I2 ) - > isVolatile ( ) & &
SI - > getAlignment ( ) = = cast < StoreInst > ( I2 ) - > getAlignment ( ) ;
if ( const CmpInst * CI = dyn_cast < CmpInst > ( I1 ) )
return CI - > getPredicate ( ) = = cast < CmpInst > ( I2 ) - > getPredicate ( ) ;
if ( const CallInst * CI = dyn_cast < CallInst > ( I1 ) )
return CI - > isTailCall ( ) = = cast < CallInst > ( I2 ) - > isTailCall ( ) & &
CI - > getCallingConv ( ) = = cast < CallInst > ( I2 ) - > getCallingConv ( ) & &
CI - > getAttributes ( ) . getRawPointer ( ) = =
cast < CallInst > ( I2 ) - > getAttributes ( ) . getRawPointer ( ) ;
if ( const InvokeInst * CI = dyn_cast < InvokeInst > ( I1 ) )
return CI - > getCallingConv ( ) = = cast < InvokeInst > ( I2 ) - > getCallingConv ( ) & &
CI - > getAttributes ( ) . getRawPointer ( ) = =
cast < InvokeInst > ( I2 ) - > getAttributes ( ) . getRawPointer ( ) ;
if ( const InsertValueInst * IVI = dyn_cast < InsertValueInst > ( I1 ) ) {
if ( IVI - > getNumIndices ( ) ! = cast < InsertValueInst > ( I2 ) - > getNumIndices ( ) )
return false ;
for ( unsigned i = 0 , e = IVI - > getNumIndices ( ) ; i ! = e ; + + i )
if ( IVI - > idx_begin ( ) [ i ] ! = cast < InsertValueInst > ( I2 ) - > idx_begin ( ) [ i ] )
return false ;
return true ;
}
if ( const ExtractValueInst * EVI = dyn_cast < ExtractValueInst > ( I1 ) ) {
if ( EVI - > getNumIndices ( ) ! = cast < ExtractValueInst > ( I2 ) - > getNumIndices ( ) )
return false ;
for ( unsigned i = 0 , e = EVI - > getNumIndices ( ) ; i ! = e ; + + i )
if ( EVI - > idx_begin ( ) [ i ] ! = cast < ExtractValueInst > ( I2 ) - > idx_begin ( ) [ i ] )
return false ;
return true ;
}
return true ;
2008-11-02 05:52:50 +00:00
}
2010-08-02 05:23:03 +00:00
/// isEquivalentGEP - determine whether two GEP operations perform the same
/// underlying arithmetic.
bool FunctionComparator : : isEquivalentGEP ( const GEPOperator * GEP1 ,
const GEPOperator * GEP2 ) {
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
2010-05-13 05:48:45 +00:00
if ( TD & & GEP1 - > hasAllConstantIndices ( ) & & GEP2 - > hasAllConstantIndices ( ) ) {
2010-08-02 05:23:03 +00:00
SmallVector < Value * , 8 > Indices1 ( GEP1 - > idx_begin ( ) , GEP1 - > idx_end ( ) ) ;
SmallVector < Value * , 8 > Indices2 ( GEP2 - > idx_begin ( ) , GEP2 - > idx_end ( ) ) ;
2010-05-13 05:48:45 +00:00
uint64_t Offset1 = TD - > getIndexedOffset ( GEP1 - > getPointerOperandType ( ) ,
Indices1 . data ( ) , Indices1 . size ( ) ) ;
uint64_t Offset2 = TD - > getIndexedOffset ( GEP2 - > getPointerOperandType ( ) ,
Indices2 . data ( ) , Indices2 . size ( ) ) ;
return Offset1 = = Offset2 ;
}
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
if ( GEP1 - > getPointerOperand ( ) - > getType ( ) ! =
GEP2 - > getPointerOperand ( ) - > getType ( ) )
return false ;
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
if ( GEP1 - > getNumOperands ( ) ! = GEP2 - > getNumOperands ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-05-13 05:48:45 +00:00
for ( unsigned i = 0 , e = GEP1 - > getNumOperands ( ) ; i ! = e ; + + i ) {
2010-08-02 05:23:03 +00:00
if ( ! Enumerate ( GEP1 - > getOperand ( i ) , GEP2 - > getOperand ( i ) ) )
2010-05-13 05:48:45 +00:00
return false ;
2008-11-02 05:52:50 +00:00
}
2010-05-13 05:48:45 +00:00
return true ;
2008-11-02 05:52:50 +00:00
}
2010-08-02 05:23:03 +00:00
/// Enumerate - Compare two values used by the two functions under pair-wise
/// comparison. If this is the first time the values are seen, they're added to
/// the mapping so that we will detect mismatches on next use.
bool FunctionComparator : : Enumerate ( const Value * V1 , const Value * V2 ) {
// Check for function @f1 referring to itself and function @f2 referring to
// itself, or referring to each other, or both referring to either of them.
// They're all equivalent if the two functions are otherwise equivalent.
2010-08-06 07:21:30 +00:00
if ( V1 = = F1 & & V2 = = F2 )
return true ;
if ( V1 = = F2 & & V2 = = F1 )
return true ;
2008-11-02 05:52:50 +00:00
2010-08-02 05:23:03 +00:00
// TODO: constant expressions with GEP or references to F1 or F2.
2010-05-13 05:48:45 +00:00
if ( isa < Constant > ( V1 ) )
return V1 = = V2 ;
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
if ( isa < InlineAsm > ( V1 ) & & isa < InlineAsm > ( V2 ) ) {
const InlineAsm * IA1 = cast < InlineAsm > ( V1 ) ;
const InlineAsm * IA2 = cast < InlineAsm > ( V2 ) ;
return IA1 - > getAsmString ( ) = = IA2 - > getAsmString ( ) & &
IA1 - > getConstraintString ( ) = = IA2 - > getConstraintString ( ) ;
}
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
unsigned long & ID1 = Map1 [ V1 ] ;
if ( ! ID1 )
2010-08-06 07:21:30 +00:00
ID1 = + + IDMap1Count ;
2010-05-13 05:48:45 +00:00
unsigned long & ID2 = Map2 [ V2 ] ;
if ( ! ID2 )
2010-08-06 07:21:30 +00:00
ID2 = + + IDMap2Count ;
2010-05-13 05:48:45 +00:00
return ID1 = = ID2 ;
}
2010-08-08 05:04:23 +00:00
/// Compare - test whether two basic blocks have equivalent behaviour.
2010-08-02 05:23:03 +00:00
bool FunctionComparator : : Compare ( const BasicBlock * BB1 , const BasicBlock * BB2 ) {
BasicBlock : : const_iterator F1I = BB1 - > begin ( ) , F1E = BB1 - > end ( ) ;
BasicBlock : : const_iterator F2I = BB2 - > begin ( ) , F2E = BB2 - > end ( ) ;
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
do {
2010-08-02 05:23:03 +00:00
if ( ! Enumerate ( F1I , F2I ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( const GetElementPtrInst * GEP1 = dyn_cast < GetElementPtrInst > ( F1I ) ) {
const GetElementPtrInst * GEP2 = dyn_cast < GetElementPtrInst > ( F2I ) ;
if ( ! GEP2 )
return false ;
2008-11-02 05:52:50 +00:00
2010-08-02 05:23:03 +00:00
if ( ! Enumerate ( GEP1 - > getPointerOperand ( ) , GEP2 - > getPointerOperand ( ) ) )
2010-05-13 06:45:13 +00:00
return false ;
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
if ( ! isEquivalentGEP ( GEP1 , GEP2 ) )
2010-05-13 06:45:13 +00:00
return false ;
2010-05-13 05:48:45 +00:00
} else {
2010-08-02 05:23:03 +00:00
if ( ! isEquivalentOperation ( F1I , F2I ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
assert ( F1I - > getNumOperands ( ) = = F2I - > getNumOperands ( ) ) ;
for ( unsigned i = 0 , e = F1I - > getNumOperands ( ) ; i ! = e ; + + i ) {
Value * OpF1 = F1I - > getOperand ( i ) ;
Value * OpF2 = F2I - > getOperand ( i ) ;
2008-11-02 05:52:50 +00:00
2010-08-02 05:23:03 +00:00
if ( ! Enumerate ( OpF1 , OpF2 ) )
2010-05-13 06:45:13 +00:00
return false ;
2010-05-13 05:48:45 +00:00
2010-08-02 05:23:03 +00:00
if ( OpF1 - > getValueID ( ) ! = OpF2 - > getValueID ( ) | |
! isEquivalentType ( OpF1 - > getType ( ) , OpF2 - > getType ( ) ) )
2008-11-02 05:52:50 +00:00
return false ;
}
}
2010-08-02 05:23:03 +00:00
+ + F1I , + + F2I ;
} while ( F1I ! = F1E & & F2I ! = F2E ) ;
2008-11-02 05:52:50 +00:00
2010-08-02 05:23:03 +00:00
return F1I = = F1E & & F2I = = F2E ;
2008-11-02 05:52:50 +00:00
}
2010-08-08 05:04:23 +00:00
/// Compare - test whether the two functions have equivalent behaviour.
2010-08-02 05:23:03 +00:00
bool FunctionComparator : : Compare ( ) {
2008-11-02 05:52:50 +00:00
// We need to recheck everything, but check the things that weren't included
// in the hash first.
2010-08-02 05:23:03 +00:00
if ( F1 - > getAttributes ( ) ! = F2 - > getAttributes ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( F1 - > hasGC ( ) ! = F2 - > hasGC ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( F1 - > hasGC ( ) & & F1 - > getGC ( ) ! = F2 - > getGC ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( F1 - > hasSection ( ) ! = F2 - > hasSection ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( F1 - > hasSection ( ) & & F1 - > getSection ( ) ! = F2 - > getSection ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( F1 - > isVarArg ( ) ! = F2 - > isVarArg ( ) )
2009-06-12 08:04:51 +00:00
return false ;
2008-11-02 05:52:50 +00:00
// TODO: if it's internal and only used in direct calls, we could handle this
// case too.
2010-08-02 05:23:03 +00:00
if ( F1 - > getCallingConv ( ) ! = F2 - > getCallingConv ( ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
if ( ! isEquivalentType ( F1 - > getFunctionType ( ) , F2 - > getFunctionType ( ) ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-02 05:23:03 +00:00
assert ( F1 - > arg_size ( ) = = F2 - > arg_size ( ) & &
2010-09-07 01:42:10 +00:00
" Identically typed functions have different numbers of args! " ) ;
2008-11-02 05:52:50 +00:00
2010-05-13 05:48:45 +00:00
// Visit the arguments so that they get enumerated in the order they're
// passed in.
2010-08-02 05:23:03 +00:00
for ( Function : : const_arg_iterator f1i = F1 - > arg_begin ( ) ,
f2i = F2 - > arg_begin ( ) , f1e = F1 - > arg_end ( ) ; f1i ! = f1e ; + + f1i , + + f2i ) {
if ( ! Enumerate ( f1i , f2i ) )
2010-09-07 01:42:10 +00:00
llvm_unreachable ( " Arguments repeat! " ) ;
2010-05-13 05:48:45 +00:00
}
2008-11-02 05:52:50 +00:00
2010-08-08 05:04:23 +00:00
// We do a CFG-ordered walk since the actual ordering of the blocks in the
// linked list is immaterial. Our walk starts at the entry block for both
2010-08-02 05:23:03 +00:00
// functions, then takes each block from each terminator in order. As an
// artifact, this also means that unreachable blocks are ignored.
SmallVector < const BasicBlock * , 8 > F1BBs , F2BBs ;
SmallSet < const BasicBlock * , 128 > VisitedBBs ; // in terms of F1.
2010-08-06 07:21:30 +00:00
2010-08-02 05:23:03 +00:00
F1BBs . push_back ( & F1 - > getEntryBlock ( ) ) ;
F2BBs . push_back ( & F2 - > getEntryBlock ( ) ) ;
2010-08-06 07:21:30 +00:00
2010-08-02 05:23:03 +00:00
VisitedBBs . insert ( F1BBs [ 0 ] ) ;
while ( ! F1BBs . empty ( ) ) {
const BasicBlock * F1BB = F1BBs . pop_back_val ( ) ;
const BasicBlock * F2BB = F2BBs . pop_back_val ( ) ;
2010-08-06 07:21:30 +00:00
2010-08-02 05:23:03 +00:00
if ( ! Enumerate ( F1BB , F2BB ) | | ! Compare ( F1BB , F2BB ) )
2008-11-02 05:52:50 +00:00
return false ;
2010-08-06 07:21:30 +00:00
2010-08-02 05:23:03 +00:00
const TerminatorInst * F1TI = F1BB - > getTerminator ( ) ;
const TerminatorInst * F2TI = F2BB - > getTerminator ( ) ;
2010-08-06 07:21:30 +00:00
2010-08-02 05:23:03 +00:00
assert ( F1TI - > getNumSuccessors ( ) = = F2TI - > getNumSuccessors ( ) ) ;
for ( unsigned i = 0 , e = F1TI - > getNumSuccessors ( ) ; i ! = e ; + + i ) {
if ( ! VisitedBBs . insert ( F1TI - > getSuccessor ( i ) ) )
2010-05-13 06:45:13 +00:00
continue ;
2010-08-06 07:21:30 +00:00
2010-08-02 05:23:03 +00:00
F1BBs . push_back ( F1TI - > getSuccessor ( i ) ) ;
F2BBs . push_back ( F2TI - > getSuccessor ( i ) ) ;
2010-05-13 05:48:45 +00:00
}
2008-11-02 05:52:50 +00:00
}
return true ;
}
2010-08-08 05:04:23 +00:00
/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
2010-09-05 08:22:49 +00:00
/// direct uses of G with bitcast(F). Deletes G.
2010-08-08 05:04:23 +00:00
void MergeFunctions : : WriteThunk ( Function * F , Function * G ) const {
2010-05-13 05:48:45 +00:00
if ( ! G - > mayBeOverridden ( ) ) {
// Redirect direct callers of G to F.
Constant * BitcastF = ConstantExpr : : getBitCast ( F , G - > getType ( ) ) ;
for ( Value : : use_iterator UI = G - > use_begin ( ) , UE = G - > use_end ( ) ;
UI ! = UE ; ) {
Value : : use_iterator TheIter = UI ;
+ + UI ;
CallSite CS ( * TheIter ) ;
if ( CS & & CS . isCallee ( TheIter ) )
TheIter . getUse ( ) . set ( BitcastF ) ;
}
}
2010-09-07 01:42:10 +00:00
// If G was internal then we may have replaced all uses of G with F. If so,
2010-08-06 07:21:30 +00:00
// stop here and delete G. There's no need for a thunk.
if ( G - > hasLocalLinkage ( ) & & G - > use_empty ( ) ) {
G - > eraseFromParent ( ) ;
return ;
}
2009-06-12 15:56:56 +00:00
Function * NewG = Function : : Create ( G - > getFunctionType ( ) , G - > getLinkage ( ) , " " ,
G - > getParent ( ) ) ;
2009-08-13 21:58:54 +00:00
BasicBlock * BB = BasicBlock : : Create ( F - > getContext ( ) , " " , NewG ) ;
2010-08-08 05:04:23 +00:00
IRBuilder < false > Builder ( BB ) ;
2009-06-12 08:04:51 +00:00
2010-05-13 05:48:45 +00:00
SmallVector < Value * , 16 > Args ;
2009-06-12 08:04:51 +00:00
unsigned i = 0 ;
const FunctionType * FFTy = F - > getFunctionType ( ) ;
for ( Function : : arg_iterator AI = NewG - > arg_begin ( ) , AE = NewG - > arg_end ( ) ;
AI ! = AE ; + + AI ) {
2010-08-08 05:04:23 +00:00
Args . push_back ( Builder . CreateBitCast ( AI , FFTy - > getParamType ( i ) ) ) ;
2009-06-12 08:04:51 +00:00
+ + i ;
2008-11-02 05:52:50 +00:00
}
2010-08-08 05:04:23 +00:00
CallInst * CI = Builder . CreateCall ( F , Args . begin ( ) , Args . end ( ) ) ;
2009-06-12 08:04:51 +00:00
CI - > setTailCall ( ) ;
2009-06-12 16:04:00 +00:00
CI - > setCallingConv ( F - > getCallingConv ( ) ) ;
2010-01-05 13:12:22 +00:00
if ( NewG - > getReturnType ( ) - > isVoidTy ( ) ) {
2010-08-08 05:04:23 +00:00
Builder . CreateRetVoid ( ) ;
2009-06-12 08:04:51 +00:00
} else {
2010-08-08 05:04:23 +00:00
Builder . CreateRet ( Builder . CreateBitCast ( CI , NewG - > getReturnType ( ) ) ) ;
2008-11-02 16:46:26 +00:00
}
2009-06-12 08:04:51 +00:00
NewG - > copyAttributesFrom ( G ) ;
NewG - > takeName ( G ) ;
G - > replaceAllUsesWith ( NewG ) ;
G - > eraseFromParent ( ) ;
2010-09-07 01:42:10 +00:00
DEBUG ( dbgs ( ) < < " WriteThunk: " < < NewG - > getName ( ) < < ' \n ' ) ;
+ + NumThunksWritten ;
2008-11-02 05:52:50 +00:00
}
2010-08-08 05:04:23 +00:00
/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
2010-08-31 05:53:05 +00:00
/// Function G is deleted.
void MergeFunctions : : MergeTwoFunctions ( Function * F , Function * G ) const {
2010-09-07 01:42:10 +00:00
if ( F - > mayBeOverridden ( ) ) {
assert ( G - > mayBeOverridden ( ) ) ;
2009-06-12 15:56:56 +00:00
2010-05-13 05:48:45 +00:00
// Make them both thunks to the same internal function.
Function * H = Function : : Create ( F - > getFunctionType ( ) , F - > getLinkage ( ) , " " ,
F - > getParent ( ) ) ;
H - > copyAttributesFrom ( F ) ;
H - > takeName ( F ) ;
F - > replaceAllUsesWith ( H ) ;
2009-06-12 15:56:56 +00:00
2010-08-09 21:03:28 +00:00
unsigned MaxAlignment = std : : max ( G - > getAlignment ( ) , H - > getAlignment ( ) ) ;
2010-08-08 05:04:23 +00:00
WriteThunk ( F , G ) ;
WriteThunk ( F , H ) ;
2009-06-12 15:56:56 +00:00
2010-08-09 21:03:28 +00:00
F - > setAlignment ( MaxAlignment ) ;
2010-05-13 05:48:45 +00:00
F - > setLinkage ( GlobalValue : : InternalLinkage ) ;
2010-09-07 01:42:10 +00:00
+ + NumDoubleWeak ;
2010-08-06 07:21:30 +00:00
} else {
2010-08-08 05:04:23 +00:00
WriteThunk ( F , G ) ;
2008-11-02 05:52:50 +00:00
}
2009-06-12 08:04:51 +00:00
+ + NumFunctionsMerged ;
2008-11-02 05:52:50 +00:00
}
2010-09-05 08:22:49 +00:00
// Insert - Insert a ComparableFunction into the FnSet, or merge it away if
// equal to one that's already inserted.
2010-09-05 09:00:32 +00:00
bool MergeFunctions : : Insert ( FnSetType & FnSet , ComparableFunction & NewF ) {
2010-09-05 08:22:49 +00:00
std : : pair < FnSetType : : iterator , bool > Result = FnSet . insert ( NewF ) ;
if ( Result . second )
return false ;
2010-08-31 05:53:05 +00:00
2010-09-05 09:00:32 +00:00
const ComparableFunction & OldF = * Result . first ;
2010-09-05 08:22:49 +00:00
// Never thunk a strong function to a weak function.
2010-09-07 01:42:10 +00:00
assert ( ! OldF . getFunc ( ) - > mayBeOverridden ( ) | |
NewF . getFunc ( ) - > mayBeOverridden ( ) ) ;
2010-09-05 08:22:49 +00:00
2010-09-05 09:00:32 +00:00
DEBUG ( dbgs ( ) < < " " < < OldF . getFunc ( ) - > getName ( ) < < " == "
< < NewF . getFunc ( ) - > getName ( ) < < ' \n ' ) ;
2010-09-05 08:22:49 +00:00
2010-09-05 09:00:32 +00:00
Function * DeleteF = NewF . getFunc ( ) ;
NewF . release ( ) ;
MergeTwoFunctions ( OldF . getFunc ( ) , DeleteF ) ;
2010-09-05 08:22:49 +00:00
return true ;
2010-08-08 05:04:23 +00:00
}
2009-06-12 08:04:51 +00:00
2010-09-05 08:22:49 +00:00
// IsThunk - This method determines whether or not a given Function is a thunk
// like the ones emitted by this pass and therefore not subject to further
// merging.
static bool IsThunk ( const Function * F ) {
// The safe direction to fail is to return true. In that case, the function
// will be removed from merging analysis. If we failed to including functions
// then we may try to merge unmergable thing (ie., identical weak functions)
// which will push us into an infinite loop.
2010-08-31 05:53:05 +00:00
2010-09-07 01:42:10 +00:00
assert ( ! F - > isDeclaration ( ) & & " Expected a function definition. " ) ;
2010-08-31 05:53:05 +00:00
2010-09-05 08:22:49 +00:00
const BasicBlock * BB = & F - > front ( ) ;
// A thunk is:
// bitcast-inst*
// optional-reg tail call @thunkee(args...*)
// ret void|optional-reg
// where the args are in the same order as the arguments.
2010-09-07 01:42:10 +00:00
// Put this at the top since it triggers most often.
const ReturnInst * RI = dyn_cast < ReturnInst > ( BB - > getTerminator ( ) ) ;
if ( ! RI ) return false ;
2010-09-05 08:22:49 +00:00
// Verify that the sequence of bitcast-inst's are all casts of arguments and
// that there aren't any extras (ie. no repeated casts).
int LastArgNo = - 1 ;
BasicBlock : : const_iterator I = BB - > begin ( ) ;
while ( const BitCastInst * BCI = dyn_cast < BitCastInst > ( I ) ) {
const Argument * A = dyn_cast < Argument > ( BCI - > getOperand ( 0 ) ) ;
if ( ! A ) return false ;
2010-09-07 01:42:10 +00:00
if ( ( int ) A - > getArgNo ( ) < = LastArgNo ) return false ;
2010-09-05 08:22:49 +00:00
LastArgNo = A - > getArgNo ( ) ;
+ + I ;
2010-08-31 05:53:05 +00:00
}
2010-09-05 08:22:49 +00:00
2010-09-07 01:42:10 +00:00
// Verify that we have a direct tail call and that the calling conventions
// and number of arguments match.
const CallInst * CI = dyn_cast < CallInst > ( I + + ) ;
if ( ! CI | | ! CI - > isTailCall ( ) | | ! CI - > getCalledFunction ( ) | |
CI - > getCallingConv ( ) ! = CI - > getCalledFunction ( ) - > getCallingConv ( ) | |
CI - > getNumArgOperands ( ) ! = F - > arg_size ( ) )
return false ;
2010-09-05 08:22:49 +00:00
// Verify that the call instruction has the same arguments as this function
// and that they're all either the incoming argument or a cast of the right
// argument.
for ( unsigned i = 0 , e = CI - > getNumArgOperands ( ) ; i ! = e ; + + i ) {
const Value * V = CI - > getArgOperand ( i ) ;
const Argument * A = dyn_cast < Argument > ( V ) ;
if ( ! A ) {
const BitCastInst * BCI = dyn_cast < BitCastInst > ( V ) ;
if ( ! BCI ) return false ;
A = cast < Argument > ( BCI - > getOperand ( 0 ) ) ;
}
if ( A - > getArgNo ( ) ! = i ) return false ;
2010-08-31 05:53:05 +00:00
}
2010-09-05 08:22:49 +00:00
// Verify that the terminator is a ret void (if we're void) or a ret of the
// call's return, or a ret of a bitcast of the call's return.
if ( const BitCastInst * BCI = dyn_cast < BitCastInst > ( I ) ) {
+ + I ;
if ( BCI - > getOperand ( 0 ) ! = CI ) return false ;
2010-08-31 05:53:05 +00:00
}
2010-09-07 01:42:10 +00:00
if ( RI ! = I ) return false ;
2010-09-05 08:22:49 +00:00
if ( RI - > getNumOperands ( ) = = 0 )
return CI - > getType ( ) - > isVoidTy ( ) ;
return RI - > getReturnValue ( ) = = CI ;
}
2010-08-31 05:53:05 +00:00
2008-11-02 05:52:50 +00:00
bool MergeFunctions : : runOnModule ( Module & M ) {
2010-08-31 08:29:37 +00:00
bool Changed = false ;
TD = getAnalysisIfAvailable < TargetData > ( ) ;
2010-08-31 05:53:05 +00:00
2010-08-31 08:29:37 +00:00
bool LocalChanged ;
do {
2010-09-07 01:42:10 +00:00
DEBUG ( dbgs ( ) < < " size of module: " < < M . size ( ) < < ' \n ' ) ;
2010-08-31 08:29:37 +00:00
LocalChanged = false ;
FnSetType FnSet ;
2010-09-05 08:22:49 +00:00
// Insert only strong functions and merge them. Strong function merging
// always deletes one of them.
for ( Module : : iterator I = M . begin ( ) , E = M . end ( ) ; I ! = E ; ) {
Function * F = I + + ;
if ( ! F - > isDeclaration ( ) & & ! F - > hasAvailableExternallyLinkage ( ) & &
2010-09-07 01:42:10 +00:00
! F - > mayBeOverridden ( ) & & ! IsThunk ( F ) ) {
2010-09-05 09:00:32 +00:00
ComparableFunction CF = ComparableFunction ( F , TD ) ;
2010-09-05 08:22:49 +00:00
LocalChanged | = Insert ( FnSet , CF ) ;
}
}
// Insert only weak functions and merge them. By doing these second we
// create thunks to the strong function when possible. When two weak
// functions are identical, we create a new strong function with two weak
// weak thunks to it which are identical but not mergable.
for ( Module : : iterator I = M . begin ( ) , E = M . end ( ) ; I ! = E ; ) {
Function * F = I + + ;
if ( ! F - > isDeclaration ( ) & & ! F - > hasAvailableExternallyLinkage ( ) & &
2010-09-07 01:42:10 +00:00
F - > mayBeOverridden ( ) & & ! IsThunk ( F ) ) {
2010-09-05 09:00:32 +00:00
ComparableFunction CF = ComparableFunction ( F , TD ) ;
2010-09-05 08:22:49 +00:00
LocalChanged | = Insert ( FnSet , CF ) ;
2010-08-31 08:29:37 +00:00
}
2008-11-02 05:52:50 +00:00
}
2010-09-07 01:42:10 +00:00
DEBUG ( dbgs ( ) < < " size of FnSet: " < < FnSet . size ( ) < < ' \n ' ) ;
2010-09-05 08:22:49 +00:00
Changed | = LocalChanged ;
2010-08-31 08:29:37 +00:00
} while ( LocalChanged ) ;
2008-11-02 05:52:50 +00:00
return Changed ;
}
2010-09-05 08:22:49 +00:00
2010-09-05 09:00:32 +00:00
// isEqual - Equality predicate for ComparableFunction keys. Two keys are equal
// when they refer to the same function with the same hash, or when both refer
// to functions that FunctionComparator reports as matching.
bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
                                               const ComparableFunction &RHS) {
  Function *LeftFn = LHS.getFunc();
  Function *RightFn = RHS.getFunc();

  // Same function pointer and same hash: equal without a deep comparison.
  if (LeftFn == RightFn && LHS.getHash() == RHS.getHash())
    return true;

  // A key with no function attached cannot be compared structurally.
  if (!LeftFn || !RightFn)
    return false;

  assert(LHS.getTD() == RHS.getTD() &&
         " Comparing functions for different targets ");

  // Fall back to the full function comparison.
  FunctionComparator Comparator(LHS.getTD(), LeftFn, RightFn);
  return Comparator.Compare();
}