diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index d8f67044326..332a0425514 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -320,6 +320,7 @@ static const Value *getNoopInput(const Value *V, static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, SmallVectorImpl &RetIndices, SmallVectorImpl &CallIndices, + bool AllowDifferingSizes, const TargetLoweringBase &TLI) { // Trace the sub-value needed by the return value as far back up the graph as @@ -350,7 +351,8 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, // all the bits that are needed by the "ret" have been provided by the "tail // call". FIXME: with sufficiently cunning bit-tracking, we could look through // extensions too. - if (BitsProvided < BitsRequired) + if (BitsProvided < BitsRequired || + (!AllowDifferingSizes && BitsProvided != BitsRequired)) return false; return true; @@ -516,19 +518,38 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // return type is. if (isa(Ret->getOperand(0))) return true; - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - const Function *F = ExitBB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias)) - return false; + // Make sure the attributes attached to each return are compatible. + AttrBuilder CallerAttrs(ExitBB->getParent()->getAttributes(), + AttributeSet::ReturnIndex); + AttrBuilder CalleeAttrs(cast(I)->getAttributes(), + AttributeSet::ReturnIndex); - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + // Noalias is completely benign as far as calling convention goes, it + // shouldn't affect whether the call is a tail call. + CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + + bool AllowDifferingSizes = true; + if (CallerAttrs.contains(Attribute::ZExt)) { + if (!CalleeAttrs.contains(Attribute::ZExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::ZExt); + CalleeAttrs.removeAttribute(Attribute::ZExt); + } else if (CallerAttrs.contains(Attribute::SExt)) { + if (!CalleeAttrs.contains(Attribute::SExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::SExt); + CalleeAttrs.removeAttribute(Attribute::SExt); + } + + // If they're still different, there's some facet we don't understand + // (currently only "inreg", but in future who knows). It may be OK but the + // only safe option is to reject the tail call. + if (CallerAttrs != CalleeAttrs) return false; const Value *RetVal = Ret->getOperand(0), *CallVal = I; @@ -570,7 +591,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. - if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI)) + if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, + AllowDifferingSizes, TLI)) return false; CallEmpty = !nextRealType(CallSubTypes, CallPath); diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 7b774f6b692..589e9ec1052 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -106,10 +106,10 @@ declare i32 @bar2(i32, i32, i32) define signext i16 @t8() nounwind ssp { entry: ; 32-LABEL: t8: -; 32: calll {{_?}}bar3 +; 32: jmp {{_?}}bar3 ; 64-LABEL: t8: -; 64: callq {{_?}}bar3 +; 64: jmp {{_?}}bar3 %0 = tail call signext i16 @bar3() nounwind ; [#uses=1] ret i16 %0 } @@ -122,7 +122,7 @@ entry: ; 32: calll * ; 64-LABEL: t9: -; 64: callq * +; 64: jmpq * %0 = bitcast i32 (i32)* %x to i16 (i32)* %1 = tail call signext i16 %0(i32 0) nounwind ret i16 %1 diff --git a/test/CodeGen/X86/tail-call-attrs.ll b/test/CodeGen/X86/tail-call-attrs.ll new file mode 100644 index 00000000000..17ebe997c8c --- /dev/null +++ b/test/CodeGen/X86/tail-call-attrs.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -o - %s | FileCheck %s + +; Simple case: completely identical returns, even with extensions, shouldn't be +; a barrier to tail calls. +declare zeroext i1 @give_bool() +define zeroext i1 @test_bool() { +; CHECK-LABEL: test_bool: +; CHECK: jmp + %call = tail call zeroext i1 @give_bool() + ret i1 %call +} + +; Here, there's more zero extension to be done between the call and the return, +; so a tail call is impossible (well, according to current Clang practice +; anyway. The AMD64 ABI isn't crystal clear on the matter). +declare zeroext i32 @give_i32() +define zeroext i8 @test_i32() { +; CHECK-LABEL: test_i32: +; CHECK: callq _give_i32 +; CHECK: movzbl %al, %eax +; CHECK: ret + + %call = tail call zeroext i32 @give_i32() + %val = trunc i32 %call to i8 + ret i8 %val +} + +; Here, one function is zeroext and the other is signext. To the extent that +; these both mean something they are incompatible so no tail call is possible. +declare zeroext i16 @give_unsigned_i16() +define signext i16 @test_incompatible_i16() { +; CHECK-LABEL: test_incompatible_i16: +; CHECK: callq _give_unsigned_i16 +; CHECK: cwtl +; CHECK: ret + + %call = tail call zeroext i16 @give_unsigned_i16() + ret i16 %call +} + +declare inreg i32 @give_i32_inreg() +define i32 @test_inreg_to_normal() { +; CHECK-LABEL: test_inreg_to_normal: +; CHECK: callq _give_i32_inreg +; CHECK: ret + %val = tail call inreg i32 @give_i32_inreg() + ret i32 %val +} + +define inreg i32 @test_normal_to_inreg() { +; CHECK-LABEL: test_normal_to_inreg: +; CHECK: callq _give_i32 +; CHECK: ret + %val = tail call i32 @give_i32() + ret i32 %val +}