diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index e2b2067b0c6..4731af5089a 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -269,8 +269,6 @@ static bool sameNoopInput(const Value *V1, const Value *V2,
            i != e; ++i) {
         unsigned attrInd = i - I->op_begin() + 1;
         if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
-            !cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::ZExt) &&
-            !cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::SExt) &&
             isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
           NoopInput = *i;
           break;
@@ -284,8 +282,6 @@ static bool sameNoopInput(const Value *V1, const Value *V2,
            i != e; ++i) {
         unsigned attrInd = i - I->op_begin() + 1;
         if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
-            !cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::ZExt) &&
-            !cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::SExt) &&
             isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
           NoopInput = *i;
           break;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6e613d606f4..5eeac5b11f2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6398,6 +6398,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
 /// migrated to using LowerCall, this hook should be integrated into SDISel.
 std::pair<SDValue, SDValue>
 TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+  // Handle the incoming return values from the call.
+  CLI.Ins.clear();
+  SmallVector<EVT, 4> RetTys;
+  ComputeValueVTs(*this, CLI.RetTy, RetTys);
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      ISD::InputArg MyFlags;
+      MyFlags.VT = RegisterVT;
+      MyFlags.Used = CLI.IsReturnValueUsed;
+      if (CLI.RetSExt)
+        MyFlags.Flags.setSExt();
+      if (CLI.RetZExt)
+        MyFlags.Flags.setZExt();
+      if (CLI.IsInReg)
+        MyFlags.Flags.setInReg();
+      CLI.Ins.push_back(MyFlags);
+    }
+  }
+
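+  // The 'returned' compatibility check in the outgoing-argument loop below
+  // consults RetTys and the CLI.RetSExt/CLI.RetZExt flags set up here, so
+  // this block must run before the arguments are processed.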
   // Handle all of the outgoing arguments.
   CLI.Outs.clear();
   CLI.OutVals.clear();
@@ -6439,8 +6461,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       }
       if (Args[i].isNest)
         Flags.setNest();
-      if (Args[i].isReturned)
-        Flags.setReturned();
       Flags.setOrigAlign(OriginalAlignment);
 
       MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -6453,6 +6473,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       else if (Args[i].isZExt)
         ExtendKind = ISD::ZERO_EXTEND;
 
+      // Conservatively only handle 'returned' on non-vectors for now
+      if (Args[i].isReturned && !Op.getValueType().isVector()) {
+        assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
+               "unexpected use of 'returned'");
+        // Before passing 'returned' to the target lowering code, ensure that
+        // either the register MVT and the actual EVT are the same size or that
+        // the return value and argument are extended in the same way; in these
+        // cases it's safe to pass the argument register value unchanged as the
+        // return register value (although it's at the target's option whether
+        // to do so)
+        // TODO: allow code generation to take advantage of partially preserved
+        //       registers rather than clobbering the entire register when the
+        //       parameter extension method is not compatible with the return
+        //       extension method
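+        // For example, in test/CodeGen/ARM/returned-ext.ll,
+        //   declare zeroext i16 @bothzext16(i16 zeroext returned %x)
+        // extends the argument and the return value identically, so
+        // 'returned' is kept, whereas
+        //   declare i16 @paramzext16(i16 zeroext returned %x)
+        // zero-extends the argument but leaves the return value anyext'd,
+        // so 'returned' is dropped.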
+        if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+            (ExtendKind != ISD::ANY_EXTEND &&
+             CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
+          Flags.setReturned();
+      }
+
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
                      CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
 
@@ -6472,28 +6512,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     }
   }
 
-  // Handle the incoming return values from the call.
-  CLI.Ins.clear();
-  SmallVector<EVT, 4> RetTys;
-  ComputeValueVTs(*this, CLI.RetTy, RetTys);
-  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-    EVT VT = RetTys[I];
-    MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
-      MyFlags.Used = CLI.IsReturnValueUsed;
-      if (CLI.RetSExt)
-        MyFlags.Flags.setSExt();
-      if (CLI.RetZExt)
-        MyFlags.Flags.setZExt();
-      if (CLI.IsInReg)
-        MyFlags.Flags.setInReg();
-      CLI.Ins.push_back(MyFlags);
-    }
-  }
-
   SmallVector<SDValue, 4> InVals;
   CLI.Chain = LowerCall(CLI, InVals);
 
diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll
new file mode 100644
index 00000000000..670b12f249d
--- /dev/null
+++ b/test/CodeGen/ARM/returned-ext.ll
@@ -0,0 +1,178 @@
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+
+declare i16 @identity16(i16 returned %x)
+declare i32 @identity32(i32 returned %x)
+declare zeroext i16 @retzext16(i16 returned %x)
+declare i16 @paramzext16(i16 zeroext returned %x)
+declare zeroext i16 @bothzext16(i16 zeroext returned %x)
+
+; The zeroext param attribute below is meant to have no effect
+define i16 @test_identity(i16 zeroext %x) {
+entry:
+; CHECKELF: test_identity:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: bl identity16
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_identity:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: blx _identity16
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @identity16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+; FIXME: This ought not to require register saving but currently does because
+; %x is not considered equal to %call (see SelectionDAGBuilder.cpp)
+define i16 @test_matched_ret(i16 %x) {
+entry:
+; CHECKELF: test_matched_ret:
+
+; This shouldn't be required
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+
+; CHECKELF: bl retzext16
+; CHECKELF-NOT: uxth r0, {{r[0-9]+}}
+; CHECKELF: bl identity32
+
+; This shouldn't be required
+; CHECKELF: mov r0, [[SAVEX]]
+
+; CHECKT2D: test_matched_ret:
+
+; This shouldn't be required
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+
+; CHECKT2D: blx _retzext16
+; CHECKT2D-NOT: uxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _identity32
+
+; This shouldn't be required
+; CHECKT2D: mov r0, [[SAVEX]]
+
+  %call = tail call i16 @retzext16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+define i16 @test_mismatched_ret(i16 %x) {
+entry:
+; CHECKELF: test_mismatched_ret:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: bl retzext16
+; CHECKELF: sxth r0, {{r[0-9]+}}
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_mismatched_ret:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: blx _retzext16
+; CHECKT2D: sxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @retzext16(i16 %x)
+  %b = sext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
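+; The parameter is zeroext but the return value is not, so 'returned' cannot
+; be honored for the first call: the caller must zero-extend %x into r0
+; before the call and re-extend r0 (uxth) afterward. The final call can
+; still be a tail call (b/b.w) because its result is returned unchanged.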
+define i16 @test_matched_paramext(i16 %x) {
+entry:
+; CHECKELF: test_matched_paramext:
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl paramzext16
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl identity32
+; CHECKELF: b paramzext16
+; CHECKT2D: test_matched_paramext:
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _paramzext16
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _identity32
+; CHECKT2D: b.w _paramzext16
+  %call = tail call i16 @paramzext16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  %call3 = tail call i16 @paramzext16(i16 %call)
+  ret i16 %call3
+}
+
+; FIXME: This theoretically ought to optimize to the exact same output as the
+; version above, but doesn't currently (see SelectionDAGBuilder.cpp)
+define i16 @test_matched_paramext2(i16 %x) {
+entry:
+
+; Since there doesn't seem to be an unambiguous optimal selection and
+; scheduling of uxth and mov instructions below in lieu of the 'returned'
+; optimization, don't bother checking: just verify that the calls are made
+; in the correct order as a basic sanity check
+
+; CHECKELF: test_matched_paramext2:
+; CHECKELF: bl paramzext16
+; CHECKELF: bl identity32
+; CHECKELF: b paramzext16
+; CHECKT2D: test_matched_paramext2:
+; CHECKT2D: blx _paramzext16
+; CHECKT2D: blx _identity32
+; CHECKT2D: b.w _paramzext16
+  %call = tail call i16 @paramzext16(i16 %x)
+
+; Should make no difference if %x is used below rather than %call, but it does
+  %b = zext i16 %x to i32
+
+  %call2 = tail call i32 @identity32(i32 %b)
+  %call3 = tail call i16 @paramzext16(i16 %call)
+  ret i16 %call3
+}
+
+define i16 @test_matched_bothext(i16 %x) {
+entry:
+; CHECKELF: test_matched_bothext:
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl bothzext16
+; CHECKELF-NOT: uxth r0, r0
+
+; FIXME: Tail call should be OK here
+; CHECKELF: bl identity32
+
+; CHECKT2D: test_matched_bothext:
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _bothzext16
+; CHECKT2D-NOT: uxth r0, r0
+
+; FIXME: Tail call should be OK here
+; CHECKT2D: blx _identity32
+
+  %call = tail call i16 @bothzext16(i16 %x)
+  %b = zext i16 %x to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %call
+}
+
+define i16 @test_mismatched_bothext(i16 %x) {
+entry:
+; CHECKELF: test_mismatched_bothext:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: uxth r0, {{r[0-9]+}}
+; CHECKELF: bl bothzext16
+; CHECKELF: sxth r0, [[SAVEX]]
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_mismatched_bothext:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: uxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _bothzext16
+; CHECKT2D: sxth r0, [[SAVEX]]
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @bothzext16(i16 %x)
+  %b = sext i16 %x to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
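+
+; Summary (see the 'returned' handling in SelectionDAGBuilder.cpp): the
+; attribute survives lowering only when the argument and return value are
+; explicitly extended the same way (both zeroext or both signext, e.g.
+; bothzext16), or when the value already fills the register so no extension
+; occurs at all (e.g. identity32, whose i32 exactly fills r0).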
diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll
index 3047302ea75..f06e4a4f8dd 100644
--- a/test/CodeGen/ARM/this-return.ll
+++ b/test/CodeGen/ARM/this-return.ll
@@ -103,67 +103,3 @@ entry:
   %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
   ret %struct.E* %this
 }
-
-declare i16 @identity16(i16 returned %x)
-declare zeroext i16 @zeroext16(i16 returned %x)
-declare i32 @identity32(i32 returned %x)
-
-define i16 @test_identity(i16 %x) {
-entry:
-; CHECKELF: test_identity:
-; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
-; CHECKELF: bl identity16
-; CHECKELF: uxth r0, [[SAVEX]]
-; CHECKELF: bl identity32
-; CHECKELF: mov r0, [[SAVEX]]
-; CHECKT2D: test_identity:
-; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
-; CHECKT2D: blx _identity16
-; CHECKT2D: uxth r0, [[SAVEX]]
-; CHECKT2D: blx _identity32
-; CHECKT2D: mov r0, [[SAVEX]]
-  %call = tail call i16 @identity16(i16 %x)
-  %b = zext i16 %x to i32
-  %call2 = tail call i32 @identity32(i32 %b)
-  ret i16 %call
-}
-
-define i16 @test_matched_ext(i16 %x) {
-entry:
-; CHECKELF: test_matched_ext:
-; CHECKELF-NOT: mov {{r[0-9]+}}, r0
-; CHECKELF: bl zeroext16
-; CHECKELF-NOT: uxth r0, {{r[0-9]+}}
-; CHECKELF: bl identity32
-; CHECKELF-NOT: mov r0, {{r[0-9]+}}
-; CHECKT2D: test_matched_ext:
-; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
-; CHECKT2D: blx _zeroext16
-; CHECKT2D-NOT: uxth r0, {{r[0-9]+}}
-; CHECKT2D: blx _identity32
-; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
-  %call = tail call i16 @zeroext16(i16 %x)
-  %b = zext i16 %call to i32
-  %call2 = tail call i32 @identity32(i32 %b)
-  ret i16 %call
-}
-
-define i16 @test_mismatched_ext(i16 %x) {
-entry:
-; CHECKELF: test_mismatched_ext:
-; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
-; CHECKELF: bl zeroext16
-; CHECKELF: sxth r0, [[SAVEX]]
-; CHECKELF: bl identity32
-; CHECKELF: mov r0, [[SAVEX]]
-; CHECKT2D: test_mismatched_ext:
-; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
-; CHECKT2D: blx _zeroext16
-; CHECKT2D: sxth r0, [[SAVEX]]
-; CHECKT2D: blx _identity32
-; CHECKT2D: mov r0, [[SAVEX]]
-  %call = tail call i16 @zeroext16(i16 %x)
-  %b = sext i16 %call to i32
-  %call2 = tail call i32 @identity32(i32 %b)
-  ret i16 %call
-}