From 567d14f07cd62bfb9dd0edd90144a0a840450f7a Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov
Date: Wed, 5 Aug 2009 19:04:42 +0000
Subject: [PATCH] Missed pieces for ARM HardFP ABI.

Patch by Sandeep Patel!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78225 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMCallingConv.td            |  2 +
 lib/Target/ARM/ARMISelLowering.cpp          | 56 ++++++++++++---------
 lib/Target/ARM/ARMISelLowering.h            |  2 +-
 lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp |  5 ++
 test/CodeGen/ARM/hardfloat_neon.ll          | 13 +++++
 5 files changed, 54 insertions(+), 24 deletions(-)
 create mode 100644 test/CodeGen/ARM/hardfloat_neon.ll

diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8a4c741faf9..716163958d9 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -111,6 +111,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7,
                                  S8, S9, S10, S11, S12, S13, S14, S15]>>,
@@ -122,6 +123,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,

+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7,
                                  S8, S9, S10, S11, S12, S13, S14, S15]>>,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index af18c2770f9..61a08db29f6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -662,27 +662,28 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 /// given CallingConvention value.
 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
-                                                 bool Return) const {
+                                                 bool Return,
+                                                 bool isVarArg) const {
   switch (CC) {
   default:
-   llvm_unreachable("Unsupported calling convention");
+    llvm_unreachable("Unsupported calling convention");
   case CallingConv::C:
   case CallingConv::Fast:
-   // Use target triple & subtarget features to do actual dispatch.
-   if (Subtarget->isAAPCS_ABI()) {
-     if (Subtarget->hasVFP2() &&
-         FloatABIType == FloatABI::Hard)
-       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
-     else
-       return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
-   } else
-     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+    // Use target triple & subtarget features to do actual dispatch.
+    if (Subtarget->isAAPCS_ABI()) {
+      if (Subtarget->hasVFP2() &&
+          FloatABIType == FloatABI::Hard && !isVarArg)
+        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+      else
+        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+    } else
+      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   case CallingConv::ARM_AAPCS_VFP:
-   return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   case CallingConv::ARM_AAPCS:
-   return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   case CallingConv::ARM_APCS:
-   return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   }
 }
@@ -700,7 +701,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  RVLocs, *DAG.getContext());
   CCInfo.AnalyzeCallResult(Ins,
-                           CCAssignFnForNode(CallConv, /* Return*/ true));
+                           CCAssignFnForNode(CallConv, /* Return*/ true,
+                                             isVarArg));

   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -832,7 +834,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeCallOperands(Outs,
-                             CCAssignFnForNode(CallConv, /* Return*/ false));
+                             CCAssignFnForNode(CallConv, /* Return*/ false,
+                                               isVarArg));

   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -873,7 +876,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       break;
     }

-    // f64 and v2f64 are passed in i32 pairs and must be split into pieces
+    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
     if (VA.needsCustom()) {
       if (VA.getLocVT() == MVT::v2f64) {
         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -1034,7 +1037,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
                  *DAG.getContext());

   // Analyze outgoing return values.
-  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true));
+  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
+                                               isVarArg));

   // If this is the first return lowered for this function, add
   // the regs to the liveout set for the function.
@@ -1422,7 +1426,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeFormalArguments(Ins,
-                                CCAssignFnForNode(CallConv, /* Return*/ false));
+                                CCAssignFnForNode(CallConv, /* Return*/ false,
+                                                  isVarArg));

   SmallVector ArgValues;

@@ -1455,18 +1460,23 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,

     } else {
       TargetRegisterClass *RC;
-      if (FloatABIType == FloatABI::Hard && RegVT == MVT::f32)
+      bool IsHardFloatCC = (CallConv == CallingConv::ARM_AAPCS_VFP);
+
+      if (IsHardFloatCC && RegVT == MVT::f32)
         RC = ARM::SPRRegisterClass;
-      else if (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)
+      else if (IsHardFloatCC && RegVT == MVT::f64)
         RC = ARM::DPRRegisterClass;
+      else if (IsHardFloatCC && RegVT == MVT::v2f64)
+        RC = ARM::QPRRegisterClass;
       else if (AFI->isThumb1OnlyFunction())
         RC = ARM::tGPRRegisterClass;
       else
         RC = ARM::GPRRegisterClass;

       assert((RegVT == MVT::i32 || RegVT == MVT::f32 ||
-              (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)) &&
-             "RegVT not supported by formal arguments Lowering");
+              (IsHardFloatCC &&
+               ((RegVT == MVT::f64) || (RegVT == MVT::v2f64)))) &&
+             "RegVT not supported by FORMAL_ARGUMENTS Lowering");

       // Transform the arguments in physical registers into virtual ones.
       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4e05d4530f9..9393bafdacf 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -235,7 +235,7 @@ namespace llvm {
     SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                  SDValue &Root, SelectionDAG &DAG,
                                  DebugLoc dl);
-    CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const;
+    CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return, bool isVarArg) const;
     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                              DebugLoc dl, SelectionDAG &DAG,
                              const CCValAssign &VA,
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 3cb8458f2be..7b19969cfb7 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -1108,6 +1108,11 @@ bool ARMAsmPrinter::doInitialization(Module &M) {
     O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n"
       << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n";

+    // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+    if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard)
+      O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_HardFP_use << ", 3\n"
+        << "\t.eabi_attribute " << ARMBuildAttrs::ABI_VFP_args << ", 1\n";
+
     // FIXME: Should we signal R9 usage?
   }

diff --git a/test/CodeGen/ARM/hardfloat_neon.ll b/test/CodeGen/ARM/hardfloat_neon.ll
new file mode 100644
index 00000000000..728370a56da
--- /dev/null
+++ b/test/CodeGen/ARM/hardfloat_neon.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard
+
+define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind {
+        %tmp1 = mul <16 x i8> %A, %B
+        ret <16 x i8> %tmp1
+}
+
+define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) {
+        %tmp = call <16 x i8> @g(<16 x i8> %b)
+        ret <16 x i8> %tmp
+}
+
+declare <16 x i8> @g(<16 x i8>)
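As a rough illustration of what the hard-float lowering above is meant to enable (not part of the patch itself), a scalar companion to hardfloat_neon.ll might look like the sketch below. The function name scalar_hardfp and the exact RUN line are hypothetical; the intent is that under the AAPCS-VFP convention selected by -float-abi=hard, the f32 argument arrives in S0, the f64 argument in D0, and the f64 result is returned in D0 rather than in core registers.

; Hypothetical sketch, not included in this commit: exercises the scalar
; f32/f64 hard-float paths rather than the NEON v2f64 path the new test covers.
; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard

define double @scalar_hardfp(float %a, double %b) nounwind {
        %ext = fpext float %a to double   ; widen the f32 argument (expected in S0)
        %sum = fadd double %ext, %b       ; combine with the f64 argument (expected in D0)
        ret double %sum                   ; f64 result expected back in D0 under AAPCS-VFP
}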