From cb047f2a74666bc436eb4dea3b5893bc289d574d Mon Sep 17 00:00:00 2001
From: Oliver Stannard
Date: Fri, 11 Jul 2014 13:33:46 +0000
Subject: [PATCH] ARM: Allow __fp16 as a function arg or return type for
 AArch64

ACLE 2.0 allows __fp16 to be used as a function argument or return type.
This enables this for AArch64.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212812 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp       |  2 +-
 lib/Target/AArch64/AArch64CallingConvention.td | 12 +++++++++---
 lib/Target/AArch64/AArch64ISelLowering.cpp     |  2 ++
 test/CodeGen/AArch64/arm64-aapcs.ll            | 14 ++++++++++++++
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 0d6a06605ed..bff540ad174 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -270,7 +270,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
   EVT OrigVT = VT;
   EVT SVT = VT;
 
-  while (SVT != MVT::f32) {
+  while (SVT != MVT::f32 && SVT != MVT::f16) {
     SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
     if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
         // Only do this if the target has a native EXTLOAD instruction from
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 8e8bd3d0bcd..1fe5138b529 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -54,6 +54,8 @@ def CC_AArch64_AAPCS : CallingConv<[
   CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                           [W0, W1, W2, W3, W4, W5, W6, W7]>>,
+  CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                           [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
@@ -65,7 +67,7 @@ def CC_AArch64_AAPCS : CallingConv<[
            CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
 
   // If more than will fit in registers, pass them on the stack instead.
-  CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>,
+  CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
   CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
   CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
            CCAssignToStack<8, 8>>,
@@ -88,6 +90,8 @@ def RetCC_AArch64_AAPCS : CallingConv<[
                                           [X0, X1, X2, X3, X4, X5, X6, X7]>>,
   CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                           [W0, W1, W2, W3, W4, W5, W6, W7]>>,
+  CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                           [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
@@ -129,6 +133,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
   CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                           [W0, W1, W2, W3, W4, W5, W6, W7]>>,
 
+  CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
+                                          [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                           [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
@@ -141,7 +147,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
 
   // If more than will fit in registers, pass them on the stack instead.
   CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
-  CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>,
+  CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
   CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
   CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
            CCAssignToStack<8, 8>>,
@@ -154,7 +160,7 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
 
   // Handle all scalar types as either i64 or f64.
   CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-  CCIfType<[f32], CCPromoteToType<f64>>,
+  CCIfType<[f16, f32], CCPromoteToType<f64>>,
 
   // Everything is on the stack.
   // i128 is split to two i64s, and its stack alignment is 16 bytes.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 28d0035a11a..07ff0938ed2 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1711,6 +1711,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
         RC = &AArch64::GPR32RegClass;
       else if (RegVT == MVT::i64)
         RC = &AArch64::GPR64RegClass;
+      else if (RegVT == MVT::f16)
+        RC = &AArch64::FPR16RegClass;
       else if (RegVT == MVT::f32)
         RC = &AArch64::FPR32RegClass;
       else if (RegVT == MVT::f64 || RegVT.is64BitVector())
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index ccf1371bb5f..127a7cc0a15 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -109,3 +109,17 @@ entry:
 ; CHECK: ldr {{q[0-9]+}}, [sp]
   ret <2 x double> %varg_stack;
 }
+
+; Check that f16 can be passed and returned (ACLE 2.0 extension)
+define half @test_half(float, half %arg) {
+; CHECK-LABEL: test_half:
+; CHECK: mov v0.16b, v{{[0-9]+}}.16b
+  ret half %arg;
+}
+
+; Check that f16 constants are materialized correctly
+define half @test_half_const() {
+; CHECK-LABEL: test_half_const:
+; CHECK: ldr h0, [x{{[0-9]+}}, :lo12:{{.*}}]
+  ret half 0xH4248
+}