diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e3c034c8a15..649dd7a349f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -373,6 +373,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -511,6 +518,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->getValueType(0) == MVT::f16) + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 189b56d5a93..c07b5e6a736 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5155,13 +5155,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl, - MVT::i16, getValue(I.getArgOperand(0)))); + setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, + DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, + getValue(I.getArgOperand(0)), + DAG.getTargetConstant(0, MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, - DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()), - getValue(I.getArgOperand(0)))); + DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 42cd4bf1fe1..e80ef7176c2 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -422,7 +422,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) { /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::f32) + return FPEXT_F16_F32; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; if (RetVT == MVT::f128) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 2708ee03313..0ba069e99a8 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2244,90 +2244,6 @@ def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; defm FCVT : FPConversion<"fcvt">; -def : Pat<(fp_to_f16 FPR32:$Rn), - (i32 (COPY_TO_REGCLASS - (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)), - GPR32))>; - -def : Pat<(f32 (f16_to_fp i32:$Rn)), - (FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)), - hsub))>; - -// When converting from f16 coming directly from a load, make sure we -// load into the FPR16 registers rather than going through the GPRs. -// f16->f32 -def : Pat<(f32 (f16_to_fp (i32 - (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend))))), - (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; -def : Pat<(f32 (f16_to_fp (i32 - (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend))))), - (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; -def : Pat <(f32 (f16_to_fp (i32 - (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), - (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; -def : Pat <(f32 (f16_to_fp (i32 - (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), - (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; - -// f16->f64 -def : Pat<(f64 (fextend (f32 (f16_to_fp (i32 - (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend))))))), - (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; -def : Pat<(f64 (fextend (f32 (f16_to_fp (i32 - (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend))))))), - (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; -def : Pat <(f64 (fextend (f32 (f16_to_fp (i32 - (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))), - (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; -def : Pat <(f64 (fextend (f32 (f16_to_fp (i32 - (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))), - (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; - -// When converting to f16 going directly to a store, make sure we use the -// appropriate direct conversion instructions and store via the FPR16 -// registers rather than going through the GPRs. -let AddedComplexity = 10 in { -// f32->f16 -def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))), - (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend)), - (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend)>; -def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))), - (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)), - (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)>; -def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))), - (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), - (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; -def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))), - (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), - (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>; -// f64->f16 -def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))), - (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend)), - (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm, - ro_Wextend16:$extend)>; -def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))), - (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)), - (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm, - ro_Xextend16:$extend)>; -def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))), - (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), - (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; -def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))), - (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), - (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>; -} - - //===----------------------------------------------------------------------===// // Floating point single operand instructions. //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll index 72685e87bd5..12412d45aa6 100644 --- a/test/CodeGen/AArch64/f16-convert.ll +++ b/test/CodeGen/AArch64/f16-convert.ll @@ -18,8 +18,7 @@ define double @load1(i16* nocapture readonly %a) nounwind { ; CHECK-NEXT: ret %tmp = load i16* %a, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - %conv = fpext float %tmp1 to double + %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) ret double %conv } @@ -45,8 +44,7 @@ define double @load3(i16* nocapture readonly %a, i32 %i) nounwind { %idxprom = sext i32 %i to i64 %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom %tmp = load i16* %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - %conv = fpext float %tmp1 to double + %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) ret double %conv } @@ -70,8 +68,7 @@ define double @load5(i16* nocapture readonly %a, i64 %i) nounwind { %arrayidx = getelementptr inbounds i16* %a, i64 %i %tmp = load i16* %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - %conv = fpext float %tmp1 to double + %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) ret double %conv } @@ -95,8 +92,7 @@ define double @load7(i16* nocapture readonly %a) nounwind { %arrayidx = getelementptr inbounds i16* %a, i64 10 %tmp = load i16* %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - %conv = fpext float %tmp1 to double + %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) ret double %conv } @@ -120,8 +116,7 @@ define double @load9(i16* nocapture readonly %a) nounwind { %arrayidx = getelementptr inbounds i16* %a, i64 -10 %tmp = load i16* %arrayidx, align 2 - %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp) - %conv = fpext float %tmp1 to double + %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp) ret double %conv } @@ -252,3 +247,5 @@ define void @store9(i16* nocapture %a, double %val) nounwind { declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone +declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone +declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone