CodeGen: emit IR-level f16 conversion intrinsics as fptrunc/fpext
This makes the first stage DAG for @llvm.convert.to.fp16 an fptrunc, and
correspondingly @llvm.convert.from.fp16 an fpext. The legalisation path is
now uniform, regardless of the input IR:

    fptrunc -> FP_TO_FP16 (if f16 illegal) -> libcall
    fpext   -> FP16_TO_FP (if f16 illegal) -> libcall

Each target should be able to select the version that best matches its
operations and not be required to duplicate patterns for both fptrunc and
FP_TO_FP16 (for example). As a result we can remove some redundant AArch64
patterns.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213507 91177308-0d34-0410-b5e6-96231b3b80d8
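In IR terms (a sketch, not part of the patch; the function names are illustrative), the intrinsics now build the same first-stage DAG as plain conversions through the half type:

define i16 @to_fp16(float %x) {
  %h = fptrunc float %x to half     ; first-stage DAG: ISD::FP_ROUND to f16,
  %bits = bitcast half %h to i16    ; then ISD::BITCAST to i16
  ret i16 %bits
}

define float @from_fp16(i16 %bits) {
  %h = bitcast i16 %bits to half    ; ISD::BITCAST to f16,
  %f = fpext half %h to float       ; then ISD::FP_EXTEND
  ret float %f
}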
lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -373,6 +373,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Op = N->getOperand(0);
+
+  // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+  // entirely possible for both f16 and f32 to be legal, so use the fully
+  // hard-float FP_EXTEND rather than FP16_TO_FP.
+  if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32)
+    Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+
   RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
   return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
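The two-stage path means an extension from f16 to anything wider than f32 first goes to f32 (the only f16 extension libcall available) and then continues through the ordinary FPEXT libcalls. A minimal sketch of IR that exercises this on a soft-float target:

define double @ext_f16_to_f64(half %h) {
  ; when f16 is illegal, the legalizer splits this into f16 -> f32
  ; (FPEXT_F16_F32) followed by f32 -> f64 (FPEXT_F32_F64)
  %d = fpext half %h to double
  ret double %d
}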
lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -511,6 +518,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  if (N->getValueType(0) == MVT::f16)
+    return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
   SDValue Op = GetSoftenedFloat(N->getOperand(0));
   return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
                                            RTLIB::TRUNC_F32,
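Here the f16 case returns the partially-softened FP_TO_FP16 node directly: its result is already the i16 the softening wants, so no TRUNC_* soft-float call is needed. A sketch of the conversion this machinery ultimately serves (the intrinsic form, as declared in the tests below):

define i16 @to_fp16_bits(float %x) {
  %bits = tail call i16 @llvm.convert.to.fp16.f32(float %x)
  ret i16 %bits
}

declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone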
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5155,13 +5155,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::convert_to_fp16:
-    setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
-                             MVT::i16, getValue(I.getArgOperand(0))));
+    setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+                             DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+                                         getValue(I.getArgOperand(0)),
+                                         DAG.getTargetConstant(0, MVT::i32))));
     return nullptr;
   case Intrinsic::convert_from_fp16:
     setValue(&I,
-             DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
-                         getValue(I.getArgOperand(0))));
+             DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()),
+                         DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+                                     getValue(I.getArgOperand(0)))));
     return nullptr;
   case Intrinsic::pcmarker: {
     SDValue Tmp = getValue(I.getArgOperand(0));
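The TargetConstant(0) on the FP_ROUND is the standard "value may change" flag, the conservative setting for a genuine f32 -> f16 truncation. And since the result type of convert_from_fp16 is now just whatever FP_EXTEND produces, it is no longer tied to f32; the overloaded f64 form used by the updated tests below takes the same path (a sketch):

define double @from_fp16_f64(i16 %bits) {
  ; builds BITCAST to f16, then a direct FP_EXTEND to f64
  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %bits)
  ret double %conv
}

declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone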
lib/CodeGen/TargetLoweringBase.cpp
@@ -422,7 +422,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
 /// getFPEXT - Return the FPEXT_*_* value for the given types, or
 /// UNKNOWN_LIBCALL if there is none.
 RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
-  if (OpVT == MVT::f32) {
+  if (OpVT == MVT::f16) {
+    if (RetVT == MVT::f32)
+      return FPEXT_F16_F32;
+  } else if (OpVT == MVT::f32) {
     if (RetVT == MVT::f64)
       return FPEXT_F32_F64;
     if (RetVT == MVT::f128)
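With FPEXT_F16_F32 in the table, RTLIB::getFPEXT(MVT::f16, MVT::f32) resolves to a real libcall, so the soft-float path in SoftenFloatRes_FP_EXTEND above has something to land on. A sketch of the IR that reaches it on a target without native f16 support:

define float @ext_f16_to_f32(half %h) {
  ; softened to the FPEXT_F16_F32 libcall
  %f = fpext half %h to float
  ret float %f
}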
lib/Target/AArch64/AArch64InstrInfo.td
@@ -2244,90 +2244,6 @@ def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
 
 defm FCVT : FPConversion<"fcvt">;
 
-def : Pat<(fp_to_f16 FPR32:$Rn),
-          (i32 (COPY_TO_REGCLASS
-                   (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
-                   GPR32))>;
-
-def : Pat<(f32 (f16_to_fp i32:$Rn)),
-          (FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
-                                   hsub))>;
-
-// When converting from f16 coming directly from a load, make sure we
-// load into the FPR16 registers rather than going through the GPRs.
-//   f16->f32
-def : Pat<(f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                            ro_Wextend16:$extend))))),
-          (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                            ro_Xextend16:$extend))))),
-          (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f32 (f16_to_fp (i32
-                (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
-           (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f32 (f16_to_fp (i32
-                (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
-           (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-//   f16->f64
-def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                            ro_Wextend16:$extend))))))),
-          (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
-                (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                            ro_Xextend16:$extend))))))),
-          (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
-                (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
-           (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
-                (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
-           (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
-
-// When converting to f16 going directly to a store, make sure we use the
-// appropriate direct conversion instructions and store via the FPR16
-// registers rather than going through the GPRs.
-let AddedComplexity = 10 in {
-//   f32->f16
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-                          (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                         ro_Wextend16:$extend)),
-           (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
-                    ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-                          (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)),
-           (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
-                    ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-                          (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
-           (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
-                          (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
-           (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-//   f64->f16
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-                          (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
-                                         ro_Wextend16:$extend)),
-           (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
-                    ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-                          (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
-                                         ro_Xextend16:$extend)),
-           (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
-                    ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-                          (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
-           (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
-                          (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
-           (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
-}
-
-
 //===----------------------------------------------------------------------===//
 // Floating point single operand instructions.
 //===----------------------------------------------------------------------===//
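These patterns existed only because the intrinsic-specific fp_to_f16/f16_to_fp nodes were distinct from the fpround/fpextend path; with a uniform first-stage DAG, the FCVT conversion patterns and FPR16 load/store selection cover the same cases. IR along these lines (2014-era syntax, matching the tests below) should still select an ldr/fcvt pair (a sketch):

define float @load_and_extend(i16* %p) {
  %bits = load i16* %p, align 2
  %f = tail call float @llvm.convert.from.fp16.f32(i16 %bits)
  ret float %f
}

declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone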
test/CodeGen/AArch64/f16-convert.ll
@@ -18,8 +18,7 @@ define double @load1(i16* nocapture readonly %a) nounwind {
 ; CHECK-NEXT: ret
 
   %tmp = load i16* %a, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
 
@@ -45,8 +44,7 @@ define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
   %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
 
@@ -70,8 +68,7 @@ define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
 
   %arrayidx = getelementptr inbounds i16* %a, i64 %i
   %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
 
@@ -95,8 +92,7 @@ define double @load7(i16* nocapture readonly %a) nounwind {
 
   %arrayidx = getelementptr inbounds i16* %a, i64 10
   %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
 
@@ -120,8 +116,7 @@ define double @load9(i16* nocapture readonly %a) nounwind {
 
   %arrayidx = getelementptr inbounds i16* %a, i64 -10
   %tmp = load i16* %arrayidx, align 2
-  %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
-  %conv = fpext float %tmp1 to double
+  %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
 
@@ -252,3 +247,5 @@ define void @store9(i16* nocapture %a, double %val) nounwind {
 
 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
|