diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fb12cc226af..0bebe0ad89e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -614,6 +614,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } @@ -869,14 +875,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { - // int <-> fp are custom expanded into bit_convert + ARMISD ops. - if (Subtarget->hasVFP2()) { - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - } - // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); @@ -1033,11 +1031,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RBIT: return "ARMISD::RBIT"; - case ARMISD::FTOSI: return "ARMISD::FTOSI"; - case ARMISD::FTOUI: return "ARMISD::FTOUI"; - case ARMISD::SITOF: return "ARMISD::SITOF"; - case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -3795,7 +3788,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) @@ -3808,20 +3800,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { /*isSigned*/ false, SDLoc(Op)).first; } - SDLoc dl(Op); - unsigned Opc; - - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid opcode!"); - case ISD::FP_TO_SINT: - Opc = ARMISD::FTOSI; - break; - case ISD::FP_TO_UINT: - Opc = ARMISD::FTOUI; - break; - } - Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + return Op; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3861,7 +3840,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) @@ -3874,21 +3852,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { /*isSigned*/ false, SDLoc(Op)).first; } - SDLoc dl(Op); - unsigned Opc; - - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid opcode!"); - case ISD::SINT_TO_FP: - Opc = ARMISD::SITOF; - break; - case ISD::UINT_TO_FP: - Opc = ARMISD::UITOF; - break; - } - - Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(Opc, dl, VT, Op); + return Op; } SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 682f479278e..dd4c954e937 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -65,11 +65,6 @@ namespace llvm { RBIT, // ARM bitreverse instruction - FTOSI, // FP to sint within a FP register. - FTOUI, // FP to uint within a FP register. - SITOF, // sint to FP within a FP register. - UITOF, // uint to FP within a FP register. - SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 7d27cf3fcdc..e79608d360c 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -983,7 +983,12 @@ class ARMV5MOPat : Pat { class ARMV6Pat : Pat { list Predicates = [IsARM, HasV6]; } - +class VFPPat : Pat { + list Predicates = [HasVFP2]; +} +class VFPNoNEONPat : Pat { + list Predicates = [HasVFP2, DontUseNEONForFP]; +} //===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 2a7b4b57fd0..655fc812f00 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -6158,6 +6158,21 @@ class N3VSMulOpPat (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; +class NVCVTIFPat + : NEONFPPat<(f32 (OpNode GPR:$a)), + (f32 (EXTRACT_SUBREG + (v2f32 (Inst + (INSERT_SUBREG + (v2f32 (IMPLICIT_DEF)), + (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))), + ssub_0))>; +class NVCVTFIPat + : NEONFPPat<(i32 (OpNode SPR:$a)), + (i32 (EXTRACT_SUBREG + (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, ssub_0))), + ssub_0))>; + def : N3VSPat; def : N3VSPat; def : N3VSPat; @@ -6173,10 +6188,22 @@ def : N2VSPat; def : N2VSPat; def : N3VSPat; def : N3VSPat; -def : N2VSPat; -def : N2VSPat; -def : N2VSPat; -def : N2VSPat; +def : NVCVTFIPat; +def : NVCVTFIPat; +def : NVCVTIFPat; +def : NVCVTIFPat; + +// NEON doesn't have any f64 conversions, so provide patterns to make +// sure the VFP conversions match when extracting from a vector. +def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; +def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))), + (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. def : Pat<(f32 (bitconvert GPR:$a)), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index cc8a2b0b4c6..dba95f9e56d 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -11,16 +11,10 @@ // //===----------------------------------------------------------------------===// -def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; -def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; -def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; -def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; -def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; @@ -641,7 +635,7 @@ multiclass vcvt_inst rm, def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } @@ -649,7 +643,7 @@ multiclass vcvt_inst rm, def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + []>, Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } @@ -657,7 +651,7 @@ multiclass vcvt_inst rm, def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + []>, Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; @@ -672,7 +666,7 @@ multiclass vcvt_inst rm, def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + []>, Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; @@ -684,6 +678,27 @@ multiclass vcvt_inst rm, let Inst{8} = 1; } } + + let Predicates = [HasFPARMv8] in { + def : Pat<(i32 (fp_to_sint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast(NAME#"SS") SPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast(NAME#"US") SPR:$a), + GPR)>; + } + let Predicates = [HasFPARMv8, HasDPVFP] in { + def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + (!cast(NAME#"SD") DPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + (!cast(NAME#"UD") DPR:$a), + GPR)>; + } } defm VCVTA : vcvt_inst<"a", 0b00, frnd>; @@ -988,14 +1003,22 @@ class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> { + []> { let Inst{7} = 1; // s32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (sint_to_fp GPR:$a)), + (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))), + (VSITOD (VLDRS addrmode5:$a))>; +} + def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { + []> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -1003,17 +1026,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), + (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))), + (VSITOS (VLDRS addrmode5:$a))>; + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> { + []> { let Inst{7} = 0; // u32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (uint_to_fp GPR:$a)), + (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))), + (VUITOD (VLDRS addrmode5:$a))>; +} + def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { + []> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -1021,6 +1058,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), + (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))), + (VUITOS (VLDRS addrmode5:$a))>; + // FP -> Int: class AVConv1IsD_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1063,14 +1106,22 @@ class AVConv1InsS_Encode opcod1, bits<2> opcod2, bits<4> opcod3, def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; + + def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1078,17 +1129,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), + (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; + def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; + + def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1096,6 +1161,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), + (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working.