From 76a312b7d1c2b41394696510506967cd0794b831 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 19 Mar 2010 22:51:32 +0000 Subject: [PATCH] Revert this change, since it was causing ARM performance regressions. --- Reverse-merging r98889 into '.': U lib/Target/ARM/ARMInstrNEON.td U lib/Target/ARM/ARMISelLowering.h U lib/Target/ARM/ARMInstrInfo.td U lib/Target/ARM/ARMInstrVFP.td U lib/Target/ARM/ARMISelLowering.cpp U lib/Target/ARM/ARMInstrFormats.td git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 54 ++++++++++++++++++++++++++++ lib/Target/ARM/ARMISelLowering.h | 5 +++ lib/Target/ARM/ARMInstrFormats.td | 3 -- lib/Target/ARM/ARMInstrInfo.td | 1 - lib/Target/ARM/ARMInstrNEON.td | 27 +++----------- lib/Target/ARM/ARMInstrVFP.td | 58 +++++++++--------------------- 6 files changed, 79 insertions(+), 69 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 38aa689fc68..0d0a004c284 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -428,6 +428,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Various VFP goodness if (!UseSoftFloat && !Subtarget->isThumb1Only()) { + // int <-> fp are custom expanded into bit_convert + ARMISD ops. + if (Subtarget->hasVFP2()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } // Special handling for half-precision FP. if (!Subtarget->hasFP16()) { setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); @@ -488,6 +495,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RBIT: return "ARMISD::RBIT"; + case ARMISD::FTOSI: return "ARMISD::FTOSI"; + case ARMISD::FTOUI: return "ARMISD::FTOUI"; + case ARMISD::SITOF: return "ARMISD::SITOF"; + case ARMISD::UITOF: return "ARMISD::UITOF"; + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1966,6 +1978,44 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { } } +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::FP_TO_SINT: + Opc = ARMISD::FTOSI; + break; + case ISD::FP_TO_UINT: + Opc = ARMISD::FTOUI; + break; + } + Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +} + +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::SINT_TO_FP: + Opc = ARMISD::SITOF; + break; + case ISD::UINT_TO_FP: + Opc = ARMISD::UITOF; + break; + } + + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); @@ -3020,6 +3070,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: break; case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index c18df5e4b34..f8f8adc70af 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -55,6 +55,11 @@ namespace llvm { RBIT, // ARM bitreverse instruction + FTOSI, // FP to sint within a FP register. + FTOUI, // FP to uint within a FP register. + SITOF, // sint to FP within a FP register. + UITOF, // uint to FP within a FP register. + SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index f0816e15665..5af654ae047 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -937,9 +937,6 @@ class ARMV5TEPat : Pat { class ARMV6Pat : Pat { list Predicates = [IsARM, HasV6]; } -class VFPPat : Pat { - list Predicates = [HasVFP2]; -} //===----------------------------------------------------------------------===// // diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c159b6ab248..de5a9fe5b0d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -127,7 +127,6 @@ def HasV7 : Predicate<"Subtarget->hasV7Ops()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; -def HasFP16 : Predicate<"Subtarget->hasFP16()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c3a308dfaa2..1e12b6f99c1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3139,44 +3139,25 @@ def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), def : N3VSPat; // Vector Convert between single-precision FP and integer - -class NVCVTFIPat - : NEONFPPat<(i32 (OpNode SPR:$a)), - (i32 (EXTRACT_SUBREG - (v2i32 (Inst - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, - arm_ssubreg_0))), - arm_ssubreg_0))>; - -class NVCVTIFPat - : NEONFPPat<(f32 (OpNode GPR:$a)), - (f32 (EXTRACT_SUBREG - (v2f32 (Inst - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), - arm_ssubreg_0))), - arm_ssubreg_0))>; - let neverHasSideEffects = 1 in def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>; -def : NVCVTFIPat; +def : N2VSPat; let neverHasSideEffects = 1 in def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>; -def : NVCVTFIPat; +def : N2VSPat; let neverHasSideEffects = 1 in def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>; -def : NVCVTIFPat; +def : N2VSPat; let neverHasSideEffects = 1 in def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>; -def : NVCVTIFPat; +def : N2VSPat; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index bb62940156d..7d36cac137d 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -21,6 +21,10 @@ def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; +def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; +def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; +def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; @@ -259,17 +263,15 @@ def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a) /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; -def : VFPPat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>, - Requires<[HasVFP3, HasFP16]>; +def : ARMPat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; -def : VFPPat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>, - Requires<[HasVFP3, HasFP16]>; +def : ARMPat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", @@ -361,90 +363,62 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set DPR:$dst, (f64 (arm_sitof SPR:$a)))]> { let Inst{7} = 1; // s32 } -def : VFPPat<(f64 (sint_to_fp GPR:$a)), - (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; // s32 } -def : VFPPat<(f32 (sint_to_fp GPR:$a)), - (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>, - Requires<[DontUseNEONForFP, HasVFP2]>; - def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set DPR:$dst, (f64 (arm_uitof SPR:$a)))]> { let Inst{7} = 0; // u32 } -def : VFPPat<(f64 (uint_to_fp GPR:$a)), - (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (arm_uitof SPR:$a))]> { let Inst{7} = 0; // u32 } -def : VFPPat<(f32 (uint_to_fp GPR:$a)), - (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>, - Requires<[DontUseNEONForFP, HasVFP2]>; - // FP to Int: // Always set Z bit in the instruction, i.e. "round towards zero" variants. def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (arm_ftosi (f64 DPR:$a)))]> { let Inst{7} = 1; // Z bit } -def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), - (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; - def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } -def : VFPPat<(i32 (fp_to_sint SPR:$a)), - (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>, - Requires<[DontUseNEONForFP, HasVFP2]>; - def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (arm_ftoui (f64 DPR:$a)))]> { let Inst{7} = 1; // Z bit } -def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), - (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; - def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } -def : VFPPat<(i32 (fp_to_uint SPR:$a)), - (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>, - Requires<[DontUseNEONForFP, HasVFP2]>; - // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. // For disassembly only.