diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td index 7e909b7545c..2589de2c592 100644 --- a/include/llvm/IntrinsicsARM.td +++ b/include/llvm/IntrinsicsARM.td @@ -35,6 +35,20 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } +//===----------------------------------------------------------------------===// +// VFP + +let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". + def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">, + Intrinsic<[], [llvm_i32_ty], [IntrWriteMem]>; + def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], + [IntrNoMem]>; + def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], + [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // Advanced SIMD (NEON) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 31439b4a267..cd37633d967 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -176,6 +176,7 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); + Reserved.set(ARM::FPSCR); if (STI.isTargetDarwin() || hasFP(MF)) Reserved.set(FramePtr); // Some targets reserve R9. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 571dc2e63e9..08054cbae32 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -474,10 +474,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); + } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -2764,6 +2766,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } +SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { + // The rounding mode is in bits 23:22 of the FPSCR. + // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 + // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) + // so that the shift + and get folded into a bitfield extract. + DebugLoc dl = Op.getDebugLoc(); + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, + DAG.getConstant(Intrinsic::arm_get_fpscr, + MVT::i32)); + SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, + DAG.getConstant(1U << 22, MVT::i32)); + SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, + DAG.getConstant(22, MVT::i32)); + return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + DAG.getConstant(3, MVT::i32)); +} + static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -3705,6 +3725,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5007f96dbe1..df153030c5f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -82,7 +82,7 @@ namespace llvm { MEMBARRIER, // Memory barrier SYNCBARRIER, // Memory sync barrier - + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -342,6 +342,7 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 84c23e1a784..d15544420b8 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. // For disassembly only. - +let Uses = [FPSCR] in { def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> { let Inst{7} = 0; // Z bit } +} // Convert between floating-point and fixed-point // Data type for fixed-point naming convention: @@ -654,32 +655,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", } // FPSCR <-> GPR (for disassembly only) - -let neverHasSideEffects = 1 in { -let Uses = [FPSCR] in { -def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", - "\t$dst, fpscr", - [/* For disassembly only; pattern left blank */]> { +let hasSideEffects = 1, Uses = [FPSCR] in +def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, + "vmrs", "\t$dst, fpscr", + [(set GPR:$dst, (int_arm_get_fpscr))]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -let Defs = [FPSCR] in { -def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", - "\tfpscr, $src", - [/* For disassembly only; pattern left blank */]> { +let Defs = [FPSCR] in +def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, + "vmsr", "\tfpscr, $src", + [(int_arm_set_fpscr GPR:$src)]> { let Inst{27-20} = 0b11101110; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -} // neverHasSideEffects // Materialize FP immediates. VFP3 only. let isReMaterializable = 1 in {