Add support for getting & setting the FPSCR application register on ARM when VFP is enabled.

Add support for using the FPSCR in conjunction with the vcvtr instruction to control fp-to-int rounding.
Add support for the FLT_ROUNDS_ node now that the FPSCR is exposed.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110152 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Nate Begeman
Date:   2010-08-03 21:31:55 +0000
Commit: d1fb583128 (parent 6fc24467e9)

5 changed files with 53 additions and 20 deletions
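As a source-level illustration of what the patch enables, here is a minimal sketch assuming an ARM target with VFP2 and a frontend that honors the two GCCBuiltin bindings added in IntrinsicsARM.td below; the helper names are hypothetical, and the bit positions follow the FPSCR layout used by LowerFLT_ROUNDS_ (RMode in bits 23:22: 0 = RN, 1 = RP, 2 = RM, 3 = RZ).

// Hedged sketch: GetRoundingMode and SetRoundToZero are illustrative names.
unsigned GetRoundingMode() {
  // Extract RMode from FPSCR bits 23:22.
  return (__builtin_arm_get_fpscr() >> 22) & 3;
}

void SetRoundToZero() {
  unsigned FPSCR = __builtin_arm_get_fpscr();
  // Set RMode to 0b11 (round toward zero) without disturbing other bits.
  __builtin_arm_set_fpscr((FPSCR & ~(3u << 22)) | (3u << 22));
}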

include/llvm/IntrinsicsARM.td

@@ -35,6 +35,20 @@ let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
     Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 }
 
+//===----------------------------------------------------------------------===//
+// VFP
+
+let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
+  def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
+                          Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+  def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
+                          Intrinsic<[], [llvm_i32_ty], [IntrWriteMem]>;
+  def int_arm_vcvtr  : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+                                 [IntrNoMem]>;
+  def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+                                 [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Advanced SIMD (NEON)

lib/Target/ARM/ARMBaseRegisterInfo.cpp

@@ -176,6 +176,7 @@ getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
+  Reserved.set(ARM::FPSCR);
   if (STI.isTargetDarwin() || hasFP(MF))
     Reserved.set(FramePtr);
   // Some targets reserve R9.

lib/Target/ARM/ARMISelLowering.cpp

@@ -474,10 +474,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+  }
 
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -2764,6 +2766,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  // The rounding mode is in bits 23:22 of the FPSCR.
+  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+  // so that the shift + and get folded into a bitfield extract.
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+                              DAG.getConstant(Intrinsic::arm_get_fpscr,
+                                              MVT::i32));
+  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+                                  DAG.getConstant(1U << 22, MVT::i32));
+  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+                              DAG.getConstant(22, MVT::i32));
+  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+                     DAG.getConstant(3, MVT::i32));
+}
+
 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                          const ARMSubtarget *ST) {
   EVT VT = N->getValueType(0);
@@ -3705,6 +3725,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
   }
   return SDValue();
 }
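The mapping comment above is worth a sanity check, since its formula is ambiguous as written in C (the intended grouping is FPSCR + (1 << 22), which is what the ADD node actually builds). A host-side sketch, runnable anywhere, verifies that the expression maps the ARM RMode values 0, 1, 2, 3 to the FLT_ROUNDS values 1, 2, 3, 0:

#include <cassert>

// Model of the lowered computation: ((FPSCR + (1 << 22)) >> 22) & 3.
int FltRoundsFromFPSCR(unsigned FPSCR) {
  return (int)(((FPSCR + (1u << 22)) >> 22) & 3);
}

int main() {
  assert(FltRoundsFromFPSCR(0u << 22) == 1); // RN (nearest)     -> 1
  assert(FltRoundsFromFPSCR(1u << 22) == 2); // RP (toward +inf) -> 2
  assert(FltRoundsFromFPSCR(2u << 22) == 3); // RM (toward -inf) -> 3
  assert(FltRoundsFromFPSCR(3u << 22) == 0); // RZ (toward zero) -> 0
  return 0;
}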

lib/Target/ARM/ARMISelLowering.h

@@ -82,7 +82,7 @@ namespace llvm {
       MEMBARRIER,   // Memory barrier
       SYNCBARRIER,  // Memory sync barrier
 
       VCEQ,         // Vector compare equal.
       VCGE,         // Vector compare greater than or equal.
       VCGEU,        // Vector compare unsigned greater than or equal.
@@ -342,6 +342,7 @@ namespace llvm {
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                             CallingConv::ID CallConv, bool isVarArg,

lib/Target/ARM/ARMInstrVFP.td

@@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
 // For disassembly only.
+let Uses = [FPSCR] in {
 def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
                        (outs SPR:$dst), (ins DPR:$a),
                        IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
-                       [/* For disassembly only; pattern left blank */]> {
+                       [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
   let Inst{7} = 0; // Z bit
 }
 
 def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
                         (outs SPR:$dst), (ins SPR:$a),
                         IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
-                        [/* For disassembly only; pattern left blank */]> {
+                        [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
   let Inst{7} = 0; // Z bit
 }
 
 def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
                        (outs SPR:$dst), (ins DPR:$a),
                        IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
-                       [/* For disassembly only; pattern left blank */]> {
+                       [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
   let Inst{7} = 0; // Z bit
 }
 
 def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
                         (outs SPR:$dst), (ins SPR:$a),
                         IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
-                        [/* For disassembly only; pattern left blank */]> {
+                        [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
   let Inst{7} = 0; // Z bit
 }
+}
 
 // Convert between floating-point and fixed-point
 // Data type for fixed-point naming convention:
@@ -654,32 +655,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
 }
 
 // FPSCR <-> GPR (for disassembly only)
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
-                 "\t$dst, fpscr",
-                 [/* For disassembly only; pattern left blank */]> {
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
+                 "vmrs", "\t$dst, fpscr",
+                 [(set GPR:$dst, (int_arm_get_fpscr))]> {
   let Inst{27-20} = 0b11101111;
   let Inst{19-16} = 0b0001;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
 }
-}
 
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
-                 "\tfpscr, $src",
-                 [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
+                 "vmsr", "\tfpscr, $src",
+                 [(int_arm_set_fpscr GPR:$src)]> {
   let Inst{27-20} = 0b11101110;
   let Inst{19-16} = 0b0001;
   let Inst{11-8}  = 0b1010;
   let Inst{7}     = 0;
   let Inst{4}     = 1;
 }
-}
-} // neverHasSideEffects
 
 // Materialize FP immediates. VFP3 only.
 let isReMaterializable = 1 in {
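For context on the Z bit these patterns flip: vcvt (Z = 1) always rounds toward zero, which is what a C cast requires, while vcvtr (Z = 0) follows the FPSCR rounding mode. The patch exposes vcvtr only through the IR intrinsics llvm.arm.vcvtr and llvm.arm.vcvtru, with no C builtin, so the sketch below only models the behavioral difference on the host rather than invoking the instruction:

#include <cmath>

// Host-side models; both function names are illustrative only.
int ConvertLikeVCVT(double D) {
  return (int)std::trunc(D);             // Z = 1: always toward zero
}

int ConvertLikeVCVTR(double D, unsigned RMode) {
  switch (RMode & 3) {                   // Z = 0: obey FPSCR bits 23:22
  case 1:  return (int)std::ceil(D);     // RP: toward +infinity
  case 2:  return (int)std::floor(D);    // RM: toward -infinity
  case 3:  return (int)std::trunc(D);    // RZ: toward zero
  default: return (int)std::rint(D);     // RN: nearest, ties-to-even under
                                         // the default FP environment
  }
}

For example, ConvertLikeVCVT(1.25) is 1 in every mode, while ConvertLikeVCVTR(1.25, 1) rounds up to 2.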