[ARM] Enable DP copy, load and store instructions for FPv4-SP

The FPv4-SP floating-point unit is generally referred to as
single-precision only, but it does have double-precision registers and
load, store and GPR<->DPR move instructions which operate on them.
This patch enables the use of these registers, the main advantage of
which is that we now comply with the AAPCS-VFP calling convention.
This partially reverts r209650, which added some AAPCS-VFP support,
but did not handle return values or alignment of double arguments in
registers.

This patch also adds tests for Thumb2 code generation for
floating-point instructions and intrinsics, which previously only
existed for ARM.
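As a rough illustration of the calling-convention effect (the function name
and constant-pool label here are ours; the expected code follows the updated
CHECK-M4F lines below): a double result is now materialized directly in d0
with the double-precision vldr that this patch makes available:

define arm_aapcs_vfpcc double @one() {
  ; FPv4-SP: expected to become "vldr d0, .LCPI0_0", with the constant
  ; pool holding the two words of 1.0
  ret double 1.0
}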



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216172 91177308-0d34-0410-b5e6-96231b3b80d8
Oliver Stannard 2014-08-21 12:50:31 +00:00
parent fc4bdcdc87
commit 760a46522a
15 changed files with 1280 additions and 66 deletions

View File

@@ -3517,6 +3517,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
RTLIB::ADD_F80, RTLIB::ADD_F128,
RTLIB::ADD_PPCF128));
break;
case ISD::FMUL:
Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
RTLIB::MUL_F80, RTLIB::MUL_F128,
RTLIB::MUL_PPCF128));
break;
case ISD::FP16_TO_FP: {
if (Node->getValueType(0) == MVT::f32) {
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
@@ -3549,12 +3559,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::FSUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
"Don't know how to expand this FP subtraction!");
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
Results.push_back(Tmp1);
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
Results.push_back(Tmp1);
} else {
Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
RTLIB::SUB_F80, RTLIB::SUB_F128,
RTLIB::SUB_PPCF128));
}
break;
}
case ISD::SUB: {
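A sketch of the new FSUB fallback (our function; the expected call matches
the new Thumb2 float-ops test later in this patch):

define double @sub_d(double %a, double %b) {
  ; on FPv4-SP, FSUB f64 is Expand but FADD/FNEG f64 are not legal, so
  ; instead of asserting the legalizer now emits bl __aeabi_dsub
  %0 = fsub double %a, %b
  ret double %0
}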

View File

@@ -7265,8 +7265,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
if (NeedsRegBlock)
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (Value == NumValues - 1)
Flags.setInConsecutiveRegsLast();
}
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7312,10 +7315,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
// Only mark the end at the last register of the last value.
if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
MyFlags.Flags.setInConsecutiveRegsLast();
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
@@ -7530,8 +7529,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (Value == NumValues - 1)
Flags.setInConsecutiveRegsLast();
}
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7544,11 +7546,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// if it isn't first piece, alignment must be 1
else if (i > 0)
MyFlags.Flags.setOrigAlign(1);
// Only mark the end at the last register of the last value.
if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
MyFlags.Flags.setInConsecutiveRegsLast();
Ins.push_back(MyFlags);
}
PartBase += VT.getStoreSize();
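A sketch of the kind of argument this flag handling describes (the function
is ours, not from the tests; register assignment per AAPCS-VFP):

define arm_aapcs_vfpcc double @first_elem([2 x double] %h) {
  ; %h is a homogeneous aggregate: InConsecutiveRegs is set on each part
  ; and InConsecutiveRegsLast on the final value, so the block is
  ; expected to occupy d0 and d1 and the extract needs no moves
  %e = extractvalue [2 x double] %h, 0
  ret double %e
}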

View File

@@ -721,7 +721,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VORRq;
@@ -781,6 +781,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
} else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
Opc = ARM::VMOVS;
BeginIdx = ARM::ssub_0;
SubRegs = 2;
}
assert(Opc && "Impossible reg-to-reg copy");
@@ -1231,7 +1235,8 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() ||
Subtarget.isFPOnlySP())
return false;
// Look for a copy between even S-registers. That is where we keep floats
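A sketch of the new copy expansion (our function; the expected moves match
the updated Thumb2 arguments test later in this patch):

define arm_aapcs_vfpcc double @second(double %a, double %b) {
  ; returning %b needs a d1 -> d0 copy; on FPv4-SP copyPhysReg now
  ; expands it over the S subregisters:
  ;   vmov.f32 s0, s2
  ;   vmov.f32 s1, s3
  ret double %b
}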

View File

@@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
// AAPCS HFAs must have 1-4 elements, all of the same type
assert(PendingHAMembers.size() < 8);
assert(PendingHAMembers.size() < 4);
if (PendingHAMembers.size() > 0)
assert(PendingHAMembers[0].getLocVT() == LocVT);
@@ -188,7 +189,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
if (ArgFlags.isInConsecutiveRegsLast()) {
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
"Homogeneous aggregates must have between 1 and 4 members");
// Try to allocate a contiguous block of registers, each of the correct
@@ -196,7 +197,6 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
const uint16_t *RegList;
unsigned NumRegs;
switch (LocVT.SimpleTy) {
case MVT::i32:
case MVT::f32:
RegList = SRegList;
NumRegs = 16;
@@ -235,20 +235,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State.AllocateReg(SRegList[regNo]);
unsigned Size = LocVT.getSizeInBits() / 8;
unsigned Align = Size;
if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
// Vectors are always aligned to 8 bytes. If we've seen an i32 here
// it's because it's been split from a larger type, also with align 8.
Align = 8;
}
unsigned Align = std::min(Size, 8U);
for (auto It : PendingHAMembers) {
It.convertToMem(State.AllocateStack(Size, Align));
State.addLoc(It);
// Only the first member needs to be aligned.
Align = 1;
}
// All pending members have now been allocated
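A sketch of the stack path (our function; exact frame offsets depend on the
surrounding arguments, the 8-byte alignment is the point):

define arm_aapcs_vfpcc void @stack_arg([4 x double], [4 x double], double %d) {
  ; with d0-d7 exhausted by the first two aggregates, %d is allocated
  ; on the stack with Align = min(Size, 8) = 8, so it can be stored
  ; with a single strd
  ret void
}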

View File

@@ -445,8 +445,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
!Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
}
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -628,6 +627,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->isThumb1Only())
setTargetDAGCombine(ISD::ADDC);
if (Subtarget->isFPOnlySP()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions
// which are present. However, no double-precision operations other
// than moves, loads and stores are provided by the hardware.
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FDIV, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
setOperationAction(ISD::FNEG, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f64, Expand);
setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FCEIL, MVT::f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
computeRegisterProperties();
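A sketch of what this combination of a legal f64 type and Expand actions
produces (our function; the expected call matches the new float-ops test):

define double @add_d(double %a, double %b) {
  ; the operands live in d0/d1, but FADD f64 is Expand on FPv4-SP, so
  ; this becomes bl __aeabi_dadd; fadd on float still selects vadd.f32
  %0 = fadd double %a, %b
  ret double %0
}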
@@ -3276,6 +3308,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue
ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
SDLoc dl) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
@@ -3391,9 +3424,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
ARMcc, CCR, OverflowCmp);
return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR,
OverflowCmp, DAG);
}
// Convert:
@@ -3427,7 +3459,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = Cond.getOperand(3);
SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
assert(True.getValueType() == VT);
return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
}
}
}
@@ -3497,6 +3529,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
}
SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
SDValue TrueLow = TrueVal.getValue(0);
SDValue TrueHigh = TrueVal.getValue(1);
SDValue FalseLow = FalseVal.getValue(0);
SDValue FalseHigh = FalseVal.getValue(1);
SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
ARMcc, CCR, Cmp);
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
ARMcc, CCR, duplicateCmp(Cmp, DAG));
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
} else {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
Cmp);
}
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3506,6 +3564,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue FalseVal = Op.getOperand(3);
SDLoc dl(Op);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType() == MVT::i32) {
// Try to generate VSEL on ARMv8.
// The VSEL instruction can't use all the usual ARM condition
@@ -3530,8 +3600,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
Cmp);
return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -3570,14 +3639,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
ARMcc, CCR, Cmp);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
Result, TrueVal, ARMcc2, CCR, Cmp2);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
}
@@ -3710,6 +3777,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
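A sketch of the softened f64 comparison (our function; the expected calls
match the new float-cmp test):

define i1 @cmp_d_ogt(double %a, double %b) {
  ; softenSetCCOperands turns the f64 compare into a libcall on
  ; FPv4-SP: bl __aeabi_dcmpgt (DP targets keep vcmpe.f64)
  %1 = fcmp ogt double %a, %b
  ret i1 %1
}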
@@ -3802,11 +3881,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
}
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
Op.getValueType());
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
/*isSigned*/ false, SDLoc(Op)).first;
}
SDLoc dl(Op);
unsigned Opc;
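A sketch of the conversion libcall path (our function; the expected
sequence matches the new float-ops test):

define i32 @d_to_si(double %a) {
  ; on FPv4-SP the f64 operand is moved out of d0 and the AEABI helper
  ; is called: vmov r0, r1, d0 ; bl __aeabi_d2iz
  %1 = fptosi double %a to i32
  ret i32 %1
}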
@@ -3856,11 +3947,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
/*isSigned*/ false, SDLoc(Op)).first;
}
SDLoc dl(Op);
unsigned Opc;
@@ -4369,7 +4472,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
@@ -4633,6 +4736,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU
if (IsDouble && Subtarget->isFPOnlySP())
return SDValue();
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
@@ -6336,6 +6444,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
return LowerDYNAMIC_STACKALLOC(Op, DAG);
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
}
}
@@ -8479,10 +8589,11 @@ static SDValue PerformBFICombine(SDNode *N,
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
@@ -8695,7 +8806,8 @@ static bool hasNormalLoadOperand(SDNode *N) {
/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
/// ISD::BUILD_VECTOR.
static SDValue PerformBUILD_VECTORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI){
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
// VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
// into a pair of GPRs, which is fine when the value is used as a scalar,
@@ -9710,10 +9822,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
case ISD::STORE: return PerformSTORECombine(N, DCI);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
@@ -10703,6 +10815,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
"Unexpected type for custom-lowering FP_EXTEND");
RTLIB::Libcall LC;
LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
/*isSigned*/ false, SDLoc(Op)).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOperand(0).getValueType() == MVT::f64 &&
Subtarget->isFPOnlySP() &&
"Unexpected type for custom-lowering FP_ROUND");
RTLIB::Libcall LC;
LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
/*isSigned*/ false, SDLoc(Op)).first;
}
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -10730,7 +10867,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64)
if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
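A sketch of getCMOV's f64 splitting (our function; the expected sequence
matches the new float-ops select_d test):

define double @sel_d(double %a, double %b, i1 %c) {
  ; on FPv4-SP the f64 operands are decomposed with VMOVRRD, selected
  ; as two i32 conditional moves, and recombined with VMOVDRR:
  ;   vmov rlo, rhi, d0 ... itt ne ... vmov d0, rlo, rhi
  %1 = select i1 %c, double %a, double %b
  ret double %1
}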

View File

@@ -476,6 +476,10 @@ namespace llvm {
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -565,6 +569,9 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,

View File

@@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
let Predicates = [HasVFP2, HasDPVFP];
}
// Special case encoding: bits 11-8 is 0b1011.
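A sketch of the predicate's effect (our function; the expected output
matches the new float-ops test):

define double @f_to_d(float %a) {
  ; VCVTDS now requires HasDPVFP, so FPv4-SP cannot select
  ; vcvt.f64.f32 and calls bl __aeabi_f2d instead; DP targets are
  ; unaffected
  %1 = fpext float %a to double
  ret double %1
}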

View File

@@ -54,12 +54,11 @@ define arm_aapcs_vfpcc void @test_1double({ double } %a) {
; CHECK: bl test_1double
; CHECK-M4F-LABEL: test_1double:
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
; CHECK-M4F: movt [[ONEHI]], #16368
; CHECK-M4F-DAG: vmov s0, [[ONELO]]
; CHECK-M4F-DAG: vmov s1, [[ONEHI]]
; CHECK-M4F: vldr d0, [[CP_LABEL:.*]]
; CHECK-M4F: bl test_1double
; CHECK-M4F: [[CP_LABEL]]
; CHECK-M4F-NEXT: .long 0
; CHECK-M4F-NEXT: .long 1072693248
call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 })
ret void
@@ -76,11 +75,10 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3
; CHECK: bl test_1double_nosplit
; CHECK-M4F-LABEL: test_1double_nosplit:
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
; CHECK-M4F: movt [[ONEHI]], #16368
; CHECK-M4F-DAG: str [[ONELO]], [sp]
; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4]
; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp]
; CHECK-M4F: bl test_1double_nosplit
call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0)
ret void
@@ -98,11 +96,10 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8]
; CHECK-M4F-LABEL: test_1double_misaligned:
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
; CHECK-M4F: movt [[ONEHI]], #16368
; CHECK-M4F-DAG: str [[ONELO]], [sp, #8]
; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12]
; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8]
; CHECK-M4F: bl test_1double_misaligned
ret void

View File

@@ -20,5 +20,5 @@ define double @double_op(double %lhs, double %rhs) {
; CHECK-M3: bl ___adddf3
; CHECK-M4-LABEL: double_op:
; CHECK-M4: bl ___adddf3
; CHECK-M4: {{(blx|b.w)}} ___adddf3
}

View File

@@ -0,0 +1,50 @@
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
define float @float_in_reg(float %a, float %b) {
entry:
; CHECK-LABEL: float_in_reg:
; SOFT: mov r0, r1
; HARD: vmov.f32 s0, s1
; CHECK-NEXT: bx lr
ret float %b
}
define double @double_in_reg(double %a, double %b) {
entry:
; CHECK-LABEL: double_in_reg:
; SOFT: mov r0, r2
; SOFT: mov r1, r3
; SP: vmov.f32 s0, s2
; SP: vmov.f32 s1, s3
; DP: vmov.f64 d0, d1
; CHECK-NEXT: bx lr
ret double %b
}
define float @float_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, float %i) {
; CHECK-LABEL: float_on_stack:
; SOFT: ldr r0, [sp, #48]
; HARD: vldr s0, [sp]
; CHECK-NEXT: bx lr
ret float %i
}
define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
; CHECK-LABEL: double_on_stack:
; SOFT: ldr r0, [sp, #48]
; SOFT: ldr r1, [sp, #52]
; HARD: vldr d0, [sp]
; CHECK-NEXT: bx lr
ret double %i
}
define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) {
; CHECK-LABEL: double_not_split:
; SOFT: ldr r0, [sp, #48]
; SOFT: ldr r1, [sp, #52]
; HARD: vldr d0, [sp]
; CHECK-NEXT: bx lr
ret double %i
}

View File

@@ -18,7 +18,7 @@ entry:
; CHECK-LABEL: bar:
%0 = fmul double %a, %b
; CORTEXM3: bl ___muldf3
; CORTEXM4: bl ___muldf3
; CORTEXM4: {{bl|b.w}} ___muldf3
; CORTEXA8: vmul.f64 d
ret double %0
}

View File

@@ -0,0 +1,300 @@
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
define i1 @cmp_f_false(float %a, float %b) {
; CHECK-LABEL: cmp_f_false:
; NONE: movs r0, #0
; HARD: movs r0, #0
%1 = fcmp false float %a, %b
ret i1 %1
}
define i1 @cmp_f_oeq(float %a, float %b) {
; CHECK-LABEL: cmp_f_oeq:
; NONE: bl __aeabi_fcmpeq
; HARD: vcmpe.f32
; HARD: moveq r0, #1
%1 = fcmp oeq float %a, %b
ret i1 %1
}
define i1 @cmp_f_ogt(float %a, float %b) {
; CHECK-LABEL: cmp_f_ogt:
; NONE: bl __aeabi_fcmpgt
; HARD: vcmpe.f32
; HARD: movgt r0, #1
%1 = fcmp ogt float %a, %b
ret i1 %1
}
define i1 @cmp_f_oge(float %a, float %b) {
; CHECK-LABEL: cmp_f_oge:
; NONE: bl __aeabi_fcmpge
; HARD: vcmpe.f32
; HARD: movge r0, #1
%1 = fcmp oge float %a, %b
ret i1 %1
}
define i1 @cmp_f_olt(float %a, float %b) {
; CHECK-LABEL: cmp_f_olt:
; NONE: bl __aeabi_fcmplt
; HARD: vcmpe.f32
; HARD: movmi r0, #1
%1 = fcmp olt float %a, %b
ret i1 %1
}
define i1 @cmp_f_ole(float %a, float %b) {
; CHECK-LABEL: cmp_f_ole:
; NONE: bl __aeabi_fcmple
; HARD: vcmpe.f32
; HARD: movls r0, #1
%1 = fcmp ole float %a, %b
ret i1 %1
}
define i1 @cmp_f_one(float %a, float %b) {
; CHECK-LABEL: cmp_f_one:
; NONE: bl __aeabi_fcmpgt
; NONE: bl __aeabi_fcmplt
; HARD: vcmpe.f32
; HARD: movmi r0, #1
; HARD: movgt r0, #1
%1 = fcmp one float %a, %b
ret i1 %1
}
define i1 @cmp_f_ord(float %a, float %b) {
; CHECK-LABEL: cmp_f_ord:
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: movvc r0, #1
%1 = fcmp ord float %a, %b
ret i1 %1
}
define i1 @cmp_f_ueq(float %a, float %b) {
; CHECK-LABEL: cmp_f_ueq:
; NONE: bl __aeabi_fcmpeq
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: moveq r0, #1
; HARD: movvs r0, #1
%1 = fcmp ueq float %a, %b
ret i1 %1
}
define i1 @cmp_f_ugt(float %a, float %b) {
; CHECK-LABEL: cmp_f_ugt:
; NONE: bl __aeabi_fcmpgt
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: movhi r0, #1
%1 = fcmp ugt float %a, %b
ret i1 %1
}
define i1 @cmp_f_uge(float %a, float %b) {
; CHECK-LABEL: cmp_f_uge:
; NONE: bl __aeabi_fcmpge
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: movpl r0, #1
%1 = fcmp uge float %a, %b
ret i1 %1
}
define i1 @cmp_f_ult(float %a, float %b) {
; CHECK-LABEL: cmp_f_ult:
; NONE: bl __aeabi_fcmplt
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: movlt r0, #1
%1 = fcmp ult float %a, %b
ret i1 %1
}
define i1 @cmp_f_ule(float %a, float %b) {
; CHECK-LABEL: cmp_f_ule:
; NONE: bl __aeabi_fcmple
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: movle r0, #1
%1 = fcmp ule float %a, %b
ret i1 %1
}
define i1 @cmp_f_une(float %a, float %b) {
; CHECK-LABEL: cmp_f_une:
; NONE: bl __aeabi_fcmpeq
; HARD: vcmpe.f32
; HARD: movne r0, #1
%1 = fcmp une float %a, %b
ret i1 %1
}
define i1 @cmp_f_uno(float %a, float %b) {
; CHECK-LABEL: cmp_f_uno:
; NONE: bl __aeabi_fcmpun
; HARD: vcmpe.f32
; HARD: movvs r0, #1
%1 = fcmp uno float %a, %b
ret i1 %1
}
define i1 @cmp_f_true(float %a, float %b) {
; CHECK-LABEL: cmp_f_true:
; NONE: movs r0, #1
; HARD: movs r0, #1
%1 = fcmp true float %a, %b
ret i1 %1
}
define i1 @cmp_d_false(double %a, double %b) {
; CHECK-LABEL: cmp_d_false:
; NONE: movs r0, #0
; HARD: movs r0, #0
%1 = fcmp false double %a, %b
ret i1 %1
}
define i1 @cmp_d_oeq(double %a, double %b) {
; CHECK-LABEL: cmp_d_oeq:
; NONE: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpeq
; DP: vcmpe.f64
; DP: moveq r0, #1
%1 = fcmp oeq double %a, %b
ret i1 %1
}
define i1 @cmp_d_ogt(double %a, double %b) {
; CHECK-LABEL: cmp_d_ogt:
; NONE: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmpgt
; DP: vcmpe.f64
; DP: movgt r0, #1
%1 = fcmp ogt double %a, %b
ret i1 %1
}
define i1 @cmp_d_oge(double %a, double %b) {
; CHECK-LABEL: cmp_d_oge:
; NONE: bl __aeabi_dcmpge
; SP: bl __aeabi_dcmpge
; DP: vcmpe.f64
; DP: movge r0, #1
%1 = fcmp oge double %a, %b
ret i1 %1
}
define i1 @cmp_d_olt(double %a, double %b) {
; CHECK-LABEL: cmp_d_olt:
; NONE: bl __aeabi_dcmplt
; SP: bl __aeabi_dcmplt
; DP: vcmpe.f64
; DP: movmi r0, #1
%1 = fcmp olt double %a, %b
ret i1 %1
}
define i1 @cmp_d_ole(double %a, double %b) {
; CHECK-LABEL: cmp_d_ole:
; NONE: bl __aeabi_dcmple
; SP: bl __aeabi_dcmple
; DP: vcmpe.f64
; DP: movls r0, #1
%1 = fcmp ole double %a, %b
ret i1 %1
}
define i1 @cmp_d_one(double %a, double %b) {
; CHECK-LABEL: cmp_d_one:
; NONE: bl __aeabi_dcmpgt
; NONE: bl __aeabi_dcmplt
; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmplt
; DP: vcmpe.f64
; DP: movmi r0, #1
; DP: movgt r0, #1
%1 = fcmp one double %a, %b
ret i1 %1
}
define i1 @cmp_d_ord(double %a, double %b) {
; CHECK-LABEL: cmp_d_ord:
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: movvc r0, #1
%1 = fcmp ord double %a, %b
ret i1 %1
}
define i1 @cmp_d_ugt(double %a, double %b) {
; CHECK-LABEL: cmp_d_ugt:
; NONE: bl __aeabi_dcmpgt
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: movhi r0, #1
%1 = fcmp ugt double %a, %b
ret i1 %1
}
define i1 @cmp_d_ult(double %a, double %b) {
; CHECK-LABEL: cmp_d_ult:
; NONE: bl __aeabi_dcmplt
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmplt
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: movlt r0, #1
%1 = fcmp ult double %a, %b
ret i1 %1
}
define i1 @cmp_d_uno(double %a, double %b) {
; CHECK-LABEL: cmp_d_uno:
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: movvs r0, #1
%1 = fcmp uno double %a, %b
ret i1 %1
}
define i1 @cmp_d_true(double %a, double %b) {
; CHECK-LABEL: cmp_d_true:
; NONE: movs r0, #1
; HARD: movs r0, #1
%1 = fcmp true double %a, %b
ret i1 %1
}
define i1 @cmp_d_ueq(double %a, double %b) {
; CHECK-LABEL: cmp_d_ueq:
; NONE: bl __aeabi_dcmpeq
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: moveq r0, #1
; DP: movvs r0, #1
%1 = fcmp ueq double %a, %b
ret i1 %1
}
define i1 @cmp_d_uge(double %a, double %b) {
; CHECK-LABEL: cmp_d_uge:
; NONE: bl __aeabi_dcmpge
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpge
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: movpl r0, #1
%1 = fcmp uge double %a, %b
ret i1 %1
}
define i1 @cmp_d_ule(double %a, double %b) {
; CHECK-LABEL: cmp_d_ule:
; NONE: bl __aeabi_dcmple
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmple
; SP: bl __aeabi_dcmpun
; DP: vcmpe.f64
; DP: movle r0, #1
%1 = fcmp ule double %a, %b
ret i1 %1
}
define i1 @cmp_d_une(double %a, double %b) {
; CHECK-LABEL: cmp_d_une:
; NONE: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpeq
; DP: vcmpe.f64
; DP: movne r0, #1
%1 = fcmp une double %a, %b
ret i1 %1
}

View File

@@ -0,0 +1,214 @@
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
declare double @llvm.sqrt.f64(double %Val)
define double @sqrt_d(double %a) {
; CHECK-LABEL: sqrt_d:
; SOFT: {{(bl|b)}} sqrt
; HARD: vsqrt.f64 d0, d0
%1 = call double @llvm.sqrt.f64(double %a)
ret double %1
}
declare double @llvm.powi.f64(double %Val, i32 %power)
define double @powi_d(double %a, i32 %b) {
; CHECK-LABEL: powi_d:
; SOFT: {{(bl|b)}} __powidf2
; HARD: b __powidf2
%1 = call double @llvm.powi.f64(double %a, i32 %b)
ret double %1
}
declare double @llvm.sin.f64(double %Val)
define double @sin_d(double %a) {
; CHECK-LABEL: sin_d:
; SOFT: {{(bl|b)}} sin
; HARD: b sin
%1 = call double @llvm.sin.f64(double %a)
ret double %1
}
declare double @llvm.cos.f64(double %Val)
define double @cos_d(double %a) {
; CHECK-LABEL: cos_d:
; SOFT: {{(bl|b)}} cos
; HARD: b cos
%1 = call double @llvm.cos.f64(double %a)
ret double %1
}
declare double @llvm.pow.f64(double %Val, double %power)
define double @pow_d(double %a, double %b) {
; CHECK-LABEL: pow_d:
; SOFT: {{(bl|b)}} pow
; HARD: b pow
%1 = call double @llvm.pow.f64(double %a, double %b)
ret double %1
}
declare double @llvm.exp.f64(double %Val)
define double @exp_d(double %a) {
; CHECK-LABEL: exp_d:
; SOFT: {{(bl|b)}} exp
; HARD: b exp
%1 = call double @llvm.exp.f64(double %a)
ret double %1
}
declare double @llvm.exp2.f64(double %Val)
define double @exp2_d(double %a) {
; CHECK-LABEL: exp2_d:
; SOFT: {{(bl|b)}} exp2
; HARD: b exp2
%1 = call double @llvm.exp2.f64(double %a)
ret double %1
}
declare double @llvm.log.f64(double %Val)
define double @log_d(double %a) {
; CHECK-LABEL: log_d:
; SOFT: {{(bl|b)}} log
; HARD: b log
%1 = call double @llvm.log.f64(double %a)
ret double %1
}
declare double @llvm.log10.f64(double %Val)
define double @log10_d(double %a) {
; CHECK-LABEL: log10_d:
; SOFT: {{(bl|b)}} log10
; HARD: b log10
%1 = call double @llvm.log10.f64(double %a)
ret double %1
}
declare double @llvm.log2.f64(double %Val)
define double @log2_d(double %a) {
; CHECK-LABEL: log2_d:
; SOFT: {{(bl|b)}} log2
; HARD: b log2
%1 = call double @llvm.log2.f64(double %a)
ret double %1
}
declare double @llvm.fma.f64(double %a, double %b, double %c)
define double @fma_d(double %a, double %b, double %c) {
; CHECK-LABEL: fma_d:
; SOFT: {{(bl|b)}} fma
; HARD: vfma.f64
%1 = call double @llvm.fma.f64(double %a, double %b, double %c)
ret double %1
}
; FIXME: the FPv4-SP version is less efficient than the no-FPU version
declare double @llvm.fabs.f64(double %Val)
define double @abs_d(double %a) {
; CHECK-LABEL: abs_d:
; NONE: bic r1, r1, #-2147483648
; SP: bl __aeabi_dcmpgt
; SP: bl __aeabi_dcmpun
; SP: bl __aeabi_dsub
; DP: vabs.f64 d0, d0
%1 = call double @llvm.fabs.f64(double %a)
ret double %1
}
declare double @llvm.copysign.f64(double %Mag, double %Sgn)
define double @copysign_d(double %a, double %b) {
; CHECK-LABEL: copysign_d:
; SOFT: lsrs [[REG:r[0-9]+]], r3, #31
; SOFT: bfi r1, [[REG]], #31, #1
; HARD: vmov.i32 [[REG:d[0-9]+]], #0x80000000
; HARD: vshl.i64 [[REG]], [[REG]], #32
; HARD: vbsl [[REG]], d
%1 = call double @llvm.copysign.f64(double %a, double %b)
ret double %1
}
declare double @llvm.floor.f64(double %Val)
define double @floor_d(double %a) {
; CHECK-LABEL: floor_d:
; SOFT: {{(bl|b)}} floor
; HARD: b floor
%1 = call double @llvm.floor.f64(double %a)
ret double %1
}
declare double @llvm.ceil.f64(double %Val)
define double @ceil_d(double %a) {
; CHECK-LABEL: ceil_d:
; SOFT: {{(bl|b)}} ceil
; HARD: b ceil
%1 = call double @llvm.ceil.f64(double %a)
ret double %1
}
declare double @llvm.trunc.f64(double %Val)
define double @trunc_d(double %a) {
; CHECK-LABEL: trunc_d:
; SOFT: {{(bl|b)}} trunc
; HARD: b trunc
%1 = call double @llvm.trunc.f64(double %a)
ret double %1
}
declare double @llvm.rint.f64(double %Val)
define double @rint_d(double %a) {
; CHECK-LABEL: rint_d:
; SOFT: {{(bl|b)}} rint
; HARD: b rint
%1 = call double @llvm.rint.f64(double %a)
ret double %1
}
declare double @llvm.nearbyint.f64(double %Val)
define double @nearbyint_d(double %a) {
; CHECK-LABEL: nearbyint_d:
; SOFT: {{(bl|b)}} nearbyint
; HARD: b nearbyint
%1 = call double @llvm.nearbyint.f64(double %a)
ret double %1
}
declare double @llvm.round.f64(double %Val)
define double @round_d(double %a) {
; CHECK-LABEL: round_d:
; SOFT: {{(bl|b)}} round
; HARD: b round
%1 = call double @llvm.round.f64(double %a)
ret double %1
}
declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
define double @fmuladd_d(double %a, double %b, double %c) {
; CHECK-LABEL: fmuladd_d:
; SOFT: bl __aeabi_dmul
; SOFT: bl __aeabi_dadd
; HARD: vmul.f64
; HARD: vadd.f64
%1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
ret double %1
}
declare i16 @llvm.convert.to.fp16.f64(double %a)
define i16 @d_to_h(double %a) {
; CHECK-LABEL: d_to_h:
; SOFT: bl __aeabi_d2h
; HARD: bl __aeabi_d2h
%1 = call i16 @llvm.convert.to.fp16.f64(double %a)
ret i16 %1
}
declare double @llvm.convert.from.fp16.f64(i16 %a)
define double @h_to_d(i16 %a) {
; CHECK-LABEL: h_to_d:
; NONE: bl __gnu_h2f_ieee
; NONE: bl __aeabi_f2d
; SP: vcvtb.f32.f16
; SP: bl __aeabi_f2d
; DP: vcvtb.f32.f16
; DP: vcvt.f64.f32
%1 = call double @llvm.convert.from.fp16.f64(i16 %a)
ret double %1
}

View File

@@ -0,0 +1,210 @@
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
declare float @llvm.sqrt.f32(float %Val)
define float @sqrt_f(float %a) {
; CHECK-LABEL: sqrt_f:
; SOFT: bl sqrtf
; HARD: vsqrt.f32 s0, s0
%1 = call float @llvm.sqrt.f32(float %a)
ret float %1
}
declare float @llvm.powi.f32(float %Val, i32 %power)
define float @powi_f(float %a, i32 %b) {
; CHECK-LABEL: powi_f:
; SOFT: bl __powisf2
; HARD: b __powisf2
%1 = call float @llvm.powi.f32(float %a, i32 %b)
ret float %1
}
declare float @llvm.sin.f32(float %Val)
define float @sin_f(float %a) {
; CHECK-LABEL: sin_f:
; SOFT: bl sinf
; HARD: b sinf
%1 = call float @llvm.sin.f32(float %a)
ret float %1
}
declare float @llvm.cos.f32(float %Val)
define float @cos_f(float %a) {
; CHECK-LABEL: cos_f:
; SOFT: bl cosf
; HARD: b cosf
%1 = call float @llvm.cos.f32(float %a)
ret float %1
}
declare float @llvm.pow.f32(float %Val, float %power)
define float @pow_f(float %a, float %b) {
; CHECK-LABEL: pow_f:
; SOFT: bl powf
; HARD: b powf
%1 = call float @llvm.pow.f32(float %a, float %b)
ret float %1
}
declare float @llvm.exp.f32(float %Val)
define float @exp_f(float %a) {
; CHECK-LABEL: exp_f:
; SOFT: bl expf
; HARD: b expf
%1 = call float @llvm.exp.f32(float %a)
ret float %1
}
declare float @llvm.exp2.f32(float %Val)
define float @exp2_f(float %a) {
; CHECK-LABEL: exp2_f:
; SOFT: bl exp2f
; HARD: b exp2f
%1 = call float @llvm.exp2.f32(float %a)
ret float %1
}
declare float @llvm.log.f32(float %Val)
define float @log_f(float %a) {
; CHECK-LABEL: log_f:
; SOFT: bl logf
; HARD: b logf
%1 = call float @llvm.log.f32(float %a)
ret float %1
}
declare float @llvm.log10.f32(float %Val)
define float @log10_f(float %a) {
; CHECK-LABEL: log10_f:
; SOFT: bl log10f
; HARD: b log10f
%1 = call float @llvm.log10.f32(float %a)
ret float %1
}
declare float @llvm.log2.f32(float %Val)
define float @log2_f(float %a) {
; CHECK-LABEL: log2_f:
; SOFT: bl log2f
; HARD: b log2f
%1 = call float @llvm.log2.f32(float %a)
ret float %1
}
declare float @llvm.fma.f32(float %a, float %b, float %c)
define float @fma_f(float %a, float %b, float %c) {
; CHECK-LABEL: fma_f:
; SOFT: bl fmaf
; HARD: vfma.f32
%1 = call float @llvm.fma.f32(float %a, float %b, float %c)
ret float %1
}
declare float @llvm.fabs.f32(float %Val)
define float @abs_f(float %a) {
; CHECK-LABEL: abs_f:
; SOFT: bic r0, r0, #-2147483648
; HARD: vabs.f32
%1 = call float @llvm.fabs.f32(float %a)
ret float %1
}
declare float @llvm.copysign.f32(float %Mag, float %Sgn)
define float @copysign_f(float %a, float %b) {
; CHECK-LABEL: copysign_f:
; NONE: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
; NONE: bfi r{{[0-9]+}}, [[REG]], #31, #1
; SP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1
; DP: vmov.i32 [[REG:d[0-9]+]], #0x80000000
; DP: vbsl [[REG]], d
%1 = call float @llvm.copysign.f32(float %a, float %b)
ret float %1
}
declare float @llvm.floor.f32(float %Val)
define float @floor_f(float %a) {
; CHECK-LABEL: floor_f:
; SOFT: bl floorf
; HARD: b floorf
%1 = call float @llvm.floor.f32(float %a)
ret float %1
}
declare float @llvm.ceil.f32(float %Val)
define float @ceil_f(float %a) {
; CHECK-LABEL: ceil_f:
; SOFT: bl ceilf
; HARD: b ceilf
%1 = call float @llvm.ceil.f32(float %a)
ret float %1
}
declare float @llvm.trunc.f32(float %Val)
define float @trunc_f(float %a) {
; CHECK-LABEL: trunc_f:
; SOFT: bl truncf
; HARD: b truncf
%1 = call float @llvm.trunc.f32(float %a)
ret float %1
}
declare float @llvm.rint.f32(float %Val)
define float @rint_f(float %a) {
; CHECK-LABEL: rint_f:
; SOFT: bl rintf
; HARD: b rintf
%1 = call float @llvm.rint.f32(float %a)
ret float %1
}
declare float @llvm.nearbyint.f32(float %Val)
define float @nearbyint_f(float %a) {
; CHECK-LABEL: nearbyint_f:
; SOFT: bl nearbyintf
; HARD: b nearbyintf
%1 = call float @llvm.nearbyint.f32(float %a)
ret float %1
}
declare float @llvm.round.f32(float %Val)
define float @round_f(float %a) {
; CHECK-LABEL: round_f:
; SOFT: bl roundf
; HARD: b roundf
%1 = call float @llvm.round.f32(float %a)
ret float %1
}
; FIXME: why does cortex-m4 use vmla, while cortex-a7 uses vmul+vadd?
; (these should be equivalent, even the rounding is the same)
declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
define float @fmuladd_f(float %a, float %b, float %c) {
; CHECK-LABEL: fmuladd_f:
; SOFT: bl __aeabi_fmul
; SOFT: bl __aeabi_fadd
; SP: vmla.f32
; DP: vmul.f32
; DP: vadd.f32
%1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
ret float %1
}
declare i16 @llvm.convert.to.fp16.f32(float %a)
define i16 @f_to_h(float %a) {
; CHECK-LABEL: f_to_h:
; SOFT: bl __gnu_f2h_ieee
; HARD: vcvtb.f16.f32
%1 = call i16 @llvm.convert.to.fp16.f32(float %a)
ret i16 %1
}
declare float @llvm.convert.from.fp16.f32(i16 %a)
define float @h_to_f(i16 %a) {
; CHECK-LABEL: h_to_f:
; SOFT: bl __gnu_h2f_ieee
; HARD: vcvtb.f32.f16
%1 = call float @llvm.convert.from.fp16.f32(i16 %a)
ret float %1
}

View File

@@ -0,0 +1,290 @@
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
define float @add_f(float %a, float %b) {
entry:
; CHECK-LABEL: add_f:
; NONE: bl __aeabi_fadd
; HARD: vadd.f32 s0, s0, s1
%0 = fadd float %a, %b
ret float %0
}
define double @add_d(double %a, double %b) {
entry:
; CHECK-LABEL: add_d:
; NONE: bl __aeabi_dadd
; SP: bl __aeabi_dadd
; DP: vadd.f64 d0, d0, d1
%0 = fadd double %a, %b
ret double %0
}
define float @sub_f(float %a, float %b) {
entry:
; CHECK-LABEL: sub_f:
; NONE: bl __aeabi_fsub
; HARD: vsub.f32 s
%0 = fsub float %a, %b
ret float %0
}
define double @sub_d(double %a, double %b) {
entry:
; CHECK-LABEL: sub_d:
; NONE: bl __aeabi_dsub
; SP: bl __aeabi_dsub
; DP: vsub.f64 d0, d0, d1
%0 = fsub double %a, %b
ret double %0
}
define float @mul_f(float %a, float %b) {
entry:
; CHECK-LABEL: mul_f:
; NONE: bl __aeabi_fmul
; HARD: vmul.f32 s
%0 = fmul float %a, %b
ret float %0
}
define double @mul_d(double %a, double %b) {
entry:
; CHECK-LABEL: mul_d:
; NONE: bl __aeabi_dmul
; SP: bl __aeabi_dmul
; DP: vmul.f64 d0, d0, d1
%0 = fmul double %a, %b
ret double %0
}
define float @div_f(float %a, float %b) {
entry:
; CHECK-LABEL: div_f:
; NONE: bl __aeabi_fdiv
; HARD: vdiv.f32 s
%0 = fdiv float %a, %b
ret float %0
}
define double @div_d(double %a, double %b) {
entry:
; CHECK-LABEL: div_d:
; NONE: bl __aeabi_ddiv
; SP: bl __aeabi_ddiv
; DP: vdiv.f64 d0, d0, d1
%0 = fdiv double %a, %b
ret double %0
}
define float @rem_f(float %a, float %b) {
entry:
; CHECK-LABEL: rem_f:
; NONE: bl fmodf
; HARD: b fmodf
%0 = frem float %a, %b
ret float %0
}
define double @rem_d(double %a, double %b) {
entry:
; CHECK-LABEL: rem_d:
; NONE: bl fmod
; HARD: b fmod
%0 = frem double %a, %b
ret double %0
}
define float @load_f(float* %a) {
entry:
; CHECK-LABEL: load_f:
; NONE: ldr r0, [r0]
; HARD: vldr s0, [r0]
%0 = load float* %a, align 4
ret float %0
}
define double @load_d(double* %a) {
entry:
; CHECK-LABEL: load_d:
; NONE: ldm.w r0, {r0, r1}
; HARD: vldr d0, [r0]
%0 = load double* %a, align 8
ret double %0
}
define void @store_f(float* %a, float %b) {
entry:
; CHECK-LABEL: store_f:
; NONE: str r1, [r0]
; HARD: vstr s0, [r0]
store float %b, float* %a, align 4
ret void
}
define void @store_d(double* %a, double %b) {
entry:
; CHECK-LABEL: store_d:
; NONE: mov r1, r3
; NONE: str r2, [r0]
; NONE: str r1, [r0, #4]
; HARD: vstr d0, [r0]
store double %b, double* %a, align 8
ret void
}
define double @f_to_d(float %a) {
; CHECK-LABEL: f_to_d:
; NONE: bl __aeabi_f2d
; SP: bl __aeabi_f2d
; DP: vcvt.f64.f32 d0, s0
%1 = fpext float %a to double
ret double %1
}
define float @d_to_f(double %a) {
; CHECK-LABEL: d_to_f:
; NONE: bl __aeabi_d2f
; SP: bl __aeabi_d2f
; DP: vcvt.f32.f64 s0, d0
%1 = fptrunc double %a to float
ret float %1
}
define i32 @f_to_si(float %a) {
; CHECK-LABEL: f_to_si:
; NONE: bl __aeabi_f2iz
; HARD: vcvt.s32.f32 s0, s0
; HARD: vmov r0, s0
%1 = fptosi float %a to i32
ret i32 %1
}
define i32 @d_to_si(double %a) {
; CHECK-LABEL: d_to_si:
; NONE: bl __aeabi_d2iz
; SP: vmov r0, r1, d0
; SP: bl __aeabi_d2iz
; DP: vcvt.s32.f64 s0, d0
; DP: vmov r0, s0
%1 = fptosi double %a to i32
ret i32 %1
}
define i32 @f_to_ui(float %a) {
; CHECK-LABEL: f_to_ui:
; NONE: bl __aeabi_f2uiz
; HARD: vcvt.u32.f32 s0, s0
; HARD: vmov r0, s0
%1 = fptoui float %a to i32
ret i32 %1
}
define i32 @d_to_ui(double %a) {
; CHECK-LABEL: d_to_ui:
; NONE: bl __aeabi_d2uiz
; SP: vmov r0, r1, d0
; SP: bl __aeabi_d2uiz
; DP: vcvt.u32.f64 s0, d0
; DP: vmov r0, s0
%1 = fptoui double %a to i32
ret i32 %1
}
define float @si_to_f(i32 %a) {
; CHECK-LABEL: si_to_f:
; NONE: bl __aeabi_i2f
; HARD: vcvt.f32.s32 s0, s0
%1 = sitofp i32 %a to float
ret float %1
}
define double @si_to_d(i32 %a) {
; CHECK-LABEL: si_to_d:
; NONE: bl __aeabi_i2d
; SP: bl __aeabi_i2d
; DP: vcvt.f64.s32 d0, s0
%1 = sitofp i32 %a to double
ret double %1
}
define float @ui_to_f(i32 %a) {
; CHECK-LABEL: ui_to_f:
; NONE: bl __aeabi_ui2f
; HARD: vcvt.f32.u32 s0, s0
%1 = uitofp i32 %a to float
ret float %1
}
define double @ui_to_d(i32 %a) {
; CHECK-LABEL: ui_to_d:
; NONE: bl __aeabi_ui2d
; SP: bl __aeabi_ui2d
; DP: vcvt.f64.u32 d0, s0
%1 = uitofp i32 %a to double
ret double %1
}
define float @bitcast_i_to_f(i32 %a) {
; CHECK-LABEL: bitcast_i_to_f:
; NONE-NOT: mov
; HARD: vmov s0, r0
%1 = bitcast i32 %a to float
ret float %1
}
define double @bitcast_i_to_d(i64 %a) {
; CHECK-LABEL: bitcast_i_to_d:
; NONE-NOT: mov
; HARD: vmov d0, r0, r1
%1 = bitcast i64 %a to double
ret double %1
}
define i32 @bitcast_f_to_i(float %a) {
; CHECK-LABEL: bitcast_f_to_i:
; NONE-NOT: mov
; HARD: vmov r0, s0
%1 = bitcast float %a to i32
ret i32 %1
}
define i64 @bitcast_d_to_i(double %a) {
; CHECK-LABEL: bitcast_d_to_i:
; NONE-NOT: mov
; HARD: vmov r0, r1, d0
%1 = bitcast double %a to i64
ret i64 %1
}
define float @select_f(float %a, float %b, i1 %c) {
; CHECK-LABEL: select_f:
; NONE: tst.w r2, #1
; NONE: moveq r0, r1
; HARD: tst.w r0, #1
; HARD: vmovne.f32 s1, s0
; HARD: vmov.f32 s0, s1
%1 = select i1 %c, float %a, float %b
ret float %1
}
define double @select_d(double %a, double %b, i1 %c) {
; CHECK-LABEL: select_d:
; NONE: ldr.w [[REG:r[0-9]+]], [sp]
; NONE: ands [[REG]], [[REG]], #1
; NONE: moveq r0, r2
; NONE: moveq r1, r3
; SP: ands r0, r0, #1
; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
; SP: itt ne
; SP-DAG: movne [[BLO]], [[ALO]]
; SP-DAG: movne [[BHI]], [[AHI]]
; SP: vmov d0, [[BLO]], [[BHI]]
; DP: tst.w r0, #1
; DP: vmovne.f64 d1, d0
; DP: vmov.f64 d0, d1
%1 = select i1 %c, double %a, double %b
ret double %1
}