diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 6ee1b68efaa..ddeb1b994e8 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,8 +520,8 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? sub : add; } - /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and - /// FSTM instructions. + /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and + /// VSTM instructions. static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 96b1c9ce041..868d31dbe0e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -509,9 +509,9 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, switch (MI.getOpcode()) { default: break; - case ARM::FCPYS: - case ARM::FCPYD: + case ARM::VMOVS: case ARM::VMOVD: + case ARM::VMOVDneon: case ARM::VMOVQ: { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -561,8 +561,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::FLDD: - case ARM::FLDS: + case ARM::VLDRD: + case ARM::VLDRS: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { @@ -600,8 +600,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::FSTD: - case ARM::FSTS: + case ARM::VSTRD: + case ARM::VSTRS: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { @@ -637,17 +637,17 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg).addReg(SrcReg))); } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) .addReg(SrcReg)); } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) .addReg(SrcReg)); } else if (DestRC == ARM::DPR_VFP2RegisterClass || DestRC == ARM::DPR_8RegisterClass || SrcRC == ARM::DPR_VFP2RegisterClass || SrcRC == ARM::DPR_8RegisterClass) { // Always use neon reg-reg move if source or dest is NEON-only regclass. - BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg); } else if (DestRC == ARM::QPRRegisterClass || DestRC == ARM::QPR_VFP2RegisterClass || DestRC == ARM::QPR_8RegisterClass) { @@ -682,11 +682,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { @@ -728,10 +728,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || @@ -827,7 +827,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); } - } else if (Opc == ARM::FCPYS) { + } else if (Opc == ARM::VMOVS) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -835,7 +835,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), SrcSubReg) .addFrameIndex(FI) @@ -845,7 +845,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | @@ -854,7 +854,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } } - else if (Opc == ARM::FCPYD) { + else if (Opc == ARM::VMOVD) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -862,7 +862,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), SrcSubReg) @@ -872,7 +872,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | @@ -908,9 +908,9 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, Opc == ARM::tMOVtgpr2gpr || Opc == ARM::tMOVgpr2tgpr) { return true; - } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) { + } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) { return true; - } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) { + } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { return false; // FIXME } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index ad5dfc436bc..3197ab08440 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1346,7 +1346,7 @@ emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI); + movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI); NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. @@ -1385,7 +1385,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, const unsigned *CSRegs) { - return ((MI->getOpcode() == (int)ARM::FLDD || + return ((MI->getOpcode() == (int)ARM::VLDRD || MI->getOpcode() == (int)ARM::LDR || MI->getOpcode() == (int)ARM::t2LDRi12) && MI->getOperand(1).isFI() && @@ -1411,7 +1411,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { - // Unwind MBBI to point to first LDR / FLDD. + // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do @@ -1459,7 +1459,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Move SP to start of integer callee save spill area 2. - movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI); + movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI); emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize()); // Move SP to start of integer callee save spill area 1. diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 981962522db..3235c3f746a 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -541,8 +541,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, Scale = 4; // +(offset_8*4) break; - case ARM::FLDD: - case ARM::FLDS: + case ARM::VLDRD: + case ARM::VLDRS: Bits = 8; Scale = 4; // +-(offset_8*4) NegOk = true; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 0eb1eb6dc1b..9be74540e23 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1466,8 +1466,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } break; } - case ARMISD::FMRRD: - return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + case ARMISD::VMOVRRD: + return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, Op.getOperand(0), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { @@ -1656,10 +1656,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { : ARM::MOVCCr; break; case MVT::f32: - Opc = ARM::FCPYScc; + Opc = ARM::VMOVScc; break; case MVT::f64: - Opc = ARM::FCPYDcc; + Opc = ARM::VMOVDcc; break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); @@ -1683,10 +1683,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { default: assert(false && "Illegal conditional move type!"); break; case MVT::f32: - Opc = ARM::FNEGScc; + Opc = ARM::VNEGScc; break; case MVT::f64: - Opc = ARM::FNEGDcc; + Opc = ARM::VNEGDcc; break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c468b906e52..4945fdfc1d2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -389,7 +389,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) - // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. + // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); // We want to custom lower some of our intrinsics. @@ -434,7 +434,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // We have target-specific dag combine patterns for the following nodes: - // ARMISD::FMRRD - No need to call setTargetDAGCombine + // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); @@ -493,8 +493,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; - case ARMISD::FMRRD: return "ARMISD::FMRRD"; - case ARMISD::FMDRR: return "ARMISD::FMDRR"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; + case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; @@ -790,7 +790,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); - Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); @@ -805,7 +805,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); - Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); } @@ -870,7 +870,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SmallVector &MemOpChains, ISD::ArgFlagsTy Flags) { - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); @@ -1149,7 +1149,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Extract the first half and return it in two registers. SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, MVT::i32)); - SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl, + SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); @@ -1166,7 +1166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); @@ -1556,7 +1556,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } SDValue @@ -2072,16 +2072,16 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); DebugLoc dl = N->getDebugLoc(); if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into FMDRR. + // Turn i64->f64 into VMOVDRR. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } - // Turn f64->i64 into FMRRD. - SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + // Turn f64->i64 into VMOVRRD. + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); // Merge the pieces into a single i64 value. @@ -3178,12 +3178,12 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } -/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. -static SDValue PerformFMRRDCombine(SDNode *N, +/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD. +static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // fmrrd(fmdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::FMDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); return SDValue(); } @@ -3478,7 +3478,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); - case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -3760,7 +3760,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, return true; } - // FIXME: Use FLDM / FSTM to emulate indexed FP load / store. + // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. return false; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9c7a91d68d2..df69ab1cc2c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -62,8 +62,8 @@ namespace llvm { SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. - FMRRD, // double to two gprs. - FMDRR, // Two gprs to double. + VMOVRRD, // double to two gprs. + VMOVDRR, // Two gprs to double. EH_SJLJ_SETJMP, // SjLj exception handling setjmp. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index dfa2bdd071a..893694bfdee 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -394,13 +394,13 @@ let Defs = [CPSR] in { multiclass AI1_bin_s_irs opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1 { let Inst{20} = 1; let Inst{25} = 1; } def rr : AI1 { let isCommutable = Commutable; let Inst{11-4} = 0b00000000; @@ -408,7 +408,7 @@ multiclass AI1_bin_s_irs opcod, string opc, PatFrag opnode, let Inst{25} = 0; } def rs : AI1 { let Inst{20} = 1; let Inst{25} = 0; @@ -504,9 +504,13 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, Requires<[IsARM, CarryDefIsUnused]> { let Inst{25} = 0; } - // Carry setting variants +} +// Carry setting variants +let Defs = [CPSR] in { +multiclass AI1_adde_sube_s_irs opcod, string opc, PatFrag opnode, + bit Commutable = 0> { def Sri : AXI1, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -514,7 +518,7 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, let Inst{25} = 1; } def Srr : AXI1, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -523,7 +527,7 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, let Inst{25} = 0; } def Srs : AXI1, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -532,6 +536,7 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, } } } +} //===----------------------------------------------------------------------===// // Instructions @@ -663,7 +668,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM_RET : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb", []>; // On non-Darwin platforms R9 is callee-saved. @@ -803,26 +808,26 @@ def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, // Loads with zero extension def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "h\t$dst, $addr", + IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, - IIC_iLoadr, "ldr", "b\t$dst, $addr", + IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; // Loads with sign extension def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sh\t$dst, $addr", + IIC_iLoadr, "ldrsh", "\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sb\t$dst, $addr", + IIC_iLoadr, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "d\t$dst1, $addr", + IIC_iLoadr, "ldrd", "\t$dst1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed loads @@ -836,35 +841,35 @@ def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode2:$addr), LdFrm, IIC_iLoadru, - "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, - "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } // Store @@ -874,18 +879,18 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, // Stores with truncate def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "h\t$src, $addr", + "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, - "str", "b\t$src, $addr", + "strb", "\t$src, $addr", [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), @@ -905,28 +910,28 @@ def STR_POST : AI2stwpo<(outs GPR:$base_wb), def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h\t$src, [$base, $offset]!", "$base = $base_wb", + "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h\t$src, [$base], $offset", "$base = $base_wb", + "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b\t$src, [$base, $offset]!", "$base = $base_wb", + "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b\t$src, [$base], $offset", "$base = $base_wb", + "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; @@ -937,13 +942,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb", []>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb", []>; //===----------------------------------------------------------------------===// @@ -1004,10 +1009,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, let Defs = [CPSR] in { def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1", + IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1", + IIC_iMOVsi, "movs", "\t$dst, $src, asr #1", [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; } @@ -1083,15 +1088,19 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. -defm ADDS : AI1_bin_s_irs<0b0100, "add", - BinOpFrag<(addc node:$LHS, node:$RHS)>>; -defm SUBS : AI1_bin_s_irs<0b0010, "sub", +defm ADDS : AI1_bin_s_irs<0b0100, "adds", + BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; +defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; +defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", + BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; // These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, @@ -1109,13 +1118,13 @@ def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, // RSB with 's' bit set. let Defs = [CPSR] in { def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "s\t$dst, $a, $b", + IIC_iALUi, "rsbs", "\t$dst, $a, $b", [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> { let Inst{20} = 1; let Inst{25} = 1; } def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "s\t$dst, $a, $b", + IIC_iALUsr, "rsbs", "\t$dst, $a, $b", [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> { let Inst{20} = 1; let Inst{25} = 0; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 4c34035efa8..05d2918c3d3 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2295,7 +2295,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // VMOV : Vector Move (Register) -def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), +def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, "vmov\t$dst, $src", "", []>; def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, "vmov\t$dst, $src", "", []>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 455c33b7959..a1d581d0e4e 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -17,7 +17,7 @@ def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; -def SDT_FMDRR : +def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; @@ -28,7 +28,7 @@ def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; -def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; +def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; //===----------------------------------------------------------------------===// // Operand Definitions. @@ -55,21 +55,21 @@ def vfp_f64imm : Operand, // let canFoldAsLoad = 1 in { -def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad64, "fldd", "\t$dst, $addr", +def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), + IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; -def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad32, "flds", "\t$dst, $addr", +def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), + IIC_fpLoad32, "vldr", ".32\t$dst, $addr", [(set SPR:$dst, (load addrmode5:$addr))]>; } // canFoldAsLoad -def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - IIC_fpStore64, "fstd", "\t$src, $addr", +def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), + IIC_fpStore64, "vstr", ".64\t$src, $addr", [(store DPR:$src, addrmode5:$addr)]>; -def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - IIC_fpStore32, "fsts", "\t$src, $addr", +def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), + IIC_fpStore32, "vstr", ".32\t$src, $addr", [(store SPR:$src, addrmode5:$addr)]>; //===----------------------------------------------------------------------===// @@ -77,32 +77,32 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { -def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}d${p}\t${addr:base}, $wb", + "vldm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } -def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}s${p}\t${addr:base}, $wb", + "vldm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } } // mayLoad, hasExtraDefRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { -def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}d${p}\t${addr:base}, $wb", + "vstm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } -def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}s${p}\t${addr:base}, $wb", + "vstm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } @@ -114,68 +114,68 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, // FP Binary Operations. // -def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "faddd", "\t$dst, $a, $b", +def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fadds", "\t$dst, $a, $b", +def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { -def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), - IIC_fpCMP64, "fcmped", "\t$a, $b", +def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), + IIC_fpCMP64, "vcmpe", ".f64\t$a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; -def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), - IIC_fpCMP32, "fcmpes", "\t$a, $b", +def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), + IIC_fpCMP32, "vcmpe", ".f32\t$a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; } -def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpDIV64, "fdivd", "\t$dst, $a, $b", +def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; -def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpDIV32, "fdivs", "\t$dst, $a, $b", +def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; -def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fmuld", "\t$dst, $a, $b", +def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fmuls", "\t$dst, $a, $b", +def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; - -def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b", + +def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { let Inst{6} = 1; } -def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b", +def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { let Inst{6} = 1; } // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), DPR:$b), - (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; def : Pat<(fmul (fneg SPR:$a), SPR:$b), - (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; -def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "fsubd", "\t$dst, $a, $b", +def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { let Inst{6} = 1; } -def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fsubs", "\t$dst, $a, $b", +def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -184,31 +184,31 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), // FP Unary Operations. // -def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fabsd", "\t$dst, $a", +def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vabs", ".f64\t$dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; -def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fabss", "\t$dst, $a", +def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vabs", ".f32\t$dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { -def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), - IIC_fpCMP64, "fcmpezd", "\t$a", +def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), + IIC_fpCMP64, "vcmpe", ".f64\t$a, #0.0", [(arm_cmpfp0 DPR:$a)]>; -def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), - IIC_fpCMP32, "fcmpezs", "\t$a", +def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), + IIC_fpCMP32, "vcmpe", ".f32\t$a, #0.0", [(arm_cmpfp0 SPR:$a)]>; } -def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTDS, "fcvtds", "\t$dst, $a", +def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; // Special case encoding: bits 11-8 is 0b1011. -def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - IIC_fpCVTSD, "fcvtsd", "\t$dst, $a", +def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, + IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a", [(set SPR:$dst, (fround DPR:$a))]> { let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; @@ -217,52 +217,52 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, } let neverHasSideEffects = 1 in { -def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>; +def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>; -def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fcpys", "\t$dst, $a", []>; +def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>; } // neverHasSideEffects -def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fnegd", "\t$dst, $a", +def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vneg", ".f64\t$dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; -def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fnegs", "\t$dst, $a", +def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vneg", ".f32\t$dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; -def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpSQRT64, "fsqrtd", "\t$dst, $a", +def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; -def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpSQRT32, "fsqrts", "\t$dst, $a", +def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. // -def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "fmrs", "\t$dst, $src", +def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), + IIC_VMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; -def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "fmsr", "\t$dst, $src", +def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), + IIC_VMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; -def FMRRD : AVConv3I<0b11000101, 0b1011, +def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src", + IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR -def FMDRR : AVConv5I<0b11000100, 0b1011, +def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2", + IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR @@ -277,53 +277,53 @@ def FMDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: -def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fsitod", "\t$dst, $a", +def VSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fsitos", "\t$dst, $a", +def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fuitod", "\t$dst, $a", +def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; -def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fuitos", "\t$dst, $a", +def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; // FP to Int: // Always set Z bit in the instruction, i.e. "round towards zero" variants. -def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, +def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftosizd", "\t$dst, $a", + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, +def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftosizs", "\t$dst, $a", + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, +def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftouizd", "\t$dst, $a", + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, +def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftouizs", "\t$dst, $a", + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } @@ -332,54 +332,54 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, // FP FMA Operations. // -def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmacd", "\t$dst, $a, $b", +def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmacs", "\t$dst, $a, $b", +def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmscd", "\t$dst, $a, $b", +def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmscs", "\t$dst, $a, $b", +def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b", +def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b", +def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)), - (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; + (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), - (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; + (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b", +def VNMLAD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b", +def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -389,27 +389,27 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), // FP Conditional moves. // -def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100, +def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fcpyd", "\t$dst, $true", + IIC_fpUNA64, "vmov", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100, +def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fcpys", "\t$dst, $true", + IIC_fpUNA32, "vmov", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, +def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fnegd", "\t$dst, $true", + IIC_fpUNA64, "vneg", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, +def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fnegs", "\t$dst, $true", + IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; @@ -419,7 +419,8 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, // let Defs = [CPSR], Uses = [FPSCR] in -def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", +def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", + "\tAPSR_nzcv, FPSCR", [(arm_fmstat)]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; @@ -431,19 +432,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", // Materialize FP immediates. VFP3 only. -let isReMaterializable = 1 in -def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), - VFPMiscFrm, IIC_VMOVImm, - "fconsts", "\t$dst, $imm", - [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { - let Inst{27-23} = 0b11101; - let Inst{21-20} = 0b11; - let Inst{11-9} = 0b101; - let Inst{8} = 0; - let Inst{7-4} = 0b0000; -} - -let isReMaterializable = 1 in +let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_VMOVImm, "fconstd", "\t$dst, $imm", @@ -454,3 +443,15 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), let Inst{8} = 1; let Inst{7-4} = 0b0000; } + +def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), + VFPMiscFrm, IIC_VMOVImm, + "fconsts", "\t$dst, $imm", + [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { + let Inst{27-23} = 0b11101; + let Inst{21-20} = 0b11; + let Inst{11-9} = 0b101; + let Inst{8} = 0; + let Inst{7-4} = 0b0000; +} +} diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 7e1783bd44b..873a8d3cfee 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -41,8 +41,8 @@ using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); -STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); -STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); +STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); +STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); @@ -127,18 +127,18 @@ static int getLoadStoreMultipleOpcode(int Opcode) { case ARM::t2STRi12: NumSTMGened++; return ARM::t2STM; - case ARM::FLDS: - NumFLDMGened++; - return ARM::FLDMS; - case ARM::FSTS: - NumFSTMGened++; - return ARM::FSTMS; - case ARM::FLDD: - NumFLDMGened++; - return ARM::FLDMD; - case ARM::FSTD: - NumFSTMGened++; - return ARM::FSTMD; + case ARM::VLDRS: + NumVLDMGened++; + return ARM::VLDMS; + case ARM::VSTRS: + NumVSTMGened++; + return ARM::VSTMS; + case ARM::VLDRD: + NumVLDMGened++; + return ARM::VLDMD; + case ARM::VSTRD: + NumVSTMGened++; + return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } return 0; @@ -229,8 +229,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, BaseKill = true; // New base is always killed right its use. } - bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; - bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD; + bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; Opcode = getLoadStoreMultipleOpcode(Opcode); MachineInstrBuilder MIB = (isAM4) ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) @@ -373,27 +373,27 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2LDRi12: case ARM::t2STRi8: case ARM::t2STRi12: - case ARM::FLDS: - case ARM::FSTS: + case ARM::VLDRS: + case ARM::VSTRS: return 4; - case ARM::FLDD: - case ARM::FSTD: + case ARM::VLDRD: + case ARM::VSTRD: return 8; case ARM::LDM: case ARM::STM: case ARM::t2LDM: case ARM::t2STM: return (MI->getNumOperands() - 5) * 4; - case ARM::FLDMS: - case ARM::FSTMS: - case ARM::FLDMD: - case ARM::FSTMD: + case ARM::VLDMS: + case ARM::VSTMS: + case ARM::VLDMD: + case ARM::VSTMD: return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; } } /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base -/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: +/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// /// stmia rn, /// rn := rn + 4 * 3; @@ -475,7 +475,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } } } else { - // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. + // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) return false; @@ -517,10 +517,10 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_PRE; case ARM::STR: return ARM::STR_PRE; - case ARM::FLDS: return ARM::FLDMS; - case ARM::FLDD: return ARM::FLDMD; - case ARM::FSTS: return ARM::FSTMS; - case ARM::FSTD: return ARM::FSTMD; + case ARM::VLDRS: return ARM::VLDMS; + case ARM::VLDRD: return ARM::VLDMD; + case ARM::VSTRS: return ARM::VSTMS; + case ARM::VSTRD: return ARM::VSTMD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_PRE; @@ -536,10 +536,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_POST; case ARM::STR: return ARM::STR_POST; - case ARM::FLDS: return ARM::FLDMS; - case ARM::FLDD: return ARM::FLDMD; - case ARM::FSTS: return ARM::FSTMS; - case ARM::FSTD: return ARM::FSTMD; + case ARM::VLDRS: return ARM::VLDMS; + case ARM::VLDRD: return ARM::VLDMD; + case ARM::VSTRS: return ARM::VSTMS; + case ARM::VSTRD: return ARM::VSTMD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; @@ -564,8 +564,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Bytes = getLSMultipleTransferSize(MI); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS || - Opcode == ARM::FSTD || Opcode == ARM::FSTS; + bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || + Opcode == ARM::VSTRD || Opcode == ARM::VSTRS; bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) return false; @@ -575,7 +575,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (MI->getOperand(2).getImm() != 0) return false; - bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. if (isLd && MI->getOperand(0).getReg() == Base) @@ -626,7 +626,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) @@ -638,7 +638,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isLd) { if (isAM5) - // FLDMS, FLDMD + // VLDMS, VLDMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Offset).addImm(Pred).addReg(PredReg) @@ -657,7 +657,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } else { MachineOperand &MO = MI->getOperand(0); if (isAM5) - // FSTMS, FSTMD + // VSTMS, VSTMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) .addImm(Pred).addReg(PredReg) .addReg(Base, getDefRegState(true)) // WB base register @@ -687,11 +687,11 @@ static bool isMemoryOp(const MachineInstr *MI) { case ARM::LDR: case ARM::STR: return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; - case ARM::FLDS: - case ARM::FSTS: + case ARM::VLDRS: + case ARM::VSTRS: return MI->getOperand(1).isReg(); - case ARM::FLDD: - case ARM::FSTD: + case ARM::VLDRD: + case ARM::VSTRD: return MI->getOperand(1).isReg(); case ARM::t2LDRi8: case ARM::t2LDRi12: @@ -1214,7 +1214,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (!STI->hasV5TEOps()) return false; - // FIXME: FLDS / FSTS -> FLDD / FSTD + // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); if (Opcode == ARM::LDR) @@ -1456,7 +1456,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { continue; int Opc = MI->getOpcode(); - bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD; + bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; unsigned Base = MI->getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 020710bb45d..e9d89d75767 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -609,12 +609,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (Modifier && strcmp(Modifier, "submode") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - if (MO1.getReg() == ARM::SP) { - bool isFLDM = (MI->getOpcode() == ARM::FLDMD || - MI->getOpcode() == ARM::FLDMS); - O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. @@ -1131,9 +1126,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { } } - // Use unified assembler syntax mode for Thumb. - if (Subtarget->isThumb()) - O << "\t.syntax unified\n"; + // Use unified assembler syntax. + O << "\t.syntax unified\n"; // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index f422798e315..00479257217 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -259,12 +259,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (Modifier && strcmp(Modifier, "submode") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - if (MO1.getReg() == ARM::SP) { - bool isFLDM = (MI->getOpcode() == ARM::FLDMD || - MI->getOpcode() == ARM::FLDMS); - O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index f307e3b3108..7d767ec5261 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -54,10 +54,10 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { NextMII = next(MII); MachineInstr *MI = &*MII; - if (MI->getOpcode() == ARM::FCPYD && + if (MI->getOpcode() == ARM::VMOVD && !TII->isPredicated(MI)) { unsigned SrcReg = MI->getOperand(1).getReg(); - // If we do not found an instruction defining the reg, this means the + // If we do not find an instruction defining the reg, this means the // register should be live-in for this BB. It's always to better to use // NEON reg-reg moves. unsigned Domain = ARMII::DomainNEON; @@ -71,7 +71,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { } if (Domain & ARMII::DomainNEON) { - // Convert FCPYD to VMOVD. + // Convert VMOVD to VMOVDneon unsigned DestReg = MI->getOperand(0).getReg(); DEBUG({errs() << "vmov convert: "; MI->dump();}); @@ -82,7 +82,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { // - The imp-defs / imp-uses are superregs only, we don't care about // them. BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VMOVD), DestReg).addReg(SrcReg); + TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg); MBB.erase(MI); MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index e7770b2292e..6b605bb0a7c 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -37,7 +37,7 @@ LPCRELL0: mov r1, #PCRELV0 add r1, pc ldr r0, [r0, r1] - cpy pc, r0 + mov pc, r0 .align 2 LJTI1_0_0: .long LBB1_3 @@ -51,7 +51,7 @@ We should be able to generate: LPCRELL0: add r1, LJTI1_0_0 ldr r0, [r0, r1] - cpy pc, r0 + mov pc, r0 .align 2 LJTI1_0_0: .long LBB1_3 @@ -206,8 +206,8 @@ LPC0: add r5, pc ldr r6, LCPI1_1 ldr r2, LCPI1_2 - cpy r3, r6 - cpy lr, pc + mov r3, r6 + mov lr, pc bx r5 //===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index e4c7e6b5a6d..11c48add21b 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -321,7 +321,7 @@ time. 4) Once we added support for multiple result patterns, write indexed loads patterns instead of C++ instruction selection code. -5) Use FLDM / FSTM to emulate indexed FP load / store. +5) Use VLDM / VSTM to emulate indexed FP load / store. //===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 5ef7801ba8b..37adf37af4c 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -795,7 +795,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { - // Unwind MBBI to point to first LDR / FLDD. + // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll index ee93fde998c..2b7ccd86152 100644 --- a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll +++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13 +; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep vcmpe | count 13 %struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* } %struct.VEC2 = type { double, double, double } diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll index 98cab9a9149..3909c6a526e 100644 --- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll +++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll @@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind { br i1 %4, label %bb1, label %bb2 bb1: -;CHECK: fstdhi +;CHECK: vstrhi.64 store double %1, double* %y, align 4 br label %bb2 diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll index 6281775d061..5476d5f7961 100644 --- a/test/CodeGen/ARM/2009-09-24-spill-align.ll +++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll @@ -6,7 +6,7 @@ entry: %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] %out_poly16_t = alloca i16 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] -; CHECK: fldd +; CHECK: vldr.64 %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1] %1 = extractelement <4 x i16> %0, i32 1 ; [#uses=1] store i16 %1, i16* %out_poly16_t, align 2 diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll index c260b973b5a..62f3786e206 100644 --- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll +++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cortex-a8 < %s | grep vmov | count 1 +; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" @@ -11,12 +11,15 @@ entry: %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1] store <4 x float> %quat.0, <4 x float>* %0 %1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3] +;CHECK: vmov.f32 +;CHECK: vmov.f32 %2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2] %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2] %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2] %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> ; <<4 x float>> [#uses=2] +;CHECK: vmov %8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3] %9 = fmul <4 x float> %8, %8 ; <<4 x float>> [#uses=1] %10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll index d8019a07fab..062133e8645 100644 --- a/test/CodeGen/ARM/arguments_f64_backfill.ll +++ b/test/CodeGen/ARM/arguments_f64_backfill.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | FileCheck %s define float @f(float %z, double %a, float %b) { -; CHECK: fcpys s0, s1 +; CHECK: vmov.f32 s0, s1 %tmp = call float @g(float %b) ret float %tmp } diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll index 5f3ed1d2743..fac2bc5e432 100644 --- a/test/CodeGen/ARM/compare-call.ll +++ b/test/CodeGen/ARM/compare-call.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \ -; RUN: grep fcmpes +; RUN: grep vcmpe.f32 define void @test3(float* %glob, i32 %X) { entry: diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index 5690a01d750..46f136ba1fe 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index a01f868d18b..1426a2dc883 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index bf7c305c895..a6d741087a8 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=arm | grep bic | count 2 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \ -; RUN: grep fneg | count 2 +; RUN: grep vneg | count 2 define float @test1(float %x, double %y) { %tmp = fpext float %x to double diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 2af250d121d..45803f6d3c3 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll index ebf1d84536e..6db2385a63e 100644 --- a/test/CodeGen/ARM/fixunsdfdi.ll +++ b/test/CodeGen/ARM/fixunsdfdi.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 -; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep vstr.64 define hidden i64 @__fixunsdfdi(double %x) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index 5c31ea641de..57efa826404 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %acc, float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll index c6e6d406040..31b5c52d38d 100644 --- a/test/CodeGen/ARM/fmscs.ll +++ b/test/CodeGen/ARM/fmscs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %acc, float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index cb5dadeb210..735263c9a31 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll index 7da443dd93f..bc3d42de75c 100644 --- a/test/CodeGen/ARM/fnegs.ll +++ b/test/CodeGen/ARM/fnegs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 define float @test1(float* %a) { entry: diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll index 8fc13e78bc3..724947ea04d 100644 --- a/test/CodeGen/ARM/fnmacs.ll +++ b/test/CodeGen/ARM/fnmacs.ll @@ -4,14 +4,14 @@ define float @test(float %acc, float %a, float %b) { entry: -; VFP2: fnmacs -; NEON: fnmacs +; VFP2: vmls.f32 +; NEON: vmls.f32 ; NEONFP-NOT: vmls -; NEONFP-NOT: fcpys +; NEONFP-NOT: vmov.f32 ; NEONFP: vmul.f32 ; NEONFP: vsub.f32 -; NEONFP: fmrs +; NEONFP: vmov %0 = fmul float %a, %b %1 = fsub float %acc, %0 diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 3ae437d69db..ad2188218e4 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s define float @test1(float %acc, float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +; CHECK: vnmla.f32 s2, s1, s0 entry: %0 = fmul float %a, %b %1 = fsub float -0.0, %0 @@ -14,7 +14,7 @@ entry: } define float @test2(float %acc, float %a, float %b) nounwind { -; CHECK: fnmscs s2, s1, s0 +; CHECK: vnmla.f32 s2, s1, s0 entry: %0 = fmul float %a, %b %1 = fmul float -1.0, %0 diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll index 613b347cdbf..6d7bc05ffa9 100644 --- a/test/CodeGen/ARM/fnmul.ll +++ b/test/CodeGen/ARM/fnmul.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep vnmul.f64 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep vmul.f64 define double @t1(double %a, double %b) { diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index 4e4ef722f97..8fbd45b9757 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -2,9 +2,9 @@ define float @f(i32 %a) { ;CHECK: f: -;CHECK: fmsr -;CHECK-NEXT: fsitos -;CHECK-NEXT: fmrs +;CHECK: vmov +;CHECK-NEXT: vcvt.f32.s32 +;CHECK-NEXT: vmov entry: %tmp = sitofp i32 %a to float ; [#uses=1] ret float %tmp @@ -12,9 +12,9 @@ entry: define double @g(i32 %a) { ;CHECK: g: -;CHECK: fmsr -;CHECK-NEXT: fsitod -;CHECK-NEXT: fmrrd +;CHECK: vmov +;CHECK-NEXT: vcvt.f64.s32 +;CHECK-NEXT: vmov entry: %tmp = sitofp i32 %a to double ; [#uses=1] ret double %tmp @@ -22,9 +22,9 @@ entry: define double @uint_to_double(i32 %a) { ;CHECK: uint_to_double: -;CHECK: fmsr -;CHECK-NEXT: fuitod -;CHECK-NEXT: fmrrd +;CHECK: vmov +;CHECK-NEXT: vcvt.f64.u32 +;CHECK-NEXT: vmov entry: %tmp = uitofp i32 %a to double ; [#uses=1] ret double %tmp @@ -32,9 +32,9 @@ entry: define float @uint_to_float(i32 %a) { ;CHECK: uint_to_float: -;CHECK: fmsr -;CHECK-NEXT: fuitos -;CHECK-NEXT: fmrs +;CHECK: vmov +;CHECK-NEXT: vcvt.f32.u32 +;CHECK-NEXT: vmov entry: %tmp = uitofp i32 %a to float ; [#uses=1] ret float %tmp @@ -42,8 +42,8 @@ entry: define double @h(double* %v) { ;CHECK: h: -;CHECK: fldd -;CHECK-NEXT: fmrrd +;CHECK: vldr.64 +;CHECK-NEXT: vmov entry: %tmp = load double* %v ; [#uses=1] ret double %tmp @@ -58,13 +58,13 @@ entry: define double @f2(double %a) { ;CHECK: f2: -;CHECK-NOT: fmdrr +;CHECK-NOT: vmov ret double %a } define void @f3() { ;CHECK: f3: -;CHECK-NOT: fmdrr +;CHECK-NOT: vmov ;CHECK: f4 entry: %tmp = call double @f5( ) ; [#uses=1] diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 9ce2ac549b5..2adac78cf80 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -6,7 +6,7 @@ define i32 @test1(float %a, float %b) { ; VFP2: test1: -; VFP2: ftosizs s0, s0 +; VFP2: vcvt.s32.f32 s0, s0 ; NEON: test1: ; NEON: vcvt.s32.f32 d0, d0 entry: @@ -17,7 +17,7 @@ entry: define i32 @test2(float %a, float %b) { ; VFP2: test2: -; VFP2: ftouizs s0, s0 +; VFP2: vcvt.u32.f32 s0, s0 ; NEON: test2: ; NEON: vcvt.u32.f32 d0, d0 entry: @@ -28,7 +28,7 @@ entry: define float @test3(i32 %a, i32 %b) { ; VFP2: test3: -; VFP2: fuitos s0, s0 +; VFP2: vcvt.f32.u32 s0, s0 ; NEON: test3: ; NEON: vcvt.f32.u32 d0, d0 entry: @@ -39,7 +39,7 @@ entry: define float @test4(i32 %a, i32 %b) { ; VFP2: test4: -; VFP2: fsitos s0, s0 +; VFP2: vcvt.f32.s32 s0, s0 ; NEON: test4: ; NEON: vcvt.f32.s32 d0, d0 entry: diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll index ebeeb184121..ce6d6b29e9d 100644 --- a/test/CodeGen/ARM/fparith.ll +++ b/test/CodeGen/ARM/fparith.ll @@ -2,7 +2,7 @@ define float @f1(float %a, float %b) { ;CHECK: f1: -;CHECK: fadds +;CHECK: vadd.f32 entry: %tmp = fadd float %a, %b ; [#uses=1] ret float %tmp @@ -10,7 +10,7 @@ entry: define double @f2(double %a, double %b) { ;CHECK: f2: -;CHECK: faddd +;CHECK: vadd.f64 entry: %tmp = fadd double %a, %b ; [#uses=1] ret double %tmp @@ -18,7 +18,7 @@ entry: define float @f3(float %a, float %b) { ;CHECK: f3: -;CHECK: fmuls +;CHECK: vmul.f32 entry: %tmp = fmul float %a, %b ; [#uses=1] ret float %tmp @@ -26,7 +26,7 @@ entry: define double @f4(double %a, double %b) { ;CHECK: f4: -;CHECK: fmuld +;CHECK: vmul.f64 entry: %tmp = fmul double %a, %b ; [#uses=1] ret double %tmp @@ -34,7 +34,7 @@ entry: define float @f5(float %a, float %b) { ;CHECK: f5: -;CHECK: fsubs +;CHECK: vsub.f32 entry: %tmp = fsub float %a, %b ; [#uses=1] ret float %tmp @@ -42,7 +42,7 @@ entry: define double @f6(double %a, double %b) { ;CHECK: f6: -;CHECK: fsubd +;CHECK: vsub.f64 entry: %tmp = fsub double %a, %b ; [#uses=1] ret double %tmp @@ -58,7 +58,7 @@ entry: define double @f8(double %a) { ;CHECK: f8: -;CHECK: fnegd +;CHECK: vneg.f64 entry: %tmp1 = fsub double -0.000000e+00, %a ; [#uses=1] ret double %tmp1 @@ -66,7 +66,7 @@ entry: define float @f9(float %a, float %b) { ;CHECK: f9: -;CHECK: fdivs +;CHECK: vdiv.f32 entry: %tmp1 = fdiv float %a, %b ; [#uses=1] ret float %tmp1 @@ -74,7 +74,7 @@ entry: define double @f10(double %a, double %b) { ;CHECK: f10: -;CHECK: fdivd +;CHECK: vdiv.f64 entry: %tmp1 = fdiv double %a, %b ; [#uses=1] ret double %tmp1 @@ -92,7 +92,7 @@ declare float @fabsf(float) define double @f12(double %a) { ;CHECK: f12: -;CHECK: fabsd +;CHECK: vabs.f64 entry: %tmp1 = call double @fabs( double %a ) ; [#uses=1] ret double %tmp1 diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll index 2c9591ca542..260ec49cd86 100644 --- a/test/CodeGen/ARM/fpcmp.ll +++ b/test/CodeGen/ARM/fpcmp.ll @@ -2,7 +2,7 @@ define i32 @f1(float %a) { ;CHECK: f1: -;CHECK: fcmpes +;CHECK: vcmpe.f32 ;CHECK: movmi entry: %tmp = fcmp olt float %a, 1.000000e+00 ; [#uses=1] @@ -12,7 +12,7 @@ entry: define i32 @f2(float %a) { ;CHECK: f2: -;CHECK: fcmpes +;CHECK: vcmpe.f32 ;CHECK: moveq entry: %tmp = fcmp oeq float %a, 1.000000e+00 ; [#uses=1] @@ -22,7 +22,7 @@ entry: define i32 @f3(float %a) { ;CHECK: f3: -;CHECK: fcmpes +;CHECK: vcmpe.f32 ;CHECK: movgt entry: %tmp = fcmp ogt float %a, 1.000000e+00 ; [#uses=1] @@ -32,7 +32,7 @@ entry: define i32 @f4(float %a) { ;CHECK: f4: -;CHECK: fcmpes +;CHECK: vcmpe.f32 ;CHECK: movge entry: %tmp = fcmp oge float %a, 1.000000e+00 ; [#uses=1] @@ -42,7 +42,7 @@ entry: define i32 @f5(float %a) { ;CHECK: f5: -;CHECK: fcmpes +;CHECK: vcmpe.f32 ;CHECK: movls entry: %tmp = fcmp ole float %a, 1.000000e+00 ; [#uses=1] @@ -52,7 +52,7 @@ entry: define i32 @f6(float %a) { ;CHECK: f6: -;CHECK: fcmpes +;CHECK: vcmpe.f32 ;CHECK: movne entry: %tmp = fcmp une float %a, 1.000000e+00 ; [#uses=1] @@ -62,7 +62,7 @@ entry: define i32 @g1(double %a) { ;CHECK: g1: -;CHECK: fcmped +;CHECK: vcmpe.f64 ;CHECK: movmi entry: %tmp = fcmp olt double %a, 1.000000e+00 ; [#uses=1] diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll index ee3c338e3b3..bf197a46cb7 100644 --- a/test/CodeGen/ARM/fpconv.ll +++ b/test/CodeGen/ARM/fpconv.ll @@ -3,7 +3,7 @@ define float @f1(double %x) { ;CHECK-VFP: f1: -;CHECK-VFP: fcvtsd +;CHECK-VFP: vcvt.f32.f64 ;CHECK: f1: ;CHECK: truncdfsf2 entry: @@ -13,7 +13,7 @@ entry: define double @f2(float %x) { ;CHECK-VFP: f2: -;CHECK-VFP: fcvtds +;CHECK-VFP: vcvt.f64.f32 ;CHECK: f2: ;CHECK: extendsfdf2 entry: @@ -23,7 +23,7 @@ entry: define i32 @f3(float %x) { ;CHECK-VFP: f3: -;CHECK-VFP: ftosizs +;CHECK-VFP: vcvt.s32.f32 ;CHECK: f3: ;CHECK: fixsfsi entry: @@ -33,7 +33,7 @@ entry: define i32 @f4(float %x) { ;CHECK-VFP: f4: -;CHECK-VFP: ftouizs +;CHECK-VFP: vcvt.u32.f32 ;CHECK: f4: ;CHECK: fixunssfsi entry: @@ -43,7 +43,7 @@ entry: define i32 @f5(double %x) { ;CHECK-VFP: f5: -;CHECK-VFP: ftosizd +;CHECK-VFP: vcvt.s32.f64 ;CHECK: f5: ;CHECK: fixdfsi entry: @@ -53,7 +53,7 @@ entry: define i32 @f6(double %x) { ;CHECK-VFP: f6: -;CHECK-VFP: ftouizd +;CHECK-VFP: vcvt.u32.f64 ;CHECK: f6: ;CHECK: fixunsdfsi entry: @@ -63,7 +63,7 @@ entry: define float @f7(i32 %a) { ;CHECK-VFP: f7: -;CHECK-VFP: fsitos +;CHECK-VFP: vcvt.f32.s32 ;CHECK: f7: ;CHECK: floatsisf entry: @@ -73,7 +73,7 @@ entry: define double @f8(i32 %a) { ;CHECK-VFP: f8: -;CHECK-VFP: fsitod +;CHECK-VFP: vcvt.f64.s32 ;CHECK: f8: ;CHECK: floatsidf entry: @@ -83,7 +83,7 @@ entry: define float @f9(i32 %a) { ;CHECK-VFP: f9: -;CHECK-VFP: fuitos +;CHECK-VFP: vcvt.f32.u32 ;CHECK: f9: ;CHECK: floatunsisf entry: @@ -93,7 +93,7 @@ entry: define double @f10(i32 %a) { ;CHECK-VFP: f10: -;CHECK-VFP: fuitod +;CHECK-VFP: vcvt.f64.u32 ;CHECK: f10: ;CHECK: floatunsidf entry: diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll index 0822fbff653..c3cff18c959 100644 --- a/test/CodeGen/ARM/fpmem.ll +++ b/test/CodeGen/ARM/fpmem.ll @@ -8,7 +8,7 @@ define float @f1(float %a) { define float @f2(float* %v, float %u) { ; CHECK: f2: -; CHECK: flds{{.*}}[ +; CHECK: vldr.32{{.*}}[ %tmp = load float* %v ; [#uses=1] %tmp1 = fadd float %tmp, %u ; [#uses=1] ret float %tmp1 @@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) { define void @f3(float %a, float %b, float* %v) { ; CHECK: f3: -; CHECK: fsts{{.*}}[ +; CHECK: vstr.32{{.*}}[ %tmp = fadd float %a, %b ; [#uses=1] store float %tmp, float* %v ret void diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll index 0d270b0c056..4cacc5de7ee 100644 --- a/test/CodeGen/ARM/fptoint.ll +++ b/test/CodeGen/ARM/fptoint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep -E {vmov\\W*r\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd @i = weak global i32 0 ; [#uses=2] diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll index 060dd464f1b..f84ccdd480b 100644 --- a/test/CodeGen/ARM/fsubs.ll +++ b/test/CodeGen/ARM/fsubs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 ; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 define float @test(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll index e9145ac36dd..623f2cb1dfa 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -11,7 +11,7 @@ entry: define void @t1(i32 %a, i32 %b) { ; CHECK: t1: -; CHECK: ldmltfd sp!, {r7, pc} +; CHECK: ldmfdlt sp!, {r7, pc} entry: %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index 58241157580..d7fcf7d6440 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ ; RUN: grep cmpne | count 1 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ -; RUN: grep ldmhi | count 1 +; RUN: grep ldmfdhi | count 1 define void @foo(i32 %X, i32 %Y) { entry: diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll index f9cf88f7292..c60ad93699f 100644 --- a/test/CodeGen/ARM/ifcvt7.ll +++ b/test/CodeGen/ARM/ifcvt7.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ ; RUN: grep moveq | count 1 ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ -; RUN: grep ldmeq | count 1 +; RUN: grep ldmfdeq | count 1 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1. %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll index 6cb8e7bb69f..a7da834f781 100644 --- a/test/CodeGen/ARM/ifcvt8.ll +++ b/test/CodeGen/ARM/ifcvt8.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \ -; RUN: grep ldmne | count 1 +; RUN: grep ldmfdne | count 1 %struct.SString = type { i8*, i32, i32 } diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll index 2796dec5b97..c78872a4bca 100644 --- a/test/CodeGen/ARM/neon_ld1.ll +++ b/test/CodeGen/ARM/neon_ld1.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4 -; RUN: llc < %s -march=arm -mattr=+neon | grep fstd -; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd +; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4 +; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64 +; RUN: llc < %s -march=arm -mattr=+neon | grep vmov define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind { entry: diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index 547bab76356..130277b31c3 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ b/test/CodeGen/ARM/neon_ld2.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4 ; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1 -; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 85c8b5b8477..29c55c6bd97 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -60,7 +60,7 @@ define double @f7(double %a, double %b) { ;CHECK: movlt ;CHECK: movlt ;CHECK-VFP: f7: -;CHECK-VFP: fcpydmi +;CHECK-VFP: vmovmi %tmp = fcmp olt double %a, 1.234e+00 %tmp1 = select i1 %tmp, double -1.000e+00, double %b ret double %tmp1 diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll index 50000e31e11..44a44afe9af 100644 --- a/test/CodeGen/ARM/vfp.ll +++ b/test/CodeGen/ARM/vfp.ll @@ -15,11 +15,11 @@ declare double @fabs(double) define void @test_abs(float* %P, double* %D) { ;CHECK: test_abs: %a = load float* %P ; [#uses=1] -;CHECK: fabss +;CHECK: vabs.f32 %b = call float @fabsf( float %a ) ; [#uses=1] store float %b, float* %P %A = load double* %D ; [#uses=1] -;CHECK: fabsd +;CHECK: vabs.f64 %B = call double @fabs( double %A ) ; [#uses=1] store double %B, double* %D ret void @@ -39,10 +39,10 @@ define void @test_add(float* %P, double* %D) { define void @test_ext_round(float* %P, double* %D) { ;CHECK: test_ext_round: %a = load float* %P ; [#uses=1] -;CHECK: fcvtds +;CHECK: vcvt.f64.f32 %b = fpext float %a to double ; [#uses=1] %A = load double* %D ; [#uses=1] -;CHECK: fcvtsd +;CHECK: vcvt.f32.f64 %B = fptrunc double %A to float ; [#uses=1] store double %b, double* %D store float %B, float* %P @@ -54,7 +54,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) { %a1 = load float* %P1 ; [#uses=1] %a2 = load float* %P2 ; [#uses=1] %a3 = load float* %P3 ; [#uses=1] -;CHECK: fmscs +;CHECK: vnmls.f32 %X = fmul float %a1, %a2 ; [#uses=1] %Y = fsub float %X, %a3 ; [#uses=1] store float %Y, float* %P1 @@ -64,7 +64,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) { define i32 @test_ftoi(float* %P1) { ;CHECK: test_ftoi: %a1 = load float* %P1 ; [#uses=1] -;CHECK: ftosizs +;CHECK: vcvt.s32.f32 %b1 = fptosi float %a1 to i32 ; [#uses=1] ret i32 %b1 } @@ -72,7 +72,7 @@ define i32 @test_ftoi(float* %P1) { define i32 @test_ftou(float* %P1) { ;CHECK: test_ftou: %a1 = load float* %P1 ; [#uses=1] -;CHECK: ftouizs +;CHECK: vcvt.u32.f32 %b1 = fptoui float %a1 to i32 ; [#uses=1] ret i32 %b1 } @@ -80,7 +80,7 @@ define i32 @test_ftou(float* %P1) { define i32 @test_dtoi(double* %P1) { ;CHECK: test_dtoi: %a1 = load double* %P1 ; [#uses=1] -;CHECK: ftosizd +;CHECK: vcvt.s32.f64 %b1 = fptosi double %a1 to i32 ; [#uses=1] ret i32 %b1 } @@ -88,14 +88,14 @@ define i32 @test_dtoi(double* %P1) { define i32 @test_dtou(double* %P1) { ;CHECK: test_dtou: %a1 = load double* %P1 ; [#uses=1] -;CHECK: ftouizd +;CHECK: vcvt.u32.f64 %b1 = fptoui double %a1 to i32 ; [#uses=1] ret i32 %b1 } define void @test_utod(double* %P1, i32 %X) { ;CHECK: test_utod: -;CHECK: fuitod +;CHECK: vcvt.f64.u32 %b1 = uitofp i32 %X to double ; [#uses=1] store double %b1, double* %P1 ret void @@ -103,7 +103,7 @@ define void @test_utod(double* %P1, i32 %X) { define void @test_utod2(double* %P1, i8 %X) { ;CHECK: test_utod2: -;CHECK: fuitod +;CHECK: vcvt.f64.u32 %b1 = uitofp i8 %X to double ; [#uses=1] store double %b1, double* %P1 ret void @@ -141,7 +141,7 @@ define void @test_cmpfp0(float* %glob, i32 %X) { ;CHECK: test_cmpfp0: entry: %tmp = load float* %glob ; [#uses=1] -;CHECK: fcmpezs +;CHECK: vcmpe.f32 %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; [#uses=1] br i1 %tmp.upgrd.3, label %cond_true, label %cond_false diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index f0df7982ef4..5dd87d66c15 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -204,8 +204,8 @@ define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind { ;CHECK: test_vset_lanef32: -;CHECK: fcpys -;CHECK: fcpys +;CHECK: vmov.f32 +;CHECK: vmov.f32 entry: %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1] ret <2 x float> %0 diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index 4320328e9c1..eefbae53e72 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep fcpys | count 4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 4 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind { entry: