diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index de2f50495ca..7315e52ef0a 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -183,18 +183,18 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3> ]>; -def STDPrefetch : SDTypeProfile<0, 3, [ // prefetch +def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1> ]>; -def STDMemBarrier : SDTypeProfile<0, 5, [ // memory barier +def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barier SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>, SDTCisInt<0> ]>; -def STDAtomic3 : SDTypeProfile<1, 3, [ +def SDTAtomic3 : SDTypeProfile<1, 3, [ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1> ]>; -def STDAtomic2 : SDTypeProfile<1, 2, [ +def SDTAtomic2 : SDTypeProfile<1, 2, [ SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1> ]>; @@ -374,35 +374,35 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; -def prefetch : SDNode<"ISD::PREFETCH" , STDPrefetch, +def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; -def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier, +def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier, [SDNPHasChain, SDNPSideEffect]>; -def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , STDAtomic3, +def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , STDAtomic2, +def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2, +def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , STDAtomic2, +def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2, +def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2, +def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2, +def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", STDAtomic2, +def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2, +def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2, +def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2, +def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2, +def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; // Do not use ld, st directly. Use load, extload, sextload, zextload, store, diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 0ebdd75244e..eef152f2741 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -64,6 +64,9 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; +// Multiprocessing extension. +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; // ARM architectures. def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b816e66f7d3..6bd8503bb47 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -598,10 +598,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); - // ARM v5TE+ and Thumb2 has preload instructions. - if (Subtarget->isThumb2() || - (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())) - setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { @@ -777,6 +774,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; + case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -2060,6 +2059,31 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DAG.getConstant(DMBOpt, MVT::i32)); } +static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // ARM pre v5TE and Thumb1 does not have preload instructions. + if (!(Subtarget->isThumb2() || + (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) + // Just preserve the chain. + return Op.getOperand(0); + + DebugLoc dl = Op.getDebugLoc(); + unsigned Flavor = cast(Op.getOperand(3))->getZExtValue(); + if (Flavor != 3) { + if (!Subtarget->hasV7Ops()) + return Op.getOperand(0); + else if (Flavor == 2 && !Subtarget->hasMPExtension()) + return Op.getOperand(0); + } + + if (Subtarget->isThumb()) + // Invert the bits. + Flavor = ~Flavor & 0x3; + + return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(Flavor, MVT::i32)); +} + static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *FuncInfo = MF.getInfo(); @@ -3842,6 +3866,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index bd2fa8eb47e..8504b83d243 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -83,6 +83,8 @@ namespace llvm { MEMBARRIER, // Memory barrier (DMB) MEMBARRIER_MCR, // Memory barrier (MCR) + + PRELOAD, // Preload VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 71557c9864e..c98d45bf43d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,6 +62,8 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMPRELOAD : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; + def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, @@ -130,6 +132,8 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, [SDNPHasChain]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain]>; +def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPRELOAD, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; @@ -159,6 +163,8 @@ def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate; def HasDB : Predicate<"Subtarget->hasDataBarrier()">, AssemblerPredicate; +def HasMP : Predicate<"Subtarget->hasMPExtension()">, + AssemblerPredicate; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate; @@ -988,14 +994,11 @@ def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt", // Preload signals the memory system of possible future data/instruction access. // These are for disassembly only. -// -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. multiclass APreLoad data_read, string opc> { - def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary, + def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, !strconcat(opc, "\t$addr"), - [(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> { + [(ARMPreload addrmode_imm12:$addr, (i32 data_read))]> { bits<4> Rt; bits<17> addr; let Inst{31-26} = 0b111101; @@ -1009,9 +1012,9 @@ multiclass APreLoad data_read, string opc> { let Inst{11-0} = addr{11-0}; // imm12 } - def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary, + def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, !strconcat(opc, "\t$shift"), - [(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> { + [(ARMPreload ldst_so_reg:$shift, (i32 data_read))]> { bits<4> Rt; bits<17> shift; let Inst{31-26} = 0b111101; @@ -1025,9 +1028,9 @@ multiclass APreLoad data_read, string opc> { } } -defm PLD : APreLoad<3, "pld">; -defm PLDW : APreLoad<2, "pldw">; -defm PLI : APreLoad<1, "pli">; +defm PLD : APreLoad<3, "pld">, Requires<[IsARM]>; +defm PLDW : APreLoad<2, "pldw">, Requires<[IsARM,HasV7,HasMP]>; +defm PLI : APreLoad<1, "pli">, Requires<[IsARM,HasV7]>; def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, "setend\t$end", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 44724111bb3..b105bc7eed6 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1171,67 +1171,66 @@ def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. These are for disassembly only. -// -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -multiclass T2Ipl data_read, string opc> { +// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), +// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1). +multiclass T2Ipl instr_write, string opc> { - def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc, + def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc, "\t$addr", - [(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> { + [(ARMPreload t2addrmode_imm12:$addr, (i32 instr_write))]> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = 1; // U = 1 let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{15-12} = 0b1111; } - def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc, + def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc, "\t$addr", - [(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> { + [(ARMPreload t2addrmode_imm8:$addr, (i32 instr_write))]> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = 0; // U = 0 let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{15-12} = 0b1111; let Inst{11-8} = 0b1100; } - def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc, + def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, "\t$addr", - [(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> { + [(ARMPreload t2addrmode_so_reg:$addr, (i32 instr_write))]> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = 0; // add = TRUE for T1 let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{15-12} = 0b1111; let Inst{11-6} = 0000000; } let isCodeGenOnly = 1 in - def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc, + def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_Preload, opc, "\t$addr", []> { let Inst{31-25} = 0b1111100; - let Inst{24} = instr; + let Inst{24} = instr_write{1}; let Inst{23} = ?; // add = (U == 1) let Inst{22} = 0; - let Inst{21} = write; + let Inst{21} = instr_write{0}; let Inst{20} = 1; let Inst{19-16} = 0b1111; // Rn = 0b1111 let Inst{15-12} = 0b1111; } } -defm t2PLD : T2Ipl<0, 0, 3, "pld">; -defm t2PLDW : T2Ipl<0, 1, 2, "pldw">; -defm t2PLI : T2Ipl<1, 0, 1, "pli">; +defm t2PLD : T2Ipl<0, "pld">, Requires<[IsThumb2]>; +defm t2PLDW : T2Ipl<1, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; +defm t2PLI : T2Ipl<2, "pli">, Requires<[IsThumb2,HasV7]>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index c35cadb12ca..173961875e4 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -90,6 +90,7 @@ def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; def IIC_iStore_m : InstrItinClass<0>; // micro-coded def IIC_iStore_mu : InstrItinClass<0>; // micro-coded +def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 94b22c9113c..fc62faa09a3 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -225,6 +225,10 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData, InstrStage<2, [A8_LSPipe]>], [2]>, + // + // Preload + InstrItinData], [2, 2]>, + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index d5ab5dcc87b..bc325b1ec44 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -402,6 +402,10 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_AGU], 0>, InstrStage<2, [A9_LSUnit]>], [2]>, + // + // Preload + InstrItinData], [1, 1]>, + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 787bc305dfb..08331724f1e 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -51,6 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) + , HasMPExtension(false) , FPOnlySP(false) , AllowsUnalignedMem(false) , stackAlignment(4) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index ca9921eba13..551b2f99ffd 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -106,6 +106,10 @@ protected: /// over 16-bit ones. bool Pref32BitThumb; + /// HasMPExtension - True if the subtarget supports Multiprocessing + /// extension (ARMv7 only). + bool HasMPExtension; + /// FPOnlySP - If true, the floating point unit only supports single /// precision. bool FPOnlySP; @@ -176,6 +180,7 @@ protected: bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } + bool hasMPExtension() const { return HasMPExtension; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index dbc1002e508..1e4ff30af78 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -check-prefix=THUMB2 -; RUN: llc < %s -march=arm -mattr=+v5te | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -march=thumb -mattr=+v7a,+mp | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM ; rdar://8601536 define void @t1(i8* %ptr) nounwind {