diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 4eecd15a6fc..5e32eba7f72 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -6663,6 +6663,9 @@ memory. Implementations are allowed to either return a application specific value or a system wide value. On backends without support, this is lowered to a constant 0. +Note that runtime support may be conditional on the privilege-level code is +running at and the host platform. + Standard C Library Intrinsics ----------------------------- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a1443d1b020..4238c72c139 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -681,6 +681,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -5702,7 +5704,6 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - static void ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl& Results, SelectionDAG &DAG, unsigned NewOp) { @@ -5736,6 +5737,44 @@ ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl& Results, Results.push_back(Result.getValue(2)); } +static void ReplaceREADCYCLECOUNTER(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + DebugLoc DL = N->getDebugLoc(); + SDValue Cycles32, OutChain; + + if (Subtarget->hasPerfMon()) { + // Under Power Management extensions, the cycle-count is: + // mrc p15, #0, , c9, c13, #0 + SDValue Ops[] = { N->getOperand(0), // Chain + DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), + DAG.getConstant(15, MVT::i32), + DAG.getConstant(0, MVT::i32), + DAG.getConstant(9, MVT::i32), + DAG.getConstant(13, MVT::i32), + DAG.getConstant(0, MVT::i32) + }; + + Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], + array_lengthof(Ops)); + OutChain = Cycles32.getValue(1); + } else { + // Intrinsic is defined to return 0 on unsupported platforms. Technically + // there are older ARM CPUs that have implementation-specific ways of + // obtaining this information (FIXME!). + Cycles32 = DAG.getConstant(0, MVT::i32); + OutChain = DAG.getEntryNode(); + } + + + SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, + Cycles32, DAG.getConstant(0, MVT::i32)); + Results.push_back(Cycles64); + Results.push_back(OutChain); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -5813,6 +5852,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; + case ISD::READCYCLECOUNTER: + ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); + return; case ISD::ATOMIC_LOAD_ADD: ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); return; diff --git a/test/CodeGen/ARM/readcyclecounter.ll b/test/CodeGen/ARM/readcyclecounter.ll new file mode 100644 index 00000000000..db47ad355d0 --- /dev/null +++ b/test/CodeGen/ARM/readcyclecounter.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-none-linux-gnueabi < %s | FileCheck %s +; RUN: llc -mtriple=armv7-none-linux-gnueabi -mattr=-perfmon < %s | FileCheck %s --check-prefix=CHECK-NO-PERFMON +; RUN: llc -mtriple=armv6-none-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK-NO-PERFMON + +; The performance monitor we're looking for is an ARMv7 extension. It should be +; possible to disable it, but realistically present on at least every v7-A +; processor (but not on v6, at least by default). + +declare i64 @llvm.readcyclecounter() + +define i64 @get_count() { + %val = call i64 @llvm.readcyclecounter() + ret i64 %val + + ; As usual, exact registers only sort of matter but the cycle-count had better + ; end up in r0 in the end. + +; CHECK: mrc p15, #0, r0, c9, c13, #0 +; CHECK: {{movs?}} r1, #0 + +; CHECK-NO-PERFMON: {{movs?}} r0, #0 +; CHECK-NO-PERFMON: {{movs?}} r1, #0 +}