From 35f9e1aa4990a20245cb7c39eb01f9feffc97853 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Thu, 24 Apr 2014 17:18:27 +0000 Subject: [PATCH] [X86] Add support for Read Time Stamp Counter x86 builtin intrinsics. This patch: - Adds two new X86 builtin intrinsics ('int_x86_rdtsc' and 'int_x86_rdtscp') as GCCBuiltin intrinsics; - Teaches the backend how to lower the two new builtins; - Introduces a common function to lower READCYCLECOUNTER dag nodes and the two new rdtsc/rdtscp intrinsics; - Improves (and extends) the existing x86 test 'rdtsc.ll'; now test 'rdtsc.ll' correctly verifies that both READCYCLECOUNTER and the two new intrinsics work fine for both 64bit and 32bit Subtargets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207127 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 9 ++ lib/Target/X86/X86ISelLowering.cpp | 130 ++++++++++++++++++++++------- lib/Target/X86/X86ISelLowering.h | 3 + lib/Target/X86/X86InstrInfo.td | 2 + lib/Target/X86/X86InstrSystem.td | 2 +- test/CodeGen/X86/rdtsc.ll | 53 ++++++++++-- 6 files changed, 160 insertions(+), 39 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 8f64b5d8ee9..e262687e912 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -17,6 +17,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_int : Intrinsic<[], [llvm_i8_ty]>; } +//===----------------------------------------------------------------------===// +// Read Time Stamp Counter. +let TargetPrefix = "x86" in { + def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">, + Intrinsic<[llvm_i64_ty], [], []>; + def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>; +} + //===----------------------------------------------------------------------===// // 3DNow! 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f40ad30e658..936699e9b59 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1472,6 +1472,8 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + if (!Subtarget->is64Bit()) + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -12261,6 +12263,71 @@ static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, return SDValue(Res, 1); } +// getReadTimeStampCounter - Handles the lowering of builtin intrinsics that +// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is +// also used to custom lower READCYCLECOUNTER nodes. +static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, + SelectionDAG &DAG, const X86Subtarget *Subtarget, + SmallVectorImpl<SDValue> &Results) { + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue TheChain = N->getOperand(0); + SDValue rd = DAG.getNode(Opcode, DL, Tys, &TheChain, 1); + SDValue LO, HI; + + // The processor's time-stamp counter (a 64-bit MSR) is stored into the + // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR + // and the EAX register is loaded with the low-order 32 bits. 
+ if (Subtarget->is64Bit()) { + LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); + HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, + LO.getValue(2)); + } else { + LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1)); + HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, + LO.getValue(2)); + } + SDValue Chain = HI.getValue(1); + + if (Opcode == X86ISD::RDTSCP_DAG) { + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + + // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into + // the ECX register. Add 'ecx' explicitly to the chain. + SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, + HI.getValue(2)); + // Explicitly store the content of ECX at the location passed in input + // to the 'rdtscp' intrinsic. + Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2), + MachinePointerInfo(), false, false, 0); + } + + if (Subtarget->is64Bit()) { + // The EDX register is loaded with the high-order 32 bits of the MSR, and + // the EAX register is loaded with the low-order 32 bits. + SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, + DAG.getConstant(32, MVT::i8)); + Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); + Results.push_back(Chain); + return; + } + + // Use a buildpair to merge the two 32-bit values into a 64-bit one. 
+ SDValue Ops[] = { LO, HI }; + SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, + array_lengthof(Ops)); + Results.push_back(Pair); + Results.push_back(Chain); +} + +static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SmallVector<SDValue, 2> Results; + SDLoc DL(Op); + getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget, + Results); + return DAG.getMergeValues(&Results[0], Results.size(), DL); +} + static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); @@ -12435,6 +12502,22 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Scale = Op.getOperand(6); return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain); } + // Read Time Stamp Counter (RDTSC). + case Intrinsic::x86_rdtsc: + // Read Time Stamp Counter and Processor ID (RDTSCP). + case Intrinsic::x86_rdtscp: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_rdtsc: + Opc = X86ISD::RDTSC_DAG; break; + case Intrinsic::x86_rdtscp: + Opc = X86ISD::RDTSCP_DAG; break; + } + SmallVector<SDValue, 2> Results; + getReadTimeStampCounter(Op.getNode(), dl, Opc, DAG, Subtarget, Results); + return DAG.getMergeValues(&Results[0], Results.size(), dl); + } // XTEST intrinsics. 
case Intrinsic::x86_xtest: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); @@ -13805,25 +13888,6 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, return cpOut; } -static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - assert(Subtarget->is64Bit() && "Result not type legalized?"); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue TheChain = Op.getOperand(0); - SDLoc dl(Op); - SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); - SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1)); - SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64, - rax.getValue(2)); - SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx, - DAG.getConstant(32, MVT::i8)); - SDValue Ops[] = { - DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp), - rdx.getValue(1) - }; - return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); -} - static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getSimpleValueType(); @@ -14158,20 +14222,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(V); return; } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default : llvm_unreachable("Do not know how to custom type " + "legalize this intrinsic operation!"); + case Intrinsic::x86_rdtsc: + return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, + Results); + case Intrinsic::x86_rdtscp: + return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget, + Results); + } + } case ISD::READCYCLECOUNTER: { - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue TheChain = N->getOperand(0); - SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); - SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32, - rd.getValue(1)); - SDValue edx = 
DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32, - eax.getValue(2)); - // Use a buildpair to merge the two 32-bit values into a 64-bit one. - SDValue Ops[] = { eax, edx }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, - array_lengthof(Ops))); - Results.push_back(edx.getValue(1)); - return; + return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, + Results); } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0beefea9cb5..6eb0069d63b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -83,6 +83,9 @@ namespace llvm { /// readcyclecounter RDTSC_DAG, + /// X86 Read Time-Stamp Counter and Processor ID. + RDTSCP_DAG, + /// X86 compare and logical compare instructions. CMP, COMI, UCOMI, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d9fbb1127ad..fc193b403ed 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -206,6 +206,8 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; +def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 9d3aa1c9102..b5595cbd3bb 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -19,7 +19,7 @@ let Defs = [RAX, RDX] in TB; let Defs = [RAX, RCX, RDX] in - def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB; // CPU flow control instructions diff --git a/test/CodeGen/X86/rdtsc.ll b/test/CodeGen/X86/rdtsc.ll index 
f21a44c3607..f1697429318 100644 --- a/test/CodeGen/X86/rdtsc.ll +++ b/test/CodeGen/X86/rdtsc.ll @@ -1,8 +1,49 @@ -; RUN: llc < %s -march=x86 | grep rdtsc -; RUN: llc < %s -march=x86-64 | grep rdtsc -declare i64 @llvm.readcyclecounter() +; RUN: llc < %s -march=x86-64 -mcpu=generic | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=X86 -define i64 @foo() { - %tmp.1 = call i64 @llvm.readcyclecounter( ) ; [#uses=1] - ret i64 %tmp.1 +; Verify that we correctly lower ISD::READCYCLECOUNTER. + + +define i64 @test_builtin_readcyclecounter() { + %1 = tail call i64 @llvm.readcyclecounter() + ret i64 %1 } +; CHECK-LABEL: test_builtin_readcyclecounter +; CHECK: rdtsc +; X86-NOT: shlq +; X86-NOT: or +; CHECK-NOT: mov +; CHECK: ret + + +; Verify that we correctly lower the Read Cycle Counter GCC x86 builtins +; (i.e. RDTSC and RDTSCP). + +define i64 @test_builtin_rdtsc() { + %1 = tail call i64 @llvm.x86.rdtsc() + ret i64 %1 +} +; CHECK-LABEL: test_builtin_rdtsc +; CHECK: rdtsc +; X86-NOT: shlq +; X86-NOT: or +; CHECK-NOT: mov +; CHECK: ret + + +define i64 @test_builtin_rdtscp(i8* %A) { + %1 = tail call i64 @llvm.x86.rdtscp(i8* %A) + ret i64 %1 +} +; CHECK-LABEL: test_builtin_rdtscp +; CHECK: rdtscp +; X86-NOT: shlq +; CHECK: movl %ecx, (%{{[a-z]+}}) +; X86-NOT: shlq +; CHECK: ret + + +declare i64 @llvm.readcyclecounter() +declare i64 @llvm.x86.rdtscp(i8*) +declare i64 @llvm.x86.rdtsc() +