diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index dfab690a9d4..27f7157a28d 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -63,6 +63,8 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; +def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", + "Enable the ldbrx instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", @@ -77,7 +79,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // FPRND p5x, p6, p6x, p7 frim, frin, frip, friz // FRE p5 through p7 fre (vs. fres, available since p3) // FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) -// LDBRX p7 load with byte reversal // LFIWAX p6, p6x, p7 lfiwax // LFIWZX p7 lfiwzx // POPCNTB p5 through p7 popcntb and related instructions @@ -129,17 +130,16 @@ def : ProcessorModel<"e500mc", PPCE500mcModel, def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - FeaturePOPCNTD, Feature64Bit - /*, Feature64BitRegs */]>; -def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - FeaturePOPCNTD, Feature64Bit - /*, Feature64BitRegs */, - FeatureQPX]>; +def : Processor<"a2", PPCA2Itineraries, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */]>; +def : Processor<"a2q", PPCA2Itineraries, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */, FeatureQPX]>; def : Processor<"pwr3", G5Itineraries, [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; @@ -162,8 +162,8 @@ def : Processor<"pwr6x", G5Itineraries, def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureISEL, FeaturePOPCNTD, Feature64Bit - /*, Feature64BitRegs */]>; + FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3275315a6a4..eba762f3b22 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6604,7 +6604,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (N->getOperand(1).getValueType() == MVT::i32 || - N->getOperand(1).getValueType() == MVT::i16)) { + N->getOperand(1).getValueType() == MVT::i16 || + (TM.getSubtarget().hasLDBRX() && + N->getOperand(1).getValueType() == MVT::i64))) { SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) @@ -6625,7 +6627,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || + (TM.getSubtarget().hasLDBRX() && + N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. @@ -6636,7 +6640,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, - DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, + DAG.getVTList(N->getValueType(0) == MVT::i64 ? + MVT::i64 : MVT::i32, MVT::Other), + Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 0fbb11d1338..e031dfa3987 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -658,7 +658,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; - +def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src), + "ldbrx $rD, $src", LdStLoad, + [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; + let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, @@ -779,6 +782,11 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store i64:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; +def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), + "stdbrx $rS, $dst", LdStStore, + [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, + PPC970_DGroup_Cracked; + // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), "std $rT, $dst", LdStSTD, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 44ac0b2f1af..ea0be97e0d0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -36,10 +36,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ ]>; def SDT_PPClbrx : SDTypeProfile<1, 2, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPCstbrx : SDTypeProfile<0, 3, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPClarx : SDTypeProfile<1, 1, [ diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 11b7fc2b03b..be64700d8c3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -41,6 +41,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasSTFIWX(false) , HasISEL(false) , HasPOPCNTD(false) + , HasLDBRX(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 070a9a9485f..36436f6e407 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -80,6 +80,7 @@ protected: bool HasSTFIWX; bool HasISEL; bool HasPOPCNTD; + bool HasLDBRX; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -161,6 +162,7 @@ public: bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll index 4f6bfc72991..2aae4150efb 100644 --- a/test/CodeGen/PowerPC/bswap-load-store.ll +++ b/test/CodeGen/PowerPC/bswap-load-store.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32 ; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) { @@ -34,18 +35,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) { ret i16 %tmp6 } +define void @STDBRX(i64 %i, i8* %ptr, i64 %off) { + %tmp1 = getelementptr i8* %ptr, i64 %off ; [#uses=1] + %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; [#uses=1] + %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; [#uses=1] + store i64 %tmp13, i64* %tmp1.upgrd.1 + ret void +} + +define i64 @LDBRX(i8* %ptr, i64 %off) { + %tmp1 = getelementptr i8* %ptr, i64 %off ; [#uses=1] + %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; [#uses=1] + %tmp = load i64* %tmp1.upgrd.2 ; [#uses=1] + %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; [#uses=1] + ret i64 %tmp14 +} + declare i32 @llvm.bswap.i32(i32) declare i16 @llvm.bswap.i16(i16) +declare i64 @llvm.bswap.i64(i64) + ; X32: stwbrx ; X32: lwbrx ; X32: sthbrx ; X32: lhbrx +; X32-NOT: ldbrx +; X32-NOT: stdbrx ; X64: stwbrx ; X64: lwbrx ; X64: sthbrx ; X64: lhbrx +; X64-NOT: ldbrx +; X64-NOT: stdbrx + +; PWR7: stwbrx +; PWR7: lwbrx +; PWR7: sthbrx +; PWR7: lhbrx +; PWR7: stdbrx +; PWR7: ldbrx