Add the PPC64 ldbrx/stdbrx instructions

These are 64-bit load/store with byte-swap, and available on the P7 and the A2.
Like the similar instructions for 16- and 32-bit words, these are matched in the
target DAG-combine phase against load/store-bswap pairs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178276 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2013-03-28 19:25:55 +00:00
parent ce88835110
commit efdd4673d6
7 changed files with 67 additions and 20 deletions

View File

@ -63,6 +63,8 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
@ -77,7 +79,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz
// FRE p5 through p7 fre (vs. fres, available since p3)
// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
// LDBRX p7 load with byte reversal
// LFIWAX p6, p6x, p7 lfiwax
// LFIWZX p7 lfiwzx
// POPCNTB p5 through p7 popcntb and related instructions
@ -129,17 +130,16 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
FeatureMFOCRF, FeatureFSqrt,
FeatureSTFIWX, FeatureISEL,
FeaturePOPCNTD, Feature64Bit
/*, Feature64BitRegs */]>;
def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
FeatureMFOCRF, FeatureFSqrt,
FeatureSTFIWX, FeatureISEL,
FeaturePOPCNTD, Feature64Bit
/*, Feature64BitRegs */,
FeatureQPX]>;
def : Processor<"a2", PPCA2Itineraries,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */]>;
def : Processor<"a2q", PPCA2Itineraries,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, FeatureQPX]>;
def : Processor<"pwr3", G5Itineraries,
[DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
FeatureSTFIWX, Feature64Bit]>;
@ -162,8 +162,8 @@ def : Processor<"pwr6x", G5Itineraries,
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
FeatureISEL, FeaturePOPCNTD, Feature64Bit
/*, Feature64BitRegs */]>;
FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,

View File

@ -6604,7 +6604,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16)) {
N->getOperand(1).getValueType() == MVT::i16 ||
(TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
if (BSwapOp.getValueType() == MVT::i16)
@ -6625,7 +6627,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
(TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
@ -6636,7 +6640,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
Ops, 3,
LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.

View File

@ -658,7 +658,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
"ldbrx $rD, $src", LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
let mayLoad = 1 in
def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
@ -779,6 +782,11 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
[(store i64:$rS, xaddr:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
"stdbrx $rS, $dst", LdStStore,
[(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
"std $rT, $dst", LdStSTD,

View File

@ -36,10 +36,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
]>;
def SDT_PPClbrx : SDTypeProfile<1, 2, [
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [

View File

@ -41,6 +41,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasSTFIWX(false)
, HasISEL(false)
, HasPOPCNTD(false)
, HasLDBRX(false)
, IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)

View File

@ -80,6 +80,7 @@ protected:
bool HasSTFIWX;
bool HasISEL;
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@ -161,6 +162,7 @@ public:
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }

View File

@ -1,5 +1,6 @@
; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@ -34,18 +35,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
ret i16 %tmp6
}
define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
%tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
%tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
%tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1]
store i64 %tmp13, i64* %tmp1.upgrd.1
ret void
}
define i64 @LDBRX(i8* %ptr, i64 %off) {
%tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
%tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
%tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1]
%tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1]
ret i64 %tmp14
}
declare i32 @llvm.bswap.i32(i32)
declare i16 @llvm.bswap.i16(i16)
declare i64 @llvm.bswap.i64(i64)
; X32: stwbrx
; X32: lwbrx
; X32: sthbrx
; X32: lhbrx
; X32-NOT: ldbrx
; X32-NOT: stdbrx
; X64: stwbrx
; X64: lwbrx
; X64: sthbrx
; X64: lhbrx
; X64-NOT: ldbrx
; X64-NOT: stdbrx
; PWR7: stwbrx
; PWR7: lwbrx
; PWR7: sthbrx
; PWR7: lhbrx
; PWR7: stdbrx
; PWR7: ldbrx