Add the PPC64 ldbrx/stdbrx instructions

These are 64-bit load/store with byte-swap, and available on the P7 and the A2. Like the similar instructions for 16- and 32-bit words, these are matched in the target DAG-combine phase against load/store-bswap pairs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178276 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-21 23:17:16 +00:00 · 2013-03-28 19:25:55 +00:00
parent ce88835110
commit efdd4673d6
7 changed files with 67 additions and 20 deletions
@@ -63,6 +63,8 @@ def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                        "Enable the isel instruction">;
 def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
                                        "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX     : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+                                        "Enable the ldbrx instruction">;
 def FeatureBookE     : SubtargetFeature<"booke", "IsBookE", "true",
                                        "Enable Book E instructions">;
 def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
@@ -77,7 +79,6 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 // FPRND        p5x, p6, p6x, p7   frim, frin, frip, friz
 // FRE          p5 through p7      fre (vs. fres, available since p3)
 // FRSQRTES     p5 through p7      frsqrtes (vs. frsqrte, available since p3)
-// LDBRX        p7                 load with byte reversal
 // LFIWAX       p6, p6x, p7        lfiwax
 // LFIWZX       p7                 lfiwzx
 // POPCNTB      p5 through p7      popcntb and related instructions
@@ -129,17 +130,16 @@ def : ProcessorModel<"e500mc", PPCE500mcModel,
 def : ProcessorModel<"e5500", PPCE5500Model,
                  [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
                   FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
-def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
-                                         FeatureMFOCRF, FeatureFSqrt,
-                                         FeatureSTFIWX, FeatureISEL,
-                                         FeaturePOPCNTD, Feature64Bit
-                                     /*, Feature64BitRegs */]>;
-def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
-                                          FeatureMFOCRF, FeatureFSqrt,
-                                          FeatureSTFIWX, FeatureISEL,
-                                          FeaturePOPCNTD, Feature64Bit
-                                      /*, Feature64BitRegs */,
-                                          FeatureQPX]>;
+def : Processor<"a2", PPCA2Itineraries,
+                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+                   FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+               /*, Feature64BitRegs */]>;
+def : Processor<"a2q", PPCA2Itineraries,
+                  [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+                   FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+               /*, Feature64BitRegs */, FeatureQPX]>;
 def : Processor<"pwr3", G5Itineraries,
                  [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
                   FeatureSTFIWX, Feature64Bit]>;
@@ -162,8 +162,8 @@ def : Processor<"pwr6x", G5Itineraries,
 def : Processor<"pwr7", G5Itineraries,
                  [DirectivePwr7, FeatureAltivec,
                   FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureISEL, FeaturePOPCNTD, Feature64Bit
-               /*, Feature64BitRegs */]>;
+                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
 def : Processor<"ppc64", G5Itineraries,
                  [Directive64, FeatureAltivec,
@@ -6604,7 +6604,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
-         N->getOperand(1).getValueType() == MVT::i16)) {
+         N->getOperand(1).getValueType() == MVT::i16 ||
+         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
@@ -6625,7 +6627,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
-        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
@@ -6636,7 +6640,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
-                                DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
+                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+                                              MVT::i64 : MVT::i32, MVT::Other),
+                                              Ops, 3,
                                LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
@@ -658,7 +658,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
 def LDX  : XForm_1<31,  21, (outs G8RC:$rD), (ins memrr:$src),
                   "ldx $rD, $src", LdStLD,
                   [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
-                   
+def LDBRX : XForm_1<31,  532, (outs G8RC:$rD), (ins memrr:$src),
+                   "ldbrx $rD, $src", LdStLoad,
+                   [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
 let mayLoad = 1 in
 def LDU  : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
                    "ldu $rD, $addr", LdStLDU,
@@ -779,6 +782,11 @@ def STDX  : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
                   "stdx $rS, $dst", LdStSTD,
                   [(store i64:$rS, xaddr:$dst)]>, isPPC64,
                   PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+                   "stdbrx $rS, $dst", LdStStore,
+                   [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
+                   PPC970_DGroup_Cracked;
+
 // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
 def STD_32  : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
                       "std $rT, $dst", LdStSTD,
@@ -36,10 +36,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [
 ]>;

 def SDT_PPClbrx : SDTypeProfile<1, 2, [
-  SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
 ]>;
 def SDT_PPCstbrx : SDTypeProfile<0, 3, [
-  SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+  SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
 ]>;

 def SDT_PPClarx : SDTypeProfile<1, 1, [
@@ -41,6 +41,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
  , HasSTFIWX(false)
  , HasISEL(false)
  , HasPOPCNTD(false)
+  , HasLDBRX(false)
  , IsBookE(false)
  , HasLazyResolverStubs(false)
  , IsJITCodeModel(false)
@@ -80,6 +80,7 @@ protected:
  bool HasSTFIWX;
  bool HasISEL;
  bool HasPOPCNTD;
+  bool HasLDBRX;
  bool IsBookE;
  bool HasLazyResolverStubs;
  bool IsJITCodeModel;
@@ -161,6 +162,7 @@ public:
  bool hasMFOCRF() const { return HasMFOCRF; }
  bool hasISEL() const { return HasISEL; }
  bool hasPOPCNTD() const { return HasPOPCNTD; }
+  bool hasLDBRX() const { return HasLDBRX; }
  bool isBookE() const { return IsBookE; }

  const Triple &getTargetTriple() const { return TargetTriple; }
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7


 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
@@ -34,18 +35,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) {
        ret i16 %tmp6
 }

+define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i )                ; <i64> [#uses=1]
+        store i64 %tmp13, i64* %tmp1.upgrd.1
+        ret void
+}
+
+define i64 @LDBRX(i8* %ptr, i64 %off) {
+        %tmp1 = getelementptr i8* %ptr, i64 %off                ; <i8*> [#uses=1]
+        %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
+        %tmp = load i64* %tmp1.upgrd.2          ; <i64> [#uses=1]
+        %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp )              ; <i64> [#uses=1]
+        ret i64 %tmp14
+}
+
 declare i32 @llvm.bswap.i32(i32)

 declare i16 @llvm.bswap.i16(i16)

+declare i64 @llvm.bswap.i64(i64)
+

 ; X32: stwbrx
 ; X32: lwbrx
 ; X32: sthbrx
 ; X32: lhbrx
+; X32-NOT: ldbrx
+; X32-NOT: stdbrx

 ; X64: stwbrx
 ; X64: lwbrx
 ; X64: sthbrx
 ; X64: lhbrx
+; X64-NOT: ldbrx
+; X64-NOT: stdbrx
+
+; PWR7: stwbrx
+; PWR7: lwbrx
+; PWR7: sthbrx
+; PWR7: lhbrx
+; PWR7: stdbrx
+; PWR7: ldbrx