Implement Regression/CodeGen/X86/rotate.ll: emit rotate instructions (which

typically cost 1 cycle) instead of shld/shrd instruction (which are typically 6 or more cycles). This also saves code space. For example, instead of emitting: rotr: mov %EAX, DWORD PTR [%ESP + 4] mov %CL, BYTE PTR [%ESP + 8] shrd %EAX, %EAX, %CL ret rotli: mov %EAX, DWORD PTR [%ESP + 4] shrd %EAX, %EAX, 27 ret Emit: rotr32: mov %CL, BYTE PTR [%ESP + 8] mov %EAX, DWORD PTR [%ESP + 4] ror %EAX, %CL ret rotli32: mov %EAX, DWORD PTR [%ESP + 4] ror %EAX, 27 ret We also emit byte rotate instructions which do not have a sh[lr]d counterpart at all. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@19692 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-05 17:39:16 +00:00 · 2005-01-19 08:07:05 +00:00 · 2005-01-19 08:07:05 +00:00 · 4053b1e30b
commit 4053b1e30b
parent b51f2e3de2
1 changed files with 84 additions and 43 deletions
--- a/lib/Target/X86/X86ISelPattern.cpp
+++ b/lib/Target/X86/X86ISelPattern.cpp
@ -1165,47 +1165,24 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
  // Find out if ShrAmt = 32-ShlAmt  or  ShlAmt = 32-ShrAmt.
  if (ShlAmt.getOpcode() == ISD::SUB && ShlAmt.getOperand(1) == ShrAmt)
    if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShlAmt.getOperand(0)))
-      if (SubCST->getValue() == RegSize && RegSize != 8) {
+      if (SubCST->getValue() == RegSize) {
+        // (A >> ShrAmt) | (A << (32-ShrAmt)) ==> ROR A, ShrAmt
        // (A >> ShrAmt) | (B << (32-ShrAmt)) ==> SHRD A, B, ShrAmt
-        unsigned AReg, BReg;
-        if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
-          AReg = SelectExpr(ShrVal);
-          BReg = SelectExpr(ShlVal);
-        } else {
-          BReg = SelectExpr(ShlVal);
-          AReg = SelectExpr(ShrVal);
-        }
-        unsigned ShAmt = SelectExpr(ShrAmt);
-        BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
-        unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
-        BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
-        return true;
-      }
-
-  if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
-    if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
-      if (SubCST->getValue() == RegSize && RegSize != 8) {
-        // (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
-        unsigned AReg, BReg;
-        if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
-          AReg = SelectExpr(ShrVal);
-          BReg = SelectExpr(ShlVal);
-        } else {
-          BReg = SelectExpr(ShlVal);
-          AReg = SelectExpr(ShrVal);
-        }
-        unsigned ShAmt = SelectExpr(ShlAmt);
-        BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
-        unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
-        BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
-        return true;
-      }
-
-  if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
-    if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
-      if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize) {
-        if (ShrCst->getValue() == RegSize-ShlCst->getValue() && RegSize != 8) {
-          // (A >> 5) | (B << 27) --> SHRD A, B, 5
+        if (ShrVal == ShlVal) {
+          unsigned Reg, ShAmt;
+          if (getRegPressure(ShrVal) > getRegPressure(ShrAmt)) {
+            Reg = SelectExpr(ShrVal);
+            ShAmt = SelectExpr(ShrAmt);
+          } else {
+            ShAmt = SelectExpr(ShrAmt);
+            Reg = SelectExpr(ShrVal);
+          }
+          BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
+          unsigned Opc = RegSize == 8 ? X86::ROR8rCL :
+                        (RegSize == 16 ? X86::ROR16rCL : X86::ROR32rCL);
+          BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
+          return true;
+        } else if (RegSize != 8) {
          unsigned AReg, BReg;
          if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
            AReg = SelectExpr(ShrVal);
@ -1214,14 +1191,78 @@ bool ISel::EmitOrOpOp(SDOperand Op1, SDOperand Op2, unsigned DestReg) {
            BReg = SelectExpr(ShlVal);
            AReg = SelectExpr(ShrVal);
          }
-          unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
-          BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
-            .addImm(ShrCst->getValue());
+          unsigned ShAmt = SelectExpr(ShrAmt);
+          BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
+          unsigned Opc = RegSize == 16 ? X86::SHRD16rrCL : X86::SHRD32rrCL;
+          BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
          return true;
        }
      }

+  if (ShrAmt.getOpcode() == ISD::SUB && ShrAmt.getOperand(1) == ShlAmt)
+    if (ConstantSDNode *SubCST = dyn_cast<ConstantSDNode>(ShrAmt.getOperand(0)))
+      if (SubCST->getValue() == RegSize) {
+        // (A << ShlAmt) | (A >> (32-ShlAmt)) ==> ROL A, ShrAmt
+        // (A << ShlAmt) | (B >> (32-ShlAmt)) ==> SHLD A, B, ShrAmt
+        if (ShrVal == ShlVal) {
+          unsigned Reg, ShAmt;
+          if (getRegPressure(ShrVal) > getRegPressure(ShlAmt)) {
+            Reg = SelectExpr(ShrVal);
+            ShAmt = SelectExpr(ShlAmt);
+          } else {
+            ShAmt = SelectExpr(ShlAmt);
+            Reg = SelectExpr(ShrVal);
+          }
+          BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
+          unsigned Opc = RegSize == 8 ? X86::ROL8rCL :
+                        (RegSize == 16 ? X86::ROL16rCL : X86::ROL32rCL);
+          BuildMI(BB, Opc, 1, DestReg).addReg(Reg);
+          return true;
+        } else if (RegSize != 8) {
+          unsigned AReg, BReg;
+          if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
+            AReg = SelectExpr(ShrVal);
+            BReg = SelectExpr(ShlVal);
+          } else {
+            BReg = SelectExpr(ShlVal);
+            AReg = SelectExpr(ShrVal);
+          }
+          unsigned ShAmt = SelectExpr(ShlAmt);
+          BuildMI(BB, X86::MOV8rr, 1, X86::CL).addReg(ShAmt);
+          unsigned Opc = RegSize == 16 ? X86::SHLD16rrCL : X86::SHLD32rrCL;
+          BuildMI(BB, Opc, 2, DestReg).addReg(AReg).addReg(BReg);
+          return true;
+        }
+      }

+  if (ConstantSDNode *ShrCst = dyn_cast<ConstantSDNode>(ShrAmt))
+    if (ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(ShlAmt))
+      if (ShrCst->getValue() < RegSize && ShlCst->getValue() < RegSize)
+        if (ShrCst->getValue() == RegSize-ShlCst->getValue()) {
+          // (A >> 5) | (A << 27) --> ROR A, 5
+          // (A >> 5) | (B << 27) --> SHRD A, B, 5
+          if (ShrVal == ShlVal) {
+            unsigned Reg = SelectExpr(ShrVal);
+            unsigned Opc = RegSize == 8 ? X86::ROR8ri :
+              (RegSize == 16 ? X86::ROR16ri : X86::ROR32ri);
+            BuildMI(BB, Opc, 2, DestReg).addReg(Reg).addImm(ShrCst->getValue());
+            return true;
+          } else if (RegSize != 8) {
+            unsigned AReg, BReg;
+            if (getRegPressure(ShlVal) > getRegPressure(ShrVal)) {
+              AReg = SelectExpr(ShrVal);
+              BReg = SelectExpr(ShlVal);
+            } else {
+              BReg = SelectExpr(ShlVal);
+              AReg = SelectExpr(ShrVal);
+            }
+            unsigned Opc = RegSize == 16 ? X86::SHRD16rri8 : X86::SHRD32rri8;
+            BuildMI(BB, Opc, 3, DestReg).addReg(AReg).addReg(BReg)
+              .addImm(ShrCst->getValue());
+            return true;
+          }
+        }
+        
  return false;
 }