Re-commit 131641 with fixes; de-pseudoize MOVSX16rr8 and friends.

rdar://problem/8614450 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131746 91177308-0d34-0410-b5e6-96231b3b80d8
2024-09-30 04:56:49 +00:00 · 2011-05-20 19:04:40 +00:00 · 2011-05-20 19:04:40 +00:00 · 0e29ed081b
commit 0e29ed081b
parent 17a1e87751
6 changed files with 60 additions and 50 deletions
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -2006,17 +2006,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
-          SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
+          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
                                         MVT::Other, Ops,
                                         array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
-          SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
        Chain = CurDAG->getEntryNode();
      }
-      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@ -997,7 +997,8 @@ def : Pat<(extloadi64i32 addr:$src),

 // anyext. Define these to do an explicit zero-extend to
 // avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
+def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
+                                     (MOVZX32rr8 GR8 :$src), sub_16bit)>;
 def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;

 // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
@ -1164,9 +1165,9 @@ def : Pat<(and GR32:$src1, 0xff),
      Requires<[In32BitMode]>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR16:$src1, 0xff),
-          (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
-                                                             GR16_ABCD)),
-                                      sub_8bit))>,
+           (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG
+            (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)),
+             sub_16bit)>,
      Requires<[In32BitMode]>;

 // r & (2^32-1) ==> movz
@ -1184,7 +1185,8 @@ def : Pat<(and GR32:$src1, 0xff),
      Requires<[In64BitMode]>;
 // r & (2^8-1) ==> movz
 def : Pat<(and GR16:$src1, 0xff),
-           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
+           (EXTRACT_SUBREG (MOVZX32rr8 (i8
+            (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
      Requires<[In64BitMode]>;


@ -1196,10 +1198,11 @@ def : Pat<(sext_inreg GR32:$src, i8),
                                                             GR32_ABCD)),
                                      sub_8bit))>,
      Requires<[In32BitMode]>;
+
 def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
-                                                             GR16_ABCD)),
-                                      sub_8bit))>,
+           (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG
+            (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))),
+             sub_16bit)>,
      Requires<[In32BitMode]>;

 def : Pat<(sext_inreg GR64:$src, i32),
@ -1212,9 +1215,19 @@ def : Pat<(sext_inreg GR32:$src, i8),
          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
      Requires<[In64BitMode]>;
 def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
+           (EXTRACT_SUBREG (MOVSX32rr8
+            (EXTRACT_SUBREG GR16:$src, sub_8bit)), sub_16bit)>,
      Requires<[In64BitMode]>;

+// sext, sext_load, zext, zext_load
+def: Pat<(i16 (sext GR8:$src)),
+          (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(sextloadi16i8 addr:$src),
+          (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
+def: Pat<(i16 (zext GR8:$src)),
+          (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
+def: Pat<(zextloadi16i8 addr:$src),
+          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;

 // trunc patterns
 def : Pat<(i16 (trunc GR32:$src)),
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@ -38,22 +38,11 @@ let neverHasSideEffects = 1 in {


 // Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update.  Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// FIXME: Use a pat pattern or define a syntax here.                    
-let isCodeGenOnly=1 in {
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-}
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+                   "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+                   "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (sext GR8:$src))]>, TB;
 def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
@ -66,20 +55,10 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;

-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update.  Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;  
-// FIXME: Use a pat pattern or define a syntax here.                    
-let isCodeGenOnly=1 in {
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-}
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+                   "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+                   "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (zext GR8:$src))]>, TB;
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@ -1546,8 +1546,8 @@ def : InstAlias<"movq $src, $dst",
 def : InstAlias<"movsd", (MOVSD)>;

 // movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
 def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
 def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
 def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
@ -1555,8 +1555,8 @@ def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
 def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;

 // movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
 def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
 def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
 def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@ -355,10 +355,6 @@ ReSimplify:
    assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
-  case X86::MOVZX16rr8:   LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
-  case X86::MOVZX16rm8:   LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
-  case X86::MOVSX16rr8:   LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break;
-  case X86::MOVSX16rm8:   LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break;
  case X86::MOVZX64rr32:  LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
  case X86::MOVZX64rm32:  LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
  case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
--- a/test/CodeGen/X86/div8.ll
+++ b/test/CodeGen/X86/div8.ll
@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = '8div.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.6.6"
+
+define signext i8 @test_div(i8 %dividend, i8 %divisor) nounwind ssp {
+entry:
+  %dividend.addr = alloca i8, align 2
+  %divisor.addr = alloca i8, align 1
+  %quotient = alloca i8, align 1
+  store i8 %dividend, i8* %dividend.addr, align 2
+  store i8 %divisor, i8* %divisor.addr, align 1
+  %tmp = load i8* %dividend.addr, align 2
+  %tmp1 = load i8* %divisor.addr, align 1
+; Insist on i8->i32 zero extension, even though divb demands only i16:
+; CHECK: movzbl {{.*}}%eax
+; CHECK: divb
+  %div = udiv i8 %tmp, %tmp1
+  store i8 %div, i8* %quotient, align 1
+  %tmp4 = load i8* %quotient, align 1
+  ret i8 %tmp4
+}