Fix X86's isTruncateFree to not claim that truncate to i1 is free. This fixes Bill's testcase that failed for r48491.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48542 91177308-0d34-0410-b5e6-96231b3b80d8
2024-07-18 12:29:27 +00:00 · 2008-03-19 08:30:06 +00:00 · 2008-03-19 08:30:06 +00:00 · 15cbde3cf6
commit 15cbde3cf6
parent e6d5d39c07
4 changed files with 27 additions and 47 deletions
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -2383,6 +2383,31 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) {
                         DAG.getConstant(Sum, N1C->getValueType(0)));
    }
  }
+
+  // fold sra (shl X, m), result_size - n
+  // -> (sign_extend (trunc (srl X, result_size - n - m))) for
+  // result_size - n != m. If truncate is free for the target sext(trunc(srl))
+  // is likely to result in better code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts: N01C = m, N1C = result_size - n.
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Determine if the truncate type's bitsize would correspond to
+      // an integer type for this target.
+      unsigned VTValSize = MVT::getSizeInBits(VT);
+      MVT::ValueType TruncVT = MVT::getIntegerType(VTValSize - N1C->getValue());
+      unsigned ShiftAmt = N1C->getValue() - N01C->getValue();
+
+      // If the shift wouldn't be a noop, the truncated type is an actual type,
+      // and the truncate is free, then proceed with the transform.
+      if (ShiftAmt != 0 && TLI.isTruncateFree(VT, TruncVT)) {
+        SDOperand Amt = DAG.getConstant(ShiftAmt, TLI.getShiftAmountTy());
+        SDOperand Shift = DAG.getNode(ISD::SRL, VT, N0.getOperand(0), Amt);
+        SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, TruncVT, Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getValueType(0), Trunc);
+      }
+    }
+  }
  
  // Simplify, based on bits shifted out of the LHS. 
  if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@ -1647,47 +1647,3 @@ The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
 if it commuted the addl in LBB1_1.

 //===---------------------------------------------------------------------===//
-
-These two functions perform identical operations:
-
-define i32 @test(i32 %f12) {
-	%tmp7.25 = lshr i32 %f12, 16		
-	%tmp7.26 = trunc i32 %tmp7.25 to i8
-	%tmp78.2 = sext i8 %tmp7.26 to i32
-	ret i32 %tmp78.2
-}
-
-define i32 @test2(i32 %f12) {
-	%f11 = shl i32 %f12, 8
-	%tmp7.25 = ashr i32 %f11, 24
-	ret i32 %tmp7.25
-}
-
-but the first compiles into significantly better code on x86-32:
-
-_test:
-	movsbl	6(%esp), %eax
-	ret
-_test2:
-	movl	4(%esp), %eax
-	shll	$8, %eax
-	sarl	$24, %eax
-	ret
-        
-and on x86-64:
-
-_test:
-	shrl	$16, %edi
-	movsbl	%dil, %eax
-	ret
-_test2:
-	shll	$8, %edi
-	movl	%edi, %eax
-	sarl	$24, %eax
-	ret
-
-I would like instcombine to canonicalize the first into the second (since it is
-shorter and doesn't involve type width changes) but the x86 backend needs to do
-the right thing with the later sequence first.
-
-//===---------------------------------------------------------------------===//
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -5662,7 +5662,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  if (NumBits1 <= NumBits2)
+  if (NumBits1 <= NumBits2 || NumBits2 < 8)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
 }
@ -5673,7 +5673,7 @@ bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
    return false;
  unsigned NumBits1 = MVT::getSizeInBits(VT1);
  unsigned NumBits2 = MVT::getSizeInBits(VT2);
-  if (NumBits1 <= NumBits2)
+  if (NumBits1 <= NumBits2 || NumBits2 < 8)
    return false;
  return Subtarget->is64Bit() || NumBits1 < 64;
 }
--- a/test/CodeGen/X86/field-extract-use-trunc.ll
+++ b/test/CodeGen/X86/field-extract-use-trunc.ll
@ -1,6 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
 ; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
-; XFAIL: *

 define i32 @test(i32 %f12) {
 	%tmp7.25 = lshr i32 %f12, 16