Fix X86's isTruncateFree to not claim that truncate to i1 is free. This fixes Bill's testcase that failed for r48491.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48542 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Christopher Lamb 2008-03-19 08:30:06 +00:00
parent e6d5d39c07
commit 15cbde3cf6
4 changed files with 27 additions and 47 deletions

View File

@ -2383,6 +2383,31 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) {
DAG.getConstant(Sum, N1C->getValueType(0)));
}
}
// fold sra (shl X, m), result_size - n
// -> (sign_extend (trunc (srl X, result_size - n - m))) for
// result_size - n != m. If truncate is free for the target,
// sext(trunc(srl)) is likely to result in better code.
if (N0.getOpcode() == ISD::SHL) {
// Get the two constant shift amounts: N01C for the inner shl, N1C for the sra.
const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (N01C && N1C) {
// Determine if the truncate type's bitsize would correspond to
// an integer type for this target.
unsigned VTValSize = MVT::getSizeInBits(VT);
MVT::ValueType TruncVT = MVT::getIntegerType(VTValSize - N1C->getValue());
unsigned ShiftAmt = N1C->getValue() - N01C->getValue();
// If the shift wouldn't be a noop, the truncated type is an actual type,
// and the truncate is free, then proceed with the transform.
if (ShiftAmt != 0 && TLI.isTruncateFree(VT, TruncVT)) {
SDOperand Amt = DAG.getConstant(ShiftAmt, TLI.getShiftAmountTy());
SDOperand Shift = DAG.getNode(ISD::SRL, VT, N0.getOperand(0), Amt);
SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, TruncVT, Shift);
return DAG.getNode(ISD::SIGN_EXTEND, N->getValueType(0), Trunc);
}
}
}
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))

View File

@ -1647,47 +1647,3 @@ The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
if it commuted the addl in LBB1_1.
//===---------------------------------------------------------------------===//
These two functions perform identical operations:
define i32 @test(i32 %f12) {
%tmp7.25 = lshr i32 %f12, 16
%tmp7.26 = trunc i32 %tmp7.25 to i8
%tmp78.2 = sext i8 %tmp7.26 to i32
ret i32 %tmp78.2
}
define i32 @test2(i32 %f12) {
%f11 = shl i32 %f12, 8
%tmp7.25 = ashr i32 %f11, 24
ret i32 %tmp7.25
}
but the first compiles into significantly better code on x86-32:
_test:
movsbl 6(%esp), %eax
ret
_test2:
movl 4(%esp), %eax
shll $8, %eax
sarl $24, %eax
ret
and on x86-64:
_test:
shrl $16, %edi
movsbl %dil, %eax
ret
_test2:
shll $8, %edi
movl %edi, %eax
sarl $24, %eax
ret
I would like instcombine to canonicalize the first into the second (since it is
shorter and doesn't involve type width changes) but the x86 backend needs to do
the right thing with the latter sequence first.
//===---------------------------------------------------------------------===//

View File

@ -5662,7 +5662,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
if (NumBits1 <= NumBits2)
if (NumBits1 <= NumBits2 || NumBits2 < 8)
return false;
return Subtarget->is64Bit() || NumBits1 < 64;
}
@ -5673,7 +5673,7 @@ bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
return false;
unsigned NumBits1 = MVT::getSizeInBits(VT1);
unsigned NumBits2 = MVT::getSizeInBits(VT2);
if (NumBits1 <= NumBits2)
if (NumBits1 <= NumBits2 || NumBits2 < 8)
return false;
return Subtarget->is64Bit() || NumBits1 < 64;
}

View File

@ -1,6 +1,5 @@
; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
; XFAIL: *
define i32 @test(i32 %f12) {
%tmp7.25 = lshr i32 %f12, 16