This patch addresses an incorrect transformation in the DAG combiner.

The included test case is derived from one of the GCC compatibility tests.
The problem arises after the selection DAG has been converted to type-legalized
form.  The combiner first sees a 64-bit load that can be converted into a
pre-increment form.  The original load feeds into a SRL that isolates the
upper 32 bits of the loaded doubleword.  This looks like an opportunity for
DAGCombiner::ReduceLoadWidth() to replace the 64-bit load with a 32-bit load.

However, this transformation is not valid, as the replacement load is not
a pre-increment load.  The pre-increment load produces an extra result,
which feeds a subsequent add instruction.  The replacement load only has
one result value, and this value is propagated to all uses of the pre-
increment load, including the add.  Because the add is looking for the
second result value as its operand, it ends up attempting to add a constant
to a token chain, resulting in a crash.

So the patch simply disables this transformation for any load with more than
two result values.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172480 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2013-01-14 22:04:38 +00:00
parent 19d5433716
commit 89e88e30bf
2 changed files with 49 additions and 5 deletions

View File

@ -5100,16 +5100,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// If we haven't found a load, we can't narrow it. Don't transform one with // If we haven't found a load, we can't narrow it. Don't transform one with
// multiple uses, this would require adding a new load. // multiple uses, this would require adding a new load.
if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
return SDValue();
// Don't change the width of a volatile load. // Don't change the width of a volatile load.
cast<LoadSDNode>(N0)->isVolatile()) LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (LN0->isVolatile())
return SDValue(); return SDValue();
// Verify that we are actually reducing a load width here. // Verify that we are actually reducing a load width here.
if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
return SDValue();
// For the transform to be legal, the load must produce only two values
// (the value loaded and the chain). Don't transform a pre-increment
// load, for example, which produces an extra value. Otherwise the
// transformation is not equivalent, and the downstream logic to replace
// uses gets things wrong.
if (LN0->getNumValues() > 2)
return SDValue(); return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType(); EVT PtrType = N0.getOperand(1).getValueType();
if (PtrType == MVT::Untyped || PtrType.isExtended()) if (PtrType == MVT::Untyped || PtrType.isExtended())

View File

@ -0,0 +1,34 @@
; RUN: llc < %s
; This used to cause a crash. A standard load is converted to a pre-increment
; load. Later the pre-increment load is combined with a subsequent SRL to
; produce a smaller load. This transform invalidly created a standard load
; and propagated the produced value into uses of both produced values of the
; pre-increment load. The result was a crash when attempting to process an
; add with a token-chain operand.
%struct.Info = type { i32, i32, i8*, i8*, i8*, [32 x i8*], i64, [32 x i64], i64, i64, i64, [32 x i64] }
%struct.S1847 = type { [12 x i8], [4 x i8], [8 x i8], [4 x i8], [8 x i8], [2 x i8], i8, [4 x i64], i8, [3 x i8], [4 x i8], i8, i16, [4 x %struct.anon.76], i16, i8, i8* }
%struct.anon.76 = type { i32 }
@info = common global %struct.Info zeroinitializer, align 8
@fails = common global i32 0, align 4
@a1847 = external global [5 x %struct.S1847]
define void @test1847() nounwind {
entry:
%j = alloca i32, align 4
%0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
%1 = load i32* @fails, align 4
%bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
%bf.clear2 = and i96 %bf.load1, 302231454903657293676543
%bf.set3 = or i96 %bf.clear2, -38383394772764476296921088
store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
%2 = load i32* %j, align 4
%3 = load i32* %j, align 4
%inc11 = add nsw i32 %3, 1
store i32 %inc11, i32* %j, align 4
%bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
%bf.clear16 = and i96 %bf.load15, -18446744069414584321
%bf.set17 = or i96 %bf.clear16, 18446743532543672320
store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
ret void
}