DAGCombiner::ReduceLoadWidth: only narrow when the operand is really a load,
and compare the shift amount against the width of the loaded memory type
rather than the width of the result type VT (PR8757).

The regression test file is updated alongside: @D is renamed to @test1,
@main to @test2 (together with its CHECK label), and @test3 is added to
cover PR8757.

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2523bb2451b..e4f30270538 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4241,12 +4241,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
           return SDValue();
       }
 
+      // At this point, we must have a load or else we can't do the transform.
+      if (!isa<LoadSDNode>(N0)) return SDValue();
+
       // If the shift amount is larger than the input type then we're not
       // accessing any of the loaded bytes. If the load was a zextload/extload
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
       // If the load was a sextload then the result is a splat of the sign bit
       // of the extended byte. This is not worth optimizing for.
-      if (ShAmt >= VT.getSizeInBits())
+      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
     }
   }
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 53b03884a58..ef27cbc3418 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-pc-linux-gnu"
 ; DAGCombiner should fold this code in finite time.
 ; rdar://8606584
 
-define void @D() nounwind readnone {
+define void @test1() nounwind readnone {
 bb.nph:
   br label %while.cond
 
@@ -33,10 +33,10 @@ while.end: ; preds = %while.cond
 
 ; DAGCombiner shouldn't fold the sdiv (ashr) away.
 ; rdar://8636812
-; CHECK: main:
+; CHECK: test2:
 ; CHECK: sarl
 
-define i32 @main() nounwind {
+define i32 @test2() nounwind {
 entry:
   %i = alloca i32, align 4
   %j = alloca i8, align 1
@@ -63,3 +63,21 @@ if.end: ; preds = %entry
 declare void @abort() noreturn
 
 declare void @exit(i32) noreturn
+
+; DAG Combiner can't fold this into a load of the byte at offset 1.
+; PR8757
+define i32 @test3(i32 *%P) nounwind ssp {
+  volatile store i32 128, i32* %P
+  %tmp4.pre = load i32* %P
+  %phitmp = trunc i32 %tmp4.pre to i16
+  %phitmp13 = shl i16 %phitmp, 8
+  %phitmp14 = ashr i16 %phitmp13, 8
+  %phitmp15 = lshr i16 %phitmp14, 8
+  %phitmp16 = zext i16 %phitmp15 to i32
+  ret i32 %phitmp16
+
+; CHECK: movl $128, (%rdi)
+; CHECK-NEXT: movsbl (%rdi), %eax
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: ret
+}