From 530d09a22bd2f5c638ae1932bed560c8a46e399e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 19 Sep 2013 13:28:20 +0000 Subject: [PATCH] DAGCombiner: Don't fold vector muls with constants that look like a splat of a power of 2 but differ in bit width. PR17283. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191000 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++++--- test/CodeGen/X86/avx2-arith.ll | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 67f3f0643e4..0eecd39d247 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1824,20 +1824,24 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. + bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1) + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnesValue()) return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2()) + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2()) { + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll index 997fa190eca..72bdd9d0472 100644 --- a/test/CodeGen/X86/avx2-arith.ll +++ b/test/CodeGen/X86/avx2-arith.ll @@ -148,3 +148,21 @@ define <8 x i32> @mul_const9(<8 x i32> %x) { %y = mul <8 x i32> %x, ret <8 x i32> %y } + +; CHECK: mul_const10 +; CHECK: vpmulld +; CHECK: ret +define <4 x i32> @mul_const10(<4 x i32> %x) { + ; %x * 0x01010101 + %m = mul <4 x i32> %x, + ret <4 x i32> %m +} + +; CHECK: mul_const11 +; CHECK: vpmulld +; CHECK: ret +define <4 x i32> @mul_const11(<4 x i32> %x) { + ; %x * 0x80808080 + %m = mul <4 x i32> %x, + ret <4 x i32> %m +}