X86: Generalize the x << (y & const) optimization to also catch masks with more set bits set than 31 or 63.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148024 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2012-01-12 12:41:34 +00:00
parent e5dafc3956
commit fb418bab97
2 changed files with 40 additions and 24 deletions

View File

@ -1458,58 +1458,62 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
// Helper imms that check if a mask doesn't change significant shift bits.
def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
// (shl x (and y, 31)) ==> (shl x, y)
def : Pat<(shl GR8:$src1, (and CL, 31)),
def : Pat<(shl GR8:$src1, (and CL, immShift32)),
(SHL8rCL GR8:$src1)>;
def : Pat<(shl GR16:$src1, (and CL, 31)),
def : Pat<(shl GR16:$src1, (and CL, immShift32)),
(SHL16rCL GR16:$src1)>;
def : Pat<(shl GR32:$src1, (and CL, 31)),
def : Pat<(shl GR32:$src1, (and CL, immShift32)),
(SHL32rCL GR32:$src1)>;
def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL8mCL addr:$dst)>;
def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL16mCL addr:$dst)>;
def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL32mCL addr:$dst)>;
def : Pat<(srl GR8:$src1, (and CL, 31)),
def : Pat<(srl GR8:$src1, (and CL, immShift32)),
(SHR8rCL GR8:$src1)>;
def : Pat<(srl GR16:$src1, (and CL, 31)),
def : Pat<(srl GR16:$src1, (and CL, immShift32)),
(SHR16rCL GR16:$src1)>;
def : Pat<(srl GR32:$src1, (and CL, 31)),
def : Pat<(srl GR32:$src1, (and CL, immShift32)),
(SHR32rCL GR32:$src1)>;
def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR8mCL addr:$dst)>;
def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR16mCL addr:$dst)>;
def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR32mCL addr:$dst)>;
def : Pat<(sra GR8:$src1, (and CL, 31)),
def : Pat<(sra GR8:$src1, (and CL, immShift32)),
(SAR8rCL GR8:$src1)>;
def : Pat<(sra GR16:$src1, (and CL, 31)),
def : Pat<(sra GR16:$src1, (and CL, immShift32)),
(SAR16rCL GR16:$src1)>;
def : Pat<(sra GR32:$src1, (and CL, 31)),
def : Pat<(sra GR32:$src1, (and CL, immShift32)),
(SAR32rCL GR32:$src1)>;
def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR8mCL addr:$dst)>;
def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR16mCL addr:$dst)>;
def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (shl x (and y, 63)) ==> (shl x, y)
def : Pat<(shl GR64:$src1, (and CL, 63)),
def : Pat<(shl GR64:$src1, (and CL, immShift64)),
(SHL64rCL GR64:$src1)>;
def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
def : Pat<(srl GR64:$src1, (and CL, 63)),
def : Pat<(srl GR64:$src1, (and CL, immShift64)),
(SHR64rCL GR64:$src1)>;
def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
def : Pat<(sra GR64:$src1, (and CL, 63)),
def : Pat<(sra GR64:$src1, (and CL, immShift64)),
(SAR64rCL GR64:$src1)>;
def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 | grep and | count 1
; RUN: llc < %s -march=x86 | grep and | count 2
; RUN: llc < %s -march=x86-64 | not grep and
define i32 @t1(i32 %t, i32 %val) nounwind {
@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
ret i32 %res
}
define i32 @t2(i32 %t, i32 %val) nounwind {
%shamt = and i32 %t, 63
%res = shl i32 %val, %shamt
ret i32 %res
}
@X = internal global i16 0
define void @t2(i16 %t) nounwind {
define void @t3(i16 %t) nounwind {
%shamt = and i16 %t, 31
%tmp = load i16* @X
%tmp1 = ashr i16 %tmp, %shamt
@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
ret void
}
define i64 @t3(i64 %t, i64 %val) nounwind {
define i64 @t4(i64 %t, i64 %val) nounwind {
%shamt = and i64 %t, 63
%res = lshr i64 %val, %shamt
ret i64 %res
}
define i64 @t5(i64 %t, i64 %val) nounwind {
%shamt = and i64 %t, 191
%res = lshr i64 %val, %shamt
ret i64 %res
}