From fb418bab977ad1484f9513de82d710d88d239bc2 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Thu, 12 Jan 2012 12:41:34 +0000
Subject: [PATCH] X86: Generalize the x << (y & const) optimization to also
 catch masks with more bits set than 31 or 63.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148024 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrCompiler.td | 46 ++++++++++++++++--------------
 test/CodeGen/X86/shift-and.ll      | 18 ++++++++++--
 2 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index a5c05de0199..281c51b2f37 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1458,58 +1458,62 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
+// Helper imms that check if a mask doesn't change significant shift bits.
+def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_64(Imm) >= 5; }]>;
+def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_64(Imm) >= 6; }]>;
+
 // (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
+def : Pat<(shl GR8:$src1, (and CL, immShift32)),
           (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
+def : Pat<(shl GR16:$src1, (and CL, immShift32)),
           (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
+def : Pat<(shl GR32:$src1, (and CL, immShift32)),
           (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
           (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
           (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
           (SHL32mCL addr:$dst)>;
 
-def : Pat<(srl GR8:$src1, (and CL, 31)),
+def : Pat<(srl GR8:$src1, (and CL, immShift32)),
           (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
+def : Pat<(srl GR16:$src1, (and CL, immShift32)),
          (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
+def : Pat<(srl GR32:$src1, (and CL, immShift32)),
          (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHR32mCL addr:$dst)>;
 
-def : Pat<(sra GR8:$src1, (and CL, 31)),
+def : Pat<(sra GR8:$src1, (and CL, immShift32)),
          (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
+def : Pat<(sra GR16:$src1, (and CL, immShift32)),
          (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
+def : Pat<(sra GR32:$src1, (and CL, immShift32)),
          (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SAR32mCL addr:$dst)>;
 
 // (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
+def : Pat<(shl GR64:$src1, (and CL, immShift64)),
          (SHL64rCL GR64:$src1)>;
 def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
          (SHL64mCL addr:$dst)>;
-def : Pat<(srl GR64:$src1, (and CL, 63)),
+def : Pat<(srl GR64:$src1, (and CL, immShift64)),
          (SHR64rCL GR64:$src1)>;
 def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
          (SHR64mCL addr:$dst)>;
-def : Pat<(sra GR64:$src1, (and CL, 63)),
+def : Pat<(sra GR64:$src1, (and CL, immShift64)),
          (SAR64rCL GR64:$src1)>;
 def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
          (SAR64mCL addr:$dst)>;
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index fd278c2239f..b747cc5580c 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86 | grep and | count 2
 ; RUN: llc < %s -march=x86-64 | not grep and
 
 define i32 @t1(i32 %t, i32 %val) nounwind {
@@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
   ret i32 %res
 }
 
+define i32 @t2(i32 %t, i32 %val) nounwind {
+  %shamt = and i32 %t, 63
+  %res = shl i32 %val, %shamt
+  ret i32 %res
+}
+
 @X = internal global i16 0
 
-define void @t2(i16 %t) nounwind {
+define void @t3(i16 %t) nounwind {
   %shamt = and i16 %t, 31
   %tmp = load i16* @X
   %tmp1 = ashr i16 %tmp, %shamt
@@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
   ret void
 }
 
-define i64 @t3(i64 %t, i64 %val) nounwind {
+define i64 @t4(i64 %t, i64 %val) nounwind {
   %shamt = and i64 %t, 63
   %res = lshr i64 %val, %shamt
   ret i64 %res
 }
+
+define i64 @t5(i64 %t, i64 %val) nounwind {
+  %shamt = and i64 %t, 191
+  %res = lshr i64 %val, %shamt
+  ret i64 %res
+}
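
Illustrative note (not part of the commit): an x86 variable-count shift of an
8/16/32-bit operand reads only the low 5 bits of CL (6 bits for a 64-bit
operand), so any mask whose low 5 (or 6) bits are all ones leaves the shift
count unchanged and the "and" can be dropped. A minimal IR sketch of the kind
of input the new predicate is meant to cover; the function name and the mask
value 127 are made up for illustration and are not taken from the patch:

  ; 127 has 7 trailing one bits (>= 5), so immShift32 should match here and
  ; llc -march=x86 is expected to emit shll %cl, ... with no preceding andl.
  define i32 @shl_mask127(i32 %val, i32 %t) nounwind {
    %shamt = and i32 %t, 127     ; low 5 bits of the mask are all ones
    %res = shl i32 %val, %shamt  ; hardware uses only CL[4:0] for a 32-bit shl
    ret i32 %res
  }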