[fast-isel] Fold "urem x, pow2" -> "and x, pow2-1".

This should fix the 271% execution-time regression for nsieve-bits on the
ARMv7 -O0 -g nightly tester. It may also improve compile time on
architectures that would otherwise generate a libcall for urem (e.g., ARM)
or fall back to the DAG selector.

rdar://10810716
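
For context, the fold is justified by the identity "urem x, 2^k" == "and x, 2^k-1": an unsigned remainder modulo a power of two is exactly the low k bits of the dividend. A minimal standalone C++ sketch of the transformation (helper names here are hypothetical, not the LLVM implementation):

  #include <cassert>
  #include <cstdint>

  // Hypothetical stand-in for LLVM's isPowerOf2_64: a nonzero value is a
  // power of two iff clearing its lowest set bit leaves zero.
  static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

  // Fold "urem x, pow2" -> "and x, pow2-1"; fall back to a real remainder
  // for every other divisor.
  static uint64_t uremFolded(uint64_t X, uint64_t Divisor) {
    assert(Divisor != 0 && "remainder by zero is undefined");
    if (isPow2(Divisor))
      return X & (Divisor - 1); // low bits of X are the remainder mod 2^k
    return X % Divisor;
  }

  int main() {
    for (uint64_t X = 0; X < 1024; ++X)
      assert(uremFolded(X, 32) == X % 32); // spot-check the identity
    return 0;
  }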


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153230 91177308-0d34-0410-b5e6-96231b3b80d8
Chad Rosier 2012-03-22 00:21:17 +00:00
parent 55f43d6b7e
commit 544b9b426f
2 changed files with 16 additions and 0 deletions


@@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
       ISDOpcode = ISD::SRA;
     }
 
+    // Transform "urem x, pow2" -> "and x, pow2-1".
+    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+        isPowerOf2_64(Imm)) {
+      --Imm;
+      ISDOpcode = ISD::AND;
+    }
+
     unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
                                       Op0IsKill, Imm, VT.getSimpleVT());
     if (ResultReg == 0) return false;
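
The guard relies on LLVM's isPowerOf2_64 (from llvm/Support/MathExtras.h): only when the immediate has exactly one set bit does decrementing it produce the all-ones mask of the bits below, so the opcode can safely be switched from ISD::UREM to ISD::AND before FastEmit_ri_ emits the instruction. A couple of compile-time spot checks of that decrement-to-mask step (a standalone sketch, not LLVM code):

  #include <cstdint>

  // The fold's "--Imm" turns a power-of-two divisor into the mask of the
  // bits below its single set bit; verify two cases at compile time.
  static_assert((uint64_t(32) - 1) == 0x1F, "32 -> low-5-bit mask");
  static_assert((uint64_t(4096) - 1) == 0xFFF, "4096 -> low-12-bit mask");

  int main() { return 0; }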


@@ -217,3 +217,12 @@ entry:
 ; THUMB: vcmpe.f32 s0, #0
   ret i1 %4
 }
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
+}
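
The new test pins down the fold: the divisor 32 is 2^5, so fast-isel must emit the masked form "and r0, r0, #31" on both ARM and Thumb rather than a libcall for urem. The "; ARM:" and "; THUMB:" comments are FileCheck patterns; an illustrative way such a test is driven (the file's actual RUN lines are not part of this hunk, and test.ll merely stands in for the file name):

  llc < test.ll -O0 -mtriple=armv7-apple-darwin | FileCheck test.ll --check-prefix=ARM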