From 544b9b426f0dfa1beef6dc3640607dee3ca1160e Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Thu, 22 Mar 2012 00:21:17 +0000
Subject: [PATCH] [fast-isel] Fold "urem x, pow2" -> "and x, pow2-1".

This should fix the 271% execution-time regression for nsieve-bits on the
ARMv7 -O0 -g nightly tester. This may also improve compile-time on
architectures that would otherwise generate a libcall for urem (e.g., ARM)
or fall back to the DAG selector.
rdar://10810716

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153230 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/FastISel.cpp | 7 +++++++
 test/CodeGen/ARM/fast-isel.ll         | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9f4a44a4927..4db10b75868 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
       ISDOpcode = ISD::SRA;
     }
 
+    // Transform "urem x, pow2" -> "and x, pow2-1".
+    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+        isPowerOf2_64(Imm)) {
+      --Imm;
+      ISDOpcode = ISD::AND;
+    }
+
     unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
                                       Op0IsKill, Imm, VT.getSimpleVT());
     if (ResultReg == 0) return false;
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 905543a54cf..417e2d9e410 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -217,3 +217,12 @@ entry:
 ; THUMB: vcmpe.f32 s0, #0
   ret i1 %4
 }
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
+}
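
Note (not part of the patch): the fold relies on the identity that, for an
unsigned x and a power-of-two divisor, x % pow2 == x & (pow2 - 1), which is
why the transform is guarded by isPowerOf2_64(Imm). Below is a minimal
standalone C++ sketch of that identity; the helper name fold_urem_pow2 is
hypothetical and only for illustration, not code from LLVM.

    #include <cassert>
    #include <cstdint>

    // Computes x % pow2 with a bitwise AND; pow2 must be a power of two.
    static uint32_t fold_urem_pow2(uint32_t x, uint32_t pow2) {
      return x & (pow2 - 1);
    }

    int main() {
      // Mirrors the @urem_fold test above: urem by 32 becomes "and ..., #31".
      for (uint32_t x = 0; x < 1000; ++x)
        assert(fold_urem_pow2(x, 32) == x % 32);
      return 0;
    }

The same rewrite is not valid for signed remainder (srem) because of negative
operands, which is why the patch restricts the fold to ISD::UREM.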