[fast-isel] Fold "urem x, pow2" -> "and x, pow2-1".

This should fix the 271% execution-time regression for nsieve-bits on the
ARMv7 -O0 -g nightly tester. It may also improve compile time on
architectures that would otherwise generate a libcall for urem (e.g., ARM)
or fall back to the DAG selector.

rdar://10810716

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153230 91177308-0d34-0410-b5e6-96231b3b80d8
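For context: the fold is the standard identity that, for unsigned x and a
power-of-two divisor 2^k, x % 2^k equals x & (2^k - 1), because the remainder
is exactly the low k bits of x. A minimal standalone C++ check of that
identity (illustrative only, not code from this commit):

#include <cassert>
#include <cstdint>

// For unsigned x and a power-of-two divisor p, x % p == x & (p - 1):
// the remainder modulo 2^k is exactly the low k bits of x.
int main() {
  for (uint32_t x = 0; x < 1000; ++x)
    assert(x % 32 == (x & (32 - 1)));  // 32 = 2^5, mask = 31
  return 0;
}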
commit 544b9b426f
parent 55f43d6b7e
@@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
       ISDOpcode = ISD::SRA;
     }
 
+    // Transform "urem x, pow2" -> "and x, pow2-1".
+    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+        isPowerOf2_64(Imm)) {
+      --Imm;
+      ISDOpcode = ISD::AND;
+    }
+
     unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
                                       Op0IsKill, Imm, VT.getSimpleVT());
     if (ResultReg == 0) return false;
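The change above is purely an opcode-and-immediate rewrite performed before
FastEmit_ri_ is invoked: when the immediate is a power of two, decrement it
to form the mask and swap ISD::UREM for ISD::AND. A standalone C++ sketch of
the same guard, with a hand-rolled stand-in for llvm::isPowerOf2_64 (the
names and the printout are illustrative, not part of the commit):

#include <cstdint>
#include <iostream>

// Stand-in for llvm::isPowerOf2_64: true iff exactly one bit is set.
static bool isPowerOf2(uint64_t V) {
  return V != 0 && (V & (V - 1)) == 0;
}

int main() {
  uint64_t Imm = 32;            // immediate operand of "urem x, 32"
  const char *Opcode = "UREM";
  if (isPowerOf2(Imm)) {        // same guard the FastISel change applies
    --Imm;                      // 32 -> 31, i.e. pow2 - 1
    Opcode = "AND";             // strength-reduce to a bitwise and
  }
  std::cout << Opcode << " x, #" << Imm << "\n";  // prints "AND x, #31"
  return 0;
}

The accompanying regression test exercises exactly this path: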
@@ -217,3 +217,12 @@ entry:
 ; THUMB: vcmpe.f32 s0, #0
   ret i1 %4
 }
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
+}
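The test divides by 32 (2^5), so both the ARM and THUMB CHECK lines expect
the mask #31. Note that the guard matches ISD::UREM only: the mask identity
does not hold for signed remainder, where a negative dividend produces a
different result. A quick C++ illustration of why srem is excluded (plain
C++ semantics, not code from the commit):

#include <iostream>

int main() {
  int x = -7;
  // Signed remainder truncates toward zero: -7 % 32 == -7,
  // but the mask trick would give -7 & 31 == 25, a different value.
  std::cout << (x % 32) << " vs " << (x & 31) << "\n";  // prints "-7 vs 25"
  return 0;
}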