mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-10 01:10:48 +00:00
733c6b1db1
((x & 0xff00) >> 8) << 2 to (x >> 6) & 0x3fc This is general goodness since it folds a left shift into the mask. However, the trailing zeros in the mask prevent the ARM backend from using the bit extraction instructions. Worse, materializing the mask may require an additional instruction. This comes up fairly frequently when the result of the bit twiddling is used as a memory address, e.g. = ptr[(x & 0xFF0000) >> 16] We want to generate: ubfx r3, r1, #16, #8 ldr.w r3, [r0, r3, lsl #2] vs. mov.w r9, #1020 and.w r2, r9, r1, lsr #14 ldr r2, [r0, r2] Add a late ARM-specific isel optimization to ARMDAGToDAGISel::PreprocessISelDAG(). It folds the left shift into the 'base + offset' address computation, changes the mask to one which doesn't have trailing zeros, and enables the use of ubfx. Note the optimization has to be done late since it's target specific and we don't want to change the DAG normalization. It's also fairly restrictive, as shifter operands are not always free. It's only done for left shifts by 1 or 2: these are known to be free on some CPUs and they are the most common in address computation. This is a slight win for blowfish, rijndael, etc. rdar://12870177 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170581 91177308-0d34-0410-b5e6-96231b3b80d8
54 lines
1.4 KiB
LLVM
54 lines
1.4 KiB
LLVM
; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
|
|
|
|
; sbfx1: signed bit-field extract.
; Shifting %a right by 7, truncating to 11 bits and sign-extending back to
; i32 yields bits [17:7] of %a as a signed value.  The CHECK lines below
; expect this whole sequence to become one SBFX with lsb #7 and width #11.
define i32 @sbfx1(i32 %a) {
; CHECK: sbfx1
; CHECK: sbfx r0, r0, #7, #11
  %shifted = lshr i32 %a, 7               ; move bit 7 down to bit 0
  %field = trunc i32 %shifted to i11      ; keep the low 11 bits
  %result = sext i11 %field to i32        ; widen, replicating the field's top bit
  ret i32 %result
}
|
|
|
|
; ubfx1: unsigned bit-field extract written as shift + trunc + zext.
; Shifting %a right by 7, truncating to 11 bits and zero-extending back to
; i32 yields bits [17:7] of %a as an unsigned value.  The CHECK lines below
; expect this whole sequence to become one UBFX with lsb #7 and width #11.
define i32 @ubfx1(i32 %a) {
; CHECK: ubfx1
; CHECK: ubfx r0, r0, #7, #11
  %shifted = lshr i32 %a, 7               ; move bit 7 down to bit 0
  %field = trunc i32 %shifted to i11      ; keep the low 11 bits
  %result = zext i11 %field to i32        ; widen with zero fill
  ret i32 %result
}
|
|
|
|
; ubfx2: unsigned bit-field extract written as shift + mask.
; 2047 is 0x7ff, i.e. an 11-bit mask with no trailing zeros, so the result is
; bits [17:7] of %a.  The CHECK lines below expect the same UBFX (lsb #7,
; width #11) that the trunc/zext form in @ubfx1 is expected to produce.
define i32 @ubfx2(i32 %a) {
; CHECK: ubfx2
; CHECK: ubfx r0, r0, #7, #11
  %shifted = lshr i32 %a, 7               ; move bit 7 down to bit 0
  %field = and i32 %shifted, 2047         ; keep the low 11 bits
  ret i32 %field
}
|
|
|
|
; rdar://12870177
|
|
; ubfx_opt: byte extraction used as an i32 array index.
; Three byte-sized fields of %x (bits [15:8], [23:16] and [31:24]) each index
; the i32 array %ctx, and the three loaded elements are summed.
;
; The CHECK lines expect, in this order:
;   - the top byte to be produced by a plain LSR #24,
;   - the two inner bytes to be produced by UBFX (#16,#8 and #8,#8),
;   - every load to scale its index with "lsl #2" inside the addressing mode,
;     rather than using a pre-shifted mask such as (x >> 6) & 0x3fc.
define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp {
entry:
; CHECK: ubfx_opt
; CHECK: lsr [[REG1:(lr|r[0-9]+)]], r1, #24
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG1]], lsl #2]
; CHECK: ubfx [[REG2:(lr|r[0-9]+)]], r1, #16, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG2]], lsl #2]
; CHECK: ubfx [[REG3:(lr|r[0-9]+)]], r1, #8, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG3]], lsl #2]
  ; Index fields: byte 1 and byte 2 need shift + mask; byte 3 only a shift,
  ; because lshr already zero-fills everything above bit 7.
  %shr8 = lshr i32 %x, 8
  %byte1 = and i32 %shr8, 255
  %shr16 = lshr i32 %x, 16
  %byte2 = and i32 %shr16, 255
  %byte3 = lshr i32 %x, 24

  ; ctx[byte3] + ctx[byte2]
  %slot3 = getelementptr inbounds i32* %ctx, i32 %byte3
  %elt3 = load i32* %slot3, align 4
  %slot2 = getelementptr inbounds i32* %ctx, i32 %byte2
  %elt2 = load i32* %slot2, align 4
  %sum32 = add i32 %elt2, %elt3

  ; ... + ctx[byte1]
  %slot1 = getelementptr inbounds i32* %ctx, i32 %byte1
  %elt1 = load i32* %slot1, align 4
  %sum = add i32 %sum32, %elt1
  ret i32 %sum
}
|