Evan Cheng 733c6b1db1 LLVM sdisel normalize bit extraction of the form:
((x & 0xff00) >> 8) << 2
to
 (x >> 6) & 0x3fc

This is general goodness since it folds a left shift into the mask. However,
the trailing zeros in the mask prevents the ARM backend from using the bit
extraction instructions. And worse since the mask materialization may require
an addition instruction. This comes up fairly frequently when the result of 
the bit twiddling is used as memory address. e.g.

 = ptr[(x & 0xFF0000) >> 16]

We want to generate:
  ubfx   r3, r1, #16, #8
  ldr.w  r3, [r0, r3, lsl #2]

vs.
  mov.w  r9, #1020
  and.w  r2, r9, r1, lsr #14
  ldr    r2, [r0, r2]

Add a late ARM specific isel optimization to
ARMDAGToDAGISel::PreprocessISelDAG(). It folds the left shift to the
'base + offset' address computation; change the mask to one which doesn't have
trailing zeros and enable the use of ubfx.

Note the optimization has to be done late since it's target specific and we
don't want to change the DAG normalization. It's also fairly restrictive
as shifter operands are not always free. It's only done for lsh 1 / 2. It's
known to be free on some cpus and they are most common for address
computation.

This is a slight win for blowfish, rijndael, etc.

rdar://12870177


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170581 91177308-0d34-0410-b5e6-96231b3b80d8
2012-12-19 20:16:09 +00:00

54 lines
1.4 KiB
LLVM

; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
define i32 @sbfx1(i32 %a) {
; CHECK: sbfx1
; CHECK: sbfx r0, r0, #7, #11
%t1 = lshr i32 %a, 7
%t2 = trunc i32 %t1 to i11
%t3 = sext i11 %t2 to i32
ret i32 %t3
}
define i32 @ubfx1(i32 %a) {
; CHECK: ubfx1
; CHECK: ubfx r0, r0, #7, #11
%t1 = lshr i32 %a, 7
%t2 = trunc i32 %t1 to i11
%t3 = zext i11 %t2 to i32
ret i32 %t3
}
define i32 @ubfx2(i32 %a) {
; CHECK: ubfx2
; CHECK: ubfx r0, r0, #7, #11
%t1 = lshr i32 %a, 7
%t2 = and i32 %t1, 2047
ret i32 %t2
}
; rdar://12870177
define i32 @ubfx_opt(i32* nocapture %ctx, i32 %x) nounwind readonly ssp {
entry:
; CHECK: ubfx_opt
; CHECK: lsr [[REG1:(lr|r[0-9]+)]], r1, #24
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG1]], lsl #2]
; CHECK: ubfx [[REG2:(lr|r[0-9]+)]], r1, #16, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG2]], lsl #2]
; CHECK: ubfx [[REG3:(lr|r[0-9]+)]], r1, #8, #8
; CHECK: ldr {{lr|r[0-9]+}}, [r0, [[REG3]], lsl #2]
%and = lshr i32 %x, 8
%shr = and i32 %and, 255
%and1 = lshr i32 %x, 16
%shr2 = and i32 %and1, 255
%shr4 = lshr i32 %x, 24
%arrayidx = getelementptr inbounds i32* %ctx, i32 %shr4
%0 = load i32* %arrayidx, align 4
%arrayidx5 = getelementptr inbounds i32* %ctx, i32 %shr2
%1 = load i32* %arrayidx5, align 4
%add = add i32 %1, %0
%arrayidx6 = getelementptr inbounds i32* %ctx, i32 %shr
%2 = load i32* %arrayidx6, align 4
%add7 = add i32 %add, %2
ret i32 %add7
}