Chandler Carruth 7782102c70 Use standard promotion for i8 CTTZ nodes and i8 CTLZ nodes when the
LZCNT instructions are available. Force promotion to i32 to get
a smaller encoding since the fix-ups necessary are just as complex for
either promoted type

We can't do standard promotion for CTLZ when lowering through BSR
because it results in poor code surrounding the 'xor' at the end of this
instruction. Essentially, if we promote the entire CTLZ node to i32, we
end up doing the xor on a 32-bit CTLZ implementation, and then
subtracting appropriately to get back to an i8 value. Instead, our
custom logic just uses the knowledge of the incoming size to compute
a perfect xor. I'd love to know of a way to fix this, but so far I'm
drawing a blank. I suspect the legalizer could be more clever and/or it
could collude with the DAG combiner, but how... ;]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147251 91177308-0d34-0410-b5e6-96231b3b80d8
2011-12-24 12:12:34 +00:00

142 lines
2.9 KiB
LLVM

; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i8 @llvm.ctlz.i8(i8, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
define i8 @cttz_i8(i8 %x) {
%tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
ret i8 %tmp
; CHECK: cttz_i8:
; CHECK: bsfl
; CHECK-NOT: cmov
; CHECK: ret
}
define i16 @cttz_i16(i16 %x) {
%tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
ret i16 %tmp
; CHECK: cttz_i16:
; CHECK: bsfw
; CHECK-NOT: cmov
; CHECK: ret
}
define i32 @cttz_i32(i32 %x) {
%tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
ret i32 %tmp
; CHECK: cttz_i32:
; CHECK: bsfl
; CHECK-NOT: cmov
; CHECK: ret
}
define i64 @cttz_i64(i64 %x) {
%tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
ret i64 %tmp
; CHECK: cttz_i64:
; CHECK: bsfq
; CHECK-NOT: cmov
; CHECK: ret
}
define i8 @ctlz_i8(i8 %x) {
entry:
%tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
ret i8 %tmp2
; CHECK: ctlz_i8:
; CHECK: bsrl
; CHECK-NOT: cmov
; CHECK: xorl $7,
; CHECK: ret
}
define i16 @ctlz_i16(i16 %x) {
entry:
%tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
ret i16 %tmp2
; CHECK: ctlz_i16:
; CHECK: bsrw
; CHECK-NOT: cmov
; CHECK: xorl $15,
; CHECK: ret
}
define i32 @ctlz_i32(i32 %x) {
%tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp
; CHECK: ctlz_i32:
; CHECK: bsrl
; CHECK-NOT: cmov
; CHECK: xorl $31,
; CHECK: ret
}
define i64 @ctlz_i64(i64 %x) {
%tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
ret i64 %tmp
; CHECK: ctlz_i64:
; CHECK: bsrq
; CHECK-NOT: cmov
; CHECK: xorq $63,
; CHECK: ret
}
define i32 @ctlz_i32_cmov(i32 %n) {
entry:
; Generate a cmov to handle zero inputs when necessary.
; CHECK: ctlz_i32_cmov:
; CHECK: bsrl
; CHECK: cmov
; CHECK: xorl $31,
; CHECK: ret
%tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
ret i32 %tmp1
}
define i32 @ctlz_i32_fold_cmov(i32 %n) {
entry:
; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
; CHECK: ctlz_i32_fold_cmov:
; CHECK: bsrl
; CHECK-NOT: cmov
; CHECK: xorl $31,
; CHECK: ret
%or = or i32 %n, 1
%tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
ret i32 %tmp1
}
define i32 @ctlz_bsr(i32 %n) {
entry:
; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.
; CHECK: ctlz_bsr:
; CHECK: bsrl
; CHECK-NOT: xorl
; CHECK: ret
%ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
%bsr = xor i32 %ctlz, 31
ret i32 %bsr
}
define i32 @ctlz_bsr_cmov(i32 %n) {
entry:
; Same as ctlz_bsr, but ensure this happens even when there is a potential
; zero.
; CHECK: ctlz_bsr_cmov:
; CHECK: bsrl
; CHECK-NOT: xorl
; CHECK: ret
%ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
%bsr = xor i32 %ctlz, 31
ret i32 %bsr
}