A lo/hi mul has higher latency than an imul r,ri, e.g. 5 cycles compared to 3 on Core 2 and Nehalem, so the code we generate is better than GCC's here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126100 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Cameron Zwarich 2011-02-21 01:29:32 +00:00
parent 7d0805dcb8
commit ad48a4fc32

View File

@ -1878,38 +1878,3 @@ _add32carry:
ret
//===---------------------------------------------------------------------===//
This:
char t(char c) {
return c/3;
}
Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
_t: ## @t
movslq %edi, %rax
imulq $1431655766, %rax, %rax ## imm = 0x55555556
movq %rax, %rcx
shrq $63, %rcx
shrq $32, %rax
addl %ecx, %eax
movsbl %al, %eax
ret
GCC gets:
_t:
movl $86, %eax
imulb %dil
shrw $8, %ax
sarb $7, %dil
subb %dil, %al
movsbl %al,%eax
ret
which is nicer. This also happens for int, not just char.
//===---------------------------------------------------------------------===//