A lo/hi mul has higher latency than an imul r,ri (e.g. 5 cycles compared to 3 on Core 2 and Nehalem), so the code we generate is better than GCC's here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126100 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-12 23:37:33 +00:00 · 2011-02-21 01:29:32 +00:00 · 2011-02-21 01:29:32 +00:00 · ad48a4fc32
commit ad48a4fc32
parent 7d0805dcb8
1 changed file with 0 additions and 35 deletions
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1878,38 +1878,3 @@ _add32carry:
 	ret

 //===---------------------------------------------------------------------===//
-
-This:
-char t(char c) {
-  return c/3;
-}
-
-Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
-
-_t:                                     ## @t
-	movslq	%edi, %rax
-	imulq	$1431655766, %rax, %rax ## imm = 0x55555556
-	movq	%rax, %rcx
-	shrq	$63, %rcx
-	shrq	$32, %rax
-	addl	%ecx, %eax
-	movsbl	%al, %eax
-	ret
-
-GCC gets:
-
-_t:
-	movl	$86, %eax
-	imulb	%dil
-	shrw	$8, %ax
-	sarb	$7, %dil
-	subb	%dil, %al
-	movsbl	%al,%eax
-	ret
-
-which is nicer.  This also happens for int, not just char.
-
-//===---------------------------------------------------------------------===//
-
-
-