It turns out that when the ".with.overflow" intrinsics were added to the X86
backend, they were all implemented except umul. That one fell back to the
default lowering, which did a hi/lo multiply and compared the top half
against zero. Fix this to check the overflow flag that the 'mul' instruction
sets, so we can avoid an explicit test. Now we compile:
void *func(long count) {
  return new int[count];
}
into:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
seto %cl ## encoding: [0x0f,0x90,0xc1]
testb %cl, %cl ## encoding: [0x84,0xc9]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
instead of:
__Z4funcl: ## @_Z4funcl
movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
mulq %rcx ## encoding: [0x48,0xf7,0xe1]
testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2]
movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
jmp __Znam ## TAILCALL
Other than the silly seto+test, this is using the o bit directly, so it's going in the right
direction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120935 91177308-0d34-0410-b5e6-96231b3b80d8
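For context, here is roughly the IR the front end hands the backend for the C
example above, sketched from the assembly (the value names and the i8*
signature are illustrative, not copied from an actual compile): the product
and the overflow bit come from a single umul.with.overflow call, and the
allocation size is forced to -1 on overflow so that operator new[] fails.

define i8* @_Z4funcl(i64 %count) {
  %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
  %size = extractvalue { i64, i1 } %mul, 0
  %ovf = extractvalue { i64, i1 } %mul, 1
  %safe = select i1 %ovf, i64 -1, i64 %size
  %ptr = tail call i8* @_Znam(i64 %safe)
  ret i8* %ptr
}

declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
declare i8* @_Znam(i64)

Checking the i1 result of this intrinsic is what the test below exercises.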
; RUN: llc < %s -march=x86 | FileCheck %s
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
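; The overflow bit should come straight from the flags the mul sets (seto),
; with no separate compare of the high half of the product.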
define i1 @a(i32 %x) zeroext nounwind {
  %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
  %obil = extractvalue {i32, i1} %res, 1
  ret i1 %obil
; CHECK: a:
; CHECK: mull
; CHECK: seto %al
; CHECK: movzbl %al, %eax
; CHECK: ret
}
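; Only the product of the intrinsic is used, so the multiply by 2 should be
; turned into a second add; the CHECK-NEXT lines make sure nothing else (in
; particular no mul) appears between that add and the ret.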
define i32 @test2(i32 %a, i32 %b) nounwind readnone {
entry:
  %tmp0 = add i32 %b, %a
  %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
  %tmp2 = extractvalue { i32, i1 } %tmp1, 0
  ret i32 %tmp2
; CHECK: test2:
; CHECK: addl
; CHECK-NEXT: addl
; CHECK-NEXT: ret
}
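; Same pattern with a multiply by 4; here the CHECKs still expect a mull
; following the initial addl.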
define i32 @test3(i32 %a, i32 %b) nounwind readnone {
entry:
  %tmp0 = add i32 %b, %a
  %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
  %tmp2 = extractvalue { i32, i1 } %tmp1, 0
  ret i32 %tmp2
; CHECK: test3:
; CHECK: addl
; CHECK: mull
; CHECK-NEXT: ret
}