2011-01-07 11:35:21 +00:00
|
|
|
; RUN: llc -mtriple=i386-apple-darwin -mcpu=yonah < %s | FileCheck %s
|
2008-08-21 21:00:15 +00:00
|
|
|
|
|
|
|
; Legacy four-operand declaration of the memset intrinsic. None of the
; functions visible in this file call it; they all use the five-operand
; @llvm.memset.p0i8.i32 form instead.
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
|
|
|
|
|
2010-04-01 18:19:11 +00:00
|
|
|
; t1: memset of 188 bytes with the constant fill value 0 to a null
; destination. The FileCheck directives below expect the compiled output
; for this function to contain a call to the Darwin memset stub
; (L_memset$stub).
define fastcc void @t1() nounwind {
|
2008-08-21 21:00:15 +00:00
|
|
|
entry:
|
2013-07-14 06:24:09 +00:00
|
|
|
; CHECK-LABEL: t1:
|
2013-12-05 05:19:12 +00:00
|
|
|
; CHECK: calll L_memset$stub
|
2011-06-18 06:05:24 +00:00
|
|
|
; Operands: destination, fill byte, length, then i32 1 / i1 false, which are
; presumably alignment and the volatile flag -- confirm against the LangRef
; for this LLVM version.
call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false)
|
2010-04-01 06:04:33 +00:00
|
|
|
unreachable
|
2008-08-21 21:00:15 +00:00
|
|
|
}
|
2010-04-01 18:19:11 +00:00
|
|
|
|
|
|
|
; t2: memset of 76 bytes with a non-constant fill value (%c, passed as a
; sign-extended i8) to an undef destination. As in @t1, the FileCheck
; directives below expect a call to the Darwin memset stub (L_memset$stub).
define fastcc void @t2(i8 signext %c) nounwind {
|
|
|
|
entry:
|
2013-07-14 06:24:09 +00:00
|
|
|
; CHECK-LABEL: t2:
|
2013-12-05 05:19:12 +00:00
|
|
|
; CHECK: calll L_memset$stub
|
2011-06-18 06:05:24 +00:00
|
|
|
; Operands: destination, fill byte, length, then i32 1 / i1 false, which are
; presumably alignment and the volatile flag -- confirm against the LangRef
; for this LLVM version.
call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false)
|
2010-04-01 18:19:11 +00:00
|
|
|
unreachable
|
|
|
|
}
|
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.
We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that don't support the multiply, or where it is slow (p4), if
someone cares enough.
Example code:
void test(char *s, int a) {
__builtin_memset(s, a, 4);
}
before:
_test: ## @test
movzbl 8(%esp), %eax
movl %eax, %ecx
shll $8, %ecx
orl %eax, %ecx
movl %ecx, %eax
shll $16, %eax
orl %ecx, %eax
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
after:
_test: ## @test
movzbl 8(%esp), %eax
imull $16843009, %eax, %eax ## imm = 0x1010101
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122707 91177308-0d34-0410-b5e6-96231b3b80d8
2011-01-02 19:44:58 +00:00
|
|
|
|
|
|
|
; Declaration of the memset intrinsic called by @t1 through @t4. Operands:
; destination pointer (nocapture), fill byte, length, then an i32 and an i1
; that are presumably alignment and the volatile flag -- confirm against the
; LangRef for this LLVM version.
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
|
|
|
|
|
|
|
|
; t3: 8-byte memset of %s with a non-constant fill byte %a. The FileCheck
; directives below expect the output to contain `imull $16843009`
; (16843009 = 0x1010101), i.e. the fill byte is widened with a single
; multiply rather than a series of shifts and ors.
define void @t3(i8* nocapture %s, i8 %a) nounwind {
|
|
|
|
entry:
|
|
|
|
; Length is 8; the trailing i32 1 / i1 false operands are presumably alignment
; and the volatile flag -- confirm against the LangRef for this LLVM version.
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
|
|
|
|
ret void
|
2013-07-14 06:24:09 +00:00
|
|
|
; CHECK-LABEL: t3:
|
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.
We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that doesn't support the multiply or it is slow (p4) if someone cares
enough.
Example code:
void test(char *s, int a) {
__builtin_memset(s, a, 4);
}
before:
_test: ## @test
movzbl 8(%esp), %eax
movl %eax, %ecx
shll $8, %ecx
orl %eax, %ecx
movl %ecx, %eax
shll $16, %eax
orl %ecx, %eax
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
after:
_test: ## @test
movzbl 8(%esp), %eax
imull $16843009, %eax, %eax ## imm = 0x1010101
movl 4(%esp), %ecx
movl %eax, 4(%ecx)
movl %eax, (%ecx)
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122707 91177308-0d34-0410-b5e6-96231b3b80d8
2011-01-02 19:44:58 +00:00
|
|
|
; CHECK: imull $16843009
|
|
|
|
}
|
|
|
|
|
2011-01-02 19:57:05 +00:00
|
|
|
; t4: 15-byte memset of %s with a non-constant fill byte %a. The FileCheck
; directives below expect one `imull $16843009` (16843009 = 0x01010101 in
; hex) and then no further text matching `imul` before the `ret`, i.e. the
; multiply must not be repeated after its first occurrence.
define void @t4(i8* nocapture %s, i8 %a) nounwind {
|
|
|
|
entry:
|
|
|
|
; Length is 15; the trailing i32 1 / i1 false operands are presumably
; alignment and the volatile flag -- confirm against the LangRef for this
; LLVM version.
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
|
|
|
|
ret void
|
2013-07-14 06:24:09 +00:00
|
|
|
; CHECK-LABEL: t4:
|
2011-01-02 19:57:05 +00:00
|
|
|
; CHECK: imull $16843009
|
|
|
|
; CHECK-NOT: imul
|
|
|
|
; CHECK: ret
|
|
|
|
}
|