2011-04-28 18:15:47 +00:00
|
|
|
; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s
|
2009-09-20 19:03:47 +00:00
|
|
|
|
|
|
|
; 32-bit little endian target.
|
2011-04-28 07:29:08 +00:00
|
|
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
|
2009-09-20 19:03:47 +00:00
|
|
|
|
|
|
|
;; Trivial RLE test.
|
|
|
|
define i32 @test0(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
%A = load i32* %P
|
|
|
|
ret i32 %A
|
|
|
|
; CHECK: @test0
|
|
|
|
; CHECK: ret i32 %V
|
|
|
|
}
|
|
|
|
|
2009-09-21 17:24:04 +00:00
|
|
|
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Tests for crashers
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
|
|
|
;; PR5016
|
|
|
|
define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
|
|
|
|
store {i32, i32} %A, {i32, i32}* %P
|
|
|
|
%X = bitcast {i32, i32}* %P to i8*
|
|
|
|
%Y = load i8* %X
|
|
|
|
ret i8 %Y
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-09-20 19:03:47 +00:00
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Store -> Load and Load -> Load forwarding where src and dst are different
|
|
|
|
;; types, but where the base pointer is a must alias.
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
|
|
|
;; i32 -> f32 forwarding.
|
|
|
|
define float @coerce_mustalias1(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
|
|
|
|
%A = load float* %P2
|
|
|
|
ret float %A
|
|
|
|
; CHECK: @coerce_mustalias1
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i32* -> float forwarding.
|
|
|
|
define float @coerce_mustalias2(i32* %V, i32** %P) {
|
|
|
|
store i32* %V, i32** %P
|
|
|
|
|
|
|
|
%P2 = bitcast i32** %P to float*
|
|
|
|
|
|
|
|
%A = load float* %P2
|
|
|
|
ret float %A
|
|
|
|
; CHECK: @coerce_mustalias2
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
}
|
|
|
|
|
|
|
|
;; float -> i32* forwarding.
|
|
|
|
define i32* @coerce_mustalias3(float %V, float* %P) {
|
|
|
|
store float %V, float* %P
|
|
|
|
|
|
|
|
%P2 = bitcast float* %P to i32**
|
|
|
|
|
|
|
|
%A = load i32** %P2
|
|
|
|
ret i32* %A
|
|
|
|
; CHECK: @coerce_mustalias3
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i32*
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i32 -> f32 load forwarding.
|
|
|
|
define float @coerce_mustalias4(i32* %P, i1 %cond) {
|
|
|
|
%A = load i32* %P
|
2009-09-20 20:09:34 +00:00
|
|
|
|
2009-09-20 19:03:47 +00:00
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
%B = load float* %P2
|
2009-09-20 20:09:34 +00:00
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
2009-09-20 19:03:47 +00:00
|
|
|
ret float %B
|
|
|
|
|
|
|
|
F:
|
|
|
|
%X = bitcast i32 %A to float
|
|
|
|
ret float %X
|
|
|
|
|
|
|
|
; CHECK: @coerce_mustalias4
|
|
|
|
; CHECK: %A = load i32* %P
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
; CHECK: F:
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i32 -> i8 forwarding
|
|
|
|
define i8 @coerce_mustalias5(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
%P2 = bitcast i32* %P to i8*
|
|
|
|
|
|
|
|
%A = load i8* %P2
|
|
|
|
ret i8 %A
|
|
|
|
; CHECK: @coerce_mustalias5
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i64 -> float forwarding
|
|
|
|
define float @coerce_mustalias6(i64 %V, i64* %P) {
|
|
|
|
store i64 %V, i64* %P
|
|
|
|
|
|
|
|
%P2 = bitcast i64* %P to float*
|
|
|
|
|
|
|
|
%A = load float* %P2
|
|
|
|
ret float %A
|
|
|
|
; CHECK: @coerce_mustalias6
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float
|
|
|
|
}
|
|
|
|
|
|
|
|
;; i64 -> i8* (32-bit) forwarding
|
|
|
|
define i8* @coerce_mustalias7(i64 %V, i64* %P) {
|
|
|
|
store i64 %V, i64* %P
|
|
|
|
|
|
|
|
%P2 = bitcast i64* %P to i8**
|
|
|
|
|
|
|
|
%A = load i8** %P2
|
|
|
|
ret i8* %A
|
|
|
|
; CHECK: @coerce_mustalias7
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8*
|
|
|
|
}
|
|
|
|
|
2009-12-06 01:57:02 +00:00
|
|
|
; memset -> i16 forwarding.
|
|
|
|
define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%conv = bitcast i16* %A to i8*
|
2011-06-18 06:05:24 +00:00
|
|
|
tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
|
2009-12-06 01:57:02 +00:00
|
|
|
%arrayidx = getelementptr inbounds i16* %A, i64 42
|
|
|
|
%tmp2 = load i16* %arrayidx
|
|
|
|
ret i16 %tmp2
|
|
|
|
; CHECK: @memset_to_i16_local
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i16 257
|
|
|
|
}
|
|
|
|
|
|
|
|
; memset -> float forwarding.
|
|
|
|
define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
|
2011-06-18 06:05:24 +00:00
|
|
|
tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
|
2009-12-06 01:57:02 +00:00
|
|
|
%arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
|
|
|
|
%tmp2 = load float* %arrayidx ; <float> [#uses=1]
|
|
|
|
ret float %tmp2
|
|
|
|
; CHECK: @memset_to_float_local
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: zext
|
|
|
|
; CHECK-NEXT: shl
|
|
|
|
; CHECK-NEXT: or
|
|
|
|
; CHECK-NEXT: shl
|
|
|
|
; CHECK-NEXT: or
|
|
|
|
; CHECK-NEXT: bitcast
|
|
|
|
; CHECK-NEXT: ret float
|
|
|
|
}
|
|
|
|
|
2009-12-06 04:54:31 +00:00
|
|
|
;; non-local memset -> i16 load forwarding.
|
|
|
|
define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
|
|
|
|
%P3 = bitcast i16* %P to i8*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
2011-06-18 06:05:24 +00:00
|
|
|
tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
|
2009-12-06 04:54:31 +00:00
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
2011-06-18 06:05:24 +00:00
|
|
|
tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
|
2009-12-06 04:54:31 +00:00
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%P2 = getelementptr i16* %P, i32 4
|
|
|
|
%A = load i16* %P2
|
|
|
|
ret i16 %A
|
|
|
|
|
|
|
|
; CHECK: @memset_to_i16_nonlocal0
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK-NEXT: %A = phi i16 [ 514, %F ], [ 257, %T ]
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i16 %A
|
|
|
|
}
|
|
|
|
|
2009-12-06 05:29:56 +00:00
|
|
|
@GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }
|
|
|
|
|
|
|
|
; memcpy -> float forwarding.
|
|
|
|
define float @memcpy_to_float_local(float* %A) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
|
2011-06-18 06:05:24 +00:00
|
|
|
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
|
2009-12-06 05:29:56 +00:00
|
|
|
%arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
|
|
|
|
%tmp2 = load float* %arrayidx ; <float> [#uses=1]
|
|
|
|
ret float %tmp2
|
|
|
|
; CHECK: @memcpy_to_float_local
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret float 1.400000e+01
|
|
|
|
}
|
|
|
|
|
2009-12-06 04:54:31 +00:00
|
|
|
|
2009-12-06 01:57:02 +00:00
|
|
|
|
2009-09-20 20:09:34 +00:00
|
|
|
;; non-local i32/float -> i8 load forwarding.
|
|
|
|
define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
|
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
store float 1.0, float* %P2
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
; CHECK: @coerce_mustalias_nonlocal0
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK: %A = phi i8 [
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
2009-12-06 04:54:31 +00:00
|
|
|
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82439 91177308-0d34-0410-b5e6-96231b3b80d8
2009-09-21 05:57:11 +00:00
|
|
|
;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
|
|
|
|
;; bitcast equivalence can be properly phi translated.
|
|
|
|
define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
|
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
store float 1.0, float* %P2
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
;; FIXME: This is disabled because this caused a miscompile in the llvm-gcc
|
|
|
|
;; bootstrap, see r82411
|
|
|
|
;
|
|
|
|
; HECK: @coerce_mustalias_nonlocal1
|
|
|
|
; HECK: Cont:
|
|
|
|
; HECK: %A = phi i8 [
|
|
|
|
; HECK-NOT: load
|
|
|
|
; HECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-09-20 20:09:34 +00:00
|
|
|
;; non-local i32 -> i8 partial redundancy load forwarding.
|
|
|
|
define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
; CHECK: @coerce_mustalias_pre0
|
|
|
|
; CHECK: F:
|
|
|
|
; CHECK: load i8* %P3
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK: %A = phi i8 [
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82439 91177308-0d34-0410-b5e6-96231b3b80d8
2009-09-21 05:57:11 +00:00
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Store -> Load and Load -> Load forwarding where src and dst are different
|
|
|
|
;; types, and the reload is an offset from the store pointer.
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
2009-09-21 06:48:08 +00:00
|
|
|
;; i32 -> i8 forwarding.
|
2009-09-21 05:57:47 +00:00
|
|
|
;; PR4216
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82439 91177308-0d34-0410-b5e6-96231b3b80d8
2009-09-21 05:57:11 +00:00
|
|
|
define i8 @coerce_offset0(i32 %V, i32* %P) {
|
|
|
|
store i32 %V, i32* %P
|
|
|
|
|
|
|
|
%P2 = bitcast i32* %P to i8*
|
|
|
|
%P3 = getelementptr i8* %P2, i32 2
|
|
|
|
|
|
|
|
%A = load i8* %P3
|
|
|
|
ret i8 %A
|
|
|
|
; CHECK: @coerce_offset0
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8
|
|
|
|
}
|
|
|
|
|
2009-09-21 06:48:08 +00:00
|
|
|
;; non-local i32/float -> i8 load forwarding.
|
|
|
|
define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
|
|
|
|
%P2 = bitcast i32* %P to float*
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
%P4 = getelementptr i8* %P3, i32 2
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
store float 1.0, float* %P2
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%A = load i8* %P4
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
; CHECK: @coerce_offset_nonlocal0
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK: %A = phi i8 [
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8 %A
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
;; non-local i32 -> i8 partial redundancy load forwarding.
|
|
|
|
define i8 @coerce_offset_pre0(i32* %P, i1 %cond) {
|
|
|
|
%P3 = bitcast i32* %P to i8*
|
|
|
|
%P4 = getelementptr i8* %P3, i32 2
|
|
|
|
br i1 %cond, label %T, label %F
|
|
|
|
T:
|
|
|
|
store i32 42, i32* %P
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
F:
|
|
|
|
br label %Cont
|
|
|
|
|
|
|
|
Cont:
|
|
|
|
%A = load i8* %P4
|
|
|
|
ret i8 %A
|
|
|
|
|
|
|
|
; CHECK: @coerce_offset_pre0
|
|
|
|
; CHECK: F:
|
|
|
|
; CHECK: load i8* %P4
|
|
|
|
; CHECK: Cont:
|
|
|
|
; CHECK: %A = phi i8 [
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i8 %A
|
|
|
|
}
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82439 91177308-0d34-0410-b5e6-96231b3b80d8
2009-09-21 05:57:11 +00:00
|
|
|
|
2009-12-06 01:47:24 +00:00
|
|
|
define i32 @chained_load(i32** %p) {
|
|
|
|
block1:
|
2011-05-22 07:02:43 +00:00
|
|
|
%A = alloca i32*
|
|
|
|
|
2009-12-06 01:47:24 +00:00
|
|
|
%z = load i32** %p
|
2011-05-22 07:02:43 +00:00
|
|
|
store i32* %z, i32** %A
|
|
|
|
br i1 true, label %block2, label %block3
|
2009-12-06 01:47:24 +00:00
|
|
|
|
|
|
|
block2:
|
|
|
|
%a = load i32** %p
|
|
|
|
br label %block4
|
|
|
|
|
|
|
|
block3:
|
|
|
|
%b = load i32** %p
|
|
|
|
br label %block4
|
|
|
|
|
|
|
|
block4:
|
|
|
|
%c = load i32** %p
|
|
|
|
%d = load i32* %c
|
|
|
|
ret i32 %d
|
|
|
|
|
|
|
|
; CHECK: @chained_load
|
|
|
|
; CHECK: %z = load i32** %p
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: %d = load i32* %z
|
|
|
|
; CHECK-NEXT: ret i32 %d
|
|
|
|
}
|
|
|
|
|
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to this code:
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82439 91177308-0d34-0410-b5e6-96231b3b80d8
2009-09-21 05:57:11 +00:00
|
|
|
|
2009-12-04 02:12:12 +00:00
|
|
|
declare i1 @cond() readonly
|
|
|
|
declare i1 @cond2() readonly
|
|
|
|
|
|
|
|
define i32 @phi_trans2() {
|
2009-12-09 01:59:31 +00:00
|
|
|
; CHECK: @phi_trans2
|
2009-12-04 02:12:12 +00:00
|
|
|
entry:
|
|
|
|
%P = alloca i32, i32 400
|
|
|
|
br label %F1
|
|
|
|
|
|
|
|
F1:
|
|
|
|
%A = phi i32 [1, %entry], [2, %F]
|
|
|
|
%cond2 = call i1 @cond()
|
|
|
|
br i1 %cond2, label %T1, label %TY
|
|
|
|
|
|
|
|
T1:
|
|
|
|
%P2 = getelementptr i32* %P, i32 %A
|
|
|
|
%x = load i32* %P2
|
|
|
|
%cond = call i1 @cond2()
|
|
|
|
br i1 %cond, label %TX, label %F
|
|
|
|
|
|
|
|
F:
|
|
|
|
%P3 = getelementptr i32* %P, i32 2
|
|
|
|
store i32 17, i32* %P3
|
|
|
|
|
|
|
|
store i32 42, i32* %P2 ; Provides "P[A]".
|
|
|
|
br label %F1
|
|
|
|
|
|
|
|
TX:
|
2009-12-09 01:59:31 +00:00
|
|
|
; This load should not be compiled to 'ret i32 42'. An overly clever
|
|
|
|
; implementation of GVN would see that we're returning 17 if the loop
|
|
|
|
; executes once or 42 if it executes more than that, but we'd have to do
|
|
|
|
; loop restructuring to expose this, and GVN shouldn't do this sort of CFG
|
|
|
|
; transformation.
|
|
|
|
|
|
|
|
; CHECK: TX:
|
|
|
|
; CHECK: ret i32 %x
|
|
|
|
ret i32 %x
|
2009-12-04 02:12:12 +00:00
|
|
|
TY:
|
|
|
|
ret i32 0
|
|
|
|
}
|
|
|
|
|
2009-12-09 01:59:31 +00:00
|
|
|
define i32 @phi_trans3(i32* %p) {
|
|
|
|
; CHECK: @phi_trans3
|
|
|
|
block1:
|
|
|
|
br i1 true, label %block2, label %block3
|
|
|
|
|
|
|
|
block2:
|
|
|
|
store i32 87, i32* %p
|
|
|
|
br label %block4
|
|
|
|
|
|
|
|
block3:
|
|
|
|
%p2 = getelementptr i32* %p, i32 43
|
|
|
|
store i32 97, i32* %p2
|
|
|
|
br label %block4
|
|
|
|
|
|
|
|
block4:
|
|
|
|
%A = phi i32 [-1, %block2], [42, %block3]
|
|
|
|
br i1 true, label %block5, label %exit
|
|
|
|
|
|
|
|
; CHECK: block4:
|
|
|
|
; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]
|
|
|
|
; CHECK-NOT: load
|
|
|
|
|
|
|
|
block5:
|
|
|
|
%B = add i32 %A, 1
|
|
|
|
br i1 true, label %block6, label %exit
|
|
|
|
|
|
|
|
block6:
|
|
|
|
%C = getelementptr i32* %p, i32 %B
|
|
|
|
br i1 true, label %block7, label %exit
|
|
|
|
|
|
|
|
block7:
|
|
|
|
%D = load i32* %C
|
|
|
|
ret i32 %D
|
|
|
|
|
|
|
|
; CHECK: block7:
|
|
|
|
; CHECK-NEXT: ret i32 %D
|
|
|
|
|
|
|
|
exit:
|
|
|
|
ret i32 -1
|
|
|
|
}
|
2009-12-04 02:12:12 +00:00
|
|
|
|
2009-12-09 02:43:05 +00:00
|
|
|
define i8 @phi_trans4(i8* %p) {
|
|
|
|
; CHECK: @phi_trans4
|
|
|
|
entry:
|
2009-12-09 18:21:46 +00:00
|
|
|
%X3 = getelementptr i8* %p, i32 192
|
|
|
|
store i8 192, i8* %X3
|
|
|
|
|
2009-12-09 02:43:05 +00:00
|
|
|
%X = getelementptr i8* %p, i32 4
|
|
|
|
%Y = load i8* %X
|
|
|
|
br label %loop
|
|
|
|
|
|
|
|
loop:
|
|
|
|
%i = phi i32 [4, %entry], [192, %loop]
|
|
|
|
%X2 = getelementptr i8* %p, i32 %i
|
2009-12-09 18:21:46 +00:00
|
|
|
%Y2 = load i8* %X2
|
|
|
|
|
|
|
|
; CHECK: loop:
|
|
|
|
; CHECK-NEXT: %Y2 = phi i8 [ %Y, %entry ], [ 0, %loop ]
|
|
|
|
; CHECK-NOT: load i8
|
2009-12-09 02:43:05 +00:00
|
|
|
|
|
|
|
%cond = call i1 @cond2()
|
|
|
|
|
2009-12-09 18:21:46 +00:00
|
|
|
%Z = bitcast i8 *%X3 to i32*
|
2009-12-09 02:43:05 +00:00
|
|
|
store i32 0, i32* %Z
|
|
|
|
br i1 %cond, label %loop, label %out
|
|
|
|
|
|
|
|
out:
|
|
|
|
%R = add i8 %Y, %Y2
|
|
|
|
ret i8 %R
|
|
|
|
}
|
|
|
|
|
|
|
|
define i8 @phi_trans5(i8* %p) {
|
|
|
|
; CHECK: @phi_trans5
|
|
|
|
entry:
|
2009-12-09 18:21:46 +00:00
|
|
|
|
2009-12-09 02:43:05 +00:00
|
|
|
%X4 = getelementptr i8* %p, i32 2
|
|
|
|
store i8 19, i8* %X4
|
|
|
|
|
|
|
|
%X = getelementptr i8* %p, i32 4
|
|
|
|
%Y = load i8* %X
|
|
|
|
br label %loop
|
|
|
|
|
|
|
|
loop:
|
|
|
|
%i = phi i32 [4, %entry], [3, %cont]
|
|
|
|
%X2 = getelementptr i8* %p, i32 %i
|
2009-12-09 18:21:46 +00:00
|
|
|
%Y2 = load i8* %X2 ; Ensure this load is not being incorrectly replaced.
|
2009-12-09 02:43:05 +00:00
|
|
|
%cond = call i1 @cond2()
|
|
|
|
br i1 %cond, label %cont, label %out
|
|
|
|
|
|
|
|
cont:
|
|
|
|
%Z = getelementptr i8* %X2, i32 -1
|
|
|
|
%Z2 = bitcast i8 *%Z to i32*
|
|
|
|
store i32 50462976, i32* %Z2 ;; (1 << 8) | (2 << 16) | (3 << 24)
|
2009-12-09 18:21:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
; CHECK: store i32
|
|
|
|
; CHECK-NEXT: getelementptr i8* %p, i32 3
|
|
|
|
; CHECK-NEXT: load i8*
|
2009-12-09 02:43:05 +00:00
|
|
|
br label %loop
|
|
|
|
|
|
|
|
out:
|
|
|
|
%R = add i8 %Y, %Y2
|
|
|
|
ret i8 %R
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-03-25 05:58:19 +00:00
|
|
|
; PR6642
|
|
|
|
define i32 @memset_to_load() nounwind readnone {
|
|
|
|
entry:
|
|
|
|
%x = alloca [256 x i32], align 4 ; <[256 x i32]*> [#uses=2]
|
|
|
|
%tmp = bitcast [256 x i32]* %x to i8* ; <i8*> [#uses=1]
|
2011-06-18 06:05:24 +00:00
|
|
|
call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
|
2010-03-25 05:58:19 +00:00
|
|
|
%arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
|
|
|
|
%tmp1 = load i32* %arraydecay ; <i32> [#uses=1]
|
|
|
|
ret i32 %tmp1
|
|
|
|
; CHECK: @memset_to_load
|
|
|
|
; CHECK: ret i32 0
|
|
|
|
}
|
2009-12-09 02:43:05 +00:00
|
|
|
|
2011-04-26 01:21:15 +00:00
|
|
|
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Load -> Load forwarding in partial alias case.
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
|
|
|
define i32 @load_load_partial_alias(i8* %P) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%0 = bitcast i8* %P to i32*
|
|
|
|
%tmp2 = load i32* %0
|
|
|
|
%add.ptr = getelementptr inbounds i8* %P, i64 1
|
|
|
|
%tmp5 = load i8* %add.ptr
|
|
|
|
%conv = zext i8 %tmp5 to i32
|
|
|
|
%add = add nsw i32 %tmp2, %conv
|
|
|
|
ret i32 %add
|
|
|
|
|
2011-06-04 06:48:50 +00:00
|
|
|
; TEMPORARILYDISABLED: @load_load_partial_alias
|
|
|
|
; TEMPORARILYDISABLED: load i32*
|
|
|
|
; TEMPORARILYDISABLED-NOT: load
|
|
|
|
; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
|
|
|
|
; TEMPORARILYDISABLED-NOT: load
|
|
|
|
; TEMPORARILYDISABLED: trunc i32 {{.*}} to i8
|
|
|
|
; TEMPORARILYDISABLED-NOT: load
|
|
|
|
; TEMPORARILYDISABLED: ret i32
|
2011-04-26 01:21:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-04-26 17:41:02 +00:00
|
|
|
; Cross block partial alias case.
|
|
|
|
define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
%xx = bitcast i8* %P to i32*
|
|
|
|
%x1 = load i32* %xx, align 4
|
|
|
|
%cmp = icmp eq i32 %x1, 127
|
|
|
|
br i1 %cmp, label %land.lhs.true, label %if.end
|
|
|
|
|
|
|
|
land.lhs.true: ; preds = %entry
|
|
|
|
%arrayidx4 = getelementptr inbounds i8* %P, i64 1
|
|
|
|
%tmp5 = load i8* %arrayidx4, align 1
|
|
|
|
%conv6 = zext i8 %tmp5 to i32
|
|
|
|
ret i32 %conv6
|
|
|
|
|
|
|
|
if.end:
|
|
|
|
ret i32 52
|
2011-06-04 06:48:50 +00:00
|
|
|
; TEMPORARILY_DISABLED: @load_load_partial_alias_cross_block
|
|
|
|
; TEMPORARILY_DISABLED: land.lhs.true:
|
|
|
|
; TEMPORARILY_DISABLED-NOT: load i8
|
|
|
|
; TEMPORARILY_DISABLED: ret i32 %conv6
|
2011-04-26 17:41:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-04-28 07:29:08 +00:00
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
;; Load Widening
|
|
|
|
;;===----------------------------------------------------------------------===;;
|
|
|
|
|
2011-04-28 18:15:47 +00:00
|
|
|
%widening1 = type { i32, i8, i8, i8, i8 }
|
2011-04-28 07:29:08 +00:00
|
|
|
|
|
|
|
@f = global %widening1 zeroinitializer, align 4
|
|
|
|
|
2011-04-28 18:15:47 +00:00
|
|
|
; Load-widening test: two adjacent i8 loads from @f (struct fields 1 and 2)
; are each zero-extended and summed. The FileCheck directives at the end of
; the function expect exactly one load, of type i16, to remain, followed by
; the i32 return.
define i32 @test_widening1(i8* %P) nounwind ssp noredzone {
|
2011-04-28 07:29:08 +00:00
|
|
|
entry:
|
|
|
|
%tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
|
|
|
|
%conv = zext i8 %tmp to i32
|
|
|
|
%tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
|
|
|
|
%conv2 = zext i8 %tmp1 to i32
|
|
|
|
%add = add nsw i32 %conv, %conv2
|
|
|
|
ret i32 %add
|
|
|
|
; CHECK: @test_widening1
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: load i16*
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i32
|
|
|
|
}
|
|
|
|
|
2011-04-28 18:15:47 +00:00
|
|
|
; Load-widening test: i8 loads from consecutive fields of @f (struct fields
; 1 through 4) are zero-extended and summed. The FileCheck directives at the
; end of the function expect exactly one load, of type i32, to remain,
; followed by the i32 return.
define i32 @test_widening2() nounwind ssp noredzone {
|
|
|
|
entry:
|
|
|
|
%tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
|
|
|
|
%conv = zext i8 %tmp to i32
|
|
|
|
%tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
|
|
|
|
%conv2 = zext i8 %tmp1 to i32
|
|
|
|
%add = add nsw i32 %conv, %conv2
|
|
|
|
|
|
|
|
%tmp2 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2
|
|
|
|
%conv3 = zext i8 %tmp2 to i32
|
|
|
|
%add2 = add nsw i32 %add, %conv3
|
|
|
|
|
|
|
|
%tmp3 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1
|
|
|
|
%conv4 = zext i8 %tmp3 to i32
|
|
|
|
; NOTE(review): %conv4 is never used; the add below takes %conv3 a second
; time. Possibly a typo for %conv4 -- left unchanged because altering the
; test input could change what the directives below match. TODO confirm.
%add3 = add nsw i32 %add2, %conv3
|
|
|
|
|
|
|
|
ret i32 %add3
|
|
|
|
; CHECK: @test_widening2
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: load i32*
|
|
|
|
; CHECK-NOT: load
|
|
|
|
; CHECK: ret i32
|
|
|
|
}
|
2011-04-26 01:21:15 +00:00
|
|
|
|
2011-06-18 06:05:24 +00:00
|
|
|
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
|
|
|
|
|
|
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
|
|
|
|