mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-25 19:29:53 +00:00
The bug manifests when there are two loads and two stores chained as follows in a DAG, (ld v3f32) -> (st f32) -> (ld v3f32) -> (st f32) and the stores' values are extracted from the preceding vector loads. MergeConsecutiveStores would replace the first store in the chain with the merged vector store, which would create a cycle between the merged store node and the last load node that appears in the chain. This commit fixes the bug by replacing the last store in the chain instead. rdar://problem/20275084 Differential Revision: http://reviews.llvm.org/D8849 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234430 91177308-0d34-0410-b5e6-96231b3b80d8
53 lines
1.6 KiB
LLVM
53 lines
1.6 KiB
LLVM
; Compiles this file with llc (x86-64, corei7 CPU model) and pipes the output
; to FileCheck, which matches it against the check lines in this file.
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s
|
|
|
|
; merge_stores_can: four zero stores to elements 1..4 of a local array, with a
; load of element 9 of the same array between the second and third store. The
; load does not touch any of the stored elements, so the checks below expect a
; zeroed %xmm0 (xorps) immediately followed by a movups of %xmm0, located
; between the two calls to foo.
;
; The function name is matched as a label with a trailing colon so that it is
; anchored to this function's label line and cannot also match the longer name
; merge_stores_cant.
; CHECK-LABEL: merge_stores_can:
; CHECK: callq foo
; CHECK: xorps %xmm0, %xmm0
; CHECK-NEXT: movups %xmm0
; CHECK: callq foo
; CHECK: ret
declare i32 @foo([10 x i32]* )

define i32 @merge_stores_can() nounwind ssp {
  %object1 = alloca [10 x i32]

  ; First call: the output checks use it as the anchor before the stores.
  %ret0 = call i32 @foo([10 x i32]* %object1) nounwind

  ; Addresses of elements 1..4 (store targets) and element 9 (load source).
  %O1_1 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 1
  %O1_2 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 2
  %O1_3 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 3
  %O1_4 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 4
  %ld_ptr = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 9

  store i32 0, i32* %O1_1
  store i32 0, i32* %O1_2
  %ret = load i32, i32* %ld_ptr ; <--- does not alias.
  store i32 0, i32* %O1_3
  store i32 0, i32* %O1_4

  ; Second call: the output checks use it as the anchor after the stores.
  %ret1 = call i32 @foo([10 x i32]* %object1) nounwind

  ret i32 %ret
}
|
|
|
|
; merge_stores_cant: four zero stores to elements 1..4 of the array behind
; %in1, with a load of element 2 of the array behind %in0 between the second
; and third store. The two pointer arguments may refer to overlapping memory
; (see the "may alias" note on the load), so the checks below require that
; neither the xorps of %xmm0 nor a movups of %xmm0 appears before ret.
;
; The function name is matched as a label with a trailing colon so that the
; negative checks are anchored to this function's label line.
; CHECK-LABEL: merge_stores_cant:
; CHECK-NOT: xorps %xmm0, %xmm0
; CHECK-NOT: movups %xmm0
; CHECK: ret
define i32 @merge_stores_cant([10 x i32]* %in0, [10 x i32]* %in1) nounwind ssp {

  ; Store targets come from %in1; the load source comes from %in0.
  %O1_1 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 1
  %O1_2 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 2
  %O1_3 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 3
  %O1_4 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 4
  %ld_ptr = getelementptr [10 x i32], [10 x i32]* %in0, i64 0, i32 2

  store i32 0, i32* %O1_1
  store i32 0, i32* %O1_2
  %ret = load i32, i32* %ld_ptr ; <--- may alias
  store i32 0, i32* %O1_3
  store i32 0, i32* %O1_4

  ret i32 %ret
}
|