mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 20:29:48 +00:00
55a060641f
This fixes a subtle issue that was introduced in r205153. When reusing a store for the extractelement expansion (to load directly from it, inserting of going through the stack), later stores to the same location might have overwritten the data we were expecting to extract from. To fix that, we need to explicitly replace the chain going out of the reused store, so that later stores also have an explicit dependency on the generated element-extracting loads, and can't clobber them. rdar://20066785 Differential Revision: http://reviews.llvm.org/D8180 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231721 91177308-0d34-0410-b5e6-96231b3b80d8
58 lines
2.1 KiB
LLVM
58 lines
2.1 KiB
LLVM
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
|
|
|
|
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
|
|
|
; Make sure we don't break load/store ordering when turning an extractelement
|
|
; into loads, off the stack or a previous store.
|
|
; Be very explicit about the ordering/stack offsets.
|
|
|
|
; CHECK-LABEL: test_extractelement_legalization_storereuse:
|
|
; CHECK: # BB#0
|
|
; CHECK-NEXT: pushl %ebx
|
|
; CHECK-NEXT: pushl %edi
|
|
; CHECK-NEXT: pushl %esi
|
|
; CHECK-NEXT: movl 16(%esp), %eax
|
|
; CHECK-NEXT: movl 24(%esp), %ecx
|
|
; CHECK-NEXT: movl 20(%esp), %edx
|
|
; CHECK-NEXT: paddd (%edx), %xmm0
|
|
; CHECK-NEXT: movdqa %xmm0, (%edx)
|
|
; CHECK-NEXT: shll $4, %ecx
|
|
; CHECK-NEXT: movl (%ecx,%edx), %esi
|
|
; CHECK-NEXT: movl 12(%ecx,%edx), %edi
|
|
; CHECK-NEXT: movl 8(%ecx,%edx), %ebx
|
|
; CHECK-NEXT: movl 4(%ecx,%edx), %edx
|
|
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
|
|
; CHECK-NEXT: movl %edx, (%eax,%ecx)
|
|
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
|
|
; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
|
|
; CHECK-NEXT: popl %esi
|
|
; CHECK-NEXT: popl %edi
|
|
; CHECK-NEXT: popl %ebx
|
|
; CHECK-NEXT: retl
|
|
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
|
|
entry:
|
|
%0 = bitcast i32* %y to <4 x i32>*
|
|
%1 = load <4 x i32>, <4 x i32>* %0, align 16
|
|
%am = add <4 x i32> %a, %1
|
|
store <4 x i32> %am, <4 x i32>* %0, align 16
|
|
%ip0 = shl nsw i32 %i, 2
|
|
%ip1 = or i32 %ip0, 1
|
|
%ip2 = or i32 %ip0, 2
|
|
%ip3 = or i32 %ip0, 3
|
|
%vecext = extractelement <4 x i32> %am, i32 %ip0
|
|
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %ip3
|
|
store i32 %vecext, i32* %arrayidx, align 4
|
|
%vecext5 = extractelement <4 x i32> %am, i32 %ip1
|
|
%arrayidx8 = getelementptr inbounds i32, i32* %x, i32 %ip0
|
|
store i32 %vecext5, i32* %arrayidx8, align 4
|
|
%vecext11 = extractelement <4 x i32> %am, i32 %ip2
|
|
%arrayidx14 = getelementptr inbounds i32, i32* %x, i32 %ip2
|
|
store i32 %vecext11, i32* %arrayidx14, align 4
|
|
%vecext17 = extractelement <4 x i32> %am, i32 %ip3
|
|
%arrayidx20 = getelementptr inbounds i32, i32* %x, i32 %ip1
|
|
store i32 %vecext17, i32* %arrayidx20, align 4
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|