llvm-6502/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
Ahmed Bougacha 55a060641f [CodeGen] Replace the reused stores' chain for extractelt expansion.
This fixes a subtle issue that was introduced in r205153.

When reusing a store for the extractelement expansion (to load directly
from it, instead of going through the stack), later stores to the
same location might have overwritten the data we were expecting to
extract from.

To fix that, we need to explicitly replace the chain going out of the
reused store, so that later stores also have an explicit dependency on
the generated element-extracting loads, and can't clobber them.

rdar://20066785
Differential Revision: http://reviews.llvm.org/D8180


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231721 91177308-0d34-0410-b5e6-96231b3b80d8
2015-03-09 22:51:05 +00:00

58 lines
2.1 KiB
LLVM

; Test driver line: feed this file to llc for an i386-apple-darwin target with
; the yonah CPU model, and pipe the generated assembly into FileCheck, which
; matches it against the check patterns written in this file.
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
; Explicit data layout for the module: little-endian ("e"), Mach-O name
; mangling ("m:o"), 32-bit pointers ("p:32:32"), 128-bit natural stack
; alignment ("S128").
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
; Make sure we don't break load/store ordering when turning an extractelement
; into loads, off the stack or a previous store.
; Be very explicit about the ordering/stack offsets.
; CHECK-LABEL: test_extractelement_legalization_storereuse:
; CHECK: # BB#0
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl 16(%esp), %eax
; CHECK-NEXT: movl 24(%esp), %ecx
; CHECK-NEXT: movl 20(%esp), %edx
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl (%ecx,%edx), %esi
; CHECK-NEXT: movl 12(%ecx,%edx), %edi
; CHECK-NEXT: movl 8(%ecx,%edx), %ebx
; CHECK-NEXT: movl 4(%ecx,%edx), %edx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edx, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; Adds %a to the <4 x i32> stored at %y, writes the sum back to %y, then
; extracts four elements of the sum at run-time (non-constant) indices and
; stores them into %x in a permuted order.
;
;   %a - vector addend, passed by value
;   %x - destination i32 array for the extracted elements
;   %y - reinterpreted as a <4 x i32>* (16-byte aligned); loaded and stored
;   %i - run-time value from which all four element indices are derived
;
; The pointer arguments carry no noalias attribute, so the scalar stores to %x
; are not known to be disjoint from the vector stored at %y.
; NOTE(review): %y is marked readonly, yet the vector store below writes
; through it - confirm whether that attribute is intentional for this test.
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
entry:
; View %y as a pointer to a 4 x i32 vector, then load / add / store in place.
%0 = bitcast i32* %y to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 16
%am = add <4 x i32> %a, %1
; This store and the extractelements below share the value %am; presumably
; this is the store whose memory the legalizer reuses for the element loads
; (see the test description at the top of the file).
store <4 x i32> %am, <4 x i32>* %0, align 16
; Index computation: ip0 = i * 4, and since its two low bits are zero the
; 'or' operations yield ip0 + 1, ip0 + 2 and ip0 + 3.
; NOTE(review): for a 4-element vector these indices are in range only when
; %i == 0 - presumably acceptable here because only the generated code is
; inspected; confirm.
%ip0 = shl nsw i32 %i, 2
%ip1 = or i32 %ip0, 1
%ip2 = or i32 %ip0, 2
%ip3 = or i32 %ip0, 3
; Element [ip0] of the sum -> x[ip3]
%vecext = extractelement <4 x i32> %am, i32 %ip0
%arrayidx = getelementptr inbounds i32, i32* %x, i32 %ip3
store i32 %vecext, i32* %arrayidx, align 4
; Element [ip1] of the sum -> x[ip0]
%vecext5 = extractelement <4 x i32> %am, i32 %ip1
%arrayidx8 = getelementptr inbounds i32, i32* %x, i32 %ip0
store i32 %vecext5, i32* %arrayidx8, align 4
; Element [ip2] of the sum -> x[ip2]
%vecext11 = extractelement <4 x i32> %am, i32 %ip2
%arrayidx14 = getelementptr inbounds i32, i32* %x, i32 %ip2
store i32 %vecext11, i32* %arrayidx14, align 4
; Element [ip3] of the sum -> x[ip1]
%vecext17 = extractelement <4 x i32> %am, i32 %ip3
%arrayidx20 = getelementptr inbounds i32, i32* %x, i32 %ip1
store i32 %vecext17, i32* %arrayidx20, align 4
ret void
}
; Attribute group #0 (used by the function definition in this file): nounwind,
; i.e. the function is declared never to unwind.
attributes #0 = { nounwind }