llvm-6502/test/CodeGen/X86/dagcombine-buildvector.ll
Nate Begeman abc0199680 Adapt the x86 build_vector dagcombine to the current state of the legalizer.
build vectors with i64 elements will only appear on 32b x86 before legalize.
Since vector widening occurs during legalize, and produces i64 build_vector 
elements, the dag combiner is never run on these before legalize splits them
into 32b elements.

Teach the build_vector dag combine in x86 back end to recognize consecutive 
loads producing the low part of the vector.

Convert the two uses of TLI's consecutive load recognizer to pass LoadSDNodes
since that was required implicitly.

Add a testcase for the transform.

Old:
	subl	$28, %esp
	movl	32(%esp), %eax
	movl	4(%eax), %ecx
	movl	%ecx, 4(%esp)
	movl	(%eax), %eax
	movl	%eax, (%esp)
	movaps	(%esp), %xmm0
	pmovzxwd	%xmm0, %xmm0
	movl	36(%esp), %eax
	movaps	%xmm0, (%eax)
	addl	$28, %esp
	ret

New:
	movl	4(%esp), %eax
	pmovzxwd	(%eax), %xmm0
	movl	8(%esp), %eax
	movaps	%xmm0, (%eax)
	ret




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@72957 91177308-0d34-0410-b5e6-96231b3b80d8
2009-06-05 21:37:30 +00:00

26 lines
993 B
LLVM

; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
; RUN: grep unpcklpd %t | count 1
; RUN: grep movapd %t | count 1
; RUN: grep movaps %t | count 1
; Shows a dag combine bug that will generate an illegal build vector
; with v2i64 build_vector i32, i32.
define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
entry:
%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp7.i, <2 x double>* %dst
ret void
}
define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
entry:
%tmp1 = load <4 x i16>* %src
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
store <4 x i32> %0, <4 x i32>* %dest
ret void
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone