diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7087c681737..4d7224514cb 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -781,41 +781,3 @@ LLVM should be able to generate the same thing as gcc. This looks like it is
 just a matter of matching (scalar_to_vector (load x)) to movd.
 
 //===---------------------------------------------------------------------===//
-
-These two functions should compile to identical code on x86-32:
-
-define <2 x i64> @test2(i64 %arg) {
-entry:
-        %A = and i64 %arg, 1234567
-        %B = insertelement <2 x i64> undef, i64 %A, i32 0
-        ret <2 x i64> %B
-}
-
-define <2 x i64> @test2(i64 %arg) {
-entry:
-        %A = and i64 %arg, 1234567
-        %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
-        ret <2 x i64> %B
-}
-
-The later compiles to:
-
-_test2:
-	movl $1234567, %eax
-	andl 4(%esp), %eax
-	movd %eax, %xmm0
-	ret
-
-the former compiles to:
-
-_test2:
-	subl $28, %esp
-	movl $1234567, %eax
-	andl 32(%esp), %eax
-	movl %eax, (%esp)
-	movl $0, 4(%esp)
-	movaps (%esp), %xmm0
-	addl $28, %esp
-	ret
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 133a2da1f6d..be7f91c6ae6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3063,11 +3063,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
     return DAG.getNode(ISD::UNDEF, VT);
   }
 
-  // Splat is obviously ok. Let legalizer expand it to a shuffle.
-  if (Values.size() == 1)
-    return SDOperand();
-
-  // Special case for single non-zero element.
+  // Special case for a single non-zero, non-undef element.
   if (NumNonZero == 1 && NumElems <= 4) {
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDOperand Item = Op.getOperand(Idx);
@@ -3141,6 +3137,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
     }
   }
 
+  // Splat is obviously ok. Let legalizer expand it to a shuffle.
+  if (Values.size() == 1)
+    return SDOperand();
+
   // A vector full of immediates; various special cases are already
   // handled, so this is best done with a single constant-pool load.
   if (IsAllConstants)
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
new file mode 100644
index 00000000000..e4e5667d2f6
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep esp | count 2
+
+; Inserting a scalar into element 0 of either a zero vector or an undef
+; vector must not go through a stack temporary reloaded with movaps.
+; These should both generate something like this:
+;_test3:
+;       movl    $1234567, %eax
+;       andl    4(%esp), %eax
+;       movd    %eax, %xmm0
+;       ret
+
+define <2 x i64> @test3(i64 %arg) {
+entry:
+        %A = and i64 %arg, 1234567
+        %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
+        ret <2 x i64> %B
+}
+
+define <2 x i64> @test2(i64 %arg) {
+entry:
+        %A = and i64 %arg, 1234567
+        %B = insertelement <2 x i64> undef, i64 %A, i32 0
+        ret <2 x i64> %B
+}
+