From a956db2a6351927938a4d9960b496f15db4843c2 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 10 Apr 2006 21:51:03 +0000 Subject: [PATCH] add a note git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27567 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 9db0cef4bd4..f9d36434ba2 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -675,6 +675,29 @@ Perhaps use pxor / xorp* to clear a XMM register first? //===---------------------------------------------------------------------===// +Better codegen for: + +void f(float a, float b, vector float * out) { *out = (vector float){ a, 0.0, 0.0, b}; } +void f(float a, float b, vector float * out) { *out = (vector float){ a, b, 0.0, 0}; } + +For the latter we generate: + +_f: + pxor %xmm0, %xmm0 + movss 8(%esp), %xmm1 + movaps %xmm0, %xmm2 + unpcklps %xmm1, %xmm2 + movss 4(%esp), %xmm1 + unpcklps %xmm0, %xmm1 + unpcklps %xmm2, %xmm1 + movl 12(%esp), %eax + movaps %xmm1, (%eax) + ret + +This seems like it should use shufps, one for each of a & b. + +//===---------------------------------------------------------------------===// + Adding to the list of cmp / test poor codegen issues: int test(__m128 *A, __m128 *B) {