From dc4756bfda10d188103906039aeebaae48325c1c Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 29 Oct 2007 06:19:48 +0000 Subject: [PATCH] add a note. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43444 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/README-SSE.txt | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index d88685f6a11..aa3405ca339 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -708,3 +708,36 @@ The instruction at (1) can be moved out of the main body of the loop. //===---------------------------------------------------------------------===// +These functions: + +#include +__m128i a; +void x(unsigned short n) { + a = _mm_slli_epi32 (a, n); +} +void y(unsigned n) { + a = _mm_slli_epi32 (a, n); +} + +compile to ( -O3 -static -fomit-frame-pointer): +_x: + movzwl 4(%esp), %eax + movd %eax, %xmm0 + movaps _a, %xmm1 + pslld %xmm0, %xmm1 + movaps %xmm1, _a + ret +_y: + movd 4(%esp), %xmm0 + movaps _a, %xmm1 + pslld %xmm0, %xmm1 + movaps %xmm1, _a + ret + +"y" looks good, but "x" does silly movzwl stuff around into a GPR. It seems +like movd would be sufficient in both cases as the value is already zero +extended in the 32-bit stack slot IIRC. For signed short, it should also be +save, as a really-signed value would be undefined for pslld. + + +//===---------------------------------------------------------------------===//