diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 4016e4ee7a2..4dd7238b6f5 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1107,3 +1107,40 @@ icc generates: So icc is smart enough to know that B is in memory so it doesn't load it and store it back to stack. + +//===---------------------------------------------------------------------===// + +__m128d test1( __m128d A, __m128d B) { + return _mm_shuffle_pd(A, B, 0x3); +} + +compiles to + +shufpd $3, %xmm1, %xmm0 + +Perhaps it's better to use unpckhpd instead? + +unpckhpd %xmm1, %xmm0 + +It is unclear whether unpckhpd is faster, but its encoding is shorter (no immediate byte). + +//===---------------------------------------------------------------------===// + +typedef short v8i16 __attribute__ ((__vector_size__ (16))); +v8i16 test(v8i16 x, v8i16 y) { + return x + y; +} + +compiles to + +_test: + paddw %xmm0, %xmm1 + movaps %xmm1, %xmm0 + ret + +It should be + + paddw %xmm1, %xmm0 + ret + +since paddw is commutative.