llvm-6502/test/CodeGen
Nate Begeman bdcb5afb77 ~40% faster vector shl <4 x i32> on SSE 4.1. Larger improvements for smaller types coming in future patches.
For:

define <2 x i64> @shl(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
entry:
  %shl = shl <4 x i32> %r, %a                     ; <<4 x i32>> [#uses=1]
  %tmp2 = bitcast <4 x i32> %shl to <2 x i64>     ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %tmp2
}

We get:

_shl:                                   ## @shl
	pslld	$23, %xmm1
	paddd	LCPI0_0, %xmm1
	cvttps2dq	%xmm1, %xmm1
	pmulld	%xmm1, %xmm0
	ret

Instead of:

_shl:                                   ## @shl
	pshufd	$3, %xmm0, %xmm2
	movd	%xmm2, %eax
	pshufd	$3, %xmm1, %xmm2
	movd	%xmm2, %ecx
	shll	%cl, %eax
	movd	%eax, %xmm2
	pshufd	$1, %xmm0, %xmm3
	movd	%xmm3, %eax
	pshufd	$1, %xmm1, %xmm3
	movd	%xmm3, %ecx
	shll	%cl, %eax
	movd	%eax, %xmm3
	punpckldq	%xmm2, %xmm3
	movd	%xmm0, %eax
	movd	%xmm1, %ecx
	shll	%cl, %eax
	movd	%eax, %xmm2
	movhlps	%xmm0, %xmm0
	movd	%xmm0, %eax
	movhlps	%xmm1, %xmm1
	movd	%xmm1, %ecx
	shll	%cl, %eax
	movd	%eax, %xmm0
	punpckldq	%xmm0, %xmm2
	movdqa	%xmm2, %xmm0
	punpckldq	%xmm3, %xmm0
	ret


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109549 91177308-0d34-0410-b5e6-96231b3b80d8
2010-07-27 22:37:06 +00:00
..
Alpha
ARM Currently EH lowering code expects typeinfo to be global only. 2010-07-26 18:45:39 +00:00
Blackfin Remove TargetInstrInfo::copyRegToReg entirely. 2010-07-11 17:01:17 +00:00
CBackend
CellSPU Remove many calls to TII::isMoveInstr. Targets should be producing COPY anyway. 2010-07-16 04:45:42 +00:00
CPP
Generic Fix a crash in the dag combiner caused by ConstantFoldBIT_CONVERTofBUILD_VECTOR calling itself 2010-07-27 18:02:18 +00:00
MBlaze
Mips Fix PR7174, a couple of Mips fixes: 2010-07-20 08:37:04 +00:00
MSP430
PIC16
PowerPC Consider this function: 2010-07-16 22:51:10 +00:00
SPARC
SystemZ SystemZ really does mean "has calls" and not just "adjusts stack." Go ahead and 2010-05-14 22:17:42 +00:00
Thumb Feed the right output into FileCheck. 2010-07-16 10:58:02 +00:00
Thumb2 update tests for smarter BIC usage 2010-07-20 16:16:48 +00:00
X86 ~40% faster vector shl <4 x i32> on SSE 4.1. Larger improvements for smaller types coming in future patches. 2010-07-27 22:37:06 +00:00
XCore