mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
Use movaps / movd to extract vector element 0 even with sse4.1. It's still cheaper than pextrw, especially if the value is in memory.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61555 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ccc776fd83
commit
52ceafa5c7
@ -4215,6 +4215,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
|
|||||||
DAG.getValueType(VT));
|
DAG.getValueType(VT));
|
||||||
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
|
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
|
||||||
} else if (VT.getSizeInBits() == 16) {
|
} else if (VT.getSizeInBits() == 16) {
|
||||||
|
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||||
|
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
|
||||||
|
if (Idx == 0)
|
||||||
|
return DAG.getNode(ISD::TRUNCATE, MVT::i16,
|
||||||
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
|
||||||
|
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
|
||||||
|
Op.getOperand(0)),
|
||||||
|
Op.getOperand(1)));
|
||||||
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
|
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
|
||||||
Op.getOperand(0), Op.getOperand(1));
|
Op.getOperand(0), Op.getOperand(1));
|
||||||
SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
|
SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 -o %t -f
|
||||||
; RUN: grep movss %t | count 3
|
; RUN: grep movss %t | count 3
|
||||||
; RUN: grep movhlps %t | count 1
|
; RUN: grep movhlps %t | count 1
|
||||||
; RUN: grep pshufd %t | count 1
|
; RUN: grep pshufd %t | count 1
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {\$36,} | count 2
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
|
||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep shufps | count 2
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
|
||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 1
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
|
||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movhpd | count 1
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
|
||||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep unpcklpd | count 1
|
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
|
||||||
|
|
||||||
define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
|
define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
|
||||||
%tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
|
%tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep punpcklqdq | count 1
|
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2,-sse41 | grep punpcklqdq | count 1
|
||||||
|
|
||||||
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
|
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
|
||||||
%tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
|
%tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | grep movss | count 1
|
||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pinsrw
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2,-sse41 | not grep pinsrw
|
||||||
|
|
||||||
define void @test(<4 x float>* %F, i32 %I) {
|
define void @test(<4 x float>* %F, i32 %I) {
|
||||||
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
|
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=1]
|
||||||
|
@ -5,24 +5,24 @@
|
|||||||
; RUN: grep pshuflw %t | count 3
|
; RUN: grep pshuflw %t | count 3
|
||||||
; RUN: grep pshufhw %t | count 2
|
; RUN: grep pshufhw %t | count 2
|
||||||
|
|
||||||
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) {
|
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
|
||||||
%tmp1 = load <8 x i16>* %A
|
%tmp1 = load <8 x i16>* %A
|
||||||
%tmp2 = load <8 x i16>* %B
|
%tmp2 = load <8 x i16>* %B
|
||||||
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
|
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
|
||||||
ret <8 x i16> %tmp3
|
ret <8 x i16> %tmp3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
|
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||||
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
|
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
|
||||||
ret <8 x i16> %tmp
|
ret <8 x i16> %tmp
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
|
define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||||
%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
|
%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
|
||||||
ret <8 x i16> %tmp
|
ret <8 x i16> %tmp
|
||||||
}
|
}
|
||||||
|
|
||||||
define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) {
|
define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||||
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
|
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
|
||||||
ret <8 x i16> %tmp
|
ret <8 x i16> %tmp
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -o %t -f
|
||||||
; RUN: grep punpcklwd %t | count 1
|
; RUN: grep punpcklwd %t | count 1
|
||||||
; RUN: grep pextrw %t | count 8
|
; RUN: grep pextrw %t | count 6
|
||||||
; RUN: grep pinsrw %t | count 8
|
; RUN: grep pinsrw %t | count 8
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse42 -disable-mmx -o %t -f
|
||||||
; RUN: grep paddb %t | count 1
|
; RUN: grep paddb %t | count 1
|
||||||
; RUN: grep pextrb %t | count 1
|
; RUN: grep pextrb %t | count 1
|
||||||
; RUN: grep pextrw %t | count 1
|
; RUN: not grep pextrw %t
|
||||||
|
|
||||||
; Widen a v3i8 to v16i8 to use a vector add
|
; Widen a v3i8 to v16i8 to use a vector add
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user