mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-26 23:24:34 +00:00
Teach DAGCombiner how to fold a SIGN_EXTEND_INREG of a BUILD_VECTOR of
ConstantSDNodes (or UNDEFs) into a simple BUILD_VECTOR.

For example, given the following sequence of DAG nodes:

  i32 C = Constant<1>
  v4i32 V = BUILD_VECTOR C, C, C, C
  v4i32 Result = SIGN_EXTEND_INREG V, ValueType:v4i1

The SIGN_EXTEND_INREG node can be folded into a BUILD_VECTOR, since its
input vector is a BUILD_VECTOR of constants. The optimized sequence is:

  i32 C = Constant<-1>
  v4i32 Result = BUILD_VECTOR C, C, C, C

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198084 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -70,6 +70,10 @@ namespace ISD {
|
|||||||
/// BUILD_VECTOR where all of the elements are 0 or undef.
|
/// BUILD_VECTOR where all of the elements are 0 or undef.
|
||||||
bool isBuildVectorAllZeros(const SDNode *N);
|
bool isBuildVectorAllZeros(const SDNode *N);
|
||||||
|
|
||||||
|
/// \brief Return true if the specified node is a BUILD_VECTOR node whose
|
||||||
|
/// operands are each either a ConstantSDNode or undef (all-undef qualifies).
|
||||||
|
bool isBuildVectorOfConstantSDNodes(const SDNode *N);
|
||||||
|
|
||||||
/// isScalarToVector - Return true if the specified node is a
|
/// isScalarToVector - Return true if the specified node is a
|
||||||
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
|
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
|
||||||
/// element is not an undef.
|
/// element is not an undef.
|
||||||
|
@ -5511,6 +5511,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
|
|||||||
BSwap, N1);
|
BSwap, N1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
|
||||||
|
// into a build_vector.
|
||||||
|
if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
|
||||||
|
SmallVector<SDValue, 8> Elts;
|
||||||
|
unsigned NumElts = N0->getNumOperands();
|
||||||
|
unsigned ShAmt = VTBits - EVTBits;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i != NumElts; ++i) {
|
||||||
|
SDValue Op = N0->getOperand(i);
|
||||||
|
if (Op->getOpcode() == ISD::UNDEF) {
|
||||||
|
Elts.push_back(Op);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
|
||||||
|
const APInt &C = CurrentND->getAPIntValue();
|
||||||
|
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt),
|
||||||
|
Op.getValueType()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
|
||||||
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -179,6 +179,22 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \brief Return true if the specified node is a BUILD_VECTOR node of
|
||||||
|
/// all ConstantSDNode or undef.
|
||||||
|
bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
|
||||||
|
if (N->getOpcode() != ISD::BUILD_VECTOR)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
|
||||||
|
SDValue Op = N->getOperand(i);
|
||||||
|
if (Op.getOpcode() == ISD::UNDEF)
|
||||||
|
continue;
|
||||||
|
if (!isa<ConstantSDNode>(Op))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// isScalarToVector - Return true if the specified node is a
|
/// isScalarToVector - Return true if the specified node is a
|
||||||
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
|
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
|
||||||
/// element is not an undef.
|
/// element is not an undef.
|
||||||
|
@ -3,10 +3,20 @@
|
|||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
target triple = "x86_64-apple-darwin11.2.0"
|
target triple = "x86_64-apple-darwin11.2.0"
|
||||||
|
|
||||||
; CHECK: @foo8
|
; During legalization, the vselect mask is 'type legalized' into a
|
||||||
; CHECK: psll
|
; wider BUILD_VECTOR. This causes the introduction of a new
|
||||||
; CHECK: psraw
|
; sign_extend_inreg in the DAG.
|
||||||
; CHECK: pblendvb
|
;
|
||||||
|
; A sign_extend_inreg of a vector of ConstantSDNode or undef can
|
||||||
|
; always be folded into a simple build_vector.
|
||||||
|
;
|
||||||
|
; Make sure that the sign_extend_inreg is simplified and that we
|
||||||
|
; don't generate psll, psraw and pblendvb from the vselect.
|
||||||
|
|
||||||
|
; CHECK-LABEL: foo8
|
||||||
|
; CHECK-NOT: psll
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK-NOT: pblendvb
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define void @foo8(float* nocapture %RET) nounwind {
|
define void @foo8(float* nocapture %RET) nounwind {
|
||||||
allocas:
|
allocas:
|
||||||
@ -17,4 +27,3 @@ allocas:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
; shifting the needed bit to the MSB, and not using shl+sra.
|
; shifting the needed bit to the MSB, and not using shl+sra.
|
||||||
|
|
||||||
;CHECK-LABEL: vsel_float:
|
;CHECK-LABEL: vsel_float:
|
||||||
;CHECK: movl $-2147483648
|
;CHECK: movl $-1
|
||||||
;CHECK-NEXT: movd
|
;CHECK-NEXT: movd
|
||||||
;CHECK-NEXT: blendvps
|
;CHECK-NEXT: blendvps
|
||||||
;CHECK: ret
|
;CHECK: ret
|
||||||
@ -15,7 +15,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
;CHECK-LABEL: vsel_4xi8:
|
;CHECK-LABEL: vsel_4xi8:
|
||||||
;CHECK: movl $-2147483648
|
;CHECK: movl $-1
|
||||||
;CHECK-NEXT: movd
|
;CHECK-NEXT: movd
|
||||||
;CHECK-NEXT: blendvps
|
;CHECK-NEXT: blendvps
|
||||||
;CHECK: ret
|
;CHECK: ret
|
||||||
@ -26,12 +26,12 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
|
|||||||
|
|
||||||
|
|
||||||
; We do not have native support for v8i16 blends and we have to use the
|
; We do not have native support for v8i16 blends and we have to use the
|
||||||
; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not r
|
; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not
|
||||||
; reduce the mask in this case.
|
; reduce the mask in this case.
|
||||||
;CHECK-LABEL: vsel_8xi16:
|
;CHECK-LABEL: vsel_8xi16:
|
||||||
;CHECK: psllw
|
;CHECK: andps
|
||||||
;CHECK: psraw
|
;CHECK: andps
|
||||||
;CHECK: pblendvb
|
;CHECK: orps
|
||||||
;CHECK: ret
|
;CHECK: ret
|
||||||
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
|
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
|
||||||
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
|
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
|
; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
|
||||||
|
|
||||||
; CHECK: vsel_float
|
; CHECK: vsel_float
|
||||||
; CHECK: pandn
|
; CHECK: xorps
|
||||||
; CHECK: pand
|
; CHECK: movss
|
||||||
; CHECK: por
|
; CHECK: orps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
|
define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
|
||||||
%A = load <4 x float>* %v1
|
%A = load <4 x float>* %v1
|
||||||
@ -14,9 +14,9 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: vsel_i32
|
; CHECK: vsel_i32
|
||||||
; CHECK: pandn
|
; CHECK: xorps
|
||||||
; CHECK: pand
|
; CHECK: movss
|
||||||
; CHECK: por
|
; CHECK: orps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
|
define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
|
||||||
%A = load <4 x i32>* %v1
|
%A = load <4 x i32>* %v1
|
||||||
|
133
test/CodeGen/X86/vselect.ll
Normal file
133
test/CodeGen/X86/vselect.ll
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=-sse4.1 < %s | FileCheck %s
|
||||||
|
|
||||||
|
; Verify that we don't emit packed vector shift instructions if the
|
||||||
|
; condition used by the vector select is a vector of constants.
|
||||||
|
|
||||||
|
|
||||||
|
; Alternating constant mask: lanes 0 and 2 come from %a, lanes 1 and 3 from %b.
define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
|
||||||
|
; CHECK-LABEL: test1
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; Constant mask: lanes 0-1 come from %a, lanes 2-3 from %b.
define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
  %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
|
||||||
|
; CHECK-LABEL: test2
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; Constant mask: lanes 0-1 come from %b, lanes 2-3 from %a.
define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
  %sel = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
|
||||||
|
; CHECK-LABEL: test3
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; All-false constant mask: every lane is taken from %b.
define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
  %sel = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
|
||||||
|
; CHECK-LABEL: test4
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: movaps %xmm1, %xmm0
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; All-true constant mask: every lane is taken from %a.
define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
  %sel = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
|
||||||
|
; CHECK-LABEL: test5
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; Alternating constant mask on v8i16: even lanes come from %a, odd lanes
; from %b.
;
; The false operand must be %b. With %a on both sides the select is
; trivially %a regardless of the mask, so the vector select never reaches
; lowering and the shift checks that follow would pass vacuously.
define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
  %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %1
}
|
||||||
|
; CHECK-LABEL: test6
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; Constant mask: lanes 0-3 come from %a, lanes 4-7 from %b.
define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test7
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
||||||
|
; Constant mask: lanes 0-3 come from %b, lanes 4-7 from %a.
define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test8
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
; All-false constant mask on v8i16: every lane is taken from %b.
define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test9
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: movaps %xmm1, %xmm0
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
|
; All-true constant mask on v8i16: every lane is taken from %a.
define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test10
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
; Mask mixing constants and undef: lanes 1, 2, 5 and 6 select %a, lanes 0 and
; 3 select %b, and lanes 4 and 7 have an undef condition.
define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test11
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
; Mask of false and undef only: lanes 2 and 7 have an undef condition, every
; other lane selects %b.
define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test12
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
; Fully undef mask: the condition of every lane is undef.
define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
  %sel = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
|
||||||
|
; CHECK-LABEL: test13
|
||||||
|
; CHECK-NOT: psllw
|
||||||
|
; CHECK-NOT: psraw
|
||||||
|
; CHECK: ret
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user