mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
Modify the code that lowers shuffles to blends so that it uses vblendXX instead of blendvXX.
The blendv instructions take the selection mask in a register, while the vblend instructions take it as an immediate. On Sandy Bridge both forms still have the same latency and execute on the same execution ports. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154396 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
45fb79bc54
commit
50e64cfe6e
@ -5391,59 +5391,76 @@ static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
|
|||||||
SDValue V1 = SVOp->getOperand(0);
|
SDValue V1 = SVOp->getOperand(0);
|
||||||
SDValue V2 = SVOp->getOperand(1);
|
SDValue V2 = SVOp->getOperand(1);
|
||||||
DebugLoc dl = SVOp->getDebugLoc();
|
DebugLoc dl = SVOp->getDebugLoc();
|
||||||
LLVMContext *Context = DAG.getContext();
|
|
||||||
EVT VT = Op.getValueType();
|
EVT VT = Op.getValueType();
|
||||||
EVT InVT = V1.getValueType();
|
EVT InVT = V1.getValueType();
|
||||||
EVT EltVT = VT.getVectorElementType();
|
EVT EltVT = VT.getVectorElementType();
|
||||||
unsigned EltSize = EltVT.getSizeInBits();
|
|
||||||
int MaskSize = VT.getVectorNumElements();
|
int MaskSize = VT.getVectorNumElements();
|
||||||
int InSize = InVT.getVectorNumElements();
|
int InSize = InVT.getVectorNumElements();
|
||||||
|
|
||||||
// TODO: At the moment we only use AVX blends. We could also use SSE4 blends.
|
if (!Subtarget->hasSSE41())
|
||||||
if (!Subtarget->hasAVX())
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
if (MaskSize != InSize)
|
if (MaskSize != InSize)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SmallVector<Constant*,2> MaskVals;
|
int ISDNo = 0;
|
||||||
ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0));
|
MVT OpTy;
|
||||||
ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1));
|
|
||||||
|
switch (VT.getSimpleVT().SimpleTy) {
|
||||||
|
default: return SDValue();
|
||||||
|
case MVT::v8i16:
|
||||||
|
ISDNo = X86ISD::BLENDPW;
|
||||||
|
OpTy = MVT::v8i16;
|
||||||
|
break;
|
||||||
|
case MVT::v4i32:
|
||||||
|
case MVT::v4f32:
|
||||||
|
ISDNo = X86ISD::BLENDPS;
|
||||||
|
OpTy = MVT::v4f32;
|
||||||
|
break;
|
||||||
|
case MVT::v2i64:
|
||||||
|
case MVT::v2f64:
|
||||||
|
ISDNo = X86ISD::BLENDPD;
|
||||||
|
OpTy = MVT::v2f64;
|
||||||
|
break;
|
||||||
|
case MVT::v8i32:
|
||||||
|
case MVT::v8f32:
|
||||||
|
if (!Subtarget->hasAVX())
|
||||||
|
return SDValue();
|
||||||
|
ISDNo = X86ISD::BLENDPS;
|
||||||
|
OpTy = MVT::v8f32;
|
||||||
|
break;
|
||||||
|
case MVT::v4i64:
|
||||||
|
case MVT::v4f64:
|
||||||
|
if (!Subtarget->hasAVX())
|
||||||
|
return SDValue();
|
||||||
|
ISDNo = X86ISD::BLENDPD;
|
||||||
|
OpTy = MVT::v4f64;
|
||||||
|
break;
|
||||||
|
case MVT::v16i16:
|
||||||
|
if (!Subtarget->hasAVX2())
|
||||||
|
return SDValue();
|
||||||
|
ISDNo = X86ISD::BLENDPW;
|
||||||
|
OpTy = MVT::v16i16;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assert(ISDNo && "Invalid Op Number");
|
||||||
|
|
||||||
|
unsigned MaskVals = 0;
|
||||||
|
|
||||||
for (int i = 0; i < MaskSize; ++i) {
|
for (int i = 0; i < MaskSize; ++i) {
|
||||||
int EltIdx = SVOp->getMaskElt(i);
|
int EltIdx = SVOp->getMaskElt(i);
|
||||||
if (EltIdx == i || EltIdx == -1)
|
if (EltIdx == i || EltIdx == -1)
|
||||||
MaskVals.push_back(NegOne);
|
MaskVals |= (1<<i);
|
||||||
else if (EltIdx == (i + MaskSize))
|
else if (EltIdx == (i + MaskSize))
|
||||||
MaskVals.push_back(Zero);
|
continue; // Bit is set to zero;
|
||||||
else return SDValue();
|
else return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
Constant *MaskC = ConstantVector::get(MaskVals);
|
V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
|
||||||
EVT MaskTy = EVT::getEVT(MaskC->getType());
|
V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
|
||||||
assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size");
|
SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
|
||||||
SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy);
|
DAG.getConstant(MaskVals, MVT::i32));
|
||||||
unsigned Alignment = cast<ConstantPoolSDNode>(MaskIdx)->getAlignment();
|
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
|
||||||
SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx,
|
|
||||||
MachinePointerInfo::getConstantPool(),
|
|
||||||
false, false, false, Alignment);
|
|
||||||
|
|
||||||
if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8)
|
|
||||||
return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
|
|
||||||
|
|
||||||
if (Subtarget->hasAVX()) {
|
|
||||||
switch (MaskTy.getSimpleVT().SimpleTy) {
|
|
||||||
default: return SDValue();
|
|
||||||
case MVT::v16i8:
|
|
||||||
case MVT::v4i32:
|
|
||||||
case MVT::v2i64:
|
|
||||||
case MVT::v8i32:
|
|
||||||
case MVT::v4i64:
|
|
||||||
return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// v8i16 shuffles - Prefer shuffles in the following order:
|
// v8i16 shuffles - Prefer shuffles in the following order:
|
||||||
@ -11050,6 +11067,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
case X86ISD::ANDNP: return "X86ISD::ANDNP";
|
case X86ISD::ANDNP: return "X86ISD::ANDNP";
|
||||||
case X86ISD::PSIGN: return "X86ISD::PSIGN";
|
case X86ISD::PSIGN: return "X86ISD::PSIGN";
|
||||||
case X86ISD::BLENDV: return "X86ISD::BLENDV";
|
case X86ISD::BLENDV: return "X86ISD::BLENDV";
|
||||||
|
case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
|
||||||
|
case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
|
||||||
|
case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
|
||||||
case X86ISD::HADD: return "X86ISD::HADD";
|
case X86ISD::HADD: return "X86ISD::HADD";
|
||||||
case X86ISD::HSUB: return "X86ISD::HSUB";
|
case X86ISD::HSUB: return "X86ISD::HSUB";
|
||||||
case X86ISD::FHADD: return "X86ISD::FHADD";
|
case X86ISD::FHADD: return "X86ISD::FHADD";
|
||||||
|
@ -175,9 +175,14 @@ namespace llvm {
|
|||||||
/// PSIGN - Copy integer sign.
|
/// PSIGN - Copy integer sign.
|
||||||
PSIGN,
|
PSIGN,
|
||||||
|
|
||||||
/// BLEND family of opcodes
|
/// BLENDV - Blend where the selector is an XMM.
|
||||||
BLENDV,
|
BLENDV,
|
||||||
|
|
||||||
|
/// BLENDxx - Blend where the selector is an immediate.
|
||||||
|
BLENDPW,
|
||||||
|
BLENDPS,
|
||||||
|
BLENDPD,
|
||||||
|
|
||||||
/// HADD - Integer horizontal add.
|
/// HADD - Integer horizontal add.
|
||||||
HADD,
|
HADD,
|
||||||
|
|
||||||
|
@ -126,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
|||||||
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
|
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
|
||||||
|
|
||||||
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
|
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
|
||||||
|
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||||
|
SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
|
||||||
|
|
||||||
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
|
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
|
||||||
|
|
||||||
@ -158,6 +160,10 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
|
|||||||
|
|
||||||
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
|
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
|
||||||
|
|
||||||
|
def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
|
||||||
|
def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
|
||||||
|
def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE Complex Patterns
|
// SSE Complex Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -6735,12 +6735,22 @@ let Predicates = [HasAVX] in {
|
|||||||
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
|
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
|
||||||
(v4f64 VR256:$src2))),
|
(v4f64 VR256:$src2))),
|
||||||
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
|
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
|
||||||
|
|
||||||
|
def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
|
||||||
|
(imm:$mask))),
|
||||||
|
(VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
|
||||||
|
def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
|
||||||
|
(imm:$mask))),
|
||||||
|
(VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX2] in {
|
let Predicates = [HasAVX2] in {
|
||||||
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
|
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
|
||||||
(v32i8 VR256:$src2))),
|
(v32i8 VR256:$src2))),
|
||||||
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
|
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
|
||||||
|
def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
|
||||||
|
(imm:$mask))),
|
||||||
|
(VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
||||||
@ -6789,6 +6799,17 @@ let Predicates = [HasSSE41] in {
|
|||||||
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
|
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
|
||||||
(v2f64 VR128:$src2))),
|
(v2f64 VR128:$src2))),
|
||||||
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
|
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
|
||||||
|
|
||||||
|
def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
|
||||||
|
(imm:$mask))),
|
||||||
|
(VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
|
||||||
|
def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
|
||||||
|
(imm:$mask))),
|
||||||
|
(VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
|
||||||
|
def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
|
||||||
|
(imm:$mask))),
|
||||||
|
(VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in
|
let Predicates = [HasAVX] in
|
||||||
|
@ -164,7 +164,7 @@ i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: blend1
|
; CHECK: blend1
|
||||||
; CHECK: vblendvps
|
; CHECK: vblendps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
||||||
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||||
@ -172,7 +172,7 @@ define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: blend2
|
; CHECK: blend2
|
||||||
; CHECK: vblendvps
|
; CHECK: vblendps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
||||||
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||||
@ -180,7 +180,7 @@ define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: blend2a
|
; CHECK: blend2a
|
||||||
; CHECK: vblendvps
|
; CHECK: vblendps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
|
define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
|
||||||
%t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
%t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||||
@ -188,7 +188,7 @@ define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinlin
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: blend3
|
; CHECK: blend3
|
||||||
; CHECK-NOT: vblendvps
|
; CHECK-NOT: vblendps
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
||||||
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
|
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
|
||||||
@ -196,7 +196,7 @@ define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: blend4
|
; CHECK: blend4
|
||||||
; CHECK: vblendvpd
|
; CHECK: vblendpd
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
|
define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
|
||||||
%t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
%t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
|
; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
|
||||||
|
|
||||||
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
|
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
|
||||||
entry:
|
entry:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user