mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-21 03:32:29 +00:00
[AVX] Add decode support for VUNPCKLPS/D instructions, both 128-bit
and 256-bit forms. Because the number of elements in a vector does not determine the vector type (4 elements could be v4f32 or v4f64), pass the full type of the vector to decode routines. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126664 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d436d5b1c9
commit
c4db4e5105
lib/Target/X86
@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
// FALL THROUGH.
|
||||
case X86::PUNPCKLBWrm:
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DecodePUNPCKLMask(16, ShuffleMask);
|
||||
DecodePUNPCKLBWMask(16, ShuffleMask);
|
||||
break;
|
||||
case X86::PUNPCKLWDrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::PUNPCKLWDrm:
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DecodePUNPCKLMask(8, ShuffleMask);
|
||||
DecodePUNPCKLWDMask(8, ShuffleMask);
|
||||
break;
|
||||
case X86::PUNPCKLDQrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::PUNPCKLDQrm:
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DecodePUNPCKLMask(4, ShuffleMask);
|
||||
DecodePUNPCKLDQMask(4, ShuffleMask);
|
||||
break;
|
||||
case X86::PUNPCKLQDQrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::PUNPCKLQDQrm:
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
DecodePUNPCKLMask(2, ShuffleMask);
|
||||
DecodePUNPCKLQDQMask(2, ShuffleMask);
|
||||
break;
|
||||
|
||||
case X86::SHUFPDrri:
|
||||
@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::UNPCKLPDrm:
|
||||
DecodeUNPCKLPMask(2, ShuffleMask);
|
||||
DecodeUNPCKLPDMask(2, ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VUNPCKLPDrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VUNPCKLPDrm:
|
||||
DecodeUNPCKLPDMask(2, ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
case X86::VUNPCKLPDYrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VUNPCKLPDYrm:
|
||||
DecodeUNPCKLPDMask(4, ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
case X86::UNPCKLPSrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::UNPCKLPSrm:
|
||||
DecodeUNPCKLPMask(4, ShuffleMask);
|
||||
DecodeUNPCKLPSMask(4, ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VUNPCKLPSrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VUNPCKLPSrm:
|
||||
DecodeUNPCKLPSMask(4, ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
case X86::VUNPCKLPSYrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VUNPCKLPSYrm:
|
||||
DecodeUNPCKLPSMask(8, ShuffleMask);
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
break;
|
||||
case X86::UNPCKHPDrr:
|
||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||
// FALL THROUGH.
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
|
||||
//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm,
|
||||
ShuffleMask.push_back(7);
|
||||
}
|
||||
|
||||
void DecodePUNPCKLMask(unsigned NElts,
|
||||
/// DecodePUNPCKLBWMask - Thin wrapper: builds a vector type of NElts i8
/// elements with MVT::getVectorVT and forwards it, together with ShuffleMask,
/// to DecodeUNPCKLPMask, which appends the decoded mask entries.
void DecodePUNPCKLBWMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
|
||||
}
|
||||
|
||||
/// DecodePUNPCKLWDMask - Thin wrapper: builds a vector type of NElts i16
/// elements with MVT::getVectorVT and forwards it, together with ShuffleMask,
/// to DecodeUNPCKLPMask, which appends the decoded mask entries.
void DecodePUNPCKLWDMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
|
||||
}
|
||||
|
||||
/// DecodePUNPCKLDQMask - Thin wrapper: builds a vector type of NElts i32
/// elements with MVT::getVectorVT and forwards it, together with ShuffleMask,
/// to DecodeUNPCKLPMask, which appends the decoded mask entries.
void DecodePUNPCKLDQMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
|
||||
}
|
||||
|
||||
/// DecodePUNPCKLQDQMask - Thin wrapper: builds a vector type of NElts i64
/// elements with MVT::getVectorVT and forwards it, together with ShuffleMask,
/// to DecodeUNPCKLPMask, which appends the decoded mask entries.
void DecodePUNPCKLQDQMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
|
||||
}
|
||||
|
||||
void DecodePUNPCKLMask(EVT VT,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
for (unsigned i = 0; i != NElts/2; ++i) {
|
||||
ShuffleMask.push_back(i);
|
||||
ShuffleMask.push_back(i+NElts);
|
||||
}
|
||||
DecodeUNPCKLPMask(VT, ShuffleMask);
|
||||
}
|
||||
|
||||
void DecodePUNPCKHMask(unsigned NElts,
|
||||
@ -133,12 +150,24 @@ void DecodeUNPCKHPMask(unsigned NElts,
|
||||
}
|
||||
}
|
||||
|
||||
/// DecodeUNPCKLPSMask - Thin wrapper: builds a vector type of NElts 32-bit
/// elements with MVT::getVectorVT and forwards it, together with ShuffleMask,
/// to DecodeUNPCKLPMask.
/// NOTE(review): the element type passed is the integer MVT::i32 even though
/// the function name refers to the PS (packed single) form. The visible part
/// of DecodeUNPCKLPMask only reads the element count, so this looks harmless,
/// but confirm against the full implementation.
void DecodeUNPCKLPSMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
|
||||
}
|
||||
|
||||
/// DecodeUNPCKLPDMask - Thin wrapper: builds a vector type of NElts 64-bit
/// elements with MVT::getVectorVT and forwards it, together with ShuffleMask,
/// to DecodeUNPCKLPMask.
/// NOTE(review): the element type passed is the integer MVT::i64 even though
/// the function name refers to the PD (packed double) form. The visible part
/// of DecodeUNPCKLPMask only reads the element count, so this looks harmless,
/// but confirm against the full implementation.
void DecodeUNPCKLPDMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
|
||||
}
|
||||
|
||||
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
|
||||
/// etc. NElts indicates the number of elements in the vector allowing it to
|
||||
/// handle different datatypes and vector widths.
|
||||
void DecodeUNPCKLPMask(unsigned NElts,
|
||||
/// etc. VT indicates the type of the vector allowing it to handle different
|
||||
/// datatypes and vector widths.
|
||||
void DecodeUNPCKLPMask(EVT VT,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
|
||||
int NElts = VT.getVectorNumElements();
|
||||
|
||||
for (unsigned i = 0; i != NElts/2; ++i) {
|
||||
ShuffleMask.push_back(i); // Reads from dest
|
||||
ShuffleMask.push_back(i+NElts); // Reads from src
|
||||
|
@ -16,6 +16,7 @@
|
||||
#define X86_SHUFFLE_DECODE_H
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector Mask Decoding
|
||||
@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm,
|
||||
void DecodePSHUFLWMask(unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodePUNPCKLMask(unsigned NElts,
|
||||
void DecodePUNPCKLBWMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodePUNPCKLWDMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodePUNPCKLDQMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodePUNPCKLQDQMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodePUNPCKLMask(EVT VT,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodePUNPCKHMask(unsigned NElts,
|
||||
@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
|
||||
void DecodeUNPCKHPMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodeUNPCKLPSMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodeUNPCKLPDMask(unsigned NElts,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
|
||||
/// etc. NElts indicates the number of elements in the vector allowing it to
|
||||
/// handle different datatypes and vector widths.
|
||||
void DecodeUNPCKLPMask(unsigned NElts,
|
||||
/// etc. VT indicates the type of the vector allowing it to handle different
|
||||
/// datatypes and vector widths.
|
||||
void DecodeUNPCKLPMask(EVT VT,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
} // llvm namespace
|
||||
|
@ -3895,11 +3895,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
||||
case X86ISD::PUNPCKLWD:
|
||||
case X86ISD::PUNPCKLDQ:
|
||||
case X86ISD::PUNPCKLQDQ:
|
||||
DecodePUNPCKLMask(NumElems, ShuffleMask);
|
||||
DecodePUNPCKLMask(VT, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::UNPCKLPS:
|
||||
case X86ISD::UNPCKLPD:
|
||||
DecodeUNPCKLPMask(NumElems, ShuffleMask);
|
||||
case X86ISD::VUNPCKLPS:
|
||||
case X86ISD::VUNPCKLPD:
|
||||
case X86ISD::VUNPCKLPSY:
|
||||
case X86ISD::VUNPCKLPDY:
|
||||
DecodeUNPCKLPMask(VT, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::MOVHLPS:
|
||||
DecodeMOVHLPSMask(NumElems, ShuffleMask);
|
||||
@ -5263,6 +5267,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
|
||||
|
||||
// Break it into (shuffle shuffle_hi, shuffle_lo).
|
||||
Locs.clear();
|
||||
Locs.resize(4);
|
||||
SmallVector<int,8> LoMask(4U, -1);
|
||||
SmallVector<int,8> HiMask(4U, -1);
|
||||
|
||||
@ -5508,12 +5513,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
|
||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||
}
|
||||
|
||||
static inline unsigned getUNPCKLOpcode(EVT VT) {
|
||||
static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
|
||||
switch(VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
|
||||
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
|
||||
case MVT::v4f32: return X86ISD::UNPCKLPS;
|
||||
case MVT::v2f64: return X86ISD::UNPCKLPD;
|
||||
case MVT::v4f32:
|
||||
return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
|
||||
case MVT::v2f64:
|
||||
return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
|
||||
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
|
||||
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
|
||||
case MVT::v16i8: return X86ISD::PUNPCKLBW;
|
||||
case MVT::v8i16: return X86ISD::PUNPCKLWD;
|
||||
default:
|
||||
@ -5641,7 +5650,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
// unpckh_undef). Only use pshufd if speed is more important than size.
|
||||
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
|
||||
if (VT != MVT::v2i64 && VT != MVT::v2f64)
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
|
||||
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
|
||||
if (VT != MVT::v2i64 && VT != MVT::v2f64)
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
|
||||
@ -5762,7 +5771,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
if (X86::isUNPCKLMask(SVOp))
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
|
||||
dl, VT, V1, V2, DAG);
|
||||
|
||||
if (X86::isUNPCKHMask(SVOp))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
|
||||
@ -5789,7 +5799,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
|
||||
|
||||
if (X86::isUNPCKLMask(NewSVOp))
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
|
||||
dl, VT, V2, V1, DAG);
|
||||
|
||||
if (X86::isUNPCKHMask(NewSVOp))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
|
||||
@ -5812,8 +5823,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
|
||||
SVOp->getSplatIndex() == 0 && V2IsUndef) {
|
||||
if (VT == MVT::v2f64)
|
||||
return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
|
||||
if (VT == MVT::v2f64) {
|
||||
X86ISD::NodeType Opcode =
|
||||
getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
|
||||
return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
|
||||
}
|
||||
if (VT == MVT::v2i64)
|
||||
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
|
||||
}
|
||||
@ -5840,7 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
if (X86::isUNPCKL_v_undef_Mask(SVOp))
|
||||
if (VT != MVT::v2i64 && VT != MVT::v2f64)
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
|
||||
dl, VT, V1, V1, DAG);
|
||||
if (X86::isUNPCKH_v_undef_Mask(SVOp))
|
||||
if (VT != MVT::v2i64 && VT != MVT::v2f64)
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
|
||||
|
Loading…
x
Reference in New Issue
Block a user