Fix shuffle decoding logic to handle UNPCKLPS/UNPCKLPD on 256-bit vectors correctly. Add support for decoding UNPCKHPS/UNPCKHPD for AVX 128-bit and 256-bit forms.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145055 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2011-11-22 01:57:35 +00:00
parent c0d82857e0
commit f7de577a08
4 changed files with 73 additions and 21 deletions

View File

@ -197,16 +197,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
DecodeUNPCKHPMask(2, ShuffleMask);
DecodeUNPCKHPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDrm:
DecodeUNPCKHPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::VUNPCKHPDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDYrm:
// 256-bit unpack-high of packed doubles (4 x 64-bit elements). This must use
// the high-half decoder; the low-half decoder would emit the VUNPCKLPD mask
// in the instruction comment.
DecodeUNPCKHPDMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::UNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
DecodeUNPCKHPMask(4, ShuffleMask);
DecodeUNPCKHPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSrm:
DecodeUNPCKHPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::VUNPCKHPSYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSYrm:
DecodeUNPCKHPSMask(8, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
break;
case X86::VPERMILPSri:
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
ShuffleMask);

View File

@ -142,11 +142,32 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
}
}
void DecodeUNPCKHPMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i+NElts/2); // Reads from dest
ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
void DecodeUNPCKHPSMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
}
void DecodeUNPCKHPDMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask) {
DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
}
/// DecodeUNPCKHPMask - Append to ShuffleMask the element indices selected by
/// an unpack-high operation on a vector of type VT. Indices below the element
/// count refer to the first source; indices offset by the element count refer
/// to the second source.
void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
  const unsigned TotalElts = VT.getVectorNumElements();

  // AVX defines UNPCK* to operate on each 128-bit lane independently, so the
  // mask is built lane by lane. Vectors narrower than 128 bits (MMX) are
  // treated as a single lane.
  unsigned LaneCount = VT.getSizeInBits() / 128;
  if (LaneCount == 0)
    LaneCount = 1;
  const unsigned EltsPerLane = TotalElts / LaneCount;
  const unsigned HalfLane = EltsPerLane / 2;

  for (unsigned Lane = 0; Lane != LaneCount; ++Lane) {
    const unsigned LaneBase = Lane * EltsPerLane;
    // Walk the upper half of this lane, interleaving the two sources.
    for (unsigned Off = HalfLane; Off != EltsPerLane; ++Off) {
      const unsigned Idx = LaneBase + Off;
      ShuffleMask.push_back(Idx);             // Reads from dest/src1
      ShuffleMask.push_back(Idx + TotalElts); // Reads from src/src2
    }
  }
}
@ -163,8 +184,7 @@ void DecodeUNPCKLPDMask(unsigned NElts,
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@ -173,16 +193,13 @@ void DecodeUNPCKLPMask(EVT VT,
if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
unsigned Start = 0;
unsigned End = NumLaneElts / 2;
for (unsigned s = 0; s < NumLanes; ++s) {
unsigned Start = s * NumLaneElts;
unsigned End = s * NumLaneElts + NumLaneElts/2;
for (unsigned i = Start; i != End; ++i) {
ShuffleMask.push_back(i); // Reads from dest/src1
ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
ShuffleMask.push_back(i); // Reads from dest/src1
ShuffleMask.push_back(i+NumElts); // Reads from src/src2
}
// Process the next 128 bits.
Start += NumLaneElts;
End += NumLaneElts;
}
}

View File

@ -67,8 +67,16 @@ void DecodePUNPCKHMask(unsigned NElts,
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeUNPCKHPMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKHPSMask - Decodes the unpack-high shuffle mask for a vector of
/// NElts 32-bit elements, appending the element indices to ShuffleMask.
void DecodeUNPCKHPSMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKHPDMask - Decodes the unpack-high shuffle mask for a vector of
/// NElts 64-bit elements, appending the element indices to ShuffleMask.
void DecodeUNPCKHPDMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeUNPCKLPSMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
@ -79,8 +87,7 @@ void DecodeUNPCKLPDMask(unsigned NElts,
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit

View File

@ -4653,7 +4653,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::UNPCKHPD:
case X86ISD::VUNPCKHPSY:
case X86ISD::VUNPCKHPDY:
DecodeUNPCKHPMask(NumElems, ShuffleMask);
DecodeUNPCKHPMask(VT, ShuffleMask);
break;
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD: