Fix issues in shuffle decoding around VPERM* instructions. Fix shuffle decoding for VSHUFPS/VSHUFPD for 256-bit types. Add pattern matching for memory forms of VPERMILPS/VPERMILPD.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper
2011-11-29 07:49:05 +00:00
parent 5d2f8c3155
commit 36e36ace77
6 changed files with 90 additions and 30 deletions

View File

@ -163,14 +163,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::SHUFPDrmi: case X86::SHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VSHUFPDrri: case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::VSHUFPDrmi: case X86::VSHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDYrmi:
DecodeSHUFPMask(MVT::v4f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
@ -179,14 +187,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::SHUFPSrmi: case X86::SHUFPSrmi:
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VSHUFPSrri: case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::VSHUFPSrmi: case X86::VSHUFPSrmi:
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSYrmi:
DecodeSHUFPMask(MVT::v8f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
@ -284,29 +300,47 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPSri: case X86::VPERMILPSri:
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSmi:
DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPSYri: case X86::VPERMILPSYri:
DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSYmi:
DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPDri: case X86::VPERMILPDri:
DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDmi:
DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPDYri: case X86::VPERMILPDYri:
DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDYmi:
DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERM2F128rr: case X86::VPERM2F128rr:
DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); case X86::VPERM2I128rr:
Src1Name = getRegName(MI->getOperand(1).getReg());
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VPERM2F128rm:
case X86::VPERM2I128rm:
DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break; break;
} }

View File

@ -128,17 +128,27 @@ void DecodePUNPCKHMask(unsigned NElts,
} }
} }
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeSHUFPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask) { SmallVectorImpl<unsigned> &ShuffleMask) {
// Part that reads from dest. unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts); unsigned NumLanes = VT.getSizeInBits() / 128;
Imm /= NElts; unsigned NumLaneElts = NumElts / NumLanes;
}
// Part that reads from src. int NewImm = Imm;
for (unsigned i = 0; i != NElts/2; ++i) { for (unsigned l = 0; l < NumLanes; ++l) {
ShuffleMask.push_back(Imm % NElts + NElts); unsigned LaneStart = l * NumLaneElts;
Imm /= NElts; // Part that reads from dest.
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
NewImm /= NumLaneElts;
}
// Part that reads from src.
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
NewImm /= NumLaneElts;
}
if (NumLaneElts == 4) NewImm = Imm; // reload imm
} }
} }

View File

@ -64,8 +64,8 @@ void DecodePUNPCKLMask(EVT VT,
void DecodePUNPCKHMask(unsigned NElts, void DecodePUNPCKHMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask); SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeSHUFPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask); SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd /// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// etc. VT indicates the type of the vector allowing it to handle different /// etc. VT indicates the type of the vector allowing it to handle different

View File

@ -4567,9 +4567,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::SHUFPS: case X86ISD::SHUFPS:
case X86ISD::SHUFPD: case X86ISD::SHUFPD:
ImmN = N->getOperand(N->getNumOperands()-1); ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPSMask(NumElems, DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask);
ShuffleMask);
break; break;
case X86ISD::PUNPCKH: case X86ISD::PUNPCKH:
DecodePUNPCKHMask(NumElems, ShuffleMask); DecodePUNPCKHMask(NumElems, ShuffleMask);

View File

@ -7332,6 +7332,15 @@ def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>; (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))), def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>; (VPERMILPDYri VR256:$src1, imm:$imm)>;
def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks

View File

@ -28,6 +28,14 @@ entry:
ret <4 x i64> %shuffle ret <4 x i64> %shuffle
} }
; CHECK: vpermilpd
define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
entry:
%a2 = load <4 x i64>* %a
%shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
ret <4 x i64> %shuffle
}
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the ; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
; target specific mask was correctly generated. ; target specific mask was correctly generated.
; CHECK: vpermilps $-100 ; CHECK: vpermilps $-100