Fix issues in shuffle decoding around VPERM* instructions. Fix shuffle decoding for VSHUFPS/D for 256-bit types. Add pattern matching for memory forms of VPERMILPS/VPERMILPD.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper
2011-11-29 07:49:05 +00:00
parent 5d2f8c3155
commit 36e36ace77
6 changed files with 90 additions and 30 deletions

View File

@ -163,14 +163,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::SHUFPDrmi: case X86::SHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VSHUFPDrri: case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::VSHUFPDrmi: case X86::VSHUFPDrmi:
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDYrmi:
DecodeSHUFPMask(MVT::v4f64, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
@ -179,14 +187,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::SHUFPSrmi: case X86::SHUFPSrmi:
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VSHUFPSrri: case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH. // FALL THROUGH.
case X86::VSHUFPSrmi: case X86::VSHUFPSrmi:
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSYrmi:
DecodeSHUFPMask(MVT::v8f32, MI->getOperand(3).getImm(), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
@ -284,29 +300,47 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPSri: case X86::VPERMILPSri:
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSmi:
DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPSYri: case X86::VPERMILPSYri:
DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPSYmi:
DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPDri: case X86::VPERMILPDri:
DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDmi:
DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERMILPDYri: case X86::VPERMILPDYri:
DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(), Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::VPERMILPDYmi:
DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask); ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg()); DestName = getRegName(MI->getOperand(0).getReg());
break; break;
case X86::VPERM2F128rr: case X86::VPERM2F128rr:
DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); case X86::VPERM2I128rr:
Src1Name = getRegName(MI->getOperand(1).getReg());
Src2Name = getRegName(MI->getOperand(2).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VPERM2F128rm:
case X86::VPERM2I128rm:
DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break; break;
} }

View File

@ -128,17 +128,27 @@ void DecodePUNPCKHMask(unsigned NElts,
} }
} }
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeSHUFPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask) { SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
int NewImm = Imm;
for (unsigned l = 0; l < NumLanes; ++l) {
unsigned LaneStart = l * NumLaneElts;
// Part that reads from dest. // Part that reads from dest.
for (unsigned i = 0; i != NElts/2; ++i) { for (unsigned i = 0; i != NumLaneElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts); ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
Imm /= NElts; NewImm /= NumLaneElts;
} }
// Part that reads from src. // Part that reads from src.
for (unsigned i = 0; i != NElts/2; ++i) { for (unsigned i = 0; i != NumLaneElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts + NElts); ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
Imm /= NElts; NewImm /= NumLaneElts;
}
if (NumLaneElts == 4) NewImm = Imm; // reload imm
} }
} }

View File

@ -64,7 +64,7 @@ void DecodePUNPCKLMask(EVT VT,
void DecodePUNPCKHMask(unsigned NElts, void DecodePUNPCKHMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask); SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeSHUFPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask); SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd /// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd

View File

@ -4567,8 +4567,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::SHUFPS: case X86ISD::SHUFPS:
case X86ISD::SHUFPD: case X86ISD::SHUFPD:
ImmN = N->getOperand(N->getNumOperands()-1); ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPSMask(NumElems, DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask); ShuffleMask);
break; break;
case X86ISD::PUNPCKH: case X86ISD::PUNPCKH:

View File

@ -7332,6 +7332,15 @@ def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>; (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))), def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>; (VPERMILPDYri VR256:$src1, imm:$imm)>;
// Memory-operand forms: when the vector being permuted is itself a load,
// select the "mi" variant of the 256-bit instruction and hand it the address
// directly instead of a VR256 register.
// v8f32 load -> VPERMILPSYmi.
def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
// v4f64 load -> VPERMILPDYmi.
def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
// v8i32 case: the load is matched as a v4i64 memop bitcast to v8i32
// (NOTE(review): presumably because 256-bit integer loads are expressed as
// v4i64 here -- confirm against the memop fragment definitions).
def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
// v4i64 load -> VPERMILPDYmi (no bitcast needed).
def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks

View File

@ -28,6 +28,14 @@ entry:
ret <4 x i64> %shuffle ret <4 x i64> %shuffle
} }
; CHECK: vpermilpd
; funcQ: the vector being shuffled is loaded from %a, so the shuffle source
; comes from memory rather than from a register argument.
; The function is marked readonly, not readnone: it dereferences its pointer
; argument, which readnone forbids.
define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readonly ssp {
entry:
  %a2 = load <4 x i64>* %a
  ; Each index stays inside its own 128-bit half: <1,0> from the low half,
  ; <3,3> from the high half.
  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
  ret <4 x i64> %shuffle
}
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the ; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
; target specific mask was correctly generated. ; target specific mask was correctly generated.
; CHECK: vpermilps $-100 ; CHECK: vpermilps $-100