mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-23 01:24:30 +00:00
Fix issues in shuffle decoding around VPERM* instructions. Fix shuffle decoding for VSHUFPS/D for 256-bit types. Add pattern matching for memory forms of VPERMILPS/VPERMILPD.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -163,14 +163,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
|||||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
// FALL THROUGH.
|
// FALL THROUGH.
|
||||||
case X86::SHUFPDrmi:
|
case X86::SHUFPDrmi:
|
||||||
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
|
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VSHUFPDrri:
|
case X86::VSHUFPDrri:
|
||||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
// FALL THROUGH.
|
// FALL THROUGH.
|
||||||
case X86::VSHUFPDrmi:
|
case X86::VSHUFPDrmi:
|
||||||
DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
|
DecodeSHUFPMask(MVT::v2f64, MI->getOperand(3).getImm(), ShuffleMask);
|
||||||
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
|
break;
|
||||||
|
case X86::VSHUFPDYrri:
|
||||||
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VSHUFPDYrmi:
|
||||||
|
DecodeSHUFPMask(MVT::v4f64, MI->getOperand(3).getImm(), ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
DestName = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
@ -179,14 +187,22 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
|||||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
// FALL THROUGH.
|
// FALL THROUGH.
|
||||||
case X86::SHUFPSrmi:
|
case X86::SHUFPSrmi:
|
||||||
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
|
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
Src1Name = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VSHUFPSrri:
|
case X86::VSHUFPSrri:
|
||||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
// FALL THROUGH.
|
// FALL THROUGH.
|
||||||
case X86::VSHUFPSrmi:
|
case X86::VSHUFPSrmi:
|
||||||
DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
|
DecodeSHUFPMask(MVT::v4f32, MI->getOperand(3).getImm(), ShuffleMask);
|
||||||
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
|
break;
|
||||||
|
case X86::VSHUFPSYrri:
|
||||||
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VSHUFPSYrmi:
|
||||||
|
DecodeSHUFPMask(MVT::v8f32, MI->getOperand(3).getImm(), ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
DestName = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
@ -284,29 +300,47 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
|||||||
DestName = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VPERMILPSri:
|
case X86::VPERMILPSri:
|
||||||
DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VPERMILPSmi:
|
||||||
|
DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||||
ShuffleMask);
|
ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VPERMILPSYri:
|
case X86::VPERMILPSYri:
|
||||||
DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VPERMILPSYmi:
|
||||||
|
DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||||
ShuffleMask);
|
ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VPERMILPDri:
|
case X86::VPERMILPDri:
|
||||||
DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VPERMILPDmi:
|
||||||
|
DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||||
ShuffleMask);
|
ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VPERMILPDYri:
|
case X86::VPERMILPDYri:
|
||||||
DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VPERMILPDYmi:
|
||||||
|
DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||||
ShuffleMask);
|
ShuffleMask);
|
||||||
Src1Name = getRegName(MI->getOperand(0).getReg());
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
case X86::VPERM2F128rr:
|
case X86::VPERM2F128rr:
|
||||||
DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
|
case X86::VPERM2I128rr:
|
||||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
|
||||||
Src2Name = getRegName(MI->getOperand(2).getReg());
|
Src2Name = getRegName(MI->getOperand(2).getReg());
|
||||||
|
// FALL THROUGH.
|
||||||
|
case X86::VPERM2F128rm:
|
||||||
|
case X86::VPERM2I128rm:
|
||||||
|
DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||||
|
ShuffleMask);
|
||||||
|
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||||
|
DestName = getRegName(MI->getOperand(0).getReg());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -128,17 +128,27 @@ void DecodePUNPCKHMask(unsigned NElts,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
|
void DecodeSHUFPMask(EVT VT, unsigned Imm,
|
||||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||||
// Part that reads from dest.
|
unsigned NumElts = VT.getVectorNumElements();
|
||||||
for (unsigned i = 0; i != NElts/2; ++i) {
|
|
||||||
ShuffleMask.push_back(Imm % NElts);
|
unsigned NumLanes = VT.getSizeInBits() / 128;
|
||||||
Imm /= NElts;
|
unsigned NumLaneElts = NumElts / NumLanes;
|
||||||
}
|
|
||||||
// Part that reads from src.
|
int NewImm = Imm;
|
||||||
for (unsigned i = 0; i != NElts/2; ++i) {
|
for (unsigned l = 0; l < NumLanes; ++l) {
|
||||||
ShuffleMask.push_back(Imm % NElts + NElts);
|
unsigned LaneStart = l * NumLaneElts;
|
||||||
Imm /= NElts;
|
// Part that reads from dest.
|
||||||
|
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
|
||||||
|
ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
|
||||||
|
NewImm /= NumLaneElts;
|
||||||
|
}
|
||||||
|
// Part that reads from src.
|
||||||
|
for (unsigned i = 0; i != NumLaneElts/2; ++i) {
|
||||||
|
ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
|
||||||
|
NewImm /= NumLaneElts;
|
||||||
|
}
|
||||||
|
if (NumLaneElts == 4) NewImm = Imm; // reload imm
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,8 +64,8 @@ void DecodePUNPCKLMask(EVT VT,
|
|||||||
void DecodePUNPCKHMask(unsigned NElts,
|
void DecodePUNPCKHMask(unsigned NElts,
|
||||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||||
|
|
||||||
void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
|
void DecodeSHUFPMask(EVT VT, unsigned Imm,
|
||||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||||
|
|
||||||
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
|
/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
|
||||||
/// etc. VT indicates the type of the vector allowing it to handle different
|
/// etc. VT indicates the type of the vector allowing it to handle different
|
||||||
|
@ -4567,9 +4567,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
|||||||
case X86ISD::SHUFPS:
|
case X86ISD::SHUFPS:
|
||||||
case X86ISD::SHUFPD:
|
case X86ISD::SHUFPD:
|
||||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||||
DecodeSHUFPSMask(NumElems,
|
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||||
cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
ShuffleMask);
|
||||||
ShuffleMask);
|
|
||||||
break;
|
break;
|
||||||
case X86ISD::PUNPCKH:
|
case X86ISD::PUNPCKH:
|
||||||
DecodePUNPCKHMask(NumElems, ShuffleMask);
|
DecodePUNPCKHMask(NumElems, ShuffleMask);
|
||||||
|
@ -7332,6 +7332,15 @@ def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
|
|||||||
(VPERMILPSYri VR256:$src1, imm:$imm)>;
|
(VPERMILPSYri VR256:$src1, imm:$imm)>;
|
||||||
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
|
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
|
||||||
(VPERMILPDYri VR256:$src1, imm:$imm)>;
|
(VPERMILPDYri VR256:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))),
|
||||||
|
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
|
||||||
|
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)),
|
||||||
|
(i8 imm:$imm))),
|
||||||
|
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))),
|
||||||
|
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
|
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
|
||||||
|
@ -28,6 +28,14 @@ entry:
|
|||||||
ret <4 x i64> %shuffle
|
ret <4 x i64> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: vpermilpd
|
||||||
|
define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%a2 = load <4 x i64>* %a
|
||||||
|
%shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
|
||||||
|
ret <4 x i64> %shuffle
|
||||||
|
}
|
||||||
|
|
||||||
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
|
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
|
||||||
; target specific mask was correctly generated.
|
; target specific mask was correctly generated.
|
||||||
; CHECK: vpermilps $-100
|
; CHECK: vpermilps $-100
|
||||||
|
Reference in New Issue
Block a user