mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-21 02:24:22 +00:00
Add instruction selection for 256-bit VPSHUFD and 128-bit VPERMILPS/VPERMILPD.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149968 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -3713,7 +3713,7 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
|
|||||||
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
|
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
|
||||||
/// to the same elements of the low, but to the higher half of the source.
|
/// to the same elements of the low, but to the higher half of the source.
|
||||||
/// In VPERMILPD the two lanes could be shuffled independently of each other
|
/// In VPERMILPD the two lanes could be shuffled independently of each other
|
||||||
/// with the same restriction that lanes can't be crossed.
|
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
|
||||||
static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
|
static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
|
||||||
if (!HasAVX)
|
if (!HasAVX)
|
||||||
return false;
|
return false;
|
||||||
@ -6467,6 +6467,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
|
|
||||||
unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
|
unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
|
||||||
|
|
||||||
|
if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
|
||||||
|
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
|
||||||
|
|
||||||
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
|
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
|
||||||
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
|
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
|
||||||
|
|
||||||
@ -6636,9 +6639,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
|
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
|
||||||
|
|
||||||
// Handle VPERMILPS/D* permutations
|
// Handle VPERMILPS/D* permutations
|
||||||
if (isVPERMILPMask(M, VT, HasAVX))
|
if (isVPERMILPMask(M, VT, HasAVX)) {
|
||||||
|
if (HasAVX2 && VT == MVT::v8i32)
|
||||||
|
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
|
||||||
|
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||||
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
|
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
|
||||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||||
|
}
|
||||||
|
|
||||||
// Handle VPERM2F128/VPERM2I128 permutations
|
// Handle VPERM2F128/VPERM2I128 permutations
|
||||||
if (isVPERM2X128Mask(M, VT, HasAVX))
|
if (isVPERM2X128Mask(M, VT, HasAVX))
|
||||||
|
@ -3993,21 +3993,19 @@ def mi : Ii8<0x70, MRMSrcMem,
|
|||||||
(undef))))]>;
|
(undef))))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
|
multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
|
||||||
PatFrag bc_frag> {
|
|
||||||
def Yri : Ii8<0x70, MRMSrcReg,
|
def Yri : Ii8<0x70, MRMSrcReg,
|
||||||
(outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
|
(outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1,
|
[(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
|
||||||
(undef))))]>;
|
|
||||||
def Ymi : Ii8<0x70, MRMSrcMem,
|
def Ymi : Ii8<0x70, MRMSrcMem,
|
||||||
(outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
|
(outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set VR256:$dst, (vt (pshuf_frag:$src2
|
[(set VR256:$dst,
|
||||||
(bc_frag (memopv4i64 addr:$src1)),
|
(vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
|
||||||
(undef))))]>;
|
(i8 imm:$src2))))]>;
|
||||||
}
|
}
|
||||||
} // ExeDomain = SSEPackedInt
|
} // ExeDomain = SSEPackedInt
|
||||||
|
|
||||||
@ -4053,17 +4051,9 @@ let Predicates = [HasAVX] in {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX2] in {
|
let Predicates = [HasAVX2] in {
|
||||||
let AddedComplexity = 5 in
|
defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
|
||||||
defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB,
|
defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
|
||||||
OpSize, VEX;
|
defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
|
||||||
|
|
||||||
// SSE2 with ImmT == Imm8 and XS prefix.
|
|
||||||
defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS,
|
|
||||||
VEX;
|
|
||||||
|
|
||||||
// SSE2 with ImmT == Imm8 and XD prefix.
|
|
||||||
defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD,
|
|
||||||
VEX;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasSSE2] in {
|
let Predicates = [HasSSE2] in {
|
||||||
@ -4225,10 +4215,10 @@ let Predicates = [HasAVX] in {
|
|||||||
|
|
||||||
// Splat v2f64 / v2i64
|
// Splat v2f64 / v2i64
|
||||||
let AddedComplexity = 10 in {
|
let AddedComplexity = 10 in {
|
||||||
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
|
||||||
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
|
||||||
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
||||||
(VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
|
(VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
|
||||||
|
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
||||||
|
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
@ -7200,6 +7190,19 @@ def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
|
|||||||
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||||
def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
|
def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
|
||||||
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||||
|
|
||||||
|
def : Pat<(v4f32 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
|
||||||
|
(VPERMILPSri VR128:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v2f64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
|
||||||
|
(VPERMILPDri VR128:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
|
||||||
|
(VPERMILPDri VR128:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v4f32 (X86VPermilp (memopv4f32 addr:$src1), (i8 imm:$imm))),
|
||||||
|
(VPERMILPSmi addr:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v2f64 (X86VPermilp (memopv2f64 addr:$src1), (i8 imm:$imm))),
|
||||||
|
(VPERMILPDmi addr:$src1, imm:$imm)>;
|
||||||
|
def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
|
||||||
|
(VPERMILPDmi addr:$src1, imm:$imm)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
|
|||||||
ret <4 x float> %b
|
ret <4 x float> %b
|
||||||
; CHECK: test1:
|
; CHECK: test1:
|
||||||
; CHECK: vshufps
|
; CHECK: vshufps
|
||||||
; CHECK: vpshufd
|
; CHECK: vpermilps
|
||||||
}
|
}
|
||||||
|
|
||||||
; rdar://10538417
|
; rdar://10538417
|
||||||
@ -98,22 +98,40 @@ define i32 @test10(<4 x i32> %a) nounwind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
define <4 x float> @test11(<4 x float> %a) nounwind {
|
define <4 x float> @test11(<4 x float> %a) nounwind {
|
||||||
; CHECK: pshufd $27
|
; CHECK: test11
|
||||||
|
; CHECK: vpermilps $27
|
||||||
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
%tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||||
ret <4 x float> %tmp1
|
ret <4 x float> %tmp1
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x float> @test12(<4 x float>* %a) nounwind {
|
define <4 x float> @test12(<4 x float>* %a) nounwind {
|
||||||
; CHECK: pshufd $27, (
|
; CHECK: test12
|
||||||
|
; CHECK: vpermilps $27, (
|
||||||
%tmp0 = load <4 x float>* %a
|
%tmp0 = load <4 x float>* %a
|
||||||
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||||
ret <4 x float> %tmp1
|
ret <4 x float> %tmp1
|
||||||
}
|
}
|
||||||
|
|
||||||
;CHECK: test13
|
define <4 x i32> @test13(<4 x i32> %a) nounwind {
|
||||||
;CHECK: shufd
|
; CHECK: test13
|
||||||
;CHECK: ret
|
; CHECK: vpshufd $27
|
||||||
define <4 x i32> @test13(<2 x i32>%x) nounwind readnone {
|
%tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||||
|
ret <4 x i32> %tmp1
|
||||||
|
}
|
||||||
|
|
||||||
|
define <4 x i32> @test14(<4 x i32>* %a) nounwind {
|
||||||
|
; CHECK: test14
|
||||||
|
; CHECK: vpshufd $27, (
|
||||||
|
%tmp0 = load <4 x i32>* %a
|
||||||
|
%tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||||
|
ret <4 x i32> %tmp1
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: test15
|
||||||
|
; CHECK: vpshufd $8
|
||||||
|
; CHECK: ret
|
||||||
|
define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
|
||||||
%x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
%x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||||
ret <4 x i32>%x1
|
ret <4 x i32>%x1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ entry:
|
|||||||
ret <4 x i64> %vecinit6.i
|
ret <4 x i64> %vecinit6.i
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK: vshufpd $0
|
; CHECK: vpermilpd $0
|
||||||
; CHECK-NEXT: vinsertf128 $1
|
; CHECK-NEXT: vinsertf128 $1
|
||||||
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
|
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
|
@ -45,7 +45,7 @@ entry:
|
|||||||
ret <8 x float> %shuffle
|
ret <8 x float> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-NOT: vpermilps
|
; CHECK: vpermilps
|
||||||
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
|
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
|
||||||
entry:
|
entry:
|
||||||
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
|
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
|
||||||
|
Reference in New Issue
Block a user