mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 07:34:33 +00:00
Optimize sext <4 x i8> and <4 x i16> to <4 x i64>.
Patch by Ahmad, Muhammad T <muhammad.t.ahmad@intel.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177421 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a45a22758d
commit
b05130e1b2
@ -11827,8 +11827,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
|
|||||||
// fall through
|
// fall through
|
||||||
case MVT::v4i32:
|
case MVT::v4i32:
|
||||||
case MVT::v8i16: {
|
case MVT::v8i16: {
|
||||||
SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
|
// (sext (vzext x)) -> (vsext x)
|
||||||
Op.getOperand(0), ShAmt, DAG);
|
SDValue Op0 = Op.getOperand(0);
|
||||||
|
SDValue Op00 = Op0.getOperand(0);
|
||||||
|
SDValue Tmp1;
|
||||||
|
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
|
||||||
|
if (Op0.getOpcode() == ISD::BITCAST &&
|
||||||
|
Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
|
||||||
|
Tmp1 = LowerVectorIntExtend(Op00, DAG);
|
||||||
|
if (Tmp1.getNode()) {
|
||||||
|
SDValue Tmp1Op0 = Tmp1.getOperand(0);
|
||||||
|
assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
|
||||||
|
"This optimization is invalid without a VZEXT.");
|
||||||
|
return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the above didn't work, then just use Shift-Left + Shift-Right.
|
||||||
|
Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
|
||||||
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
|
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,8 +257,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
|
|||||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
|
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
|
||||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
|
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
|
||||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
|
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
|
||||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
|
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
|
||||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
|
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
|
||||||
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
|
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -44,9 +44,9 @@ define i32 @zext_sext(<8 x i1> %in) {
|
|||||||
%B = zext <8 x i16> undef to <8 x i32>
|
%B = zext <8 x i16> undef to <8 x i32>
|
||||||
;CHECK: cost of 1 {{.*}} sext
|
;CHECK: cost of 1 {{.*}} sext
|
||||||
%C = sext <4 x i32> undef to <4 x i64>
|
%C = sext <4 x i32> undef to <4 x i64>
|
||||||
;CHECK: cost of 8 {{.*}} sext
|
;CHECK: cost of 6 {{.*}} sext
|
||||||
%C1 = sext <4 x i8> undef to <4 x i64>
|
%C1 = sext <4 x i8> undef to <4 x i64>
|
||||||
;CHECK: cost of 8 {{.*}} sext
|
;CHECK: cost of 6 {{.*}} sext
|
||||||
%C2 = sext <4 x i16> undef to <4 x i64>
|
%C2 = sext <4 x i16> undef to <4 x i64>
|
||||||
|
|
||||||
;CHECK: cost of 1 {{.*}} zext
|
;CHECK: cost of 1 {{.*}} zext
|
||||||
|
@ -165,3 +165,24 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
|
|||||||
ret <4 x i64> %extmask
|
ret <4 x i64> %extmask
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; AVX: sext_4i8_to_4i64
|
||||||
|
; AVX: vpmovsxbd
|
||||||
|
; AVX: vpmovsxdq
|
||||||
|
; AVX: vpmovsxdq
|
||||||
|
; AVX: ret
|
||||||
|
define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
|
||||||
|
%X = load <4 x i8>* %ptr
|
||||||
|
%Y = sext <4 x i8> %X to <4 x i64>
|
||||||
|
ret <4 x i64>%Y
|
||||||
|
}
|
||||||
|
|
||||||
|
; AVX: sext_4i16_to_4i64
|
||||||
|
; AVX: vpmovsxwd
|
||||||
|
; AVX: vpmovsxdq
|
||||||
|
; AVX: vpmovsxdq
|
||||||
|
; AVX: ret
|
||||||
|
define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
|
||||||
|
%X = load <4 x i16>* %ptr
|
||||||
|
%Y = sext <4 x i16> %X to <4 x i64>
|
||||||
|
ret <4 x i64>%Y
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user