From 0509db27386f5cafffd364618365ecda741cf0bd Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 28 Dec 2012 05:45:24 +0000 Subject: [PATCH] AVX: Move the ZEXT/ANYEXT DAGCo optimizations to the lowering of these optimizations. The old test cases still cover all of these lowering/optimizations. The single change that we have is that now anyext does not need to zero a register, because it does not use the exact code path as the zero_extend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171178 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 111 ++++++++++++++++++----------- lib/Target/X86/X86ISelLowering.h | 5 +- test/CodeGen/X86/avx-zext.ll | 3 +- 3 files changed, 72 insertions(+), 47 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 263a5d668a5..ca86f660b27 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1125,8 +1125,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); @@ -8292,12 +8296,70 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co } } -SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = Op->getDebugLoc(); + + // Optimize vectors in AVX mode: + // + // v8i16 -> v8i32 + // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. + // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. + // Concat upper and lower parts. + // + // v4i32 -> v4i64 + // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. + // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. + // Concat upper and lower parts. + // + + if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) && + ((VT != MVT::v4i64) || (InVT != MVT::v4i32))) + return SDValue(); + + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In); + + SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); + SDValue Undef = DAG.getUNDEF(InVT); + bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND; + SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); + SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); +} + +SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget->hasFp256()) { + SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); + if (Res.getNode()) + return Res; + } + + return SDValue(); +} +SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); SDValue In = Op.getOperand(0); EVT SVT = In.getValueType(); + if (Subtarget->hasFp256()) { + SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); + if (Res.getNode()) + return Res; + } + if (!VT.is256BitVector() || !SVT.is128BitVector() || VT.getVectorNumElements() != SVT.getVectorNumElements()) return SDValue(); @@ -11849,7 +11911,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); - case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG); + case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); + case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); @@ -11859,7 +11923,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -16856,7 +16919,6 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = N->getDebugLoc(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT OpVT = N0.getValueType(); if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && @@ -16879,43 +16941,6 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, return R; } - // Optimize vectors in AVX mode: - // - // v8i16 -> v8i32 - // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. - // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. - // Concat upper and lower parts. - // - // v4i32 -> v4i64 - // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. - // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. - // Concat upper and lower parts. - // - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (!Subtarget->hasFp256()) - return SDValue(); - - if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || - ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) { - - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0); - - SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl); - SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec); - SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec); - - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorNumElements()/2); - - OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); - } - return SDValue(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index a19e7ec1a13..1a696a8f536 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -813,7 +813,9 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; @@ -824,7 +826,6 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll index b630e9d1461..582537ea906 100755 --- a/test/CodeGen/X86/avx-zext.ll +++ b/test/CodeGen/X86/avx-zext.ll @@ -18,11 +18,10 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp ret <4 x i64>%B } - define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { ;CHECK: zext_8i8_to_8i32 ;CHECK: vpunpckhwd -;CHECK: vpunpcklwd +;CHECK: vpmovzxwd ;CHECK: vinsertf128 ;CHECK: ret %t = zext <8 x i8> %z to <8 x i32>