[X86] Improved lowering of packed vector shifts to vpsllq/vpsrlq.

SSE2/AVX non-constant packed shift instructions only use the lower 64 bits of
the shift count.

This patch teaches function 'getTargetVShiftNode' how to deal with shifts
where the shift count node is of type MVT::i64.

Before this patch, function 'getTargetVShiftNode' only knew how to deal with
shift count nodes of type MVT::i32. This forced the backend to wrongly
truncate the shift count to MVT::i32, and then zero-extend it back to MVT::i64.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223505 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrea Di Biagio 2014-12-05 20:02:22 +00:00
parent 189606dbfe
commit 6a9a49d7ab
2 changed files with 17 additions and 18 deletions

View File

@ -16713,7 +16713,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue SrcOp, SDValue ShAmt, SDValue SrcOp, SDValue ShAmt,
SelectionDAG &DAG) { SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
// Catch shift-by-constant. // Catch shift-by-constant.
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt)) if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
@ -16728,13 +16729,18 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
} }
// Need to build a vector containing shift amount // Need to build a vector containing shift amount.
// Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0 // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
SDValue ShOps[4]; SmallVector<SDValue, 4> ShOps;
ShOps[0] = ShAmt; ShOps.push_back(ShAmt);
ShOps[1] = DAG.getConstant(0, MVT::i32); if (SVT == MVT::i32) {
ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); ShOps.push_back(DAG.getConstant(0, SVT));
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps); ShOps.push_back(DAG.getUNDEF(SVT));
}
ShOps.push_back(DAG.getUNDEF(SVT));
MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps);
// The return type has to be a 128-bit type with the same element // The return type has to be a 128-bit type with the same element
// type as the input type. // type as the input type.
@ -18469,8 +18475,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
} }
if (BaseShAmt.getNode()) { if (BaseShAmt.getNode()) {
if (EltVT.bitsGT(MVT::i32)) assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32)) else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

View File

@ -44,14 +44,10 @@ entry:
define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) { define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) {
; SSE2-LABEL: test3: ; SSE2-LABEL: test3:
; SSE2: # BB#0 ; SSE2: # BB#0
; SSE2-NEXT: movd %xmm1, %rax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psllq %xmm1, %xmm0 ; SSE2-NEXT: psllq %xmm1, %xmm0
; SSE2-NEXT: retq ; SSE2-NEXT: retq
; AVX-LABEL: test3: ; AVX-LABEL: test3:
; AVX: # BB#0 ; AVX: # BB#0
; AVX-NEXT: vmovq %xmm1, %rax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq ; AVX-NEXT: retq
entry: entry:
@ -103,14 +99,10 @@ entry:
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) { define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) {
; SSE2-LABEL: test6: ; SSE2-LABEL: test6:
; SSE2: # BB#0 ; SSE2: # BB#0
; SSE2-NEXT: movd %xmm1, %rax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: psrlq %xmm1, %xmm0 ; SSE2-NEXT: psrlq %xmm1, %xmm0
; SSE2-NEXT: retq ; SSE2-NEXT: retq
; AVX-LABEL: test6: ; AVX-LABEL: test6:
; AVX: # BB#0 ; AVX: # BB#0
; AVX-NEXT: vmovq %xmm1, %rax
; AVX-NEXT: vmovd %eax, %xmm1
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq ; AVX-NEXT: retq
entry: entry: