mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-07-21 18:29:45 +00:00
[X86] Improved lowering of packed vector shifts to vpsllq/vpsrlq.
SSE2/AVX non-constant packed shift instructions only use the lower 64 bits of the shift count. This patch teaches function 'getTargetVShiftNode' how to deal with shifts where the shift count node is of type MVT::i64. Before this patch, function 'getTargetVShiftNode' only knew how to deal with shift count nodes of type MVT::i32. This forced the backend to wrongly truncate the shift count to MVT::i32, and then zero-extend it back to MVT::i64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223505 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
189606dbfe
commit
6a9a49d7ab
@ -16713,7 +16713,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
|
|||||||
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
|
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
|
||||||
SDValue SrcOp, SDValue ShAmt,
|
SDValue SrcOp, SDValue ShAmt,
|
||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
|
MVT SVT = ShAmt.getSimpleValueType();
|
||||||
|
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
|
||||||
|
|
||||||
// Catch shift-by-constant.
|
// Catch shift-by-constant.
|
||||||
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
|
if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
|
||||||
@ -16728,13 +16729,18 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
|
|||||||
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
|
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Need to build a vector containing shift amount
|
// Need to build a vector containing shift amount.
|
||||||
// Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
|
// SSE/AVX packed shifts only use the lower 64-bit of the shift count.
|
||||||
SDValue ShOps[4];
|
SmallVector<SDValue, 4> ShOps;
|
||||||
ShOps[0] = ShAmt;
|
ShOps.push_back(ShAmt);
|
||||||
ShOps[1] = DAG.getConstant(0, MVT::i32);
|
if (SVT == MVT::i32) {
|
||||||
ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
|
ShOps.push_back(DAG.getConstant(0, SVT));
|
||||||
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps);
|
ShOps.push_back(DAG.getUNDEF(SVT));
|
||||||
|
}
|
||||||
|
ShOps.push_back(DAG.getUNDEF(SVT));
|
||||||
|
|
||||||
|
MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
|
||||||
|
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps);
|
||||||
|
|
||||||
// The return type has to be a 128-bit type with the same element
|
// The return type has to be a 128-bit type with the same element
|
||||||
// type as the input type.
|
// type as the input type.
|
||||||
@ -18469,8 +18475,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (BaseShAmt.getNode()) {
|
if (BaseShAmt.getNode()) {
|
||||||
if (EltVT.bitsGT(MVT::i32))
|
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
|
||||||
BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
|
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
|
||||||
|
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
|
||||||
else if (EltVT.bitsLT(MVT::i32))
|
else if (EltVT.bitsLT(MVT::i32))
|
||||||
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
|
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
|
||||||
|
|
||||||
|
@ -44,14 +44,10 @@ entry:
|
|||||||
define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) {
|
define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) {
|
||||||
; SSE2-LABEL: test3:
|
; SSE2-LABEL: test3:
|
||||||
; SSE2: # BB#0
|
; SSE2: # BB#0
|
||||||
; SSE2-NEXT: movd %xmm1, %rax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: psllq %xmm1, %xmm0
|
; SSE2-NEXT: psllq %xmm1, %xmm0
|
||||||
; SSE2-NEXT: retq
|
; SSE2-NEXT: retq
|
||||||
; AVX-LABEL: test3:
|
; AVX-LABEL: test3:
|
||||||
; AVX: # BB#0
|
; AVX: # BB#0
|
||||||
; AVX-NEXT: vmovq %xmm1, %rax
|
|
||||||
; AVX-NEXT: vmovd %eax, %xmm1
|
|
||||||
; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
|
; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
@ -103,14 +99,10 @@ entry:
|
|||||||
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) {
|
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) {
|
||||||
; SSE2-LABEL: test6:
|
; SSE2-LABEL: test6:
|
||||||
; SSE2: # BB#0
|
; SSE2: # BB#0
|
||||||
; SSE2-NEXT: movd %xmm1, %rax
|
|
||||||
; SSE2-NEXT: movd %eax, %xmm1
|
|
||||||
; SSE2-NEXT: psrlq %xmm1, %xmm0
|
; SSE2-NEXT: psrlq %xmm1, %xmm0
|
||||||
; SSE2-NEXT: retq
|
; SSE2-NEXT: retq
|
||||||
; AVX-LABEL: test6:
|
; AVX-LABEL: test6:
|
||||||
; AVX: # BB#0
|
; AVX: # BB#0
|
||||||
; AVX-NEXT: vmovq %xmm1, %rax
|
|
||||||
; AVX-NEXT: vmovd %eax, %xmm1
|
|
||||||
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
|
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
Loading…
Reference in New Issue
Block a user