R600: Make sign_extend_inreg legal.

Don't know why I didn't just do this in the first place.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206862 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault 2014-04-22 03:49:30 +00:00
parent 0240286c23
commit 3ddf868b04
4 changed files with 61 additions and 91 deletions
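For readers unfamiliar with the node: ISD::SIGN_EXTEND_INREG takes the low N bits of a wider register and sign-extends them in place. Targets without a native instruction expand it into a shift pair; SI's S_SEXT_I32_I8/I16 and S_BFE_I32 do the same work in one scalar instruction, which is why the scalar cases can simply be marked Legal. A minimal sketch of the computation, using a hypothetical helper rather than LLVM code:

#include <cstdint>

// Sign-extend the low `srcBits` bits of a 32-bit value in place.
// Without hardware support this is the classic shl/sra expansion
// (the BitsDiff computation removed from the lowering below);
// S_SEXT_I32_I8/I16 and S_BFE_I32 do it in a single instruction.
int32_t signExtendInReg(int32_t x, unsigned srcBits) {
  unsigned bitsDiff = 32 - srcBits;
  // Cast through uint32_t so the left shift is well defined in C++.
  return (int32_t)((uint32_t)x << bitsDiff) >> bitsDiff;
}

For example, signExtendInReg(0x80, 8) yields -128, while signExtendInReg(0x7F, 8) yields 127.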

lib/Target/R600/AMDGPUISelLowering.cpp

@@ -1017,81 +1017,22 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
   MVT VT = Op.getSimpleValueType();
   MVT ScalarVT = VT.getScalarType();
 
-  unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits();
-  unsigned DestBits = ScalarVT.getSizeInBits();
-  unsigned BitsDiff = DestBits - SrcBits;
-
-  if (!Subtarget->hasBFE())
-    return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
+  if (!VT.isVector())
+    return SDValue();
 
   SDValue Src = Op.getOperand(0);
-  if (VT.isVector()) {
-    SDLoc DL(Op);
-    // Need to scalarize this, and revisit each of the scalars later.
-    // TODO: Don't scalarize on Evergreen?
-    unsigned NElts = VT.getVectorNumElements();
-    SmallVector<SDValue, 8> Args;
-    DAG.ExtractVectorElements(Src, Args);
-
-    SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
-    for (unsigned I = 0; I < NElts; ++I)
-      Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
-
-    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
-  }
-
-  if (SrcBits == 32) {
-    SDLoc DL(Op);
-
-    // If the source is 32-bits, this is really half of a 2-register pair, and
-    // we need to discard the unused half of the pair.
-    SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
-    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc);
-  }
-
-  unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1;
-
-  // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
-  // might not be worth the effort, and will need to expand to shifts when
-  // fixing SGPR copies.
-  if (SrcBits < 32 && DestBits <= 32) {
-    SDLoc DL(Op);
-    MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
-
-    if (DestBits != 32)
-      Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src);
-
-    // FIXME: This should use TargetConstant, but that hits assertions for
-    // Evergreen.
-    SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT,
-                              Op.getOperand(0), // Operand
-                              DAG.getConstant(0, ExtVT), // Offset
-                              DAG.getConstant(SrcBits, ExtVT)); // Width
-
-    // Truncate to the original type if necessary.
-    if (ScalarVT == MVT::i32)
-      return Ext;
-    return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext);
-  }
-
-  // For small types, extend to 32-bits first.
-  if (SrcBits < 32) {
-    SDLoc DL(Op);
-    MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
-    SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src);
-    SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32,
-                                DL,
-                                ExtVT,
-                                TruncSrc, // Operand
-                                DAG.getConstant(0, ExtVT), // Offset
-                                DAG.getConstant(SrcBits, ExtVT)); // Width
-    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32);
-  }
-
-  // For everything else, use the standard bitshift expansion.
-  return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
+  SDLoc DL(Op);
+
+  // TODO: Don't scalarize on Evergreen?
+  unsigned NElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 8> Args;
+  DAG.ExtractVectorElements(Src, Args, 0, NElts);
+
+  SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
+  for (unsigned I = 0; I < NElts; ++I)
+    Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
 }
 
 //===----------------------------------------------------------------------===//
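The rewritten lowering above now handles only the vector cases: it splits the vector into lanes, wraps each lane in its own scalar SIGN_EXTEND_INREG (now Legal, so instruction selection matches it directly), and reassembles the result with BUILD_VECTOR. A standalone model of that path, with hypothetical names, assuming 32-bit lanes:

#include <array>
#include <cstddef>
#include <cstdint>

// Scalarize-and-rebuild, mirroring ExtractVectorElements, the
// per-element SIGN_EXTEND_INREG loop, and BUILD_VECTOR above.
template <std::size_t N>
std::array<int32_t, N> sextInRegLanes(std::array<int32_t, N> v,
                                      unsigned srcBits) {
  const unsigned bitsDiff = 32 - srcBits;
  for (int32_t &lane : v) // one scalar sext_inreg node per element
    lane = (int32_t)((uint32_t)lane << bitsDiff) >> bitsDiff;
  return v;               // BUILD_VECTOR of the extended lanes
}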

lib/Target/R600/SIISelLowering.cpp

@@ -115,15 +115,15 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
   setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
 
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
 
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
 
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);

@@ -165,9 +165,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::LOAD, MVT::i1, Custom);
 
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
-
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
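Legal versus Custom is the crux of the change: Legal nodes survive legalization untouched and are picked up by the sext_inreg patterns in SIInstructions.td, while the vector types stay Custom and take the scalarization path above. A simplified, hypothetical sketch of that routing; the real dispatch lives in SelectionDAG's legalizer, not in target code:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

using namespace llvm;

// Hypothetical helper: how a SIGN_EXTEND_INREG node is routed based on
// the action registered via setOperationAction() for its value type.
static SDValue routeSextInReg(SDValue Op, const TargetLowering &TLI,
                              SelectionDAG &DAG) {
  switch (TLI.getOperationAction(ISD::SIGN_EXTEND_INREG,
                                 Op.getValueType())) {
  case TargetLowering::Legal:
    return Op;                          // i1/i8/i16: left for the td patterns
  case TargetLowering::Custom:
    return TLI.LowerOperation(Op, DAG); // vectors: scalarized above
  default:
    return SDValue();                   // Expand: generic shl/sra rewrite
  }
}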

lib/Target/R600/SIInstructions.td

@@ -74,6 +74,7 @@ def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8",
 def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
   [(set i32:$dst, (sext_inreg i32:$src0, i16))]
 >;
+
 ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
 ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
 ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
@@ -128,21 +129,6 @@ def S_CMPK_EQ_I32 : SOPK <
 >;
 */
 
-// Handle sext_inreg in i64
-def : Pat <
-  (i64 (sext_inreg i64:$src, i8)),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-    (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
-    (S_MOV_B32 -1), sub1)
->;
-
-def : Pat <
-  (i64 (sext_inreg i64:$src, i16)),
-  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-    (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
-    (S_MOV_B32 -1), sub1)
->;
-
 let isCompare = 1 in {
 def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
 def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
@@ -2252,6 +2238,39 @@ def : Pat<
   (V_CMP_U_F32_e64 $src0, $src1)
 >;
 
+//===----------------------------------------------------------------------===//
+// Conversion Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i32 (sext_inreg i32:$src, i1)),
+          (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
+
+// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
+// might not be worth the effort, and will need to expand to shifts when
+// fixing SGPR copies.
+
+// Handle sext_inreg in i64
+def : Pat <
+  (i64 (sext_inreg i64:$src, i1)),
+  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+    (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16
+    (S_MOV_B32 -1), sub1)
+>;
+
+def : Pat <
+  (i64 (sext_inreg i64:$src, i8)),
+  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+    (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+    (S_MOV_B32 -1), sub1)
+>;
+
+def : Pat <
+  (i64 (sext_inreg i64:$src, i16)),
+  (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+    (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+    (S_MOV_B32 -1), sub1)
+>;
+
 //===----------------------------------------------------------------------===//
 // Miscellaneous Patterns
 //===----------------------------------------------------------------------===//
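A note on the magic number in the new i1 patterns: S_BFE_I32 takes a packed source-1 operand with the bitfield offset in the low bits and the width at bit 16, so 65536 (0x10000) selects offset 0, width 1, exactly the "0 | 1 << 16" the inline comments spell out. A small compile-time check of that packing, assuming the layout those comments describe:

#include <cstdint>

// Pack an S_BFE_* source-1 immediate: offset in the low bits, width
// shifted to bit 16 (layout assumed from the pattern comments above).
constexpr uint32_t sBfeImm(uint32_t offset, uint32_t width) {
  return offset | (width << 16);
}

static_assert(sBfeImm(0, 1) == 65536, "i1: offset 0, width 1 == 0x10000");
static_assert(sBfeImm(0, 8) == 0x80000, "a width-8 field would pack as 8 << 16");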

test/CodeGen/R600/sext-in-reg.ll

@@ -74,6 +74,19 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
   ret void
 }
 
+; FUNC-LABEL: @sext_in_reg_i1_to_i64
+; SI: S_ADD_I32 [[VAL:s[0-9]+]],
+; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
+; SI: S_MOV_B32 {{s[0-9]+}}, -1
+; SI: BUFFER_STORE_DWORDX2
+define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %c = add i64 %a, %b
+  %shl = shl i64 %c, 63
+  %ashr = ashr i64 %shl, 63
+  store i64 %ashr, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 ; FUNC-LABEL: @sext_in_reg_i8_to_i64
 ; SI: S_ADD_I32 [[VAL:s[0-9]+]],
 ; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
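Note that the IR in the new test never names sext_inreg directly: the DAG combiner folds the (ashr (shl x, 63), 63) pair into a sign_extend_inreg of the low bit, which the new i64 pattern then selects, hence the S_BFE_I32 with 0x10000 and the S_MOV_B32 -1 for the high half in the CHECK lines. The equivalent scalar computation, as a quick sanity check:

#include <cassert>
#include <cstdint>

// What @sext_in_reg_i1_to_i64 computes: sign-extend bit 0 of a 64-bit
// value, i.e. (ashr (shl x, 63), 63), which is 0 or -1.
int64_t sextBit0(int64_t x) {
  return (int64_t)((uint64_t)x << 63) >> 63;
}

int main() {
  assert(sextBit0(2) == 0);  // bit 0 clear
  assert(sextBit0(3) == -1); // bit 0 set: all ones
  return 0;
}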