mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-08-22 10:29:35 +00:00
[NVPTX] Add support for [SHL,SRA,SRL]_PARTS
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211936 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
10da1651ed
commit
863b0d45a5
@ -152,6 +152,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
|
|||||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
|
||||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
|
||||||
|
|
||||||
|
setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
|
||||||
|
setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
|
||||||
|
setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
|
||||||
|
setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
|
||||||
|
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
|
||||||
|
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
|
||||||
|
|
||||||
if (nvptxSubtarget.hasROT64()) {
|
if (nvptxSubtarget.hasROT64()) {
|
||||||
setOperationAction(ISD::ROTL, MVT::i64, Legal);
|
setOperationAction(ISD::ROTL, MVT::i64, Legal);
|
||||||
setOperationAction(ISD::ROTR, MVT::i64, Legal);
|
setOperationAction(ISD::ROTR, MVT::i64, Legal);
|
||||||
@ -345,6 +352,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
return "NVPTXISD::FUN_SHFL_CLAMP";
|
return "NVPTXISD::FUN_SHFL_CLAMP";
|
||||||
case NVPTXISD::FUN_SHFR_CLAMP:
|
case NVPTXISD::FUN_SHFR_CLAMP:
|
||||||
return "NVPTXISD::FUN_SHFR_CLAMP";
|
return "NVPTXISD::FUN_SHFR_CLAMP";
|
||||||
|
case NVPTXISD::IMAD:
|
||||||
|
return "NVPTXISD::IMAD";
|
||||||
|
case NVPTXISD::MUL_WIDE_SIGNED:
|
||||||
|
return "NVPTXISD::MUL_WIDE_SIGNED";
|
||||||
|
case NVPTXISD::MUL_WIDE_UNSIGNED:
|
||||||
|
return "NVPTXISD::MUL_WIDE_UNSIGNED";
|
||||||
case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
|
case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
|
||||||
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
|
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
|
||||||
case NVPTXISD::Tex1DFloatFloatLevel:
|
case NVPTXISD::Tex1DFloatFloatLevel:
|
||||||
@ -1279,6 +1292,127 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
|
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
|
||||||
|
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
|
||||||
|
/// amount, or
|
||||||
|
/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
|
||||||
|
/// amount.
|
||||||
|
SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
|
||||||
|
SelectionDAG &DAG) const {
|
||||||
|
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
|
||||||
|
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
|
||||||
|
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
unsigned VTBits = VT.getSizeInBits();
|
||||||
|
SDLoc dl(Op);
|
||||||
|
SDValue ShOpLo = Op.getOperand(0);
|
||||||
|
SDValue ShOpHi = Op.getOperand(1);
|
||||||
|
SDValue ShAmt = Op.getOperand(2);
|
||||||
|
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
|
||||||
|
|
||||||
|
if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
|
||||||
|
|
||||||
|
// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
|
||||||
|
// {dHi, dLo} = {aHi, aLo} >> Amt
|
||||||
|
// dHi = aHi >> Amt
|
||||||
|
// dLo = shf.r.clamp aLo, aHi, Amt
|
||||||
|
|
||||||
|
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
|
||||||
|
SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
|
||||||
|
ShAmt);
|
||||||
|
|
||||||
|
SDValue Ops[2] = { Lo, Hi };
|
||||||
|
return DAG.getMergeValues(Ops, dl);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
|
||||||
|
// {dHi, dLo} = {aHi, aLo} >> Amt
|
||||||
|
// - if (Amt>=size) then
|
||||||
|
// dLo = aHi >> (Amt-size)
|
||||||
|
// dHi = aHi >> Amt (this is either all 0 or all 1)
|
||||||
|
// else
|
||||||
|
// dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
|
||||||
|
// dHi = aHi >> Amt
|
||||||
|
|
||||||
|
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant(VTBits, MVT::i32), ShAmt);
|
||||||
|
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
|
||||||
|
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
|
||||||
|
DAG.getConstant(VTBits, MVT::i32));
|
||||||
|
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
|
||||||
|
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
|
||||||
|
SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
|
||||||
|
|
||||||
|
SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
|
||||||
|
DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
|
||||||
|
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
|
||||||
|
SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
|
||||||
|
|
||||||
|
SDValue Ops[2] = { Lo, Hi };
|
||||||
|
return DAG.getMergeValues(Ops, dl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// LowerShiftLeftParts - Lower SHL_PARTS, which
|
||||||
|
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
|
||||||
|
/// amount, or
|
||||||
|
/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift
|
||||||
|
/// amount.
|
||||||
|
SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
|
||||||
|
SelectionDAG &DAG) const {
|
||||||
|
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
|
||||||
|
assert(Op.getOpcode() == ISD::SHL_PARTS);
|
||||||
|
|
||||||
|
EVT VT = Op.getValueType();
|
||||||
|
unsigned VTBits = VT.getSizeInBits();
|
||||||
|
SDLoc dl(Op);
|
||||||
|
SDValue ShOpLo = Op.getOperand(0);
|
||||||
|
SDValue ShOpHi = Op.getOperand(1);
|
||||||
|
SDValue ShAmt = Op.getOperand(2);
|
||||||
|
|
||||||
|
if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
|
||||||
|
|
||||||
|
// For 32bit and sm35, we can use the funnel shift 'shf' instruction.
|
||||||
|
// {dHi, dLo} = {aHi, aLo} << Amt
|
||||||
|
// dHi = shf.l.clamp aLo, aHi, Amt
|
||||||
|
// dLo = aLo << Amt
|
||||||
|
|
||||||
|
SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
|
||||||
|
ShAmt);
|
||||||
|
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
|
||||||
|
|
||||||
|
SDValue Ops[2] = { Lo, Hi };
|
||||||
|
return DAG.getMergeValues(Ops, dl);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
|
||||||
|
// {dHi, dLo} = {aHi, aLo} << Amt
|
||||||
|
// - if (Amt>=size) then
|
||||||
|
// dLo = aLo << Amt (all 0)
|
||||||
|
// dLo = aLo << (Amt-size)
|
||||||
|
// else
|
||||||
|
// dLo = aLo << Amt
|
||||||
|
// dHi = (aHi << Amt) | (aLo >> (size-Amt))
|
||||||
|
|
||||||
|
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant(VTBits, MVT::i32), ShAmt);
|
||||||
|
SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
|
||||||
|
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
|
||||||
|
DAG.getConstant(VTBits, MVT::i32));
|
||||||
|
SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
|
||||||
|
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
|
||||||
|
SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
|
||||||
|
|
||||||
|
SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
|
||||||
|
DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
|
||||||
|
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
|
||||||
|
SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
|
||||||
|
|
||||||
|
SDValue Ops[2] = { Lo, Hi };
|
||||||
|
return DAG.getMergeValues(Ops, dl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||||
switch (Op.getOpcode()) {
|
switch (Op.getOpcode()) {
|
||||||
@ -1299,6 +1433,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return LowerSTORE(Op, DAG);
|
return LowerSTORE(Op, DAG);
|
||||||
case ISD::LOAD:
|
case ISD::LOAD:
|
||||||
return LowerLOAD(Op, DAG);
|
return LowerLOAD(Op, DAG);
|
||||||
|
case ISD::SHL_PARTS:
|
||||||
|
return LowerShiftLeftParts(Op, DAG);
|
||||||
|
case ISD::SRA_PARTS:
|
||||||
|
case ISD::SRL_PARTS:
|
||||||
|
return LowerShiftRightParts(Op, DAG);
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Custom lowering not defined for operation");
|
llvm_unreachable("Custom lowering not defined for operation");
|
||||||
}
|
}
|
||||||
|
@ -49,6 +49,8 @@ enum NodeType {
|
|||||||
CallSeqBegin,
|
CallSeqBegin,
|
||||||
CallSeqEnd,
|
CallSeqEnd,
|
||||||
CallPrototype,
|
CallPrototype,
|
||||||
|
FUN_SHFL_CLAMP,
|
||||||
|
FUN_SHFR_CLAMP,
|
||||||
MUL_WIDE_SIGNED,
|
MUL_WIDE_SIGNED,
|
||||||
MUL_WIDE_UNSIGNED,
|
MUL_WIDE_UNSIGNED,
|
||||||
IMAD,
|
IMAD,
|
||||||
@ -259,6 +261,9 @@ private:
|
|||||||
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||||||
SelectionDAG &DAG) const override;
|
SelectionDAG &DAG) const override;
|
||||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||||
|
@ -1364,6 +1364,32 @@ def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
|
|||||||
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
|
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
|
||||||
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
|
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Funnel shift in clamp mode
|
||||||
|
//
|
||||||
|
// - SDNodes are created so they can be used in the DAG code,
|
||||||
|
// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
|
||||||
|
//
|
||||||
|
// Type profile shared by the funnel-shift nodes: one result and three
// operands. The first two operands have the same type as the result, the
// result is an integer, and the third operand is an integer as well.
def SDTIntShiftDOp: SDTypeProfile<1, 3,
|
||||||
|
[SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
|
||||||
|
SDTCisInt<0>, SDTCisInt<3>]>;
|
||||||
|
// SelectionDAG nodes for the left and right funnel shifts. Both use the
// SDTIntShiftDOp type profile and declare no node properties.
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
|
||||||
|
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
|
||||||
|
|
||||||
|
// Matches FUN_SHFL_CLAMP on 32-bit integer registers and prints it as
// `shf.l.clamp.b32 dst, lo, hi, amt`.
def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
|
||||||
|
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
|
||||||
|
"shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
|
||||||
|
[(set Int32Regs:$dst,
|
||||||
|
(FUN_SHFL_CLAMP Int32Regs:$lo,
|
||||||
|
Int32Regs:$hi, Int32Regs:$amt))]>;
|
||||||
|
|
||||||
|
// Matches FUN_SHFR_CLAMP on 32-bit integer registers and prints it as
// `shf.r.clamp.b32 dst, lo, hi, amt`.
def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
|
||||||
|
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
|
||||||
|
"shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
|
||||||
|
[(set Int32Regs:$dst,
|
||||||
|
(FUN_SHFR_CLAMP Int32Regs:$lo,
|
||||||
|
Int32Regs:$hi, Int32Regs:$amt))]>;
|
||||||
|
|
||||||
//-----------------------------------
|
//-----------------------------------
|
||||||
// Data Movement (Load / Store, Move)
|
// Data Movement (Load / Store, Move)
|
||||||
//-----------------------------------
|
//-----------------------------------
|
||||||
|
38
test/CodeGen/NVPTX/shift-parts.ll
Normal file
38
test/CodeGen/NVPTX/shift-parts.ll
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||||
|
|
||||||
|
; Variable-amount i128 left shift: both the value and the shift amount are
; loaded from memory, and the result is stored back through %val. The
; directives below expect 64-bit shl/shr/or operations, 32-bit sub/add on the
; amount, and a setp/selp pair in the generated PTX.
; CHECK: shift_parts_left_128
|
||||||
|
define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
|
||||||
|
; CHECK: shl.b64
|
||||||
|
; CHECK: mov.u32
|
||||||
|
; CHECK: sub.s32
|
||||||
|
; CHECK: shr.u64
|
||||||
|
; CHECK: or.b64
|
||||||
|
; CHECK: add.s32
|
||||||
|
; CHECK: shl.b64
|
||||||
|
; CHECK: setp.gt.s32
|
||||||
|
; CHECK: selp.b64
|
||||||
|
; CHECK: shl.b64
|
||||||
|
%amt = load i128* %amtptr
|
||||||
|
%a = load i128* %val
|
||||||
|
%val0 = shl i128 %a, %amt
|
||||||
|
store i128 %val0, i128* %val
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Variable-amount i128 arithmetic right shift: both the value and the shift
; amount are loaded from memory, and the result is stored back through %val.
; The directives below expect an unsigned 64-bit shr for the low part, signed
; 64-bit shr for the high part, and a setp/selp pair in the generated PTX.
; CHECK: shift_parts_right_128
|
||||||
|
define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
|
||||||
|
; CHECK: shr.u64
|
||||||
|
; CHECK: sub.s32
|
||||||
|
; CHECK: shl.b64
|
||||||
|
; CHECK: or.b64
|
||||||
|
; CHECK: add.s32
|
||||||
|
; CHECK: shr.s64
|
||||||
|
; CHECK: setp.gt.s32
|
||||||
|
; CHECK: selp.b64
|
||||||
|
; CHECK: shr.s64
|
||||||
|
%amt = load i128* %amtptr
|
||||||
|
%a = load i128* %val
|
||||||
|
%val0 = ashr i128 %a, %amt
|
||||||
|
store i128 %val0, i128* %val
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user