mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-25 10:38:44 +00:00
- Convert remaining i64 custom lowering into custom instruction emission
  sequences in SPUDAGToDAGISel.cpp and SPU64InstrInfo.td, killing custom DAG
  node types as needed.
- i64 mul is now a legal instruction, but emits an instruction sequence that
  stretches tblgen and the imagination, as well as violating laws of several
  small countries and most southern US states (just kidding, but looking at a
  function with 80+ parameters is really weird and just plain wrong.)
- Update tests as needed.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62254 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f9b1d79a54
commit
94bd57e154
@ -2,7 +2,6 @@
|
|||||||
//
|
//
|
||||||
// Cell SPU 64-bit operations
|
// Cell SPU 64-bit operations
|
||||||
//
|
//
|
||||||
// Primary author: Scott Michel (scottm@aero.org)
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
@ -240,3 +239,145 @@ def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
|
|||||||
// i64 setult:
|
// i64 setult:
|
||||||
def : I64SETCCNegCond<setlt, I64GEr64>;
|
def : I64SETCCNegCond<setlt, I64GEr64>;
|
||||||
def : I64SELECTNegCond<setlt, I64GEr64>;
|
def : I64SELECTNegCond<setlt, I64GEr64>;
|
||||||
|
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
// v2i64, i64 add
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
|
||||||
|
// Carry-generate step of the 64-bit add: CGv4i32 applied to both operands.
class v2i64_add_cg<dag opA, dag opB>:
  CodeFrag<(CGv4i32 opA, opB)>;

// Extended-add step: ADDXv4i32 of the two operands, taking as its third
// operand the carry vector rearranged by SHUFBv4i32 under the given mask.
class v2i64_add_1<dag opA, dag opB, dag carry, dag shufMask>:
  CodeFrag<(ADDXv4i32 opA, opB, (SHUFBv4i32 carry, carry, shufMask))>;

// Complete add fragment: produces the carry with v2i64_add_cg and feeds it,
// together with the shuffle mask, into v2i64_add_1.
class v2i64_add<dag opA, dag opB, dag shufMask>:
  v2i64_add_1<opA, opB, v2i64_add_cg<opA, opB>.Fragment, shufMask>;

// i64 add: both R64C operands are wrapped in ORv2i64_i64, the vector add
// fragment is applied, and the result is wrapped in ORi64_v2i64.
// NOTE(review): ORv2i64_i64 / ORi64_v2i64 are presumably the scalar<->vector
// register moves -- confirm against their definitions.
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// v2i64 add: the vector operands go straight into the add fragment.
def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_add<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
|
||||||
|
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
// v2i64, i64 subtraction
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
|
||||||
|
// Borrow-generate step of the 64-bit subtract: BGv4i32 applied to both
// operands.
class v2i64_sub_bg<dag opA, dag opB>:
  CodeFrag<(BGv4i32 opA, opB)>;

// Extended-subtract step: SFXv4i32 of the two operands, taking as its third
// operand the borrow vector rearranged by SHUFBv4i32 under the given mask.
// Unlike v2i64_add, the borrow fragment is passed in by the user of the class.
class v2i64_sub<dag opA, dag opB, dag borrow, dag shufMask>:
  CodeFrag<(SFXv4i32 opA, opB, (SHUFBv4i32 borrow, borrow, shufMask))>;

// i64 subtract: both R64C operands are wrapped in ORv2i64_i64 (for the
// subtract itself and again for the borrow generation), and the result is
// wrapped in ORi64_v2i64.
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
                                              (ORv2i64_i64 R64C:$rB)>.Fragment,
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// v2i64 subtract: the vector operands go straight into the subtract and
// borrow-generate fragments.
def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_sub<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    v2i64_sub_bg<(v2i64 VECREG:$rA),
                                 (v2i64 VECREG:$rB)>.Fragment,
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
|
||||||
|
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
// v2i64, i64 multiply
|
||||||
|
//
|
||||||
|
// Note: i64 multiply is simply the vector->scalar conversion of the
|
||||||
|
// full-on v2i64 multiply, since the entire vector has to be manipulated
|
||||||
|
// anyway.
|
||||||
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
|
||||||
|
// Operand-preparation fragments for the 64-bit multiply.
//
// The *hi64 / *lo64 fragments SELB the operand against an all-zero vector
// (ILv4i32 0) under an FSMBIv4i32-generated byte mask: 0x0f0f for the "hi"
// forms, 0xf0f0 for the "lo" forms.
// NOTE(review): presumably "hi" keeps the upper 32-bit word of each 64-bit
// element and "lo" the lower one -- confirm against the FSMBI/SELB encoding.

class v2i64_mul_ahi64<dag rA>:
  CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB>:
  CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_alo64<dag rA>:
  CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB>:
  CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

// The *shlq2 / *shlq4 fragments apply SHLQBYIv4i32 to the operand with an
// immediate of 2 or 4 respectively.

class v2i64_mul_ashlq2<dag rA>:
  CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
  CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB>:
  CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB>:
  CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
|
||||||
|
|
||||||
|
// High-order partial-product sum for the 64-bit multiply.
//
// Adds together (Av4i32) seven MPYUv4i32 / MPYHv4i32 products formed from the
// masked-high-word and byte-shifted forms of rA and rB. v2i64_mul masks this
// sum with FSMBIv4i32 0x0f0f before adding it to the low sum. The nesting and
// operand order below determine the emitted instruction sequence; keep them
// as written.
class v2i64_highprod<dag rA, dag rB>:
  CodeFrag<(Av4i32
             (Av4i32
               (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                          v2i64_mul_ahi64<rA>.Fragment),
               (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                          v2i64_mul_bshlq4<rB>.Fragment)),
             (Av4i32
               (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                          v2i64_mul_ashlq4<rA>.Fragment),
               (Av4i32
                 (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                            v2i64_mul_bhi64<rB>.Fragment),
                 (Av4i32
                   (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                              v2i64_mul_bhi64<rB>.Fragment),
                   (Av4i32
                     (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                v2i64_mul_bshlq2<rB>.Fragment),
                     (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                v2i64_mul_bshlq2<rB>.Fragment))))))>;
|
||||||
|
|
||||||
|
// Low-order partial products for the 64-bit multiply.

// MPYUv4i32 of the two "lo64"-masked operands.
class v2i64_mul_a3_b3<dag rA, dag rB>:
  CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                      v2i64_mul_blo64<rB>.Fragment)>;

// MPYHHUv4i32 of masked-low rA and rB shifted by 2, then SHLQBYIv4i32 by 2,
// then SELB against zero under the FSMBIv4i32 0xc3c3 mask.
class v2i64_mul_a2_b3<dag rA, dag rB>:
  CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                        (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                     v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                      (ILv4i32 0),
                      (FSMBIv4i32 0xc3c3))>;

// Mirror image of v2i64_mul_a2_b3: masked-low rB times rA shifted by 2.
class v2i64_mul_a3_b2<dag rA, dag rB>:
  CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                        (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                     v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                      (ILv4i32 0),
                      (FSMBIv4i32 0xc3c3))>;
|
||||||
|
|
||||||
|
// Sum of the three low-order partial products, built from two chained
// v2i64_add fragments that share the same carry shuffle mask:
// (a3_b3 + a2_b3) + a3_b2.
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
  v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                      v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
            v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

// Full multiply fragment: the low sum plus the high product, where the high
// product is first SELB'd against zero under the FSMBIv4i32 0x0f0f mask.
class v2i64_mul<dag rA, dag rB, dag rCGmask>:
  v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
            (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                       (ILv4i32 0),
                       (FSMBIv4i32 0x0f0f)),
            rCGmask>;

// i64 multiply: both R64C operands are wrapped in ORv2i64_i64, the vector
// multiply fragment is applied, and the result is wrapped in ORi64_v2i64.
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
                                 (ORv2i64_i64 R64C:$rB),
                                 (v4i32 VECREG:$rCGmask)>.Fragment)>;

// v2i64 multiply: the vector operands go straight into the multiply fragment.
def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
|
||||||
|
@ -18,11 +18,13 @@
|
|||||||
#include "SPUHazardRecognizers.h"
|
#include "SPUHazardRecognizers.h"
|
||||||
#include "SPUFrameInfo.h"
|
#include "SPUFrameInfo.h"
|
||||||
#include "SPURegisterNames.h"
|
#include "SPURegisterNames.h"
|
||||||
|
#include "SPUTargetMachine.h"
|
||||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
#include "llvm/CodeGen/MachineFunction.h"
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
#include "llvm/CodeGen/SelectionDAG.h"
|
||||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||||
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||||
#include "llvm/Target/TargetOptions.h"
|
#include "llvm/Target/TargetOptions.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/Constants.h"
|
#include "llvm/Constants.h"
|
||||||
@ -254,6 +256,26 @@ public:
|
|||||||
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
|
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Emit a constant BUILD_VECTOR as a constant-pool load and select the load.
/*!
  Collects each operand of \a build_vec as a ConstantInt, places the resulting
  ConstantVector in the constant pool, lowers the pool index through
  SPU::LowerConstantPool, and runs SelectCode on a load of the vector's value
  type from that address.

  \param build_vec Value whose operands must all be ConstantSDNodes.
  \return The node SelectCode produces for the constant-pool load.
 */
SDNode *emitBuildVector(SDValue build_vec) {
  std::vector<Constant*> CV;

  for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
    // cast<> rather than dyn_cast<>: the result is dereferenced
    // unconditionally, so a non-constant operand must trip the cast assertion
    // instead of dereferencing a null pointer.
    ConstantSDNode *V = cast<ConstantSDNode>(build_vec.getOperand(i));
    CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
  }

  Constant *CP = ConstantVector::get(CV);
  SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
  unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  SDValue CGPoolOffset =
    SPU::LowerConstantPool(CPIdx, *CurDAG,
                           SPUtli.getSPUTargetMachine());
  return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
                                    CurDAG->getEntryNode(), CGPoolOffset,
                                    PseudoSourceValue::getConstantPool(), 0,
                                    false, Alignment));
}
|
||||||
|
|
||||||
/// Select - Convert the specified operand from a target-independent to a
|
/// Select - Convert the specified operand from a target-independent to a
|
||||||
/// target-specific node if it hasn't already been changed.
|
/// target-specific node if it hasn't already been changed.
|
||||||
SDNode *Select(SDValue Op);
|
SDNode *Select(SDValue Op);
|
||||||
@ -647,22 +669,82 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
|||||||
TFI, Imm0), 0);
|
TFI, Imm0), 0);
|
||||||
n_ops = 2;
|
n_ops = 2;
|
||||||
}
|
}
|
||||||
} else if (Opc == ISD::ZERO_EXTEND) {
|
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
|
||||||
// (zero_extend:i16 (and:i8 <arg>, <const>))
|
&& OpVT == MVT::i64) {
|
||||||
const SDValue &Op1 = N->getOperand(0);
|
SDValue Op0 = Op.getOperand(0);
|
||||||
|
MVT Op0VT = Op0.getValueType();
|
||||||
|
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
|
||||||
|
MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
|
||||||
|
SDValue shufMask;
|
||||||
|
|
||||||
if (Op.getValueType() == MVT::i16 && Op1.getValueType() == MVT::i8) {
|
switch (Op0VT.getSimpleVT()) {
|
||||||
if (Op1.getOpcode() == ISD::AND) {
|
default:
|
||||||
// Fold this into a single ANDHI. This is often seen in expansions of i1
|
cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
|
||||||
// to i8, then i8 to i16 in logical/branching operations.
|
abort();
|
||||||
DEBUG(cerr << "CellSPU: Coalescing (zero_extend:i16 (and:i8 "
|
/*NOTREACHED*/
|
||||||
"<arg>, <const>))\n");
|
break;
|
||||||
NewOpc = SPU::ANDHIi8i16;
|
case MVT::i32:
|
||||||
Ops[0] = Op1.getOperand(0);
|
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||||
Ops[1] = Op1.getOperand(1);
|
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||||
n_ops = 2;
|
CurDAG->getConstant(0x00010203, MVT::i32),
|
||||||
}
|
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x08090a0b, MVT::i32));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MVT::i16:
|
||||||
|
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||||
|
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x80800203, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x80800a0b, MVT::i32));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MVT::i8:
|
||||||
|
shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||||
|
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x80808003, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x80808080, MVT::i32),
|
||||||
|
CurDAG->getConstant(0x8080800b, MVT::i32));
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDNode *shufMaskLoad = emitBuildVector(shufMask);
|
||||||
|
SDNode *PromoteScalar =
|
||||||
|
SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0));
|
||||||
|
|
||||||
|
SDValue zextShuffle =
|
||||||
|
CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
|
||||||
|
SDValue(PromoteScalar, 0),
|
||||||
|
SDValue(PromoteScalar, 0),
|
||||||
|
SDValue(shufMaskLoad, 0));
|
||||||
|
|
||||||
|
// N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
|
||||||
|
// re-use it in the VEC2PREFSLOT selection without needing to explicitly
|
||||||
|
// call SelectCode (it's already done for us.)
|
||||||
|
SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, OpVecVT, zextShuffle));
|
||||||
|
return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, OpVT,
|
||||||
|
zextShuffle));
|
||||||
|
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||||
|
SDNode *CGLoad =
|
||||||
|
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||||
|
|
||||||
|
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
|
||||||
|
Op.getOperand(0), Op.getOperand(1),
|
||||||
|
SDValue(CGLoad, 0)));
|
||||||
|
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||||
|
SDNode *CGLoad =
|
||||||
|
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
|
||||||
|
|
||||||
|
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
|
||||||
|
Op.getOperand(0), Op.getOperand(1),
|
||||||
|
SDValue(CGLoad, 0)));
|
||||||
|
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
|
||||||
|
SDNode *CGLoad =
|
||||||
|
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
|
||||||
|
|
||||||
|
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
|
||||||
|
Op.getOperand(0), Op.getOperand(1),
|
||||||
|
SDValue(CGLoad, 0)));
|
||||||
} else if (Opc == ISD::SHL) {
|
} else if (Opc == ISD::SHL) {
|
||||||
if (OpVT == MVT::i64) {
|
if (OpVT == MVT::i64) {
|
||||||
return SelectSHLi64(Op, OpVT);
|
return SelectSHLi64(Op, OpVT);
|
||||||
|
@ -78,6 +78,7 @@ namespace {
|
|||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
||||||
@ -208,13 +209,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
// Custom lower i8, i32 and i64 multiplications
|
// Custom lower i8, i32 and i64 multiplications
|
||||||
setOperationAction(ISD::MUL, MVT::i8, Custom);
|
setOperationAction(ISD::MUL, MVT::i8, Custom);
|
||||||
setOperationAction(ISD::MUL, MVT::i32, Legal);
|
setOperationAction(ISD::MUL, MVT::i32, Legal);
|
||||||
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
|
setOperationAction(ISD::MUL, MVT::i64, Legal);
|
||||||
|
|
||||||
// Need to custom handle (some) common i8, i64 math ops
|
// Need to custom handle (some) common i8, i64 math ops
|
||||||
setOperationAction(ISD::ADD, MVT::i8, Custom);
|
setOperationAction(ISD::ADD, MVT::i8, Custom);
|
||||||
setOperationAction(ISD::ADD, MVT::i64, Custom);
|
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
||||||
setOperationAction(ISD::SUB, MVT::i8, Custom);
|
setOperationAction(ISD::SUB, MVT::i8, Custom);
|
||||||
setOperationAction(ISD::SUB, MVT::i64, Custom);
|
setOperationAction(ISD::SUB, MVT::i64, Legal);
|
||||||
|
|
||||||
// SPU does not have BSWAP. It does have i32 support CTLZ.
|
// SPU does not have BSWAP. It does have i32 support CTLZ.
|
||||||
// CTPOP has to be custom lowered.
|
// CTPOP has to be custom lowered.
|
||||||
@ -243,11 +244,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
setOperationAction(ISD::SETCC, MVT::i32, Legal);
|
setOperationAction(ISD::SETCC, MVT::i32, Legal);
|
||||||
setOperationAction(ISD::SETCC, MVT::i64, Legal);
|
setOperationAction(ISD::SETCC, MVT::i64, Legal);
|
||||||
|
|
||||||
// Zero extension and sign extension for i64 have to be
|
|
||||||
// custom legalized
|
|
||||||
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
|
|
||||||
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
|
|
||||||
|
|
||||||
// Custom lower i128 -> i64 truncates
|
// Custom lower i128 -> i64 truncates
|
||||||
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
|
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
|
||||||
|
|
||||||
@ -416,10 +412,9 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||||||
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
|
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
|
||||||
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
|
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
|
||||||
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
|
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
|
||||||
node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
|
node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
|
||||||
node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
|
node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
|
||||||
node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
|
node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
|
||||||
node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
|
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
|
||||||
@ -778,8 +773,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate the address of a constant pool entry.
|
//! Generate the address of a constant pool entry.
|
||||||
static SDValue
|
SDValue
|
||||||
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||||
MVT PtrVT = Op.getValueType();
|
MVT PtrVT = Op.getValueType();
|
||||||
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
|
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
|
||||||
@ -805,6 +800,12 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Alternate entry point for generating the address of a constant pool entry
|
||||||
|
SDValue
|
||||||
|
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
|
||||||
|
return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
|
||||||
|
}
|
||||||
|
|
||||||
static SDValue
|
static SDValue
|
||||||
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
||||||
MVT PtrVT = Op.getValueType();
|
MVT PtrVT = Op.getValueType();
|
||||||
@ -2185,123 +2186,34 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
|
//! Generate the carry-generate shuffle mask.
|
||||||
{
|
SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
|
||||||
MVT VT = Op.getValueType();
|
SmallVector<SDValue, 16> ShufBytes;
|
||||||
MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
|
|
||||||
|
|
||||||
SDValue Op0 = Op.getOperand(0);
|
// Create the shuffle mask for "rotating" the carry up one register slot
|
||||||
|
// once the carry is generated.
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
||||||
|
|
||||||
switch (Opc) {
|
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||||
case ISD::ZERO_EXTEND:
|
&ShufBytes[0], ShufBytes.size());
|
||||||
case ISD::ANY_EXTEND: {
|
}
|
||||||
MVT Op0VT = Op0.getValueType();
|
|
||||||
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
|
|
||||||
|
|
||||||
SDValue PromoteScalar =
|
//! Generate the borrow-generate shuffle mask
|
||||||
DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
|
SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
|
||||||
|
SmallVector<SDValue, 16> ShufBytes;
|
||||||
|
|
||||||
// Use a shuffle to zero extend the i32 to i64 directly:
|
// Create the shuffle mask for "rotating" the borrow up one register slot
|
||||||
SDValue shufMask;
|
// once the borrow is generated.
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
||||||
|
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
||||||
|
|
||||||
switch (Op0VT.getSimpleVT()) {
|
return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
||||||
default:
|
&ShufBytes[0], ShufBytes.size());
|
||||||
cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n";
|
|
||||||
abort();
|
|
||||||
/*NOTREACHED*/
|
|
||||||
break;
|
|
||||||
case MVT::i32:
|
|
||||||
shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
|
||||||
DAG.getConstant(0x80808080, MVT::i32),
|
|
||||||
DAG.getConstant(0x00010203, MVT::i32),
|
|
||||||
DAG.getConstant(0x80808080, MVT::i32),
|
|
||||||
DAG.getConstant(0x08090a0b, MVT::i32));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MVT::i16:
|
|
||||||
shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
|
||||||
DAG.getConstant(0x80808080, MVT::i32),
|
|
||||||
DAG.getConstant(0x80800203, MVT::i32),
|
|
||||||
DAG.getConstant(0x80808080, MVT::i32),
|
|
||||||
DAG.getConstant(0x80800a0b, MVT::i32));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MVT::i8:
|
|
||||||
shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
|
||||||
DAG.getConstant(0x80808080, MVT::i32),
|
|
||||||
DAG.getConstant(0x80808003, MVT::i32),
|
|
||||||
DAG.getConstant(0x80808080, MVT::i32),
|
|
||||||
DAG.getConstant(0x8080800b, MVT::i32));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
|
|
||||||
PromoteScalar, PromoteScalar, shufMask);
|
|
||||||
|
|
||||||
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
|
|
||||||
DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
|
|
||||||
}
|
|
||||||
|
|
||||||
case ISD::ADD: {
|
|
||||||
// Turn operands into vectors to satisfy type checking (shufb works on
|
|
||||||
// vectors)
|
|
||||||
SDValue Op0 =
|
|
||||||
DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
|
|
||||||
SDValue Op1 =
|
|
||||||
DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
|
|
||||||
SmallVector<SDValue, 16> ShufBytes;
|
|
||||||
|
|
||||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
|
||||||
// once the borrow is generated.
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
|
|
||||||
|
|
||||||
SDValue CarryGen =
|
|
||||||
DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
|
|
||||||
SDValue ShiftedCarry =
|
|
||||||
DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
|
|
||||||
CarryGen, CarryGen,
|
|
||||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
|
||||||
&ShufBytes[0], ShufBytes.size()));
|
|
||||||
|
|
||||||
return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
|
|
||||||
DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
|
|
||||||
Op0, Op1, ShiftedCarry));
|
|
||||||
}
|
|
||||||
|
|
||||||
case ISD::SUB: {
|
|
||||||
// Turn operands into vectors to satisfy type checking (shufb works on
|
|
||||||
// vectors)
|
|
||||||
SDValue Op0 =
|
|
||||||
DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
|
|
||||||
SDValue Op1 =
|
|
||||||
DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
|
|
||||||
SmallVector<SDValue, 16> ShufBytes;
|
|
||||||
|
|
||||||
// Create the shuffle mask for "rotating" the borrow up one register slot
|
|
||||||
// once the borrow is generated.
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
|
|
||||||
ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
|
|
||||||
|
|
||||||
SDValue BorrowGen =
|
|
||||||
DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
|
|
||||||
SDValue ShiftedBorrow =
|
|
||||||
DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
|
|
||||||
BorrowGen, BorrowGen,
|
|
||||||
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
|
|
||||||
&ShufBytes[0], ShufBytes.size()));
|
|
||||||
|
|
||||||
return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
|
|
||||||
DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
|
|
||||||
Op0, Op1, ShiftedBorrow));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Lower byte immediate operations for v16i8 vectors:
|
//! Lower byte immediate operations for v16i8 vectors:
|
||||||
@ -2576,11 +2488,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||||||
case ISD::RET:
|
case ISD::RET:
|
||||||
return LowerRET(Op, DAG, getTargetMachine());
|
return LowerRET(Op, DAG, getTargetMachine());
|
||||||
|
|
||||||
|
|
||||||
case ISD::ZERO_EXTEND:
|
|
||||||
case ISD::ANY_EXTEND:
|
|
||||||
return LowerI64Math(Op, DAG, Opc);
|
|
||||||
|
|
||||||
// i8, i64 math ops:
|
// i8, i64 math ops:
|
||||||
case ISD::ADD:
|
case ISD::ADD:
|
||||||
case ISD::SUB:
|
case ISD::SUB:
|
||||||
@ -2591,8 +2498,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|||||||
case ISD::SRA: {
|
case ISD::SRA: {
|
||||||
if (VT == MVT::i8)
|
if (VT == MVT::i8)
|
||||||
return LowerI8Math(Op, DAG, Opc, *this);
|
return LowerI8Math(Op, DAG, Opc, *this);
|
||||||
else if (VT == MVT::i64)
|
|
||||||
return LowerI64Math(Op, DAG, Opc);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2831,6 +2736,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, return unchanged.
|
// Otherwise, return unchanged.
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (Result.getNode()) {
|
if (Result.getNode()) {
|
||||||
|
@ -52,10 +52,11 @@ namespace llvm {
|
|||||||
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
|
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
|
||||||
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
|
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
|
||||||
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
|
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
|
||||||
ADD_EXTENDED, ///< Add extended, with carry
|
// Markers: These aren't used to generate target-dependent nodes, but
|
||||||
CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
|
// are used during instruction selection.
|
||||||
SUB_EXTENDED, ///< Subtract extended, with borrow
|
ADD64_MARKER, ///< i64 addition marker
|
||||||
BORROW_GENERATE, ///< Borrow generate for SUB_EXTENDED
|
SUB64_MARKER, ///< i64 subtraction marker
|
||||||
|
MUL64_MARKER, ///< i64 multiply marker
|
||||||
LAST_SPUISD ///< Last user-defined instruction
|
LAST_SPUISD ///< Last user-defined instruction
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -74,6 +75,12 @@ namespace llvm {
|
|||||||
MVT ValueType);
|
MVT ValueType);
|
||||||
SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
|
SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
|
||||||
SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
|
SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
|
||||||
|
|
||||||
|
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
|
||||||
|
const SPUTargetMachine &TM);
|
||||||
|
|
||||||
|
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
|
||||||
|
SDValue getCarryGenerateShufMask(SelectionDAG &DAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
class SPUTargetMachine; // forward dec'l.
|
class SPUTargetMachine; // forward dec'l.
|
||||||
@ -86,8 +93,18 @@ namespace llvm {
|
|||||||
SPUTargetMachine &SPUTM;
|
SPUTargetMachine &SPUTM;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
//! The venerable constructor
|
||||||
|
/*!
|
||||||
|
This is where the CellSPU backend sets operation handling (i.e., legal,
|
||||||
|
custom, expand or promote.)
|
||||||
|
*/
|
||||||
SPUTargetLowering(SPUTargetMachine &TM);
|
SPUTargetLowering(SPUTargetMachine &TM);
|
||||||
|
|
||||||
|
//! Get the target machine
|
||||||
|
SPUTargetMachine &getSPUTargetMachine() {
|
||||||
|
return SPUTM;
|
||||||
|
}
|
||||||
|
|
||||||
/// getTargetNodeName() - This method returns the name of a target specific
|
/// getTargetNodeName() - This method returns the name of a target specific
|
||||||
/// DAG node.
|
/// DAG node.
|
||||||
virtual const char *getTargetNodeName(unsigned Opcode) const;
|
virtual const char *getTargetNodeName(unsigned Opcode) const;
|
||||||
|
@ -705,17 +705,14 @@ class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
class ADDXVecInst<ValueType vectype>:
|
class ADDXVecInst<ValueType vectype>:
|
||||||
ADDXInst<(outs VECREG:$rT),
|
ADDXInst<(outs VECREG:$rT),
|
||||||
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
|
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
|
||||||
[(set (vectype VECREG:$rT),
|
[/* no pattern */]>,
|
||||||
(SPUaddx (vectype VECREG:$rA), (vectype VECREG:$rB),
|
|
||||||
(vectype VECREG:$rCarry)))]>,
|
|
||||||
RegConstraint<"$rCarry = $rT">,
|
RegConstraint<"$rCarry = $rT">,
|
||||||
NoEncode<"$rCarry">;
|
NoEncode<"$rCarry">;
|
||||||
|
|
||||||
class ADDXRegInst<RegisterClass rclass>:
|
class ADDXRegInst<RegisterClass rclass>:
|
||||||
ADDXInst<(outs rclass:$rT),
|
ADDXInst<(outs rclass:$rT),
|
||||||
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
|
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
|
||||||
[(set rclass:$rT,
|
[/* no pattern */]>,
|
||||||
(SPUaddx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
|
|
||||||
RegConstraint<"$rCarry = $rT">,
|
RegConstraint<"$rCarry = $rT">,
|
||||||
NoEncode<"$rCarry">;
|
NoEncode<"$rCarry">;
|
||||||
|
|
||||||
@ -737,14 +734,12 @@ class CGInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
class CGVecInst<ValueType vectype>:
|
class CGVecInst<ValueType vectype>:
|
||||||
CGInst<(outs VECREG:$rT),
|
CGInst<(outs VECREG:$rT),
|
||||||
(ins VECREG:$rA, VECREG:$rB),
|
(ins VECREG:$rA, VECREG:$rB),
|
||||||
[(set (vectype VECREG:$rT),
|
[/* no pattern */]>;
|
||||||
(SPUcarry_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
|
|
||||||
|
|
||||||
class CGRegInst<RegisterClass rclass>:
|
class CGRegInst<RegisterClass rclass>:
|
||||||
CGInst<(outs rclass:$rT),
|
CGInst<(outs rclass:$rT),
|
||||||
(ins rclass:$rA, rclass:$rB),
|
(ins rclass:$rA, rclass:$rB),
|
||||||
[(set rclass:$rT,
|
[/* no pattern */]>;
|
||||||
(SPUcarry_gen rclass:$rA, rclass:$rB))]>;
|
|
||||||
|
|
||||||
multiclass CarryGenerate {
|
multiclass CarryGenerate {
|
||||||
def v2i64 : CGVecInst<v2i64>;
|
def v2i64 : CGVecInst<v2i64>;
|
||||||
@ -765,17 +760,14 @@ class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
class SFXVecInst<ValueType vectype>:
|
class SFXVecInst<ValueType vectype>:
|
||||||
SFXInst<(outs VECREG:$rT),
|
SFXInst<(outs VECREG:$rT),
|
||||||
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
|
(ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
|
||||||
[(set (vectype VECREG:$rT),
|
[/* no pattern */]>,
|
||||||
(SPUsubx (vectype VECREG:$rA), (vectype VECREG:$rB),
|
|
||||||
(vectype VECREG:$rCarry)))]>,
|
|
||||||
RegConstraint<"$rCarry = $rT">,
|
RegConstraint<"$rCarry = $rT">,
|
||||||
NoEncode<"$rCarry">;
|
NoEncode<"$rCarry">;
|
||||||
|
|
||||||
class SFXRegInst<RegisterClass rclass>:
|
class SFXRegInst<RegisterClass rclass>:
|
||||||
SFXInst<(outs rclass:$rT),
|
SFXInst<(outs rclass:$rT),
|
||||||
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
|
(ins rclass:$rA, rclass:$rB, rclass:$rCarry),
|
||||||
[(set rclass:$rT,
|
[/* no pattern */]>,
|
||||||
(SPUsubx rclass:$rA, rclass:$rB, rclass:$rCarry))]>,
|
|
||||||
RegConstraint<"$rCarry = $rT">,
|
RegConstraint<"$rCarry = $rT">,
|
||||||
NoEncode<"$rCarry">;
|
NoEncode<"$rCarry">;
|
||||||
|
|
||||||
@ -797,14 +789,12 @@ class BGInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
class BGVecInst<ValueType vectype>:
|
class BGVecInst<ValueType vectype>:
|
||||||
BGInst<(outs VECREG:$rT),
|
BGInst<(outs VECREG:$rT),
|
||||||
(ins VECREG:$rA, VECREG:$rB),
|
(ins VECREG:$rA, VECREG:$rB),
|
||||||
[(set (vectype VECREG:$rT),
|
[/* no pattern */]>;
|
||||||
(SPUborrow_gen (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
|
|
||||||
|
|
||||||
class BGRegInst<RegisterClass rclass>:
|
class BGRegInst<RegisterClass rclass>:
|
||||||
BGInst<(outs rclass:$rT),
|
BGInst<(outs rclass:$rT),
|
||||||
(ins rclass:$rA, rclass:$rB),
|
(ins rclass:$rA, rclass:$rB),
|
||||||
[(set rclass:$rT,
|
[/* no pattern */]>;
|
||||||
(SPUborrow_gen rclass:$rA, rclass:$rB))]>;
|
|
||||||
|
|
||||||
multiclass BorrowGenerate {
|
multiclass BorrowGenerate {
|
||||||
def v4i32 : BGVecInst<v4i32>;
|
def v4i32 : BGVecInst<v4i32>;
|
||||||
@ -894,7 +884,7 @@ class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
"mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
|
||||||
pattern>;
|
pattern>;
|
||||||
|
|
||||||
def MPYAvec:
|
def MPYAv4i32:
|
||||||
MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
|
||||||
[(set (v4i32 VECREG:$rT),
|
[(set (v4i32 VECREG:$rT),
|
||||||
(add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
|
(add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
|
||||||
@ -939,7 +929,7 @@ class MPYSInst<dag OOL, dag IOL>:
|
|||||||
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpys\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
[/* no pattern */]>;
|
[/* no pattern */]>;
|
||||||
|
|
||||||
def MPYSvec:
|
def MPYSv4i32:
|
||||||
MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
|
MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
|
||||||
|
|
||||||
def MPYSr16:
|
def MPYSr16:
|
||||||
@ -972,14 +962,20 @@ def MPYHHAvec:
|
|||||||
def MPYHHAr32:
|
def MPYHHAr32:
|
||||||
MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
|
MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
|
||||||
|
|
||||||
// mpyhhu: Multiply high-high, unsigned
|
// mpyhhu: Multiply high-high, unsigned, e.g.:
|
||||||
|
//
|
||||||
|
// +-------+-------+ +-------+-------+ +---------+
|
||||||
|
// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
|
||||||
|
// +-------+-------+ +-------+-------+ +---------+
|
||||||
|
//
|
||||||
|
// where a0, b0 are the upper 16 bits of the 32-bit word
|
||||||
|
|
||||||
class MPYHHUInst<dag OOL, dag IOL>:
|
class MPYHHUInst<dag OOL, dag IOL>:
|
||||||
RRForm<0b01110011110, OOL, IOL,
|
RRForm<0b01110011110, OOL, IOL,
|
||||||
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
|
"mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
|
||||||
[/* no pattern */]>;
|
[/* no pattern */]>;
|
||||||
|
|
||||||
def MPYHHUvec:
|
def MPYHHUv4i32:
|
||||||
MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
|
MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
|
||||||
|
|
||||||
def MPYHHUr32:
|
def MPYHHUr32:
|
||||||
|
@ -8,8 +8,6 @@
|
|||||||
//
|
//
|
||||||
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
|
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
|
||||||
// purely and completely coincidental.
|
// purely and completely coincidental.
|
||||||
//
|
|
||||||
// Primary author: Scott Michel (scottm@aero.org)
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
@ -61,18 +61,20 @@ def SPUselb_type: SDTypeProfile<1, 3, [
|
|||||||
def SPUvecshift_type: SDTypeProfile<1, 2, [
|
def SPUvecshift_type: SDTypeProfile<1, 2, [
|
||||||
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
|
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
|
||||||
|
|
||||||
|
// "Marker" type profile for i64 operators whose expansion needs a shuffle
|
||||||
|
// mask (i.e., operators that use cg, bg, or another instruction that needs
|
||||||
|
// shufb to get things in the right place). One result, three operands:
|
||||||
|
//   Op0: The result (constrained to an integer type)
|
||||||
|
//   Op1, 2: LHS, RHS (constrained to the same type as the result)
|
||||||
|
//   Op3: Carry-generate shuffle mask (no type constraint in this profile)
|
||||||
|
|
||||||
|
def SPUmarker_type : SDTypeProfile<1, 3, [
|
||||||
|
SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Synthetic/pseudo-instructions
|
// Synthetic/pseudo-instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// Add extended, carry generate:
|
|
||||||
def SPUaddx : SDNode<"SPUISD::ADD_EXTENDED", SPUIntTrinaryOp, []>;
|
|
||||||
def SPUcarry_gen : SDNode<"SPUISD::CARRY_GENERATE", SDTIntBinOp, []>;
|
|
||||||
|
|
||||||
// Subtract extended, borrow generate
|
|
||||||
def SPUsubx : SDNode<"SPUISD::SUB_EXTENDED", SPUIntTrinaryOp, []>;
|
|
||||||
def SPUborrow_gen : SDNode<"SPUISD::BORROW_GENERATE", SDTIntBinOp, []>;
|
|
||||||
|
|
||||||
// SPU CNTB:
|
// SPU CNTB:
|
||||||
def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
|
def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
|
||||||
|
|
||||||
@ -127,6 +129,12 @@ def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
|
|||||||
// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
|
// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
|
||||||
def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
|
def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
|
||||||
|
|
||||||
|
// i64 markers: SDNodes that supply the extra operands used to generate the
|
||||||
|
// i64 operator instruction sequences. All three nodes share SPUmarker_type
// and declare no node properties (the trailing empty list).
|
||||||
|
def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
|
||||||
|
def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
|
||||||
|
def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Constraints: (taken from PPCInstrInfo.td)
|
// Constraints: (taken from PPCInstrInfo.td)
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -2,9 +2,15 @@
|
|||||||
; RUN: grep xswd %t1.s | count 3
|
; RUN: grep xswd %t1.s | count 3
|
||||||
; RUN: grep xsbh %t1.s | count 1
|
; RUN: grep xsbh %t1.s | count 1
|
||||||
; RUN: grep xshw %t1.s | count 2
|
; RUN: grep xshw %t1.s | count 2
|
||||||
; RUN: grep shufb %t1.s | count 4
|
; RUN: grep shufb %t1.s | count 7
|
||||||
; RUN: grep cg %t1.s | count 1
|
; RUN: grep cg %t1.s | count 4
|
||||||
; RUN: grep addx %t1.s | count 1
|
; RUN: grep addx %t1.s | count 4
|
||||||
|
; RUN: grep fsmbi %t1.s | count 3
|
||||||
|
; RUN: grep il %t1.s | count 2
|
||||||
|
; RUN: grep mpy %t1.s | count 10
|
||||||
|
; RUN: grep mpyh %t1.s | count 6
|
||||||
|
; RUN: grep mpyhhu %t1.s | count 2
|
||||||
|
; RUN: grep mpyu %t1.s | count 4
|
||||||
|
|
||||||
; ModuleID = 'stores.bc'
|
; ModuleID = 'stores.bc'
|
||||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||||
@ -44,3 +50,8 @@ define i64 @add_i64(i64 %a, i64 %b) nounwind {
|
|||||||
%1 = add i64 %a, %b
|
%1 = add i64 %a, %b
|
||||||
ret i64 %1
|
ret i64 %1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Returns the i64 product of %a and %b.
define i64 @mul_i64(i64 %a, i64 %b) nounwind {
|
||||||
|
%1 = mul i64 %a, %b
|
||||||
|
ret i64 %1
|
||||||
|
}
|
||||||
|
@ -7,6 +7,7 @@ int64_t tval_c = 1234567890001LL;
|
|||||||
int64_t tval_d = 10001LL;
|
int64_t tval_d = 10001LL;
|
||||||
int64_t tval_e = 10000LL;
|
int64_t tval_e = 10000LL;
|
||||||
uint64_t tval_f = 0xffffff0750135eb9;
|
uint64_t tval_f = 0xffffff0750135eb9;
|
||||||
|
/* -1 (all 64 bits set); main() multiplies it by itself through i64_mul(). */
int64_t tval_g = -1;
|
||||||
|
|
||||||
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
|
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
|
||||||
|
|
||||||
@ -546,6 +547,12 @@ test_i64_variable_shift(const char *func_name, int64_t (*func)(int64_t, int), in
|
|||||||
|
|
||||||
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
|
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
|
||||||
|
|
||||||
|
/*
 * Multiply two 64-bit signed integers and return the 64-bit product.
 *
 * The product is formed in uint64_t so that a result outside the int64_t
 * range wraps modulo 2^64 instead of being undefined behavior (signed
 * overflow). The low 64 bits are identical to those of the signed product,
 * so in-range results are unchanged. Converting the wrapped value back to
 * int64_t is implementation-defined (two's-complement reinterpretation on
 * GCC and Clang).
 */
int64_t i64_mul(int64_t a, int64_t b) {
  return (int64_t) ((uint64_t) a * (uint64_t) b);
}
|
||||||
|
|
||||||
|
/* ~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~- */
|
||||||
|
|
||||||
int
|
int
|
||||||
main(void)
|
main(void)
|
||||||
{
|
{
|
||||||
@ -553,12 +560,13 @@ main(void)
|
|||||||
const char *something_failed = " %d tests failed.\n";
|
const char *something_failed = " %d tests failed.\n";
|
||||||
const char *all_tests_passed = " All tests passed.\n";
|
const char *all_tests_passed = " All tests passed.\n";
|
||||||
|
|
||||||
printf("tval_a = %20lld (0x%020llx)\n", tval_a, tval_a);
|
printf("tval_a = %20lld (0x%016llx)\n", tval_a, tval_a);
|
||||||
printf("tval_b = %20lld (0x%020llx)\n", tval_b, tval_b);
|
printf("tval_b = %20lld (0x%016llx)\n", tval_b, tval_b);
|
||||||
printf("tval_c = %20lld (0x%020llx)\n", tval_c, tval_c);
|
printf("tval_c = %20lld (0x%016llx)\n", tval_c, tval_c);
|
||||||
printf("tval_d = %20lld (0x%020llx)\n", tval_d, tval_d);
|
printf("tval_d = %20lld (0x%016llx)\n", tval_d, tval_d);
|
||||||
printf("tval_e = %20lld (0x%020llx)\n", tval_e, tval_e);
|
printf("tval_e = %20lld (0x%016llx)\n", tval_e, tval_e);
|
||||||
printf("tval_f = %20llu (0x%020llx)\n", tval_f, tval_f);
|
printf("tval_f = %20llu (0x%016llx)\n", tval_f, tval_f);
|
||||||
|
/* tval_g is a signed int64_t, so its decimal value is printed with %lld. */
printf("tval_g = %20lld (0x%016llx)\n", tval_g, tval_g);
|
||||||
printf("----------------------------------------\n");
|
printf("----------------------------------------\n");
|
||||||
|
|
||||||
for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
|
for (i = 0; i < ARR_SIZE(int64_preds); ++i) {
|
||||||
@ -649,5 +657,17 @@ main(void)
|
|||||||
|
|
||||||
printf("----------------------------------------\n");
|
printf("----------------------------------------\n");
|
||||||
|
|
||||||
|
int64_t result;
|
||||||
|
|
||||||
|
result = i64_mul(tval_g, tval_g);
|
||||||
|
printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_g, tval_g, result, result);
|
||||||
|
result = i64_mul(tval_d, tval_e);
|
||||||
|
printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_d, tval_e, result, result);
|
||||||
|
/* 0xba7a664f13077c9 */
|
||||||
|
result = i64_mul(tval_a, tval_b);
|
||||||
|
printf("%20lld * %20lld = %20lld (0x%016llx)\n", tval_a, tval_b, result, result);
|
||||||
|
|
||||||
|
printf("----------------------------------------\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user