mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-15 22:28:18 +00:00
- Remove Tilmann's custom truncate lowering: it completely crippled DAGCombine's ability to find reasons to remove truncates when they were not needed. Consequently, the CellSPU backend would produce correct, but _really slow and horrible_, code. Replaced with instruction sequences that do the equivalent truncation in SPUInstrInfo.td.
- Re-examine how unaligned loads and stores work. Generated unaligned load code has been tested on the CellSPU hardware; see i32operations.c and i64operations.c in CodeGen/CellSPU/useful-harnesses. (While they may be toy test code, they do prove that some real-world code compiles correctly.)
- Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc fault because i64 ult is not yet implemented).
- Added i64 eq and neq for setcc and select/setcc; started a new instruction information file for them, SPU64InstrInfo.td. Additional i64 operations should be added to this file and not to SPUInstrInfo.td.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -117,7 +117,7 @@ namespace {
|
||||
}
|
||||
|
||||
void
|
||||
printMemRegImmS7(const MachineInstr *MI, unsigned OpNo)
|
||||
printShufAddr(const MachineInstr *MI, unsigned OpNo)
|
||||
{
|
||||
char value = MI->getOperand(OpNo).getImm();
|
||||
O << (int) value;
|
||||
@@ -183,16 +183,16 @@ namespace {
|
||||
}
|
||||
|
||||
void
|
||||
printMemRegImmS10(const MachineInstr *MI, unsigned OpNo)
|
||||
printDFormAddr(const MachineInstr *MI, unsigned OpNo)
|
||||
{
|
||||
const MachineOperand &MO = MI->getOperand(OpNo);
|
||||
assert(MO.isImm() &&
|
||||
"printMemRegImmS10 first operand is not immedate");
|
||||
"printDFormAddr first operand is not immedate");
|
||||
int64_t value = int64_t(MI->getOperand(OpNo).getImm());
|
||||
int16_t value16 = int16_t(value);
|
||||
assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
|
||||
&& "Invalid dform s10 offset argument");
|
||||
O << value16 << "(";
|
||||
O << (value16 & ~0xf) << "(";
|
||||
printOperand(MI, OpNo+1);
|
||||
O << ")";
|
||||
}
|
||||
|
77
lib/Target/CellSPU/SPU64InstrInfo.td
Normal file
77
lib/Target/CellSPU/SPU64InstrInfo.td
Normal file
@@ -0,0 +1,77 @@
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
// 64-bit comparisons:
|
||||
//
|
||||
// 1. The instruction sequences for vector versus scalar differ by a
|
||||
// constant.
|
||||
//
|
||||
// 2. There are no "immediate" forms, since loading 64-bit constants
|
||||
// could be a constant pool load.
|
||||
//
|
||||
// 3. i64 setcc results are i32, which are subsequently converted to an FSM
|
||||
// mask when used in a select pattern.
|
||||
//
|
||||
// 4. v2i64 setcc results are v4i32, which can be converted to an FSM mask
|
||||
// (TODO)
|
||||
//
|
||||
// M00$E Kan be Pretty N@sTi!!!!! (apologies to Monty!)
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
// selb instruction definition for i64. Note that the selection mask is
|
||||
// a vector, produced by various forms of FSM:
|
||||
def SELBr64_cond:
|
||||
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
|
||||
[/* no pattern */]>;
|
||||
|
||||
class CodeFrag<dag frag> {
|
||||
dag Fragment = frag;
|
||||
}
|
||||
|
||||
class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
|
||||
Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
|
||||
(SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
|
||||
|
||||
class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
|
||||
Pat<(cond R64C:$rA, R64C:$rB),
|
||||
(XORIr32 cmpare.Fragment, -1)>;
|
||||
|
||||
// The i64 seteq fragment that does the scalar->vector conversion and
|
||||
// comparison:
|
||||
def CEQr64compare:
|
||||
CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
|
||||
(ORv2i64_i64 R64C:$rB))),
|
||||
0x0000000c)>;
|
||||
|
||||
|
||||
// The i64 seteq fragment that does the vector comparison
|
||||
def CEQv2i64compare:
|
||||
CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
|
||||
0x0000000f)>;
|
||||
|
||||
// i64 seteq (equality): the setcc result is i32, which is converted to a
|
||||
// vector FSM mask when used in a select pattern.
|
||||
//
|
||||
// v2i64 seteq (equality): the setcc result is v4i32
|
||||
multiclass CompareEqual64 {
|
||||
// Plain old comparison, converts back to i32 scalar
|
||||
def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
|
||||
def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
|
||||
|
||||
// SELB mask from FSM:
|
||||
def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
|
||||
def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
|
||||
}
|
||||
|
||||
defm I64EQ: CompareEqual64;
|
||||
|
||||
def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
|
||||
|
||||
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
|
||||
I64EQv2i64.Fragment>;
|
||||
|
||||
def I64Select:
|
||||
Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
|
||||
(SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
|
||||
|
||||
def : I64SETCCNegCond<setne, I64EQr64>;
|
||||
|
||||
def : I64SELECTNegCond<setne, I64EQr64>;
|
@@ -165,24 +165,23 @@ namespace {
|
||||
MVT VT;
|
||||
unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
|
||||
bool ldresult_imm; /// LDRESULT instruction requires immediate?
|
||||
int prefslot_byte; /// Byte offset of the "preferred" slot
|
||||
unsigned lrinst; /// LR instruction
|
||||
};
|
||||
|
||||
const valtype_map_s valtype_map[] = {
|
||||
{ MVT::i1, 0, false, 3 },
|
||||
{ MVT::i8, SPU::ORBIr8, true, 3 },
|
||||
{ MVT::i16, SPU::ORHIr16, true, 2 },
|
||||
{ MVT::i32, SPU::ORIr32, true, 0 },
|
||||
{ MVT::i64, SPU::ORr64, false, 0 },
|
||||
{ MVT::f32, SPU::ORf32, false, 0 },
|
||||
{ MVT::f64, SPU::ORf64, false, 0 },
|
||||
{ MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
|
||||
{ MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
|
||||
{ MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
|
||||
{ MVT::i64, SPU::ORr64, false, SPU::LRr64 },
|
||||
{ MVT::f32, SPU::ORf32, false, SPU::LRf32 },
|
||||
{ MVT::f64, SPU::ORf64, false, SPU::LRf64 },
|
||||
// vector types... (sigh!)
|
||||
{ MVT::v16i8, 0, false, 0 },
|
||||
{ MVT::v8i16, 0, false, 0 },
|
||||
{ MVT::v4i32, 0, false, 0 },
|
||||
{ MVT::v2i64, 0, false, 0 },
|
||||
{ MVT::v4f32, 0, false, 0 },
|
||||
{ MVT::v2f64, 0, false, 0 }
|
||||
{ MVT::v16i8, 0, false, SPU::LRv16i8 },
|
||||
{ MVT::v8i16, 0, false, SPU::LRv8i16 },
|
||||
{ MVT::v4i32, 0, false, SPU::LRv4i32 },
|
||||
{ MVT::v2i64, 0, false, SPU::LRv2i64 },
|
||||
{ MVT::v4f32, 0, false, SPU::LRv4f32 },
|
||||
{ MVT::v2f64, 0, false, SPU::LRv2f64 }
|
||||
};
|
||||
|
||||
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
|
||||
@@ -686,33 +685,34 @@ SPUDAGToDAGISel::Select(SDValue Op) {
|
||||
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
|
||||
}
|
||||
|
||||
Chain = SDValue(Result, 1);
|
||||
|
||||
return Result;
|
||||
} else if (Opc == SPUISD::IndirectAddr) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
if (Op0.getOpcode() == SPUISD::LDRESULT) {
|
||||
/* || Op0.getOpcode() == SPUISD::AFormAddr) */
|
||||
// (IndirectAddr (LDRESULT, imm))
|
||||
SDValue Op1 = Op.getOperand(1);
|
||||
// Look at the operands: SelectCode() will catch the cases that aren't
|
||||
// specifically handled here.
|
||||
//
|
||||
// SPUInstrInfo catches the following patterns:
|
||||
// (SPUindirect (SPUhi ...), (SPUlo ...))
|
||||
// (SPUindirect $sp, imm)
|
||||
MVT VT = Op.getValueType();
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
SDValue Op1 = N->getOperand(1);
|
||||
RegisterSDNode *RN;
|
||||
|
||||
DEBUG(cerr << "CellSPU: IndirectAddr(LDRESULT, imm):\nOp0 = ");
|
||||
DEBUG(Op.getOperand(0).getNode()->dump(CurDAG));
|
||||
DEBUG(cerr << "\nOp1 = ");
|
||||
DEBUG(Op.getOperand(1).getNode()->dump(CurDAG));
|
||||
DEBUG(cerr << "\n");
|
||||
|
||||
if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
|
||||
|| (Op0.getOpcode() == ISD::Register
|
||||
&& ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
|
||||
&& RN->getReg() != SPU::R1))) {
|
||||
NewOpc = SPU::Ar32;
|
||||
if (Op1.getOpcode() == ISD::Constant) {
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
|
||||
Op1 = CurDAG->getTargetConstant(CN->getZExtValue(), VT);
|
||||
Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
|
||||
NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32);
|
||||
}
|
||||
Ops[0] = Op0;
|
||||
Ops[1] = Op1;
|
||||
n_ops = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (n_ops > 0) {
|
||||
if (N->hasOneUse())
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -39,7 +39,7 @@ namespace llvm {
|
||||
SHUFB, ///< Vector shuffle (permute)
|
||||
SHUFFLE_MASK, ///< Shuffle mask
|
||||
CNTB, ///< Count ones in bytes (population count)
|
||||
PROMOTE_SCALAR, ///< Promote scalar->vector
|
||||
PREFSLOT2VEC, ///< Promote scalar->vector
|
||||
VEC2PREFSLOT, ///< Extract element 0
|
||||
MPY, ///< 16-bit Multiply (low parts of a 32-bit)
|
||||
MPYU, ///< Multiply Unsigned
|
||||
@@ -58,6 +58,7 @@ namespace llvm {
|
||||
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
|
||||
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
|
||||
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
|
||||
GATHER_BITS, ///< Gather bits from bytes/words/halfwords
|
||||
ADD_EXTENDED, ///< Add extended, with carry
|
||||
CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
|
||||
SUB_EXTENDED, ///< Subtract extended, with borrow
|
||||
@@ -120,6 +121,9 @@ namespace llvm {
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
|
||||
|
||||
std::pair<unsigned, const TargetRegisterClass*>
|
||||
|
@@ -120,9 +120,8 @@ class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
|
||||
}
|
||||
|
||||
let RA = 0 in {
|
||||
class BICondForm<bits<11> opcode, string asmstr, list<dag> pattern>
|
||||
: RRForm<opcode, (outs), (ins R32C:$rA, R32C:$func), asmstr,
|
||||
BranchResolv, pattern>
|
||||
class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
|
||||
: RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
|
||||
{ }
|
||||
|
||||
let RT = 0 in {
|
||||
|
@@ -34,10 +34,14 @@ namespace {
|
||||
inline bool isCondBranch(const MachineInstr *I) {
|
||||
unsigned opc = I->getOpcode();
|
||||
|
||||
return (opc == SPU::BRNZ
|
||||
|| opc == SPU::BRZ
|
||||
|| opc == SPU::BRHNZ
|
||||
|| opc == SPU::BRHZ);
|
||||
return (opc == SPU::BRNZr32
|
||||
|| opc == SPU::BRNZv4i32
|
||||
|| opc == SPU::BRZr32
|
||||
|| opc == SPU::BRZv4i32
|
||||
|| opc == SPU::BRHNZr16
|
||||
|| opc == SPU::BRHNZv8i16
|
||||
|| opc == SPU::BRHZr16
|
||||
|| opc == SPU::BRHZv8i16);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,6 +107,19 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case SPU::LRr8:
|
||||
case SPU::LRr16:
|
||||
case SPU::LRr32:
|
||||
case SPU::LRf32:
|
||||
case SPU::LRr64:
|
||||
case SPU::LRf64:
|
||||
case SPU::LRr128:
|
||||
case SPU::LRv16i8:
|
||||
case SPU::LRv8i16:
|
||||
case SPU::LRv4i32:
|
||||
case SPU::LRv4f32:
|
||||
case SPU::LRv2i64:
|
||||
case SPU::LRv2f64:
|
||||
case SPU::ORv16i8_i8:
|
||||
case SPU::ORv8i16_i16:
|
||||
case SPU::ORv4i32_i32:
|
||||
@@ -114,7 +131,18 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
case SPU::ORi32_v4i32:
|
||||
case SPU::ORi64_v2i64:
|
||||
case SPU::ORf32_v4f32:
|
||||
case SPU::ORf64_v2f64:
|
||||
case SPU::ORf64_v2f64: {
|
||||
assert(MI.getNumOperands() == 2 &&
|
||||
MI.getOperand(0).isReg() &&
|
||||
MI.getOperand(1).isReg() &&
|
||||
"invalid SPU OR<type>_<vec> instruction!");
|
||||
if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
|
||||
sourceReg = MI.getOperand(0).getReg();
|
||||
destReg = MI.getOperand(0).getReg();
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SPU::ORv16i8:
|
||||
case SPU::ORv8i16:
|
||||
case SPU::ORv4i32:
|
||||
@@ -198,17 +226,13 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
|
||||
case SPU::STQDr8: {
|
||||
const MachineOperand MOp1 = MI->getOperand(1);
|
||||
const MachineOperand MOp2 = MI->getOperand(2);
|
||||
if (MOp1.isImm()
|
||||
&& (MOp2.isFI()
|
||||
|| (MOp2.isReg() && MOp2.getReg() == SPU::R1))) {
|
||||
if (MOp2.isFI())
|
||||
if (MOp1.isImm() && MOp2.isFI()) {
|
||||
FrameIndex = MOp2.getIndex();
|
||||
else
|
||||
FrameIndex = MOp1.getImm() / SPUFrameInfo::stackSlotSize();
|
||||
return MI->getOperand(0).getReg();
|
||||
}
|
||||
break;
|
||||
}
|
||||
#if 0
|
||||
case SPU::STQXv16i8:
|
||||
case SPU::STQXv8i16:
|
||||
case SPU::STQXv4i32:
|
||||
@@ -226,6 +250,7 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
|
||||
return MI->getOperand(0).getReg();
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -292,6 +317,8 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
|
||||
} else if (RC == SPU::R8CRegisterClass) {
|
||||
opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
|
||||
} else if (RC == SPU::VECREGRegisterClass) {
|
||||
opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
|
||||
} else {
|
||||
assert(0 && "Unknown regclass!");
|
||||
abort();
|
||||
@@ -366,6 +393,8 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
|
||||
} else if (RC == SPU::R8CRegisterClass) {
|
||||
opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
|
||||
} else if (RC == SPU::VECREGRegisterClass) {
|
||||
opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
|
||||
} else {
|
||||
assert(0 && "Unknown regclass in loadRegFromStackSlot!");
|
||||
abort();
|
||||
|
@@ -49,14 +49,14 @@ def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$fi
|
||||
|
||||
let canFoldAsLoad = 1 in {
|
||||
class LoadDFormVec<ValueType vectype>
|
||||
: RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src),
|
||||
: RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
|
||||
"lqd\t$rT, $src",
|
||||
LoadStore,
|
||||
[(set (vectype VECREG:$rT), (load dform_addr:$src))]>
|
||||
{ }
|
||||
|
||||
class LoadDForm<RegisterClass rclass>
|
||||
: RI10Form<0b00101100, (outs rclass:$rT), (ins memri10:$src),
|
||||
: RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
|
||||
"lqd\t$rT, $src",
|
||||
LoadStore,
|
||||
[(set rclass:$rT, (load dform_addr:$src))]>
|
||||
@@ -161,14 +161,14 @@ let canFoldAsLoad = 1 in {
|
||||
// Stores:
|
||||
//===----------------------------------------------------------------------===//
|
||||
class StoreDFormVec<ValueType vectype>
|
||||
: RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src),
|
||||
: RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
|
||||
"stqd\t$rT, $src",
|
||||
LoadStore,
|
||||
[(store (vectype VECREG:$rT), dform_addr:$src)]>
|
||||
{ }
|
||||
|
||||
class StoreDForm<RegisterClass rclass>
|
||||
: RI10Form<0b00100100, (outs), (ins rclass:$rT, memri10:$src),
|
||||
: RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
|
||||
"stqd\t$rT, $src",
|
||||
LoadStore,
|
||||
[(store rclass:$rT, dform_addr:$src)]>
|
||||
@@ -269,7 +269,7 @@ def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
|
||||
// Generate Controls for Insertion:
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
|
||||
"cbd\t$rT, $src", ShuffleOp,
|
||||
[(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
@@ -277,7 +277,7 @@ def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cbx\t$rT, $src", ShuffleOp,
|
||||
[(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
|
||||
"chd\t$rT, $src", ShuffleOp,
|
||||
[(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
@@ -285,7 +285,7 @@ def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"chx\t$rT, $src", ShuffleOp,
|
||||
[(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
|
||||
"cwd\t$rT, $src", ShuffleOp,
|
||||
[(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
@@ -293,7 +293,7 @@ def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cwx\t$rT, $src", ShuffleOp,
|
||||
[(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
|
||||
"cwd\t$rT, $src", ShuffleOp,
|
||||
[(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
@@ -301,7 +301,7 @@ def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cwx\t$rT, $src", ShuffleOp,
|
||||
[(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
|
||||
"cdd\t$rT, $src", ShuffleOp,
|
||||
[(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
@@ -309,7 +309,7 @@ def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
|
||||
"cdx\t$rT, $src", ShuffleOp,
|
||||
[(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
|
||||
|
||||
def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
|
||||
def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
|
||||
"cdd\t$rT, $src", ShuffleOp,
|
||||
[(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
|
||||
|
||||
@@ -421,6 +421,7 @@ multiclass ImmLoadAddress
|
||||
def f32: ILARegInst<R32FP, f18imm, fpimm18>;
|
||||
def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
|
||||
|
||||
def hi: ILARegInst<R32C, symbolHi, imm18>;
|
||||
def lo: ILARegInst<R32C, symbolLo, imm18>;
|
||||
|
||||
def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
|
||||
@@ -481,37 +482,77 @@ multiclass FormSelectMaskBytesImm
|
||||
defm FSMBI : FormSelectMaskBytesImm;
|
||||
|
||||
// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits
|
||||
def FSMB:
|
||||
RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA),
|
||||
"fsmb\t$rT, $rA", SelectOp,
|
||||
[(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>;
|
||||
class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
|
||||
pattern>;
|
||||
|
||||
class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
|
||||
FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
|
||||
[(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
|
||||
|
||||
class FSMBVecInst<ValueType vectype>:
|
||||
FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(SPUselmask (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass FormSelectMaskBits {
|
||||
def v16i8_r16: FSMBRegInst<R16C, v16i8>;
|
||||
def v16i8: FSMBVecInst<v16i8>;
|
||||
}
|
||||
|
||||
defm FSMB: FormSelectMaskBits;
|
||||
|
||||
// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
|
||||
// only 8-bits wide (even though it's input as 16-bits here)
|
||||
def FSMH:
|
||||
RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA),
|
||||
"fsmh\t$rT, $rA", SelectOp,
|
||||
[(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>;
|
||||
|
||||
class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
|
||||
pattern>;
|
||||
|
||||
class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
|
||||
FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
|
||||
[(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
|
||||
|
||||
class FSMHVecInst<ValueType vectype>:
|
||||
FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(SPUselmask (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass FormSelectMaskHalfword {
|
||||
def v8i16_r16: FSMHRegInst<R16C, v8i16>;
|
||||
def v8i16: FSMHVecInst<v8i16>;
|
||||
}
|
||||
|
||||
defm FSMH: FormSelectMaskHalfword;
|
||||
|
||||
// fsm: Form select mask for words. Like the other fsm* instructions,
|
||||
// only the lower 4 bits of $rA are significant.
|
||||
class FSMInst<ValueType vectype, RegisterClass rclass>:
|
||||
RRForm_1<0b00101101100, (outs VECREG:$rT), (ins rclass:$rA),
|
||||
"fsm\t$rT, $rA",
|
||||
SelectOp,
|
||||
|
||||
class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
|
||||
pattern>;
|
||||
|
||||
class FSMRegInst<ValueType vectype, RegisterClass rclass>:
|
||||
FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
|
||||
[(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
|
||||
|
||||
class FSMVecInst<ValueType vectype>:
|
||||
FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass FormSelectMaskWord {
|
||||
def r32 : FSMInst<v4i32, R32C>;
|
||||
def r16 : FSMInst<v4i32, R16C>;
|
||||
def v4i32: FSMVecInst<v4i32>;
|
||||
|
||||
def r32 : FSMRegInst<v4i32, R32C>;
|
||||
def r16 : FSMRegInst<v4i32, R16C>;
|
||||
}
|
||||
|
||||
defm FSM : FormSelectMaskWord;
|
||||
|
||||
// Special case when used for i64 math operations
|
||||
multiclass FormSelectMaskWord64 {
|
||||
def r32 : FSMInst<v2i64, R32C>;
|
||||
def r16 : FSMInst<v2i64, R16C>;
|
||||
def r32 : FSMRegInst<v2i64, R32C>;
|
||||
def r16 : FSMRegInst<v2i64, R16C>;
|
||||
}
|
||||
|
||||
defm FSM64 : FormSelectMaskWord64;
|
||||
@@ -898,20 +939,31 @@ def MPYHHAUr32:
|
||||
[]>;
|
||||
|
||||
// clz: Count leading zeroes
|
||||
def CLZv4i32:
|
||||
RRForm_1<0b10100101010, (outs VECREG:$rT), (ins VECREG:$rA),
|
||||
"clz\t$rT, $rA", IntegerOp,
|
||||
[/* intrinsic */]>;
|
||||
class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
|
||||
IntegerOp, pattern>;
|
||||
|
||||
def CLZr32:
|
||||
RRForm_1<0b10100101010, (outs R32C:$rT), (ins R32C:$rA),
|
||||
"clz\t$rT, $rA", IntegerOp,
|
||||
[(set R32C:$rT, (ctlz R32C:$rA))]>;
|
||||
class CLZRegInst<RegisterClass rclass>:
|
||||
CLZInst<(outs rclass:$rT), (ins rclass:$rA),
|
||||
[(set rclass:$rT, (ctlz rclass:$rA))]>;
|
||||
|
||||
class CLZVecInst<ValueType vectype>:
|
||||
CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass CountLeadingZeroes {
|
||||
def v4i32 : CLZVecInst<v4i32>;
|
||||
def r32 : CLZRegInst<R32C>;
|
||||
}
|
||||
|
||||
defm CLZ : CountLeadingZeroes;
|
||||
|
||||
// cntb: Count ones in bytes (aka "population count")
|
||||
//
|
||||
// NOTE: This instruction is really a vector instruction, but the custom
|
||||
// lowering code uses it in unorthodox ways to support CTPOP for other
|
||||
// data types!
|
||||
|
||||
def CNTBv16i8:
|
||||
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
|
||||
"cntb\t$rT, $rA", IntegerOp,
|
||||
@@ -927,26 +979,88 @@ def CNTBv4i32 :
|
||||
"cntb\t$rT, $rA", IntegerOp,
|
||||
[(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
|
||||
|
||||
// gbb: Gather all low order bits from each byte in $rA into a single 16-bit
|
||||
// quantity stored into $rT
|
||||
def GBB:
|
||||
RRForm_1<0b01001101100, (outs R16C:$rT), (ins VECREG:$rA),
|
||||
"gbb\t$rT, $rA", GatherOp,
|
||||
[]>;
|
||||
// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
|
||||
// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
|
||||
// slots 1-3.
|
||||
//
|
||||
// Note: This instruction "pairs" with the fsmb instruction for all of the
|
||||
// various types defined here.
|
||||
//
|
||||
// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
|
||||
// a vector or register.
|
||||
|
||||
class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
|
||||
|
||||
class GBBRegInst<RegisterClass rclass, ValueType vectype>:
|
||||
GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
|
||||
[(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>;
|
||||
|
||||
class GBBVecInst<ValueType vectype>:
|
||||
GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT), (SPUgatherbits (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass GatherBitsFromBytes {
|
||||
def v16i8_r32: GBBRegInst<R32C, v16i8>;
|
||||
def v16i8_r16: GBBRegInst<R16C, v16i8>;
|
||||
def v16i8: GBBVecInst<v16i8>;
|
||||
}
|
||||
|
||||
defm GBB: GatherBitsFromBytes;
|
||||
|
||||
// gbh: Gather all low order bits from each halfword in $rA into a single
|
||||
// 8-bit quantity stored in $rT
|
||||
def GBH:
|
||||
RRForm_1<0b10001101100, (outs R16C:$rT), (ins VECREG:$rA),
|
||||
"gbh\t$rT, $rA", GatherOp,
|
||||
[]>;
|
||||
// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
|
||||
// and slots 1-3 also set to 0.
|
||||
//
|
||||
// See notes for GBBInst, above.
|
||||
|
||||
class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
|
||||
pattern>;
|
||||
|
||||
class GBHRegInst<RegisterClass rclass, ValueType vectype>:
|
||||
GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
|
||||
[(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>;
|
||||
|
||||
class GBHVecInst<ValueType vectype>:
|
||||
GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(SPUgatherbits (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass GatherBitsHalfword {
|
||||
def v8i16_r32: GBHRegInst<R32C, v8i16>;
|
||||
def v8i16_r16: GBHRegInst<R16C, v8i16>;
|
||||
def v8i16: GBHVecInst<v8i16>;
|
||||
}
|
||||
|
||||
defm GBH: GatherBitsHalfword;
|
||||
|
||||
// gb: Gather all low order bits from each word in $rA into a single
|
||||
// 4-bit quantity stored in $rT
|
||||
def GB:
|
||||
RRForm_1<0b00001101100, (outs R16C:$rT), (ins VECREG:$rA),
|
||||
"gb\t$rT, $rA", GatherOp,
|
||||
[]>;
|
||||
// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
|
||||
// as well as slots 1-3.
|
||||
//
|
||||
// See notes for gbb, above.
|
||||
|
||||
class GBInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
|
||||
pattern>;
|
||||
|
||||
class GBRegInst<RegisterClass rclass, ValueType vectype>:
|
||||
GBInst<(outs rclass:$rT), (ins VECREG:$rA),
|
||||
[(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>;
|
||||
|
||||
class GBVecInst<ValueType vectype>:
|
||||
GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(SPUgatherbits (vectype VECREG:$rA)))]>;
|
||||
|
||||
multiclass GatherBitsWord {
|
||||
def v4i32_r32: GBRegInst<R32C, v4i32>;
|
||||
def v4i32_r16: GBRegInst<R16C, v4i32>;
|
||||
def v4i32: GBVecInst<v4i32>;
|
||||
}
|
||||
|
||||
defm GB: GatherBitsWord;
|
||||
|
||||
// avgb: average bytes
|
||||
def AVGB:
|
||||
@@ -976,30 +1090,26 @@ class XSBHVecInst<ValueType vectype>:
|
||||
XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
|
||||
[(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
|
||||
|
||||
class XSBHRegInst<RegisterClass rclass>:
|
||||
class XSBHInRegInst<RegisterClass rclass>:
|
||||
XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
|
||||
[(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;
|
||||
|
||||
multiclass ExtendByteHalfword {
|
||||
def v16i8: XSBHVecInst<v8i16>;
|
||||
def r16: XSBHRegInst<R16C>;
|
||||
def r16: XSBHInRegInst<R16C>;
|
||||
def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
|
||||
[(set R16C:$rDst, (sext R8C:$rSrc))]>;
|
||||
|
||||
// 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
|
||||
// quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
|
||||
// pattern below). Intentionally doesn't match a pattern because we want the
|
||||
// sext 8->32 pattern to do the work for us, namely because we need the extra
|
||||
// XSHWr32.
|
||||
def r32: XSBHRegInst<R32C>;
|
||||
def r32: XSBHInRegInst<R32C>;
|
||||
}
|
||||
|
||||
defm XSBH : ExtendByteHalfword;
|
||||
|
||||
// Sign-extend, but take an 8-bit register to a 16-bit register (not done as
|
||||
// sext_inreg)
|
||||
def XSBHr8:
|
||||
XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
|
||||
[(set R16C:$rDst, (sext R8C:$rSrc))]>;
|
||||
|
||||
// Sign extend halfwords to words:
|
||||
def XSHWvec:
|
||||
RRForm_1<0b01101101010, (outs VECREG:$rDest), (ins VECREG:$rSrc),
|
||||
@@ -1208,13 +1318,44 @@ class ORRegInst<RegisterClass rclass>:
|
||||
ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
|
||||
[(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
|
||||
|
||||
// ORCvtForm: OR conversion form
|
||||
//
|
||||
// This is used to "convert" the preferred slot to its vector equivalent, as
|
||||
// well as convert a vector back to its preferred slot.
|
||||
//
|
||||
// These are effectively no-ops, but need to exist for proper type conversion
|
||||
// and type coercion.
|
||||
|
||||
class ORCvtForm<dag OOL, dag IOL>
|
||||
: SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
|
||||
bits<7> RA;
|
||||
bits<7> RT;
|
||||
|
||||
let Pattern = [/* no pattern */];
|
||||
|
||||
let Inst{0-10} = 0b10000010000;
|
||||
let Inst{11-17} = RA;
|
||||
let Inst{18-24} = RA;
|
||||
let Inst{25-31} = RT;
|
||||
}
|
||||
|
||||
class ORPromoteScalar<RegisterClass rclass>:
|
||||
ORInst<(outs VECREG:$rT), (ins rclass:$rA, rclass:$rB),
|
||||
[/* no pattern */]>;
|
||||
ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
|
||||
|
||||
class ORExtractElt<RegisterClass rclass>:
|
||||
ORInst<(outs rclass:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[/* no pattern */]>;
|
||||
ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
|
||||
|
||||
class ORCvtRegGPRC<RegisterClass rclass>:
|
||||
ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>;
|
||||
|
||||
class ORCvtVecGPRC:
|
||||
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
|
||||
|
||||
class ORCvtGPRCReg<RegisterClass rclass>:
|
||||
ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>;
|
||||
|
||||
class ORCvtGPRCVec:
|
||||
ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
|
||||
|
||||
multiclass BitwiseOr
|
||||
{
|
||||
@@ -1260,48 +1401,115 @@ multiclass BitwiseOr
|
||||
def i64_v2i64: ORExtractElt<R64C>;
|
||||
def f32_v4f32: ORExtractElt<R32FP>;
|
||||
def f64_v2f64: ORExtractElt<R64FP>;
|
||||
|
||||
// Conversion from register to GPRC
|
||||
def i128_r64: ORCvtRegGPRC<R64C>;
|
||||
def i128_f64: ORCvtRegGPRC<R64FP>;
|
||||
def i128_r32: ORCvtRegGPRC<R32C>;
|
||||
def i128_f32: ORCvtRegGPRC<R32FP>;
|
||||
def i128_r16: ORCvtRegGPRC<R16C>;
|
||||
def i128_r8: ORCvtRegGPRC<R8C>;
|
||||
|
||||
// Conversion from vector to GPRC
|
||||
def i128_vec: ORCvtVecGPRC;
|
||||
|
||||
// Conversion from GPRC to register
|
||||
def r64_i128: ORCvtGPRCReg<R64C>;
|
||||
def f64_i128: ORCvtGPRCReg<R64FP>;
|
||||
def r32_i128: ORCvtGPRCReg<R32C>;
|
||||
def f32_i128: ORCvtGPRCReg<R32FP>;
|
||||
def r16_i128: ORCvtGPRCReg<R16C>;
|
||||
def r8_i128: ORCvtGPRCReg<R8C>;
|
||||
|
||||
// Conversion from GPRC to vector
|
||||
def vec_i128: ORCvtGPRCVec;
|
||||
}
|
||||
|
||||
defm OR : BitwiseOr;
|
||||
|
||||
// scalar->vector promotion patterns:
|
||||
def : Pat<(v16i8 (SPUpromote_scalar R8C:$rA)),
|
||||
(ORv16i8_i8 R8C:$rA, R8C:$rA)>;
|
||||
// scalar->vector promotion patterns (preferred slot to vector):
|
||||
def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
|
||||
(ORv16i8_i8 R8C:$rA)>;
|
||||
|
||||
def : Pat<(v8i16 (SPUpromote_scalar R16C:$rA)),
|
||||
(ORv8i16_i16 R16C:$rA, R16C:$rA)>;
|
||||
def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
|
||||
(ORv8i16_i16 R16C:$rA)>;
|
||||
|
||||
def : Pat<(v4i32 (SPUpromote_scalar R32C:$rA)),
|
||||
(ORv4i32_i32 R32C:$rA, R32C:$rA)>;
|
||||
def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
|
||||
(ORv4i32_i32 R32C:$rA)>;
|
||||
|
||||
def : Pat<(v2i64 (SPUpromote_scalar R64C:$rA)),
|
||||
(ORv2i64_i64 R64C:$rA, R64C:$rA)>;
|
||||
def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
|
||||
(ORv2i64_i64 R64C:$rA)>;
|
||||
|
||||
def : Pat<(v4f32 (SPUpromote_scalar R32FP:$rA)),
|
||||
(ORv4f32_f32 R32FP:$rA, R32FP:$rA)>;
|
||||
def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
|
||||
(ORv4f32_f32 R32FP:$rA)>;
|
||||
|
||||
def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)),
|
||||
(ORv2f64_f64 R64FP:$rA, R64FP:$rA)>;
|
||||
def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
|
||||
(ORv2f64_f64 R64FP:$rA)>;
|
||||
|
||||
// ORi*_v*: Used to extract vector element 0 (the preferred slot)
|
||||
// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
|
||||
// known as converting the vector back to its preferred slot
|
||||
|
||||
def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
|
||||
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
|
||||
(ORi8_v16i8 VECREG:$rA)>;
|
||||
|
||||
def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
|
||||
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
|
||||
(ORi16_v8i16 VECREG:$rA)>;
|
||||
|
||||
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
|
||||
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
|
||||
(ORi32_v4i32 VECREG:$rA)>;
|
||||
|
||||
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
|
||||
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
|
||||
(ORi64_v2i64 VECREG:$rA)>;
|
||||
|
||||
def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
|
||||
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
|
||||
(ORf32_v4f32 VECREG:$rA)>;
|
||||
|
||||
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
|
||||
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
|
||||
(ORf64_v2f64 VECREG:$rA)>;
|
||||
|
||||
// Load Register: This is an assembler alias for a bitwise OR of a register
|
||||
// against itself. It's here because it brings some clarity to assembly
|
||||
// language output.
|
||||
|
||||
let hasCtrlDep = 1 in {
|
||||
class LRInst<dag OOL, dag IOL>
|
||||
: SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
|
||||
bits<7> RA;
|
||||
bits<7> RT;
|
||||
|
||||
let Pattern = [/*no pattern*/];
|
||||
|
||||
let Inst{0-10} = 0b10000010000; /* It's an OR operation */
|
||||
let Inst{11-17} = RA;
|
||||
let Inst{18-24} = RA;
|
||||
let Inst{25-31} = RT;
|
||||
}
|
||||
|
||||
class LRVecInst<ValueType vectype>:
|
||||
LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
|
||||
|
||||
class LRRegInst<RegisterClass rclass>:
|
||||
LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
|
||||
|
||||
multiclass LoadRegister {
|
||||
def v2i64: LRVecInst<v2i64>;
|
||||
def v2f64: LRVecInst<v2f64>;
|
||||
def v4i32: LRVecInst<v4i32>;
|
||||
def v4f32: LRVecInst<v4f32>;
|
||||
def v8i16: LRVecInst<v8i16>;
|
||||
def v16i8: LRVecInst<v16i8>;
|
||||
|
||||
def r128: LRRegInst<GPRC>;
|
||||
def r64: LRRegInst<R64C>;
|
||||
def f64: LRRegInst<R64FP>;
|
||||
def r32: LRRegInst<R32C>;
|
||||
def f32: LRRegInst<R32FP>;
|
||||
def r16: LRRegInst<R16C>;
|
||||
def r8: LRRegInst<R8C>;
|
||||
}
|
||||
|
||||
defm LR: LoadRegister;
|
||||
}
|
||||
|
||||
// ORC: Bitwise "or" with complement (c = a | ~b)
|
||||
|
||||
@@ -1585,12 +1793,24 @@ class SELBVecInst<ValueType vectype>:
|
||||
(and (vnot (vectype VECREG:$rC)),
|
||||
(vectype VECREG:$rA))))]>;
|
||||
|
||||
class SELBVecCondInst<ValueType vectype>:
|
||||
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(select R32C:$rC,
|
||||
(vectype VECREG:$rB),
|
||||
(vectype VECREG:$rA)))]>;
|
||||
|
||||
class SELBRegInst<RegisterClass rclass>:
|
||||
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
|
||||
[(set rclass:$rT,
|
||||
(or (and rclass:$rA, rclass:$rC),
|
||||
(and rclass:$rB, (not rclass:$rC))))]>;
|
||||
|
||||
class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
|
||||
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
|
||||
[(set rclass:$rT,
|
||||
(select rcond:$rC, rclass:$rB, rclass:$rA))]>;
|
||||
|
||||
multiclass SelectBits
|
||||
{
|
||||
def v16i8: SELBVecInst<v16i8>;
|
||||
@@ -1603,6 +1823,16 @@ multiclass SelectBits
|
||||
def r32: SELBRegInst<R32C>;
|
||||
def r16: SELBRegInst<R16C>;
|
||||
def r8: SELBRegInst<R8C>;
|
||||
|
||||
def v16i8_cond: SELBVecCondInst<v16i8>;
|
||||
def v8i16_cond: SELBVecCondInst<v8i16>;
|
||||
def v4i32_cond: SELBVecCondInst<v4i32>;
|
||||
def v2i64_cond: SELBVecCondInst<v2i64>;
|
||||
|
||||
// SELBr64_cond is defined further down, look for i64 comparisons
|
||||
def r32_cond: SELBRegCondInst<R32C, R32C>;
|
||||
def r16_cond: SELBRegCondInst<R16C, R16C>;
|
||||
def r8_cond: SELBRegCondInst<R8C, R8C>;
|
||||
}
|
||||
|
||||
defm SELB : SelectBits;
|
||||
@@ -1625,14 +1855,6 @@ def : SPUselbPatReg<R16C, SELBr16>;
|
||||
def : SPUselbPatReg<R32C, SELBr32>;
|
||||
def : SPUselbPatReg<R64C, SELBr64>;
|
||||
|
||||
class SelectConditional<RegisterClass rclass, SPUInstr inst>:
|
||||
Pat<(select rclass:$rCond, rclass:$rTrue, rclass:$rFalse),
|
||||
(inst rclass:$rFalse, rclass:$rTrue, rclass:$rCond)>;
|
||||
|
||||
def : SelectConditional<R32C, SELBr32>;
|
||||
def : SelectConditional<R16C, SELBr16>;
|
||||
def : SelectConditional<R8C, SELBr8>;
|
||||
|
||||
// EQV: Equivalence (1 for each same bit, otherwise 0)
|
||||
//
|
||||
// Note: There are a lot of ways to match this bit operator and these patterns
|
||||
@@ -1753,6 +1975,10 @@ class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
|
||||
(resultvec VECREG:$rB),
|
||||
(maskvec VECREG:$rC)))]>;
|
||||
|
||||
class SHUFBGPRCInst:
|
||||
SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
|
||||
[/* no pattern */]>;
|
||||
|
||||
multiclass ShuffleBytes
|
||||
{
|
||||
def v16i8 : SHUFBVecInst<v16i8, v16i8>;
|
||||
@@ -1769,6 +1995,8 @@ multiclass ShuffleBytes
|
||||
|
||||
def v2f64 : SHUFBVecInst<v2f64, v16i8>;
|
||||
def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
|
||||
|
||||
def gprc : SHUFBGPRCInst;
|
||||
}
|
||||
|
||||
defm SHUFB : ShuffleBytes;
|
||||
@@ -2698,9 +2926,9 @@ let isTerminator = 1, isBarrier = 1 in {
|
||||
[/* no pattern to match */]>;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Comparison operators:
|
||||
//------------------------------------------------------------------------
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
// Comparison operators for i8, i16 and i32:
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
|
||||
RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
|
||||
@@ -2990,8 +3218,14 @@ defm CLGTI : CmpLGtrWordImm;
|
||||
// define a pattern to generate the right code, as a binary operator
|
||||
// (in a manner of speaking.)
|
||||
//
|
||||
// N.B.: This only matches the setcc set of conditionals. Special pattern
|
||||
// Notes:
|
||||
// 1. This only matches the setcc set of conditionals. Special pattern
|
||||
// matching is used for select conditionals.
|
||||
//
|
||||
// 2. The "DAG" versions of these classes are almost exclusively used for
|
||||
// i64 comparisons. See the tblgen fundamentals documentation for what
|
||||
// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
|
||||
// class for where ResultInstrs originates.
|
||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||
|
||||
class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
|
||||
@@ -3128,8 +3362,8 @@ class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
|
||||
SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
|
||||
SPUInstr cmpOp2>:
|
||||
Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
|
||||
rclass:$rFalse, rclass:$rTrue),
|
||||
(selinstr rclass:$rTrue, rclass:$rFalse,
|
||||
rclass:$rTrue, rclass:$rFalse),
|
||||
(selinstr rclass:$rFalse, rclass:$rTrue,
|
||||
(binop (cmpOp1 rclass:$rA, rclass:$rB),
|
||||
(cmpOp2 rclass:$rA, rclass:$rB)))>;
|
||||
|
||||
@@ -3226,39 +3460,114 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
|
||||
BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
|
||||
|
||||
// Various branches:
|
||||
def BRNZ:
|
||||
RI16Form<0b010000100, (outs), (ins R32C:$rCond, brtarget:$dest),
|
||||
"brnz\t$rCond,$dest",
|
||||
BranchResolv,
|
||||
[(brcond R32C:$rCond, bb:$dest)]>;
|
||||
class BRNZInst<dag IOL, list<dag> pattern>:
|
||||
RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
|
||||
BranchResolv, pattern>;
|
||||
|
||||
def BRZ:
|
||||
RI16Form<0b000000100, (outs), (ins R32C:$rT, brtarget:$dest),
|
||||
"brz\t$rT,$dest",
|
||||
BranchResolv,
|
||||
[/* no pattern */]>;
|
||||
class BRNZRegInst<RegisterClass rclass>:
|
||||
BRNZInst<(ins rclass:$rCond, brtarget:$dest),
|
||||
[(brcond rclass:$rCond, bb:$dest)]>;
|
||||
|
||||
def BRHNZ:
|
||||
RI16Form<0b011000100, (outs), (ins R16C:$rCond, brtarget:$dest),
|
||||
"brhnz\t$rCond,$dest",
|
||||
BranchResolv,
|
||||
[(brcond R16C:$rCond, bb:$dest)]>;
|
||||
class BRNZVecInst<ValueType vectype>:
|
||||
BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
|
||||
[(brcond (vectype VECREG:$rCond), bb:$dest)]>;
|
||||
|
||||
def BRHZ:
|
||||
RI16Form<0b001000100, (outs), (ins R16C:$rT, brtarget:$dest),
|
||||
"brhz\t$rT,$dest",
|
||||
BranchResolv,
|
||||
[/* no pattern */]>;
|
||||
multiclass BranchNotZero {
|
||||
def v4i32 : BRNZVecInst<v4i32>;
|
||||
def r32 : BRNZRegInst<R32C>;
|
||||
}
|
||||
|
||||
/*
|
||||
def BINZ:
|
||||
BICondForm<0b10010100100, "binz\t$rA, $func",
|
||||
[(SPUbinz R32C:$rA, R32C:$func)]>;
|
||||
defm BRNZ : BranchNotZero;
|
||||
|
||||
def BIZ:
|
||||
BICondForm<0b00010100100, "biz\t$rA, $func",
|
||||
[(SPUbiz R32C:$rA, R32C:$func)]>;
|
||||
*/
|
||||
class BRZInst<dag IOL, list<dag> pattern>:
|
||||
RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
|
||||
BranchResolv, pattern>;
|
||||
|
||||
class BRZRegInst<RegisterClass rclass>:
|
||||
BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
|
||||
|
||||
class BRZVecInst<ValueType vectype>:
|
||||
BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
|
||||
|
||||
multiclass BranchZero {
|
||||
def v4i32: BRZVecInst<v4i32>;
|
||||
def r32: BRZRegInst<R32C>;
|
||||
}
|
||||
|
||||
defm BRZ: BranchZero;
|
||||
|
||||
// Note: LLVM doesn't do branch conditional, indirect. Otherwise these would
|
||||
// be useful:
|
||||
/*
|
||||
class BINZInst<dag IOL, list<dag> pattern>:
|
||||
BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
|
||||
|
||||
class BINZRegInst<RegisterClass rclass>:
|
||||
BINZInst<(ins rclass:$rA, brtarget:$dest),
|
||||
[(brcond rclass:$rA, R32C:$dest)]>;
|
||||
|
||||
class BINZVecInst<ValueType vectype>:
|
||||
BINZInst<(ins VECREG:$rA, R32C:$dest),
|
||||
[(brcond (vectype VECREG:$rA), R32C:$dest)]>;
|
||||
|
||||
multiclass BranchNotZeroIndirect {
|
||||
def v4i32: BINZVecInst<v4i32>;
|
||||
def r32: BINZRegInst<R32C>;
|
||||
}
|
||||
|
||||
defm BINZ: BranchNotZeroIndirect;
|
||||
|
||||
class BIZInst<dag IOL, list<dag> pattern>:
|
||||
BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
|
||||
|
||||
class BIZRegInst<RegisterClass rclass>:
|
||||
BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
|
||||
|
||||
class BIZVecInst<ValueType vectype>:
|
||||
BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
|
||||
|
||||
multiclass BranchZeroIndirect {
|
||||
def v4i32: BIZVecInst<v4i32>;
|
||||
def r32: BIZRegInst<R32C>;
|
||||
}
|
||||
|
||||
defm BIZ: BranchZeroIndirect;
|
||||
*/
|
||||
|
||||
class BRHNZInst<dag IOL, list<dag> pattern>:
|
||||
RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
|
||||
pattern>;
|
||||
|
||||
class BRHNZRegInst<RegisterClass rclass>:
|
||||
BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
|
||||
[(brcond rclass:$rCond, bb:$dest)]>;
|
||||
|
||||
class BRHNZVecInst<ValueType vectype>:
|
||||
BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
|
||||
|
||||
multiclass BranchNotZeroHalfword {
|
||||
def v8i16: BRHNZVecInst<v8i16>;
|
||||
def r16: BRHNZRegInst<R16C>;
|
||||
}
|
||||
|
||||
defm BRHNZ: BranchNotZeroHalfword;
|
||||
|
||||
class BRHZInst<dag IOL, list<dag> pattern>:
|
||||
RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
|
||||
pattern>;
|
||||
|
||||
class BRHZRegInst<RegisterClass rclass>:
|
||||
BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
|
||||
|
||||
class BRHZVecInst<ValueType vectype>:
|
||||
BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
|
||||
|
||||
multiclass BranchZeroHalfword {
|
||||
def v8i16: BRHZVecInst<v8i16>;
|
||||
def r16: BRHZRegInst<R16C>;
|
||||
}
|
||||
|
||||
defm BRHZ: BranchZeroHalfword;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -3266,14 +3575,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
|
||||
(BRHZ R16C:$rA, bb:$dest)>;
|
||||
(BRHZr16 R16C:$rA, bb:$dest)>;
|
||||
def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
|
||||
(BRHNZ R16C:$rA, bb:$dest)>;
|
||||
(BRHNZr16 R16C:$rA, bb:$dest)>;
|
||||
|
||||
def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
|
||||
(BRZ R32C:$rA, bb:$dest)>;
|
||||
(BRZr32 R32C:$rA, bb:$dest)>;
|
||||
def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
|
||||
(BRNZ R32C:$rA, bb:$dest)>;
|
||||
(BRNZr32 R32C:$rA, bb:$dest)>;
|
||||
|
||||
multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
|
||||
{
|
||||
@@ -3290,8 +3599,8 @@ multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
|
||||
(brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
|
||||
}
|
||||
|
||||
defm BRCONDeq : BranchCondEQ<seteq, BRHZ, BRZ>;
|
||||
defm BRCONDne : BranchCondEQ<setne, BRHNZ, BRNZ>;
|
||||
defm BRCONDeq : BranchCondEQ<seteq, BRHZr16, BRZr32>;
|
||||
defm BRCONDne : BranchCondEQ<setne, BRHNZr16, BRNZr32>;
|
||||
|
||||
multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
|
||||
{
|
||||
@@ -3308,8 +3617,8 @@ multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
|
||||
(brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
|
||||
}
|
||||
|
||||
defm BRCONDugt : BranchCondLGT<setugt, BRHNZ, BRNZ>;
|
||||
defm BRCONDule : BranchCondLGT<setule, BRHZ, BRZ>;
|
||||
defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
|
||||
defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
|
||||
|
||||
multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
|
||||
SPUInstr orinst32, SPUInstr brinst32>
|
||||
@@ -3335,8 +3644,8 @@ multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
|
||||
bb:$dest)>;
|
||||
}
|
||||
|
||||
defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZ, ORr32, BRNZ>;
|
||||
defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZ, ORr32, BRZ>;
|
||||
defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
|
||||
defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
|
||||
|
||||
multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
|
||||
{
|
||||
@@ -3353,8 +3662,8 @@ multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
|
||||
(brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
|
||||
}
|
||||
|
||||
defm BRCONDgt : BranchCondGT<setgt, BRHNZ, BRNZ>;
|
||||
defm BRCONDle : BranchCondGT<setle, BRHZ, BRZ>;
|
||||
defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
|
||||
defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
|
||||
|
||||
multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
|
||||
SPUInstr orinst32, SPUInstr brinst32>
|
||||
@@ -3380,8 +3689,8 @@ multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
|
||||
bb:$dest)>;
|
||||
}
|
||||
|
||||
defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZ, ORr32, BRNZ>;
|
||||
defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZ, ORr32, BRZ>;
|
||||
defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
|
||||
defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
|
||||
|
||||
let isTerminator = 1, isBarrier = 1 in {
|
||||
let isReturn = 1 in {
|
||||
@@ -3397,10 +3706,12 @@ let isTerminator = 1, isBarrier = 1 in {
|
||||
class FAInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||
RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
|
||||
SPrecFP, pattern>;
|
||||
|
||||
class FAVecInst<ValueType vectype>:
|
||||
FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
|
||||
[(set (vectype VECREG:$rT),
|
||||
(fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
|
||||
|
||||
multiclass SFPAdd
|
||||
{
|
||||
def v4f32: FAVecInst<v4f32>;
|
||||
@@ -4000,6 +4311,69 @@ def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
|
||||
def : Pat<(i32 (anyext R16C:$rSrc)),
|
||||
(ORIi16i32 R16C:$rSrc, 0)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Truncates:
|
||||
// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
|
||||
// above are custom lowered.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : Pat<(i8 (trunc GPRC:$src)),
|
||||
(ORi8_v16i8
|
||||
(SHUFBgprc GPRC:$src, GPRC:$src,
|
||||
(IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
|
||||
|
||||
def : Pat<(i8 (trunc R64C:$src)),
|
||||
(ORi8_v16i8
|
||||
(SHUFBv2i64_m32
|
||||
(ORv2i64_i64 R64C:$src),
|
||||
(ORv2i64_i64 R64C:$src),
|
||||
(IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
|
||||
|
||||
def : Pat<(i8 (trunc R32C:$src)),
|
||||
(ORi8_v16i8
|
||||
(SHUFBv4i32_m32
|
||||
(ORv4i32_i32 R32C:$src),
|
||||
(ORv4i32_i32 R32C:$src),
|
||||
(IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
|
||||
|
||||
def : Pat<(i8 (trunc R16C:$src)),
|
||||
(ORi8_v16i8
|
||||
(SHUFBv4i32_m32
|
||||
(ORv8i16_i16 R16C:$src),
|
||||
(ORv8i16_i16 R16C:$src),
|
||||
(IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
|
||||
|
||||
def : Pat<(i16 (trunc GPRC:$src)),
|
||||
(ORi16_v8i16
|
||||
(SHUFBgprc GPRC:$src, GPRC:$src,
|
||||
(IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
|
||||
|
||||
def : Pat<(i16 (trunc R64C:$src)),
|
||||
(ORi16_v8i16
|
||||
(SHUFBv2i64_m32
|
||||
(ORv2i64_i64 R64C:$src),
|
||||
(ORv2i64_i64 R64C:$src),
|
||||
(IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
|
||||
|
||||
def : Pat<(i16 (trunc R32C:$src)),
|
||||
(ORi16_v8i16
|
||||
(SHUFBv4i32_m32
|
||||
(ORv4i32_i32 R32C:$src),
|
||||
(ORv4i32_i32 R32C:$src),
|
||||
(IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
|
||||
|
||||
def : Pat<(i32 (trunc GPRC:$src)),
|
||||
(ORi32_v4i32
|
||||
(SHUFBgprc GPRC:$src, GPRC:$src,
|
||||
(IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
|
||||
|
||||
def : Pat<(i32 (trunc R64C:$src)),
|
||||
(ORi32_v4i32
|
||||
(SHUFBv2i64_m32
|
||||
(ORv2i64_i64 R64C:$src),
|
||||
(ORv2i64_i64 R64C:$src),
|
||||
(IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Address generation: SPU, like PPC, has to split addresses into high and
|
||||
// low parts in order to load them into a register.
|
||||
@@ -4047,3 +4421,5 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
|
||||
|
||||
// Intrinsics:
|
||||
include "CellSDKIntrinsics.td"
|
||||
// 64-bit "instructions"/support
|
||||
include "SPU64InstrInfo.td"
|
||||
|
@@ -66,6 +66,13 @@ def SPUselb_type: SDTypeProfile<1, 3, [
|
||||
def SPUvecshift_type: SDTypeProfile<1, 2, [
|
||||
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
|
||||
|
||||
// SPU gather bits:
|
||||
// This instruction looks at each vector (word|halfword|byte) slot's low bit
|
||||
// and forms a mask in the low order bits of the first word's preferred slot.
|
||||
def SPUgatherbits_type: SDTypeProfile<1, 1, [
|
||||
/* no type constraints defined */
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Synthetic/pseudo-instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -137,14 +144,17 @@ def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
|
||||
// SPU select bits instruction
|
||||
def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
|
||||
|
||||
// SPU gather bits instruction:
|
||||
def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>;
|
||||
|
||||
// SPU floating point interpolate
|
||||
def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>;
|
||||
|
||||
// SPU floating point reciprocal estimate (used for fdiv)
|
||||
def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>;
|
||||
|
||||
def SDTpromote_scalar: SDTypeProfile<1, 1, []>;
|
||||
def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>;
|
||||
def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
|
||||
def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
|
||||
|
||||
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
|
||||
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
|
||||
|
@@ -609,15 +609,15 @@ def symbolLSA: Operand<i32> {
|
||||
let PrintMethod = "printSymbolLSA";
|
||||
}
|
||||
|
||||
// memory s7imm(reg) operand
|
||||
def memri7 : Operand<iPTR> {
|
||||
let PrintMethod = "printMemRegImmS7";
|
||||
// Shuffle address memory operand [s7imm(reg) d-format]
|
||||
def shufaddr : Operand<iPTR> {
|
||||
let PrintMethod = "printShufAddr";
|
||||
let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
|
||||
}
|
||||
|
||||
// memory s10imm(reg) operand
|
||||
def memri10 : Operand<iPTR> {
|
||||
let PrintMethod = "printMemRegImmS10";
|
||||
def dformaddr : Operand<iPTR> {
|
||||
let PrintMethod = "printDFormAddr";
|
||||
let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
|
||||
}
|
||||
|
||||
|
@@ -403,11 +403,6 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
|
||||
void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||
RegScavenger *RS)
|
||||
const {
|
||||
#if 0
|
||||
// Save and clear the LR state.
|
||||
SPUFunctionInfo *FI = MF.getInfo<SPUFunctionInfo>();
|
||||
FI->setUsesLR(MF.getRegInfo().isPhysRegUsed(LR));
|
||||
#endif
|
||||
// Mark LR and SP unused, since the prolog spills them to stack and
|
||||
// we don't want anyone else to spill them for us.
|
||||
//
|
||||
|
@@ -26,6 +26,13 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
|
||||
PrivateGlobalPrefix = ".L";
|
||||
// This corresponds to what the gcc SPU compiler emits, for consistency.
|
||||
CStringSection = ".rodata.str";
|
||||
|
||||
// BSS section needs to be emitted as ".section"
|
||||
BSSSection = "\t.section\t.bss";
|
||||
BSSSection_ = getUnnamedSection("\t.section\t.bss",
|
||||
SectionFlags::Writeable | SectionFlags::BSS,
|
||||
true);
|
||||
|
||||
}
|
||||
|
||||
/// PreferredEHDataFormat - This hook allows the target to select data
|
||||
|
@@ -2,7 +2,7 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
|
||||
; RUN: grep bisl %t1.s | count 7
|
||||
; RUN: grep ila %t1.s | count 1
|
||||
; RUN: grep rotqbyi %t1.s | count 4
|
||||
; RUN: grep rotqby %t1.s | count 6
|
||||
; RUN: grep lqa %t1.s | count 1
|
||||
; RUN: grep lqd %t1.s | count 12
|
||||
; RUN: grep dispatch_tab %t1.s | count 5
|
||||
|
144
test/CodeGen/CellSPU/icmp64.ll
Normal file
144
test/CodeGen/CellSPU/icmp64.ll
Normal file
@@ -0,0 +1,144 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep ceq %t1.s | count 4
|
||||
; RUN: grep cgti %t1.s | count 4
|
||||
; RUN: grep gb %t1.s | count 4
|
||||
; RUN: grep fsm %t1.s | count 2
|
||||
; RUN: grep xori %t1.s | count 1
|
||||
; RUN: grep selb %t1.s | count 2
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2
|
||||
; $3 = %arg1, $4 = %val1, $5 = %val2
|
||||
;
|
||||
; i64 integer comparisons:
|
||||
define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
entry:
|
||||
%A = icmp eq i64 %arg1, %arg2
|
||||
%B = select i1 %A, i64 %val1, i64 %val2
|
||||
ret i64 %B
|
||||
}
|
||||
|
||||
define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
entry:
|
||||
%A = icmp eq i64 %arg1, %arg2
|
||||
ret i1 %A
|
||||
}
|
||||
|
||||
define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
entry:
|
||||
%A = icmp ne i64 %arg1, %arg2
|
||||
%B = select i1 %A, i64 %val1, i64 %val2
|
||||
ret i64 %B
|
||||
}
|
||||
|
||||
define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
entry:
|
||||
%A = icmp ne i64 %arg1, %arg2
|
||||
ret i1 %A
|
||||
}
|
||||
|
||||
;; define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp ugt i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp ugt i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp uge i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp uge i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp ult i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp ult i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp ule i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp ule i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp sgt i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp sgt i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp sge i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp sge i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp slt i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp slt i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
||||
;;
|
||||
;; define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp sle i64 %arg1, %arg2
|
||||
;; %B = select i1 %A, i64 %val1, i64 %val2
|
||||
;; ret i64 %B
|
||||
;; }
|
||||
;;
|
||||
;; define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
|
||||
;; entry:
|
||||
;; %A = icmp sle i64 %arg1, %arg2
|
||||
;; ret i1 %A
|
||||
;; }
|
@@ -3,8 +3,17 @@
|
||||
; RUN: grep {stqd.*16(\$3)} %t1.s | count 4
|
||||
; RUN: grep 16256 %t1.s | count 2
|
||||
; RUN: grep 16384 %t1.s | count 1
|
||||
; RUN: grep 771 %t1.s | count 4
|
||||
; RUN: grep 515 %t1.s | count 2
|
||||
; RUN: grep 1799 %t1.s | count 2
|
||||
; RUN: grep 1543 %t1.s | count 5
|
||||
; RUN: grep 1029 %t1.s | count 3
|
||||
; RUN: grep {shli.*, 4} %t1.s | count 4
|
||||
; RUN: grep stqx %t1.s | count 4
|
||||
; RUN: grep ilhu %t1.s | count 11
|
||||
; RUN: grep iohl %t1.s | count 8
|
||||
; RUN: grep shufb %t1.s | count 15
|
||||
; RUN: grep frds %t1.s | count 1
|
||||
|
||||
; ModuleID = 'stores.bc'
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
@@ -89,3 +98,54 @@ entry:
|
||||
store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test truncating stores:
|
||||
|
||||
define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {
|
||||
entry:
|
||||
%conv = trunc i16 %val to i8
|
||||
store i8 %conv, i8* %dest
|
||||
ret i8 %conv
|
||||
}
|
||||
|
||||
define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {
|
||||
entry:
|
||||
%conv = trunc i32 %val to i8
|
||||
store i8 %conv, i8* %dest
|
||||
ret i8 %conv
|
||||
}
|
||||
|
||||
define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {
|
||||
entry:
|
||||
%conv = trunc i32 %val to i16
|
||||
store i16 %conv, i16* %dest
|
||||
ret i16 %conv
|
||||
}
|
||||
|
||||
define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {
|
||||
entry:
|
||||
%conv = trunc i64 %val to i8
|
||||
store i8 %conv, i8* %dest
|
||||
ret i8 %conv
|
||||
}
|
||||
|
||||
define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {
|
||||
entry:
|
||||
%conv = trunc i64 %val to i16
|
||||
store i16 %conv, i16* %dest
|
||||
ret i16 %conv
|
||||
}
|
||||
|
||||
define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {
|
||||
entry:
|
||||
%conv = trunc i64 %val to i32
|
||||
store i32 %conv, i32* %dest
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
define float @tstore_f64_f32(double %val, float* %dest) nounwind {
|
||||
entry:
|
||||
%conv = fptrunc double %val to float
|
||||
store float %conv, float* %dest
|
||||
ret float %conv
|
||||
}
|
||||
|
@@ -35,7 +35,7 @@ target triple = "spu"
|
||||
; int i2; // offset 12 [ignored]
|
||||
; unsigned char c4; // offset 16 [ignored]
|
||||
; unsigned char c5; // offset 17 [ignored]
|
||||
; unsigned char c6; // offset 18 [ignored]
|
||||
; unsigned char c6; // offset 18 (rotate left by 14 bytes to byte 3)
|
||||
; unsigned char c7; // offset 19 (no rotate, in preferred slot)
|
||||
; int i3; // offset 20 [ignored]
|
||||
; int i4; // offset 24 [ignored]
|
||||
|
@@ -1,16 +1,12 @@
|
||||
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
|
||||
; RUN: grep shufb %t1.s | count 9
|
||||
; RUN: grep shufb %t1.s | count 10
|
||||
; RUN: grep {ilhu.*1799} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*771} %t1.s | count 3
|
||||
; RUN: grep {ilhu.*771} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*1543} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*1029} %t1.s | count 1
|
||||
; RUN: grep {ilhu.*515} %t1.s | count 1
|
||||
; RUN: grep {iohl.*1799} %t1.s | count 1
|
||||
; RUN: grep {iohl.*771} %t1.s | count 3
|
||||
; RUN: grep {iohl.*1543} %t1.s | count 2
|
||||
; RUN: grep {iohl.*515} %t1.s | count 1
|
||||
; RUN: grep xsbh %t1.s | count 6
|
||||
; RUN: grep sfh %t1.s | count 5
|
||||
; RUN: grep {ilhu.*515} %t1.s | count 2
|
||||
; RUN: grep xsbh %t1.s | count 2
|
||||
; RUN: grep sfh %t1.s | count 1
|
||||
|
||||
; ModuleID = 'trunc.bc'
|
||||
target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
|
||||
@@ -41,23 +37,22 @@ target triple = "spu"
|
||||
; ret i64 %0
|
||||
;}
|
||||
|
||||
define i8 @trunc_i64_i8(i64 %u, i8 %v) nounwind readnone {
|
||||
define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i64 %u to i8
|
||||
%1 = sub i8 %0, %v
|
||||
ret i8 %1
|
||||
%tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
define i16 @trunc_i64_i16(i64 %u, i16 %v) nounwind readnone {
|
||||
define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i64 %u to i16
|
||||
%1 = sub i16 %0, %v
|
||||
ret i16 %1
|
||||
%tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
define i32 @trunc_i64_i32(i64 %u, i32 %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i64 %u to i32
|
||||
%1 = sub i32 %0, %v
|
||||
ret i32 %1
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
define i8 @trunc_i32_i8(i32 %u, i8 %v) nounwind readnone {
|
||||
@@ -66,16 +61,16 @@ entry:
|
||||
%1 = sub i8 %0, %v
|
||||
ret i8 %1
|
||||
}
|
||||
define i16 @trunc_i32_i16(i32 %u, i16 %v) nounwind readnone {
|
||||
define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i32 %u to i16
|
||||
%1 = sub i16 %0, %v
|
||||
ret i16 %1
|
||||
%tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3
|
||||
ret <8 x i16> %tmp1
|
||||
}
|
||||
|
||||
define i8 @trunc_i16_i8(i16 %u, i8 %v) nounwind readnone {
|
||||
define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) nounwind readnone {
|
||||
entry:
|
||||
%0 = trunc i16 %u to i8
|
||||
%1 = sub i8 %0, %v
|
||||
ret i8 %1
|
||||
%tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5
|
||||
ret <16 x i8> %tmp1
|
||||
}
|
||||
|
69
test/CodeGen/CellSPU/useful-harnesses/i32operations.c
Normal file
69
test/CodeGen/CellSPU/useful-harnesses/i32operations.c
Normal file
@@ -0,0 +1,69 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/* Fixed-width aliases defined locally (the only header this file includes is
   <stdio.h>). NOTE(review): assumes int is 32 bits wide on the target -- confirm. */
typedef unsigned int uint32_t;
|
||||
typedef int int32_t;
|
||||
|
||||
/* Map a C truth value to a printable word: "true" for any nonzero val,
   "false" for zero. The returned pointer refers to a string literal. */
const char *boolstring(int val) {
  return val ? "true" : "false";
}
|
||||
|
||||
/* 32-bit equality predicate: returns 1 when a equals b, otherwise 0. */
int i32_eq(int32_t a, int32_t b) {
  return (a == b);
}
|
||||
|
||||
/* 32-bit inequality predicate: returns 1 when a differs from b, otherwise 0. */
int i32_neq(int32_t a, int32_t b) {
  return (a != b);
}
|
||||
|
||||
/* Select on equality: returns c when a equals b, otherwise d. */
int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
  return ((a == b) ? c : d);
}
|
||||
|
||||
/* Select on inequality: returns c when a differs from b, otherwise d. */
int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) {
  return ((a != b) ? c : d);
}
|
||||
|
||||
/* One row of the predicate table: a display name plus the two functions
   exercised for that predicate. */
struct pred_s {
  const char *name;                                        /* label printed in the report */
  int (*predfunc)(int32_t, int32_t);                       /* comparison returning a truth value */
  int32_t (*selfunc)(int32_t, int32_t, int32_t, int32_t);  /* compare the first two, return the third or fourth */
};
|
||||
|
||||
struct pred_s preds[] = {
|
||||
{ "eq", i32_eq, i32_eq_select },
|
||||
{ "neq", i32_neq, i32_neq_select }
|
||||
};
|
||||
|
||||
/*
 * Harness entry point. Prints the five operands in decimal and hex, then,
 * for every entry in preds, prints the predicate result for several operand
 * pairs and the select result together with a check that the expected arm
 * was returned. Always returns 0.
 */
int main(void) {
  /* size_t (declared by <stdio.h>) matches the type of the sizeof
     expression, so the loop condition compares like with like. */
  size_t i;
  const size_t npreds = sizeof(preds) / sizeof(preds[0]);
  int32_t a = 1234567890;
  int32_t b = 345678901;
  int32_t c = 1234500000;
  int32_t d = 10001;
  int32_t e = 10000;

  /* %x consumes an unsigned int, so the signed operands are converted
     explicitly for the hex column. */
  printf("a = %12d (0x%08x)\n", a, (unsigned int) a);
  printf("b = %12d (0x%08x)\n", b, (unsigned int) b);
  printf("c = %12d (0x%08x)\n", c, (unsigned int) c);
  printf("d = %12d (0x%08x)\n", d, (unsigned int) d);
  printf("e = %12d (0x%08x)\n", e, (unsigned int) e);
  printf("----------------------------------------\n");

  for (i = 0; i < npreds; ++i) {
    /* Predicate results: identical operands, clearly different operands,
       and the near-miss pair d/e that differs by one. */
    printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
    printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
    printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
    printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
    printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));

    /* Select results, each followed by a comparison against one fixed arm
       (c for the a/a case, d for the a/b case). */
    printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d));
    printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
    printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d));
    printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));

    printf("----------------------------------------\n");
  }

  return 0;
}
|
68
test/CodeGen/CellSPU/useful-harnesses/i64operations.c
Normal file
68
test/CodeGen/CellSPU/useful-harnesses/i64operations.c
Normal file
@@ -0,0 +1,68 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/* Fixed-width aliases defined locally (the only header this file includes is
   <stdio.h>). NOTE(review): assumes long long is 64 bits wide on the target -- confirm. */
typedef unsigned long long int uint64_t;
|
||||
typedef long long int int64_t;
|
||||
|
||||
/* Map a C truth value to a printable word: "true" for any nonzero val,
   "false" for zero. The returned pointer refers to a string literal. */
const char *boolstring(int val) {
  return val ? "true" : "false";
}
|
||||
|
||||
/* 64-bit equality predicate: returns 1 when a equals b, otherwise 0. */
int i64_eq(int64_t a, int64_t b) {
  return (a == b);
}
|
||||
|
||||
/* 64-bit inequality predicate: returns 1 when a differs from b, otherwise 0. */
int i64_neq(int64_t a, int64_t b) {
  return (a != b);
}
|
||||
|
||||
/* Select on equality: returns c when a equals b, otherwise d. */
int64_t i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) {
  return ((a == b) ? c : d);
}
|
||||
|
||||
/* Select on inequality: returns c when a differs from b, otherwise d. */
int64_t i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) {
  return ((a != b) ? c : d);
}
|
||||
|
||||
/* One row of the predicate table: a display name plus the two functions
   exercised for that predicate. */
struct pred_s {
  const char *name;                                        /* label printed in the report */
  int (*predfunc)(int64_t, int64_t);                       /* comparison returning a truth value */
  int64_t (*selfunc)(int64_t, int64_t, int64_t, int64_t);  /* compare the first two, return the third or fourth */
};
|
||||
|
||||
struct pred_s preds[] = {
|
||||
{ "eq", i64_eq, i64_eq_select },
|
||||
{ "neq", i64_neq, i64_neq_select }
|
||||
};
|
||||
|
||||
/*
 * Harness entry point. Prints the five operands in decimal and hex, then,
 * for every entry in preds, prints the predicate result for several operand
 * pairs and the select result together with a check that the expected arm
 * was returned. Always returns 0.
 */
int main(void) {
  /* size_t (declared by <stdio.h>) matches the type of the sizeof
     expression, so the loop condition compares like with like. */
  size_t i;
  const size_t npreds = sizeof(preds) / sizeof(preds[0]);
  int64_t a = 1234567890000LL;
  int64_t b = 2345678901234LL;
  int64_t c = 1234567890001LL;
  int64_t d = 10001LL;
  int64_t e = 10000LL;

  /* %lld consumes a long long and %llx an unsigned long long; the explicit
     conversions keep the arguments matched to the format string whatever
     type int64_t is an alias for. */
  printf("a = %16lld (0x%016llx)\n", (long long) a, (unsigned long long) a);
  printf("b = %16lld (0x%016llx)\n", (long long) b, (unsigned long long) b);
  printf("c = %16lld (0x%016llx)\n", (long long) c, (unsigned long long) c);
  printf("d = %16lld (0x%016llx)\n", (long long) d, (unsigned long long) d);
  printf("e = %16lld (0x%016llx)\n", (long long) e, (unsigned long long) e);
  printf("----------------------------------------\n");

  for (i = 0; i < npreds; ++i) {
    /* Predicate results: identical operands, clearly different operands,
       and the near-miss pairs a/c and d/e that differ by one. */
    printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
    printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
    printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
    printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
    printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));

    /* Select results, each followed by a comparison against one fixed arm
       (c for the a/a case, d for the a/b case). */
    printf("a %s a ? c : d = %lld\n", preds[i].name, (long long) (*preds[i].selfunc)(a, a, c, d));
    printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
    printf("a %s b ? c : d = %lld\n", preds[i].name, (long long) (*preds[i].selfunc)(a, b, c, d));
    printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));

    printf("----------------------------------------\n");
  }

  return 0;
}
|
Reference in New Issue
Block a user