- Rename fcmp.ll test to fcmp32.ll, start adding new double tests to fcmp64.ll
- Fix select_bits.ll test
- Capitulate to the DAGCombiner and move i64 constant loads to instruction
  selection (SPUISelDAGToDAG.cpp).

  <rant>DAGCombiner will insert all kinds of 64-bit optimizations after
  operation legalization occurs, so now we have to do most of the work that
  instruction selection should be doing twice: once to determine whether a
  v2i64 build_vector can be handled by SelectCode(), which then runs all of
  the predicates a second time to select the necessary instructions. But
  CellSPU is a good citizen.</rant>


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62990 91177308-0d34-0410-b5e6-96231b3b80d8
Scott Michel 2009-01-26 03:31:40 +00:00
parent 5bf4b7556f
commit c9c8b2a804
11 changed files with 556 additions and 304 deletions
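
As a sketch of the kind of input this change affects (hypothetical IR, not one of the tests added in this commit), an i64 constant whose upper and lower 32-bit words differ is now matched by SelectI64Constant during instruction selection instead of being custom lowered:

define i64 @i64_const() {
; 0x123456789abcdef0: the upper and lower 32-bit words differ, so this is not
; a simple splat and goes through the SHUFB path in SelectI64Constant.
  ret i64 1311768467463790320
}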


@ -30,8 +30,8 @@
// selb instruction definition for i64. Note that the selection mask is
// a vector, produced by various forms of FSM:
def SELBr64_cond:
SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
[/* no pattern */]>;
// The generic i64 select pattern, which assumes that the comparison result
// is in a 32-bit register that contains a select mask pattern (i.e., gather


@ -254,26 +254,56 @@ public:
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
}
}
SDNode *emitBuildVector(SDValue build_vec) {
MVT vecVT = build_vec.getValueType();
SDNode *bvNode = build_vec.getNode();
bool canBeSelected = false;
// Check to see if this vector can be represented as a CellSPU immediate
// constant.
if (vecVT == MVT::v8i16) {
if (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0) {
canBeSelected = true;
}
} else if (vecVT == MVT::v4i32) {
if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
|| (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
|| (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0)
|| (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0)) {
canBeSelected = true;
}
} else if (vecVT == MVT::v2i64) {
if ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
|| (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)
|| (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)) {
canBeSelected = true;
}
}
if (canBeSelected) {
return Select(build_vec);
}
// No, need to emit a constant pool spill:
std::vector<Constant*> CV;
for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
ConstantSDNode *V = dyn_cast<ConstantSDNode>(build_vec.getOperand(i));
CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
}
Constant *CP = ConstantVector::get(CV);
SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG,
SPUtli.getSPUTargetMachine());
return SelectCode(CurDAG->getLoad(build_vec.getValueType(),
CurDAG->getEntryNode(), CGPoolOffset,
PseudoSourceValue::getConstantPool(), 0,
false, Alignment));
}
/// Select - Convert the specified operand from a target-independent to a
@ -289,6 +319,9 @@ public:
//! Emit the instruction sequence for i64 sra
SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
//! Emit the necessary sequence for loading i64 constants:
SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
//! Returns true if the address N is an A-form (local store) address
bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index);
@ -652,7 +685,9 @@ SPUDAGToDAGISel::Select(SDValue Op) {
if (N->isMachineOpcode()) {
return NULL; // Already selected.
} else if (Opc == ISD::FrameIndex) {
}
if (Opc == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
@ -669,6 +704,11 @@ SPUDAGToDAGISel::Select(SDValue Op) {
TFI, Imm0), 0);
n_ops = 2;
}
} else if (Opc == ISD::Constant && OpVT == MVT::i64) {
// Catch the i64 constants that end up here. Note: The backend doesn't
// attempt to legalize the constant (it's useless because DAGCombiner
// will insert 64-bit constants and we can't stop it).
return SelectI64Constant(Op, OpVT);
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
&& OpVT == MVT::i64) {
SDValue Op0 = Op.getOperand(0);
@ -745,27 +785,38 @@ SPUDAGToDAGISel::Select(SDValue Op) {
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
} else if (Opc == ISD::TRUNCATE) {
SDValue Op0 = Op.getOperand(0);
if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
&& OpVT == MVT::i32
&& Op0.getValueType() == MVT::i64) {
// Catch the (truncate:i32 ([sra|srl]:i64 arg, c)) case, where c >= 32, to
// take advantage of the fact that the upper 32 bits are in the
// i32 preferred slot and avoid all kinds of other shuffle gymnastics:
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
if (CN != 0) {
unsigned shift_amt = unsigned(CN->getZExtValue());
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
if (shift_amt >= 32) {
SDNode *hi32 =
CurDAG->getTargetNode(SPU::ORr32_r64, OpVT, Op0.getOperand(0));
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
shift_amt -= 32;
if (shift_amt > 0) {
// Take care of the additional shift, if present:
SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
unsigned Opc = SPU::ROTMAIr32_i32;
if (Op0.getOpcode() == ISD::SRL)
Opc = SPU::ROTMr32;
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
hi32 = CurDAG->getTargetNode(Opc, OpVT, SDValue(hi32, 0), shift);
}
return hi32;
}
}
}
} else if (Opc == ISD::SHL) {
if (OpVT == MVT::i64) {
return SelectSHLi64(Op, OpVT);
@ -1046,6 +1097,70 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(Shift, 0));
}
/*!
Do the magic necessary to load an i64 constant
*/
SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
SDValue i64vec =
SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue());
// Here's where it gets interesting, because we have to parse out the
// subtree handed back in i64vec:
if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
// The degenerate case where the upper and lower bits in the splat are
// identical:
SDValue Op0 = i64vec.getOperand(0);
ReplaceUses(i64vec, Op0);
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT,
SDValue(emitBuildVector(Op0), 0));
} else if (i64vec.getOpcode() == SPUISD::SHUFB) {
SDValue lhs = i64vec.getOperand(0);
SDValue rhs = i64vec.getOperand(1);
SDValue shufmask = i64vec.getOperand(2);
if (lhs.getOpcode() == ISD::BIT_CONVERT) {
ReplaceUses(lhs, lhs.getOperand(0));
lhs = lhs.getOperand(0);
}
SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
? lhs.getNode()
: emitBuildVector(lhs));
if (rhs.getOpcode() == ISD::BIT_CONVERT) {
ReplaceUses(rhs, rhs.getOperand(0));
rhs = rhs.getOperand(0);
}
SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
? rhs.getNode()
: emitBuildVector(rhs));
if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
ReplaceUses(shufmask, shufmask.getOperand(0));
shufmask = shufmask.getOperand(0);
}
SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
? shufmask.getNode()
: emitBuildVector(shufmask));
SDNode *shufNode =
Select(CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
SDValue(shufMaskNode, 0)));
return CurDAG->getTargetNode(SPU::ORi64_v2i64, OpVT, SDValue(shufNode, 0));
} else {
cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
abort();
}
}
/// createSPUISelDag - This pass converts a legalized DAG into a
/// SPU-specific DAG, ready for instruction scheduling.
///


@ -17,6 +17,7 @@
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -79,6 +80,43 @@ namespace {
return retval;
}
//! Expand a library call into an actual call DAG node
/*!
\note
This code is taken from SelectionDAGLegalize, since it is not exposed as
part of the LLVM SelectionDAG API.
*/
SDValue
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
MVT ArgVT = Op.getOperand(i).getValueType();
const Type *ArgTy = ArgVT.getTypeForMVT();
Entry.Node = Op.getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
CallingConv::C, false, Callee, Args, DAG);
return CallInfo.first;
}
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@ -113,7 +151,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
// SPU constant load actions are custom lowered:
setOperationAction(ISD::Constant, MVT::i64, Custom);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@ -128,10 +165,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
// SMUL_LOHI, UMUL_LOHI are not legal for Cell:
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
MVT StoreVT = (MVT::SimpleValueType) stype;
setTruncStoreAction(VT, StoreVT, Expand);
@ -179,16 +212,14 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
// If we're enabling GP optimizations, use hardware square root
// Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
// for f32!)
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
// Make sure that DAGCombine doesn't insert illegal 64-bit constants
setOperationAction(ISD::FABS, MVT::f64, Custom);
// SPU can do rotate right and left, so legalize it... but customize for i8
// because instructions don't exist.
@ -254,22 +285,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
// SPU has a legal FP -> signed INT instruction
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
// SPU has a legal FP -> signed INT instruction for f32, but for f64, need
// to expand to a libcall, hence the custom lowering:
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
// SPU has [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
@ -338,24 +368,23 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
MVT VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
// mul has to be custom lowered.
// TODO: v2i64 vector multiply
setOperationAction(ISD::MUL, VT, Legal);
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::SELECT, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
// These operations need to be expanded:
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
@ -866,31 +895,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
return SDValue();
}
//! Custom lower i64 integer constants
/*!
This code inserts all of the necessary juggling that needs to occur to load
a 64-bit constant into a register.
*/
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
if (VT == MVT::i64) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
} else {
cerr << "LowerConstant: unhandled constant type "
<< VT.getMVTString()
<< "\n";
abort();
/*NOTREACHED*/
}
return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
@ -1564,7 +1568,7 @@ static bool isConstantSplat(const uint64_t Bits128[2],
//! Lower a BUILD_VECTOR instruction creatively:
SDValue
SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
@ -1588,7 +1592,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
abort();
/*NOTREACHED*/
case MVT::v4f32: {
uint32_t Value32 = SplatBits;
uint32_t Value32 = uint32_t(SplatBits);
assert(SplatSize == 4
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
@ -1598,7 +1602,7 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
break;
}
case MVT::v2f64: {
uint64_t f64val = SplatBits;
uint64_t f64val = uint64_t(SplatBits);
assert(SplatSize == 8
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
@ -1638,95 +1642,101 @@ SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
}
case MVT::v2i64: {
uint64_t val = SplatBits;
uint32_t upper = uint32_t(val >> 32);
uint32_t lower = uint32_t(val);
if (upper == lower) {
// Magic constant that can be matched by IL, ILA, et. al.
SDValue Val = DAG.getTargetConstant(val, MVT::i64);
return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
} else {
SDValue LO32;
SDValue HI32;
SmallVector<SDValue, 16> ShufBytes;
SDValue Result;
bool upper_special, lower_special;
// NOTE: This code creates common-case shuffle masks that can be easily
// detected as common expressions. It is not attempting to create highly
// specialized masks to replace any and all 0's, 0xff's and 0x80's.
// Detect if the upper or lower half is a special shuffle mask pattern:
upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
// Create lower vector if not a special pattern
if (!lower_special) {
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
LO32C, LO32C, LO32C, LO32C));
}
// Create upper vector if not a special pattern
if (!upper_special) {
SDValue HI32C = DAG.getConstant(upper, MVT::i32);
HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
HI32C, HI32C, HI32C, HI32C));
}
// If either upper or lower are special, then the two input operands are
// the same (basically, one of them is a "don't care")
if (lower_special)
LO32 = HI32;
if (upper_special)
HI32 = LO32;
if (lower_special && upper_special) {
// Unhappy situation... both upper and lower are special, so punt with
// a target constant:
SDValue Zero = DAG.getConstant(0, MVT::i32);
HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
Zero, Zero);
}
for (int i = 0; i < 4; ++i) {
uint64_t val = 0;
for (int j = 0; j < 4; ++j) {
SDValue V;
bool process_upper, process_lower;
val <<= 8;
process_upper = (upper_special && (i & 1) == 0);
process_lower = (lower_special && (i & 1) == 1);
if (process_upper || process_lower) {
if ((process_upper && upper == 0)
|| (process_lower && lower == 0))
val |= 0x80;
else if ((process_upper && upper == 0xffffffff)
|| (process_lower && lower == 0xffffffff))
val |= 0xc0;
else if ((process_upper && upper == 0x80000000)
|| (process_lower && lower == 0x80000000))
val |= (j == 0 ? 0xe0 : 0x80);
} else
val |= i * 4 + j + ((i & 1) * 16);
}
ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
}
return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
}
}
return SDValue();
}
SDValue
SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
uint32_t upper = uint32_t(SplatVal >> 32);
uint32_t lower = uint32_t(SplatVal);
if (upper == lower) {
// Magic constant that can be matched by IL, ILA, et. al.
SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
return DAG.getNode(ISD::BIT_CONVERT, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
Val, Val, Val, Val));
} else {
SDValue LO32;
SDValue HI32;
SmallVector<SDValue, 16> ShufBytes;
SDValue Result;
bool upper_special, lower_special;
// NOTE: This code creates common-case shuffle masks that can be easily
// detected as common expressions. It is not attempting to create highly
// specialized masks to replace any and all 0's, 0xff's and 0x80's.
// Detect if the upper or lower half is a special shuffle mask pattern:
upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
// Create lower vector if not a special pattern
if (!lower_special) {
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
LO32C, LO32C, LO32C, LO32C));
}
// Create upper vector if not a special pattern
if (!upper_special) {
SDValue HI32C = DAG.getConstant(upper, MVT::i32);
HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
HI32C, HI32C, HI32C, HI32C));
}
// If either upper or lower are special, then the two input operands are
// the same (basically, one of them is a "don't care")
if (lower_special)
LO32 = HI32;
if (upper_special)
HI32 = LO32;
if (lower_special && upper_special) {
// Unhappy situation... both upper and lower are special, so punt with
// a target constant:
SDValue Zero = DAG.getConstant(0, MVT::i32);
HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
Zero, Zero);
}
for (int i = 0; i < 4; ++i) {
uint64_t val = 0;
for (int j = 0; j < 4; ++j) {
SDValue V;
bool process_upper, process_lower;
val <<= 8;
process_upper = (upper_special && (i & 1) == 0);
process_lower = (lower_special && (i & 1) == 1);
if (process_upper || process_lower) {
if ((process_upper && upper == 0)
|| (process_lower && lower == 0))
val |= 0x80;
else if ((process_upper && upper == 0xffffffff)
|| (process_lower && lower == 0xffffffff))
val |= 0xc0;
else if ((process_upper && upper == 0x80000000)
|| (process_lower && lower == 0x80000000))
val |= (j == 0 ? 0xe0 : 0x80);
} else
val |= i * 4 + j + ((i & 1) * 16);
}
ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
}
}
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
@ -2384,81 +2394,180 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
//! Lower ISD::FABS
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
DAGCombine does the same basic reduction: convert the double to i64 and mask
off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
CellSPU has to legalize. Hence, the custom lowering.
f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
All conversions to i64 are expanded to a libcall.
*/
static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SPUTargetLowering &TLI) {
MVT OpVT = Op.getValueType();
MVT IntVT(MVT::i64);
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType();
assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
|| OpVT == MVT::i64) {
// Convert f32 / f64 to i32 / i64 via libcall.
RTLIB::Libcall LC =
(Op.getOpcode() == ISD::FP_TO_SINT)
? RTLIB::getFPTOSINT(Op0VT, OpVT)
: RTLIB::getFPTOUINT(Op0VT, OpVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
SDValue Dummy;
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
}
SDValue iABS =
DAG.getNode(ISD::AND, IntVT,
DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
return SDValue();
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
All conversions from i64 are expanded to a libcall.
*/
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
SPUTargetLowering &TLI) {
MVT OpVT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType();
if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
|| Op0VT == MVT::i64) {
// Convert i32, i64 to f64 via libcall:
RTLIB::Libcall LC =
(Op.getOpcode() == ISD::SINT_TO_FP)
? RTLIB::getSINTTOFP(Op0VT, OpVT)
: RTLIB::getUINTTOFP(Op0VT, OpVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
SDValue Dummy;
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
}
return SDValue();
}
//! Lower ISD::SETCC
/*!
This handles MVT::f64 (double floating point) condition lowering
*/
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2));
MVT lhsVT = lhs.getValueType();
SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
MVT IntVT(MVT::i64);
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
// selected to a NOP:
SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, lhs);
SDValue lhsHi32 =
DAG.getNode(ISD::TRUNCATE, MVT::i32,
DAG.getNode(ISD::SRL, IntVT,
i64lhs, DAG.getConstant(32, MVT::i32)));
SDValue lhsHi32abs =
DAG.getNode(ISD::AND, MVT::i32,
lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
SDValue lhsLo32 =
DAG.getNode(ISD::TRUNCATE, MVT::i32, i64lhs);
// SETO and SETUO only use the lhs operand:
if (CC->get() == ISD::SETO) {
// Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
// SETUO
APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
return DAG.getNode(ISD::XOR, ccResultVT,
DAG.getSetCC(ccResultVT,
lhs, DAG.getConstantFP(0.0, lhsVT),
ISD::SETUO),
DAG.getConstant(ccResultAllOnes, ccResultVT));
} else if (CC->get() == ISD::SETUO) {
// Evaluates to true if Op0 is [SQ]NaN
return DAG.getNode(ISD::AND, ccResultVT,
DAG.getSetCC(ccResultVT,
lhsHi32abs,
DAG.getConstant(0x7ff00000, MVT::i32),
ISD::SETGE),
DAG.getSetCC(ccResultVT,
lhsLo32,
DAG.getConstant(0, MVT::i32),
ISD::SETGT));
}
SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
SDValue rhsHi32 =
DAG.getNode(ISD::TRUNCATE, MVT::i32,
DAG.getNode(ISD::SRL, IntVT,
i64rhs, DAG.getConstant(32, MVT::i32)));
// If a value is negative, subtract from the sign magnitude constant:
SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
// Convert the sign-magnitude representation into 2's complement:
SDValue lhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
lhsHi32, DAG.getConstant(31, MVT::i32));
SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64lhs);
SDValue lhsSelect =
DAG.getNode(ISD::SELECT, IntVT,
lhsSelectMask, lhsSignMag2TC, i64lhs);
SDValue rhsSelectMask = DAG.getNode(ISD::SRA, ccResultVT,
rhsHi32, DAG.getConstant(31, MVT::i32));
SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, IntVT, signMag2TC, i64rhs);
SDValue rhsSelect =
DAG.getNode(ISD::SELECT, IntVT,
rhsSelectMask, rhsSignMag2TC, i64rhs);
unsigned compareOp;
switch (CC->get()) {
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETONE:
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
abort();
break;
case ISD::SETO: {
SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
SDValue i64lhs =
DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
}
case ISD::SETUO: {
SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
SDValue i64lhs =
DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
}
case ISD::SETUEQ:
compareOp = ISD::SETEQ; break;
case ISD::SETOGT:
case ISD::SETUGT:
compareOp = ISD::SETGT; break;
case ISD::SETOGE:
case ISD::SETUGE:
compareOp = ISD::SETGE; break;
case ISD::SETOLT:
case ISD::SETULT:
compareOp = ISD::SETLT; break;
case ISD::SETOLE:
case ISD::SETULE:
compareOp = ISD::SETLE; break;
case ISD::SETUNE:
case ISD::SETONE:
compareOp = ISD::SETNE; break;
default:
cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
abort();
break;
}
return SDValue();
SDValue result =
DAG.getSetCC(ccResultVT, lhsSelect, rhsSelect, (ISD::CondCode) compareOp);
if ((CC->get() & 0x8) == 0) {
// Ordered comparison:
SDValue lhsNaN = DAG.getSetCC(ccResultVT,
lhs, DAG.getConstantFP(0.0, MVT::f64),
ISD::SETO);
SDValue rhsNaN = DAG.getSetCC(ccResultVT,
rhs, DAG.getConstantFP(0.0, MVT::f64),
ISD::SETO);
SDValue ordered = DAG.getNode(ISD::AND, ccResultVT, lhsNaN, rhsNaN);
result = DAG.getNode(ISD::AND, ccResultVT, ordered, result);
}
return result;
}
//! Lower ISD::SELECT_CC
@ -2566,8 +2675,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::JumpTable:
return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::Constant:
return LowerConstant(Op, DAG);
case ISD::ConstantFP:
return LowerConstantFP(Op, DAG);
case ISD::FORMAL_ARGUMENTS:
@ -2590,12 +2697,17 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
break;
}
case ISD::FABS:
return LowerFABS(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG, *this);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG, *this);
// Vector-related lowering.
case ISD::BUILD_VECTOR:
return SPU::LowerBUILD_VECTOR(Op, DAG);
return LowerBUILD_VECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
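
A minimal IR sketch (hypothetical, not one of the tests in this commit) of conversions that would take the new libcall path through LowerFP_TO_INT and LowerINT_TO_FP above (f64 to i32, and anything involving i64):

define i32 @f64_to_i32(double %x) {
  %r = fptosi double %x to i32
  ret i32 %r
}

define double @i64_to_f64(i64 %x) {
  %r = sitofp i64 %x to double
  ret double %r
}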


@ -61,7 +61,7 @@ namespace llvm {
};
}
//! Utility functions specific to CellSPU-only:
//! Utility functions specific to CellSPU:
namespace SPU {
SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
MVT ValueType);
@ -78,7 +78,7 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
const SPUTargetMachine &TM);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat);
SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
SDValue getCarryGenerateShufMask(SelectionDAG &DAG);


@ -155,13 +155,13 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORr8_r32:
case SPU::ORr32_r16:
case SPU::ORr32_r8:
case SPU::ORr32_r64:
case SPU::ORr16_r64:
case SPU::ORr8_r64:
case SPU::ORr64_r32:
case SPU::ORr64_r16:
case SPU::ORr64_r8:
*/
case SPU::ORr64_r32:
case SPU::ORr32_r64:
case SPU::ORf32_r32:
case SPU::ORr32_f32:
case SPU::ORf64_r64:


@ -1259,6 +1259,9 @@ multiclass BitwiseAnd
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* Intentionally does not match a pattern */]>;
def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
// Could use v4i32, but won't for clarity
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
@ -1525,17 +1528,17 @@ multiclass BitwiseOr
// Conversion from R32C to register
def r32_r16: ORCvtFormR32Reg<R16C>;
def r32_r8: ORCvtFormR32Reg<R8C>;
// Conversion from register to R64C:
def r32_r64: ORCvtFormR64Reg<R32C>;
def r16_r64: ORCvtFormR64Reg<R16C>;
def r8_r64: ORCvtFormR64Reg<R8C>;
// Conversion from R64C to register
def r64_r32: ORCvtFormRegR64<R32C>;
def r64_r16: ORCvtFormRegR64<R16C>;
def r64_r8: ORCvtFormRegR64<R8C>;
*/
// Conversion to register from R64C:
def r32_r64: ORCvtFormR64Reg<R32C>;
// def r16_r64: ORCvtFormR64Reg<R16C>;
// def r8_r64: ORCvtFormR64Reg<R8C>;
// Conversion to R64C from register
def r64_r32: ORCvtFormRegR64<R32C>;
// def r64_r16: ORCvtFormRegR64<R16C>;
// def r64_r8: ORCvtFormRegR64<R8C>;
// bitconvert patterns:
def r32_f32: ORCvtFormR32Reg<R32FP,
@ -1910,11 +1913,11 @@ class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
IntegerOp, pattern>;
class SELBVecInst<ValueType vectype>:
class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
[(set (vectype VECREG:$rT),
(or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
(and (vnot (vectype VECREG:$rC)),
(and (vnot_frag (vectype VECREG:$rC)),
(vectype VECREG:$rA))))]>;
class SELBVecVCondInst<ValueType vectype>:
@ -1947,7 +1950,7 @@ multiclass SelectBits
def v16i8: SELBVecInst<v16i8>;
def v8i16: SELBVecInst<v8i16>;
def v4i32: SELBVecInst<v4i32>;
def v2i64: SELBVecInst<v2i64>;
def v2i64: SELBVecInst<v2i64, vnot_conv>;
def r128: SELBRegInst<GPRC>;
def r64: SELBRegInst<R64C>;
@ -4321,6 +4324,13 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
(ANDfabsvec (v4f32 VECREG:$rA),
(v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
def : Pat<(fabs R64FP:$rA),
(ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
def : Pat<(fabs (v2f64 VECREG:$rA)),
(ANDfabsvec (v2f64 VECREG:$rA),
(v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
//===----------------------------------------------------------------------===//
// Hint for branch instructions:
//===----------------------------------------------------------------------===//


@ -1,22 +1,23 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fceq %t1.s | count 1
; RUN: grep fcmeq %t1.s | count 1
;
; This file includes standard floating point arithmetic instructions
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
; Exercise the floating point comparison operators for f32:
declare double @fabs(double)
declare float @fabsf(float)
define i1 @fcmp_eq(float %arg1, float %arg2) {
%A = fcmp oeq float %arg1, %arg2 ; <float> [#uses=1]
%A = fcmp oeq float %arg1, %arg2
ret i1 %A
}
define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
%A = call float @fabsf(float %arg1) ; <float> [#uses=1]
%B = call float @fabsf(float %arg2) ; <float> [#uses=1]
%C = fcmp oeq float %A, %B ; <float> [#uses=1]
ret i1 %C
%1 = call float @fabsf(float %arg1)
%2 = call float @fabsf(float %arg2)
%3 = fcmp oeq float %1, %2
ret i1 %3
}


@ -0,0 +1,7 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
define i1 @fcmp_eq_setcc_f64(double %arg1, double %arg2) nounwind {
entry:
%A = fcmp oeq double %arg1, %arg2
ret i1 %A
}
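
; A possible follow-on case (hypothetical, not part of this commit) exercising
; the unordered side of the new f64 SETCC lowering, which the switch above maps
; to SETLT on the sign-converted bits and skips the ordered-NaN AND:
define i1 @fcmp_ult_setcc_f64(double %arg1, double %arg2) nounwind {
entry:
  %A = fcmp ult double %arg1, %arg2
  ret i1 %A
}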


@ -1,9 +1,10 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fsmbi %t1.s | count 2
; RUN: grep fsmbi %t1.s | count 3
; RUN: grep 32768 %t1.s | count 2
; RUN: grep xor %t1.s | count 4
; RUN: grep and %t1.s | count 4
; RUN: grep andbi %t1.s | count 2
; RUN: grep and %t1.s | count 5
; RUN: grep andbi %t1.s | count 3
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@ -33,11 +34,11 @@ declare double @fabs(double)
declare float @fabsf(float)
define double @fabs_dp(double %X) {
%Y = call double @fabs( double %X ) ; <double> [#uses=1]
%Y = call double @fabs( double %X )
ret double %Y
}
define float @fabs_sp(float %X) {
%Y = call float @fabsf( float %X ) ; <float> [#uses=1]
%Y = call float @fabsf( float %X )
ret float %Y
}


@ -1,5 +1,5 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep selb %t1.s | count 280
; RUN: grep selb %t1.s | count 56
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
@ -9,7 +9,7 @@ target triple = "spu"
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%C = and <2 x i64> %rC, %rB
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %A, %rA
@ -18,7 +18,7 @@ define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%C = and <2 x i64> %rB, %rC
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %A, %rA
@ -27,7 +27,7 @@ define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %A, %rA
%C = and <2 x i64> %rB, %rC
@ -36,7 +36,7 @@ define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %A, %rA
%C = and <2 x i64> %rC, %rB
@ -45,7 +45,7 @@ define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%C = and <2 x i64> %rC, %rB
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %rA, %A
@ -54,7 +54,7 @@ define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%C = and <2 x i64> %rB, %rC
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %rA, %A
@ -63,7 +63,7 @@ define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %rA, %A
%C = and <2 x i64> %rB, %rC
@ -72,7 +72,7 @@ define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
define <2 x i64> @selectbits_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
%A = xor <2 x i64> %rC, < i64 -1, i64 -1 >
%B = and <2 x i64> %rA, %A
%C = and <2 x i64> %rC, %rB
@ -85,7 +85,7 @@ define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) {
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%C = and <4 x i32> %rC, %rB
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
%B = and <4 x i32> %A, %rA
@ -94,7 +94,7 @@ define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%C = and <4 x i32> %rB, %rC
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
%B = and <4 x i32> %A, %rA
@ -103,7 +103,7 @@ define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 >
%B = and <4 x i32> %A, %rA
%C = and <4 x i32> %rB, %rC
@ -112,7 +112,7 @@ define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
%B = and <4 x i32> %A, %rA
%C = and <4 x i32> %rC, %rB
@ -121,7 +121,7 @@ define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%C = and <4 x i32> %rC, %rB
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
%B = and <4 x i32> %rA, %A
@ -130,7 +130,7 @@ define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%C = and <4 x i32> %rB, %rC
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
%B = and <4 x i32> %rA, %A
@ -139,7 +139,7 @@ define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
%B = and <4 x i32> %rA, %A
%C = and <4 x i32> %rB, %rC
@ -148,7 +148,7 @@ define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
define <4 x i32> @selectbits_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
%A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1>
%B = and <4 x i32> %rA, %A
%C = and <4 x i32> %rC, %rB
@ -161,7 +161,7 @@ define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) {
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%C = and <8 x i16> %rC, %rB
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
@ -171,7 +171,7 @@ define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%C = and <8 x i16> %rB, %rC
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
@ -181,7 +181,7 @@ define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
%B = and <8 x i16> %A, %rA
@ -191,7 +191,7 @@ define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
%B = and <8 x i16> %A, %rA
@ -201,7 +201,7 @@ define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%C = and <8 x i16> %rC, %rB
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
@ -211,7 +211,7 @@ define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%C = and <8 x i16> %rB, %rC
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
@ -221,7 +221,7 @@ define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
%B = and <8 x i16> %rA, %A
@ -231,7 +231,7 @@ define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
define <8 x i16> @selectbits_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
%A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1 >
%B = and <8 x i16> %rA, %A
@ -245,7 +245,7 @@ define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) {
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%C = and <16 x i8> %rC, %rB
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -257,7 +257,7 @@ define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%C = and <16 x i8> %rB, %rC
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -269,7 +269,7 @@ define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -281,7 +281,7 @@ define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -293,7 +293,7 @@ define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%C = and <16 x i8> %rC, %rB
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -305,7 +305,7 @@ define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%C = and <16 x i8> %rB, %rC
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -317,7 +317,7 @@ define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -329,7 +329,7 @@ define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
define <16 x i8> @selectbits_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
%A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@ -345,7 +345,7 @@ define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) {
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_01(i32 %rA, i32 %rB, i32 %rC) {
%C = and i32 %rC, %rB
%A = xor i32 %rC, -1
%B = and i32 %A, %rA
@ -354,7 +354,7 @@ define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_02(i32 %rA, i32 %rB, i32 %rC) {
%C = and i32 %rB, %rC
%A = xor i32 %rC, -1
%B = and i32 %A, %rA
@ -363,7 +363,7 @@ define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_03(i32 %rA, i32 %rB, i32 %rC) {
%A = xor i32 %rC, -1
%B = and i32 %A, %rA
%C = and i32 %rB, %rC
@ -372,7 +372,7 @@ define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_04(i32 %rA, i32 %rB, i32 %rC) {
%A = xor i32 %rC, -1
%B = and i32 %A, %rA
%C = and i32 %rC, %rB
@ -381,7 +381,7 @@ define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_05(i32 %rA, i32 %rB, i32 %rC) {
%C = and i32 %rC, %rB
%A = xor i32 %rC, -1
%B = and i32 %rA, %A
@ -390,7 +390,7 @@ define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_06(i32 %rA, i32 %rB, i32 %rC) {
%C = and i32 %rB, %rC
%A = xor i32 %rC, -1
%B = and i32 %rA, %A
@ -399,7 +399,7 @@ define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_07(i32 %rA, i32 %rB, i32 %rC) {
%A = xor i32 %rC, -1
%B = and i32 %rA, %A
%C = and i32 %rB, %rC
@ -408,7 +408,7 @@ define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) {
define i32 @selectbits_i32_08(i32 %rA, i32 %rB, i32 %rC) {
%A = xor i32 %rC, -1
%B = and i32 %rA, %A
%C = and i32 %rC, %rB
@ -421,7 +421,7 @@ define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) {
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_01(i16 %rA, i16 %rB, i16 %rC) {
%C = and i16 %rC, %rB
%A = xor i16 %rC, -1
%B = and i16 %A, %rA
@ -430,7 +430,7 @@ define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_02(i16 %rA, i16 %rB, i16 %rC) {
%C = and i16 %rB, %rC
%A = xor i16 %rC, -1
%B = and i16 %A, %rA
@ -439,7 +439,7 @@ define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_03(i16 %rA, i16 %rB, i16 %rC) {
%A = xor i16 %rC, -1
%B = and i16 %A, %rA
%C = and i16 %rB, %rC
@ -448,7 +448,7 @@ define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_04(i16 %rA, i16 %rB, i16 %rC) {
%A = xor i16 %rC, -1
%B = and i16 %A, %rA
%C = and i16 %rC, %rB
@ -457,7 +457,7 @@ define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_05(i16 %rA, i16 %rB, i16 %rC) {
%C = and i16 %rC, %rB
%A = xor i16 %rC, -1
%B = and i16 %rA, %A
@ -466,7 +466,7 @@ define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_06(i16 %rA, i16 %rB, i16 %rC) {
%C = and i16 %rB, %rC
%A = xor i16 %rC, -1
%B = and i16 %rA, %A
@ -475,7 +475,7 @@ define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_07(i16 %rA, i16 %rB, i16 %rC) {
%A = xor i16 %rC, -1
%B = and i16 %rA, %A
%C = and i16 %rB, %rC
@ -484,7 +484,7 @@ define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) {
define i16 @selectbits_i16_08(i16 %rA, i16 %rB, i16 %rC) {
%A = xor i16 %rC, -1
%B = and i16 %rA, %A
%C = and i16 %rC, %rB
@ -497,7 +497,7 @@ define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) {
;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
; (or (and rC, rB), (and (not rC), rA))
define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_01(i8 %rA, i8 %rB, i8 %rC) {
%C = and i8 %rC, %rB
%A = xor i8 %rC, -1
%B = and i8 %A, %rA
@ -506,7 +506,7 @@ define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and rB, rC), (and (not rC), rA))
define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_02(i8 %rA, i8 %rB, i8 %rC) {
%C = and i8 %rB, %rC
%A = xor i8 %rC, -1
%B = and i8 %A, %rA
@ -515,7 +515,7 @@ define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and (not rC), rA), (and rB, rC))
define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_03(i8 %rA, i8 %rB, i8 %rC) {
%A = xor i8 %rC, -1
%B = and i8 %A, %rA
%C = and i8 %rB, %rC
@ -524,7 +524,7 @@ define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and (not rC), rA), (and rC, rB))
define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_04(i8 %rA, i8 %rB, i8 %rC) {
%A = xor i8 %rC, -1
%B = and i8 %A, %rA
%C = and i8 %rC, %rB
@ -533,7 +533,7 @@ define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and rC, rB), (and rA, (not rC)))
define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_05(i8 %rA, i8 %rB, i8 %rC) {
%C = and i8 %rC, %rB
%A = xor i8 %rC, -1
%B = and i8 %rA, %A
@ -542,7 +542,7 @@ define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and rB, rC), (and rA, (not rC)))
define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_06(i8 %rA, i8 %rB, i8 %rC) {
%C = and i8 %rB, %rC
%A = xor i8 %rC, -1
%B = and i8 %rA, %A
@ -551,7 +551,7 @@ define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and rA, (not rC)), (and rB, rC))
define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_07(i8 %rA, i8 %rB, i8 %rC) {
%A = xor i8 %rC, -1
%B = and i8 %rA, %A
%C = and i8 %rB, %rC
@ -560,7 +560,7 @@ define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) {
}
; (or (and rA, (not rC)), (and rC, rB))
define i8 @selb_i8_08(i8 %rA, i8 %rB, i8 %rC) {
define i8 @selectbits_i8_08(i8 %rA, i8 %rB, i8 %rC) {
%A = xor i8 %rC, -1
%B = and i8 %rA, %A
%C = and i8 %rC, %rB


@ -275,3 +275,9 @@ define i64 @ashr_i64_3(i64 %arg1, i32 %shift) {
%2 = ashr i64 %arg1, %1
ret i64 %2
}
define i32 @hi32_i64(i64 %arg) {
%1 = lshr i64 %arg, 32
%2 = trunc i64 %1 to i32
ret i32 %2
}
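
; A hypothetical companion case (not part of this commit) with a shift amount
; greater than 32; per the new TRUNCATE handling in SPUISelDAGToDAG.cpp, this
; should select the ORr32_r64 move plus an extra ROTMr32 for the remaining
; 3 bits of shift:
define i32 @hi29_i64(i64 %arg) {
  %1 = lshr i64 %arg, 35
  %2 = trunc i64 %1 to i32
  ret i32 %2
}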