mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-26 05:25:47 +00:00
Fix memory access lowering on SPU, adding
support for the case where alignment < value size. These cases were silently miscompiled before this patch. Now the generated code for them is overly verbose (especially for stores), so any front-end should still avoid misaligned memory accesses as much as possible. The bit-juggling algorithm added here probably still has some room for improvement. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118889 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -42,41 +42,12 @@ using namespace llvm;
|
|||||||
namespace {
|
namespace {
|
||||||
std::map<unsigned, const char *> node_names;
|
std::map<unsigned, const char *> node_names;
|
||||||
|
|
||||||
//! EVT mapping to useful data for Cell SPU
|
// Byte offset of the preferred slot (counted from the MSB)
|
||||||
struct valtype_map_s {
|
int prefslotOffset(EVT VT) {
|
||||||
EVT valtype;
|
int retval=0;
|
||||||
int prefslot_byte;
|
if (VT==MVT::i1) retval=3;
|
||||||
};
|
if (VT==MVT::i8) retval=3;
|
||||||
|
if (VT==MVT::i16) retval=2;
|
||||||
const valtype_map_s valtype_map[] = {
|
|
||||||
{ MVT::i1, 3 },
|
|
||||||
{ MVT::i8, 3 },
|
|
||||||
{ MVT::i16, 2 },
|
|
||||||
{ MVT::i32, 0 },
|
|
||||||
{ MVT::f32, 0 },
|
|
||||||
{ MVT::i64, 0 },
|
|
||||||
{ MVT::f64, 0 },
|
|
||||||
{ MVT::i128, 0 }
|
|
||||||
};
|
|
||||||
|
|
||||||
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
|
|
||||||
|
|
||||||
const valtype_map_s *getValueTypeMapEntry(EVT VT) {
|
|
||||||
const valtype_map_s *retval = 0;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < n_valtype_map; ++i) {
|
|
||||||
if (valtype_map[i].valtype == VT) {
|
|
||||||
retval = valtype_map + i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
|
||||||
if (retval == 0) {
|
|
||||||
report_fatal_error("getValueTypeMapEntry returns NULL for " +
|
|
||||||
Twine(VT.getEVTString()));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
@@ -440,9 +411,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
|
|||||||
setOperationAction(ISD::AND, VT, Legal);
|
setOperationAction(ISD::AND, VT, Legal);
|
||||||
setOperationAction(ISD::OR, VT, Legal);
|
setOperationAction(ISD::OR, VT, Legal);
|
||||||
setOperationAction(ISD::XOR, VT, Legal);
|
setOperationAction(ISD::XOR, VT, Legal);
|
||||||
setOperationAction(ISD::LOAD, VT, Legal);
|
setOperationAction(ISD::LOAD, VT, Custom);
|
||||||
setOperationAction(ISD::SELECT, VT, Legal);
|
setOperationAction(ISD::SELECT, VT, Legal);
|
||||||
setOperationAction(ISD::STORE, VT, Legal);
|
setOperationAction(ISD::STORE, VT, Custom);
|
||||||
|
|
||||||
// These operations need to be expanded:
|
// These operations need to be expanded:
|
||||||
setOperationAction(ISD::SDIV, VT, Expand);
|
setOperationAction(ISD::SDIV, VT, Expand);
|
||||||
@@ -503,8 +474,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|||||||
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
|
||||||
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
|
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
|
||||||
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
|
||||||
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
|
node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
|
||||||
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
|
node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
|
||||||
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
|
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
|
||||||
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
|
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
|
||||||
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
|
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
|
||||||
@@ -573,11 +544,26 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
EVT OutVT = Op.getValueType();
|
EVT OutVT = Op.getValueType();
|
||||||
ISD::LoadExtType ExtType = LN->getExtensionType();
|
ISD::LoadExtType ExtType = LN->getExtensionType();
|
||||||
unsigned alignment = LN->getAlignment();
|
unsigned alignment = LN->getAlignment();
|
||||||
const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
|
int pso = prefslotOffset(InVT);
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
|
EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
|
||||||
|
(128 / InVT.getSizeInBits()));
|
||||||
|
|
||||||
|
// two sanity checks
|
||||||
|
assert( LN->getAddressingMode() == ISD::UNINDEXED
|
||||||
|
&& "we should get only UNINDEXED adresses");
|
||||||
|
// clean aligned loads can be selected as-is
|
||||||
|
if (InVT.getSizeInBits() == 128 && alignment == 16)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Get pointerinfos to the memory chunk(s) that contain the data to load
|
||||||
|
uint64_t mpi_offset = LN->getPointerInfo().Offset;
|
||||||
|
mpi_offset -= mpi_offset%16;
|
||||||
|
MachinePointerInfo lowMemPtr( LN->getPointerInfo().V, mpi_offset);
|
||||||
|
MachinePointerInfo highMemPtr( LN->getPointerInfo().V, mpi_offset+16);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
switch (LN->getAddressingMode()) {
|
|
||||||
case ISD::UNINDEXED: {
|
|
||||||
SDValue result;
|
SDValue result;
|
||||||
SDValue basePtr = LN->getBasePtr();
|
SDValue basePtr = LN->getBasePtr();
|
||||||
SDValue rotate;
|
SDValue rotate;
|
||||||
@@ -591,7 +577,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
&& (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
|
&& (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
|
||||||
// Known offset into basePtr
|
// Known offset into basePtr
|
||||||
int64_t offset = CN->getSExtValue();
|
int64_t offset = CN->getSExtValue();
|
||||||
int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
|
int64_t rotamt = int64_t((offset & 0xf) - pso);
|
||||||
|
|
||||||
if (rotamt < 0)
|
if (rotamt < 0)
|
||||||
rotamt += 16;
|
rotamt += 16;
|
||||||
@@ -611,14 +597,14 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
&& basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
|
&& basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
|
||||||
// Plain aligned a-form address: rotate into preferred slot
|
// Plain aligned a-form address: rotate into preferred slot
|
||||||
// Same for (SPUindirect (SPUhi ...), (SPUlo ...))
|
// Same for (SPUindirect (SPUhi ...), (SPUlo ...))
|
||||||
int64_t rotamt = -vtm->prefslot_byte;
|
int64_t rotamt = -pso;
|
||||||
if (rotamt < 0)
|
if (rotamt < 0)
|
||||||
rotamt += 16;
|
rotamt += 16;
|
||||||
rotate = DAG.getConstant(rotamt, MVT::i16);
|
rotate = DAG.getConstant(rotamt, MVT::i16);
|
||||||
} else {
|
} else {
|
||||||
// Offset the rotate amount by the basePtr and the preferred slot
|
// Offset the rotate amount by the basePtr and the preferred slot
|
||||||
// byte offset
|
// byte offset
|
||||||
int64_t rotamt = -vtm->prefslot_byte;
|
int64_t rotamt = -pso;
|
||||||
if (rotamt < 0)
|
if (rotamt < 0)
|
||||||
rotamt += 16;
|
rotamt += 16;
|
||||||
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
|
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
|
||||||
@@ -658,20 +644,23 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
// byte offset
|
// byte offset
|
||||||
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
|
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
|
||||||
basePtr,
|
basePtr,
|
||||||
DAG.getConstant(-vtm->prefslot_byte, PtrVT));
|
DAG.getConstant(-pso, PtrVT));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Re-emit as a v16i8 vector load
|
// Do the load as a i128 to allow possible shifting
|
||||||
result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
|
SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
|
||||||
LN->getPointerInfo(),
|
lowMemPtr,
|
||||||
LN->isVolatile(), LN->isNonTemporal(), 16);
|
LN->isVolatile(), LN->isNonTemporal(), 16);
|
||||||
|
|
||||||
|
// When the size is not greater than alignment we get all data with just
|
||||||
|
// one load
|
||||||
|
if (alignment >= InVT.getSizeInBits()/8) {
|
||||||
// Update the chain
|
// Update the chain
|
||||||
the_chain = result.getValue(1);
|
the_chain = low.getValue(1);
|
||||||
|
|
||||||
// Rotate into the preferred slot:
|
// Rotate into the preferred slot:
|
||||||
result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
|
result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
|
||||||
result.getValue(0), rotate);
|
low.getValue(0), rotate);
|
||||||
|
|
||||||
// Convert the loaded v16i8 vector to the appropriate vector type
|
// Convert the loaded v16i8 vector to the appropriate vector type
|
||||||
// specified by the operand:
|
// specified by the operand:
|
||||||
@@ -679,7 +668,56 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
InVT, (128 / InVT.getSizeInBits()));
|
InVT, (128 / InVT.getSizeInBits()));
|
||||||
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
|
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
|
||||||
DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
|
DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
|
||||||
|
}
|
||||||
|
// When alignment is less than the size, we might need (known only at
|
||||||
|
// run-time) two loads
|
||||||
|
// TODO: if the memory address is composed only from constants, we have
|
||||||
|
// extra knowledge, and might avoid the second load
|
||||||
|
else {
|
||||||
|
// storage position offset from lower 16 byte aligned memory chunk
|
||||||
|
SDValue offset = DAG.getNode( ISD::AND, dl, MVT::i32,
|
||||||
|
basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
|
||||||
|
// 16 - offset
|
||||||
|
SDValue offset_compl = DAG.getNode( ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant( 16, MVT::i32),
|
||||||
|
offset );
|
||||||
|
// get a registerfull of ones. (this implementation is a workaround: LLVM
|
||||||
|
// cannot handle 128 bit signed int constants)
|
||||||
|
SDValue ones = DAG.getConstant( -1, MVT::v4i32 );
|
||||||
|
ones = DAG.getNode( ISD::BIT_CONVERT, dl, MVT::i128, ones);
|
||||||
|
|
||||||
|
SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
|
||||||
|
DAG.getNode(ISD::ADD, dl, PtrVT,
|
||||||
|
basePtr,
|
||||||
|
DAG.getConstant(16, PtrVT)),
|
||||||
|
highMemPtr,
|
||||||
|
LN->isVolatile(), LN->isNonTemporal(), 16);
|
||||||
|
|
||||||
|
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
|
||||||
|
high.getValue(1));
|
||||||
|
|
||||||
|
// Shift the (possible) high part right to compensate for the misalignment.
|
||||||
|
// if there is no highpart (i.e. value is i64 and offset is 4), this
|
||||||
|
// will zero out the high value.
|
||||||
|
high = DAG.getNode( SPUISD::SRL_BYTES, dl, MVT::i128, high,
|
||||||
|
DAG.getNode( ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant( 16, MVT::i32),
|
||||||
|
offset
|
||||||
|
));
|
||||||
|
|
||||||
|
// Shift the low part similarly
|
||||||
|
// TODO: add SPUISD::SHL_BYTES
|
||||||
|
low = DAG.getNode( SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
|
||||||
|
|
||||||
|
// Merge the two parts
|
||||||
|
result = DAG.getNode( ISD::BIT_CONVERT, dl, vecVT,
|
||||||
|
DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
|
||||||
|
|
||||||
|
if (!InVT.isVector()) {
|
||||||
|
result = DAG.getNode( SPUISD::VEC2PREFSLOT, dl, InVT, result );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
// Handle extending loads by extending the scalar result:
|
// Handle extending loads by extending the scalar result:
|
||||||
if (ExtType == ISD::SEXTLOAD) {
|
if (ExtType == ISD::SEXTLOAD) {
|
||||||
result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
|
result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
|
||||||
@@ -703,21 +741,6 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
|
result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
|
||||||
retops, sizeof(retops) / sizeof(retops[0]));
|
retops, sizeof(retops) / sizeof(retops[0]));
|
||||||
return result;
|
return result;
|
||||||
}
|
|
||||||
case ISD::PRE_INC:
|
|
||||||
case ISD::PRE_DEC:
|
|
||||||
case ISD::POST_INC:
|
|
||||||
case ISD::POST_DEC:
|
|
||||||
case ISD::LAST_INDEXED_MODE:
|
|
||||||
{
|
|
||||||
report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
|
|
||||||
"than UNINDEXED\n" +
|
|
||||||
Twine((unsigned)LN->getAddressingMode()));
|
|
||||||
/*NOTREACHED*/
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Custom lower stores for CellSPU
|
/// Custom lower stores for CellSPU
|
||||||
@@ -735,12 +758,24 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
unsigned alignment = SN->getAlignment();
|
unsigned alignment = SN->getAlignment();
|
||||||
|
SDValue result;
|
||||||
|
EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
|
||||||
|
(128 / StVT.getSizeInBits()));
|
||||||
|
// Get pointer infos for the 16-byte memory chunk(s) that the store touches
|
||||||
|
uint64_t mpi_offset = SN->getPointerInfo().Offset;
|
||||||
|
mpi_offset -= mpi_offset%16;
|
||||||
|
MachinePointerInfo lowMemPtr( SN->getPointerInfo().V, mpi_offset);
|
||||||
|
MachinePointerInfo highMemPtr( SN->getPointerInfo().V, mpi_offset+16);
|
||||||
|
|
||||||
|
|
||||||
|
// two sanity checks
|
||||||
|
assert( SN->getAddressingMode() == ISD::UNINDEXED
|
||||||
|
&& "we should get only UNINDEXED adresses");
|
||||||
|
// clean aligned stores can be selected as-is
|
||||||
|
if (StVT.getSizeInBits() == 128 && alignment == 16)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
|
||||||
switch (SN->getAddressingMode()) {
|
|
||||||
case ISD::UNINDEXED: {
|
|
||||||
// The vector type we really want to load from the 16-byte chunk.
|
|
||||||
EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
|
|
||||||
VT, (128 / VT.getSizeInBits()));
|
|
||||||
|
|
||||||
SDValue alignLoadVec;
|
SDValue alignLoadVec;
|
||||||
SDValue basePtr = SN->getBasePtr();
|
SDValue basePtr = SN->getBasePtr();
|
||||||
@@ -811,17 +846,17 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
DAG.getConstant(0, PtrVT));
|
DAG.getConstant(0, PtrVT));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the memory to which to store.
|
// Load the lower part of the memory to which to store.
|
||||||
alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
|
SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
|
||||||
SN->getPointerInfo(),
|
lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
|
||||||
SN->isVolatile(), SN->isNonTemporal(), 16);
|
|
||||||
|
|
||||||
|
// if we don't need to store over the 16 byte boundary, one store suffices
|
||||||
|
if (alignment >= StVT.getSizeInBits()/8) {
|
||||||
// Update the chain
|
// Update the chain
|
||||||
the_chain = alignLoadVec.getValue(1);
|
the_chain = low.getValue(1);
|
||||||
|
|
||||||
LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
|
LoadSDNode *LN = cast<LoadSDNode>(low);
|
||||||
SDValue theValue = SN->getValue();
|
SDValue theValue = SN->getValue();
|
||||||
SDValue result;
|
|
||||||
|
|
||||||
if (StVT != VT
|
if (StVT != VT
|
||||||
&& (theValue.getOpcode() == ISD::AssertZext
|
&& (theValue.getOpcode() == ISD::AssertZext
|
||||||
@@ -849,14 +884,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
theValue);
|
theValue);
|
||||||
|
|
||||||
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
|
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
|
||||||
vectorizeOp, alignLoadVec,
|
vectorizeOp, low,
|
||||||
DAG.getNode(ISD::BIT_CONVERT, dl,
|
DAG.getNode(ISD::BIT_CONVERT, dl,
|
||||||
MVT::v4i32, insertEltOp));
|
MVT::v4i32, insertEltOp));
|
||||||
|
|
||||||
result = DAG.getStore(the_chain, dl, result, basePtr,
|
result = DAG.getStore(the_chain, dl, result, basePtr,
|
||||||
LN->getPointerInfo(),
|
lowMemPtr,
|
||||||
LN->isVolatile(), LN->isNonTemporal(),
|
LN->isVolatile(), LN->isNonTemporal(),
|
||||||
LN->getAlignment());
|
16);
|
||||||
|
|
||||||
#if 0 && !defined(NDEBUG)
|
#if 0 && !defined(NDEBUG)
|
||||||
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
|
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
|
||||||
@@ -869,24 +904,106 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
|
|||||||
DAG.setRoot(currentRoot);
|
DAG.setRoot(currentRoot);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return result;
|
|
||||||
/*UNREACHED*/
|
|
||||||
}
|
}
|
||||||
case ISD::PRE_INC:
|
// do the store when it might cross the 16 byte memory access boundary.
|
||||||
case ISD::PRE_DEC:
|
else {
|
||||||
case ISD::POST_INC:
|
// TODO issue a warning if SN->isVolatile()== true? This is likely not
|
||||||
case ISD::POST_DEC:
|
// what the user wanted.
|
||||||
case ISD::LAST_INDEXED_MODE:
|
|
||||||
{
|
// address offset from the nearest lower 16-byte aligned address
|
||||||
report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
|
SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
|
||||||
"than UNINDEXED\n" +
|
SN->getBasePtr(),
|
||||||
Twine((unsigned)SN->getAddressingMode()));
|
DAG.getConstant(0xf, MVT::i32));
|
||||||
/*NOTREACHED*/
|
// 16 - offset
|
||||||
|
SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant( 16, MVT::i32),
|
||||||
|
offset);
|
||||||
|
SDValue hi_shift = DAG.getNode(ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant( VT.getSizeInBits()/8,
|
||||||
|
MVT::i32),
|
||||||
|
offset_compl);
|
||||||
|
// 16 - sizeof(Value)
|
||||||
|
SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
|
||||||
|
DAG.getConstant( 16, MVT::i32),
|
||||||
|
DAG.getConstant( VT.getSizeInBits()/8,
|
||||||
|
MVT::i32));
|
||||||
|
// get a registerfull of ones
|
||||||
|
SDValue ones = DAG.getConstant(-1, MVT::v4i32);
|
||||||
|
ones = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, ones);
|
||||||
|
|
||||||
|
// Create the 128 bit masks that have ones where the data to store is
|
||||||
|
// located.
|
||||||
|
SDValue lowmask, himask;
|
||||||
|
// if the value to store doesn't fill up an entire 128 bits, zero
|
||||||
|
// out the last bits of the mask so that only the value we want to store
|
||||||
|
// is masked.
|
||||||
|
// this is e.g. in the case of store i32, align 2
|
||||||
|
if (!VT.isVector()){
|
||||||
|
Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
|
||||||
|
lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
|
||||||
|
lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
|
||||||
|
surplus);
|
||||||
|
Value = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, Value);
|
||||||
|
Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
else {
|
||||||
|
lowmask = ones;
|
||||||
|
Value = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, Value);
|
||||||
|
}
|
||||||
|
// this will be zero if no data goes to the high quad
|
||||||
|
himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
|
||||||
|
offset_compl);
|
||||||
|
lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
|
||||||
|
offset);
|
||||||
|
|
||||||
|
// Load in the old data and zero out the parts that will be overwritten with
|
||||||
|
// the new data to store.
|
||||||
|
SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
|
||||||
|
DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
|
||||||
|
DAG.getConstant( 16, PtrVT)),
|
||||||
|
highMemPtr,
|
||||||
|
SN->isVolatile(), SN->isNonTemporal(), 16);
|
||||||
|
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
|
||||||
|
hi.getValue(1));
|
||||||
|
|
||||||
|
low = DAG.getNode(ISD::AND, dl, MVT::i128,
|
||||||
|
DAG.getNode( ISD::BIT_CONVERT, dl, MVT::i128, low),
|
||||||
|
DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
|
||||||
|
hi = DAG.getNode(ISD::AND, dl, MVT::i128,
|
||||||
|
DAG.getNode( ISD::BIT_CONVERT, dl, MVT::i128, hi),
|
||||||
|
DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
|
||||||
|
|
||||||
|
// Shift the Value to store into place. rlow contains the parts that go to
|
||||||
|
// the lower memory chunk, rhi has the parts that go to the upper one.
|
||||||
|
SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
|
||||||
|
rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
|
||||||
|
SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
|
||||||
|
offset_compl);
|
||||||
|
|
||||||
|
// Merge the old data and the new data and store the results
|
||||||
|
// Need to convert vectors to integer here, as 'OR'ing floats asserts
|
||||||
|
rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
|
||||||
|
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, low),
|
||||||
|
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, rlow));
|
||||||
|
rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
|
||||||
|
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, hi),
|
||||||
|
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, rhi));
|
||||||
|
|
||||||
|
low = DAG.getStore(the_chain, dl, rlow, basePtr,
|
||||||
|
lowMemPtr,
|
||||||
|
SN->isVolatile(), SN->isNonTemporal(), 16);
|
||||||
|
hi = DAG.getStore(the_chain, dl, rhi,
|
||||||
|
DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
|
||||||
|
DAG.getConstant( 16, PtrVT)),
|
||||||
|
highMemPtr,
|
||||||
|
SN->isVolatile(), SN->isNonTemporal(), 16);
|
||||||
|
result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
|
||||||
|
hi.getValue(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Generate the address of a constant pool entry.
|
//! Generate the address of a constant pool entry.
|
||||||
@@ -2002,7 +2119,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
|
|||||||
DAG.getConstant(scaleShift, MVT::i32));
|
DAG.getConstant(scaleShift, MVT::i32));
|
||||||
}
|
}
|
||||||
|
|
||||||
vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
|
vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
|
||||||
|
|
||||||
// Replicate the bytes starting at byte 0 across the entire vector (for
|
// Replicate the bytes starting at byte 0 across the entire vector (for
|
||||||
// consistency with the notion of a unified register set)
|
// consistency with the notion of a unified register set)
|
||||||
@@ -2911,8 +3028,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SPUISD::SHLQUAD_L_BITS:
|
case SPUISD::SHL_BITS:
|
||||||
case SPUISD::SHLQUAD_L_BYTES:
|
case SPUISD::SHL_BYTES:
|
||||||
case SPUISD::ROTBYTES_LEFT: {
|
case SPUISD::ROTBYTES_LEFT: {
|
||||||
SDValue Op1 = N->getOperand(1);
|
SDValue Op1 = N->getOperand(1);
|
||||||
|
|
||||||
|
@@ -41,8 +41,9 @@ namespace llvm {
|
|||||||
CNTB, ///< Count leading ones in bytes
|
CNTB, ///< Count leading ones in bytes
|
||||||
PREFSLOT2VEC, ///< Promote scalar->vector
|
PREFSLOT2VEC, ///< Promote scalar->vector
|
||||||
VEC2PREFSLOT, ///< Extract element 0
|
VEC2PREFSLOT, ///< Extract element 0
|
||||||
SHLQUAD_L_BITS, ///< Rotate quad left, by bits
|
SHL_BITS, ///< Shift quad left, by bits
|
||||||
SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
|
SHL_BYTES, ///< Shift quad left, by bytes
|
||||||
|
SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
|
||||||
VEC_ROTL, ///< Vector rotate left
|
VEC_ROTL, ///< Vector rotate left
|
||||||
VEC_ROTR, ///< Vector rotate right
|
VEC_ROTR, ///< Vector rotate right
|
||||||
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
|
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
|
||||||
|
@@ -2369,10 +2369,13 @@ class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
|
RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
|
||||||
RotateShift, pattern>;
|
RotateShift, pattern>;
|
||||||
|
|
||||||
class ROTQBYVecInst<ValueType vectype>:
|
class ROTQBYGenInst<ValueType type, RegisterClass rc>:
|
||||||
ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
|
ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
|
||||||
[(set (vectype VECREG:$rT),
|
[(set (type rc:$rT),
|
||||||
(SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>;
|
(SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
|
||||||
|
|
||||||
|
class ROTQBYVecInst<ValueType type>:
|
||||||
|
ROTQBYGenInst<type, VECREG>;
|
||||||
|
|
||||||
multiclass RotateQuadLeftByBytes
|
multiclass RotateQuadLeftByBytes
|
||||||
{
|
{
|
||||||
@@ -2382,6 +2385,7 @@ multiclass RotateQuadLeftByBytes
|
|||||||
def v4f32: ROTQBYVecInst<v4f32>;
|
def v4f32: ROTQBYVecInst<v4f32>;
|
||||||
def v2i64: ROTQBYVecInst<v2i64>;
|
def v2i64: ROTQBYVecInst<v2i64>;
|
||||||
def v2f64: ROTQBYVecInst<v2f64>;
|
def v2f64: ROTQBYVecInst<v2f64>;
|
||||||
|
def i128: ROTQBYGenInst<i128, GPRC>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm ROTQBY: RotateQuadLeftByBytes;
|
defm ROTQBY: RotateQuadLeftByBytes;
|
||||||
@@ -2394,10 +2398,13 @@ class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
|
|||||||
RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
|
RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
|
||||||
RotateShift, pattern>;
|
RotateShift, pattern>;
|
||||||
|
|
||||||
|
class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
|
||||||
|
ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
|
||||||
|
[(set (type rclass:$rT),
|
||||||
|
(SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
|
||||||
|
|
||||||
class ROTQBYIVecInst<ValueType vectype>:
|
class ROTQBYIVecInst<ValueType vectype>:
|
||||||
ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
|
ROTQBYIGenInst<vectype, VECREG>;
|
||||||
[(set (vectype VECREG:$rT),
|
|
||||||
(SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
|
|
||||||
|
|
||||||
multiclass RotateQuadByBytesImm
|
multiclass RotateQuadByBytesImm
|
||||||
{
|
{
|
||||||
@@ -2407,6 +2414,7 @@ multiclass RotateQuadByBytesImm
|
|||||||
def v4f32: ROTQBYIVecInst<v4f32>;
|
def v4f32: ROTQBYIVecInst<v4f32>;
|
||||||
def v2i64: ROTQBYIVecInst<v2i64>;
|
def v2i64: ROTQBYIVecInst<v2i64>;
|
||||||
def vfi64: ROTQBYIVecInst<v2f64>;
|
def vfi64: ROTQBYIVecInst<v2f64>;
|
||||||
|
def i128: ROTQBYIGenInst<i128, GPRC>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm ROTQBYI: RotateQuadByBytesImm;
|
defm ROTQBYI: RotateQuadByBytesImm;
|
||||||
@@ -2661,6 +2669,10 @@ multiclass RotateQuadBytes
|
|||||||
|
|
||||||
defm ROTQMBY : RotateQuadBytes;
|
defm ROTQMBY : RotateQuadBytes;
|
||||||
|
|
||||||
|
def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
|
||||||
|
(ROTQMBYr128 GPRC:$rA,
|
||||||
|
(SFIr32 R32C:$rB, 0))>;
|
||||||
|
|
||||||
class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
|
class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
|
||||||
RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
|
RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
|
||||||
RotateShift, pattern>;
|
RotateShift, pattern>;
|
||||||
@@ -2749,6 +2761,11 @@ multiclass RotateMaskQuadByBits
|
|||||||
|
|
||||||
defm ROTQMBI: RotateMaskQuadByBits;
|
defm ROTQMBI: RotateMaskQuadByBits;
|
||||||
|
|
||||||
|
def : Pat<(srl GPRC:$rA, R32C:$rB),
|
||||||
|
(ROTQMBIr128 GPRC:$rA,
|
||||||
|
(SFIr32 R32C:$rB, 0))>;
|
||||||
|
|
||||||
|
|
||||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
// Rotate quad and mask by bits, immediate
|
// Rotate quad and mask by bits, immediate
|
||||||
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
|
||||||
|
@@ -83,10 +83,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
|
|||||||
// SPUISelLowering.h):
|
// SPUISelLowering.h):
|
||||||
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
|
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
|
||||||
|
|
||||||
// Shift left quadword by bits and bytes
|
|
||||||
def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
|
|
||||||
def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
|
|
||||||
|
|
||||||
// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
|
// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
|
||||||
def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
|
def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
|
||||||
def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
|
def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
|
||||||
@@ -105,6 +101,12 @@ def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
|
|||||||
def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
|
def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
|
||||||
SPUvecshift_type>;
|
SPUvecshift_type>;
|
||||||
|
|
||||||
|
// Shift entire quad left by bytes/bits. Zeros are shifted in on the right
|
||||||
|
// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128
|
||||||
|
def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
|
||||||
|
def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
|
||||||
|
def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
|
||||||
|
|
||||||
// SPU form select mask for bytes, immediate
|
// SPU form select mask for bytes, immediate
|
||||||
def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
|
def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
|
||||||
|
|
||||||
|
@@ -26,7 +26,7 @@ define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm )
|
|||||||
|
|
||||||
define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm )
|
define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm )
|
||||||
{
|
{
|
||||||
;CHECK: lqd $75, 80($sp)
|
;CHECK: lqd {{\$[0-9]+}}, 80($sp)
|
||||||
;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}}
|
;CHECK-NOT: ori {{\$[0-9]+, \$[0-9]+, 0}}
|
||||||
;CHECK: lr $3, $4
|
;CHECK: lr $3, $4
|
||||||
ret %paramstruct %prm
|
ret %paramstruct %prm
|
||||||
|
@@ -38,3 +38,15 @@ define <4 x float> @load_undef(){
|
|||||||
%val = load <4 x float>* undef
|
%val = load <4 x float>* undef
|
||||||
ret <4 x float> %val
|
ret <4 x float> %val
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;check that 'misaligned' loads that may span two memory chunks
|
||||||
|
;have two loads. Don't check for the bitmanipulation, as that
|
||||||
|
;might change with improved algorithms or scheduling
|
||||||
|
define i32 @load_misaligned( i32* %ptr ){
|
||||||
|
;CHECK: load_misaligned
|
||||||
|
;CHECK: lqd
|
||||||
|
;CHECK: lqd
|
||||||
|
;CHECK: bi $lr
|
||||||
|
%rv = load i32* %ptr, align 2
|
||||||
|
ret i32 %rv
|
||||||
|
}
|
||||||
|
@@ -14,6 +14,7 @@
|
|||||||
; RUN: grep iohl %t1.s | count 8
|
; RUN: grep iohl %t1.s | count 8
|
||||||
; RUN: grep shufb %t1.s | count 15
|
; RUN: grep shufb %t1.s | count 15
|
||||||
; RUN: grep frds %t1.s | count 1
|
; RUN: grep frds %t1.s | count 1
|
||||||
|
; RUN: llc < %s -march=cellspu | FileCheck %s
|
||||||
|
|
||||||
; ModuleID = 'stores.bc'
|
; ModuleID = 'stores.bc'
|
||||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||||
@@ -149,3 +150,15 @@ entry:
|
|||||||
store float %conv, float* %dest
|
store float %conv, float* %dest
|
||||||
ret float %conv
|
ret float %conv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;Check stores that might span two 16 byte memory blocks
|
||||||
|
define void @store_misaligned( i32 %val, i32* %ptr) {
|
||||||
|
;CHECK: store_misaligned
|
||||||
|
;CHECK: lqd
|
||||||
|
;CHECK: lqd
|
||||||
|
;CHECK: stqd
|
||||||
|
;CHECK: stqd
|
||||||
|
;CHECK: bi $lr
|
||||||
|
store i32 %val, i32*%ptr, align 2
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user