ARM: expand atomic ldrex/strex loops in IR

The previous implementation, in which ATOMIC_LOAD_WHATEVER nodes were
expanded at MachineInstr emission time, had grown extremely large and
involved, to account for the subtly different code needed for the
various flavours (8/16/32/64 bit, cmpxchg/add/minmax).

Moving this transformation into the IR clears up the code
substantially, and makes future optimisations much easier:

1. an atomicrmw followed by using the *new* value can be more
   efficient. As an IR pass, simple CSE could handle this
   efficiently.
2. Making use of cmpxchg success/failure orderings only has to be done
   in one (simpler) place.
3. The common "cmpxchg; did we store?" idiom can be exposed to
   optimisation.

I intend to gradually improve this situation within the ARM backend
and make sure there are no hidden issues before moving the code out
into CodeGen to be shared with (at least ARM64/AArch64, though I think
PPC & Mips could benefit too).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205525 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover
2014-04-03 11:44:58 +00:00
parent 37e5cfa4aa
commit badb137729
11 changed files with 752 additions and 1319 deletions

View File

@@ -252,8 +252,6 @@ private:
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
@@ -2411,38 +2409,6 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
                                      unsigned Op16, unsigned Op32,
                                      unsigned Op64) {
  // Largely a one-to-one mapping from the generic atomic node onto the
  // target opcode matching the memory width; the AtomicOrdering is carried
  // along as an extra operand so it can be consulted later on.
  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
  EVT MemVT = AN->getMemoryVT();

  // Default result list: the atomic's value type plus the chain. 64-bit
  // atomics instead produce a pair of i32 results (a GPR pair).
  SDVTList ResultVTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
  unsigned Opcode;
  if (MemVT == MVT::i8)
    Opcode = Op8;
  else if (MemVT == MVT::i16)
    Opcode = Op16;
  else if (MemVT == MVT::i32)
    Opcode = Op32;
  else if (MemVT == MVT::i64) {
    Opcode = Op64;
    ResultVTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
  } else
    llvm_unreachable("Unexpected atomic operation");

  // Copy every operand except the chain (operand 0), then append the
  // ordering as a target constant, and finally move the chain to the end.
  SmallVector<SDValue, 6> NewOps;
  unsigned NumOps = AN->getNumOperands();
  for (unsigned Idx = 1; Idx != NumOps; ++Idx)
    NewOps.push_back(AN->getOperand(Idx));
  NewOps.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
  NewOps.push_back(AN->getOperand(0)); // Chain moves to the end.

  return CurDAG->SelectNodeTo(Node, Opcode, ResultVTs, NewOps.data(),
                              NewOps.size());
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
@@ -3320,85 +3286,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
case ISD::ATOMIC_LOAD:
if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
else
break;
case ISD::ATOMIC_LOAD_ADD:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_ADD_I8,
ARM::ATOMIC_LOAD_ADD_I16,
ARM::ATOMIC_LOAD_ADD_I32,
ARM::ATOMIC_LOAD_ADD_I64);
case ISD::ATOMIC_LOAD_SUB:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_SUB_I8,
ARM::ATOMIC_LOAD_SUB_I16,
ARM::ATOMIC_LOAD_SUB_I32,
ARM::ATOMIC_LOAD_SUB_I64);
case ISD::ATOMIC_LOAD_AND:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_AND_I8,
ARM::ATOMIC_LOAD_AND_I16,
ARM::ATOMIC_LOAD_AND_I32,
ARM::ATOMIC_LOAD_AND_I64);
case ISD::ATOMIC_LOAD_OR:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_OR_I8,
ARM::ATOMIC_LOAD_OR_I16,
ARM::ATOMIC_LOAD_OR_I32,
ARM::ATOMIC_LOAD_OR_I64);
case ISD::ATOMIC_LOAD_XOR:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_XOR_I8,
ARM::ATOMIC_LOAD_XOR_I16,
ARM::ATOMIC_LOAD_XOR_I32,
ARM::ATOMIC_LOAD_XOR_I64);
case ISD::ATOMIC_LOAD_NAND:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_NAND_I8,
ARM::ATOMIC_LOAD_NAND_I16,
ARM::ATOMIC_LOAD_NAND_I32,
ARM::ATOMIC_LOAD_NAND_I64);
case ISD::ATOMIC_LOAD_MIN:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_MIN_I8,
ARM::ATOMIC_LOAD_MIN_I16,
ARM::ATOMIC_LOAD_MIN_I32,
ARM::ATOMIC_LOAD_MIN_I64);
case ISD::ATOMIC_LOAD_MAX:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_MAX_I8,
ARM::ATOMIC_LOAD_MAX_I16,
ARM::ATOMIC_LOAD_MAX_I32,
ARM::ATOMIC_LOAD_MAX_I64);
case ISD::ATOMIC_LOAD_UMIN:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_UMIN_I8,
ARM::ATOMIC_LOAD_UMIN_I16,
ARM::ATOMIC_LOAD_UMIN_I32,
ARM::ATOMIC_LOAD_UMIN_I64);
case ISD::ATOMIC_LOAD_UMAX:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_UMAX_I8,
ARM::ATOMIC_LOAD_UMAX_I16,
ARM::ATOMIC_LOAD_UMAX_I32,
ARM::ATOMIC_LOAD_UMAX_I64);
case ISD::ATOMIC_SWAP:
return SelectAtomic(N,
ARM::ATOMIC_SWAP_I8,
ARM::ATOMIC_SWAP_I16,
ARM::ATOMIC_SWAP_I32,
ARM::ATOMIC_SWAP_I64);
case ISD::ATOMIC_CMP_SWAP:
return SelectAtomic(N,
ARM::ATOMIC_CMP_SWAP_I8,
ARM::ATOMIC_CMP_SWAP_I16,
ARM::ATOMIC_CMP_SWAP_I32,
ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);