mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-02 07:30:52 +00:00
X86: lower ATOMIC_CMP_SWAP_WITH_SUCCESS directly
Lowering this new node allows us to fold the almost universal comparison for success before it's even formed. Instead we can create a copy from EFLAGS and an X86ISD::SETCC operation since all "cmpxchg" instructions set the zero-flag to the correct value. rdar://problem/13201607 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210923 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f317d86f8f
commit
eee7a7a836
@ -580,7 +580,7 @@ void X86TargetLowering::resetOperationActions() {
|
||||
// Expand certain atomics
|
||||
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
|
||||
MVT VT = IntVTs[i];
|
||||
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
|
||||
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
|
||||
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
|
||||
}
|
||||
@ -601,7 +601,7 @@ void X86TargetLowering::resetOperationActions() {
|
||||
}
|
||||
|
||||
if (Subtarget->hasCmpxchg16b()) {
|
||||
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
|
||||
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
|
||||
}
|
||||
|
||||
// FIXME - use subtarget debug flags
|
||||
@ -14529,7 +14529,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
|
||||
break;
|
||||
}
|
||||
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
|
||||
Op.getOperand(2), SDValue());
|
||||
Op.getOperand(2), SDValue());
|
||||
SDValue Ops[] = { cpIn.getValue(0),
|
||||
Op.getOperand(1),
|
||||
Op.getOperand(3),
|
||||
@ -14539,9 +14539,18 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
|
||||
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
|
||||
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
|
||||
Ops, T, MMO);
|
||||
|
||||
SDValue cpOut =
|
||||
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
|
||||
return cpOut;
|
||||
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
|
||||
MVT::i32, cpOut.getValue(2));
|
||||
SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
|
||||
DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
|
||||
|
||||
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
|
||||
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
|
||||
DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
|
||||
@ -14721,7 +14730,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
default: llvm_unreachable("Should not custom lower this!");
|
||||
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
|
||||
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
|
||||
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
|
||||
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
|
||||
return LowerCMP_SWAP(Op, Subtarget, DAG);
|
||||
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
|
||||
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
|
||||
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
|
||||
@ -14803,8 +14813,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
static void ReplaceATOMIC_LOAD(SDNode *Node,
|
||||
SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) {
|
||||
SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) {
|
||||
SDLoc dl(Node);
|
||||
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
|
||||
|
||||
@ -14813,16 +14823,16 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
|
||||
// (The only way to get a 16-byte load is cmpxchg16b)
|
||||
// FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
|
||||
SDValue Zero = DAG.getConstant(0, VT);
|
||||
SDVTList VTs = DAG.getVTList(VT, MVT::Other);
|
||||
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
|
||||
SDValue Swap =
|
||||
DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP, dl, VT, VTs,
|
||||
DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
|
||||
Node->getOperand(0), Node->getOperand(1), Zero, Zero,
|
||||
cast<AtomicSDNode>(Node)->getMemOperand(),
|
||||
cast<AtomicSDNode>(Node)->getOrdering(),
|
||||
cast<AtomicSDNode>(Node)->getOrdering(),
|
||||
cast<AtomicSDNode>(Node)->getSynchScope());
|
||||
Results.push_back(Swap.getValue(0));
|
||||
Results.push_back(Swap.getValue(1));
|
||||
Results.push_back(Swap.getValue(2));
|
||||
}
|
||||
|
||||
static void
|
||||
@ -14938,7 +14948,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
|
||||
Results);
|
||||
}
|
||||
case ISD::ATOMIC_CMP_SWAP: {
|
||||
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
|
||||
EVT T = N->getValueType(0);
|
||||
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
|
||||
bool Regs64bit = T == MVT::i128;
|
||||
@ -14980,8 +14990,17 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
Regs64bit ? X86::RDX : X86::EDX,
|
||||
HalfT, cpOutL.getValue(2));
|
||||
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
|
||||
|
||||
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
|
||||
MVT::i32, cpOutH.getValue(2));
|
||||
SDValue Success =
|
||||
DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
|
||||
DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
|
||||
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
|
||||
|
||||
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
|
||||
Results.push_back(cpOutH.getValue(1));
|
||||
Results.push_back(Success);
|
||||
Results.push_back(EFLAGS.getValue(1));
|
||||
return;
|
||||
}
|
||||
case ISD::ATOMIC_LOAD_ADD:
|
||||
|
87
test/CodeGen/X86/cmpxchg-i1.ll
Normal file
87
test/CodeGen/X86/cmpxchg-i1.ll
Normal file
@ -0,0 +1,87 @@
|
||||
; RUN: llc -mtriple=x86_64 -o - %s | FileCheck %s
|
||||
|
||||
define i1 @try_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
; Returns only the i1 success result of a 32-bit seq_cst cmpxchg.
; The expectations below require sete %al after cmpxchgl with no compare
; instruction in between, i.e. the flag is taken from the cmpxchg itself.
|
||||
; CHECK-LABEL: try_cmpxchg:
|
||||
; CHECK: cmpxchgl
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: sete %al
|
||||
; CHECK: retq
|
||||
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
|
||||
%success = extractvalue { i32, i1 } %pair, 1
|
||||
ret i1 %success
|
||||
}
|
||||
|
||||
define void @cmpxchg_flow(i64* %addr, i64 %desired, i64 %new) {
; Branches directly on the i1 success result of a 64-bit seq_cst cmpxchg.
; The conditional jump is expected to follow cmpxchgq with neither a compare
; nor a set-on-condition instruction in between. Either branch polarity is
; accepted; the x86 branch-if-equal mnemonic is "je" (there is no "jeq").
|
||||
; CHECK-LABEL: cmpxchg_flow:
|
||||
; CHECK: cmpxchgq
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK-NOT: set
|
||||
; CHECK: {{jne|je}}
|
||||
%pair = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst seq_cst
|
||||
%success = extractvalue { i64, i1 } %pair, 1
|
||||
br i1 %success, label %true, label %false
|
||||
|
||||
true:
|
||||
call void @foo()
|
||||
ret void
|
||||
|
||||
false:
|
||||
call void @bar()
|
||||
ret void
|
||||
}
|
||||
|
||||
define i64 @cmpxchg_sext(i32* %addr, i32 %desired, i32 %new) {
; Sign-extends the i1 success result of a 32-bit cmpxchg to i64 and returns it.
; The expectations still require sete %al and forbid a cmpl after the cmpxchgl.
|
||||
; CHECK-LABEL: cmpxchg_sext:
|
||||
; CHECK-DAG: cmpxchgl
|
||||
; CHECK-NOT: cmpl
|
||||
; CHECK: sete %al
|
||||
; CHECK: retq
|
||||
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
|
||||
%success = extractvalue { i32, i1 } %pair, 1
|
||||
%mask = sext i1 %success to i64
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i32 @cmpxchg_zext(i32* %addr, i32 %desired, i32 %new) {
; Zero-extends the i1 success result of a 32-bit cmpxchg to i32 and returns it.
; Expected code: sete into some byte register (captured as BYTE) followed by a
; movzbl of that same register into %eax, with no compare after cmpxchgl.
|
||||
; CHECK-LABEL: cmpxchg_zext:
|
||||
; CHECK: cmpxchgl
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: sete [[BYTE:%[a-z0-9]+]]
|
||||
; CHECK: movzbl [[BYTE]], %eax
|
||||
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
|
||||
%success = extractvalue { i32, i1 } %pair, 1
|
||||
%mask = zext i1 %success to i32
|
||||
ret i32 %mask
|
||||
}
|
||||
|
||||
|
||||
define i32 @cmpxchg_use_eflags_and_val(i32* %addr, i32 %offset) {
; Retry loop that adds %offset to the value at %addr: it loads the initial
; value atomically, then repeats cmpxchg(old, old + offset) until it succeeds,
; feeding the loaded value back in as the next expected value.
; Both results of the cmpxchg are used (the loaded value and the success
; flag); the expectations require the loop branch to follow cmpxchgl without
; a cmpl in between.
|
||||
; CHECK-LABEL: cmpxchg_use_eflags_and_val:
|
||||
; CHECK: movl (%rdi), %e[[OLDVAL:[a-z0-9]+]]
|
||||
|
||||
; CHECK: [[LOOPBB:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK: leal (%r[[OLDVAL]],%rsi), [[NEW:%[a-z0-9]+]]
|
||||
; CHECK: cmpxchgl [[NEW]], (%rdi)
|
||||
; CHECK-NOT: cmpl
|
||||
; CHECK: jne [[LOOPBB]]
|
||||
|
||||
; Result already in %eax
|
||||
; CHECK: retq
|
||||
entry:
|
||||
%init = load atomic i32* %addr seq_cst, align 4
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%old = phi i32 [%init, %entry], [%oldval, %loop]
|
||||
%new = add i32 %old, %offset
|
||||
%pair = cmpxchg i32* %addr, i32 %old, i32 %new seq_cst seq_cst
|
||||
%oldval = extractvalue { i32, i1 } %pair, 0
|
||||
%success = extractvalue { i32, i1 } %pair, 1
|
||||
br i1 %success, label %done, label %loop
|
||||
|
||||
done:
|
||||
ret i32 %oldval
|
||||
}
|
||||
|
||||
declare void @foo()
|
||||
declare void @bar()
|
83
test/CodeGen/X86/cmpxchg-i128-i1.ll
Normal file
83
test/CodeGen/X86/cmpxchg-i128-i1.ll
Normal file
@ -0,0 +1,83 @@
|
||||
; RUN: llc -mcpu=core-avx2 -mtriple=x86_64 -o - %s | FileCheck %s
|
||||
|
||||
define i1 @try_cmpxchg(i128* %addr, i128 %desired, i128 %new) {
; Returns only the i1 success result of a 128-bit seq_cst cmpxchg.
; The expectations below require sete %al after cmpxchg16b with no compare
; instruction in between.
|
||||
; CHECK-LABEL: try_cmpxchg:
|
||||
; CHECK: cmpxchg16b
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: sete %al
|
||||
; CHECK: retq
|
||||
%pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
|
||||
%success = extractvalue { i128, i1 } %pair, 1
|
||||
ret i1 %success
|
||||
}
|
||||
|
||||
define void @cmpxchg_flow(i128* %addr, i128 %desired, i128 %new) {
; Branches directly on the i1 success result of a 128-bit seq_cst cmpxchg.
; The conditional jump is expected to follow cmpxchg16b with neither a compare
; nor a set-on-condition instruction in between. Either branch polarity is
; accepted; the x86 branch-if-equal mnemonic is "je" (there is no "jeq").
|
||||
; CHECK-LABEL: cmpxchg_flow:
|
||||
; CHECK: cmpxchg16b
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK-NOT: set
|
||||
; CHECK: {{jne|je}}
|
||||
%pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
|
||||
%success = extractvalue { i128, i1 } %pair, 1
|
||||
br i1 %success, label %true, label %false
|
||||
|
||||
true:
|
||||
call void @foo()
|
||||
ret void
|
||||
|
||||
false:
|
||||
call void @bar()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Can't use the flags here because cmpxchg16b only sets ZF.
|
||||
define i1 @cmpxchg_arithcmp(i128* %addr, i128 %desired, i128 %new) {
; Ignores the cmpxchg success result and instead compares the loaded value
; against %desired with a signed greater-or-equal. An explicit cmpq is
; therefore expected after cmpxchg16b.
|
||||
; CHECK-LABEL: cmpxchg_arithcmp:
|
||||
; CHECK: cmpxchg16b
|
||||
; CHECK: cmpq
|
||||
; CHECK: retq
|
||||
%pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
|
||||
%oldval = extractvalue { i128, i1 } %pair, 0
|
||||
%success = icmp sge i128 %oldval, %desired
|
||||
ret i1 %success
|
||||
}
|
||||
|
||||
define i128 @cmpxchg_zext(i128* %addr, i128 %desired, i128 %new) {
; Zero-extends the i1 success result of a 128-bit cmpxchg to i128 and returns
; it. Expected code: sete into some byte register (captured as BYTE) followed
; by a movzbl of that same register into %eax, with no cmpq after cmpxchg16b.
|
||||
; CHECK-LABEL: cmpxchg_zext:
|
||||
; CHECK: cmpxchg16b
|
||||
; CHECK-NOT: cmpq
|
||||
; CHECK: sete [[BYTE:%[a-z0-9]+]]
|
||||
; CHECK: movzbl [[BYTE]], %eax
|
||||
%pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
|
||||
%success = extractvalue { i128, i1 } %pair, 1
|
||||
%mask = zext i1 %success to i128
|
||||
ret i128 %mask
|
||||
}
|
||||
|
||||
|
||||
define i128 @cmpxchg_use_eflags_and_val(i128* %addr, i128 %offset) {
; 128-bit retry loop that adds %offset to the value at %addr: it loads the
; initial value atomically, then repeats cmpxchg(old, old + offset) until it
; succeeds. The expectations require a jne after cmpxchg16b without a cmpq in
; between.
; NOTE(review): the done block returns %old (the phi), while %oldval is only
; fed back into the phi; the 32-bit variant of this test returns %oldval.
; Confirm the difference is intentional.
|
||||
; CHECK-LABEL: cmpxchg_use_eflags_and_val:
|
||||
|
||||
; CHECK: cmpxchg16b
|
||||
; CHECK-NOT: cmpq
|
||||
; CHECK: jne
|
||||
entry:
|
||||
%init = load atomic i128* %addr seq_cst, align 16
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%old = phi i128 [%init, %entry], [%oldval, %loop]
|
||||
%new = add i128 %old, %offset
|
||||
|
||||
%pair = cmpxchg i128* %addr, i128 %old, i128 %new seq_cst seq_cst
|
||||
%oldval = extractvalue { i128, i1 } %pair, 0
|
||||
%success = extractvalue { i128, i1 } %pair, 1
|
||||
|
||||
br i1 %success, label %done, label %loop
|
||||
|
||||
done:
|
||||
ret i128 %old
|
||||
}
|
||||
|
||||
declare void @foo()
|
||||
declare void @bar()
|
Loading…
x
Reference in New Issue
Block a user