Add support for part-word atomics for PPC

http://reviews.llvm.org/D8090#inline-67337


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231843 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nemanja Ivanovic 2015-03-10 20:51:07 +00:00
parent b8bf97b7e1
commit dc12298109
10 changed files with 241 additions and 77 deletions

View File

@ -113,12 +113,14 @@ def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true"
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
"Enable POWER8 Crypto instructions",
[FeatureP8Altivec]>;
"Enable POWER8 Crypto instructions",
[FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
"HasPartwordAtomics", "true",
"Enable l[bh]arx and st[bh]cx.">;
def FeatureInvariantFunctionDescriptors :
SubtargetFeature<"invariant-function-descriptors",
"HasInvariantFunctionDescriptors", "true",
@ -263,7 +265,7 @@ def ProcessorFeatures {
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
DeprecatedMFTB, DeprecatedDST];
FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
}
def : ProcessorModel<"970", G5Model,
@ -342,7 +344,7 @@ def : ProcessorModel<"pwr7", P7Model,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;

View File

@ -1002,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
case PPCISD::LARX: return "PPCISD::LARX";
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@ -7788,10 +7786,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
unsigned AtomicSize,
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
auto LoadMnemonic = PPC::LDARX;
auto StoreMnemonic = PPC::STDCX;
switch (AtomicSize) {
default:
llvm_unreachable("Unexpected size of atomic entity");
case 1:
LoadMnemonic = PPC::LBARX;
StoreMnemonic = PPC::STBCX;
assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
break;
case 2:
LoadMnemonic = PPC::LHARX;
StoreMnemonic = PPC::STHCX;
assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
break;
case 4:
LoadMnemonic = PPC::LWARX;
StoreMnemonic = PPC::STWCX;
break;
case 8:
LoadMnemonic = PPC::LDARX;
StoreMnemonic = PPC::STDCX;
break;
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
@ -7813,7 +7837,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@ -7828,11 +7852,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@ -7850,6 +7874,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
@ -8415,68 +8443,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
BB = EmitAtomicBinary(MI, BB, false, 0);
BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, true, 0);
BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
(Subtarget.hasPartwordAtomics() &&
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
(Subtarget.hasPartwordAtomics() &&
MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
auto LoadMnemonic = PPC::LDARX;
auto StoreMnemonic = PPC::STDCX;
switch(MI->getOpcode()) {
default:
llvm_unreachable("Compare and swap of unknown size");
case PPC::ATOMIC_CMP_SWAP_I8:
LoadMnemonic = PPC::LBARX;
StoreMnemonic = PPC::STBCX;
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
break;
case PPC::ATOMIC_CMP_SWAP_I16:
LoadMnemonic = PPC::LHARX;
StoreMnemonic = PPC::STHCX;
assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
break;
case PPC::ATOMIC_CMP_SWAP_I32:
LoadMnemonic = PPC::LWARX;
StoreMnemonic = PPC::STWCX;
break;
case PPC::ATOMIC_CMP_SWAP_I64:
LoadMnemonic = PPC::LDARX;
StoreMnemonic = PPC::STDCX;
break;
}
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@ -8502,18 +8558,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
// l[wd]arx dest, ptr
// l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
// st[wd]cx. newval, ptr
// st[bhwd]cx. newval, ptr
// bne- loopMBB
// b exitBB
// midMBB:
// st[wd]cx. dest, ptr
// st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@ -8523,7 +8579,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@ -8532,7 +8588,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);

View File

@ -166,14 +166,6 @@ namespace llvm {
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
/// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
/// reserve indexed. This is used to implement atomic operations.
LARX,
/// STCX = This corresponds to PPC stcx. instrcution: store conditional
/// indexed. This is used to implement atomic operations.
STCX,
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@ -489,7 +481,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB, bool is64Bit,
MachineBasicBlock *MBB,
unsigned AtomicSize,
unsigned BinOpcode) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB,

View File

@ -235,15 +235,19 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
let mayLoad = 1, hasSideEffects = 0 in {
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr", IIC_LdStLDARX,
[(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
"ldarx $rD, $ptr", IIC_LdStLDARX, []>;
let Defs = [CR0] in
// Instruction to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
}
let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
"stdcx. $rS, $dst", IIC_LdStSTDCX,
[(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
"stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in

View File

@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
def SDT_PPClarx : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def SDT_PPCstcx : SDTypeProfile<0, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@ -225,12 +218,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Instructions to support atomic operations
def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
[SDNPHasChain, SDNPMayStore]>;
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@ -724,7 +711,7 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
@ -1455,15 +1442,44 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
"lwarx $rD, $src", IIC_LdStLWARX,
[(set i32:$rD, (PPClarx xoaddr:$src))]>;
let mayLoad = 1, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
"lbarx $rD, $src", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
"lharx $rD, $src", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
"lwarx $rD, $src", IIC_LdStLWARX, []>;
// Instructions to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
"lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
Requires<[HasPartwordAtomics]>;
def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
"lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
Requires<[HasPartwordAtomics]>;
def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
"lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
}
let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
"stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
isDOT, Requires<[HasPartwordAtomics]>;
def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
"sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
isDOT, Requires<[HasPartwordAtomics]>;
let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
"stwcx. $rS, $dst", IIC_LdStSTWCX,
[(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
"stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
}
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;

View File

@ -95,6 +95,7 @@ void PPCSubtarget::initializeEnvironment() {
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
IsQPXStackUnaligned = false;
}

View File

@ -114,6 +114,7 @@ protected:
bool IsLittleEndian;
bool HasICBT;
bool HasInvariantFunctionDescriptors;
bool HasPartwordAtomics;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
@ -236,6 +237,7 @@ public:
bool hasInvariantFunctionDescriptors() const {
return HasInvariantFunctionDescriptors;
}
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {

View File

@ -1,4 +1,6 @@
; RUN: llc < %s -march=ppc64 | FileCheck %s
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-P7U
; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P7U
define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange_and_add:
@ -8,6 +10,22 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
ret i64 %tmp
}
define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange_and_add8:
; CHECK-P7U: lbarx
%tmp = atomicrmw add i8* %mem, i8 %val monotonic
; CHECK-P7U: stbcx.
ret i8 %tmp
}
define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange_and_add16:
; CHECK-P7U: lharx
%tmp = atomicrmw add i16* %mem, i16 %val monotonic
; CHECK-P7U: sthcx.
ret i16 %tmp
}
define i64 @exchange_and_cmp(i64* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp:
; CHECK: ldarx
@ -18,6 +36,26 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {
ret i64 %tmp
}
define i8 @exchange_and_cmp8(i8* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
; CHECK-P7U: lbarx
%tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
%tmp = extractvalue { i8, i1 } %tmppair, 0
; CHECK-P7U: stbcx.
; CHECK-P7U: stbcx.
ret i8 %tmp
}
define i16 @exchange_and_cmp16(i16* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp16:
; CHECK-P7U: lharx
%tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
%tmp = extractvalue { i16, i1 } %tmppair, 0
; CHECK-P7U: sthcx.
; CHECK-P7U: sthcx.
ret i16 %tmp
}
define i64 @exchange(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange:
; CHECK: ldarx
@ -26,6 +64,22 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
ret i64 %tmp
}
define i8 @exchange8(i8* %mem, i8 %val) nounwind {
; CHECK-LABEL: exchange8:
; CHECK-P7U: lbarx
%tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
; CHECK-P7U: stbcx.
ret i8 %tmp
}
define i16 @exchange16(i16* %mem, i16 %val) nounwind {
; CHECK-LABEL: exchange16:
; CHECK-P7U: lharx
%tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
; CHECK-P7U: sthcx.
ret i16 %tmp
}
define void @atomic_store(i64* %mem, i64 %val) nounwind {
entry:
; CHECK: @atomic_store

View File

@ -42,12 +42,30 @@
# CHECK: dcbf 2, 3
0x7c 0x02 0x18 0xac
# CHECK: lwarx 2, 3, 4
# CHECK: lbarx 2, 3, 4
0x7c 0x43 0x20 0x68
# CHECK: lharx 2, 3, 4
0x7c 0x43 0x20 0xe8
# CHECK: lwarx 2, 3, 4
0x7c 0x43 0x20 0x28
# CHECK: ldarx 2, 3, 4
# CHECK: ldarx 2, 3, 4
0x7c 0x43 0x20 0xa8
# CHECK: lbarx 2, 3, 4, 1
0x7c 0x43 0x20 0x69
# CHECK: lharx 2, 3, 4, 1
0x7c 0x43 0x20 0xe9
# CHECK: lwarx 2, 3, 4, 1
0x7c 0x43 0x20 0x29
# CHECK: ldarx 2, 3, 4, 1
0x7c 0x43 0x20 0xa9
# CHECK: sync 0
0x7c 0x00 0x04 0xac

View File

@ -34,11 +34,6 @@
# CHECK-LE: isync # encoding: [0x2c,0x01,0x00,0x4c]
isync
# FIXME: lbarx 2, 3, 4, 1
# FIXME: lharx 2, 3, 4, 1
# FIXME: lwarx 2, 3, 4, 1
# FIXME: ldarx 2, 3, 4, 1
# FIXME: stbcx. 2, 3, 4
# FIXME: sthcx. 2, 3, 4
# CHECK-BE: stwcx. 2, 3, 4 # encoding: [0x7c,0x43,0x21,0x2d]
@ -70,15 +65,38 @@
dcbf 2, 3
# FIXME: dcbfl 2, 3
# FIXME: lbarx 2, 3, 4
# FIXME: lharx 2, 3, 4
# CHECK-BE: lbarx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0x68]
# CHECK-LE: lbarx 2, 3, 4 # encoding: [0x68,0x20,0x43,0x7c]
lbarx 2, 3, 4
# CHECK-BE: lharx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0xe8]
# CHECK-LE: lharx 2, 3, 4 # encoding: [0xe8,0x20,0x43,0x7c]
lharx 2, 3, 4
# CHECK-BE: lwarx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0x28]
# CHECK-LE: lwarx 2, 3, 4 # encoding: [0x28,0x20,0x43,0x7c]
lwarx 2, 3, 4
# CHECK-BE: ldarx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0xa8]
# CHECK-LE: ldarx 2, 3, 4 # encoding: [0xa8,0x20,0x43,0x7c]
ldarx 2, 3, 4
# CHECK-BE: lbarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0x69]
# CHECK-LE: lbarx 2, 3, 4, 1 # encoding: [0x69,0x20,0x43,0x7c]
lbarx 2, 3, 4, 1
# CHECK-BE: lharx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0xe9]
# CHECK-LE: lharx 2, 3, 4, 1 # encoding: [0xe9,0x20,0x43,0x7c]
lharx 2, 3, 4, 1
# CHECK-BE: lwarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0x29]
# CHECK-LE: lwarx 2, 3, 4, 1 # encoding: [0x29,0x20,0x43,0x7c]
lwarx 2, 3, 4, 1
# CHECK-BE: ldarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0xa9]
# CHECK-LE: ldarx 2, 3, 4, 1 # encoding: [0xa9,0x20,0x43,0x7c]
ldarx 2, 3, 4, 1
# CHECK-BE: sync 0 # encoding: [0x7c,0x00,0x04,0xac]
# CHECK-LE: sync 0 # encoding: [0xac,0x04,0x00,0x7c]
sync