From 140be2dfb76928cb660b7de23f8310d76ca794b5 Mon Sep 17 00:00:00 2001 From: Dale Johannesen Date: Tue, 19 Aug 2008 18:47:28 +0000 Subject: [PATCH] Add support for 8 and 16 bit forms of __sync builtins on X86. Change "lock" instructions to be on a separate line. This is needed to work around a bug in the Darwin assembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 103 +++++++++++++++++++++++++---- lib/Target/X86/X86ISelLowering.h | 6 ++ lib/Target/X86/X86Instr64bit.td | 4 +- lib/Target/X86/X86InstrInfo.td | 53 ++++++++++++--- test/CodeGen/X86/atomic_op.ll | 5 +- 5 files changed, 146 insertions(+), 25 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f5e9b08f5c0..b67c625287b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6136,6 +6136,12 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MachineBasicBlock *MBB, unsigned regOpc, unsigned immOpc, + unsigned LoadOpc, + unsigned CXchgOpc, + unsigned copyOpc, + unsigned notOpc, + unsigned EAXreg, + TargetRegisterClass *RC, bool invSrc) { // For the atomic bitwise operator, we generate // thisMBB: @@ -6181,19 +6187,19 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, int lastAddrIndx = 3; // [0,3] int valArgIndx = 4; - unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); - MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), t1); + unsigned t1 = F->getRegInfo().createVirtualRegister(RC); + MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(LoadOpc), t1); for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - unsigned tt = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + unsigned tt = F->getRegInfo().createVirtualRegister(RC); if (invSrc) { - MIB = BuildMI(newMBB, TII->get(X86::NOT32r), tt).addReg(t1); + MIB = BuildMI(newMBB, TII->get(notOpc), tt).addReg(t1); } else tt = t1; - unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + unsigned t2 = F->getRegInfo().createVirtualRegister(RC); assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) && "invalid operand"); if (argOpers[valArgIndx]->isReg()) @@ -6203,18 +6209,18 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(tt); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), X86::EAX); + MIB = BuildMI(newMBB, TII->get(copyOpc), EAXreg); MIB.addReg(t1); - MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32)); + MIB = BuildMI(newMBB, TII->get(CXchgOpc)); for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); MIB.addReg(t2); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); - MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg()); - MIB.addReg(X86::EAX); + MIB = BuildMI(newMBB, TII->get(copyOpc), destOper.getReg()); + MIB.addReg(EAXreg); // insert branch BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); @@ -6463,16 +6469,28 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri); + X86::AND32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass); case X86::ATOMOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, - X86::OR32ri); + X86::OR32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass); case X86::ATOMXOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, - X86::XOR32ri); + X86::XOR32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass); case X86::ATOMNAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, true); + X86::AND32ri, X86::MOV32rm, + X86::LCMPXCHG32, X86::MOV32rr, + X86::NOT32r, X86::EAX, + X86::GR32RegisterClass, true); case X86::ATOMMIN32: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); case X86::ATOMMAX32: @@ -6481,6 +6499,65 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); case X86::ATOMUMAX32: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); + + case X86::ATOMAND16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, + X86::AND16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass); + case X86::ATOMOR16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, + X86::OR16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass); + case X86::ATOMXOR16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, + X86::XOR16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass); + case X86::ATOMNAND16: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, + X86::AND16ri, X86::MOV16rm, + X86::LCMPXCHG16, X86::MOV16rr, + X86::NOT16r, X86::AX, + X86::GR16RegisterClass, true); + case X86::ATOMMIN16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); + case X86::ATOMMAX16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); + case X86::ATOMUMIN16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); + case X86::ATOMUMAX16: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); + + case X86::ATOMAND8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, + X86::AND8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass); + case X86::ATOMOR8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, + X86::OR8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass); + case X86::ATOMXOR8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, + X86::XOR8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass); + case X86::ATOMNAND8: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, + X86::AND8ri, X86::MOV8rm, + X86::LCMPXCHG8, X86::MOV8rr, + X86::NOT8r, X86::AL, + X86::GR8RegisterClass, true); + // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8632f3c0c70..63866e76dac 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -576,6 +576,12 @@ namespace llvm { MachineBasicBlock *BB, unsigned regOpc, unsigned immOpc, + unsigned loadOpc, + unsigned cxchgOpc, + unsigned copyOpc, + unsigned notOpc, + unsigned EAXreg, + TargetRegisterClass *RC, bool invSrc = false); /// Utility function to emit atomic min and max. It takes the min/max diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index f891a9e818b..d651b214cb6 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1133,14 +1133,14 @@ def TLS_addr64 : I<0, Pseudo, (outs GR64:$dst), (ins i64imm:$sym), let Defs = [RAX, EFLAGS], Uses = [RAX] in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), - "lock cmpxchgq $swap,$ptr", + "lock\n\tcmpxchgq $swap,$ptr", [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; } let Constraints = "$val = $dst" in { let Defs = [EFLAGS] in def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), - "lock xadd $val, $ptr", + "lock\n\txadd $val, $ptr", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c4db5882908..37a5fed51c2 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2598,38 +2598,38 @@ def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), // Atomic compare and swap. let Defs = [EAX, EFLAGS], Uses = [EAX] in { def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), - "lock cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", + "lock\n\tcmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; } let Defs = [EAX, EBX, ECX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr), - "lock cmpxchg8b\t$ptr", + "lock\n\tcmpxchg8b\t$ptr", [(X86cas8 addr:$ptr)]>, TB, LOCK; } let Defs = [AX, EFLAGS], Uses = [AX] in { def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), - "lock cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", + "lock\n\tcmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; } let Defs = [AL, EFLAGS], Uses = [AL] in { def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), - "lock cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", + "lock\n\tcmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; } // Atomic exchange and add let Constraints = "$val = $dst", Defs = [EFLAGS] in { def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), - "lock xadd{l}\t{$val, $ptr|$ptr, $val}", + "lock\n\txadd{l}\t{$val, $ptr|$ptr, $val}", [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, TB, LOCK; def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), - "lock xadd{w}\t{$val, $ptr|$ptr, $val}", + "lock\n\txadd{w}\t{$val, $ptr|$ptr, $val}", [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, TB, OpSize, LOCK; def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), - "lock xadd{b}\t{$val, $ptr|$ptr, $val}", + "lock\n\txadd{b}\t{$val, $ptr|$ptr, $val}", [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, TB, LOCK; } @@ -2649,7 +2649,6 @@ def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), "#ATOMNAND32 PSUEDO!", [(set GR32:$dst, (atomic_load_nand addr:$ptr, GR32:$val))]>; - def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), "#ATOMMIN32 PSUEDO!", [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>; @@ -2662,6 +2661,44 @@ def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), "#ATOMUMAX32 PSUEDO!", [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>; + +def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMAND16 PSUEDO!", + [(set GR16:$dst, (atomic_load_and addr:$ptr, GR16:$val))]>; +def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMOR16 PSUEDO!", + [(set GR16:$dst, (atomic_load_or addr:$ptr, GR16:$val))]>; +def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMXOR16 PSUEDO!", + [(set GR16:$dst, (atomic_load_xor addr:$ptr, GR16:$val))]>; +def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMNAND16 PSUEDO!", + [(set GR16:$dst, (atomic_load_nand addr:$ptr, GR16:$val))]>; +def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), + "#ATOMMIN16 PSUEDO!", + [(set GR16:$dst, (atomic_load_min addr:$ptr, GR16:$val))]>; +def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMMAX16 PSUEDO!", + [(set GR16:$dst, (atomic_load_max addr:$ptr, GR16:$val))]>; +def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMIN16 PSUEDO!", + [(set GR16:$dst, (atomic_load_umin addr:$ptr, GR16:$val))]>; +def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMAX16 PSUEDO!", + [(set GR16:$dst, (atomic_load_umax addr:$ptr, GR16:$val))]>; + +def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMAND8 PSUEDO!", + [(set GR8:$dst, (atomic_load_and addr:$ptr, GR8:$val))]>; +def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMOR8 PSUEDO!", + [(set GR8:$dst, (atomic_load_or addr:$ptr, GR8:$val))]>; +def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMXOR8 PSUEDO!", + [(set GR8:$dst, (atomic_load_xor addr:$ptr, GR8:$val))]>; +def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), + "#ATOMNAND8 PSUEDO!", + [(set GR8:$dst, (atomic_load_nand addr:$ptr, GR8:$val))]>; } //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll index a17f0285aba..6871a08b29e 100644 --- a/test/CodeGen/X86/atomic_op.ll +++ b/test/CodeGen/X86/atomic_op.ll @@ -1,6 +1,7 @@ ; RUN: llvm-as < %s | llc -march=x86 -o %t1 -f -; RUN: grep "lock xaddl" %t1 | count 4 -; RUN: grep "lock cmpxchgl" %t1 | count 13 +; RUN: grep "lock" %t1 | count 17 +; RUN: grep "xaddl" %t1 | count 4 +; RUN: grep "cmpxchgl" %t1 | count 13 ; RUN: grep "xchgl" %t1 | count 14 ; RUN: grep "cmova" %t1 | count 2 ; RUN: grep "cmovb" %t1 | count 2