mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-12 13:30:51 +00:00
Custom lower the memory barrier instructions and add support
for lowering without sse2. Add a couple of new testcases. Fixes a few libgomp tests and latent bugs. Remove a few todos. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109078 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b1c857bb7b
commit
9a9d275dc7
@ -1135,13 +1135,6 @@ void test(double *P) {
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
handling llvm.memory.barrier on pre SSE2 cpus
|
||||
|
||||
should generate:
|
||||
lock ; mov %esp, %esp
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
The generated code on x86 for checking for signed overflow on a multiply the
|
||||
obvious way is much longer than it needs to be.
|
||||
|
||||
|
@ -343,8 +343,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
if (Subtarget->hasSSE1())
|
||||
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
|
||||
|
||||
if (!Subtarget->hasSSE2())
|
||||
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
|
||||
// We may not have a libcall for MEMBARRIER so we should lower this.
|
||||
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
|
||||
|
||||
// On X86 and X86-64, atomic operations are lowered to locked instructions.
|
||||
// Locked instructions, in turn, have implicit fence semantics (all memory
|
||||
// operations are flushed before issuing the locked instruction, and they
|
||||
@ -7509,6 +7510,36 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
||||
return Sum;
|
||||
}
|
||||
|
||||
/// LowerMEMBARRIER - Lower an ISD::MEMBARRIER node to the appropriate x86
/// barrier.  Operand 0 is the chain; operands 1-4 are the i8 ordering flags
/// of llvm.memory.barrier; operand 5 is the "device" flag.
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
  DebugLoc dl = Op.getDebugLoc();

  // Pre-SSE2 targets have no fence instructions.  Emit the no-SSE barrier
  // node, which carries an extra zero operand consumed by its
  // "lock or (%esp)" expansion (see Int_MemBarrierNoSSE).
  if (!Subtarget->hasSSE2())
    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
                       DAG.getConstant(0, MVT::i32));

  // Non-device barriers only need to stop compiler code motion: use the
  // MEMBARRIER pseudo rather than a real fence.
  unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
  if (!isDev)
    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));

  // Device barrier: select the cheapest fence covering the requested
  // ordering, mirroring the membarrier patterns in X86InstrSSE.td.
  unsigned Arg1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  unsigned Arg2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Arg3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
  unsigned Arg4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

  // (membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)) --> SFENCE
  if (!Arg1 && !Arg2 && !Arg3 && Arg4)
    return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));

  // (membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)) --> LFENCE
  if (Arg1 && !Arg2 && !Arg3 && !Arg4)
    return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));

  // Any other combination gets a full MFENCE.
  return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
|
||||
|
||||
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT T = Op.getValueType();
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
@ -7598,6 +7629,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
switch (Op.getOpcode()) {
|
||||
default: llvm_unreachable("Should not custom lower this!");
|
||||
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
|
||||
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
|
||||
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
|
||||
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
|
||||
|
@ -265,7 +265,13 @@ namespace llvm {
|
||||
ATOMXOR64_DAG,
|
||||
ATOMAND64_DAG,
|
||||
ATOMNAND64_DAG,
|
||||
ATOMSWAP64_DAG
|
||||
ATOMSWAP64_DAG,
|
||||
|
||||
// Memory barrier
|
||||
MEMBARRIER,
|
||||
MFENCE,
|
||||
SFENCE,
|
||||
LFENCE
|
||||
|
||||
// WARNING: Do not add anything in the end unless you want the node to
|
||||
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
|
||||
@ -715,6 +721,7 @@ namespace llvm {
|
||||
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerFormalArguments(SDValue Chain,
|
||||
|
@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
|
||||
|
||||
// Memory-barrier SelectionDAG nodes.  The plain barrier profile takes no
// operands; the pre-SSE variant carries one integer operand (the register
// used by its "lock or" expansion).
def SDT_X86MEMBARRIER      : SDTypeProfile<0, 0, []>;
def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

def X86MemBarrier      : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
                                [SDNPHasChain]>;
def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
                                [SDNPHasChain]>;

// Fence nodes, one per x86 fence instruction.
def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>;
def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>;
|
||||
|
||||
|
||||
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
|
||||
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
|
||||
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
|
||||
@ -3906,6 +3921,20 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
|
||||
// Atomic support
|
||||
//
|
||||
|
||||
// Memory barriers
let hasSideEffects = 1 in {

// SSE2 targets: matches the X86MemBarrier node and is emitted as the
// "#MEMBARRIER" marker (no machine instruction).
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
                       "#MEMBARRIER",
                       [(X86MemBarrier)]>, Requires<[HasSSE2]>;

// Pre-SSE targets: a locked or of zero into (%esp) acts as a barrier.
// TODO: Get this to fold the constant into the instruction.
let Uses = [ESP] in
def Int_MemBarrierNoSSE : I<0x0B, Pseudo, (outs), (ins GR32:$zero),
                            "lock\n\t"
                            "or{l}\t{$zero, (%esp)|(%esp), $zero}",
                            [(X86MemBarrierNoSSE GR32:$zero)]>, LOCK;
}
|
||||
|
||||
// Atomic swap. These are just normal xchg instructions. But since a memory
|
||||
// operand is referenced, the atomicity is ensured.
|
||||
let Constraints = "$val = $dst" in {
|
||||
|
@ -2001,6 +2001,7 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
|
||||
// Load, store, and memory fence
|
||||
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
|
||||
TB, Requires<[HasSSE1]>;
|
||||
def : Pat<(X86SFence), (SFENCE)>;
|
||||
|
||||
// Alias instructions that map zero vector to pxor / xorp* for sse.
|
||||
// We set canFoldAsLoad because this can be converted to a constant-pool
|
||||
@ -3024,19 +3025,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
|
||||
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
|
||||
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
|
||||
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
|
||||
def : Pat<(X86LFence), (LFENCE)>;
|
||||
def : Pat<(X86MFence), (MFENCE)>;
|
||||
|
||||
|
||||
// Pause. This "instruction" is encoded as "rep; nop", so even though it
|
||||
// was introduced with SSE2, it's backward compatible.
|
||||
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
|
||||
|
||||
//TODO: custom lower this so as to never even generate the noop
|
||||
def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
|
||||
(i8 0)), (NOOP)>;
|
||||
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
|
||||
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
|
||||
def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
|
||||
(i8 1)), (MFENCE)>;
|
||||
|
||||
// Alias instructions that map zero vector to pxor / xorp* for sse.
|
||||
// We set canFoldAsLoad because this can be converted to a constant-pool
|
||||
// load of an all-ones value if folding it would be beneficial.
|
||||
|
21
test/CodeGen/X86/barrier-sse.ll
Normal file
21
test/CodeGen/X86/barrier-sse.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER

; With the device flag false, no barrier combination on an SSE2 target
; should produce a real fence instruction -- only the MEMBARRIER marker.

declare void @llvm.memory.barrier(i1, i1, i1, i1, i1)

define void @test() {
  ; single ordering flag set
  call void @llvm.memory.barrier(i1 true, i1 true, i1 false, i1 false, i1 false)
  call void @llvm.memory.barrier(i1 true, i1 false, i1 true, i1 false, i1 false)
  call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false)
  ; two ordering flags set
  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 false, i1 false)
  call void @llvm.memory.barrier(i1 true, i1 true, i1 false, i1 true, i1 false)
  call void @llvm.memory.barrier(i1 true, i1 false, i1 true, i1 true, i1 false)
  ; all flags set / none set
  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
  call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 false, i1 false)
  ret void
}
|
7
test/CodeGen/X86/barrier.ll
Normal file
7
test/CodeGen/X86/barrier.ll
Normal file
@ -0,0 +1,7 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock

; Without SSE2 a memory barrier must be lowered to a locked instruction.

declare void @llvm.memory.barrier(i1, i1, i1, i1, i1)

define void @test() {
  call void @llvm.memory.barrier(i1 true, i1 true, i1 false, i1 false, i1 false)
  ret void
}
|
Loading…
Reference in New Issue
Block a user