diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b7408976363..04758146e55 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12840,7 +12840,7 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
   MachineFunction::iterator I = MBB;
   ++I;
 
-  assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+  assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
          "Unexpected number of operands");
 
   assert(MI->hasOneMemOperand() &&
@@ -13072,7 +13072,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
   MachineFunction::iterator I = MBB;
   ++I;
 
-  assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+  assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
          "Unexpected number of operands");
 
   assert(MI->hasOneMemOperand() &&
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 2a26a22bc15..f3879628c5c 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -513,15 +513,19 @@ def CMOV_RFP80 : I<0, Pseudo,
 
 multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
   let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
+    let Defs = [EFLAGS, AL] in
     def NAME#8  : I<0, Pseudo, (outs GR8:$dst),
                     (ins i8mem:$ptr, GR8:$val),
                     !strconcat(mnemonic, "8 PSEUDO!"), []>;
+    let Defs = [EFLAGS, AX] in
     def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
                     (ins i16mem:$ptr, GR16:$val),
                     !strconcat(mnemonic, "16 PSEUDO!"), []>;
+    let Defs = [EFLAGS, EAX] in
     def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
                     (ins i32mem:$ptr, GR32:$val),
                     !strconcat(mnemonic, "32 PSEUDO!"), []>;
+    let Defs = [EFLAGS, RAX] in
     def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
                     (ins i64mem:$ptr, GR64:$val),
                     !strconcat(mnemonic, "64 PSEUDO!"), []>;
@@ -559,7 +563,8 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
 
 multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
-  let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
+  let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
+      mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
   def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
                     (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
                     !strconcat(mnemonic, "6432 PSEUDO!"), []>;
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
new file mode 100644
index 00000000000..00891d6434c
--- /dev/null
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
+entry:
+  %ptrtoarg4 = load i8** %a, align 8
+  %brglist1 = getelementptr i8** %a, i64 1
+  %ptrtoarg25 = load i8** %brglist1, align 8
+  %0 = load i64* %b, align 8
+  %1 = mul i64 %0, 4
+  %scevgep = getelementptr i8* %ptrtoarg25, i64 %1
+  %2 = mul i64 %d, 4
+  br label %loop.cond
+
+loop.cond:                                        ; preds = %test.exit, %entry
+  %asr.iv6 = phi i8* [ %29, %test.exit ], [ %scevgep, %entry ]
+  %iv = phi i64 [ %0, %entry ], [ %28, %test.exit ]
+  %3 = icmp eq i64 %iv, %c
+  br i1 %3, label %return, label %loop
+
+loop:                                             ; preds = %loop.cond
+  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
+  %5 = load i64* %4, align 8, !tbaa !3
+  %vector.size.i = ashr i64 %5, 3
+  %num.vector.wi.i = shl i64 %vector.size.i, 3
+  %6 = icmp eq i64 %vector.size.i, 0
+  br i1 %6, label %scalarIf.i, label %dim_0_vector_pre_head.i
+
+dim_0_vector_pre_head.i:                          ; preds = %loop
+  %7 = trunc i64 %5 to i32
+  %tempvector_func.i = insertelement <8 x i32> undef, i32 %7, i32 0
+  %vectorvector_func.i = shufflevector <8 x i32> %tempvector_func.i, <8 x i32> undef, <8 x i32> zeroinitializer
+  br label %vector_kernel_entry.i
+
+vector_kernel_entry.i:                            ; preds = %vector_kernel_entry.i, %dim_0_vector_pre_head.i
+  %asr.iv9 = phi i8* [ %scevgep10, %vector_kernel_entry.i ], [ %asr.iv6, %dim_0_vector_pre_head.i ]
+  %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
+  %8 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+  %asr.iv911 = bitcast i8* %asr.iv9 to <8 x i32> addrspace(1)*
+  %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4
+  %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
+  %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
+  %extract10vector_func.i = extractelement <8 x i32> %9, i32 2
+  %extract11vector_func.i = extractelement <8 x i32> %9, i32 3
+  %extract12vector_func.i = extractelement <8 x i32> %9, i32 4
+  %extract13vector_func.i = extractelement <8 x i32> %9, i32 5
+  %extract14vector_func.i = extractelement <8 x i32> %9, i32 6
+  %extract15vector_func.i = extractelement <8 x i32> %9, i32 7
+  %10 = atomicrmw min i32 addrspace(1)* %8, i32 %extract8vector_func.i seq_cst
+  %11 = atomicrmw min i32 addrspace(1)* %8, i32 %extract9vector_func.i seq_cst
+  %12 = atomicrmw min i32 addrspace(1)* %8, i32 %extract10vector_func.i seq_cst
+  %13 = atomicrmw min i32 addrspace(1)* %8, i32 %extract11vector_func.i seq_cst
+  %14 = atomicrmw min i32 addrspace(1)* %8, i32 %extract12vector_func.i seq_cst
+  %15 = atomicrmw min i32 addrspace(1)* %8, i32 %extract13vector_func.i seq_cst
+  %16 = atomicrmw min i32 addrspace(1)* %8, i32 %extract14vector_func.i seq_cst
+  %17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst
+  store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4
+  %asr.iv.next = add i64 %asr.iv, -1
+  %scevgep10 = getelementptr i8* %asr.iv9, i64 32
+  %dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0
+  br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i
+
+scalarIf.i:                                       ; preds = %vector_kernel_entry.i, %loop
+  %exec_wi.i = phi i64 [ 0, %loop ], [ %num.vector.wi.i, %vector_kernel_entry.i ]
+  %18 = icmp eq i64 %exec_wi.i, %5
+  br i1 %18, label %test.exit, label %dim_0_pre_head.i
+
+dim_0_pre_head.i:                                 ; preds = %scalarIf.i
+  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
+  %20 = load i64* %19, align 8, !tbaa !3
+  %21 = trunc i64 %20 to i32
+  %22 = mul i64 %vector.size.i, 8
+  br label %scalar_kernel_entry.i
+
+scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry.i, %dim_0_pre_head.i
+  %asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
+  %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)*
+  %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
+  %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+  %25 = load i32 addrspace(1)* %scevgep16, align 4, !tbaa !4
+  %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
+  %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+  store i32 %21, i32 addrspace(1)* %scevgep15, align 4, !tbaa !4
+  %asr.iv.next13 = add i64 %asr.iv12, 1
+  %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
+  br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i
+
+test.exit:                                        ; preds = %scalar_kernel_entry.i, %scalarIf.i
+  %27 = bitcast i8* %asr.iv6 to i1*
+  %28 = add i64 %iv, %d
+  store i64 %28, i64* %b, align 8
+  %scevgep8 = getelementptr i1* %27, i64 %2
+  %29 = bitcast i1* %scevgep8 to i8*
+  br label %loop.cond
+
+return:                                           ; preds = %loop.cond
+  store i64 %0, i64* %b, align 8
+  ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
+
+; CHECK: test
+; CHECK: decq
+; CHECK-NOT: cmpxchgl
+; CHECK: jne
+; CHECK: ret