From f1e309eb4862459a76445942ba4dafc433b6f317 Mon Sep 17 00:00:00 2001 From: Dale Johannesen Date: Fri, 2 Jul 2010 20:16:09 +0000 Subject: [PATCH] Propagate the AlignStack bit in InlineAsm's to the PrologEpilog code, and use it to determine whether the asm forces stack alignment or not. gcc consistently does not do this for GCC-style asms; Apple gcc inconsistently sometimes does it for asm blocks. There is no convenient place to put a bit in either the SDNode or the MachineInstr form, so I've added an extra operand to each; unlovely, but it does allow for expansion for more bits, should we need it. PR 5125. Some existing testcases are affected. The operand lists of the SDNode and MachineInstr forms are indexed with awesome mnemonics, like "2"; I may fix this someday, but not now. I'm not making it any worse. If anyone is inspired I think you can find all the right places from this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107506 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/InlineAsm.h | 3 +- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 2 +- lib/CodeGen/MachineInstr.cpp | 14 ++++----- lib/CodeGen/PrologEpilogInserter.cpp | 6 ++-- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 6 ++++ .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +++- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 1 + test/CodeGen/Thumb/push.ll | 2 +- test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 +- test/CodeGen/X86/2010-07-02-asm-alignstack.ll | 31 +++++++++++++++++++ test/CodeGen/X86/inline-asm-tied.ll | 2 +- test/CodeGen/X86/iv-users-in-other-loops.ll | 2 +- 12 files changed, 60 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/X86/2010-07-02-asm-alignstack.ll diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h index f4d125b4dc3..105b1bcd94c 100644 --- a/include/llvm/InlineAsm.h +++ b/include/llvm/InlineAsm.h @@ -154,7 +154,8 @@ public: Op_InputChain = 0, Op_AsmString = 1, Op_MDNode = 2, - Op_FirstOperand = 3, + Op_IsAlignStack = 3, + Op_FirstOperand = 4, Kind_RegUse = 1, Kind_RegDef = 2, diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index e181305ea90..f6f3bae42a8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -279,7 +279,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = 1; + unsigned OpNo = 2; bool Error = false; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index eeea9f2e305..39ab43bb5ea 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -881,14 +881,14 @@ int MachineInstr::findFirstPredOperandIdx() const { bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { if (isInlineAsm()) { - assert(DefOpIdx >= 2); + assert(DefOpIdx >= 3); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) return false; // Determine the actual operand index that corresponds to this index. unsigned DefNo = 0; unsigned DefPart = 0; - for (unsigned i = 1, e = getNumOperands(); i < e; ) { + for (unsigned i = 2, e = getNumOperands(); i < e; ) { const MachineOperand &FMO = getOperand(i); // After the normal asm operands there may be additional imp-def regs. if (!FMO.isImm()) @@ -903,7 +903,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } ++DefNo; } - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + for (unsigned i = 2, e = getNumOperands(); i != e; ++i) { const MachineOperand &FMO = getOperand(i); if (!FMO.isImm()) continue; @@ -946,7 +946,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { // Find the flag operand corresponding to UseOpIdx unsigned FlagIdx, NumOps=0; - for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { const MachineOperand &UFMO = getOperand(FlagIdx); // After the normal asm operands there may be additional imp-def regs. if (!UFMO.isImm()) @@ -964,9 +964,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { if (!DefOpIdx) return true; - unsigned DefIdx = 1; - // Remember to adjust the index. First operand is asm string, then there - // is a flag for each. + unsigned DefIdx = 2; + // Remember to adjust the index. First operand is asm string, second is + // the AlignStack bit, then there is a flag for each. while (DefNo) { const MachineOperand &FMO = getOperand(DefIdx); assert(FMO.isImm()); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e92b44fcff1..62b7c21cc8a 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -158,9 +158,9 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { AdjustsStack = true; FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { - // An InlineAsm might be a call; assume it is to get the stack frame - // aligned correctly for calls. - AdjustsStack = true; + // Some inline asm's need a stack frame, as indicated by operand 1. + if (I->getOperand(1).getImm()) + AdjustsStack = true; } MFI->setAdjustsStack(AdjustsStack); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 0cb463dcd39..a6f63c746b7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -834,6 +834,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); + // Add the isAlignStack bit. + int64_t isAlignStack = + cast(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c818f0d8a2d..af0a28da8fe 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5454,6 +5454,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + // Remember the AlignStack bit as operand 3. + AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, + MVT::i1)); + // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; @@ -5642,7 +5646,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Finish up input operands. Set the input chain and add the flag last. - AsmNodeOperands[0] = Chain; + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 00e1f4f6b67..39076353a44 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1089,6 +1089,7 @@ SelectInlineAsmMemoryOperands(std::vector &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) diff --git a/test/CodeGen/Thumb/push.ll b/test/CodeGen/Thumb/push.ll index fd05ef4dd76..94ef8e90043 100644 --- a/test/CodeGen/Thumb/push.ll +++ b/test/CodeGen/Thumb/push.ll @@ -5,6 +5,6 @@ define void @t() nounwind { ; CHECK: t: ; CHECK: push {r7} entry: - call void asm sideeffect ".long 0xe7ffdefe", ""() nounwind + call void asm sideeffect alignstack ".long 0xe7ffdefe", ""() nounwind ret void } diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll index d7b9463b5e1..fcb2ed07dc1 100644 --- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll +++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 83 +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 82 ; rdar://6802189 ; Test if linearscan is unfavoring registers for allocation to allow more reuse diff --git a/test/CodeGen/X86/2010-07-02-asm-alignstack.ll b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll new file mode 100644 index 00000000000..cb47d208dd4 --- /dev/null +++ b/test/CodeGen/X86/2010-07-02-asm-alignstack.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s + +define void @foo() nounwind ssp { +entry: +; CHECK: foo +; CHECK: subq $8, %rsp +; CHECK: int $3 + call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + call void asm sideeffect alignstack ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect alignstack ".line 3", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect alignstack "int $$3", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind + br label %return + +return: ; preds = %entry + ret void +} + +define void @bar() nounwind ssp { +entry: +; CHECK: bar +; CHECK-NOT: subq $8, %rsp +; CHECK: int $3 + call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + call void asm sideeffect ".file \22small.c\22", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect ".line 3", "~{dirflag},~{fpsr},~{flags}"() nounwind + call void asm sideeffect "int $$3", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind + br label %return + +return: ; preds = %entry + ret void +} diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll index cfa03bb17ec..79b688551eb 100644 --- a/test/CodeGen/X86/inline-asm-tied.ll +++ b/test/CodeGen/X86/inline-asm-tied.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl %edx, 12(%esp)} | count 2 +; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl %edx, 4(%esp)} | count 2 ; rdar://6992609 target triple = "i386-apple-darwin9.0" diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index 408fb20b8d8..8385a29fa22 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86-64 -o %t ; RUN: not grep inc %t ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 13 +; RUN: grep addq %t | count 12 ; RUN: not grep addb %t ; RUN: not grep leaq %t ; RUN: not grep leal %t