From f1b4d26e674a067b9f5c8622ad79c95d1e094046 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 12 Jan 2010 04:42:54 +0000 Subject: [PATCH] Reapply the MOV64r0 patch, with a fix: MOV64r0 clobbers EFLAGS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93229 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 8 +++++++ lib/Target/X86/X86ISelDAGToDAG.cpp | 25 ++++---------------- lib/Target/X86/X86Instr64bit.td | 20 +++++++++------- lib/Target/X86/X86InstrInfo.cpp | 14 +++++++++-- lib/Target/X86/X86InstrInfo.td | 15 +++++++----- test/CodeGen/X86/remat-mov-0.ll | 1 - 6 files changed, 45 insertions(+), 38 deletions(-) diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 1015b692473..a4939af1b8e 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -399,6 +399,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(X86::MOVZX32rm16); lower_subreg32(&OutMI, 0); break; + case X86::MOV16r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; + case X86::MOV64r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; } } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c795b62050e..e2a53d1118b 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1873,7 +1873,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned LoReg, HiReg, ClrReg; unsigned ClrOpcode, SExtOpcode; - EVT ClrVT = NVT; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: @@ -1883,7 +1882,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV32r0; ClrReg = X86::EDX; ClrVT = MVT::i32; + ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; SExtOpcode = X86::CWD; break; case MVT::i32: @@ -1893,7 +1892,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; case MVT::i64: LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; - ClrOpcode = ~0U; // NOT USED. + ClrOpcode = X86::MOV64r0; SExtOpcode = X86::CQO; break; } @@ -1932,24 +1931,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); } else { // Zero out the high part, effectively zero extending the input. - SDValue ClrNode; - - if (NVT.getSimpleVT() == MVT::i64) { - ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), - 0); - // We just did a 32-bit clear, insert it into a 64-bit register to - // clear the whole 64-bit reg. - SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); - SDValue SubRegNo = - CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); - ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, - MVT::i64, Zero, ClrNode, SubRegNo), - 0); - } else { - ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, ClrVT), 0); - } - + SDValue ClrNode = + SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, ClrNode, InFlag).getValue(1); } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 7077cf9bb0a..d67a48291e2 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1598,17 +1598,21 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // Alias Instructions //===----------------------------------------------------------------------===// -// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's -// equivalent due to implicit zero-extending, and it sometimes has a smaller -// encoding. +// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a +// smaller encoding, but doing so at isel time interferes with rematerialization +// in the current register allocator. For now, this is rewritten when the +// instruction is lowered to an MCInst. // FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove // when we have a better way to specify isel priority. -let AddedComplexity = 1 in -def : Pat<(i64 0), - (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>; +let Defs = [EFLAGS], + AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), + "", + [(set GR64:$dst, 0)]>; - -// Materialize i64 constant where top 32-bits are zero. +// Materialize i64 constant where top 32-bits are zero. This could theoretically +// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however +// that would make it more difficult to rematerialize. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 52077cfd79d..5ef3354f350 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1072,12 +1072,16 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, switch (Opc) { default: break; case X86::MOV8r0: - case X86::MOV32r0: { + case X86::MOV16r0: + case X86::MOV32r0: + case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; + case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri; break; } Clone = false; } @@ -2344,8 +2348,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV32r0) + if (MI->getOpcode() == X86::MOV64r0) + NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); + else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); + else if (MI->getOpcode() == X86::MOV16r0) + NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2613,7 +2621,9 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: + case X86::MOV16r0: case X86::MOV32r0: + case X86::MOV64r0: return true; default: break; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9b69018fa8b..aac9f38c8a9 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -3734,18 +3734,21 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "xor{b}\t$dst, $dst", [(set GR8:$dst, 0)]>; + +// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller +// encoding and avoids a partial-register update sometimes, but doing so +// at isel time interferes with rematerialization in the current register +// allocator. For now, this is rewritten when the instruction is lowered +// to an MCInst. +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", + [(set GR16:$dst, 0)]>, OpSize; def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "xor{l}\t$dst, $dst", [(set GR32:$dst, 0)]>; } -// Use xorl instead of xorw since we don't care about the high 16 bits, -// it's smaller, and it avoids a partial-register update. -let AddedComplexity = 1 in -def : Pat<(i16 0), - (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>; - //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll index 4c96cb4e8ac..c4f768ca529 100644 --- a/test/CodeGen/X86/remat-mov-0.ll +++ b/test/CodeGen/X86/remat-mov-0.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -march=x86-64 | grep {xorl %edi, %edi} | count 4 -; XFAIL: * ; CodeGen should remat the zero instead of spilling it.