From f36a4afaaee1885a14c1b9d58f61b2bd26a75e88 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 19 Mar 2013 21:16:56 +0000 Subject: [PATCH] Annotate X86InstrCompiler.td with SchedRW lists. Add a new WriteZero SchedWrite type for the common dependency-breaking instructions that clear a register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrCompiler.td | 17 +++++++++-------- lib/Target/X86/X86Schedule.td | 4 ++++ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 0be017cb09c..2b27bc5bc5b 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -220,7 +220,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -229,11 +229,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, + Sched<[WriteZero]>; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -245,7 +246,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -254,10 +255,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + IIC_ALU_NONMEM>, Sched<[WriteALU]>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index da0ca7d28ee..dec3f43bd77 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -53,6 +53,10 @@ def WriteLoad : SchedWrite; def WriteStore : SchedWrite; def WriteMove : SchedWrite; +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. defm WriteJump : X86SchedWritePair;