From 9fd58f05d5f87ca4c4113eb3f78e1932a4e2f8d5 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 29 Feb 2012 19:44:41 +0000 Subject: [PATCH] Intel Atom instruction itineraries for mov sign extension and mov zero extension. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrExtension.td | 78 +++++++++++++++++------------ lib/Target/X86/X86Schedule.td | 11 ++++ lib/Target/X86/X86ScheduleAtom.td | 11 ++++ 3 files changed, 68 insertions(+), 32 deletions(-) diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 8835d0aaa18..0d5490ad9cc 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -37,40 +37,47 @@ let neverHasSideEffects = 1 in { } + // Sign/Zero extenders def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, + TB, OpSize; def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, + TB, OpSize; def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))]>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))]>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB; + [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, + TB; def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, + TB, OpSize; def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, + TB, OpSize; def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))]>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))]>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; + [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, + TB; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -78,12 +85,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB; + [], IIC_MOVZX>, TB; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB; + [], IIC_MOVZX>, TB; // MOVSX64rr8 always has a REX prefix and it has an 8-bit register // operand, which makes it a rare instruction with an 8-bit register @@ -91,32 +98,38 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))]>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; + [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, + TB; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))]>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; + [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, + TB; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))]>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; // FIXME: These should be Pat patterns. let isCodeGenOnly = 1 in { @@ -124,15 +137,17 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))]>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, + TB; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))]>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], + IIC_MOVZX>, TB; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -142,10 +157,9 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))]>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; - - + "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], + IIC_MOVZX>; } diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 9692833486e..d6d01495666 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -103,6 +103,17 @@ def IIC_CALL_FAR_PTR : InstrItinClass; // ret def IIC_RET : InstrItinClass; def IIC_RET_IMM : InstrItinClass; +//sign extension movs +def IIC_MOVSX : InstrItinClass; +def IIC_MOVSX_R16_R8 : InstrItinClass; +def IIC_MOVSX_R16_M8 : InstrItinClass; +def IIC_MOVSX_R16_R16 : InstrItinClass; +def IIC_MOVSX_R32_R32 : InstrItinClass; +//zero extension movs +def IIC_MOVZX : InstrItinClass; +def IIC_MOVZX_R16_R8 : InstrItinClass; +def IIC_MOVZX_R16_M8 : InstrItinClass; + // SSE scalar/parallel binary operations def IIC_SSE_ALU_F32S_RR : InstrItinClass; def IIC_SSE_ALU_F32S_RM : InstrItinClass; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 67cd0f27668..e8cf72a2934 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -133,6 +133,17 @@ def AtomItineraries : ProcessorItineraries< //ret InstrItinData] >, InstrItinData, InstrStage<1, [Port1]>] >, + //sign extension movs + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + //zero extension movs + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // SSE binary operations // arithmetic fp scalar InstrItinData] >,