From a5296875f101923b76d383d57a383bfa16406fbe Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Sun, 9 Jul 2006 15:18:08 +0000 Subject: [PATCH] Optimize alignment routine for x86 & x86_64. --- .../src/kpx_cpu/src/cpu/jit/basic-dyngen.cpp | 93 +++++++++++++++++++ .../src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp | 8 -- 2 files changed, 93 insertions(+), 8 deletions(-) diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.cpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.cpp index 19153af2..7c8d863e 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.cpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.cpp @@ -78,3 +78,96 @@ DEFINE_INVOKE(CPU_im_im, (void (*func)(dyngen_cpu_base, uint32, uint32), uint32 DEFINE_INVOKE(CPU_A0_ret_A0, (void *(*func)(dyngen_cpu_base)), (funcptr)); #undef DEFINE_INVOKE + +uint8 * +basic_dyngen::gen_align(int align) +{ + int nbytes = align - (((uintptr)code_ptr()) % align); + if (nbytes == 0) + return code_ptr(); + +#if defined(__i386__) || defined(__x86_64__) + /* Source: GNU Binutils 2.12.90.0.15 */ + /* Various efficient no-op patterns for aligning code labels. + Note: Don't try to assemble the instructions in the comments. + 0L and 0w are not legal. */ + static const uint8 f32_1[] = + {0x90}; /* nop */ + static const uint8 f32_2[] = + {0x89,0xf6}; /* movl %esi,%esi */ + static const uint8 f32_3[] = + {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ + static const uint8 f32_4[] = + {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uint8 f32_5[] = + {0x90, /* nop */ + 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uint8 f32_6[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ + static const uint8 f32_7[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uint8 f32_8[] = + {0x90, /* nop */ + 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uint8 f32_9[] = + {0x89,0xf6, /* movl %esi,%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uint8 f32_10[] = + {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uint8 f32_11[] = + {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uint8 f32_12[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ + static const uint8 f32_13[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uint8 f32_14[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uint8 f32_15[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uint8 f32_16[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uint8 *const f32_patt[] = { + f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, + f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + }; + static const uint8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 }; + +#if defined(__x86_64__) + /* The recommended way to pad 64bit code is to use NOPs preceded by + maximally four 0x66 prefixes. Balance the size of nops. */ + int i; + int nnops = (nbytes + 3) / 4; + int len = nbytes / nnops; + int remains = nbytes - nnops * len; + + for (i = 0; i < remains; i++) { + emit_block(prefixes, len); + emit_8(0x90); // NOP + } + for (; i < nnops; i++) { + emit_block(prefixes, len - 1); + emit_8(0x90); // NOP + } +#else + int nloops = nbytes / 16; + while (nloops-- > 0) + emit_block(f32_16, sizeof(f32_16)); + + nbytes %= 16; + if (nbytes) + emit_block(f32_patt[nbytes - 1], nbytes); +#endif +#else +#warning "FIXME: tune for your target" + for (int i = 0; i < nbytes; i++) + emit_8(0); +#endif + return code_ptr(); +} diff --git a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp index f3a56237..b0d5831c 100644 --- a/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp +++ b/SheepShaver/src/kpx_cpu/src/cpu/jit/basic-dyngen.hpp @@ -290,14 +290,6 @@ basic_dyngen::direct_call_possible(uintptr target) const return false; } -inline uint8 * -basic_dyngen::gen_align(int align) -{ - while ((uintptr)code_ptr() & (align - 1)) - inc_code_ptr(1); - return code_ptr(); -} - inline uint8 * basic_dyngen::gen_start() {