Some minor optimizations: use a native xchg instruction on x86 (currently unused), and use movdqa instead of movaps for the stores in the SSE2 code.

This commit is contained in:
gbeauche 2006-07-09 12:19:50 +00:00
parent abc911eaa7
commit 7d5898f97a
2 changed files with 11 additions and 5 deletions

View File

@ -39,6 +39,9 @@ DYNGEN_DEFINE_GLOBAL_REGISTER(2);
* Native ALU operations optimization * Native ALU operations optimization
**/ **/
/* On x86 and x86-64, exchange x and y with a single native xchg instruction.
 * Both operands use the "+r" constraint: each one is both read and written,
 * and the compiler must place it in a general-purpose register.
 * Defining the macro here pre-empts any later `#ifndef do_xchg_32` fallback. */
#if defined(__i386__) || defined(__x86_64__)
#define do_xchg_32(x, y) asm volatile ("xchg %0,%1" : "+r" (x), "+r" (y))
#endif
#ifndef do_udiv_32 #ifndef do_udiv_32
#define do_udiv_32(x, y) ((uint32)x / (uint32)y) #define do_udiv_32(x, y) ((uint32)x / (uint32)y)
#endif #endif
@ -51,6 +54,9 @@ DYNGEN_DEFINE_GLOBAL_REGISTER(2);
#ifndef do_ror_32 #ifndef do_ror_32
#define do_ror_32(x, y) ((x >> y) | (x << (32 - y))) #define do_ror_32(x, y) ((x >> y) | (x << (32 - y)))
#endif #endif
/* Portable fallback: exchange two uint32 lvalues through a temporary.
 * Only defined when no do_xchg_32 has been provided earlier.
 *
 * The arguments are parenthesized so any lvalue expression expands safely,
 * and the temporary carries an unlikely name so it cannot capture a caller
 * argument (with a temporary called `t`, do_xchg_32(t, u) would expand to
 * `uint32 t = t;` and read an uninitialized value).
 * Each argument is evaluated twice, so they must be free of side effects. */
#ifndef do_xchg_32
#define do_xchg_32(x, y) do { uint32 do_xchg_32_tmp_ = (x); (x) = (y); (y) = do_xchg_32_tmp_; } while (0)
#endif
/** /**
@ -130,7 +136,7 @@ DEFINE_OP(umul_32_T0_T1, T0 = (uint32)T0 * (uint32)T1);
DEFINE_OP(smul_32_T0_T1, T0 = (int32)T0 * (int32)T1); DEFINE_OP(smul_32_T0_T1, T0 = (int32)T0 * (int32)T1);
DEFINE_OP(udiv_32_T0_T1, T0 = do_udiv_32(T0, T1)); DEFINE_OP(udiv_32_T0_T1, T0 = do_udiv_32(T0, T1));
DEFINE_OP(sdiv_32_T0_T1, T0 = do_sdiv_32(T0, T1)); DEFINE_OP(sdiv_32_T0_T1, T0 = do_sdiv_32(T0, T1));
DEFINE_OP(xchg_32_T0_T1, { uint32 tmp = T0; T0 = T1; T1 = tmp; }); DEFINE_OP(xchg_32_T0_T1, do_xchg_32(T0, T1));
DEFINE_OP(bswap_16_T0, T0 = bswap_16(T0)); DEFINE_OP(bswap_16_T0, T0 = bswap_16(T0));
DEFINE_OP(bswap_32_T0, T0 = bswap_32(T0)); DEFINE_OP(bswap_32_T0, T0 = bswap_32(T0));

View File

@ -90,8 +90,8 @@ struct powerpc_dyngen_helper {
static inline powerpc_cr_register & cr() { return CPU->cr(); } static inline powerpc_cr_register & cr() { return CPU->cr(); }
static inline powerpc_xer_register & xer() { return CPU->xer(); } static inline powerpc_xer_register & xer() { return CPU->xer(); }
static inline powerpc_spcflags & spcflags() { return CPU->spcflags(); } static inline powerpc_spcflags & spcflags() { return CPU->spcflags(); }
static double & fp_result() { return CPU->fp_result(); } static inline double & fp_result() { return CPU->fp_result(); }
static uint64 & fp_result_dw() { return CPU->fp_result_dw(); } static inline uint64 & fp_result_dw() { return CPU->fp_result_dw(); }
static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); } static inline void set_cr(int crfd, int v) { CPU->cr().set(crfd, v); }
#ifndef REG_T3 #ifndef REG_T3
@ -1715,7 +1715,7 @@ void op_sse2_##NAME(void) \
{ \ { \
asm volatile ("movdqa (%1),%%xmm0\n" \ asm volatile ("movdqa (%1),%%xmm0\n" \
#OP " (%2),%%xmm0\n" \ #OP " (%2),%%xmm0\n" \
"movaps %%xmm0,(%0)\n" \ "movdqa %%xmm0,(%0)\n" \
: : "r" (reg_VD), "r" (reg_##VA), "r" (reg_##VB) \ : : "r" (reg_VD), "r" (reg_##VA), "r" (reg_##VB) \
: __sse_clobbers("xmm0")); \ : __sse_clobbers("xmm0")); \
} }
@ -1752,7 +1752,7 @@ void op_sse2_vsldoi_##SH(void) \
"psrldq %5,%%xmm1\n" \ "psrldq %5,%%xmm1\n" \
"por %%xmm1,%%xmm0\n" \ "por %%xmm1,%%xmm0\n" \
"pshufd %3,%%xmm0,%%xmm0\n" \ "pshufd %3,%%xmm0,%%xmm0\n" \
"movaps %%xmm0,(%0)\n" \ "movdqa %%xmm0,(%0)\n" \
: : \ : : \
"r" (reg_VD), "r" (reg_V0), "r" (reg_V1), \ "r" (reg_VD), "r" (reg_V0), "r" (reg_V1), \
"i" (0x1b), "i" (SH), "i" (16 - SH) \ "i" (0x1b), "i" (SH), "i" (16 - SH) \