From 4852a06dd25abed3d5cb09d926b0868ec19357ea Mon Sep 17 00:00:00 2001 From: gbeauche <> Date: Sun, 19 Aug 2001 17:38:11 +0000 Subject: [PATCH] - 64-bit blitters will use native "quad"-sized loads/stores, if available. --- BasiliskII/src/Unix/video_blit.cpp | 82 +++++++++++++++++++++++------- BasiliskII/src/Unix/video_blit.h | 50 ++++++++++++------ 2 files changed, 97 insertions(+), 35 deletions(-) diff --git a/BasiliskII/src/Unix/video_blit.cpp b/BasiliskII/src/Unix/video_blit.cpp index 163be1d2..0b304adb 100644 --- a/BasiliskII/src/Unix/video_blit.cpp +++ b/BasiliskII/src/Unix/video_blit.cpp @@ -64,6 +64,10 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 8) & 0x00ff00ff) | (((src) & 0x00ff00ff) << 8)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 8) & UVAL64(0x00ff00ff00ff00ff)) | \ + (((src) & UVAL64(0x00ff00ff00ff00ff)) << 8)) + #define FB_DEPTH 15 #include "video_blit.h" @@ -81,6 +85,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 10) & 0x001f001f) | ((src) & 0x03e003e0) | (((src) << 10) & 0x7c007c00)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 10) & UVAL64(0x001f001f001f001f)) | \ + ( (src) & UVAL64(0x03e003e003e003e0)) | \ + (((src) << 10) & UVAL64(0x7c007c007c007c00))) + #define FB_DEPTH 15 #define FB_FUNC_NAME Blit_BGR555_NBO #include "video_blit.h" @@ -93,6 +102,12 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 2) & 0x1f001f00) | (((src) >> 8) & 0x30003) | (((src) << 8) & 0xe000e000) | (((src) << 2) & 0x7c007c)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 2) & UVAL64(0x1f001f001f001f00)) | \ + (((src) >> 8) & UVAL64(0x0003000300030003)) | \ + (((src) << 8) & UVAL64(0xe000e000e000e000)) | \ + (((src) << 2) & UVAL64(0x007c007c007c007c))) + #define FB_DEPTH 15 #define FB_FUNC_NAME Blit_BGR555_OBO #include "video_blit.h" @@ -107,6 +122,12 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 2) & 0x1f001f) | (((src) >> 8) & 0xe000e0) | (((src) << 8) & 0x03000300) | (((src) << 2) & 0x7c007c00)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 2) & UVAL64(0x001f001f001f001f)) | \ + (((src) >> 8) & UVAL64(0x00e000e000e000e0)) | \ + (((src) << 8) & UVAL64(0x0300030003000300)) | \ + (((src) << 2) & UVAL64(0x7c007c007c007c00))) + #define FB_DEPTH 15 #define FB_FUNC_NAME Blit_BGR555_NBO #include "video_blit.h" @@ -119,6 +140,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) << 6) & 0x1f001f00) | ((src) & 0xe003e003) | (((src) >> 6) & 0x7c007c)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) << 6) & UVAL64(0x1f001f001f001f00)) | \ + ( (src) & UVAL64(0xe003e003e003e003)) | \ + (((src) >> 6) & UVAL64(0x007c007c007c007c))) + #define FB_DEPTH 15 #define FB_FUNC_NAME Blit_BGR555_OBO #include "video_blit.h" @@ -139,6 +165,10 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) & 0x001f001f) | (((src) << 1) & 0xffc0ffc0))) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) & UVAL64(0x001f001f001f001f)) | \ + (((src) << 1) & UVAL64(0xffc0ffc0ffc0ffc0)))) + #define FB_DEPTH 16 #define FB_FUNC_NAME Blit_RGB565_NBO #include "video_blit.h" @@ -151,6 +181,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = ((((src) >> 7) & 0x00ff00ff) | (((src) << 9) & 0xc000c000) | (((src) << 8) & 0x1f001f00))) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 7) & UVAL64(0x00ff00ff00ff00ff)) | \ + (((src) << 9) & UVAL64(0xc000c000c000c000)) | \ + (((src) << 8) & UVAL64(0x1f001f001f001f00))) + #define FB_DEPTH 16 #define FB_FUNC_NAME Blit_RGB565_OBO #include "video_blit.h" @@ -162,28 +197,13 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_1(dst, src) \ (dst = (((src) >> 8) & 0x001f) | (((src) << 9) & 0xfe00) | (((src) >> 7) & 0x01c0)) -// gb-- Disabled because I don't see any improvement -#if 0 && defined(__i386__) && defined(X86_ASSEMBLY) - -#define FB_BLIT_2(dst, src) \ - __asm__ ( "movl %0,%%ebx\n\t" \ - "movl %0,%%ebp\n\t" \ - "andl $0x1f001f00,%%ebx\n\t" \ - "andl $0x007f007f,%0\n\t" \ - "andl $0xe000e000,%%ebp\n\t" \ - "shrl $8,%%ebx\n\t" \ - "shrl $7,%%ebp\n\t" \ - "shll $9,%0\n\t" \ - "orl %%ebx,%%ebp\n\t" \ - "orl %%ebp,%0\n\t" \ - : "=r" (dst) : "0" (src) : "ebx", "ebp", "cc" ) - -#else - #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 8) & 0x001f001f) | (((src) << 9) & 0xfe00fe00) | (((src) >> 7) & 0x01c001c0)) -#endif +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 8) & UVAL64(0x001f001f001f001f)) | \ + (((src) << 9) & UVAL64(0xfe00fe00fe00fe00)) | \ + (((src) >> 7) & UVAL64(0x01c001c001c001c0))) #define FB_DEPTH 16 #define FB_FUNC_NAME Blit_RGB565_NBO @@ -197,6 +217,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) & 0x1f001f00) | (((src) << 1) & 0xe0fee0fe) | (((src) >> 15) & 0x10001))) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) & UVAL64(0x1f001f001f001f00)) | \ + (((src) << 1) & UVAL64(0xe0fee0fee0fee0fe)) | \ + (((src) >> 15) & UVAL64(0x0001000100010001)))) + #define FB_DEPTH 16 #define FB_FUNC_NAME Blit_RGB565_OBO #include "video_blit.h" @@ -216,6 +241,12 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 24) & 0xff) | (((src) >> 8) & 0xff00) | (((src) & 0xff00) << 8) | (((src) & 0xff) << 24)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 24) & UVAL64(0x000000ff000000ff)) | \ + (((src) >> 8) & UVAL64(0x0000ff000000ff00)) | \ + (((src) & UVAL64(0x0000ff000000ff00)) << 8) | \ + (((src) & UVAL64(0x000000ff000000ff)) << 24)) + #define FB_DEPTH 24 #include "video_blit.h" @@ -230,6 +261,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 16) & 0xff) | ((src) & 0xff00) | (((src) & 0xff) << 16)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 16) & UVAL64(0x000000ff000000ff)) | \ + ( (src) & UVAL64(0x0000ff000000ff00)) | \ + (((src) & UVAL64(0x000000ff000000ff)) << 16)) + #define FB_FUNC_NAME Blit_BGR888_NBO #define FB_DEPTH 24 #include "video_blit.h" @@ -241,6 +277,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = (((src) >> 16) & 0xff) | ((src) & 0xff0000) | (((src) & 0xff) << 16)) +#define FB_BLIT_4(dst, src) \ + (dst = (((src) >> 16) & UVAL64(0x000000ff000000ff)) | \ + ( (src) & UVAL64(0x00ff000000ff0000)) | \ + (((src) & UVAL64(0x000000ff000000ff)) << 16)) + #define FB_FUNC_NAME Blit_BGR888_OBO #define FB_DEPTH 24 #include "video_blit.h" @@ -258,6 +299,9 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length) #define FB_BLIT_2(dst, src) \ (dst = ((src) & 0xff00ff) | (((src) & 0xff00) << 16)) +#define FB_BLIT_4(dst, src) \ + (dst = ((src) & UVAL64(0x00ff00ff00ff00ff)) | (((src) & UVAL64(0x0000ff000000ff00)) << 16)) + #define FB_DEPTH 24 #include "video_blit.h" diff --git a/BasiliskII/src/Unix/video_blit.h b/BasiliskII/src/Unix/video_blit.h index e69f9927..94d41410 100644 --- a/BasiliskII/src/Unix/video_blit.h +++ b/BasiliskII/src/Unix/video_blit.h @@ -30,10 +30,15 @@ # error "Undefined 32-bit word blit function" #endif +#if !defined(FB_BLIT_4) +# error "Undefined 64-bit word blit function" +#endif + static void FB_FUNC_NAME(uint8 * dest, const uint8 * source, uint32 length) { -#define DEREF_LONG_PTR(ptr, ofs) (((uint32 *)(ptr))[(ofs)]) #define DEREF_WORD_PTR(ptr, ofs) (((uint16 *)(ptr))[(ofs)]) +#define DEREF_LONG_PTR(ptr, ofs) (((uint32 *)(ptr))[(ofs)]) +#define DEREF_QUAD_PTR(ptr, ofs) (((uint64 *)(ptr))[(ofs)]) #ifndef UNALIGNED_PROFITABLE #if FB_DEPTH <= 8 @@ -54,28 +59,37 @@ static void FB_FUNC_NAME(uint8 * dest, const uint8 * source, uint32 length) #endif #endif - // Blit 4-byte words - if (length >= 4) { - const int remainder = (length / 4) % 8; - source += remainder * 4; - dest += remainder * 4; + // Blit 8-byte words + if (length >= 8) { + const int remainder = (length / 8) % 8; + source += remainder * 8; + dest += remainder * 8; - int n = ((length / 4) + 7) / 8; + int n = ((length / 8) + 7) / 8; switch (remainder) { case 0: do { - dest += 32; source += 32; - FB_BLIT_2(DEREF_LONG_PTR(dest, -8), DEREF_LONG_PTR(source, -8)); - case 7: FB_BLIT_2(DEREF_LONG_PTR(dest, -7), DEREF_LONG_PTR(source, -7)); - case 6: FB_BLIT_2(DEREF_LONG_PTR(dest, -6), DEREF_LONG_PTR(source, -6)); - case 5: FB_BLIT_2(DEREF_LONG_PTR(dest, -5), DEREF_LONG_PTR(source, -5)); - case 4: FB_BLIT_2(DEREF_LONG_PTR(dest, -4), DEREF_LONG_PTR(source, -4)); - case 3: FB_BLIT_2(DEREF_LONG_PTR(dest, -3), DEREF_LONG_PTR(source, -3)); - case 2: FB_BLIT_2(DEREF_LONG_PTR(dest, -2), DEREF_LONG_PTR(source, -2)); - case 1: FB_BLIT_2(DEREF_LONG_PTR(dest, -1), DEREF_LONG_PTR(source, -1)); + dest += 64; source += 64; + FB_BLIT_4(DEREF_QUAD_PTR(dest, -8), DEREF_QUAD_PTR(source, -8)); + case 7: FB_BLIT_4(DEREF_QUAD_PTR(dest, -7), DEREF_QUAD_PTR(source, -7)); + case 6: FB_BLIT_4(DEREF_QUAD_PTR(dest, -6), DEREF_QUAD_PTR(source, -6)); + case 5: FB_BLIT_4(DEREF_QUAD_PTR(dest, -5), DEREF_QUAD_PTR(source, -5)); + case 4: FB_BLIT_4(DEREF_QUAD_PTR(dest, -4), DEREF_QUAD_PTR(source, -4)); + case 3: FB_BLIT_4(DEREF_QUAD_PTR(dest, -3), DEREF_QUAD_PTR(source, -3)); + case 2: FB_BLIT_4(DEREF_QUAD_PTR(dest, -2), DEREF_QUAD_PTR(source, -2)); + case 1: FB_BLIT_4(DEREF_QUAD_PTR(dest, -1), DEREF_QUAD_PTR(source, -1)); } while (--n > 0); } } + // There could be one long left to blit + if (length & 4) { + FB_BLIT_2(DEREF_LONG_PTR(dest, 0), DEREF_LONG_PTR(source, 0)); +#if FB_DEPTH <= 16 + dest += 4; + source += 4; +#endif + } + #if FB_DEPTH <= 16 // There could be one word left to blit if (length & 2) { @@ -107,6 +121,10 @@ static void FB_FUNC_NAME(uint8 * dest, const uint8 * source, uint32 length) #undef FB_BLIT_2 #endif +#ifdef FB_BLIT_4 +#undef FB_BLIT_4 +#endif + #ifdef FB_DEPTH #undef FB_DEPTH #endif