- 64-bit blitters will use native "quad"-sized loads/stores, if available.

This commit is contained in:
gbeauche 2001-08-19 17:38:11 +00:00
parent f93d1b483d
commit 4852a06dd2
2 changed files with 97 additions and 35 deletions

View File

@ -64,6 +64,10 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// Pixel conversion primitives consumed by the video_blit.h template below.
// FB_BLIT_2 works on a 32-bit word: within each 16-bit lane the high byte is
// moved to the low byte and the low byte to the high byte (a per-lane byte
// swap).
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 8) & 0x00ff00ff) | (((src) & 0x00ff00ff) << 8))
// FB_BLIT_4 is the same per-lane byte swap applied to the four 16-bit lanes of
// a 64-bit word. UVAL64 is defined outside this view; presumably it builds a
// 64-bit unsigned constant -- confirm.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 8) & UVAL64(0x00ff00ff00ff00ff)) | \
(((src) & UVAL64(0x00ff00ff00ff00ff)) << 8))
// Template parameters for this instantiation; video_blit.h tests FB_DEPTH with
// "#if FB_DEPTH <= 16" style conditionals.
#define FB_DEPTH 15
#include "video_blit.h"
@ -81,6 +85,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane:
//   bits 10-14 -> bits 0-4, bits 5-9 stay in place, bits 0-4 -> bits 10-14;
//   bit 15 is cleared.
// I.e. the outer two 5-bit fields trade places (presumably red <-> blue, given
// the BGR555 function name -- confirm against the caller's pixel layout).
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 10) & 0x001f001f) | ((src) & 0x03e003e0) | (((src) << 10) & 0x7c007c00))
// 64-bit variant: identical field exchange on four 16-bit lanes at once. The
// masks are the 32-bit masks repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 10) & UVAL64(0x001f001f001f001f)) | \
( (src) & UVAL64(0x03e003e003e003e0)) | \
(((src) << 10) & UVAL64(0x7c007c007c007c00)))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 15
#define FB_FUNC_NAME Blit_BGR555_NBO
#include "video_blit.h"
@ -93,6 +102,12 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane the result is built
// from four moved bit fields:
//   input bits 10-14 -> output bits 8-12   (>> 2)
//   input bits  8-9  -> output bits 0-1    (>> 8)
//   input bits  5-7  -> output bits 13-15  (<< 8)
//   input bits  0-4  -> output bits 2-6    (<< 2)
// Output bit 7 is always 0 and input bit 15 is discarded.
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 2) & 0x1f001f00) | (((src) >> 8) & 0x30003) | (((src) << 8) & 0xe000e000) | (((src) << 2) & 0x7c007c))
// 64-bit variant: the same four field moves on four 16-bit lanes; each mask is
// the 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 2) & UVAL64(0x1f001f001f001f00)) | \
(((src) >> 8) & UVAL64(0x0003000300030003)) | \
(((src) << 8) & UVAL64(0xe000e000e000e000)) | \
(((src) << 2) & UVAL64(0x007c007c007c007c)))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 15
#define FB_FUNC_NAME Blit_BGR555_OBO
#include "video_blit.h"
@ -107,6 +122,12 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane the result is built
// from four moved bit fields:
//   input bits  2-6  -> output bits 0-4    (>> 2)
//   input bits 13-15 -> output bits 5-7    (>> 8)
//   input bits  0-1  -> output bits 8-9    (<< 8)
//   input bits  8-12 -> output bits 10-14  (<< 2)
// Output bit 15 is always 0 and input bit 7 is discarded.
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 2) & 0x1f001f) | (((src) >> 8) & 0xe000e0) | (((src) << 8) & 0x03000300) | (((src) << 2) & 0x7c007c00))
// 64-bit variant: the same four field moves on four 16-bit lanes; each mask is
// the 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 2) & UVAL64(0x001f001f001f001f)) | \
(((src) >> 8) & UVAL64(0x00e000e000e000e0)) | \
(((src) << 8) & UVAL64(0x0300030003000300)) | \
(((src) << 2) & UVAL64(0x7c007c007c007c00)))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 15
#define FB_FUNC_NAME Blit_BGR555_NBO
#include "video_blit.h"
@ -119,6 +140,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane:
//   input bits 2-6  -> output bits 8-12  (<< 6)
//   input bits 8-12 -> output bits 2-6   (>> 6)
//   bits 0-1 and 13-15 stay in place
// Output bit 7 is always 0 (input bit 7 is discarded).
#define FB_BLIT_2(dst, src) \
(dst = (((src) << 6) & 0x1f001f00) | ((src) & 0xe003e003) | (((src) >> 6) & 0x7c007c))
// 64-bit variant: the same exchange of the two 5-bit fields on four 16-bit
// lanes; each mask is the 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) << 6) & UVAL64(0x1f001f001f001f00)) | \
( (src) & UVAL64(0xe003e003e003e003)) | \
(((src) >> 6) & UVAL64(0x007c007c007c007c)))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 15
#define FB_FUNC_NAME Blit_BGR555_OBO
#include "video_blit.h"
@ -139,6 +165,10 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane bits 0-4 stay in
// place and bits 5-14 move up by one to bits 6-15; output bit 5 is always 0
// and input bit 15 is discarded. This widens the layout from 15 to 16
// significant bits per pixel.
#define FB_BLIT_2(dst, src) \
(dst = (((src) & 0x001f001f) | (((src) << 1) & 0xffc0ffc0)))
// 64-bit variant: the same widening on four 16-bit lanes; each mask is the
// 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) & UVAL64(0x001f001f001f001f)) | \
(((src) << 1) & UVAL64(0xffc0ffc0ffc0ffc0))))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 16
#define FB_FUNC_NAME Blit_RGB565_NBO
#include "video_blit.h"
@ -151,6 +181,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane:
//   input bits 7-14 -> output bits 0-7    (>> 7)
//   input bits 5-6  -> output bits 14-15  (<< 9)
//   input bits 0-4  -> output bits 8-12   (<< 8)
// Output bit 13 is always 0 and input bit 15 is discarded.
#define FB_BLIT_2(dst, src) \
(dst = ((((src) >> 7) & 0x00ff00ff) | (((src) << 9) & 0xc000c000) | (((src) << 8) & 0x1f001f00)))
// 64-bit variant: the same three field moves on four 16-bit lanes; each mask
// is the 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 7) & UVAL64(0x00ff00ff00ff00ff)) | \
(((src) << 9) & UVAL64(0xc000c000c000c000)) | \
(((src) << 8) & UVAL64(0x1f001f001f001f00)))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 16
#define FB_FUNC_NAME Blit_RGB565_OBO
#include "video_blit.h"
@ -162,28 +197,13 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 16-bit variant (one pixel-sized word):
//   input bits  8-12 -> output bits 0-4   (>> 8)
//   input bits  0-6  -> output bits 9-15  (<< 9)
//   input bits 13-15 -> output bits 6-8   (>> 7)
// Output bit 5 is always 0 and input bit 7 is discarded.
#define FB_BLIT_1(dst, src) \
(dst = (((src) >> 8) & 0x001f) | (((src) << 9) & 0xfe00) | (((src) >> 7) & 0x01c0))
// gb-- Disabled because I don't see any improvement
// The "#if 0" keeps this hand-written i386 version permanently compiled out.
// It performs the same three mask-and-shift steps as the C expression in the
// #else branch, masking before shifting instead of after.
#if 0 && defined(__i386__) && defined(X86_ASSEMBLY)
#define FB_BLIT_2(dst, src) \
__asm__ ( "movl %0,%%ebx\n\t" \
"movl %0,%%ebp\n\t" \
"andl $0x1f001f00,%%ebx\n\t" \
"andl $0x007f007f,%0\n\t" \
"andl $0xe000e000,%%ebp\n\t" \
"shrl $8,%%ebx\n\t" \
"shrl $7,%%ebp\n\t" \
"shll $9,%0\n\t" \
"orl %%ebx,%%ebp\n\t" \
"orl %%ebp,%0\n\t" \
: "=r" (dst) : "0" (src) : "ebx", "ebp", "cc" )
#else
// 32-bit variant actually used: the FB_BLIT_1 field moves applied to two
// 16-bit lanes at once.
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 8) & 0x001f001f) | (((src) << 9) & 0xfe00fe00) | (((src) >> 7) & 0x01c001c0))
#endif
// 64-bit variant: the same field moves on four 16-bit lanes; each mask is the
// 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 8) & UVAL64(0x001f001f001f001f)) | \
(((src) << 9) & UVAL64(0xfe00fe00fe00fe00)) | \
(((src) >> 7) & UVAL64(0x01c001c001c001c0)))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_DEPTH 16
#define FB_FUNC_NAME Blit_RGB565_NBO
@ -197,6 +217,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant, two 16-bit lanes per word. In each lane:
//   input bits 8-12  -> output bits 8-12   (kept in place)
//   input bits 0-6   -> output bits 1-7    (<< 1)
//   input bits 13-14 -> output bits 14-15  (<< 1)
//   input bit  15    -> output bit  0      (>> 15)
// Output bit 13 is always 0 and input bit 7 is discarded.
//
// The shifted term is masked with 0xc0fe rather than 0xe0fe: with 0xe0fe,
// input bit 12 -- already copied to output bit 12 by the first term -- was
// also shifted into output bit 13, so one field leaked into the low bit of
// its neighbour. The other RGB565 blitters in this file leave that extra low
// bit at 0, and this one now agrees with them.
// NOTE(review): the 16-bit FB_BLIT_1 for this format is outside this view; if
// it also uses 0xe0fe it needs the same mask so edge pixels match.
#define FB_BLIT_2(dst, src) \
    (dst = (((src) & 0x1f001f00) | (((src) << 1) & 0xc0fec0fe) | (((src) >> 15) & 0x10001)))
// 64-bit variant: the same field moves on four 16-bit lanes; each mask is the
// 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
    (dst = (((src) & UVAL64(0x1f001f001f001f00)) | \
            (((src) << 1) & UVAL64(0xc0fec0fec0fec0fe)) | \
            (((src) >> 15) & UVAL64(0x0001000100010001))))
#define FB_DEPTH 16
#define FB_FUNC_NAME Blit_RGB565_OBO
#include "video_blit.h"
@ -216,6 +241,12 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant: full byte reversal of the word (byte 3 -> byte 0,
// byte 2 -> byte 1, byte 1 -> byte 2, byte 0 -> byte 3).
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 24) & 0xff) | (((src) >> 8) & 0xff00) | (((src) & 0xff00) << 8) | (((src) & 0xff) << 24))
// 64-bit variant: byte-reverses each 32-bit half independently. The two halves
// are NOT exchanged, so this is two 32-bit swaps rather than a 64-bit swap.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 24) & UVAL64(0x000000ff000000ff)) | \
(((src) >> 8) & UVAL64(0x0000ff000000ff00)) | \
(((src) & UVAL64(0x0000ff000000ff00)) << 8) | \
(((src) & UVAL64(0x000000ff000000ff)) << 24))
// Template parameter for this instantiation of video_blit.h.
#define FB_DEPTH 24
#include "video_blit.h"
@ -230,6 +261,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant: exchanges byte 0 and byte 2 of the word, keeps byte 1 in
// place and clears byte 3.
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 16) & 0xff) | ((src) & 0xff00) | (((src) & 0xff) << 16))
// 64-bit variant: the same byte 0 <-> byte 2 exchange applied independently to
// each 32-bit half; each mask is the 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 16) & UVAL64(0x000000ff000000ff)) | \
( (src) & UVAL64(0x0000ff000000ff00)) | \
(((src) & UVAL64(0x000000ff000000ff)) << 16))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_FUNC_NAME Blit_BGR888_NBO
#define FB_DEPTH 24
#include "video_blit.h"
@ -241,6 +277,11 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant: byte 2 -> byte 0, byte 2 kept in place, byte 0 -> byte 2.
// Bytes 1 and 3 of the result are always 0.
// NOTE(review): byte 2 of the result is the OR of the original byte 2 and the
// original byte 0, and the original byte 1 is dropped entirely. That does not
// look like a clean channel permutation -- verify against the intended
// opposite-byte-order BGR888 layout before relying on this path.
#define FB_BLIT_2(dst, src) \
(dst = (((src) >> 16) & 0xff) | ((src) & 0xff0000) | (((src) & 0xff) << 16))
// 64-bit variant: the same expression applied independently to each 32-bit
// half (masks are the 32-bit masks repeated twice), so it shares the behaviour
// flagged above.
#define FB_BLIT_4(dst, src) \
(dst = (((src) >> 16) & UVAL64(0x000000ff000000ff)) | \
( (src) & UVAL64(0x00ff000000ff0000)) | \
(((src) & UVAL64(0x000000ff000000ff)) << 16))
// Parameters for the video_blit.h template: FB_FUNC_NAME becomes the name of
// the generated static blit function.
#define FB_FUNC_NAME Blit_BGR888_OBO
#define FB_DEPTH 24
#include "video_blit.h"
@ -258,6 +299,9 @@ static void Blit_Copy_Raw(uint8 * dest, const uint8 * source, uint32 length)
// 32-bit variant: keeps bytes 0 and 2 in place and moves byte 1 up into
// byte 3; byte 1 of the result is always 0 and the original byte 3 is
// discarded.
// NOTE(review): the function name and target pixel layout for this variant are
// outside this view -- confirm the intended channel order.
#define FB_BLIT_2(dst, src) \
(dst = ((src) & 0xff00ff) | (((src) & 0xff00) << 16))
// 64-bit variant: the same operation applied independently to each 32-bit
// half; each mask is the 32-bit mask repeated twice.
#define FB_BLIT_4(dst, src) \
(dst = ((src) & UVAL64(0x00ff00ff00ff00ff)) | (((src) & UVAL64(0x0000ff000000ff00)) << 16))
// Template parameter for this instantiation of video_blit.h.
#define FB_DEPTH 24
#include "video_blit.h"

View File

@ -30,10 +30,15 @@
# error "Undefined 32-bit word blit function"
#endif
// FB_BLIT_4(dst, src) is a mandatory template parameter: the includer must
// define it before including this file. It is applied to 64-bit words in the
// unrolled main loop of the generated function.
#if !defined(FB_BLIT_4)
# error "Undefined 64-bit word blit function"
#endif
static void FB_FUNC_NAME(uint8 * dest, const uint8 * source, uint32 length)
{
#define DEREF_LONG_PTR(ptr, ofs) (((uint32 *)(ptr))[(ofs)])
#define DEREF_WORD_PTR(ptr, ofs) (((uint16 *)(ptr))[(ofs)])
#define DEREF_LONG_PTR(ptr, ofs) (((uint32 *)(ptr))[(ofs)])
#define DEREF_QUAD_PTR(ptr, ofs) (((uint64 *)(ptr))[(ofs)])
#ifndef UNALIGNED_PROFITABLE
#if FB_DEPTH <= 8
@ -54,28 +59,37 @@ static void FB_FUNC_NAME(uint8 * dest, const uint8 * source, uint32 length)
#endif
#endif
// Blit 4-byte words
if (length >= 4) {
const int remainder = (length / 4) % 8;
source += remainder * 4;
dest += remainder * 4;
// Blit 8-byte words
if (length >= 8) {
const int remainder = (length / 8) % 8;
source += remainder * 8;
dest += remainder * 8;
int n = ((length / 4) + 7) / 8;
int n = ((length / 8) + 7) / 8;
switch (remainder) {
case 0: do {
dest += 32; source += 32;
FB_BLIT_2(DEREF_LONG_PTR(dest, -8), DEREF_LONG_PTR(source, -8));
case 7: FB_BLIT_2(DEREF_LONG_PTR(dest, -7), DEREF_LONG_PTR(source, -7));
case 6: FB_BLIT_2(DEREF_LONG_PTR(dest, -6), DEREF_LONG_PTR(source, -6));
case 5: FB_BLIT_2(DEREF_LONG_PTR(dest, -5), DEREF_LONG_PTR(source, -5));
case 4: FB_BLIT_2(DEREF_LONG_PTR(dest, -4), DEREF_LONG_PTR(source, -4));
case 3: FB_BLIT_2(DEREF_LONG_PTR(dest, -3), DEREF_LONG_PTR(source, -3));
case 2: FB_BLIT_2(DEREF_LONG_PTR(dest, -2), DEREF_LONG_PTR(source, -2));
case 1: FB_BLIT_2(DEREF_LONG_PTR(dest, -1), DEREF_LONG_PTR(source, -1));
dest += 64; source += 64;
FB_BLIT_4(DEREF_QUAD_PTR(dest, -8), DEREF_QUAD_PTR(source, -8));
case 7: FB_BLIT_4(DEREF_QUAD_PTR(dest, -7), DEREF_QUAD_PTR(source, -7));
case 6: FB_BLIT_4(DEREF_QUAD_PTR(dest, -6), DEREF_QUAD_PTR(source, -6));
case 5: FB_BLIT_4(DEREF_QUAD_PTR(dest, -5), DEREF_QUAD_PTR(source, -5));
case 4: FB_BLIT_4(DEREF_QUAD_PTR(dest, -4), DEREF_QUAD_PTR(source, -4));
case 3: FB_BLIT_4(DEREF_QUAD_PTR(dest, -3), DEREF_QUAD_PTR(source, -3));
case 2: FB_BLIT_4(DEREF_QUAD_PTR(dest, -2), DEREF_QUAD_PTR(source, -2));
case 1: FB_BLIT_4(DEREF_QUAD_PTR(dest, -1), DEREF_QUAD_PTR(source, -1));
} while (--n > 0);
}
}
// There could be one long left to blit
if (length & 4) {
FB_BLIT_2(DEREF_LONG_PTR(dest, 0), DEREF_LONG_PTR(source, 0));
#if FB_DEPTH <= 16
dest += 4;
source += 4;
#endif
}
#if FB_DEPTH <= 16
// There could be one word left to blit
if (length & 2) {
@ -107,6 +121,10 @@ static void FB_FUNC_NAME(uint8 * dest, const uint8 * source, uint32 length)
#undef FB_BLIT_2
#endif
// Drop the per-instantiation parameters so the includer can define fresh
// values before the next #include of this template.
#ifdef FB_BLIT_4
#undef FB_BLIT_4
#endif
#ifdef FB_DEPTH
#undef FB_DEPTH
#endif