macemu/SheepShaver/src/gfxaccel.cpp

462 lines
13 KiB
C++

/*
* gfxaccel.cpp - Generic Native QuickDraw acceleration
*
* SheepShaver (C) 1997-2008 Marc Hellwig and Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "sysdeps.h"
#include "prefs.h"
#include "video.h"
#include "video_defs.h"
#define DEBUG 0
#include "debug.h"
/*
* Utility functions
*/
// Translate a pixel depth given in bits into the storage size of one pixel in bytes.
// Supported depths: 8 -> 1, 15/16 -> 2, 24/32 -> 4. Any other depth aborts the program.
static inline int bytes_per_pixel(int depth)
{
	if (depth == 8)
		return 1;
	if (depth == 15 || depth == 16)
		return 2;
	if (depth == 24 || depth == 32)
		return 4;

	// Unsupported depth
	abort();
}
// Forward the destination rectangle of the operation described by the
// parameter block at Mac address p to video_set_dirty_area().
// Nothing is reported unless the destination base address equals screen_base.
static inline void NQD_set_dirty_area(uint32 p)
{
	if (ReadMacInt32(p + acclDestBaseAddr) != screen_base)
		return;

	// Rect fields as used here: +0/+4 are the vertical edges, +2/+6 the horizontal ones
	const int16 rect_x0 = (int16)ReadMacInt16(p + acclDestRect + 2);
	const int16 rect_y0 = (int16)ReadMacInt16(p + acclDestRect + 0);
	const int16 rect_x1 = (int16)ReadMacInt16(p + acclDestRect + 6);
	const int16 rect_y1 = (int16)ReadMacInt16(p + acclDestRect + 4);

	// Position is relative to the origin of the destination bounds
	const int16 left   = rect_x0 - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	const int16 top    = rect_y0 - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	const int16 dirt_w = rect_x1 - rect_x0;
	const int16 dirt_h = rect_y1 - rect_y0;

	video_set_dirty_area(left, top, dirt_w, dirt_h);
}
/*
* Rectangle inversion
*/
// Invert (bitwise NOT) `length` bytes of memory starting at `dest`.
//
// The template parameter bpp is the pixel depth in bits. It is only used to
// drop alignment and tail steps that cannot occur for that depth (e.g. a
// 32-bit pixel row never has a 1- or 2-byte remainder).
//
// Unless UNALIGNED_PROFITABLE is defined, the start of the run is first
// aligned to a 32-bit boundary; the bulk is then processed in 64-bit words
// by an 8-way unrolled loop, and the remaining 4/2/1 bytes are handled last.
template< int bpp >
static inline void do_invrect(uint8 *dest, uint32 length)
{
#define INVERT_1(PTR, OFS) ((uint8 *)(PTR))[OFS] = ~((uint8 *)(PTR))[OFS]
#define INVERT_2(PTR, OFS) ((uint16 *)(PTR))[OFS] = ~((uint16 *)(PTR))[OFS]
#define INVERT_4(PTR, OFS) ((uint32 *)(PTR))[OFS] = ~((uint32 *)(PTR))[OFS]
#define INVERT_8(PTR, OFS) ((uint64 *)(PTR))[OFS] = ~((uint64 *)(PTR))[OFS]

#ifndef UNALIGNED_PROFITABLE
	// Align on 16-bit boundaries.
	// The length check matters: without it an empty run at an odd address
	// would wrap the unsigned counter to 0xffffffff and trash memory.
	if (bpp < 16 && (((uintptr)dest) & 1) && length >= 1) {
		INVERT_1(dest, 0);
		dest += 1; length -= 1;
	}

	// Align on 32-bit boundaries
	if (bpp < 32 && (((uintptr)dest) & 2) && length >= 2) {
		INVERT_2(dest, 0);
		dest += 2; length -= 2;
	}
#endif

	// Invert 8-byte words
	if (length >= 8) {
		// r = number of words in the leading partial group, n = number of groups.
		// dest is advanced past the partial group so that the negative offsets
		// below address exactly those r words on the first pass.
		const int r = (length / 8) % 8;
		dest += r * 8;

		int n = ((length / 8) + 7) / 8;
		switch (r) {
		case 0: do {
				dest += 64;
				INVERT_8(dest, -8);
				// fall through
		case 7: INVERT_8(dest, -7);
				// fall through
		case 6: INVERT_8(dest, -6);
				// fall through
		case 5: INVERT_8(dest, -5);
				// fall through
		case 4: INVERT_8(dest, -4);
				// fall through
		case 3: INVERT_8(dest, -3);
				// fall through
		case 2: INVERT_8(dest, -2);
				// fall through
		case 1: INVERT_8(dest, -1);
				} while (--n > 0);
		}
	}

	// 32-bit cell to invert?
	if (length & 4) {
		INVERT_4(dest, 0);
		if (bpp <= 16)
			dest += 4;
	}

	// 16-bit cell to invert?
	if (bpp <= 16 && (length & 2)) {
		INVERT_2(dest, 0);
		if (bpp <= 8)
			dest += 2;
	}

	// 8-bit cell to invert?
	if (bpp <= 8 && (length & 1))
		INVERT_1(dest, 0);

#undef INVERT_1
#undef INVERT_2
#undef INVERT_4
#undef INVERT_8
}
// Native rectangle inversion.
// p is the Mac address of the parameter block describing the operation; the
// destination rectangle, bounds, row bytes, pixel size and base address are
// all read from it. Every pixel inside the rectangle is bitwise inverted.
void NQD_invrect(uint32 p)
{
	D(bug("accl_invrect %08x\n", p));

	// Get inversion parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d, bytes_per_row %d\n", width, height, (int32)ReadMacInt32(p + acclDestRowBytes)));

	//!!?? pen_mode == 14

	// And perform the inversion
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));

	// Empty or inverted rectangle: nothing to do. A negative width must never
	// reach do_invrect(), where it would turn into a huge unsigned length.
	if (width <= 0 || height <= 0)
		return;

	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Row length in bytes. Kept in 32 bits: width * bpp does not fit an int16
	// once a row reaches 32768 bytes (e.g. 8192 pixels at 32 bits per pixel).
	const uint32 row_length = (uint32)width * bpp;

	switch (bpp) {
	case 1:
		for (int i = 0; i < height; i++) {
			do_invrect<8>(dest, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_invrect<16>(dest, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_invrect<32>(dest, row_length);
			dest += dest_row_bytes;
		}
		break;
	}
}
/*
* Rectangle filling
*/
// Fill `length` bytes of memory starting at `dest` with `color`.
//
// `color` is stored as-is by the 32-bit writes, duplicated into both halves
// for the 64-bit writes, and truncated to its low 16 / 8 bits by the narrower
// writes. The template parameter bpp is the pixel depth in bits and is only
// used to drop alignment and tail steps that cannot occur for that depth.
//
// Unless UNALIGNED_PROFITABLE is defined, the start of the run is first
// aligned to a 32-bit boundary; the bulk is then written in 64-bit words by
// an 8-way unrolled loop, and the remaining 4/2/1 bytes are handled last.
template< int bpp >
static inline void do_fillrect(uint8 *dest, uint32 color, uint32 length)
{
#define FILL_1(PTR, OFS, VAL) ((uint8 *)(PTR))[OFS] = (VAL)
#define FILL_2(PTR, OFS, VAL) ((uint16 *)(PTR))[OFS] = (VAL)
#define FILL_4(PTR, OFS, VAL) ((uint32 *)(PTR))[OFS] = (VAL)
#define FILL_8(PTR, OFS, VAL) ((uint64 *)(PTR))[OFS] = (VAL)

#ifndef UNALIGNED_PROFITABLE
	// Align on 16-bit boundaries.
	// The length check matters: without it an empty run at an odd address
	// would wrap the unsigned counter to 0xffffffff and trash memory.
	if (bpp < 16 && (((uintptr)dest) & 1) && length >= 1) {
		FILL_1(dest, 0, color);
		dest += 1; length -= 1;
	}

	// Align on 32-bit boundaries
	if (bpp < 32 && (((uintptr)dest) & 2) && length >= 2) {
		FILL_2(dest, 0, color);
		dest += 2; length -= 2;
	}
#endif

	// Fill 8-byte words
	if (length >= 8) {
		const uint64 c = (((uint64)color) << 32) | color;

		// r = number of words in the leading partial group, n = number of groups.
		// dest is advanced past the partial group so that the negative offsets
		// below address exactly those r words on the first pass.
		const int r = (length / 8) % 8;
		dest += r * 8;

		int n = ((length / 8) + 7) / 8;
		switch (r) {
		case 0: do {
				dest += 64;
				FILL_8(dest, -8, c);
				// fall through
		case 7: FILL_8(dest, -7, c);
				// fall through
		case 6: FILL_8(dest, -6, c);
				// fall through
		case 5: FILL_8(dest, -5, c);
				// fall through
		case 4: FILL_8(dest, -4, c);
				// fall through
		case 3: FILL_8(dest, -3, c);
				// fall through
		case 2: FILL_8(dest, -2, c);
				// fall through
		case 1: FILL_8(dest, -1, c);
				} while (--n > 0);
		}
	}

	// 32-bit cell to fill?
	if (length & 4) {
		FILL_4(dest, 0, color);
		if (bpp <= 16)
			dest += 4;
	}

	// 16-bit cell to fill?
	if (bpp <= 16 && (length & 2)) {
		FILL_2(dest, 0, color);
		if (bpp <= 8)
			dest += 2;
	}

	// 8-bit cell to fill?
	if (bpp <= 8 && (length & 1))
		FILL_1(dest, 0, color);

#undef FILL_1
#undef FILL_2
#undef FILL_4
#undef FILL_8
}
// Native rectangle fill.
// p is the Mac address of the parameter block describing the operation. The
// fill color is the fore pen when the pen mode field equals 8 and the back
// pen otherwise; it is passed through htonl() before being written to memory.
void NQD_fillrect(uint32 p)
{
	D(bug("accl_fillrect %08x\n", p));

	// Get filling parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	uint32 color = htonl(ReadMacInt32(p + acclPenMode) == 8 ? ReadMacInt32(p + acclForePen) : ReadMacInt32(p + acclBackPen));
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));
	D(bug(" bytes_per_row %d color %08x\n", (int32)ReadMacInt32(p + acclDestRowBytes), color));

	// And perform the fill
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));

	// Empty or inverted rectangle: nothing to do. A negative width must never
	// reach memset()/do_fillrect(), where it would become a huge unsigned count.
	if (width <= 0 || height <= 0)
		return;

	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Row length in bytes. Kept in 32 bits: width * bpp does not fit an int16
	// once a row reaches 32768 bytes (e.g. 8192 pixels at 32 bits per pixel).
	const uint32 row_length = (uint32)width * bpp;

	switch (bpp) {
	case 1:
		// One byte per pixel: memset() uses the low byte of color
		for (int i = 0; i < height; i++) {
			memset(dest, color, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_fillrect<16>(dest, color, row_length);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_fillrect<32>(dest, color, row_length);
			dest += dest_row_bytes;
		}
		break;
	}
}
// Hook called for rectangle fill operations.
// Always reports the destination area as dirty. When the operation can be
// handled natively, the draw proc field of the parameter block at p is set to
// the matching native routine and true is returned; otherwise returns false.
bool NQD_fillrect_hook(uint32 p)
{
	D(bug("accl_fillrect_hook %08x\n", p));
	NQD_set_dirty_area(p);

	// Check if we can accelerate this fillrect
	if (ReadMacInt32(p + 0x284) == 0)
		return false;
	if (ReadMacInt32(p + acclDestPixelSize) < 8)
		return false;

	const int mode = ReadMacInt32(p + acclTransferMode);
	switch (mode) {
	case 8:
		// Fill
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_NQD_FILLRECT));
		return true;
	case 10:
		// Invert
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_NQD_INVRECT));
		return true;
	default:
		// Any other transfer mode is not accelerated
		return false;
	}
}
/*
* Isomorphic rectangle blitting
*/
// Native rectangle copy between two pixel maps of identical depth.
// p is the Mac address of the parameter block describing the operation.
// Rows are copied with memmove(), so source and destination may overlap
// within a row. A positive source row-bytes value copies rows top to bottom;
// otherwise the row-bytes values are negated and rows are copied bottom to top.
void NQD_bitblt(uint32 p)
{
	D(bug("accl_bitblt %08x\n", p));

	// Get blitting parameters
	int16 src_X = (int16)ReadMacInt16(p + acclSrcRect + 2) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 2);
	int16 src_Y = (int16)ReadMacInt16(p + acclSrcRect + 0) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 0);
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" src addr %08x, dest addr %08x\n", ReadMacInt32(p + acclSrcBaseAddr), ReadMacInt32(p + acclDestBaseAddr)));
	D(bug(" src X %d, src Y %d, dest X %d, dest Y %d\n", src_X, src_Y, dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));

	// And perform the blit
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclSrcPixelSize));

	// Empty or inverted rectangle: nothing to copy. A negative width must
	// never reach memmove(), where it would become a huge unsigned count.
	if (width <= 0 || height <= 0)
		return;

	// Row length in bytes. Kept in 32 bits: width * bpp does not fit an int16
	// once a row reaches 32768 bytes (e.g. 8192 pixels at 32 bits per pixel).
	const uint32 row_length = (uint32)width * bpp;

	if ((int32)ReadMacInt32(p + acclSrcRowBytes) > 0) {
		// Top to bottom
		const int src_row_bytes = (int32)ReadMacInt32(p + acclSrcRowBytes);
		const int dst_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
		uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + (src_Y * src_row_bytes) + (src_X * bpp));
		uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dst_row_bytes) + (dest_X * bpp));
		for (int i = 0; i < height; i++) {
			memmove(dst, src, row_length);
			src += src_row_bytes;
			dst += dst_row_bytes;
		}
	}
	else {
		// Bottom to top: start at the last row and walk backwards
		const int src_row_bytes = -(int32)ReadMacInt32(p + acclSrcRowBytes);
		const int dst_row_bytes = -(int32)ReadMacInt32(p + acclDestRowBytes);
		uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + ((src_Y + height - 1) * src_row_bytes) + (src_X * bpp));
		uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + ((dest_Y + height - 1) * dst_row_bytes) + (dest_X * bpp));
		for (int i = height - 1; i >= 0; i--) {
			memmove(dst, src, row_length);
			src -= src_row_bytes;
			dst -= dst_row_bytes;
		}
	}
}
/*
BitBlt transfer modes:
0 : srcCopy
1 : srcOr
2 : srcXor
3 : srcBic
4 : notSrcCopy
5 : notSrcOr
6 : notSrcXor
7 : notSrcBic
32 : blend
33 : addPin
34 : addOver
35 : subPin
36 : transparent
37 : adMax
38 : subOver
39 : adMin
50 : hilite
*/
// Hook called for bitblt operations.
// Always reports the destination area as dirty. When every acceleration
// condition below holds, the draw proc field of the parameter block at p is
// set to the native bitblt routine and true is returned; otherwise returns false.
bool NQD_bitblt_hook(uint32 p)
{
	D(bug("accl_draw_hook %08x\n", p));
	NQD_set_dirty_area(p);

	// Check if we can accelerate this bitblt; bail out on the first failing condition
	if (ReadMacInt32(p + 0x018) + ReadMacInt32(p + 0x128) != 0)
		return false;
	if (ReadMacInt32(p + 0x130) != 0)
		return false;

	// At least 8 bits per pixel, and the same depth on both sides
	if (ReadMacInt32(p + acclSrcPixelSize) < 8)
		return false;
	if (ReadMacInt32(p + acclSrcPixelSize) != ReadMacInt32(p + acclDestPixelSize))
		return false;

	// Row bytes of source and destination must have the same sign
	// (the XOR of the two values has its sign bit clear exactly then)
	if ((int32)(ReadMacInt32(p + acclSrcRowBytes) ^ ReadMacInt32(p + acclDestRowBytes)) < 0)
		return false;

	// Only transfer mode 0 (srcCopy) is handled
	if (ReadMacInt32(p + acclTransferMode) != 0)
		return false;

	if ((int32)ReadMacInt32(p + 0x15c) <= 0)
		return false;

	// Yes, set function pointer
	WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_NQD_BITBLT));
	return true;
}
// Hook for operations that have no native implementation here.
// The destination area is still reported as dirty; the hook then always
// returns false.
bool NQD_unknown_hook(uint32 arg)
{
	D(bug("accl_unknown_hook %08x\n", arg));

	NQD_set_dirty_area(arg);

	const bool handled = false;
	return handled;
}
// Sync hook ("wait for graphics operation to finish").
// This implementation performs no waiting of its own and always returns true.
bool NQD_sync_hook(uint32 arg)
{
	D(bug("accl_sync_hook %08x\n", arg));

	const bool done = true;
	return done;
}
/*
* Install Native QuickDraw acceleration hooks
*/
void VideoInstallAccel(void)
{
// Install acceleration hooks
if (PrefsFindBool("gfxaccel")) {
D(bug("Video: Installing acceleration hooks\n"));
uint32 base;
SheepVar bitblt_hook_info(sizeof(accl_hook_info));
base = bitblt_hook_info.addr();
WriteMacInt32(base + 0, NativeTVECT(NATIVE_NQD_BITBLT_HOOK));
WriteMacInt32(base + 4, NativeTVECT(NATIVE_NQD_SYNC_HOOK));
WriteMacInt32(base + 8, ACCL_BITBLT);
NQDMisc(6, bitblt_hook_info.addr());
SheepVar fillrect_hook_info(sizeof(accl_hook_info));
base = fillrect_hook_info.addr();
WriteMacInt32(base + 0, NativeTVECT(NATIVE_NQD_FILLRECT_HOOK));
WriteMacInt32(base + 4, NativeTVECT(NATIVE_NQD_SYNC_HOOK));
WriteMacInt32(base + 8, ACCL_FILLRECT);
NQDMisc(6, fillrect_hook_info.addr());
for (int op = 0; op < 8; op++) {
switch (op) {
case ACCL_BITBLT:
case ACCL_FILLRECT:
continue;
}
SheepVar unknown_hook_info(sizeof(accl_hook_info));
base = unknown_hook_info.addr();
WriteMacInt32(base + 0, NativeTVECT(NATIVE_NQD_UNKNOWN_HOOK));
WriteMacInt32(base + 4, NativeTVECT(NATIVE_NQD_SYNC_HOOK));
WriteMacInt32(base + 8, op);
NQDMisc(6, unknown_hook_info.addr());
}
}
}