macemu/SheepShaver/src/gfxaccel.cpp
gbeauche 3ace37f4eb Implement Direct Addressing mode similarly to Basilisk II. This is to get
SheepShaver working on OSes that don't support mapping of Low Memory globals
at 0x00000000, e.g. Windows.
2004-11-13 14:09:16 +00:00

426 lines
12 KiB
C++

/*
* gfxaccel.cpp - Generic Native QuickDraw acceleration
*
* SheepShaver (C) 1997-2004 Marc Hellwig and Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "sysdeps.h"
#include "prefs.h"
#include "video.h"
#include "video_defs.h"
#define DEBUG 0
#include "debug.h"
/*
* Utility functions
*/
// Translate a pixel depth given in bits into the number of bytes one pixel
// occupies: 8 -> 1, 15/16 -> 2, 24/32 -> 4. Any other depth aborts.
static inline int bytes_per_pixel(int depth)
{
	switch (depth) {
	case 8:
		return 1;
	case 15:
	case 16:
		return 2;
	case 24:
	case 32:
		return 4;
	}

	// Unsupported depth
	abort();
}
/*
* Rectangle inversion
*/
/*
 *  Invert (bitwise NOT) `length` bytes starting at `dest`, i.e. one row of
 *  a rectangle.
 *
 *  The template argument `bpp` is the pixel size in BITS (8, 16 or 32). It
 *  is only used to drop alignment and tail steps that cannot occur for that
 *  pixel size; `length` is always a byte count.
 *
 *  The alignment prologue never consumes more bytes than the row contains,
 *  so `length` cannot wrap around for very short (or empty) rows.
 */
template< int bpp >
static inline void do_invrect(uint8 *dest, uint32 length)
{
#define INVERT_1(PTR, OFS) ((uint8 *)(PTR))[OFS] = ~((uint8 *)(PTR))[OFS]
#define INVERT_2(PTR, OFS) ((uint16 *)(PTR))[OFS] = ~((uint16 *)(PTR))[OFS]
#define INVERT_4(PTR, OFS) ((uint32 *)(PTR))[OFS] = ~((uint32 *)(PTR))[OFS]
#define INVERT_8(PTR, OFS) ((uint64 *)(PTR))[OFS] = ~((uint64 *)(PTR))[OFS]

#ifndef UNALIGNED_PROFITABLE
	// Align on 16-bit boundaries (only if there is at least one byte left)
	if (bpp < 16 && length >= 1 && (((uintptr)dest) & 1)) {
		INVERT_1(dest, 0);
		dest += 1; length -= 1;
	}

	// Align on 32-bit boundaries (only if a whole 16-bit cell is left;
	// shorter remainders are handled by the tail code below)
	if (bpp < 32 && length >= 2 && (((uintptr)dest) & 2)) {
		INVERT_2(dest, 0);
		dest += 2; length -= 2;
	}
#endif

	// Invert 8-byte words, unrolled eight times (Duff's device):
	// r is the number of words in the partial first pass, n the number
	// of passes. dest is pre-advanced so that negative offsets address
	// the words of the current pass.
	if (length >= 8) {
		const int r = (length / 8) % 8;
		dest += r * 8;

		int n = ((length / 8) + 7) / 8;
		switch (r) {
		case 0: do {
				dest += 64;
				INVERT_8(dest, -8);
		case 7: INVERT_8(dest, -7);
		case 6: INVERT_8(dest, -6);
		case 5: INVERT_8(dest, -5);
		case 4: INVERT_8(dest, -4);
		case 3: INVERT_8(dest, -3);
		case 2: INVERT_8(dest, -2);
		case 1: INVERT_8(dest, -1);
				} while (--n > 0);
		}
	}

	// 32-bit cell to invert?
	if (length & 4) {
		INVERT_4(dest, 0);
		// Only advance if a smaller cell can still follow
		if (bpp <= 16)
			dest += 4;
	}

	// 16-bit cell to invert?
	if (bpp <= 16 && (length & 2)) {
		INVERT_2(dest, 0);
		if (bpp <= 8)
			dest += 2;
	}

	// 8-bit cell to invert?
	if (bpp <= 8 && (length & 1))
		INVERT_1(dest, 0);

#undef INVERT_1
#undef INVERT_2
#undef INVERT_4
#undef INVERT_8
}
/*
 *  Native "invert rectangle" routine.
 *
 *  p is the Mac address of the acceleration parameter block. The
 *  destination rectangle and bounds rectangle are read as 16-bit
 *  top/left/bottom/right fields (offsets 0/2/4/6), and every pixel row of
 *  the rectangle is inverted in place in the destination pixmap.
 *
 *  Empty rectangles (width or height <= 0) are ignored, and the row length
 *  in bytes is computed in int so that it is not truncated to 16 bits.
 */
void NQD_invrect(uint32 p)
{
	D(bug("accl_invrect %08x\n", p));

	// Get inversion parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d, bytes_per_row %d\n", width, height, (int32)ReadMacInt32(p + acclDestRowBytes)));

	//!!?? pen_mode == 14
	// (open question left by the original author; not resolved here)

	// And perform the inversion
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));
	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);

	// Nothing to invert; a non-positive width must never reach
	// do_invrect(), which takes an unsigned byte count
	if (width <= 0 || height <= 0)
		return;

	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Row length in bytes, kept in int (width * bpp can exceed int16 range)
	const int row_len = width * bpp;

	// do_invrect<> is parameterised by the pixel size in bits
	switch (bpp) {
	case 1:
		for (int i = 0; i < height; i++) {
			do_invrect<8>(dest, row_len);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_invrect<16>(dest, row_len);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_invrect<32>(dest, row_len);
			dest += dest_row_bytes;
		}
		break;
	}
}
/*
* Rectangle filling
*/
/*
 *  Fill `length` bytes starting at `dest` (one row of a rectangle) with
 *  `color`.
 *
 *  The template argument `bpp` is the pixel size in BITS (8, 16 or 32) and
 *  only selects which alignment and tail steps can occur; `length` is a
 *  byte count. 8- and 16-bit cells store `color` truncated to that width,
 *  64-bit cells store `color` in both halves.
 *  NOTE(review): this presumably expects the caller to pass the pixel
 *  value replicated across all 32 bits of `color` -- confirm.
 *
 *  The alignment prologue never consumes more bytes than the row contains,
 *  so `length` cannot wrap around for very short (or empty) rows.
 */
template< int bpp >
static inline void do_fillrect(uint8 *dest, uint32 color, uint32 length)
{
#define FILL_1(PTR, OFS, VAL) ((uint8 *)(PTR))[OFS] = (VAL)
#define FILL_2(PTR, OFS, VAL) ((uint16 *)(PTR))[OFS] = (VAL)
#define FILL_4(PTR, OFS, VAL) ((uint32 *)(PTR))[OFS] = (VAL)
#define FILL_8(PTR, OFS, VAL) ((uint64 *)(PTR))[OFS] = (VAL)

#ifndef UNALIGNED_PROFITABLE
	// Align on 16-bit boundaries (only if there is at least one byte left)
	if (bpp < 16 && length >= 1 && (((uintptr)dest) & 1)) {
		FILL_1(dest, 0, color);
		dest += 1; length -= 1;
	}

	// Align on 32-bit boundaries (only if a whole 16-bit cell is left;
	// shorter remainders are handled by the tail code below)
	if (bpp < 32 && length >= 2 && (((uintptr)dest) & 2)) {
		FILL_2(dest, 0, color);
		dest += 2; length -= 2;
	}
#endif

	// Fill 8-byte words, unrolled eight times (Duff's device):
	// r is the number of words in the partial first pass, n the number
	// of passes. dest is pre-advanced so that negative offsets address
	// the words of the current pass.
	if (length >= 8) {
		const uint64 c = (((uint64)color) << 32) | color;
		const int r = (length / 8) % 8;
		dest += r * 8;

		int n = ((length / 8) + 7) / 8;
		switch (r) {
		case 0: do {
				dest += 64;
				FILL_8(dest, -8, c);
		case 7: FILL_8(dest, -7, c);
		case 6: FILL_8(dest, -6, c);
		case 5: FILL_8(dest, -5, c);
		case 4: FILL_8(dest, -4, c);
		case 3: FILL_8(dest, -3, c);
		case 2: FILL_8(dest, -2, c);
		case 1: FILL_8(dest, -1, c);
				} while (--n > 0);
		}
	}

	// 32-bit cell to fill?
	if (length & 4) {
		FILL_4(dest, 0, color);
		// Only advance if a smaller cell can still follow
		if (bpp <= 16)
			dest += 4;
	}

	// 16-bit cell to fill?
	if (bpp <= 16 && (length & 2)) {
		FILL_2(dest, 0, color);
		if (bpp <= 8)
			dest += 2;
	}

	// 8-bit cell to fill?
	if (bpp <= 8 && (length & 1))
		FILL_1(dest, 0, color);

#undef FILL_1
#undef FILL_2
#undef FILL_4
#undef FILL_8
}
/*
 *  Native "fill rectangle" routine.
 *
 *  p is the Mac address of the acceleration parameter block. The
 *  destination rectangle and bounds rectangle are read as 16-bit
 *  top/left/bottom/right fields (offsets 0/2/4/6). The fill color is the
 *  fore pen when the pen mode is 8, otherwise the back pen, passed
 *  through htonl() before being written to the pixmap.
 *
 *  Empty rectangles (width or height <= 0) are ignored, and the row length
 *  in bytes is computed in int so that it is not truncated to 16 bits.
 */
void NQD_fillrect(uint32 p)
{
	D(bug("accl_fillrect %08x\n", p));

	// Get filling parameters
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	uint32 color = htonl(ReadMacInt32(p + acclPenMode) == 8 ? ReadMacInt32(p + acclForePen) : ReadMacInt32(p + acclBackPen));
	D(bug(" dest X %d, dest Y %d\n", dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));
	D(bug(" bytes_per_row %d color %08x\n", (int32)ReadMacInt32(p + acclDestRowBytes), color));

	// And perform the fill
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclDestPixelSize));
	const int dest_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);

	// Nothing to fill; a non-positive width must never be turned into
	// an unsigned byte count for memset()/do_fillrect()
	if (width <= 0 || height <= 0)
		return;

	uint8 *dest = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dest_row_bytes) + (dest_X * bpp));

	// Row length in bytes, kept in int (width * bpp can exceed int16 range)
	const int row_len = width * bpp;

	switch (bpp) {
	case 1:
		// memset() uses only the low byte of color
		for (int i = 0; i < height; i++) {
			memset(dest, color, row_len);
			dest += dest_row_bytes;
		}
		break;
	case 2:
		for (int i = 0; i < height; i++) {
			do_fillrect<16>(dest, color, row_len);
			dest += dest_row_bytes;
		}
		break;
	case 4:
		for (int i = 0; i < height; i++) {
			do_fillrect<32>(dest, color, row_len);
			dest += dest_row_bytes;
		}
		break;
	}
}
/*
 *  Fillrect hook: decide whether the operation described by the parameter
 *  block at Mac address p can be handled natively. If so, the matching
 *  native routine is stored in the block's draw procedure field and true
 *  is returned; otherwise the block is left untouched and false is
 *  returned.
 */
bool NQD_fillrect_hook(uint32 p)
{
	D(bug("accl_fillrect_hook %08x\n", p));

	// Check if we can accelerate this fillrect
	// NOTE(review): the meaning of the field at offset 0x284 is not
	// evident from this file; it merely has to be non-zero
	if (ReadMacInt32(p + 0x284) == 0)
		return false;

	// Only pixel sizes of 8 bits and more are handled
	if (ReadMacInt32(p + acclDestPixelSize) < 8)
		return false;

	const int transfer_mode = ReadMacInt32(p + acclTransferMode);
	switch (transfer_mode) {
	case 8:
		// Fill
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_FILLRECT));
		return true;
	case 10:
		// Invert
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_INVRECT));
		return true;
	default:
		// Any other transfer mode is not accelerated
		return false;
	}
}
/*
* Isomorphic rectangle blitting
*/
// TODO: optimize for VOSF and target pixmap == screen
/*
 *  Native rectangle copy between two pixmaps of the same pixel size.
 *
 *  p is the Mac address of the acceleration parameter block. Source and
 *  destination rectangles are read as 16-bit top/left/bottom/right fields
 *  (offsets 0/2/4/6) relative to their bounds rectangles; the size of the
 *  copied area is taken from the destination rectangle.
 *
 *  If the source row bytes value is positive, rows are copied from top to
 *  bottom. Otherwise both row byte values are negated and rows are copied
 *  from the last row upwards. Each row is copied with memmove(), so source
 *  and destination may overlap within a row.
 *
 *  Empty rectangles (width or height <= 0) are ignored, and the row length
 *  in bytes is computed in int so that it is not truncated to 16 bits.
 */
void NQD_bitblt(uint32 p)
{
	D(bug("accl_bitblt %08x\n", p));

	// Get blitting parameters
	int16 src_X = (int16)ReadMacInt16(p + acclSrcRect + 2) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 2);
	int16 src_Y = (int16)ReadMacInt16(p + acclSrcRect + 0) - (int16)ReadMacInt16(p + acclSrcBoundsRect + 0);
	int16 dest_X = (int16)ReadMacInt16(p + acclDestRect + 2) - (int16)ReadMacInt16(p + acclDestBoundsRect + 2);
	int16 dest_Y = (int16)ReadMacInt16(p + acclDestRect + 0) - (int16)ReadMacInt16(p + acclDestBoundsRect + 0);
	int16 width = (int16)ReadMacInt16(p + acclDestRect + 6) - (int16)ReadMacInt16(p + acclDestRect + 2);
	int16 height = (int16)ReadMacInt16(p + acclDestRect + 4) - (int16)ReadMacInt16(p + acclDestRect + 0);
	D(bug(" src addr %08x, dest addr %08x\n", ReadMacInt32(p + acclSrcBaseAddr), ReadMacInt32(p + acclDestBaseAddr)));
	D(bug(" src X %d, src Y %d, dest X %d, dest Y %d\n", src_X, src_Y, dest_X, dest_Y));
	D(bug(" width %d, height %d\n", width, height));

	// And perform the blit
	const int bpp = bytes_per_pixel(ReadMacInt32(p + acclSrcPixelSize));

	// Nothing to copy; a non-positive width must never be turned into
	// an unsigned byte count for memmove()
	if (width <= 0 || height <= 0)
		return;

	// Row length in bytes, kept in int (width * bpp can exceed int16 range)
	const int row_len = width * bpp;

	if ((int32)ReadMacInt32(p + acclSrcRowBytes) > 0) {
		// Copy rows top to bottom
		const int src_row_bytes = (int32)ReadMacInt32(p + acclSrcRowBytes);
		const int dst_row_bytes = (int32)ReadMacInt32(p + acclDestRowBytes);
		uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + (src_Y * src_row_bytes) + (src_X * bpp));
		uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + (dest_Y * dst_row_bytes) + (dest_X * bpp));
		for (int i = 0; i < height; i++) {
			memmove(dst, src, row_len);
			src += src_row_bytes;
			dst += dst_row_bytes;
		}
	}
	else {
		// Row bytes are negated here; start at the last row and
		// copy bottom to top
		const int src_row_bytes = -(int32)ReadMacInt32(p + acclSrcRowBytes);
		const int dst_row_bytes = -(int32)ReadMacInt32(p + acclDestRowBytes);
		uint8 *src = Mac2HostAddr(ReadMacInt32(p + acclSrcBaseAddr) + ((src_Y + height - 1) * src_row_bytes) + (src_X * bpp));
		uint8 *dst = Mac2HostAddr(ReadMacInt32(p + acclDestBaseAddr) + ((dest_Y + height - 1) * dst_row_bytes) + (dest_X * bpp));
		for (int i = height - 1; i >= 0; i--) {
			memmove(dst, src, row_len);
			src -= src_row_bytes;
			dst -= dst_row_bytes;
		}
	}
}
/*
BitBlt transfer modes:
0 : srcCopy
1 : srcOr
2 : srcXor
3 : srcBic
4 : notSrcCopy
5 : notSrcOr
6 : notSrcXor
7 : notSrcBic
32 : blend
33 : addPin
34 : addOver
35 : subPin
36 : transparent
37 : adMax
38 : subOver
39 : adMin
50 : hilite
*/
/*
 *  Bitblt hook: decide whether the blit described by the parameter block
 *  at Mac address p can be handled by NQD_bitblt(). If so, the native
 *  routine is stored in the block's draw procedure field and true is
 *  returned; otherwise the block is left untouched and false is returned.
 *
 *  Accepted are plain srcCopy transfers (mode 0) between pixmaps of equal
 *  pixel size (>= 8 bits) whose row bytes values have the same sign.
 *  NQD_bitblt() picks its copy direction from the source row bytes alone,
 *  so mixed signs must be rejected here.
 */
bool NQD_bitblt_hook(uint32 p)
{
	D(bug("accl_draw_hook %08x\n", p));

	// Check if we can accelerate this bitblt
	// NOTE(review): the fields at offsets 0x018, 0x128, 0x130 and 0x15c
	// are not named in this file; their meaning should be confirmed
	// against the accelerator parameter block layout.
	//
	// The XOR of the two row bytes values must be compared as a signed
	// quantity: its sign bit is clear exactly when both values have the
	// same sign. Compared as unsigned, ">= 0" would always hold.
	if (ReadMacInt32(p + 0x018) + ReadMacInt32(p + 0x128) == 0 &&
		ReadMacInt32(p + 0x130) == 0 &&
		ReadMacInt32(p + acclSrcPixelSize) >= 8 &&
		ReadMacInt32(p + acclSrcPixelSize) == ReadMacInt32(p + acclDestPixelSize) &&
		(int32)(ReadMacInt32(p + acclSrcRowBytes) ^ ReadMacInt32(p + acclDestRowBytes)) >= 0 && // same sign?
		ReadMacInt32(p + acclTransferMode) == 0 && // srcCopy?
		ReadMacInt32(p + 0x15c) > 0) {

		// Yes, set function pointer
		WriteMacInt32(p + acclDrawProc, NativeTVECT(NATIVE_BITBLT));
		return true;
	}
	return false;
}
// Synchronisation hook, called to wait for a graphics operation to finish.
// It does no waiting and always reports success.
bool NQD_sync_hook(uint32 arg)
{
	D(bug("accl_sync_hook %08x\n", arg));

	// The drawing routines in this file complete before they return,
	// so there is never anything pending
	const bool operation_finished = true;
	return operation_finished;
}
/*
* Install Native QuickDraw acceleration hooks
*/
void VideoInstallAccel(void)
{
// Install acceleration hooks
if (PrefsFindBool("gfxaccel")) {
D(bug("Video: Installing acceleration hooks\n"));
uint32 base;
SheepVar bitblt_hook_info(sizeof(accl_hook_info));
base = bitblt_hook_info.addr();
WriteMacInt32(base + 0, NativeTVECT(NATIVE_BITBLT_HOOK));
WriteMacInt32(base + 4, NativeTVECT(NATIVE_SYNC_HOOK));
WriteMacInt32(base + 8, ACCL_BITBLT);
NQDMisc(6, bitblt_hook_info.addr());
SheepVar fillrect_hook_info(sizeof(accl_hook_info));
base = fillrect_hook_info.addr();
WriteMacInt32(base + 0, NativeTVECT(NATIVE_FILLRECT_HOOK));
WriteMacInt32(base + 4, NativeTVECT(NATIVE_SYNC_HOOK));
WriteMacInt32(base + 8, ACCL_FILLRECT);
NQDMisc(6, fillrect_hook_info.addr());
}
}